[gnue] r8400 - in trunk/gnue-common/src/formatting/masks: . tests
From: jamest
Subject: [gnue] r8400 - in trunk/gnue-common/src/formatting/masks: . tests
Date: Wed, 12 Apr 2006 20:17:52 -0500 (CDT)
Author: jamest
Date: 2006-04-12 20:17:51 -0500 (Wed, 12 Apr 2006)
New Revision: 8400
Added:
trunk/gnue-common/src/formatting/masks/InputTokens.py
trunk/gnue-common/src/formatting/masks/MaskTokenizer.py
trunk/gnue-common/src/formatting/masks/MaskTokens.py
Removed:
trunk/gnue-common/src/formatting/masks/MaskParser.py
trunk/gnue-common/src/formatting/masks/Tokens.py
Modified:
trunk/gnue-common/src/formatting/masks/InputMask.py
trunk/gnue-common/src/formatting/masks/tests/mask_tokenizer.py
Log:
cleanup and documentation
more pep8 changes
Modified: trunk/gnue-common/src/formatting/masks/InputMask.py
===================================================================
--- trunk/gnue-common/src/formatting/masks/InputMask.py 2006-04-12 19:11:58 UTC (rev 8399)
+++ trunk/gnue-common/src/formatting/masks/InputMask.py 2006-04-13 01:17:51 UTC (rev 8400)
@@ -27,17 +27,18 @@
Based on lex/yacc parsing (via Plex)
"""
-# NOTES:
-#
+__revision__ = "$Id$"
+
+from cStringIO import StringIO
import pprint
-from gnue.common.external.plex import *
-from cStringIO import StringIO
-import string
-import MaskParser
+from gnue.common.external.plex import Scanner, Lexicon, Errors, \
+ Str, Begin, State, AnyChar, Rep1, Any, Bol, Eof, IGNORE, Opt
from Errors import *
-import Tokens
+import MaskTokenizer
+import MaskTokens
+import InputTokens
class InputMask:
"""
@@ -81,7 +82,7 @@
## print
## print "Cursor=%s" % self.cursor
while i <= last_token:
- if isinstance(self.tokens[i], Tokens.LitTok):
+ if isinstance(self.tokens[i], InputTokens.LitTok):
if len(disp) < self.cursor:
disp += emptyDisplay[i]
else:
@@ -168,10 +169,10 @@
while last_pos > 0 and not inputted[last_pos]:
last_pos -= 1
- if last_pos < last_token and isinstance(tokens[last_pos+1], Tokens.LitTok):
+ if last_pos < last_token and isinstance(tokens[last_pos+1], InputTokens.LitTok):
last_pos += 1
while last_pos < last_token and \
- isinstance(tokens[last_pos], Tokens.LitTok):
+ isinstance(tokens[last_pos], InputTokens.LitTok):
last_pos += 1
@@ -191,7 +192,7 @@
emptyDisplay = self.emptyDisplay
actualDisplay = self.actualDisplay = []
for i in range(len(self.tokens)):
- if isinstance(tokens, Tokens.LitTok):
+ if isinstance(tokens, InputTokens.LitTok):
actualDisplay.append(emptyDisplay[i])
inputted[i] = ""
else:
@@ -209,7 +210,7 @@
inputted[i] = ni
actualDisplay.append(inputted[i])
- self.display = string.join(actualDisplay,"")
+ self.display = "".join(actualDisplay)
return (self.display, self.cursor)
@@ -293,12 +294,12 @@
## print "Token at %s, pos=%s, rpos=%s" % (token_at, pos, rpos)
if left:
- while token_at > 0 and isinstance(self.tokens[token_at],Tokens.LitTok):
+ while token_at > 0 and isinstance(self.tokens[token_at],InputTokens.LitTok):
pos -= len(self.emptyDisplay[token_at])
token_at -= 1
else:
while token_at < last_token and \
- isinstance(self.tokens[token_at],Tokens.LitTok):
+ isinstance(self.tokens[token_at],InputTokens.LitTok):
pos += len(self.emptyDisplay[token_at])
token_at += 1
@@ -331,11 +332,12 @@
@param numeric: Is this input mask numeric input only?
@param date: Is this input mask date input only?
"""
+
# -------------------------------------------------------------------------
# Generate a list of parser tokens that define the input mask
# -------------------------------------------------------------------------
#
- parser = MaskParser.InputMaskParser(mask,'inline')
+ parser = MaskTokenizer.MaskTokenizer(mask,'inline')
self.pp = pprint.PrettyPrinter(indent=4)
self.isnumeric = numeric
@@ -344,9 +346,10 @@
# List of all tokens. Note that all {#}
# expansions have already happened.
- ptokens = parser.tokens
+ maskTokens = parser.tokens
+
# print "1st token list"
-# for item in ptokens:
+# for item in maskTokens:
# print item.token,
# print
# If non-zero, position of the right-to-left token
@@ -369,24 +372,24 @@
#
#
i = 0
- while i < len(ptokens):
- ptoken=ptokens[i]
- if isinstance(ptoken ,MaskParser.Literal):
+ while i < len(maskTokens):
+ ptoken=maskTokens[i]
+ if isinstance(ptoken ,MaskTokens.Literal):
chars = ""
# Merge consecutive literals into one rule
# (makes for easier logic)
- while i < len(ptokens) and isinstance(ptokens[i], MaskParser.Literal):
- chars += ptokens[i].token
+ while i < len(maskTokens) and isinstance(maskTokens[i], MaskTokens.Literal):
+ chars += maskTokens[i].token
i += 1
- token = Tokens.tLiteral(chars)
+ token = InputTokens.tLiteral(chars)
i -= 1 # Because we add one later...
- elif isinstance(ptoken ,MaskParser.TokenSet):
+ elif isinstance(ptoken ,MaskTokens.TokenSet):
if ptoken.numeric:
- token = Tokens.tCustomNum(ptoken.token)
+ token = InputTokens.tCustomNum(ptoken.token)
else:
- token = Tokens.tCustom(ptoken.token)
+ token = InputTokens.tCustom(ptoken.token)
else:
- token = Tokens.tokenMap[ptoken.token]()
+ token = InputTokens.tokenMap[ptoken.token]()
# Honor force_upper/lower
try:
@@ -461,13 +464,13 @@
# to complete the ruleset
try:
possibly_completed = possibly_completed or \
- ruleset[k+1]==Tokens.forcible
+ ruleset[k+1]==InputTokens.forcible
except IndexError:
pass
# Add the rule, skipping any class foricble items
# as they are not actually tokens
- if not path == Tokens.forcible:
+ if not path == InputTokens.forcible:
if (k < len(ruleset) - 1):
# There are additional items in this ruleset so
@@ -517,12 +520,12 @@
# this will be used in the next iteration to
#
- Tokens.printLexiconTree(lexicon)
+ InputTokens.printLexiconTree(lexicon)
# Create a consolidated validation rule so we
# can test if inputted string is "complete". This
# creates the single rule for each position.
- self.validationRule = Tokens.buildValidationRule(tokens)
+ self.validationRule = InputTokens.buildValidationRule(tokens)
# Pre-compile the lexicon for this mask
DEBUG=StringIO()
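
The InputMask.py hunks above are mostly mechanical renames (Tokens becomes InputTokens, MaskParser becomes MaskTokenizer), plus two PEP 8 cleanups: the wildcard Plex import is narrowed to the names actually used, and the deprecated string.join() helper is replaced by the str method. A minimal sketch of the join idiom, not taken from the file:

import string

parts = ["(", "555", ") ", "123-4567"]

# Old style removed in this revision: helper function from the string module.
old_display = string.join(parts, "")

# New style used to build self.display: method on the separator string.
new_display = "".join(parts)

assert old_display == new_display == "(555) 123-4567"
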
Copied: trunk/gnue-common/src/formatting/masks/InputTokens.py (from rev 8399, trunk/gnue-common/src/formatting/masks/Tokens.py)
===================================================================
--- trunk/gnue-common/src/formatting/masks/Tokens.py 2006-04-12 19:11:58 UTC (rev 8399)
+++ trunk/gnue-common/src/formatting/masks/InputTokens.py 2006-04-13 01:17:51 UTC (rev 8400)
@@ -0,0 +1,471 @@
+#
+# This file is part of GNU Enterprise.
+#
+# GNU Enterprise is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation; either
+# version 2, or (at your option) any later version.
+#
+# GNU Enterprise is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with program; see the file COPYING. If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place
+# - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Copyright 2001-2006 Free Software Foundation
+#
+# pylint: disable-msg=R0903,
+# R0903 disabled as these classes represent placeholders and
+# as such don't have public methods
+#
+# FILE:
+# InputTokens.py
+#
+# DESCRIPTION:
+"""
+Tokens used to define the components that make up an input mask.
+
+These tokens are used to define the final lexicon used by the
+mask.
+"""
+__revision__ = "$Id$"
+
+from gnue.common.external.plex \
+ import Str, State, AnyChar, Rep1, Any, Range, NoCase, Bol, Eol, Opt
+
+import string
+import locale
+
+FORCE_UPPER = True
+FORCE_LOWER = False
+
+digit = Any(string.digits)
+letter = Any(string.letters)
+
+# =============================================================================
+# Base tokens
+#
+# These are inherited by other tokens that are actually
+# used in the input mask. Instances of these classes should not
+# be used directly.
+# =============================================================================
+class Tok:
+ """
+ Base token containing all the flags and values that an
+ input token may require.
+ """
+
+ # True if this character is optional
+ optional = False
+
+ # If set, the character to auto-fill the string with
+ autochar = None
+
+ # If set, the mask char to fill the display with
+ # (Note: autochar takes precedence; default is _)
+ maskchar = None
+
+ # A list of partial grammar rules to
+ # build our character-at-a-time parser
+ # This list should itself contain lists
+ paths = []
+
+ # Number of characters this space takes up
+ maxchars = 1
+
+ # Left pad with zeros
+ # (only makes sense if maxchars > 1)
+ zero_pad = False
+
+ # As implied...
+ force_upper = False
+ force_lower = False
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__, self.symbol)
+
+ def getProperDisplay(self, text):
+ if self.zero_pad and self.maxchars == 2 and len(text) == 1:
+ return "0" + text
+ else:
+ return text
+
+class TextTok(Tok):
+ """
+ Base text token
+ """
+
+class DateTok(Tok):
+ """
+ Base date token
+ """
+
+class NumTok(Tok):
+ """
+ Base numeric token
+ """
+
+class LitTok(Tok):
+ """
+ Base literal token
+ """
+ optional = True
+ def __repr__(self):
+ """
+ Force the class to return a custom string representation of itself.
+ Not sure why yet.
+ """
+ return "%s(%s)" % (self.__class__.__name__, self.autochar)
+
+
+class forcible:
+ """
+ This is a placeholder for the paths=[]
+ that denotes when a mask can be considered complete
+ *if* a forced marker is provided.
+ """
+
+# =============================================================================
+# Base tokens
+# =============================================================================
+class tChar(TextTok):
+ """
+ Any character, required
+ """
+ symbol = '_'
+ paths = [[Any(string.letters + string.digits + ' ' + string.punctuation)]]
+
+class tCharOpt(tChar):
+ """
+ Any character, optional
+ """
+ symbol = '?'
+ optional = True
+
+class tA(TextTok):
+ """
+ Any alphanumeric, required
+ """
+ symbol = 'A'
+ paths = [[Any(string.letters + string.digits)]]
+
+class ta(tA):
+ """
+ Any alphanumeric, optional
+ """
+ symbol = 'a'
+ optional = True
+
+class tL(TextTok):
+ """
+ Any letter, required
+ """
+ symbol = 'L'
+ paths = [[letter]]
+
+class tl(tL):
+ """
+ Any letter, optional
+ """
+ symbol = 'l'
+ optional = True
+
+class tC(TextTok):
+ """
+ Any character (alphanum) or space, required
+ """
+ symbol = 'C'
+ paths = [[Any(string.letters + string.digits + ' ')]]
+
+class tc(tC):
+ """
+ Any character (alphanum) or space, optional
+ """
+ symbol = 'c'
+ optional = True
+
+class tsign(NumTok):
+ """
+ Positive or negative sign (one per mask) (literal)
+ """
+ symbol = '-'
+ optional = True
+ paths = [[Any('+-')]]
+
+class tDigit(NumTok):
+ """
+ Any digit, required
+ """
+ symbol = '0'
+ paths = [[digit]]
+# optional=True # For input masks, this is largely true?
+
+class tDigitOpt(tDigit):
+ """
+ Any digit, optional
+ """
+ symbol = '#'
+ optional = True
+
+class tM(DateTok):
+ """
+ Month, with zero padding
+ """
+ symbol = 'M'
+ maxchars = 2
+ zero_pad = True
+ paths = [ [ Str('1'), forcible, Any('012') ], # months 1, 10 - 12
+ [ Str('0'), Range('19') ], # months 01 - 09
+ [ Range('29') ] ] # months 2 - 9
+
+class tm(tM):
+ """
+ Month, no zero padding
+ """
+ symbol = 'm'
+
+class tD(DateTok):
+ """
+ Day
+ """
+ symbol = 'D'
+ zero_pad = True
+ maxchars = 2
+ paths = [ [ Str('3'), forcible, Any('01') ], # days 3, 30 - 31
+ [ Any('12'), forcible, digit ], # days 1, 2, 10 - 29
+ [ Str('0'), Range('19')], # days 01 - 09
+ [ Range('49') ] ] # days 4 - 9
+
+class td(tD):
+ """
+ Day, no zero padding
+ """
+ symbol = 'd'
+
+class tY(DateTok):
+ """
+ Year - 4 digits
+ """
+ symbol = 'Y'
+ maxchars = 4
+ paths = [ [ digit ]*4 ]
+
+class ty(DateTok):
+ """
+ Year - 2 digits
+ """
+ symbol = 'y'
+ maxchars = 2
+ paths = [ [ digit ]*2 ]
+
+class tH(DateTok):
+ """
+ Hour
+ """
+ symbol = 'H'
+ maxchars = 2
+ paths = [ [ Str('2'), forcible, Any('0123') ], # Hour 2, 20-23
+ [ Any('01'), forcible, digit ], # Hour 00 - 19
+ [ Range('39') ] ] # Hour 3 - 9
+
+class tI(DateTok):
+ """
+ Minute
+ """
+ symbol = 'I'
+ maxchars = 2
+ paths = [ [Any('012345'), digit ] ]
+
+class tS(DateTok):
+ """
+ Seconds
+ """
+ symbol = 'S'
+ maxchars = 2
+ paths = [ [ Any('012345'), digit ] ]
+
+class tP(DateTok):
+ """
+ PM AM token
+ """
+ symbol = 'P'
+ maxchars = 2
+ paths = [ [ NoCase(Str('p','a')), NoCase(Str('m')) ] ]
+ force_upper = True
+
+class tp(tP):
+ """
+ pm am token
+ """
+ symbol = 'p'
+ maxchars = 2
+ force_lower = True
+
+class tLiteral(LitTok):
+ def __init__(self, char):
+ path = []
+
+ for ch in char:
+ path.append(Str(ch))
+ if len(char) == 1:
+ self.symbol = "\\%s" % char
+ else:
+ self.symbol = '"' + char.replace('\\','\\\\').replace('"','\\"') +
'"'
+ self.paths = [path]
+ self.autochar = char
+
+class tDecSep(LitTok):
+ """
+ Decimal separator
+ """
+ symbol = '.'
+ autochar = locale.localeconv()['decimal_point'] or '.'
+ paths = [[Str(autochar)]]
+
+class tThouSep(LitTok):
+ """
+ Thousands separator
+ """
+ symbol = ','
+ autochar = locale.localeconv()['thousands_sep'] or ','
+ paths = [[Str(autochar)]]
+
+class tTimeSep(LitTok):
+ """
+ Time Separator
+ """
+ symbol = ':'
+ autochar = ':' # TODO: *Where* is this in locale?!?!?
+ # >>> locale.nl_langinfo(locale.T_FMT) ?
+ paths = [[Str(autochar)]]
+
+
+class tDateSep(LitTok):
+ """
+ Date Separator
+ """
+ symbol = '/'
+ autochar = '/' # TODO: *Where* is this in locale?!?!?
+ # >>> locale.nl_langinfo(locale.D_FMT) ?
+ paths=[[Str(autochar)]]
+
+class tCustom(TextTok):
+ """
+ Custom text token
+ (Passed in a set of valid characters)
+ """
+ def __init__(self, chars):
+ self.paths = [[Any(chars)]]
+
+class tCustomNum(NumTok):
+ """
+ Custom numeric token
+ (Passed in a set of valid digits)
+ """
+ def __init__(self, chars):
+ self.paths = [[Any(chars)]]
+ self.symbol = '[%s]' % chars.replace('\\','\\\\').replace(']','\\]').replace('-','\\-')
+
+# ---------------------------------------------------------------------------
+# Map of tokens to classes
+# ---------------------------------------------------------------------------
+tokenMap = {
+ # Input/output tokens
+ '_': tChar, # Any character, required
+ '?': tCharOpt, # Any character, optional
+ 'A': tA, # Any alphanumeric, required
+ 'a': ta, # Any alphanumeric, optional
+ 'L': tL, # Any letter, required
+ 'l': tl, # Any letter, optional
+ 'C': tC, # Any character (alphanum) or space, required
+ 'c': tc, # Any character (alphanum) or space, optional
+ '+': tsign, # Positive or negative sign (one per mask)
+ '0': tDigit, # Any digit, required
+ '#': tDigitOpt, # Any digit, optional
+ 'M': tM, # Month, zero padding
+ 'D': tD, # Day, zero padding
+ 'Y': tY, # Year - 4 digits
+ 'y': ty, # Year - 2 digits
+ 'H': tH, # Hour
+ 'I': tI, # Minute
+ 'S': tS, # Seconds
+ 'P': tP, # PM AM token
+ 'p': tp, # pm am token
+ '.': tDecSep, # Decimal separator
+ ',': tThouSep, # Thousands separator
+ ':': tTimeSep, # Time Separator
+ '/': tDateSep, # Date Separator
+ # Output-only
+ 'm': tm, # Month, no zero padding
+ 'd': td, # Day, no zero padding
+}
+
+# =============================================================================
+# Module level functions
+# =============================================================================
+def buildSingleValidationRule(token, honorOptional = True):
+ """
+ Build a validation rule for a specific token
+ """
+ val = None
+ for ruleset in token.paths:
+ v2 = v3 = None
+ startoptional = False
+ for rule in ruleset:
+ if rule == forcible:
+ startoptional = True
+ continue
+ if startoptional:
+ if v3 is None:
+ v3 = rule
+ else:
+ v3 = v3 + rule
+ else:
+ if v2 is None:
+ v2 = rule
+ else:
+ v2 = v2 + rule
+ if v3 is not None:
+ v2 = v2 + v3
+ if val is None:
+ val = v2
+ else:
+ val = val | v2
+ if honorOptional and token.optional:
+ return Opt(val)
+ else:
+ return val
+
+def buildValidationRule(tokens):
+ """
+ Take a list of tokens and combine all their rule paths
+ into a single rule that validates whether a string is
+ "complete" wrt the input mask or not.
+ """
+ val = Bol
+ for token in tokens:
+ val = val + buildSingleValidationRule(token)
+ if not tokens:
+ val = val + Rep1(AnyChar)
+ return val + Eol
+
+
+# =============================================================================
+# Debugging functions
+# =============================================================================
+def printLexiconTree(lexicon, indent = 0):
+ """
+ Function useful for debuging.
+ """
+ for foo in lexicon:
+ if isinstance(foo, State):
+ print (" "*indent) + ("State: %s" % str((foo.name)))
+ printLexiconTree(foo.tokens, indent + 2)
+ elif type(foo) == type(()) and len(foo) == 2:
+ print " "*indent + str(foo[0])
+ else:
+ print " "*indent + str(foo)
Deleted: trunk/gnue-common/src/formatting/masks/MaskParser.py
===================================================================
--- trunk/gnue-common/src/formatting/masks/MaskParser.py 2006-04-12 19:11:58 UTC (rev 8399)
+++ trunk/gnue-common/src/formatting/masks/MaskParser.py 2006-04-13 01:17:51 UTC (rev 8400)
@@ -1,573 +0,0 @@
-#
-# This file is part of GNU Enterprise.
-#
-# GNU Enterprise is free software; you can redistribute it
-# and/or modify it under the terms of the GNU General Public
-# License as published by the Free Software Foundation; either
-# version 2, or (at your option) any later version.
-#
-# GNU Enterprise is distributed in the hope that it will be
-# useful, but WITHOUT ANY WARRANTY; without even the implied
-# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-# PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public
-# License along with program; see the file COPYING. If not,
-# write to the Free Software Foundation, Inc., 59 Temple Place
-# - Suite 330, Boston, MA 02111-1307, USA.
-#
-# Copyright 2001-2006 Free Software Foundation
-#
-# FILE:
-# MaskParser
-#
-# DESCRIPTION:
-"""
-A parser which takes a text based mask and generates a list of
-token class instances that represent that mask. Any repeaters
-found in the mask ({3}) are replaced with the appropriate number
-of tokens so that the mask class will not have to deal with
-them.
-
-Valid tokens include:
-
-"""
-__revision__ = "$Id$"
-
-import string
-import StringIO
-
-from gnue.common.external.plex import \
- Scanner, Lexicon, Errors, \
- Str, Begin, State, AnyChar, Rep1, Any
-
-from gnue.common.formatting.masks.Errors import MaskDefinitionError
-
-# =============================================================================
-# Support token classes
-# =============================================================================
-#
-# The following classes all represent tokens returned by the mask tokenizer
-#
-
-class BaseToken:
- """
- Basic parser token class.
-
- Not used directly, but inherited by the other defined tokens
- Literal, Token, etc.
- """
- numeric = False
- date = False
- text = False
- literal = False
- token = False
-
- def __init__(self, t1, t2=None, *args):
- """
- Token construtor
- """
- if t2:
- self.token = t2
- else:
- self.token = t1
-
-# -----------------------------------------------------------------------------
-# Standard token classes
-# -----------------------------------------------------------------------------
-class Token(BaseToken):
- """
- Class typically used to create normal tokens as
- opposed to special tokens like literal.
-
- It sets the standard options so that each individual
- token class doesn't need to.
- """
- force_lower = False
- force_upper = False
- token = True
-
-class NumberToken(Token):
- """
- Numeric token (#9-,.)
- """
- numeric = True
-
-class DateToken(Token):
- """
- Date token (MDYyHIS:/)
- """
- date = True
-
-class TextToken(Token):
- """
- Text token
-
- A test token represents 1 standard alphanumeric character.
- """
- text = True
-
-class TokenSet(Token):
- """
- Token defined by user with [] notation.
- Can behave like a NumberToken or TextToken,
- depending on contents of [].
- """
- def __init__(self, token, *args):
-
- Token.__init__(self, token, *args)
-
- # Are we all-numeric?
- self.numeric = token.isdigit()
- self.token = token
-
- if not self.numeric:
- self.text = True
-
-# -----------------------------------------------------------------------------
-# Special token classes
-# -----------------------------------------------------------------------------
-class Literal(BaseToken):
- """
- A literal string that the developer wants in the string.
- Note that for our purposes, the basic "literals" aren't
- really Literals(), but special cases of Token classes.
- So all literals represented by this class are denoted
- with \ or "" syntaxes.
- """
- literal = True
-
-
-class RightToLeft(BaseToken):
- """
- Temporary token class used to note the
- position of ! modifiers
- """
- numeric = True
-
-class CaseModifier:
- """
- Temporary token class used to record > and <
- markers. These cause the modified token to have
- either force_upper or force_lower set, so the
- other classes won't ever see CaseModifier
- instances.
- """
- pass
-
-class Repeater:
- """
- Temporary token class used to record {#}
- markers. These are replaced with the actual
- represented tokens before being passed out
- of MaskParser (i.e., 0{3} would be returned
- as 000, so the other classes won't ever see
- Repeater instances.
- """
- def __init__(self, count):
- self.count = count
-
-
-# =============================================================================
-# Input mask parser
-# =============================================================================
-class InputMaskParser(Scanner):
- """
- Custom plex scanner used to contstruct a token list which represents
- an input mask. This token list is used by the input mask to define
- valid input for each position of the input.
-
- Takes a file handle containing an input mask and creates a
- list of Tokens which define the input mask
- """
- def get_type(self):
- """
- Returns the apparent type of this mask.
-
- @rtype: string
- @return: The value 'text', 'numeric', or 'date'
- """
- return type
-
- def get_tokens(self):
- """
- Returns a list of the tokens after parsing the input mask.
-
- @rtype: list
- @return: The list of tokens
- """
- return self.tokens[:]
-
- # =========================================================================
- # Private stuff
- # =========================================================================
-
- # -------------------------------------------------------------------------
- # Lexicon action functions
- # -------------------------------------------------------------------------
- def _check_single(self, text):
- """
- Function to add single instance tokens to the input mask.
-
- A single instance token is something that can appear only once
- in an input mask.
- """
- if text in self.__singles:
- raise Errors.UnrecognizedInput(self, \
- 'Mask can only have one "%s" token' %text)
- self.__singles.append(text)
- if text == '!':
- self.produce (RightToLeft(text))
- elif text in '.+,':
- self.produce(NumberToken(text))
- else:
- self.produce(TextToken(text))
-
- def _literal(self, text):
- """
- A text literal that should appear as is in the mask
- """
- self.produce(Literal(text))
-
- def _literal_2nd(self, text):
- """
- Closes the literal string
- """
- return self._literal(text[1:])
-
- def _escape(self, text):
- """
- An escaped character such as \$ to display a $
- """
- self.begin('')
- self.produce(Literal(text))
-
- def _repeater(self, text):
- """
- Action to process an input mask repeater.
-
- A repeater tells the parser to repeat the previous token a
- specified number of times.
-
- @param text: The value pulled from between the {} which
- denotes the number of times to repeat.
- """
- self.produce(Repeater(int(text)))
-
- def _begin_set(self, text):
- """
- Action to process the start of a set of valid characters.
-
- The scanner will be placed into set state and the list
- of valid characters will be reset.
- """
- self.begin('set')
- self._set = ""
-
- def _add_set(self, text):
- """
- Action to add a character to the set currently being constructed.
-
- Only called when the scanner is in state "set".
-
- The character read will be added to the character sting
- containing the possible valid values.
- """
- self._set += text
-
- def _add_set_2nd(self, text):
- """
- Action to add a special character to a set being built.
-
- Used when an escaped set character \[ or \] is found
- in the list of valid characters to be added to the set
- """
- return self._add_set(text[1:])
-
- def _end_set(self, text):
- """
- Action to process the end of a set.
-
- Only called when the scanner is in state "set".
-
- The list of possible characters that were defined in the set will be used
- to build an instance of a TokenSet class. As part of this function the
- scanner will set to default state.
- """
- self.begin('')
- self.produce(TokenSet(self._set))
-
- # =========================================================================
- # Lexicon defintions
- # =========================================================================
- #
- # -------------------------------------------------------------------------
- # Base Lexicon definition
- # -------------------------------------------------------------------------
- # This lexicon is the base used by all masks
- #
-
- _lexicon = [
- # ---------------------------------------------------------------------
- # Default state definitions
- # ---------------------------------------------------------------------
- (Str('\\'), Begin('escape')), # found \, set state to escape
- #
- (Str("'"), Begin('quoted')), # found ', set state to quoted
- #
- (Str('"'), Begin('quoted2')), # found ", set state to qoute2
- #
- (Str('{'), Begin('repeater')), # found {, set state to
- # repeater
- #
- (Str('['), _begin_set), # found [, execute _begin_set
- # the function will set state
- # to set when executed
- #
- (Str(' '), Literal), # found a space, return a
- # literal char instance
- #
- (Any('+.,'), _check_single), # these characters can appear
- # only once in an input mask
- #
- (Any('_?AaLlCc'), TextToken), # found a text character
- # return a text token instance
- #
- (Any('MDYyHISPp:/'), DateToken), # found a date character
- # return a date token instance
- #
- (Any('#0'), NumberToken), # found a number character
- # return a number token
- # instance
- #
- (Any('<>'), CaseModifier), # found a case modifier
- # return case modifier
- # instance
-
- # ---------------------------------------------------------------------
- # Escape State
- # ---------------------------------------------------------------------
- # The escape state is entered whenever a backslash is encountered while
- # in the default state. It's purpose is to allow the placement of what
- # would normally be reserved characters into the input mask
- #
- State('escape', [
- (AnyChar, _escape), # No matter which character is next
- # execute _escape, the function will
- # create a literal instance and set
- # the state back to default
- ]),
-
- # ---------------------------------------------------------------------
- # Quoted state
- # ---------------------------------------------------------------------
- # The quoted state is entered whenever a single quote is encountered
- # thile in the default state. It's purpose is to allow quoted strings
- # inside the input mask to sent through as their literal value
- #
- State('quoted', [
- (Str("\\")+Str("'"), _literal_2nd), # Handle \' in the string
- (Str("'"), Begin('')), # found ', set state to default
- (AnyChar, _literal) # Process as literal character
- ]),
-
- # ---------------------------------------------------------------------
- # quote2 state
- # ---------------------------------------------------------------------
- # This works the exact same way as the quoted state but is used
- # when a double quote is encountered. ' and " get seperate states
- # so that one type can always enclose the other
- #
- # Example : "Today's date: "
- #
- State('quoted2', [
- (Str("\\")+Str('"'), _literal_2nd), # Handle \" in the string
- (Str('"'), Begin('')), # found ", set state to default
- (AnyChar, _literal) # Process as literal character
- ]),
-
- # ---------------------------------------------------------------------
- # repeater state
- # ---------------------------------------------------------------------
- # The repeater state is entered whenever a { is encountered
- # while in the default state. This state allows an input
- # mask to include a number inside of {} to cause the previous
- # token to repeat
- #
- # Example : A{5} is the same as AAAAA
- #
- State('repeater', [
- (Str('}'), Begin('')),# found }, set state to
- # default
- (Rep1(Any(string.digits)), _repeater) # grab all digits inside
- # the {} execute _repeater,
- # the function will recreate
- # a repeater instance
- # containing the obtained
- # number
- ]),
-
- # ---------------------------------------------------------------------
- # Set state
- # ---------------------------------------------------------------------
- # The set state is entered whenever a [ is encountered while in the
- # default state. This provides basic regex set support where any
- # character inside the [] is matched.
- #
- # Example : [ABCDEF]
- #
- State('set', [
- (Str("\\")+Any('[]'), _add_set_2nd), #
- (Str(']'), _end_set), #
- (AnyChar, _add_set) #
- ]),
- ]
-
- # -------------------------------------------------------------------------
- # Additional lexicon definitions for input masks
- # -------------------------------------------------------------------------
- _extra_lexicon = [
- (Any('!'), _check_single),
- ]
-
- def __process(self, token):
- """
- Adds a token class instance to this instances list of tokens.
-
- As token instances are generated from the input mask they
- are processed and then added to the scanners working list
- of tokens. Special tokens such as repeater and case modifiers
- are processed during this state.
- """
-
- if isinstance(token, Repeater):
- # If the incoming token is a repeater then replace
- # the repeater with the appropriate number of the
- # previous token.
- for unused in range(0, token.count-1):
- self.__process(self.__last)
-
- elif isinstance(token, CaseModifier):
- # If then incomming token is a case modifier
- # then add the modifier token to the list of
- # modifiers stored in the scanner
- self.__modify.append(token)
- else:
- # Standard tokens
- if self.__modify and isinstance(token, TextToken):
- # If a case modifier is stored and the incoming
- # token is text then force case based upon the
- # modifier
- mod = self.__modify.pop(0)
- if mod.token == '<':
- token.force_upper = True
- elif mod.token == '>':
- token.force_lower = True
-
- self.tokens.append(token)
-
- # TODO: Should this be storing modifiers and the like? It is.
- self.__last = token
-
- def __init__(self, mask_text, name):
- """
- Input mask scanner constructor.
-
- The input mask scanner will create a list of class instances
- that describe the input mask.
-
- @type text: string
- @param text: The text to be used as the mask
- @type name: string
- @param name: The name of the input mask(TODO: ?)
- @type numeric: boolean
- @param numeric: Is this a numeric input mask
- @type date: boolean
- @param date: Is this a numeric input mask
- """
- self.__singles = []
- self.tokens = []
- self.__last = None # The last token generated from the input mask
- self.__modify = []
-
- mask = StringIO.StringIO(mask_text)
-
- # ---------------------------------------------------------------------
- # Read the input mask and convert into instances of Token classes
- # ---------------------------------------------------------------------
- try:
- Scanner.__init__(self,
- Lexicon(self._lexicon + self._extra_lexicon),
- mask, name)
-
- while True:
- token, unused = self.read()
- if token is None:
- break
-
- # Process the returned token
- self.__process(token)
-
- except Errors.PlexError, msg:
- raise MaskDefinitionError, msg
-
- if self.__modify:
- print "WARNING: Modifier found at end of mask."
-
- # ---------------------------------------------------------------------
- # Build a count of the various token types created during parsing
- # ---------------------------------------------------------------------
- #
- num_markers = 0 # Number of numeric token instances found
- date_markers = 0 # Number of date token instances found
- text_markers = 0 # Number of text token instances found
- rtl_pos = -1 # Right to left token
- # TODO: Unknown functionality at this time
-
- for (position, token) in enumerate(self.tokens):
- if isinstance(token, RightToLeft):
- rtl_pos = position
- if not isinstance(token, Literal):
- if token.numeric:
- num_markers += 1
- elif token.date:
- date_markers += 1
- else:
- text_markers += 1
-
- # Check for "!" in non-numeric mask
- if rtl_pos >= 0:
- self.tokens.pop(rtl_pos)
- else:
- rtl_pos = 0
-
- self.rtl_pos = rtl_pos
-
- # ---------------------------------------------------------------------
- # Check for errors and mixed marker types
- # ---------------------------------------------------------------------
- #
- # TODO: I'm not sure we should block mixed input types
- #
- #if not (num_markers or date_markers or text_markers):
- #raise MaskDefinitionError, 'Mask has no character tokens'
-
- #if (num_markers) and (date_markers or text_markers):
- #raise MaskDefinitionError, \
- #'Numeric mask %s has non-numeric tokens' % mask_text
-
- #if (date_markers) and (num_markers or text_markers):
- #raise MaskDefinitionError, 'Date/Time mask has non-date tokens'
-
- # ---------------------------------------------------------------------
- # Set the type of parser based upon the marker counts
- # ---------------------------------------------------------------------
- # If any two of these are non-zero, then the mask is a text mask,
- # not date or numeric.
- #
- if (num_markers and date_markers) or text_markers:
- self.type = 'text'
- elif num_markers:
- self.type = 'numeric'
- else:
- self.type = 'date'
\ No newline at end of file
Copied: trunk/gnue-common/src/formatting/masks/MaskTokenizer.py (from rev 8399, trunk/gnue-common/src/formatting/masks/MaskParser.py)
===================================================================
--- trunk/gnue-common/src/formatting/masks/MaskParser.py 2006-04-12 19:11:58 UTC (rev 8399)
+++ trunk/gnue-common/src/formatting/masks/MaskTokenizer.py 2006-04-13 01:17:51 UTC (rev 8400)
@@ -0,0 +1,452 @@
+#
+# This file is part of GNU Enterprise.
+#
+# GNU Enterprise is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation; either
+# version 2, or (at your option) any later version.
+#
+# GNU Enterprise is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with program; see the file COPYING. If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place
+# - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Copyright 2001-2006 Free Software Foundation
+#
+# pylint: disable-msg=W0402,
+# W0402 is disabled as the only obsolete module (as of 4/12/06)
+# is the string module which is used for string.digits
+#
+# FILE:
+# MaskTokenizer
+#
+# DESCRIPTION:
+"""
+A parser which takes a text based mask and generates a list of
+token class instances that represent that mask. Any repeaters
+found in the mask ({3}) are replaced with the appropriate number
+of tokens so that the mask class will not have to deal with
+them.
+
+Valid tokens include:
+
+"""
+__revision__ = "$Id$"
+
+import string
+import StringIO
+
+from gnue.common.external.plex import \
+ Scanner, Lexicon, Errors, \
+ Str, Begin, State, AnyChar, Rep1, Any
+from gnue.common.formatting.masks.Errors import MaskDefinitionError
+from gnue.common.formatting.masks import MaskTokens
+
+class MaskTokenizer(Scanner):
+ """
+ Custom plex scanner used to construct a token list which represents
+ an input mask. This token list is used by the input mask to define
+ valid input for each position of the input.
+
+ Takes a file handle containing an input mask and creates a
+ list of Tokens which define the input mask
+ """
+ def get_type(self):
+ """
+ Returns the apparent type of this mask.
+
+ @rtype: string
+ @return: The value 'text', 'numeric', or 'date'
+ """
+ return self.type
+
+ def get_tokens(self):
+ """
+ Returns a list of the tokens after parsing the input mask.
+
+ @rtype: list
+ @return: The list of tokens
+ """
+ return self.tokens[:]
+
+ # =========================================================================
+ # Private stuff
+ # =========================================================================
+
+ # -------------------------------------------------------------------------
+ # Lexicon action functions
+ # -------------------------------------------------------------------------
+ def _check_single(self, text):
+ """
+ Function to add single instance tokens to the input mask.
+
+ A single instance token is something that can appear only once
+ in an input mask.
+ """
+ if text in self.__singles:
+ raise Errors.UnrecognizedInput(self, \
+ 'Mask can only have one "%s" token' %text)
+ self.__singles.append(text)
+ if text == '!':
+ self.produce (MaskTokens.RightToLeft(text))
+ elif text in '.+,':
+ self.produce(MaskTokens.NumberToken(text))
+ else:
+ self.produce(MaskTokens.TextToken(text))
+
+ def _literal(self, text):
+ """
+ A text literal that should appear as is in the mask
+ """
+ self.produce(MaskTokens.Literal(text))
+
+ def _literal_2nd(self, text):
+ """
+ Closes the literal string
+ """
+ return self._literal(text[1:])
+
+ def _escape(self, text):
+ """
+ An escaped character such as \$ to display a $
+ """
+ self.begin('')
+ self.produce(MaskTokens.Literal(text))
+
+ def _repeater(self, text):
+ """
+ Action to process an input mask repeater.
+
+ A repeater tells the parser to repeat the previous token a
+ specified number of times.
+
+ @param text: The value pulled from between the {} which
+ denotes the number of times to repeat.
+ """
+ self.produce(MaskTokens.Repeater(int(text)))
+
+ def _begin_set(self, text):
+ """
+ Action to process the start of a set of valid characters.
+
+ The scanner will be placed into set state and the list
+ of valid characters will be reset.
+ """
+ self.begin('set')
+ self._set = ""
+
+ def _add_set(self, text):
+ """
+ Action to add a character to the set currently being constructed.
+
+ Only called when the scanner is in state "set".
+
+ The character read will be added to the character string
+ containing the possible valid values.
+ """
+ self._set += text
+
+ def _add_set_2nd(self, text):
+ """
+ Action to add a special character to a set being built.
+
+ Used when an escaped set character \[ or \] is found
+ in the list of valid characters to be added to the set
+ """
+ return self._add_set(text[1:])
+
+ def _end_set(self, text):
+ """
+ Action to process the end of a set.
+
+ Only called when the scanner is in state "set".
+
+ The list of possible characters that were defined in the set will be used
+ to build an instance of a TokenSet class. As part of this function the
+ scanner will be set to the default state.
+ """
+ self.begin('')
+ self.produce(MaskTokens.TokenSet(self._set))
+
+ # =========================================================================
+ # Lexicon definitions
+ # =========================================================================
+ #
+ # -------------------------------------------------------------------------
+ # Base Lexicon definition
+ # -------------------------------------------------------------------------
+ # This lexicon is the base used by all masks
+ #
+
+ _lexicon = [
+ # ---------------------------------------------------------------------
+ # Default state definitions
+ # ---------------------------------------------------------------------
+ (Str('\\'), Begin('escape')), # found \, set state to escape
+ #
+ (Str("'"), Begin('quoted')), # found ', set state to quoted
+ #
+ (Str('"'), Begin('quoted2')), # found ", set state to qoute2
+ #
+ (Str('{'), Begin('repeater')), # found {, set state to
+ # repeater
+ #
+ (Str('['), _begin_set), # found [, execute _begin_set
+ # the function will set state
+ # to set when executed
+ #
+ (Str(' '), MaskTokens.Literal),# found a space, return a
+ # literal char instance
+ #
+ (Any('+.,'), _check_single), # these characters can appear
+ # only once in an input mask
+ #
+ (Any('_?AaLlCc'), MaskTokens.TextToken), # found a text character
+ # return a text token
+ # instance
+ #
+ (Any('MDYyHISPp:/'), MaskTokens.DateToken), # found a date character
+ # return a date token
+ # instance
+ #
+ (Any('#0'), MaskTokens.NumberToken), # found a number character
+ # return a number token
+ # instance
+ #
+ (Any('<>'), MaskTokens.CaseModifier), # found a case modifier
+ # return case modifier
+ # instance
+
+ # ---------------------------------------------------------------------
+ # Escape State
+ # ---------------------------------------------------------------------
+ # The escape state is entered whenever a backslash is encountered while
+ # in the default state. Its purpose is to allow the placement of what
+ # would normally be reserved characters into the input mask
+ #
+ State('escape', [
+ (AnyChar, _escape), # No matter which character is next
+ # execute _escape, the function will
+ # create a literal instance and set
+ # the state back to default
+ ]),
+
+ # ---------------------------------------------------------------------
+ # Quoted state
+ # ---------------------------------------------------------------------
+ # The quoted state is entered whenever a single quote is encountered
+ # while in the default state. Its purpose is to allow quoted strings
+ # inside the input mask to be sent through as their literal value
+ #
+ State('quoted', [
+ (Str("\\")+Str("'"), _literal_2nd), # Handle \' in the string
+ (Str("'"), Begin('')), # found ', set state to default
+ (AnyChar, _literal) # Process as literal character
+ ]),
+
+ # ---------------------------------------------------------------------
+ # quote2 state
+ # ---------------------------------------------------------------------
+ # This works the exact same way as the quoted state but is used
+ # when a double quote is encountered. ' and " get separate states
+ # so that one type can always enclose the other
+ #
+ # Example : "Today's date: "
+ #
+ State('quoted2', [
+ (Str("\\")+Str('"'), _literal_2nd), # Handle \" in the string
+ (Str('"'), Begin('')), # found ", set state to default
+ (AnyChar, _literal) # Process as literal character
+ ]),
+
+ # ---------------------------------------------------------------------
+ # repeater state
+ # ---------------------------------------------------------------------
+ # The repeater state is entered whenever a { is encountered
+ # while in the default state. This state allows an input
+ # mask to include a number inside of {} to cause the previous
+ # token to repeat
+ #
+ # Example : A{5} is the same as AAAAA
+ #
+ State('repeater', [
+ (Str('}'), Begin('')),# found }, set state to
+ # default
+ (Rep1(Any(string.digits)), _repeater) # grab all digits inside
+ # the {} execute _repeater,
+ # the function will recreate
+ # a repeater instance
+ # containing the obtained
+ # number
+ ]),
+
+ # ---------------------------------------------------------------------
+ # Set state
+ # ---------------------------------------------------------------------
+ # The set state is entered whenever a [ is encountered while in the
+ # default state. This provides basic regex set support where any
+ # character inside the [] is matched.
+ #
+ # Example : [ABCDEF]
+ #
+ State('set', [
+ (Str("\\")+Any('[]'), _add_set_2nd), #
+ (Str(']'), _end_set), #
+ (AnyChar, _add_set) #
+ ]),
+ ]
+
+ # -------------------------------------------------------------------------
+ # Additional lexicon definitions for input masks
+ # -------------------------------------------------------------------------
+ _extra_lexicon = [
+ (Any('!'), _check_single),
+ ]
+
+ def __process(self, token):
+ """
+ Adds a token class instance to this instance's list of tokens.
+
+ As token instances are generated from the input mask they
+ are processed and then added to the scanner's working list
+ of tokens. Special tokens such as repeaters and case modifiers
+ are handled during this step.
+ """
+
+ if isinstance(token, MaskTokens.Repeater):
+ # If the incoming token is a repeater then replace
+ # the repeater with the appropriate number of the
+ # previous token.
+ for unused in range(0, token.count-1):
+ self.__process(self.__last)
+
+ elif isinstance(token, MaskTokens.CaseModifier):
+ # If the incoming token is a case modifier
+ # then add the modifier token to the list of
+ # modifiers stored in the scanner
+ self.__modify.append(token)
+ else:
+ # Standard tokens
+ if self.__modify and isinstance(token, MaskTokens.TextToken):
+ # If a case modifier is stored and the incoming
+ # token is text then force case based upon the
+ # modifier
+ mod = self.__modify.pop(0)
+ if mod.token == '<':
+ token.force_upper = True
+ elif mod.token == '>':
+ token.force_lower = True
+
+ self.tokens.append(token)
+
+ # TODO: Should this be storing modifiers and the like? It is.
+ self.__last = token
+
+ def __init__(self, mask_text, name):
+ """
+ Input mask scanner constructor.
+
+ The input mask scanner will create a list of class instances
+ that describe the input mask.
+
+ @type text: string
+ @param text: The text to be used as the mask
+ @type name: string
+ @param name: The name of the input mask(TODO: ?)
+ @type numeric: boolean
+ @param numeric: Is this a numeric input mask
+ @type date: boolean
+ @param date: Is this a date input mask
+ """
+ self._set = ""
+ self.__singles = []
+ self.tokens = []
+ self.__last = None # The last token generated from the input mask
+ self.__modify = []
+
+ mask = StringIO.StringIO(mask_text)
+
+ # ---------------------------------------------------------------------
+ # Read the input mask and convert into instances of Token classes
+ # ---------------------------------------------------------------------
+ try:
+ Scanner.__init__(self,
+ Lexicon(self._lexicon + self._extra_lexicon),
+ mask, name)
+
+ while True:
+ token, unused = self.read()
+ if token is None:
+ break
+
+ # Process the returned token
+ self.__process(token)
+
+ except Errors.PlexError, msg:
+ raise MaskDefinitionError, msg
+
+ if self.__modify:
+ print "WARNING: Modifier found at end of mask."
+
+ # ---------------------------------------------------------------------
+ # Build a count of the various token types created during parsing
+ # ---------------------------------------------------------------------
+ #
+ num_markers = 0 # Number of numeric token instances found
+ date_markers = 0 # Number of date token instances found
+ text_markers = 0 # Number of text token instances found
+ rtl_pos = -1 # Right to left token
+ # TODO: Unknown functionality at this time
+
+ for (position, token) in enumerate(self.tokens):
+ if isinstance(token, MaskTokens.RightToLeft):
+ rtl_pos = position
+ if not isinstance(token, MaskTokens.Literal):
+ if token.numeric:
+ num_markers += 1
+ elif token.date:
+ date_markers += 1
+ else:
+ text_markers += 1
+
+ # Check for "!" in non-numeric mask
+ if rtl_pos >= 0:
+ self.tokens.pop(rtl_pos)
+ else:
+ rtl_pos = 0
+
+ self.rtl_pos = rtl_pos
+
+ # ---------------------------------------------------------------------
+ # Check for errors and mixed marker types
+ # ---------------------------------------------------------------------
+ #
+ # TODO: I'm not sure we should block mixed input types
+ #
+ #if not (num_markers or date_markers or text_markers):
+ #raise MaskDefinitionError, 'Mask has no character tokens'
+
+ #if (num_markers) and (date_markers or text_markers):
+ #raise MaskDefinitionError, \
+ #'Numeric mask %s has non-numeric tokens' % mask_text
+
+ #if (date_markers) and (num_markers or text_markers):
+ #raise MaskDefinitionError, 'Date/Time mask has non-date tokens'
+
+ # ---------------------------------------------------------------------
+ # Set the type of parser based upon the marker counts
+ # ---------------------------------------------------------------------
+ # If any two of these are non-zero, then the mask is a text mask,
+ # not date or numeric.
+ #
+ if (num_markers and date_markers) or text_markers:
+ self.type = 'text'
+ elif num_markers:
+ self.type = 'numeric'
+ else:
+ self.type = 'date'
\ No newline at end of file
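
For orientation, MaskTokenizer is what InputMask.__init__ now instantiates in place of MaskParser.InputMaskParser; the second argument is simply the scanner name that InputMask passes ('inline'). A small usage sketch, assuming gnue-common is on the Python path; the expected values follow from the lexicon and the repeater handling above:

from gnue.common.formatting.masks.MaskTokenizer import MaskTokenizer

# 'M{2}/D{2}/Y{4}' should tokenize exactly like 'MM/DD/YYYY': each repeater
# re-emits the preceding token, so both masks yield ten date tokens.
scanner = MaskTokenizer('M{2}/D{2}/Y{4}', 'inline')

assert len(scanner.get_tokens()) == 10
assert scanner.get_type() == 'date'
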
Added: trunk/gnue-common/src/formatting/masks/MaskTokens.py
===================================================================
--- trunk/gnue-common/src/formatting/masks/MaskTokens.py 2006-04-12 19:11:58 UTC (rev 8399)
+++ trunk/gnue-common/src/formatting/masks/MaskTokens.py 2006-04-13 01:17:51 UTC (rev 8400)
@@ -0,0 +1,151 @@
+#
+# This file is part of GNU Enterprise.
+#
+# GNU Enterprise is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation; either
+# version 2, or (at your option) any later version.
+#
+# GNU Enterprise is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with program; see the file COPYING. If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place
+# - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Copyright 2001-2006 Free Software Foundation
+#
+# pylint: disable-msg=R0903,
+# R0903 disabled as these classes represent placeholders and
+# as such don't have public methods
+#
+# FILE:
+# MaskTokens.py
+#
+# DESCRIPTION:
+"""
+Tokens returned by the mask tokenizer.
+
+These tokens are used by an input mask to create a set of InputTokens.
+"""
+__revision__ = "$Id$"
+
+class BaseToken:
+ """
+ Basic parser token class.
+
+ Not used directly, but inherited by the other defined tokens
+ Literal, Token, etc.
+ """
+ numeric = False
+ date = False
+ text = False
+ literal = False
+ token = False
+
+ def __init__(self, base_token, override_token=None): # , *args):
+ """
+ Token constructor
+ """
+ if override_token is not None:
+ self.token = override_token
+ else:
+ self.token = base_token
+
+# -----------------------------------------------------------------------------
+# Standard token classes
+# -----------------------------------------------------------------------------
+class Token(BaseToken):
+ """
+ Class typically used to create normal tokens as
+ opposed to special tokens like literal.
+
+ It sets the standard options so that each individual
+ token class doesn't need to.
+ """
+ force_lower = False
+ force_upper = False
+ token = True
+
+class NumberToken(Token):
+ """
+ Numeric token (#9-,.)
+ """
+ numeric = True
+
+class DateToken(Token):
+ """
+ Date token (MDYyHIS:/)
+ """
+ date = True
+
+class TextToken(Token):
+ """
+ Text token
+
+ A text token represents one standard alphanumeric character.
+ """
+ text = True
+
+class TokenSet(Token):
+ """
+ Token defined by user with [] notation.
+ Can behave like a NumberToken or TextToken,
+ depending on contents of [].
+ """
+ def __init__(self, token): # , *args):
+
+ Token.__init__(self, token) #, *args)
+
+ # Are we all-numeric?
+ self.numeric = token.isdigit()
+ self.token = token
+
+ if not self.numeric:
+ self.text = True
+
+# -----------------------------------------------------------------------------
+# Special token classes
+# -----------------------------------------------------------------------------
+class Literal(BaseToken):
+ """
+ A literal string that the developer wants in the string.
+ Note that for our purposes, the basic "literals" aren't
+ really Literals(), but special cases of Token classes.
+ So all literals represented by this class are denoted
+ with \ or "" syntaxes.
+ """
+ literal = True
+
+class RightToLeft(BaseToken):
+ """
+ Temporary token class used to note the
+ position of ! modifiers
+ """
+ numeric = True
+
+class CaseModifier:
+ """
+ Temporary token class used to record > and <
+ markers. These cause the modified token to have
+ either force_upper or force_lower set, so the
+ other classes won't ever see CaseModifier
+ instances.
+ """
+ def __init__(self):
+ pass
+
+class Repeater:
+ """
+ Temporary token class used to record {#}
+ markers. These are replaced with the actual
+ represented tokens before being passed out
+ of MaskTokenizer (i.e., 0{3} would be returned
+ as 000, so the other classes won't ever see
+ Repeater instances.
+ """
+ def __init__(self, count):
+ self.count = count
\ No newline at end of file
Deleted: trunk/gnue-common/src/formatting/masks/Tokens.py
===================================================================
--- trunk/gnue-common/src/formatting/masks/Tokens.py 2006-04-12 19:11:58 UTC (rev 8399)
+++ trunk/gnue-common/src/formatting/masks/Tokens.py 2006-04-13 01:17:51 UTC (rev 8400)
@@ -1,465 +0,0 @@
-#
-# This file is part of GNU Enterprise.
-#
-# GNU Enterprise is free software; you can redistribute it
-# and/or modify it under the terms of the GNU General Public
-# License as published by the Free Software Foundation; either
-# version 2, or (at your option) any later version.
-#
-# GNU Enterprise is distributed in the hope that it will be
-# useful, but WITHOUT ANY WARRANTY; without even the implied
-# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-# PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public
-# License along with program; see the file COPYING. If not,
-# write to the Free Software Foundation, Inc., 59 Temple Place
-# - Suite 330, Boston, MA 02111-1307, USA.
-#
-# Copyright 2001-2006 Free Software Foundation
-#
-# FILE:
-# Tokens.py
-#
-# DESCRIPTION:
-"""
-Tokens used to define the components that make up an input mask.
-These tokens are used to define the final lexicon used by the
-mask.
-"""
-# NOTES:
-#
-
-from gnue.common.external.plex import *
-
-import string
-import locale
-
-FORCE_UPPER = True
-FORCE_LOWER = False
-
-digit = Any(string.digits)
-letter = Any(string.letters)
-
-# =============================================================================
-# Base tokens
-#
-# These are inherited be other tokens that are actually
-# used in the input mask. Instances of these classes should not
-# be used directly.
-# =============================================================================
-class Tok:
- """
- Base token containing all the flags and values that an
- input token may require.
- """
-
- # True if this character is optional
- optional = False
-
- # If set, the character to auto-fill the string with
- autochar = None
-
- # If set, the mask char to fill the display with
- # (Note: autochar takes precedence; default is _)
- maskchar = None
-
- # A list of partial grammar rules to
- # build our character-at-a-time parser
- # This list should itself contain lists
- paths = []
-
- # Number of characters this space takes up
- maxchars = 1
-
- # Left pad with zeros
- # (only makes sense if maxchars > 1)
- zero_pad = False
-
- # As implied...
- force_upper = False
- force_lower = False
-
- def __repr__(self):
- return "%s(%s)" % (self.__class__.__name__, self.symbol)
-
- def getProperDisplay(self, text):
- if self.zero_pad and self.maxchars == 2 and len(text) == 1:
- return "0" + text
- else:
- return text
-
-class TextTok(Tok):
- """
- Base text token
- """
-
-class DateTok(Tok):
- """
- Base date token
- """
-
-class NumTok(Tok):
- """
- Base numeric token
- """
-
-class LitTok(Tok):
- """
- Base literal token
- """
- optional = True
- def __repr__(self):
- """
- Force the class to return a custom string representation of itself.
- Not sure why yet.
- """
- return "%s(%s)" % (self.__class__.__name__, self.autochar)
-
-
-class forcible:
- """
- This is a placeholder for the paths=[]
- that denotes when a mask can be considered complete
- *if* a forced marker is provided.
- """
-
-# =============================================================================
-# Base tokens
-# =============================================================================
-class tChar(TextTok):
- """
- Any character, required
- """
- symbol='_'
- paths=[[Any(string.letters+string.digits+' '+string.punctuation)]]
-
-class tCharOpt(tChar):
- """
- Any character, optional
- """
- symbol='?'
- optional=True
-
-class tA(TextTok):
- """
- Any alphanumeric, required
- """
- symbol='A'
- paths=[[Any(string.letters+string.digits)]]
-
-class ta(tA):
- """
- Any alphanumeric, optional
- """
- symbol='a'
- optional=True
-
-class tL(TextTok):
- """
- Any letter, required
- """
- symbol='L'
- paths=[[letter]]
-
-class tl(tL):
- """
- Any letter, optional
- """
- symbol='l'
- optional=True
-
-class tC(TextTok):
- """
- Any character (alphanum) or space, required
- """
- symbol='C'
- paths=[[Any(string.letters+string.digits+' ')]]
-
-class tc(tC):
- """
- Any character (alphanum) or space, optional
- """
- symbol='c'
- optional=True
-
-class tsign(NumTok):
- """
- Positive or negative sign (one per mask) (literal)
- """
- symbol='-'
- optional=True
- paths=[[Any('+-')]]
-
-class tDigit(NumTok):
- """
- Any digit, required
- """
- symbol='0'
- paths=[[digit]]
-# optional=True # For input masks, this is largely true?
-
-class tDigitOpt(tDigit):
- """
- Any digit, optional
- """
- symbol='#'
- optional=True
-
-class tM(DateTok):
- """
- Month, with zero padding
- """
- symbol='M'
- maxchars = 2
- zero_pad = True
- paths=[ [ Str('1'), forcible, Any('012') ], # months 1, 10 - 12
- [ Str('0'), Range('19') ], # months 01 - 09
- [ Range('29') ] ] # months 2 - 9
-
-class tm(tM):
- """
- Month, no zero padding
- """
- symbol = 'm'
-
-class tD(DateTok):
- """
- Day
- """
- symbol='D'
- zero_pad = True
- maxchars = 2
- paths=[ [ Str('3'), forcible, Any('01') ], # days 3, 30 - 31
- [ Any('12'), forcible, digit ], # days 1,2,10 - 29
- [ Str('0'), Range('19')], # days 01 - 09
- [ Range('49') ] ] # days 4 - 9
-
-class td(tD):
- """
- Day, no zero padding
- """
- symbol = 'd'
-
-class tY(DateTok):
- """
- Year - 4 digits
- """
- symbol='Y'
- maxchars = 4
- paths = [ [ digit ]*4 ]
-
-class ty(DateTok):
- """
- Year - 2 digits
- """
- symbol='y'
- maxchars = 2
- paths = [ [ digit ]*2 ]
-
-class tH(DateTok):
- """
- Hour
- """
- symbol='H'
- maxchars = 2
- paths = [ [ Str('2'),forcible, Any('0123') ], # Hour 2, 20-23
- [ Any('01'), forcible, digit ], # Hour 00 - 19
- [ Range('39') ] ] # Hour 3 - 9
-
-class tI(DateTok):
- """
- Minute
- """
- symbol='I'
- maxchars = 2
- paths = [ [Any('012345'), digit ] ]
-
-class tS(DateTok):
- """
- Seconds
- """
- symbol='S'
- maxchars = 2
- paths = [ [ Any('012345'), digit ] ]
-
-class tP(DateTok):
- """
- PM AM token
- """
- symbol='P'
- maxchars = 2
- paths = [ [ NoCase(Str('p','a')), NoCase(Str('m')) ] ]
- force_upper = True
-
-class tp(tP):
- """
- pm am token
- """
- symbol='p'
- maxchars = 2
- force_lower = True
-
-class tLiteral(LitTok):
- def __init__(self, char):
- path = []
-
- for ch in char:
- path.append(Str(ch))
- if len(char) == 1:
- self.symbol = "\\%s" % char
- else:
- self.symbol = '"' + char.replace('\\','\\\\').replace('"','\\"') + '"'
- self.paths = [path]
- self.autochar = char
-
-class tDecSep(LitTok):
- """
- Decimal separator
- """
- symbol='.'
- autochar=locale.localeconv()['decimal_point'] or '.'
- paths = [[Str(autochar)]]
-
-class tThouSep(LitTok):
- """
- Thousands separator
- """
- symbol=','
- autochar=locale.localeconv()['thousands_sep'] or ','
- paths = [[Str(autochar)]]
-
-class tTimeSep(LitTok):
- """
- Time Separator
- """
- symbol=':'
- autochar=':' # TODO: *Where* is this in locale?!?!?
- paths=[[Str(autochar)]]
-
-
-class tDateSep(LitTok):
- """
- Date Separator
- """
- symbol='/'
- autochar='/' # TODO: *Where* is this in locale?!?!?
- paths=[[Str(autochar)]]
-
-class tCustom(TextTok):
- """
- Custom text token
- (the constructor is passed the set of valid characters)
- """
- def __init__(self, chars):
- self.paths = [[Any(chars)]]
-
-class tCustomNum(NumTok):
- """
- Custom numeric token
- (the constructor is passed the set of valid digits)
- """
- def __init__(self, chars):
- self.paths = [[Any(chars)]]
- self.symbol = '[%s]' % chars.replace('\\','\\\\').replace(']','\\]').replace('-','\\-')
-
-# ---------------------------------------------------------------------------
-# Map of tokens to classes
-# ---------------------------------------------------------------------------
-tokenMap = {
- # Input/output tokens
- '_': tChar, # Any character, required
- '?': tCharOpt, # Any character, optional
- 'A': tA, # Any alphanumeric, required
- 'a': ta, # Any alphanumeric, optional
- 'L': tL, # Any letter, required
- 'l': tl, # Any letter, optional
- 'C': tC, # Any character (alphanum) or space, required
- 'c': tc, # Any character (alphanum) or space, optional
- '+': tsign, # Positive or negative sign (one per mask)
- '0': tDigit, # Any digit, required
- '#': tDigitOpt, # Any digit, optional
- 'M': tM, # Month, zero padding
- 'D': tD, # Day, zero padding
- 'Y': tY, # Year - 4 digits
- 'y': ty, # Year - 2 digits
- 'H': tH, # Hour
- 'I': tI, # Minute
- 'S': tS, # Seconds
- 'P': tP, # PM AM token
- 'p': tp, # pm am token
- '.': tDecSep, # Decimal separator
- ',': tThouSep, # Thousands separator
- ':': tTimeSep, # Time Separator
- '/': tDateSep, # Date Separator
- # Output-only
- 'm': tm, # Month, no zero padding
- 'd': td, # Day, no zero padding
-}
-
-# =============================================================================
-# Module level functions
-# =============================================================================
-def buildSingleValidationRule(token, honorOptional=True):
- """
- Build a validation rule for a specific token
- """
- val = None
- for ruleset in token.paths:
- v2 = v3 = None
- startoptional = False
- for rule in ruleset:
- if rule == forcible:
- startoptional = True
- continue
- if startoptional:
- if v3 is None:
- v3 = rule
- else:
- v3 = v3 + rule
- else:
- if v2 is None:
- v2 = rule
- else:
- v2 = v2 + rule
- if v3 is not None:
- v2 = v2 + v3
- if val is None:
- val = v2
- else:
- val = val | v2
- if honorOptional and token.optional:
- return Opt(val)
- else:
- return val
-
-def buildValidationRule(tokens):
- """
- Take a list of tokens and combine all their rule paths
- into a single rule that validates whether a string is
- "complete" with respect to the input mask.
- """
- val = Bol
- for token in tokens:
- val = val + buildSingleValidationRule(token)
- if not tokens:
- val = val + Rep1(AnyChar)
- return val + Eol
-
-
-# =============================================================================
-# Debugging functions
-# =============================================================================
-def printLexiconTree(lexicon, indent=0):
- """
- Function useful for debuging.
- Function useful for debugging.
- for foo in lexicon:
- if isinstance(foo, State):
- print (" "*indent) + ("State: %s" % str((foo.name)))
- printLexiconTree(foo.tokens,indent+2)
- elif type(foo) == type(()) and len(foo) == 2:
- print " "*indent + str(foo[0])
- else:
- print " "*indent + str(foo)
-
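
For reference, a minimal usage sketch of the removed module's API (an
assumption for illustration, not code from this commit): the token classes
and buildValidationRule() above were combined into a Plex lexicon to test
whether a string completely satisfies a mask. The "M/D/Y" token sequence,
the 'complete' action value, the is_complete() helper and the
Errors.PlexError catch are all hypothetical; only the Plex names already
imported by InputMask.py are relied on.

    from cStringIO import StringIO
    from gnue.common.external.plex import Scanner, Lexicon, Errors

    # Token sequence a mask like "M/D/Y" would expand to (classes defined above).
    tokens = [tM(), tDateSep(), tD(), tDateSep(), tY()]

    # Bol + per-token rules + Eol: matches only a fully entered value.
    lexicon = Lexicon([(buildValidationRule(tokens), 'complete')])

    def is_complete(text):
        """Return True if text completely satisfies the mask."""
        scanner = Scanner(lexicon, StringIO(text), '<mask>')
        try:
            # Plex raises an error when the input cannot be matched at all;
            # Errors.PlexError is assumed to cover that case here.
            value, _ = scanner.read()
        except Errors.PlexError:
            return False
        return value == 'complete'

    # is_complete("4/7/2006") should be true and is_complete("4/7/20")
    # false, since tY requires four digits (assumed behaviour).
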
Modified: trunk/gnue-common/src/formatting/masks/tests/mask_tokenizer.py
===================================================================
--- trunk/gnue-common/src/formatting/masks/tests/mask_tokenizer.py
2006-04-12 19:11:58 UTC (rev 8399)
+++ trunk/gnue-common/src/formatting/masks/tests/mask_tokenizer.py
2006-04-13 01:17:51 UTC (rev 8400)
@@ -3,8 +3,10 @@
locale.setlocale(locale.LC_ALL,'')
-from gnue.common.formatting.masks.MaskParser \
- import InputMaskParser, Literal, TextToken, NumberToken, DateToken, TokenSet
+from gnue.common.formatting.masks.MaskTokenizer \
+ import MaskTokenizer
+from gnue.common.formatting.masks.MaskTokens \
+ import Literal, TextToken, NumberToken, DateToken, TokenSet
class TextTestCase(unittest.TestCase):
@@ -31,7 +33,7 @@
for test in testInputs:
maskText, result = test
- mask = InputMaskParser(maskText, 'bogus')
+ mask = MaskTokenizer(maskText, 'bogus')
self._verifyTokens(maskText, mask.tokens, result)
def testDateMasks(self):
@@ -45,7 +47,7 @@
for test in testInputs:
maskText, result = test
- mask = InputMaskParser(maskText, 'bogus')
+ mask = MaskTokenizer(maskText, 'bogus')
self._verifyTokens(maskText, mask.tokens, result)
def testNumericMasks(self):
@@ -60,7 +62,7 @@
for test in testInputs:
maskText, result = test
- mask = InputMaskParser(maskText, 'bogus')
+ mask = MaskTokenizer(maskText, 'bogus')
self._verifyTokens(maskText, mask.tokens, result)
def suite():
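
A minimal sketch of a test written against the renamed classes (assumed,
mirroring the updated imports above; the 'M/D/Y' mask string and the
non-empty-token assertion are illustrative only, while the constructor
arguments and the .tokens attribute are taken from the diff):

    import locale
    import unittest
    locale.setlocale(locale.LC_ALL, '')

    from gnue.common.formatting.masks.MaskTokenizer import MaskTokenizer

    class SmokeTestCase(unittest.TestCase):
        """Illustrative only: a date mask should tokenize without raising."""

        def testSimpleDateMask(self):
            mask = MaskTokenizer('M/D/Y', 'bogus')
            # The real tests verify each token's class and value via
            # _verifyTokens(); here we only check that a token list was built.
            self.failUnless(len(mask.tokens) > 0)

    if __name__ == '__main__':
        unittest.main()
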