Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.4" 
  61  __versionTime__ = "13 May 2016 18:25 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75  import traceback 
  76   
  77  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  78   
  79  __all__ = [ 
  80  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  81  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  82  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  83  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  84  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  85  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  86  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  87  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  88  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  89  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  90  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
  91  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  92  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  93  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  94  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  95  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  96  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  97  'pyparsing_common', 
  98  ] 
  99   
 100  system_version = tuple(sys.version_info)[:3] 
 101  PY_3 = system_version[0] == 3 
 102  if PY_3: 
 103      _MAX_INT = sys.maxsize 
 104      basestring = str 
 105      unichr = chr 
 106      _ustr = str 
 107   
 108      # build list of single arg builtins, that can be used as parse actions 
 109      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 110   
 111  else: 
 112      _MAX_INT = sys.maxint 
 113      range = xrange 
114 115 - def _ustr(obj):
116 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 117 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 118 then < returns the unicode object | encodes it with the default encoding | ... >. 119 """ 120 if isinstance(obj,unicode): 121 return obj 122 123 try: 124 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 125 # it won't break any existing code. 126 return str(obj) 127 128 except UnicodeEncodeError: 129 # Else encode it 130 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 131 xmlcharref = Regex('&#\d+;') 132 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 133 return xmlcharref.transformString(ret)
134 135 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 136 singleArgBuiltins = [] 137 import __builtin__ 138 for fname in "sum len sorted reversed list tuple set any all min max".split(): 139 try: 140 singleArgBuiltins.append(getattr(__builtin__,fname)) 141 except AttributeError: 142 continue 143 144 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Escape &, <, >, ", ' in a string of data.

    Returns a copy of ``data`` in which each of the five characters has been
    replaced by its named XML entity.
    """
    # ampersand must be replaced first, otherwise the '&' introduced by the
    # other entities would be escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
155
class _Constants(object):
    """Empty class with no behaviour of its own.

    NOTE(review): presumably used as a simple attribute namespace elsewhere
    in the module; its users are not visible in this section.
    """
    pass
# Commonly used character sets, exposed for building Word-style expressions.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)   # a single backslash character
# every printable ASCII character that is not whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Record where and why parsing failed.

        When ``msg`` is omitted, ``pstr`` is taken to be the message and the
        stored input string is left empty.
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text

        Any other missing attribute raises AttributeError.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join((line_str[:line_column],
                                markerString, line_str[line_column:]))
        return line_str.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the regular class members
        return "lineno col line".split() + dir(type(self))
212
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    pass
221
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
    pass
226
class ParseSyntaxException(ParseFatalException):
    """just like C{L{ParseFatalException}}, but thrown internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        """Build the exception from ``pe``, copying its input string,
        location, message and parser element."""
        super(ParseSyntaxException, self).__init__(
                                    pe.pstr, pe.loc, pe.msg, pe.parserElement)
234
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the chain of parse elements that was being traversed
        self.parseElementTrace = parseElementList

    def __str__( self ):
        return "RecursiveGrammarException: %s" % self.parseElementTrace
255
class _ParseResultsWithOffset(object):
    """Pair of (value, offset) stored as a tuple in ``self.tup``.

    Indexing returns the corresponding tuple element; C{setOffset} replaces
    the second element while keeping the first.
    """
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        return self.tup[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        self.tup = (self.tup[0],i)
265
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # Passing an existing instance returns that same instance; the
        # __doinit flag tells __init__ whether the object is freshly created.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Initialise the token list and, when ``name`` is given, register the
        tokens under that results name.

        A list ``toklist`` is copied, a generator is materialised, and any
        other value becomes a single-item list.  ``modal=False`` marks the
        name as accumulating all matches rather than only the last.
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )

    def iterkeys( self ):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues( self ):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems( self ):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys( self ):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop()."""
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        # unknown results names read as the empty string rather than raising
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            # shift the offsets of other's named results past our own tokens
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested C{ParseResults} are converted recursively; other tokens are
           returned unchanged."""
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """Returns the named parse results as a nested dictionary."""
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        # NOTE(review): the listing this was recovered from had runs of
        # whitespace collapsed; confirm the width of this indent literal
        # against the upstream module.
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object ``sub`` is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
                   len(self.__tokdict) == 1 and
                   # dict views are not indexable on Python 3, so take the
                   # first (only) entry through an iterator
                   next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        # NOTE(review): the listing this was recovered from had runs of
        # whitespace collapsed; confirm the width of the ' ' depth-indent
        # literals below against the upstream module.
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if self.haskeys():
            items = sorted(self.items())
            for k,v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                if isinstance(v,ParseResults):
                    if v:
                        out.append( v.dump(indent,depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv,ParseResults) for vv in self):
            v = self
            for i,vv in enumerate(v):
                if isinstance(vv,ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        return (dir(type(self)) + list(self.keys()))
# Register ParseResults as a virtual MutableMapping subclass.  The ABCs live
# in collections.abc on Python 3 (the collections.MutableMapping alias was
# removed in Python 3.10); Python 2 only has them in collections.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location sitting on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise measure from the last newline before loc (rfind gives -1 when
    # there is none, which makes the first character column 1)
    return loc - strg.rfind("\n", 0, loc)
727
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # number of newlines strictly before loc, plus one for the first line
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
739
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    lastCR = strg.rfind("\n", 0, loc)   # -1 when loc is on the first line
    nextCR = strg.find("\n", loc)       # -1 when loc is on the last line
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
749
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, with its location and the
    corresponding (line, column) in ``instring``."""
    print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
752
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched, followed by the matched tokens as a list."""
    print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
755
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised."""
    print ("Exception raised:" + _ustr(exc))
758
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    pass
# A simpler implementation using 'nonlocal' exists only for Python 3.x -
# nonlocal is toxic to Python 2 installs.  The version below is Python 2.x-3.x
# cross-compatible: it keeps its mutable state in one-element lists instead.
def _trim_arity(func, maxargs=2):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that calls ``func`` with progressively fewer leading
    arguments until a call does not fail with a TypeError raised at the call
    itself; once a call has succeeded, later TypeErrors are re-raised as-is.
    Builtins listed in C{singleArgBuiltins} are wrapped to receive only the
    third argument.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack():
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack()[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb):
            frames = traceback.extract_tb(tb)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack()[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
850 -class ParserElement(object):
851 """Abstract base level parser element class.""" 852 DEFAULT_WHITE_CHARS = " \n\t\r" 853 verbose_stacktrace = False 854 855 @staticmethod
def setDefaultWhitespaceChars( chars ):
    """Overrides the default whitespace chars by assigning ``chars`` to
    C{ParserElement.DEFAULT_WHITE_CHARS}.
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
@staticmethod
def inlineLiteralsUsing(cls):
    """
    Set class to be used for inclusion of string literals into a parser.
    The class is stored as C{ParserElement.literalStringClass}.
    """
    ParserElement.literalStringClass = cls
867
def __init__( self, savelist=False ):
    """Set every per-element attribute to its default value.

    ``savelist`` is stored as ``saveAsList``; the whitespace characters are
    taken from C{ParserElement.DEFAULT_WHITE_CHARS} at construction time.
    """
    self.parseAction = list()
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.keepTabs = False
    self.ignoreExprs = list()
    self.debug = False
    self.streamlined = False
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.errmsg = ""
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.debugActions = ( None, None, None ) #custom debug actions
    self.re = None
    self.callPreparse = True # used to avoid redundant calls to preParse
    self.callDuringTry = False
890
def copy( self ):
    """Make a copy of this C{ParserElement}.  Useful for defining different parse actions
       for the same parsing pattern, using copies of the original parse element."""
    cpy = copy.copy( self )
    # give the copy its own lists so later changes do not affect the original
    cpy.parseAction = self.parseAction[:]
    cpy.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return cpy
900
901 - def setName( self, name ):
902 """Define name for this expression, for use in debugging.""" 903 self.name = name 904 self.errmsg = "Expected " + self.name 905 if hasattr(self,"exception"): 906 self.exception.msg = self.errmsg 907 return self
908
909 - def setResultsName( self, name, listAllMatches=False ):
910 """Define name for referencing matching tokens as a nested attribute 911 of the returned parse results. 912 NOTE: this returns a *copy* of the original C{ParserElement} object; 913 this is so that the client can define a basic element, such as an 914 integer, and reference it in multiple places with different names. 915 916 You can also set results names using the abbreviated syntax, 917 C{expr("name")} in place of C{expr.setResultsName("name")} - 918 see L{I{__call__}<__call__>}. 919 """ 920 newself = self.copy() 921 if name.endswith("*"): 922 name = name[:-1] 923 listAllMatches=True 924 newself.resultsName = name 925 newself.modalResults = not listAllMatches 926 return newself
927
928 - def setBreak(self,breakFlag = True):
929 """Method to invoke the Python pdb debugger when this element is 930 about to be parsed. Set C{breakFlag} to True to enable, False to 931 disable. 932 """ 933 if breakFlag: 934 _parseMethod = self._parse 935 def breaker(instring, loc, doActions=True, callPreParse=True): 936 import pdb 937 pdb.set_trace() 938 return _parseMethod( instring, loc, doActions, callPreParse )
939 breaker._originalParseMethod = _parseMethod 940 self._parse = breaker 941 else: 942 if hasattr(self._parse,"_originalParseMethod"): 943 self._parse = self._parse._originalParseMethod 944 return self
945
946 - def setParseAction( self, *fns, **kwargs ):
947 """Define action to perform when successfully matching parse element definition. 948 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 949 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 950 - s = the original string being parsed (see note below) 951 - loc = the location of the matching substring 952 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object 953 If the functions in fns modify the tokens, they can return them as the return 954 value from fn, and the modified list of tokens will replace the original. 955 Otherwise, fn does not need to return any value. 956 957 Optional keyword arguments:: 958 - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing 959 960 Note: the default parsing behavior is to expand tabs in the input string 961 before starting the parsing process. See L{I{parseString}<parseString>} for more information 962 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 963 consistent view of the parsed string, the parse location, and line and column 964 positions within the parsed string. 965 """ 966 self.parseAction = list(map(_trim_arity, list(fns))) 967 self.callDuringTry = kwargs.get("callDuringTry", False) 968 return self
969
970 - def addParseAction( self, *fns, **kwargs ):
971 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 972 self.parseAction += list(map(_trim_arity, list(fns))) 973 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) 974 return self
975
976 - def addCondition(self, *fns, **kwargs):
977 """Add a boolean predicate function to expression's list of parse actions. See 978 L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction}, 979 functions passed to C{addCondition} need to return boolean success/fail of the condition. 980 981 Optional keyword arguments:: 982 - message = define a custom message to be used in the raised exception 983 - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException 984 """ 985 msg = kwargs.get("message", "failed user-defined condition") 986 exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException 987 for fn in fns: 988 def pa(s,l,t): 989 if not bool(_trim_arity(fn)(s,l,t)): 990 raise exc_type(s,l,msg)
991 self.parseAction.append(pa) 992 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) 993 return self 994
995 - def setFailAction( self, fn ):
996 """Define action to perform if parsing fails at this expression. 997 Fail acton fn is a callable function that takes the arguments 998 C{fn(s,loc,expr,err)} where: 999 - s = string being parsed 1000 - loc = location where expression match was attempted and failed 1001 - expr = the parse expression that failed 1002 - err = the exception thrown 1003 The function returns no value. It may throw C{L{ParseFatalException}} 1004 if it is desired to stop parsing immediately.""" 1005 self.failAction = fn 1006 return self
1007
1008 - def _skipIgnorables( self, instring, loc ):
1009 exprsFound = True 1010 while exprsFound: 1011 exprsFound = False 1012 for e in self.ignoreExprs: 1013 try: 1014 while 1: 1015 loc,dummy = e._parse( instring, loc ) 1016 exprsFound = True 1017 except ParseException: 1018 pass 1019 return loc
1020
1021 - def preParse( self, instring, loc ):
1022 if self.ignoreExprs: 1023 loc = self._skipIgnorables( instring, loc ) 1024 1025 if self.skipWhitespace: 1026 wt = self.whiteChars 1027 instrlen = len(instring) 1028 while loc < instrlen and instring[loc] in wt: 1029 loc += 1 1030 1031 return loc
1032
1033 - def parseImpl( self, instring, loc, doActions=True ):
1034 return loc, []
1035
1036 - def postParse( self, instring, loc, tokenlist ):
1037 return tokenlist
1038 1039 #~ @profile
1040 - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
1041 debugging = ( self.debug ) #and doActions ) 1042 1043 if debugging or self.failAction: 1044 #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 1045 if (self.debugActions[0] ): 1046 self.debugActions[0]( instring, loc, self ) 1047 if callPreParse and self.callPreparse: 1048 preloc = self.preParse( instring, loc ) 1049 else: 1050 preloc = loc 1051 tokensStart = preloc 1052 try: 1053 try: 1054 loc,tokens = self.parseImpl( instring, preloc, doActions ) 1055 except IndexError: 1056 raise ParseException( instring, len(instring), self.errmsg, self ) 1057 except ParseBaseException as err: 1058 #~ print ("Exception raised:", err) 1059 if self.debugActions[2]: 1060 self.debugActions[2]( instring, tokensStart, self, err ) 1061 if self.failAction: 1062 self.failAction( instring, tokensStart, self, err ) 1063 raise 1064 else: 1065 if callPreParse and self.callPreparse: 1066 preloc = self.preParse( instring, loc ) 1067 else: 1068 preloc = loc 1069 tokensStart = preloc 1070 if self.mayIndexError or loc >= len(instring): 1071 try: 1072 loc,tokens = self.parseImpl( instring, preloc, doActions ) 1073 except IndexError: 1074 raise ParseException( instring, len(instring), self.errmsg, self ) 1075 else: 1076 loc,tokens = self.parseImpl( instring, preloc, doActions ) 1077 1078 tokens = self.postParse( instring, loc, tokens ) 1079 1080 retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 1081 if self.parseAction and (doActions or self.callDuringTry): 1082 if debugging: 1083 try: 1084 for fn in self.parseAction: 1085 tokens = fn( instring, tokensStart, retTokens ) 1086 if tokens is not None: 1087 retTokens = ParseResults( tokens, 1088 self.resultsName, 1089 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 1090 modal=self.modalResults ) 1091 except ParseBaseException as err: 1092 #~ print "Exception raised in user parse action:", err 1093 if (self.debugActions[2] ): 1094 
self.debugActions[2]( instring, tokensStart, self, err ) 1095 raise 1096 else: 1097 for fn in self.parseAction: 1098 tokens = fn( instring, tokensStart, retTokens ) 1099 if tokens is not None: 1100 retTokens = ParseResults( tokens, 1101 self.resultsName, 1102 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 1103 modal=self.modalResults ) 1104 1105 if debugging: 1106 #~ print ("Matched",self,"->",retTokens.asList()) 1107 if (self.debugActions[1] ): 1108 self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) 1109 1110 return loc, retTokens
1111
1112 - def tryParse( self, instring, loc ):
1113 try: 1114 return self._parse( instring, loc, doActions=False )[0] 1115 except ParseFatalException: 1116 raise ParseException( instring, loc, self.errmsg, self)
1117
1118 - def canParseNext(self, instring, loc):
1119 try: 1120 self.tryParse(instring, loc) 1121 except (ParseException, IndexError): 1122 return False 1123 else: 1124 return True
1125 1126 # this method gets repeatedly called during backtracking with the same arguments - 1127 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1128 - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
1129 lookup = (self,instring,loc,callPreParse,doActions) 1130 if lookup in ParserElement._exprArgCache: 1131 value = ParserElement._exprArgCache[ lookup ] 1132 if isinstance(value, Exception): 1133 raise value 1134 return (value[0],value[1].copy()) 1135 else: 1136 try: 1137 value = self._parseNoCache( instring, loc, doActions, callPreParse ) 1138 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) 1139 return value 1140 except ParseBaseException as pe: 1141 pe.__traceback__ = None 1142 ParserElement._exprArgCache[ lookup ] = pe 1143 raise
1144 1145 _parse = _parseNoCache 1146 1147 # argument cache for optimizing repeated calls when backtracking through recursive expressions 1148 _exprArgCache = {} 1149 @staticmethod
1150 - def resetCache():
1151 ParserElement._exprArgCache.clear()
1152 1153 _packratEnabled = False 1154 @staticmethod
1155 - def enablePackrat():
1156 """Enables "packrat" parsing, which adds memoizing to the parsing logic. 1157 Repeated parse attempts at the same string location (which happens 1158 often in many complex grammars) can immediately return a cached value, 1159 instead of re-executing parsing/validating code. Memoizing is done of 1160 both valid results and parsing exceptions. 1161 1162 This speedup may break existing programs that use parse actions that 1163 have side-effects. For this reason, packrat parsing is disabled when 1164 you first import pyparsing. To activate the packrat feature, your 1165 program must call the class method C{ParserElement.enablePackrat()}. If 1166 your program uses C{psyco} to "compile as you go", you must call 1167 C{enablePackrat} before calling C{psyco.full()}. If you do not do this, 1168 Python will crash. For best results, call C{enablePackrat()} immediately 1169 after importing pyparsing. 1170 """ 1171 if not ParserElement._packratEnabled: 1172 ParserElement._packratEnabled = True 1173 ParserElement._parse = ParserElement._parseCache
1174
1175 - def parseString( self, instring, parseAll=False ):
1176 """Execute the parse expression with the given string. 1177 This is the main interface to the client code, once the complete 1178 expression has been built. 1179 1180 If you want the grammar to require that the entire input string be 1181 successfully parsed, then set C{parseAll} to True (equivalent to ending 1182 the grammar with C{L{StringEnd()}}). 1183 1184 Note: C{parseString} implicitly calls C{expandtabs()} on the input string, 1185 in order to report proper column numbers in parse actions. 1186 If the input string contains tabs and 1187 the grammar uses parse actions that use the C{loc} argument to index into the 1188 string being parsed, you can ensure you have a consistent view of the input 1189 string by: 1190 - calling C{parseWithTabs} on your grammar before calling C{parseString} 1191 (see L{I{parseWithTabs}<parseWithTabs>}) 1192 - define your parse action using the full C{(s,loc,toks)} signature, and 1193 reference the input string using the parse action's C{s} argument 1194 - explictly expand the tabs in your input string before calling 1195 C{parseString} 1196 """ 1197 ParserElement.resetCache() 1198 if not self.streamlined: 1199 self.streamline() 1200 #~ self.saveAsList = True 1201 for e in self.ignoreExprs: 1202 e.streamline() 1203 if not self.keepTabs: 1204 instring = instring.expandtabs() 1205 try: 1206 loc, tokens = self._parse( instring, 0 ) 1207 if parseAll: 1208 loc = self.preParse( instring, loc ) 1209 se = Empty() + StringEnd() 1210 se._parse( instring, loc ) 1211 except ParseBaseException as exc: 1212 if ParserElement.verbose_stacktrace: 1213 raise 1214 else: 1215 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1216 raise exc 1217 else: 1218 return tokens
1219
1220 - def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
1221 """Scan the input string for expression matches. Each match will return the 1222 matching tokens, start location, and end location. May be called with optional 1223 C{maxMatches} argument, to clip scanning after 'n' matches are found. If 1224 C{overlap} is specified, then overlapping matches will be reported. 1225 1226 Note that the start and end locations are reported relative to the string 1227 being parsed. See L{I{parseString}<parseString>} for more information on parsing 1228 strings with embedded tabs.""" 1229 if not self.streamlined: 1230 self.streamline() 1231 for e in self.ignoreExprs: 1232 e.streamline() 1233 1234 if not self.keepTabs: 1235 instring = _ustr(instring).expandtabs() 1236 instrlen = len(instring) 1237 loc = 0 1238 preparseFn = self.preParse 1239 parseFn = self._parse 1240 ParserElement.resetCache() 1241 matches = 0 1242 try: 1243 while loc <= instrlen and matches < maxMatches: 1244 try: 1245 preloc = preparseFn( instring, loc ) 1246 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) 1247 except ParseException: 1248 loc = preloc+1 1249 else: 1250 if nextLoc > loc: 1251 matches += 1 1252 yield tokens, preloc, nextLoc 1253 if overlap: 1254 nextloc = preparseFn( instring, loc ) 1255 if nextloc > loc: 1256 loc = nextLoc 1257 else: 1258 loc += 1 1259 else: 1260 loc = nextLoc 1261 else: 1262 loc = preloc+1 1263 except ParseBaseException as exc: 1264 if ParserElement.verbose_stacktrace: 1265 raise 1266 else: 1267 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1268 raise exc
1269
1270 - def transformString( self, instring ):
1271 """Extension to C{L{scanString}}, to modify matching text with modified tokens that may 1272 be returned from a parse action. To use C{transformString}, define a grammar and 1273 attach a parse action to it that modifies the returned token list. 1274 Invoking C{transformString()} on a target string will then scan for matches, 1275 and replace the matched text patterns according to the logic in the parse 1276 action. C{transformString()} returns the resulting transformed string.""" 1277 out = [] 1278 lastE = 0 1279 # force preservation of <TAB>s, to minimize unwanted transformation of string, and to 1280 # keep string locs straight between transformString and scanString 1281 self.keepTabs = True 1282 try: 1283 for t,s,e in self.scanString( instring ): 1284 out.append( instring[lastE:s] ) 1285 if t: 1286 if isinstance(t,ParseResults): 1287 out += t.asList() 1288 elif isinstance(t,list): 1289 out += t 1290 else: 1291 out.append(t) 1292 lastE = e 1293 out.append(instring[lastE:]) 1294 out = [o for o in out if o] 1295 return "".join(map(_ustr,_flatten(out))) 1296 except ParseBaseException as exc: 1297 if ParserElement.verbose_stacktrace: 1298 raise 1299 else: 1300 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1301 raise exc
1302
1303 - def searchString( self, instring, maxMatches=_MAX_INT ):
1304 """Another extension to C{L{scanString}}, simplifying the access to the tokens found 1305 to match the given parse expression. May be called with optional 1306 C{maxMatches} argument, to clip searching after 'n' matches are found. 1307 """ 1308 try: 1309 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 1310 except ParseBaseException as exc: 1311 if ParserElement.verbose_stacktrace: 1312 raise 1313 else: 1314 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1315 raise exc
1316
1317 - def __add__(self, other ):
1318 """Implementation of + operator - returns C{L{And}}""" 1319 if isinstance( other, basestring ): 1320 other = ParserElement.literalStringClass( other ) 1321 if not isinstance( other, ParserElement ): 1322 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1323 SyntaxWarning, stacklevel=2) 1324 return None 1325 return And( [ self, other ] )
1326
1327 - def __radd__(self, other ):
1328 """Implementation of + operator when left operand is not a C{L{ParserElement}}""" 1329 if isinstance( other, basestring ): 1330 other = ParserElement.literalStringClass( other ) 1331 if not isinstance( other, ParserElement ): 1332 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1333 SyntaxWarning, stacklevel=2) 1334 return None 1335 return other + self
1336
1337 - def __sub__(self, other):
1338 """Implementation of - operator, returns C{L{And}} with error stop""" 1339 if isinstance( other, basestring ): 1340 other = ParserElement.literalStringClass( other ) 1341 if not isinstance( other, ParserElement ): 1342 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1343 SyntaxWarning, stacklevel=2) 1344 return None 1345 return And( [ self, And._ErrorStop(), other ] )
1346
1347 - def __rsub__(self, other ):
1348 """Implementation of - operator when left operand is not a C{L{ParserElement}}""" 1349 if isinstance( other, basestring ): 1350 other = ParserElement.literalStringClass( other ) 1351 if not isinstance( other, ParserElement ): 1352 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1353 SyntaxWarning, stacklevel=2) 1354 return None 1355 return other - self
1356
1357 - def __mul__(self,other):
1358 """Implementation of * operator, allows use of C{expr * 3} in place of 1359 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1360 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1361 may also include C{None} as in: 1362 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1363 to C{expr*n + L{ZeroOrMore}(expr)} 1364 (read as "at least n instances of C{expr}") 1365 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1366 (read as "0 to n instances of C{expr}") 1367 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1368 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1369 1370 Note that C{expr*(None,n)} does not raise an exception if 1371 more than n exprs exist in the input stream; that is, 1372 C{expr*(None,n)} does not enforce a maximum number of expr 1373 occurrences. If this behavior is desired, then write 1374 C{expr*(None,n) + ~expr} 1375 1376 """ 1377 if isinstance(other,int): 1378 minElements, optElements = other,0 1379 elif isinstance(other,tuple): 1380 other = (other + (None, None))[:2] 1381 if other[0] is None: 1382 other = (0, other[1]) 1383 if isinstance(other[0],int) and other[1] is None: 1384 if other[0] == 0: 1385 return ZeroOrMore(self) 1386 if other[0] == 1: 1387 return OneOrMore(self) 1388 else: 1389 return self*other[0] + ZeroOrMore(self) 1390 elif isinstance(other[0],int) and isinstance(other[1],int): 1391 minElements, optElements = other 1392 optElements -= minElements 1393 else: 1394 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1395 else: 1396 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1397 1398 if minElements < 0: 1399 raise ValueError("cannot multiply ParserElement by negative value") 1400 if optElements < 0: 1401 raise ValueError("second tuple value must be greater or equal to first tuple value") 1402 if minElements == optElements == 0: 1403 raise ValueError("cannot 
multiply ParserElement by 0 or (0,0)") 1404 1405 if (optElements): 1406 def makeOptionalList(n): 1407 if n>1: 1408 return Optional(self + makeOptionalList(n-1)) 1409 else: 1410 return Optional(self)
1411 if minElements: 1412 if minElements == 1: 1413 ret = self + makeOptionalList(optElements) 1414 else: 1415 ret = And([self]*minElements) + makeOptionalList(optElements) 1416 else: 1417 ret = makeOptionalList(optElements) 1418 else: 1419 if minElements == 1: 1420 ret = self 1421 else: 1422 ret = And([self]*minElements) 1423 return ret 1424
1425 - def __rmul__(self, other):
1426 return self.__mul__(other)
1427
1428 - def __or__(self, other ):
1429 """Implementation of | operator - returns C{L{MatchFirst}}""" 1430 if isinstance( other, basestring ): 1431 other = ParserElement.literalStringClass( other ) 1432 if not isinstance( other, ParserElement ): 1433 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1434 SyntaxWarning, stacklevel=2) 1435 return None 1436 return MatchFirst( [ self, other ] )
1437
1438 - def __ror__(self, other ):
1439 """Implementation of | operator when left operand is not a C{L{ParserElement}}""" 1440 if isinstance( other, basestring ): 1441 other = ParserElement.literalStringClass( other ) 1442 if not isinstance( other, ParserElement ): 1443 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1444 SyntaxWarning, stacklevel=2) 1445 return None 1446 return other | self
1447
1448 - def __xor__(self, other ):
1449 """Implementation of ^ operator - returns C{L{Or}}""" 1450 if isinstance( other, basestring ): 1451 other = ParserElement.literalStringClass( other ) 1452 if not isinstance( other, ParserElement ): 1453 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1454 SyntaxWarning, stacklevel=2) 1455 return None 1456 return Or( [ self, other ] )
1457
1458 - def __rxor__(self, other ):
1459 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}""" 1460 if isinstance( other, basestring ): 1461 other = ParserElement.literalStringClass( other ) 1462 if not isinstance( other, ParserElement ): 1463 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1464 SyntaxWarning, stacklevel=2) 1465 return None 1466 return other ^ self
1467
1468 - def __and__(self, other ):
1469 """Implementation of & operator - returns C{L{Each}}""" 1470 if isinstance( other, basestring ): 1471 other = ParserElement.literalStringClass( other ) 1472 if not isinstance( other, ParserElement ): 1473 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1474 SyntaxWarning, stacklevel=2) 1475 return None 1476 return Each( [ self, other ] )
1477
1478 - def __rand__(self, other ):
1479 """Implementation of & operator when left operand is not a C{L{ParserElement}}""" 1480 if isinstance( other, basestring ): 1481 other = ParserElement.literalStringClass( other ) 1482 if not isinstance( other, ParserElement ): 1483 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1484 SyntaxWarning, stacklevel=2) 1485 return None 1486 return other & self
1487
1488 - def __invert__( self ):
1489 """Implementation of ~ operator - returns C{L{NotAny}}""" 1490 return NotAny( self )
1491
1492 - def __call__(self, name=None):
1493 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: 1494 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1495 could be written as:: 1496 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1497 1498 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 1499 passed as C{True}. 1500 1501 If C{name} is omitted, same as calling C{L{copy}}. 1502 """ 1503 if name is not None: 1504 return self.setResultsName(name) 1505 else: 1506 return self.copy()
1507
1508 - def suppress( self ):
1509 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from 1510 cluttering up returned output. 1511 """ 1512 return Suppress( self )
1513
1514 - def leaveWhitespace( self ):
1515 """Disables the skipping of whitespace before matching the characters in the 1516 C{ParserElement}'s defined pattern. This is normally only used internally by 1517 the pyparsing module, but may be needed in some whitespace-sensitive grammars. 1518 """ 1519 self.skipWhitespace = False 1520 return self
1521
1522 - def setWhitespaceChars( self, chars ):
1523 """Overrides the default whitespace chars 1524 """ 1525 self.skipWhitespace = True 1526 self.whiteChars = chars 1527 self.copyDefaultWhiteChars = False 1528 return self
1529
1530 - def parseWithTabs( self ):
1531 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string. 1532 Must be called before C{parseString} when the input grammar contains elements that 1533 match C{<TAB>} characters.""" 1534 self.keepTabs = True 1535 return self
1536
1537 - def ignore( self, other ):
1538 """Define expression to be ignored (e.g., comments) while doing pattern 1539 matching; may be called repeatedly, to define multiple comment or other 1540 ignorable patterns. 1541 """ 1542 if isinstance(other, basestring): 1543 other = Suppress(other) 1544 1545 if isinstance( other, Suppress ): 1546 if other not in self.ignoreExprs: 1547 self.ignoreExprs.append(other) 1548 else: 1549 self.ignoreExprs.append( Suppress( other.copy() ) ) 1550 return self
1551
1552 - def setDebugActions( self, startAction, successAction, exceptionAction ):
1553 """Enable display of debugging messages while doing pattern matching.""" 1554 self.debugActions = (startAction or _defaultStartDebugAction, 1555 successAction or _defaultSuccessDebugAction, 1556 exceptionAction or _defaultExceptionDebugAction) 1557 self.debug = True 1558 return self
1559
1560 - def setDebug( self, flag=True ):
1561 """Enable display of debugging messages while doing pattern matching. 1562 Set C{flag} to True to enable, False to disable.""" 1563 if flag: 1564 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) 1565 else: 1566 self.debug = False 1567 return self
1568
1569 - def __str__( self ):
1570 return self.name
1571
1572 - def __repr__( self ):
1573 return _ustr(self)
1574
1575 - def streamline( self ):
1576 self.streamlined = True 1577 self.strRepr = None 1578 return self
1579
1580 - def checkRecursion( self, parseElementList ):
1581 pass
1582
1583 - def validate( self, validateTrace=[] ):
1584 """Check defined expressions for valid structure, check for infinite recursive definitions.""" 1585 self.checkRecursion( [] )
1586
1587 - def parseFile( self, file_or_filename, parseAll=False ):
1588 """Execute the parse expression on the given file or filename. 1589 If a filename is specified (instead of a file object), 1590 the entire file is opened, read, and closed before parsing. 1591 """ 1592 try: 1593 file_contents = file_or_filename.read() 1594 except AttributeError: 1595 f = open(file_or_filename, "r") 1596 file_contents = f.read() 1597 f.close() 1598 try: 1599 return self.parseString(file_contents, parseAll) 1600 except ParseBaseException as exc: 1601 if ParserElement.verbose_stacktrace: 1602 raise 1603 else: 1604 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1605 raise exc
1606
1607 - def __eq__(self,other):
1608 if isinstance(other, ParserElement): 1609 return self is other or vars(self) == vars(other) 1610 elif isinstance(other, basestring): 1611 return self.matches(other) 1612 else: 1613 return super(ParserElement,self)==other
1614
1615 - def __ne__(self,other):
1616 return not (self == other)
1617
1618 - def __hash__(self):
1619 return hash(id(self))
1620
1621 - def __req__(self,other):
1622 return self == other
1623
1624 - def __rne__(self,other):
1625 return not (self == other)
1626
1627 - def matches(self, s, parseAll=True):
1628 """Method for quick testing of a parser against a test string. Good for simple 1629 inline microtests of sub expressions while building up larger parser, as in: 1630 1631 expr = Word(nums) 1632 assert expr.matches("100") 1633 1634 Parameters: 1635 - testString - string 1636 """ 1637 try: 1638 self.parseString(_ustr(s), parseAll=parseAll) 1639 return True 1640 except ParseBaseException: 1641 return False
1642
1643 - def runTests(self, tests, parseAll=False, comment='#', printResults=True):
1644 """Execute the parse expression on a series of test strings, showing each 1645 test, the parsed results or where the parse failed. Quick and easy way to 1646 run a parse expression against a list of sample strings. 1647 1648 Parameters: 1649 - tests - a list of separate test strings, or a multiline string of test strings 1650 - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests 1651 - comment - (default='#') - expression for indicating embedded comments in the test 1652 string; pass None to disable comment filtering 1653 - printResults - (default=True) prints test output to stdout; if False, returns a 1654 (success, results) tuple, where success indicates that all tests succeeded, and the 1655 results contain a list of lines of each test's output as it would have been 1656 printed to stdout 1657 """ 1658 if isinstance(tests, basestring): 1659 tests = list(map(str.strip, tests.splitlines())) 1660 if isinstance(comment, basestring): 1661 comment = Literal(comment) 1662 allResults = [] 1663 comments = [] 1664 success = True 1665 for t in tests: 1666 if comment is not None and comment.matches(t, False) or comments and not t: 1667 comments.append(t) 1668 continue 1669 if not t: 1670 continue 1671 out = ['\n'.join(comments), t] 1672 comments = [] 1673 try: 1674 out.append(self.parseString(t, parseAll=parseAll).dump()) 1675 except ParseBaseException as pe: 1676 fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" 1677 if '\n' in t: 1678 out.append(line(pe.loc, t)) 1679 out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) 1680 else: 1681 out.append(' '*pe.loc + '^' + fatal) 1682 out.append("FAIL: " + str(pe)) 1683 success = False 1684 1685 if printResults: 1686 out.append('') 1687 print('\n'.join(out)) 1688 else: 1689 allResults.append(out) 1690 1691 if not printResults: 1692 return success, allResults
1693


class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""

    def __init__(self):
        # tokens never save their results as a list
        super(Token, self).__init__(savelist=False)
1699


class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1708


class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail with a C{ParseException} at C{loc}."""
        raise ParseException(instring, loc, self.errmsg, self)
1721


class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal is turned into an Empty element, with a warning
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at C{loc}, returning the end location and the
        matched string, or raise C{ParseException}."""
        if instring[loc] == self.firstMatchChar and (
                self.matchLen == 1 or instring.startswith(self.match, loc)):
            return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)


_L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Besides the keyword string, the constructor accepts two optional arguments:
       C{identChars}, a string of the characters considered valid identifier
       characters (default: all alphanumerics plus "_" and "$"), and C{caseless},
       which enables case-insensitive matching (default C{False}).
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen > 0:
            self.firstMatchChar = matchString[0]
        else:
            # Only a warning is issued here; firstMatchChar stays unset.
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done on upper-cased text throughout.
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc}, requiring that the characters directly
        before and after it are not identifier characters.
        """
        idchars = self.identChars
        size = self.matchLen
        if self.caseless:
            found = (
                instring[loc:loc+size].upper() == self.caselessmatch
                and (loc >= len(instring)-size or instring[loc+size].upper() not in idchars)
                and (loc == 0 or instring[loc-1].upper() not in idchars)
            )
        else:
            found = (
                instring[loc] == self.firstMatchChar
                and (size == 1 or instring.startswith(self.match, loc))
                and (loc >= len(instring)-size or instring[loc+size] not in idchars)
                and (loc == 0 or instring[loc-1] not in idchars)
            )
        if found:
            return loc+size, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword."""
        duplicate = super(Keyword, self).copy()
        # NOTE(review): the copy always receives the class-level default
        # identifier characters (as a string rather than a set), even when this
        # instance was constructed with custom identChars - confirm intended.
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1807
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.
       Note: the returned text is always the match string exactly as it was
       given to the constructor, NOT the text as it appears in the input.
    """

    def __init__(self, matchString):
        # The parent stores the upper-cased form, which is what gets compared.
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice at C{loc} with the stored
        upper-cased match string.
        """
        stop = loc + self.matchLen
        if instring[loc:stop].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return stop, self.returnString
1824
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}.

    Matching is delegated to C{Keyword.parseImpl} with C{caseless=True}, so the
    keyword must neither be preceded nor followed by an identifier character.
    (The former C{parseImpl} override checked only the following character and
    therefore matched a keyword embedded at the end of a longer identifier.)

    Parameters:
     - matchString - the keyword text
     - identChars - string of characters regarded as identifier characters
       (default=C{Keyword.DEFAULT_KEYWORD_CHARS})
    """

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
1834
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{excludeChars} parameter can list characters that might be found in
       the input C{bodyChars} string; useful to define a word of all printables
       except for one or two characters, for instance.
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        """Raises C{ValueError} if C{min} is less than 1."""
        super(Word, self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # parseImpl always reads self.re, so give it an explicit default for
        # the case where the regex fast path below does not apply.
        self.re = None

        # Fast path: with default length limits and no space in either
        # character set, the word can be matched by a compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # Best effort only: fall back to the character-by-character scan.
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(newloc, matched_text)} for a word starting at C{loc}, or
        raise C{ParseException} if no acceptable word is found there.
        """
        if self.re:
            result = self.re.match(instring, loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # With an explicit max, a word that keeps going past the limit is a failure.
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # Keyword mode: no body character may touch the word on either side.
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            # Base class could not produce a string; build a default below.
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # Abbreviate long character sets to their first four characters.
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if self.initCharsOrig != self.bodyCharsOrig:
                self.strRepr = "W:(%s,%s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1957
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, which is then
        used directly. Any other type raises C{ValueError}; a pattern string
        that does not compile issues a warning and re-raises the C{re} error.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Keep the pattern source text; str() of a compiled RE yields the
            # object's repr, not the regular expression itself.
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the regular expression at C{loc}. The matched text is returned
        in a C{ParseResults}, with each named group stored under its name.
        """
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        named = result.groupdict()
        ret = ParseResults(result.group())
        for key in named:
            ret[key] = named[key]
        return loc, ret

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            # Base class could not produce a string; build a default below.
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2021
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        r"""Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
            - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

           Raises C{SyntaxError} if C{quoteChar} or C{endQuoteChar} is empty after
           stripping whitespace.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # Escaped form of the escape character for use inside a [^...] class.
        escInClass = _escapeRegexRangeChars(escChar) if escChar is not None else ''

        # Opening quote, then a group of "ordinary" characters: anything but
        # the first end-quote character and the escape character (and, for
        # single-line strings, line breaks).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escInClass )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escInClass )
        if len(self.endQuoteChar) > 1:
            # Allow proper prefixes of a multi-character end quote inside the
            # body, as long as they are not continued into the full end quote.
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at C{loc}. When C{unquoteResults} is set, the
        delimiters are stripped and escape sequences are resolved.
        """
        # Check the first character before running the regex.
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal literal
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            # Base class could not produce a string; build a default below.
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2145
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        """Raises C{ValueError} if C{min} is less than 1."""
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # An exact length overrides both the minimum and the maximum.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from C{loc} until a disallowed character or the
        maximum length is reached; raise C{ParseException} if the first
        character is disallowed or fewer than C{minLen} characters matched.
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # Base class could not produce a string; build a default below.
            pass

        if self.strRepr is None:
            # Abbreviate long exclusion sets to their first four characters.
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2208
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  Use this class when some whitespace structures
       are significant.  Construct it with a string of the whitespace characters to
       match; default is C{" \\t\\r\\n"}.  It also accepts optional C{min}, C{max}, and
       C{exact} arguments, as defined for the C{L{Word}} class."""
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # Characters that are to be matched must no longer be skipped as
        # ordinary leading whitespace.
        remaining = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars(remaining)
        #~ self.leaveWhitespace()
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # An exact length overrides both the minimum and the maximum.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting at
        C{loc}; raise C{ParseException} if there is none or it is too short.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2256
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below it; the
    token's name defaults to the name of its concrete class.
    """

    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
2264
class GoToColumn(_PositionToken):
    """Token that advances to a specific column of the input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column is
        reached (or a non-space character / the end of input stops the scan).
        """
        if col(loc, instring) == self.col:
            return loc
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between C{loc} and the target column; raise
        C{ParseException} if C{loc} is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[loc:target]
2287
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""

    def __init__(self):
        super(LineStart, self).__init__()
        # Newlines are significant here, so they are not skipped as whitespace.
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        skipped = super(LineStart, self).preParse(instring, loc)
        # The result is derived from the incoming loc, not from the position
        # the base-class preParse returned.
        if instring[skipped] == "\n":
            return loc + 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when C{loc} is at offset 0, at the position
        C{preParse} yields from offset 0, or directly after a newline.
        """
        atLineStart = (
            loc == 0
            or loc == self.preParse(instring, 0)
            or instring[loc-1] == "\n"
        )  #col(loc, instring) != 1:
        if atLineStart:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2307
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        super(LineEnd, self).__init__()
        # Newlines are significant here, so they are not skipped as whitespace.
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a newline at C{loc}, or succeed with no tokens exactly at
        the end of the input; anything else raises C{ParseException}.
        """
        total = len(instring)
        if loc < total and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == total:
            return loc+1, []
        raise ParseException(instring, loc, self.errmsg, self)
2325
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # A non-zero loc still counts as the start if everything before it is
        # skipped by preParse (whitespace and ignorables).
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2338
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or beyond the end of C{instring}; raise
        C{ParseException} if input remains at C{loc}.

        Exactly at the end the returned location is C{loc+1}; beyond the end
        C{loc} is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        # loc > end: the trailing else branch that used to follow these three
        # cases could never be reached and has been removed.
        return loc, []
2354
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """

    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        wordchars = self.wordChars
        # Offset 0 always qualifies; elsewhere the previous character must be
        # a non-word character and the current one a word character.
        if loc != 0 and (instring[loc-1] in wordchars or instring[loc] not in wordchars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2373
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """

    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        wordchars = self.wordChars
        total = len(instring)
        # At or past the end of input the token always matches; otherwise the
        # current character must be a non-word character and the previous one
        # a word character.
        if total > 0 and loc < total:
            if instring[loc] in wordchars or instring[loc-1] not in wordchars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2394
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__(self, exprs, savelist = False):
        """C{exprs} may be a single string, a generator, a sequence of strings
        and/or expressions, any other iterable, or a single expression. Strings
        are wrapped using C{ParserElement.literalStringClass}.
        """
        # collections.Sequence no longer exists on newer Python 3 releases;
        # fall back to the old location where collections.abc is unavailable.
        try:
            from collections.abc import Sequence
        except ImportError:
            from collections import Sequence

        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            self.exprs = [ParserElement.literalStringClass(exprs)]
        elif isinstance(exprs, Sequence):
            # if sequence of strings provided, wrap each with the literal class
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement.literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable: treat it as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append(other)
        # the cached string form is stale once the expression list changes
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so that the originals are left unmodified
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register C{other} as ignorable on this expression and on every
        contained expression. A C{Suppress} that is already registered is not
        added a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            # Base class could not produce a string; build a default below.
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName(self, name, listAllMatches=False):
        ret = super(ParseExpression, self).setResultsName(name, listAllMatches)
        return ret

    def validate(self, validateTrace=None):
        """Validate every contained expression, then check this expression for
        recursion. C{validateTrace} is the list of enclosing expressions; it is
        copied, never modified.
        """
        trace = [] if validateTrace is None else validateTrace[:]
        trace.append(self)
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion([])

    def copy(self):
        """Return a copy whose contained expressions are copies as well."""
        ret = super(ParseExpression, self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2504
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}. Once C{And.parseImpl} has passed one,
        failures of later expressions are raised as C{ParseSyntaxException}.
        """
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows that of the first expression
        self.setWhitespaceChars(self.exprs[0].whiteChars)
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, accumulating the tokens."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # past the error stop: convert failures to ParseSyntaxException
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        # Wrap plain strings with the configured literal class, matching what
        # Or.__ixor__ and MatchFirst.__ior__ do.
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other) #And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2569
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the C{'^'} operator.
    """

    def __init__(self, exprs, savelist = False):
        super(Or, self).__init__(exprs, savelist)
        # With no alternatives at all the expression is flagged as possibly empty.
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative, then parse with the candidates from longest
        to shortest trial match. If none succeeds, raise the failure that
        occurred furthest into the input.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # the sort is stable, so equally long candidates keep listing order
            candidates.sort(key=lambda pair: pair[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestErr = err
                        furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other):
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other) #Or( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        # every alternative is checked against the extended element list
        chain = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(chain)
2639
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """

    def __init__(self, exprs, savelist = False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # With no alternatives at all the expression is flagged as possibly empty.
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses. If none
        does, raise the failure that occurred furthest into the input.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other):
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other) #MatchFirst( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        # every alternative is checked against the extended element list
        chain = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(chain)
2695
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups are computed lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order they appear.

        A first pass probes the expressions with C{tryParse} to discover the
        order in which they occur; a second pass re-parses them in that order
        with C{_parse} and merges the results.

        Returns a C{(loc, ParseResults)} tuple.
        Raises C{ParseException} if a required expression was never matched.
        """
        if self.initExprGroups:
            optionalItems = [ e for e in self.exprs if isinstance(e,Optional) ]
            # maps the id of an Optional's inner expression back to the Optional itself
            self.opt1map = dict((id(e.expr),e) for e in optionalItems)
            opt1 = [ e.expr for e in optionalItems ]
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            # ZeroOrMore subclasses OneOrMore, so it must be excluded explicitly here;
            # otherwise a zero-or-more item would be treated as required
            self.multirequired = [ e.expr for e in self.exprs
                                   if isinstance(e,OneOrMore) and not isinstance(e,ZeroOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # keep sweeping over the outstanding expressions until a full sweep matches nothing
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    # record the original Optional wrapper when e is an Optional's inner expression
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults()
        for r in resultlist:
            # names already present in finalResults get the old and new values combined
            dups = {}
            for k in r.keys():
                if k in finalResults:
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"{a & b}"} rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression with this one added to the trail."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2780
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the wrapped expression's parsing attributes on the wrapper
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a private copy of the wrapped expression.

        Returns C{self}.
        """
        self.skipWhitespace = False
        # guard before copying: the wrapped expression may not have been assigned yet
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on the wrapped expression.

        A C{Suppress} that is already in C{ignoreExprs} is not added again.
        Returns C{self}.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression; returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element is already on the trail,
        otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=None ):
        """Validate the wrapped expression, then run the recursion check from this element.

        C{validateTrace} is the list of elements visited so far; it is copied, not modified.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Use the base class rendering when it works, else a cached C{"ClassName:(expr)"} form."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # fall through to the generated representation
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2852
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Probe the wrapped expression at C{loc}; on success return C{loc} unchanged and no tokens."""
        # the probe's return value is deliberately discarded; a failure propagates as an exception
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
2866
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # set the flag directly rather than calling self.leaveWhitespace(),
        # which would propagate the setting to the wrapped expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Fail if the wrapped expression can parse at C{loc}; otherwise return C{loc} and no tokens."""
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"~{expr}"} rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "~{" + inner + "}"

        return self.strRepr
2894
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__( self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = Literal(sentinel)
        # negated lookahead for the sentinel, or None when no sentinel was given
        self.not_ender = None if sentinel is None else ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expression once, then keep matching until it fails.

        A failure of the first match (or a sentinel found at the start)
        propagates to the caller; a failure on any later repetition simply
        ends the loop.  Returns C{(loc, tokens)}.
        """
        # bind the hot-loop callables to locals once
        parseExpr = self.expr._parse
        skipIgnorables = self._skipIgnorables
        rejectSentinel = None
        if self.not_ender is not None:
            rejectSentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are at the stopOn sentinel;
        # if so, fail)
        if rejectSentinel is not None:
            rejectSentinel(instring, loc)
        loc, tokens = parseExpr( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = bool(self.ignoreExprs)
            while True:
                if rejectSentinel is not None:
                    rejectSentinel(instring, loc)
                preloc = skipIgnorables( instring, loc ) if hasIgnoreExprs else loc
                loc, tmptokens = parseExpr( instring, preloc, doActions )
                if tmptokens or tmptokens.haskeys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"{expr}..."} rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "{" + inner + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned element C{saveAsList}."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2954
class ZeroOrMore(OneOrMore):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Run the one-or-more parse; if it fails, succeed with no tokens at the original C{loc}."""
        try:
            matched = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            return loc, []
        return matched

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"[expr]..."} rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]..."

        return self.strRepr
2982
class _NullToken(object):
    """Placeholder object that is always falsy and renders as the empty string."""
    def __nonzero__(self):
        return False
    # Python 3 spelling of the truth-value hook
    __bool__ = __nonzero__
    def __str__(self):
        return ""

# module-level sentinel used as the default for Optional's "default" argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may be matched; if it does not match, parsing
          still succeeds
     - default (optional) - value to be returned if the optional expression
          is not found.
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression; on failure return the default value (if any) at C{loc}."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                # no default was supplied
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name as well
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"[expr]"} rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]"

        return self.strRepr
3026
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped text; the scan stops as soon as one is found
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc} one character at a time until the target parses.

        Returns C{(loc, ParseResults)} holding the skipped text, plus the
        target's tokens when C{include} was set.  Raises C{ParseException}
        if the scan runs past the end of the input.
        """
        startloc = loc
        instrlen = len(instring)
        parseTarget = self.expr._parse
        failOnMatches = None
        if self.failOn is not None:
            failOnMatches = self.failOn.canParseNext
        skipIgnored = None
        if self.ignoreExpr is not None:
            skipIgnored = self.ignoreExpr.tryParse

        scanloc = loc
        while scanloc <= instrlen:
            # stop scanning if the failOn expression matches here
            if failOnMatches is not None and failOnMatches(instring, scanloc):
                break

            if skipIgnored is not None:
                # advance past any run of ignore expressions
                while True:
                    try:
                        scanloc = skipIgnored(instring, scanloc)
                    except ParseBaseException:
                        break

            try:
                parseTarget(instring, scanloc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance one character
                scanloc += 1
            else:
                # matched the target expression, done
                break

        else:
            # ran off the end of the input string without matching the target, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scanloc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            loc, mat = parseTarget(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
3099
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign C{other} as the wrapped expression and copy its parsing attributes; returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        # drop any cached string representation
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """Implement C{self <<= other} with the same effect as C{self << other}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the wrapped expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once.

        The C{streamlined} flag is set before descending, so a recursive
        grammar that leads back to this element does not loop forever.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """Validate the wrapped expression unless this element is already on the trace,
        then run the recursion check from this element."""
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if there is one, else C{"<ClassName>: ..."}.

        The wrapped expression is deliberately not rendered; the original
        author noted that doing so creates awful memory and performance issues.
        """
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """Return a copy; an undefined C{Forward} yields a new C{Forward} that wraps this one."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
3176
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the literal C{"..."}."""
    def __str__( self ):
        ellipsis = "..."
        return ellipsis
3180
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded to the base class; saveAsList is forced off
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
3186
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.callPreparse = True
        self.adjacent = adjacent
        self.joinString = joinString

    def ignore( self, other ):
        """Register an ignorable expression; when C{adjacent} is set, use C{ParserElement.ignore}
        directly instead of the inherited override.  Returns C{self}."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with one string, keeping any results names."""
        # copy first, then empty the token list part of the copy
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [ combined ]
        return combined
3218
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the matched tokens in a one-element list."""
        grouped = [ tokenlist ]
        return grouped
3227
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as an item key.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a named entry to C{tokenlist} for every non-empty element, keyed by its first token."""
        for index, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue

            key = tok[0]
            if isinstance(key,int):
                # integer keys are converted to their stripped string form
                key = _ustr(tok[0]).strip()

            if size == 1:
                # a key with no value maps to the empty string
                value = ""
            elif size == 2 and not isinstance(tok[1],ParseResults):
                value = tok[1]
            else:
                rest = tok.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest,ParseResults) and rest.haskeys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value,index)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3260
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Discard the matched tokens by returning an empty list."""
        nothing = []
        return nothing

    def suppress( self ):
        """Return C{self} unchanged."""
        return self
3269
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self,s,l,t):
        """Run the wrapped action the first time; raise C{ParseException} on later calls until C{reset}."""
        if self.called:
            raise ParseException(s,l,"")
        # the flag is only set after the action returns, so an action that raises can be retried
        results = self.callable(s,l,t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called again."""
        self.called = False
3284
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the action and a "leaving" line afterwards, showing either the
    return value or the exception (which is re-raised).
    """
    f = _trim_arity(f)
    def z(*callArgs):
        label = f.__name__
        # the last three positional arguments are (s, l, t)
        s,l,t = callArgs[-3:]
        if len(callArgs)>3:
            # an extra leading argument is present; prefix the label with its class name
            label = callArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (label,line(l,s),l,t) )
        try:
            ret = f(*callArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (label,ret) )
        return ret

    # carry the wrapped action's name over when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3323
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       If C{intExpr} is given, a copy of it is used to parse the count;
       otherwise a run of digits converted with C{int} is used.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the array expression now that the count is known
        count = t[0]
        if count:
            arrayExpr << Group(And([expr]*count))
        else:
            arrayExpr << Group(empty)
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """Return a new flat list with every nested C{list} in C{L} expanded in place.

    Only C{list} instances are expanded; other items are kept as they are.
    """
    flat = []
    for item in L:
        if not isinstance(item,list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
3352
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever the original expression just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several tokens: flatten them and require each one as a literal, in order
            flatTokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flatTokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of the original expression
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # fail unless the repeat produced exactly the tokens captured above
            actual = _flatten(t.asList())
            if actual != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex C{[...]} character class.

    A backslash is put in front of each of the characters backslash, C{^},
    C{-} and C{]}; newline and tab characters become the two-character
    sequences C{\\n} and C{\\t}.
    """
    escaped = s
    # the backslash comes first in this string so escapes added later are not doubled
    for special in r"\^-]":
        escaped = escaped.replace(special,_bslash+special)
    escaped = escaped.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(escaped)
3414
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Returns C{NoMatch()} when no symbols are given; an argument of an
       unsupported type additionally emits a C{SyntaxWarning}.
    """
    # collections.Sequence was removed in Python 3.10; prefer collections.abc and
    # fall back to the old location on interpreters that lack it (Python 2)
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol that is masked by an earlier, shorter
    # prefix in front of that prefix so the longer one is tested first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position, move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # deliberately best-effort: fall through to MatchFirst, but do not
            # swallow KeyboardInterrupt/SystemExit
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3478
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3488
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # zero-width markers whose parse action returns the current location
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the token list in place; popping removes the two marker names
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    matchExpr.setParseAction(extractText)
    return matchExpr
def ungroup(expr):
    """Helper to undo a level of grouping: the returned expression wraps C{expr}
       and replaces its results with their first element."""
    firstOnly = lambda t:t[0]
    unwrapped = TokenConverter(expr)
    return unwrapped.setParseAction(firstOnly)
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    # zero-width marker whose parse action returns the current location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startMark = locator("locn_start")
    # the end marker is a copy with whitespace skipping turned off
    endMark = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMark + expr("value") + endMark)


# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar used by srange() to parse regex-style '[...]' character sets

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# '\x##' or '\0x##' (either case of 'x') -> the character with that hex code.
# The digits are taken as everything after the 'x': stripping the prefix with
# lstrip(r'\0x') also removed leading zero digits (so '\x00' became int('')),
# and did not remove an upper-case 'X' (so '\X41' became int('X41')).
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# '\0###' -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# alternatives are tried in order, so escapes win over plain characters
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# 'a-z' style range, grouped so srange() can tell it apart from a single char
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If C{s} cannot be parsed or expanded, an empty string is returned
       instead of raising an exception.
    """
    def _expanded(part):
        # a range is parsed as a grouped (first, last) pair; a lone
        # character comes through as a plain string
        if not isinstance(part, ParseResults):
            return part
        return ''.join(unichr(c) for c in range(ord(part[0]), ord(part[1])+1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # best-effort helper: bad input gives "", but KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except
        return ""
3568
def matchOnlyAtCol(n):
    """Helper that builds a parse action accepting a match only at column C{n}.

    The returned parse action raises C{ParseException} when the column of the
    match location, as computed by C{col}, differs from C{n}.
    """
    def verifyCol(strg, locn, toks):
        actualCol = col(locn, strg)
        if actualCol == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol

def replaceWith(replStr):
    """Helper that builds a parse action which ignores what was matched and
       returns a one-element list holding C{replStr}.  Especially useful when
       used with C{L{transformString<ParserElement.transformString>}()}.
    """
    def substitute(s, l, t):
        # a fresh list on every call
        return [replStr]

    return substitute
3583
def removeQuotes(s,l,t):
    """Helper parse action that drops the first and last character of the
       first token, i.e. the quotation marks of a parsed quoted string.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3590
def upcaseTokens(s,l,t):
    """Helper parse action returning every token, converted with C{_ustr}, in upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3594
def downcaseTokens(s,l,t):
    """Helper parse action returning every token, converted with C{_ustr}, in lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3598
def _makeTags(tagStr, xml):
    """Internal helper building the (openTag, closeTag) expression pair for a tag.

    C{tagStr} is either a tag name string or an expression; C{xml} selects the
    stricter XML attribute syntax over the looser HTML one.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        # string tag names match caselessly unless parsing XML
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute is name="double quoted value"
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = tagAttrName + Suppress("=") + tagAttrValue
    else:
        # HTML: names are lower-cased, the value is optional and may be unquoted
        printablesLessRAbrack = "".join(c for c in printables if c != ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = tagAttrName.setParseAction(downcaseTokens) + Optional( Suppress("=") + tagAttrValue )

    # results name "empty" is True when the tag ends with "/", False otherwise
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(Group( attribute ))) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # "ns:tag" -> "NsTag"; used in the results names startNsTag / endNsTag
    suffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + suffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + suffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3627
def makeHTMLTags(tagStr):
    """Helper returning the (opening, closing) tag expressions for an HTML tag name or expression."""
    openTag, closeTag = _makeTags( tagStr, xml=False )
    return openTag, closeTag
3631
def makeXMLTags(tagStr):
    """Helper returning the (opening, closing) tag expressions for an XML tag name or expression."""
    openTag, closeTag = _makeTags( tagStr, xml=True )
    return openTag, closeTag
3635
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       The required attribute names and values can be given as:
        - keyword arguments, as in C{(align="right")}, or
        - an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Positional name-value tuples take precedence: if any are given, the
       keyword arguments are not consulted.  Attribute names with a namespace
       prefix cannot be written as plain keyword arguments, so use the dict
       or tuple form for those.  Names are looked up in the parsed tokens as
       given; start tags built by C{L{makeHTMLTags}} lower-case attribute
       names while parsing.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.
    """
    # snapshot the requirements as (name, value) tuples when the action is built
    required = [(name, value) for name, value in (args or attrDict.items())]

    def pa(s,l,tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, tokens[attrName], attrValue))
    return pa
# sentinel meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()

def withClass(classname, namespace=''):
    """Shortcut for C{L{withAttribute}} that checks only the C{class} attribute,
       which is awkward to pass directly because C{class} is a reserved word in
       Python.  With a non-empty C{namespace}, the attribute checked is
       C{<namespace>:class}.
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})

# opAssoc holds the two associativity markers accepted by infixNotation in the
# rightLeftAssoc slot of an operator definition.  Each marker is a distinct
# plain object().
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar, listed from the tightest-binding level to the
          loosest; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple or list of two expressions,
              for the two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} if an operator definition has an unsupported
       number of terms, an unknown associativity, or (for numTerms 3) an
       opExpr that is not a pair of expressions.
    """
    ret = Forward()
    # operands of the tightest level: a base element or a parenthesized expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            # name is built from the unpacked pair, after validation, so a
            # list opExpr works and a bad one gets the ValueError above
            # rather than a string-formatting TypeError
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # wrapped in a tuple so any opExpr is formatted as one value
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # opExpr of None: operands are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # a level matches its own operator form, or falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# alias for infixNotation
operatorPrecedence = infixNotation

# Quoted-string expressions.  Inside the quotes each regex accepts: any
# character other than the quote, newline, carriage return or backslash; a
# doubled quote character; or a backslash followed by either one non-'x'
# character or 'x' plus hex digits.  The closing quote is matched separately
# and Combine joins the pieces into one token.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either quoting style; the double-quoted alternative is listed first
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       When no C{content} expression is given, both delimiters must be
       strings, and the nested expression captures all whitespace-delimited
       content between delimiters as a list of separate values.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if opener and closer are equal, or if C{content}
       is omitted and either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1
        haveIgnore = ignoreExpr is not None

        if singleCharDelims and haveIgnore:
            # one char at a time, so ignoreExpr is checked at every position
            content = Combine(OneOrMore(~ignoreExpr +
                                        CharsNotIn(opener+closer+whiteChars,exact=1))
                              ).setParseAction(stripToken)
        elif singleCharDelims:
            # nothing to ignore: take whole runs of non-delimiter, non-white chars
            content = empty.copy()+CharsNotIn(opener+closer+whiteChars).setParseAction(stripToken)
        elif haveIgnore:
            # multi-character delimiters are excluded with negative lookahead
            content = Combine(OneOrMore(~ignoreExpr +
                                        ~Literal(opener) + ~Literal(closer) +
                                        CharsNotIn(whiteChars,exact=1))
                              ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                        CharsNotIn(whiteChars,exact=1))
                              ).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        items = ignoreExpr | ret | content
    else:
        items = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
3815
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{ignore()} is called on the passed-in C{blockStatementExpr}
       itself, so that a backslash followed by a line end is skipped.
    """
    def checkPeerIndent(s,l,t):
        # a statement must start at exactly the column on top of the stack
        if l >= len(s): return  # at end of input: nothing to check
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                # deeper than the current block: fatal error
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: column must be deeper than the current level,
        # and becomes the new top of the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: column must be left of the current level and no
        # deeper than the level below it; then the current level is popped
        if l >= len(s): return  # at end of input: accepted without popping
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; only tab and space count as skippable whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers whose parse actions do the column checks above
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: no INDENT/UNDENT bracketing, only peer checks
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # skip a backslash immediately followed by a line end inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# characters with code points 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# characters with code points 0xa1-0xbf, plus 0xd7 and 0xf7
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions for any HTML tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> character: gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'"
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches '&name;' for the names above; the name is captured in the 'entity' group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parse action mapping a matched common HTML entity to its character.

    Looks up the C{entity} results name of C{t} in C{_htmlEntityMap}; names
    not in the map give C{None}.
    """
    entityName = t.entity
    return _htmlEntityMap.get(entityName)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# '/*' then any run of non-'*' characters or '*' not followed by '/'; the
# closing '*/' is matched separately and combined in
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")

# '<!--' through the first following '-->', newlines included
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# '//' to end of line; a backslash-newline pair continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
# either a '/* ... */' comment or a '//' comment
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")

# same expression object as cppStyleComment
javaStyleComment = cppStyleComment
# '#' to end of line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one unquoted list item: comma-free words, keeping spaces/tabs between words
# but not when they are followed by a comma or a line end
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# comma-delimited list of quoted strings or unquoted items; an empty item gives ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")

# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (integers, reals, scientific notation)
     - parse actions for converting numeric strings to Python int and/or float types
     - common programming identifiers
    """

    # The two converters below are plain functions that the expressions
    # further down attach as parse actions while the class body is executed.

    def convertToInteger(t):
        """
        Parse action for converting parsed integers to Python int
        """
        return int(t[0])

    def convertToFloat(t):
        """
        Parse action for converting parsed numbers to Python float
        """
        return float(t[0])

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer and returns an int"""

    signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign and returns an int"""

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # name shown in error messages; spelling corrected from "scientfic"
    sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    numeric = (sciReal | real | signedInteger).streamline()
    """any numeric expression, returns the corresponding Python type"""

    number = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("number").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier"""


# Self-test demo, run only when the module is executed as a script: builds a
# tiny SELECT-statement grammar and exercises it, then the numeric
# expressions of pyparsing_common, through runTests().
if __name__ == "__main__":

    selectToken    = CaselessLiteral("select")
    fromToken      = CaselessLiteral("from")

    ident          = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and upper-cased
    columnName     = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec     = ('*' | columnNameList)

    tableName      = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList  = Group(delimitedList(tableName)).setName("tables")

    simpleSQL      = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # each numeric form should come back as the matching Python type
    pyparsing_common.numeric.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)
