1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the pyparsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from L{I{ParserElement.parseString}<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.8"
61 __versionTime__ = "14 Aug 2016 08:43 UTC"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import traceback
74 import types
75 from datetime import datetime
76
77 try:
78 from _thread import RLock
79 except ImportError:
80 from threading import RLock
81
82 try:
83 from collections import OrderedDict as _OrderedDict
84 except ImportError:
85 try:
86 from ordereddict import OrderedDict as _OrderedDict
87 except ImportError:
88 _OrderedDict = None
89
90
91
92 __all__ = [
93 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
94 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
95 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
96 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
97 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
98 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
99 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
100 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
101 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
102 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
103 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
104 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
105 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
106 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
107 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
108 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
109 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
110 'tokenMap', 'pyparsing_common',
111 ]
112
113 system_version = tuple(sys.version_info)[:3]
114 PY_3 = system_version[0] == 3
115 if PY_3:
116 _MAX_INT = sys.maxsize
117 basestring = str
118 unichr = chr
119 _ustr = str
120
121
122 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
123
124 else:
125 _MAX_INT = sys.maxint
126 range = xrange
129 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
130 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
131 then < returns the unicode object | encodes it with the default encoding | ... >.
132 """
133 if isinstance(obj,unicode):
134 return obj
135
136 try:
137
138
139 return str(obj)
140
141 except UnicodeEncodeError:
142
143 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
144 xmlcharref = Regex('&#\d+;')
145 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
146 return xmlcharref.transformString(ret)
147
148
149 singleArgBuiltins = []
150 import __builtin__
151 for fname in "sum len sorted reversed list tuple set any all min max".split():
152 try:
153 singleArgBuiltins.append(getattr(__builtin__,fname))
154 except AttributeError:
155 continue
156
157 _generatorType = type((y for y in range(1)))
160 """Escape &, <, >, ", ', etc. in a string of data."""
161
162
163 from_symbols = '&><"\''
164 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
165 for from_,to_ in zip(from_symbols, to_symbols):
166 data = data.replace(from_, to_)
167 return data
168
171
172 alphas = string.ascii_uppercase + string.ascii_lowercase
173 nums = "0123456789"
174 hexnums = nums + "ABCDEFabcdef"
175 alphanums = alphas + nums
176 _bslash = chr(92)
177 printables = "".join(c for c in string.printable if c not in string.whitespace)
180 """base exception class for all parsing runtime exceptions"""
181
182
def __init__( self, pstr, loc=0, msg=None, elem=None ):
    """
    Set up the exception's location, message and source text.

    @param pstr: the text being parsed - or, when C{msg} is omitted, the
        message itself
    @param loc: character offset at which the problem was detected
    @param msg: explanatory message; C{None} means "C{pstr} is the message"
    @param elem: stored unchanged as C{parserElement}
    """
    # single-string form: the string is the message and no input text is kept
    if msg is None:
        message, text = pstr, ""
    else:
        message, text = msg, pstr
    self.loc = loc
    self.msg = message
    self.pstr = text
    self.parserElement = elem
    # args always records the raw constructor arguments, not the swapped values
    self.args = (pstr, loc, msg)
193
194 @classmethod
196 """
197 internal factory method to simplify creating one type of ParseException
198 from another - avoids having __init__ signature conflicts among subclasses
199 """
200 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
201
203 """supported attributes by name are:
204 - lineno - returns the line number of the exception text
205 - col - returns the column number of the exception text
206 - line - returns the line containing the exception text
207 """
208 if( aname == "lineno" ):
209 return lineno( self.loc, self.pstr )
210 elif( aname in ("col", "column") ):
211 return col( self.loc, self.pstr )
212 elif( aname == "line" ):
213 return line( self.loc, self.pstr )
214 else:
215 raise AttributeError(aname)
216
218 return "%s (at char %d), (line:%d, col:%d)" % \
219 ( self.msg, self.loc, self.lineno, self.column )
233 return "lineno col line".split() + dir(type(self))
234
236 """
237 Exception thrown when parse expressions don't match class;
238 supported attributes by name are:
239 - lineno - returns the line number of the exception text
240 - col - returns the column number of the exception text
241 - line - returns the line containing the exception text
242
243 Example::
244 try:
245 Word(nums).setName("integer").parseString("ABC")
246 except ParseException as pe:
247 print(pe)
248 print("column: {}".format(pe.col))
249
250 prints::
251 Expected integer (at char 0), (line:1, col:1)
252 column: 1
253 """
254 pass
255
257 """user-throwable exception thrown when inconsistent parse content
258 is found; stops all parsing immediately"""
259 pass
260
262 """just like L{ParseFatalException}, but thrown internally when an
263 L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
264 immediately because an unbacktrackable syntax error has been found"""
265 pass
266
281 """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """
    Keep the list of parse elements supplied by the raiser.

    @param parseElementList: stored unchanged as C{parseElementTrace}
    """
    self.parseElementTrace = parseElementList
284
286 return "RecursiveGrammarException: %s" % self.parseElementTrace
287
294 return repr(self.tup)
296 self.tup = (self.tup[0],i)
297
299 """
300 Structured parse results, to provide multiple means of access to the parsed data:
301 - as a list (C{len(results)})
302 - by list index (C{results[0], results[1]}, etc.)
303 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
304
305 Example::
306 integer = Word(nums)
307 date_str = (integer.setResultsName("year") + '/'
308 + integer.setResultsName("month") + '/'
309 + integer.setResultsName("day"))
310 # equivalent form:
311 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
312
313 result = date_str.parseString("1999/12/31")
314
315 def test(s, fn=repr):
316 print("%s -> %s" % (s, fn(eval(s))))
317 test("list(result)")
318 test("result[0]")
319 test("result['month']")
320 test("result.day")
321 test("'month' in result")
322 test("'minutes' in result")
323 test("result.dump()", str)
324 prints::
325 list(result) -> ['1999', '/', '12', '/', '31']
326 result[0] -> '1999'
327 result['month'] -> '12'
328 result.day -> '31'
329 'month' in result -> True
330 'minutes' in result -> False
331 result.dump() -> ['1999', '/', '12', '/', '31']
332 - day: 31
333 - month: 12
334 - year: 1999
335 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
    """
    Allocate a results object, or hand back C{toklist} itself when it is
    already an instance of C{cls}.

    Only C{cls} and C{toklist} are examined here; the remaining parameters
    mirror the C{__init__} signature.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        # flag consumed by __init__: only newly allocated objects get their state set up
        fresh.__doinit = True
        return fresh
    # existing instance: returned untouched, its flag is not reset
    return toklist
342
343
344
def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
    """
    Initialise the token list and, when a results name is given, register
    the tokens under that name.

    @param toklist: a list (copied), a generator (materialised), or any other
        object (wrapped in a one-element list); C{None} means no tokens
    @param name: optional results name; falsy values are ignored, integers
        are converted to strings
    @param asList: when true the named value is stored as a nested
        C{ParseResults}, otherwise the first token (or the whole C{toklist}
        if it cannot be indexed) is stored
    @param modal: when false, C{name} is recorded in the accumulating-names table
    @param isinstance: the builtin bound as a default argument - presumably a
        lookup shortcut; callers are not expected to pass it
    """
    # state is only built for objects freshly created by __new__
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            tokens = toklist[:]
        elif isinstance(toklist, _generatorType):
            tokens = list(toklist)
        else:
            tokens = [toklist]
        self.__toklist = tokens
        self.__tokdict = dict()

    # nothing more to do without a usable results name
    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name,int):
        name = _ustr(name)
    self.__name = name

    # None, '' and [] carry no value worth naming
    nothingToName = (isinstance(toklist, (type(None), basestring, list))
                     and toklist in (None,'',[]))
    if nothingToName:
        return

    if isinstance(toklist,basestring):
        toklist = [ toklist ]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError,TypeError,IndexError):
            self[name] = toklist
        return

    if isinstance(toklist,ParseResults):
        nested = toklist.copy()
    else:
        nested = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(nested,0)
    self[name].__name = name
383
385 if isinstance( i, (int,slice) ):
386 return self.__toklist[i]
387 else:
388 if i not in self.__accumNames:
389 return self.__tokdict[i][-1][0]
390 else:
391 return ParseResults([ v[0] for v in self.__tokdict[i] ])
392
394 if isinstance(v,_ParseResultsWithOffset):
395 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
396 sub = v[0]
397 elif isinstance(k,(int,slice)):
398 self.__toklist[k] = v
399 sub = v
400 else:
401 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
402 sub = v
403 if isinstance(sub,ParseResults):
404 sub.__parent = wkref(self)
405
407 if isinstance(i,(int,slice)):
408 mylen = len( self.__toklist )
409 del self.__toklist[i]
410
411
412 if isinstance(i, int):
413 if i < 0:
414 i += mylen
415 i = slice(i, i+1)
416
417 removed = list(range(*i.indices(mylen)))
418 removed.reverse()
419
420 for name,occurrences in self.__tokdict.items():
421 for j in removed:
422 for k, (value, position) in enumerate(occurrences):
423 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
424 else:
425 del self.__tokdict[i]
426
428 return k in self.__tokdict
429
def __len__( self ):
    """Number of tokens in the token list (named results are not counted separately)."""
    return len( self.__toklist )

def __bool__(self):
    """True exactly when the token list is non-empty."""
    return bool( self.__toklist )

# Python 2 spelling of the truth-value hook
__nonzero__ = __bool__

def __iter__( self ):
    """Iterate over the tokens in order."""
    return iter( self.__toklist )

def __reversed__( self ):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter( backwards )
436 if hasattr(self.__tokdict, "iterkeys"):
437 return self.__tokdict.iterkeys()
438 else:
439 return iter(self.__tokdict)
440
442 return (self[k] for k in self._iterkeys())
443
445 return ((k, self[k]) for k in self._iterkeys())
446
447 if PY_3:
448 keys = _iterkeys
449 """Returns an iterator of all named result keys (Python 3.x only)."""
450
451 values = _itervalues
452 """Returns an iterator of all named result values (Python 3.x only)."""
453
454 items = _iteritems
455 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
456
457 else:
458 iterkeys = _iterkeys
459 """Returns an iterator of all named result keys (Python 2.x only)."""
460
461 itervalues = _itervalues
462 """Returns an iterator of all named result values (Python 2.x only)."""
463
464 iteritems = _iteritems
465 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
466
468 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
469 return list(self.iterkeys())
470
472 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
473 return list(self.itervalues())
474
476 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
477 return list(self.iteritems())
478
480 """Since keys() returns an iterator, this method is helpful in bypassing
481 code that looks for the existence of any defined results names."""
482 return bool(self.__tokdict)
483
def pop( self, *args, **kwargs):
    """
    Remove an entry and return it, using either C{list} or C{dict} semantics.

    With no argument, or with an integer argument, the token at that position
    (default: the last one) is removed from the token list.  With any other
    key - typically a results name - the matching named result is removed
    instead.  As with C{dict.pop()}, a second positional argument, or the
    keyword C{default}, gives the value to return when the key is absent.

    @raise TypeError: for any keyword argument other than C{default}

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Use pop() in a parse action to remove named result (note that corresponding value is not
        # removed from list form of results)
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())
    prints::
        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    if not args:
        args = [-1]
    for kw, value in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        args = (args[0], value)

    key = args[0]
    # the fallback applies only to a missing non-integer key that came with a default
    useFallback = not (isinstance(key, int) or len(args) == 1 or key in self)
    if useFallback:
        return args[1]

    removed = self[key]
    del self[key]
    return removed
535
def get(self, key, defaultValue=None):
    """
    Look up a named result, falling back to C{defaultValue} (C{None} unless
    given) when C{key} is not present.

    Behaves like C{dict.get()}.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
557
def insert( self, index, insStr ):
    """
    Insert a new token at position C{index} of the token list, as
    C{list.insert()} does, and adjust the recorded positions of named results.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)

    # any named result recorded at a position greater than index moves right by one
    for occurrences in self.__tokdict.values():
        for slot, (value, position) in enumerate(occurrences):
            moved = position + 1 if position > index else position
            occurrences[slot] = _ParseResultsWithOffset(value, moved)
577
579 """
580 Add single element to end of ParseResults list of elements.
581
582 Example::
583 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
584
585 # use a parse action to compute the sum of the parsed integers, and add it to the end
586 def append_sum(tokens):
587 tokens.append(sum(map(int, tokens)))
588 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
589 """
590 self.__toklist.append(item)
591
593 """
594 Add sequence of elements to end of ParseResults list of elements.
595
596 Example::
597 patt = OneOrMore(Word(alphas))
598
599 # use a parse action to append the reverse of the matched strings, to make a palindrome
600 def make_palindrome(tokens):
601 tokens.extend(reversed([t[::-1] for t in tokens]))
602 return ''.join(tokens)
603 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
604 """
605 if isinstance(itemseq, ParseResults):
606 self += itemseq
607 else:
608 self.__toklist.extend(itemseq)
609
611 """
612 Clear all elements and results names.
613 """
614 del self.__toklist[:]
615 self.__tokdict.clear()
616
618 try:
619 return self[name]
620 except KeyError:
621 return ""
622
623 if name in self.__tokdict:
624 if name not in self.__accumNames:
625 return self.__tokdict[name][-1][0]
626 else:
627 return ParseResults([ v[0] for v in self.__tokdict[name] ])
628 else:
629 return ""
630
632 ret = self.copy()
633 ret += other
634 return ret
635
637 if other.__tokdict:
638 offset = len(self.__toklist)
639 addoffset = lambda a: offset if a<0 else a+offset
640 otheritems = other.__tokdict.items()
641 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
642 for (k,vlist) in otheritems for v in vlist]
643 for k,v in otherdictitems:
644 self[k] = v
645 if isinstance(v[0],ParseResults):
646 v[0].__parent = wkref(self)
647
648 self.__toklist += other.__toklist
649 self.__accumNames.update( other.__accumNames )
650 return self
651
653 if isinstance(other,int) and other == 0:
654
655 return self.copy()
656 else:
657
658 return other + self
659
661 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
662
664 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
665
667 out = []
668 for item in self.__toklist:
669 if out and sep:
670 out.append(sep)
671 if isinstance( item, ParseResults ):
672 out += item._asStringList()
673 else:
674 out.append( _ustr(item) )
675 return out
676
678 """
679 Returns the parse results as a nested list of matching tokens, all converted to strings.
680
681 Example::
682 patt = OneOrMore(Word(alphas))
683 result = patt.parseString("sldkj lsdkj sldkj")
684 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
685 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
686
687 # Use asList() to create an actual list
688 result_list = result.asList()
689 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
690 """
691 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
692
694 """
695 Returns the named parse results as a nested dictionary.
696
697 Example::
698 integer = Word(nums)
699 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
700
701 result = date_str.parseString('12/31/1999')
702 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
703
704 result_dict = result.asDict()
705 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
706
707 # even though a ParseResults supports dict-like access, sometimes you just need to have a dict
708 import json
709 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
710 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
711 """
712 if PY_3:
713 item_fn = self.items
714 else:
715 item_fn = self.iteritems
716
717 def toItem(obj):
718 if isinstance(obj, ParseResults):
719 if obj.haskeys():
720 return obj.asDict()
721 else:
722 return [toItem(v) for v in obj]
723 else:
724 return obj
725
726 return dict((k,toItem(v)) for k,v in item_fn())
727
729 """
730 Returns a new copy of a C{ParseResults} object.
731 """
732 ret = ParseResults( self.__toklist )
733 ret.__tokdict = self.__tokdict.copy()
734 ret.__parent = self.__parent
735 ret.__accumNames.update( self.__accumNames )
736 ret.__name = self.__name
737 return ret
738
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """
    (Deprecated) Render the parse results as an XML string.

    Tokens and nested results that have a results name use that name as
    their tag; everything else is tagged C{ITEM}, or left out when
    C{namedItemsOnly} is set.

    @param doctag: tag for the outermost element; defaults to this object's
        own results name, then to C{ITEM}
    @param namedItemsOnly: emit only elements that have a results name
    @param indent: prefix written before this element's opening and closing tags
    @param formatted: when false, no newlines or indentation are emitted
    @return: the XML text, or C{""} when this object has no tag and
        C{namedItemsOnly} is set
    """
    # token position -> results name recorded at that position
    namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                    for v in vlist)

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        nl = indent = nextLevelIndent = ""

    # an explicit doctag wins over this object's own results name
    if doctag is not None:
        selfTag = doctag
    else:
        selfTag = self.__name or None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    # children are restricted to named items only when no explicit doctag was given
    childNamedOnly = namedItemsOnly and doctag is None

    pieces = [ nl, indent, "<", selfTag, ">" ]
    for pos, tok in enumerate(self.__toklist):
        tag = namedItems.get(pos)
        if isinstance(tok,ParseResults):
            pieces.append(tok.asXML(tag, childNamedOnly, nextLevelIndent, formatted))
            continue

        # plain token: needs a tag of its own, or is dropped
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        pieces += [ nl, nextLevelIndent, "<", tag, ">",
                    _xml_escape(_ustr(tok)),
                    "</", tag, ">" ]

    pieces += [ nl, indent, "</", selfTag, ">" ]
    return "".join(pieces)
799
801 for k,vlist in self.__tokdict.items():
802 for v,loc in vlist:
803 if sub is v:
804 return k
805 return None
806
def getName(self):
    """
    Returns the results name for this token expression. Useful when several
    different expressions might match at a particular location.

    The name is looked up in this order: this object's own name; the name
    under which the parent (held as a weak reference) stores this object;
    and, for a single-token result with exactly one named entry at offset
    0 or -1, that entry's name.

    @return: the results name, or C{None} if none can be determined

    Example::
        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])
    prints::
        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    if self.__name:
        return self.__name
    elif self.__parent:
        # dereference the weak reference; the parent may already be gone
        par = self.__parent()
        if par:
            return par.__lookup(self)
        else:
            return None
    elif (len(self) == 1 and
           len(self.__tokdict) == 1 and
           # dict views cannot be indexed on Python 3, so take the first
           # (and only) entry through an iterator instead of [0]
           next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
        return next(iter(self.__tokdict.keys()))
    else:
        return None
843
def dump(self, indent='', depth=0, full=True):
    """
    Diagnostic listing of the contents of a C{ParseResults}: the token list
    first, then (when C{full} is true) one line per results name, or - if
    there are no names - one entry per token when any token is itself a
    nested C{ParseResults}.

    @param indent: prefix added to every line, so the text can be embedded
        in a nested display of other data
    @param depth: current nesting level, used to pad nested entries
    @param full: when false only the token list is returned

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    NL = '\n'
    out = [ indent+_ustr(self.asList()) ]
    if not full:
        return "".join(out)

    pad = ' '*depth
    nestedPad = ' '*(depth+1)

    if self.haskeys():
        for k,v in sorted(self.items()):
            # out already holds the token-list line, so a separator is always needed
            out.append(NL)
            out.append( "%s%s- %s: " % (indent,pad, k) )
            # only non-empty nested results are expanded recursively
            if isinstance(v,ParseResults) and v:
                out.append( v.dump(indent,depth+1) )
            else:
                out.append(_ustr(v))
    elif any(isinstance(vv,ParseResults) for vv in self):
        for i,vv in enumerate(self):
            if isinstance(vv,ParseResults):
                shown = vv.dump(indent,depth+1)
            else:
                shown = _ustr(vv)
            out.append("\n%s%s[%d]:\n%s%s%s" % (indent,pad,i,indent,nestedPad,shown))

    return "".join(out)
888
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results in nested-list form with the C{pprint} module.

    Any extra positional or keyword arguments are forwarded unchanged to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    nested = self.asList()
    pprint.pprint(nested, *args, **kwargs)
911
912
914 return ( self.__toklist,
915 ( self.__tokdict.copy(),
916 self.__parent is not None and self.__parent() or None,
917 self.__accumNames,
918 self.__name ) )
919
921 self.__toklist = state[0]
922 (self.__tokdict,
923 par,
924 inAccumNames,
925 self.__name) = state[1]
926 self.__accumNames = {}
927 self.__accumNames.update(inAccumNames)
928 if par is not None:
929 self.__parent = wkref(par)
930 else:
931 self.__parent = None
932
934 return self.__toklist, self.__name, self.__asList, self.__modal
935
937 return (dir(type(self)) + list(self.keys()))
938
939 collections.MutableMapping.register(ParseResults)
940
def col (loc,strg):
    """Return the column of position C{loc} within C{strg}, where newlines
    separate lines and the first column is number 1.  A position that sits
    on a newline character itself is reported as column 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 on the first line, which makes the result 1-based
    return loc - strg.rfind("\n", 0, loc)
953
955 """Returns current line number within a string, counting newlines as line separators.
956 The first line is number 1.
957
958 Note: the default parsing behavior is to expand tabs in the input string
959 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
960 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
961 consistent view of the parsed string, the parse location, and line and column
962 positions within the parsed string.
963 """
964 return strg.count("\n",0,loc) + 1
965
def line( loc, strg ):
    """Return the line of text in C{strg} that contains position C{loc},
    without its newline; newlines are the line separators.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    # no newline at or after loc: the line runs to the end of the string
    return strg[start:end] if end >= 0 else strg[start:]
975
977 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
978
980 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
981
983 print ("Exception raised:" + _ustr(exc))
984
986 """'Do-nothing' debug action, to suppress debugging output during parsing."""
987 pass
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011 'decorator to trim function calls to match the arity of the target'
1013 if func in singleArgBuiltins:
1014 return lambda s,l,t: func(t)
1015 limit = [0]
1016 foundArity = [False]
1017
1018
1019 if system_version[:2] >= (3,5):
1020 def extract_stack(limit=0):
1021
1022 offset = -3 if system_version == (3,5,0) else -2
1023 frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
1024 return [(frame_summary.filename, frame_summary.lineno)]
1025 def extract_tb(tb, limit=0):
1026 frames = traceback.extract_tb(tb, limit=limit)
1027 frame_summary = frames[-1]
1028 return [(frame_summary.filename, frame_summary.lineno)]
1029 else:
1030 extract_stack = traceback.extract_stack
1031 extract_tb = traceback.extract_tb
1032
1033
1034
1035
1036 LINE_DIFF = 6
1037
1038
1039 this_line = extract_stack(limit=2)[-1]
1040 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
1041
1042 def wrapper(*args):
1043 while 1:
1044 try:
1045 ret = func(*args[limit[0]:])
1046 foundArity[0] = True
1047 return ret
1048 except TypeError:
1049
1050 if foundArity[0]:
1051 raise
1052 else:
1053 try:
1054 tb = sys.exc_info()[-1]
1055 if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
1056 raise
1057 finally:
1058 del tb
1059
1060 if limit[0] <= maxargs:
1061 limit[0] += 1
1062 continue
1063 raise
1064
1065
1066 func_name = "<parse action>"
1067 try:
1068 func_name = getattr(func, '__name__',
1069 getattr(func, '__class__').__name__)
1070 except Exception:
1071 func_name = str(func)
1072 wrapper.__name__ = func_name
1073
1074 return wrapper
1075
1077 """Abstract base level parser element class."""
1078 DEFAULT_WHITE_CHARS = " \n\t\r"
1079 verbose_stacktrace = False
1080
1081 @staticmethod
1083 r"""
1084 Overrides the default whitespace chars
1085
1086 Example::
1087 # default whitespace chars are space, <TAB>, newline and carriage return
1088 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
1089
1090 # change to just treat newline as significant
1091 ParserElement.setDefaultWhitespaceChars(" \t")
1092 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
1093 """
1094 ParserElement.DEFAULT_WHITE_CHARS = chars
1095
1096 @staticmethod
1098 """
1099 Set class to be used for inclusion of string literals into a parser.
1100
1101 Example::
1102 # default literal class used is Literal
1103 integer = Word(nums)
1104 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1105
1106 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1107
1108
1109 # change to Suppress
1110 ParserElement.inlineLiteralsUsing(Suppress)
1111 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1112
1113 date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
1114 """
1115 ParserElement._literalStringClass = cls
1116
1118 self.parseAction = list()
1119 self.failAction = None
1120
1121 self.strRepr = None
1122 self.resultsName = None
1123 self.saveAsList = savelist
1124 self.skipWhitespace = True
1125 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1126 self.copyDefaultWhiteChars = True
1127 self.mayReturnEmpty = False
1128 self.keepTabs = False
1129 self.ignoreExprs = list()
1130 self.debug = False
1131 self.streamlined = False
1132 self.mayIndexError = True
1133 self.errmsg = ""
1134 self.modalResults = True
1135 self.debugActions = ( None, None, None )
1136 self.re = None
1137 self.callPreparse = True
1138 self.callDuringTry = False
1139
1141 """
1142 Make a copy of this C{ParserElement}. Useful for defining different parse actions
1143 for the same parsing pattern, using copies of the original parse element.
1144
1145 Example::
1146 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1147 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1148 integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1149
1150 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1151 prints::
1152 [5120, 100, 655360, 268435456]
1153 Equivalent form of C{expr.copy()} is just C{expr()}::
1154 integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1155 """
1156 cpy = copy.copy( self )
1157 cpy.parseAction = self.parseAction[:]
1158 cpy.ignoreExprs = self.ignoreExprs[:]
1159 if self.copyDefaultWhiteChars:
1160 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1161 return cpy
1162
1164 """
1165 Define name for this expression, makes debugging and exception messages clearer.
1166
1167 Example::
1168 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1169 Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
1170 """
1171 self.name = name
1172 self.errmsg = "Expected " + self.name
1173 if hasattr(self,"exception"):
1174 self.exception.msg = self.errmsg
1175 return self
1176
1178 """
1179 Define name for referencing matching tokens as a nested attribute
1180 of the returned parse results.
1181 NOTE: this returns a *copy* of the original C{ParserElement} object;
1182 this is so that the client can define a basic element, such as an
1183 integer, and reference it in multiple places with different names.
1184
1185 You can also set results names using the abbreviated syntax,
1186 C{expr("name")} in place of C{expr.setResultsName("name")} -
1187 see L{I{__call__}<__call__>}.
1188
1189 Example::
1190 date_str = (integer.setResultsName("year") + '/'
1191 + integer.setResultsName("month") + '/'
1192 + integer.setResultsName("day"))
1193
1194 # equivalent form:
1195 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1196 """
1197 newself = self.copy()
1198 if name.endswith("*"):
1199 name = name[:-1]
1200 listAllMatches=True
1201 newself.resultsName = name
1202 newself.modalResults = not listAllMatches
1203 return newself
1204
1206 """Method to invoke the Python pdb debugger when this element is
1207 about to be parsed. Set C{breakFlag} to True to enable, False to
1208 disable.
1209 """
1210 if breakFlag:
1211 _parseMethod = self._parse
1212 def breaker(instring, loc, doActions=True, callPreParse=True):
1213 import pdb
1214 pdb.set_trace()
1215 return _parseMethod( instring, loc, doActions, callPreParse )
1216 breaker._originalParseMethod = _parseMethod
1217 self._parse = breaker
1218 else:
1219 if hasattr(self._parse,"_originalParseMethod"):
1220 self._parse = self._parse._originalParseMethod
1221 return self
1222
1224 """
1225 Define action to perform when successfully matching parse element definition.
1226 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
1227 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
1228 - s = the original string being parsed (see note below)
1229 - loc = the location of the matching substring
1230 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
1231 If the functions in fns modify the tokens, they can return them as the return
1232 value from fn, and the modified list of tokens will replace the original.
1233 Otherwise, fn does not need to return any value.
1234
1235 Optional keyword arguments:
1236 - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
1237
1238 Note: the default parsing behavior is to expand tabs in the input string
1239 before starting the parsing process. See L{I{parseString}<parseString>} for more information
1240 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
1241 consistent view of the parsed string, the parse location, and line and column
1242 positions within the parsed string.
1243
1244 Example::
1245 integer = Word(nums)
1246 date_str = integer + '/' + integer + '/' + integer
1247
1248 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1249
1250 # use parse action to convert to ints at parse time
1251 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1252 date_str = integer + '/' + integer + '/' + integer
1253
1254 # note that integer fields are now ints, not strings
1255 date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
1256 """
1257 self.parseAction = list(map(_trim_arity, list(fns)))
1258 self.callDuringTry = kwargs.get("callDuringTry", False)
1259 return self
1260
1262 """
1263 Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
1264
1265 See examples in L{I{copy}<copy>}.
1266 """
1267 self.parseAction += list(map(_trim_arity, list(fns)))
1268 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1269 return self
1270
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = (default=C{False}) if True, sets this expression's C{callDuringTry} flag

    Each function in C{fns} becomes its own parse action, so when several conditions
    are given, every one of them is evaluated.

    Returns C{self}, so calls can be chained.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # Build the parse action inside a factory so that each action keeps its
        # own condition. A closure defined directly in the loop would look up the
        # loop variable at call time and every action would test the last condition.
        predicate = _trim_arity(condition)

        def pa(s, l, t):
            if not bool(predicate(s, l, t)):
                raise exc_type(s, l, msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition_action(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1297
1299 """Define action to perform if parsing fails at this expression.
1300 Fail action fn is a callable function that takes the arguments
1301 C{fn(s,loc,expr,err)} where:
1302 - s = string being parsed
1303 - loc = location where expression match was attempted and failed
1304 - expr = the parse expression that failed
1305 - err = the exception thrown
1306 The function returns no value. It may throw C{L{ParseFatalException}}
1307 if it is desired to stop parsing immediately."""
1308 self.failAction = fn
1309 return self
1310
1312 exprsFound = True
1313 while exprsFound:
1314 exprsFound = False
1315 for e in self.ignoreExprs:
1316 try:
1317 while 1:
1318 loc,dummy = e._parse( instring, loc )
1319 exprsFound = True
1320 except ParseException:
1321 pass
1322 return loc
1323
1325 if self.ignoreExprs:
1326 loc = self._skipIgnorables( instring, loc )
1327
1328 if self.skipWhitespace:
1329 wt = self.whiteChars
1330 instrlen = len(instring)
1331 while loc < instrlen and instring[loc] in wt:
1332 loc += 1
1333
1334 return loc
1335
1336 - def parseImpl( self, instring, loc, doActions=True ):
1338
1339 - def postParse( self, instring, loc, tokenlist ):
1341
1342
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Match this expression against C{instring} starting at C{loc}, without using
    the packrat cache.

    Steps, in order:
     - when debugging, call the "start" debug action
     - run C{preParse} (only if both C{callPreParse} and C{self.callPreparse} are true)
     - run C{parseImpl}, then C{postParse}, and wrap the tokens in a C{ParseResults}
     - run the parse actions (only if C{doActions} or C{self.callDuringTry} is true);
       a parse action returning a non-C{None} value replaces the results
     - when debugging, call the "success" debug action

    Returns a C{(loc, ParseResults)} tuple.

    An C{IndexError} raised by C{parseImpl} is converted to a C{ParseException}
    located at the end of the input, except on the non-debugging path when
    C{self.mayIndexError} is false and C{loc} is inside the string (then no
    C{try} is set up and the C{IndexError} would propagate).  On a
    C{ParseBaseException} the "exception" debug action and C{failAction} are
    called before the exception is re-raised.
    """
    debugging = self.debug

    if debugging or self.failAction:
        onStart = self.debugActions[0]
        if onStart:
            onStart(instring, loc, self)
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        try:
            # the inner try turns IndexError into a ParseException so that the
            # outer handler reports it like any other parse failure
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            onFailure = self.debugActions[2]
            if onFailure:
                onFailure(instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        # note: the bounds test uses the location from before preParse
        if self.mayIndexError or loc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)
    retTokens = ParseResults(tokens, self.resultsName,
                             asList=self.saveAsList, modal=self.modalResults)

    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for action in self.parseAction:
                replaced = action(instring, tokensStart, retTokens)
                if replaced is None:
                    continue
                keepAsList = self.saveAsList and isinstance(replaced, (ParseResults, list))
                retTokens = ParseResults(replaced, self.resultsName,
                                         asList=keepAsList, modal=self.modalResults)
        except ParseBaseException as err:
            # the exception hook is only consulted in debug mode; in every case
            # the exception is passed on unchanged
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging:
        onSuccess = self.debugActions[1]
        if onSuccess:
            onSuccess(instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1414
1420
1422 try:
1423 self.tryParse(instring, loc)
1424 except (ParseException, IndexError):
1425 return False
1426 else:
1427 return True
1428
1431 cache = {}
1432 self.not_in_cache = not_in_cache = object()
1433
1434 def get(self, key):
1435 return cache.get(key, not_in_cache)
1436
1437 def set(self, key, value):
1438 cache[key] = value
1439
1440 def clear(self):
1441 cache.clear()
1442
1443 self.get = types.MethodType(get, self)
1444 self.set = types.MethodType(set, self)
1445 self.clear = types.MethodType(clear, self)
1446
1447 if _OrderedDict is not None:
1450 self.not_in_cache = not_in_cache = object()
1451
1452 cache = _OrderedDict()
1453
1454 def get(self, key):
1455 return cache.get(key, not_in_cache)
1456
1457 def set(self, key, value):
1458 cache[key] = value
1459 if len(cache) > size:
1460 cache.popitem(False)
1461
1462 def clear(self):
1463 cache.clear()
1464
1465 self.get = types.MethodType(get, self)
1466 self.set = types.MethodType(set, self)
1467 self.clear = types.MethodType(clear, self)
1468
1469 else:
1472 self.not_in_cache = not_in_cache = object()
1473
1474 cache = {}
1475 key_fifo = collections.deque([], size)
1476
1477 def get(self, key):
1478 return cache.get(key, not_in_cache)
1479
1480 def set(self, key, value):
1481 cache[key] = value
1482 if len(cache) > size:
1483 cache.pop(key_fifo.popleft(), None)
1484 key_fifo.append(key)
1485
1486 def clear(self):
1487 cache.clear()
1488 key_fifo.clear()
1489
1490 self.get = types.MethodType(get, self)
1491 self.set = types.MethodType(set, self)
1492 self.clear = types.MethodType(clear, self)
1493
1494
1495 packrat_cache = {}
1496 packrat_cache_lock = RLock()
1497 packrat_cache_stats = [0, 0]
1498
1499
1500
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing front end for C{_parseNoCache}.

    The cache key is C{(self, instring, loc, callPreParse, doActions)}.  Both
    successful results and C{ParseBaseException}s are stored in
    C{ParserElement.packrat_cache}; hit/miss counts are kept in
    C{ParserElement.packrat_cache_stats}.  The whole lookup-and-parse runs
    while holding C{ParserElement.packrat_cache_lock}.

    Returns a C{(loc, ParseResults)} tuple, or raises the parse exception
    (a cached one on a hit).
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(key)

        if cached is not memo.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            # hand out a copy of the stored results
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            fresh = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # store a new exception built from the same args rather than the
            # raised instance itself
            memo.set(key, pe.__class__(*pe.args))
            raise
        # store a copy of the results; the caller receives the original
        memo.set(key, (fresh[0], fresh[1].copy()))
        return fresh
1523
1524 _parse = _parseNoCache
1525
1526 @staticmethod
1530
1531 _packratEnabled = False
1532 @staticmethod
1534 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1535 Repeated parse attempts at the same string location (which happens
1536 often in many complex grammars) can immediately return a cached value,
1537 instead of re-executing parsing/validating code. Memoizing is done of
1538 both valid results and parsing exceptions.
1539
1540 Parameters:
1541 - cache_size_limit - (default=C{128}) - if an integer value is provided
1542 will limit the size of the packrat cache; if None is passed, then
1543 the cache size will be unbounded; if 0 is passed, the cache will
1544 be effectively disabled.
1545
1546 This speedup may break existing programs that use parse actions that
1547 have side-effects. For this reason, packrat parsing is disabled when
1548 you first import pyparsing. To activate the packrat feature, your
1549 program must call the class method C{ParserElement.enablePackrat()}. If
1550 your program uses C{psyco} to "compile as you go", you must call
1551 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1552 Python will crash. For best results, call C{enablePackrat()} immediately
1553 after importing pyparsing.
1554
1555 Example::
1556 import pyparsing
1557 pyparsing.ParserElement.enablePackrat()
1558 """
1559 if not ParserElement._packratEnabled:
1560 ParserElement._packratEnabled = True
1561 if cache_size_limit is None:
1562 ParserElement.packrat_cache = ParserElement._UnboundedCache()
1563 else:
1564 ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1565 ParserElement._parse = ParserElement._parseCache
1566
1568 """
1569 Execute the parse expression with the given string.
1570 This is the main interface to the client code, once the complete
1571 expression has been built.
1572
1573 If you want the grammar to require that the entire input string be
1574 successfully parsed, then set C{parseAll} to True (equivalent to ending
1575 the grammar with C{L{StringEnd()}}).
1576
1577 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1578 in order to report proper column numbers in parse actions.
1579 If the input string contains tabs and
1580 the grammar uses parse actions that use the C{loc} argument to index into the
1581 string being parsed, you can ensure you have a consistent view of the input
1582 string by:
1583 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1584 (see L{I{parseWithTabs}<parseWithTabs>})
1585 - define your parse action using the full C{(s,loc,toks)} signature, and
1586 reference the input string using the parse action's C{s} argument
1587 - explicitly expand the tabs in your input string before calling
1588 C{parseString}
1589
1590 Example::
1591 Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
1592 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
1593 """
1594 ParserElement.resetCache()
1595 if not self.streamlined:
1596 self.streamline()
1597
1598 for e in self.ignoreExprs:
1599 e.streamline()
1600 if not self.keepTabs:
1601 instring = instring.expandtabs()
1602 try:
1603 loc, tokens = self._parse( instring, 0 )
1604 if parseAll:
1605 loc = self.preParse( instring, loc )
1606 se = Empty() + StringEnd()
1607 se._parse( instring, loc )
1608 except ParseBaseException as exc:
1609 if ParserElement.verbose_stacktrace:
1610 raise
1611 else:
1612
1613 raise exc
1614 else:
1615 return tokens
1616
1618 """
1619 Scan the input string for expression matches. Each match will return the
1620 matching tokens, start location, and end location. May be called with optional
1621 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1622 C{overlap} is specified, then overlapping matches will be reported.
1623
1624 Note that the start and end locations are reported relative to the string
1625 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1626 strings with embedded tabs.
1627
1628 Example::
1629 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
1630 print(source)
1631 for tokens,start,end in Word(alphas).scanString(source):
1632 print(' '*start + '^'*(end-start))
1633 print(' '*start + tokens[0])
1634
1635 prints::
1636
1637 sldjf123lsdjjkf345sldkjf879lkjsfd987
1638 ^^^^^
1639 sldjf
1640 ^^^^^^^
1641 lsdjjkf
1642 ^^^^^^
1643 sldkjf
1644 ^^^^^^
1645 lkjsfd
1646 """
1647 if not self.streamlined:
1648 self.streamline()
1649 for e in self.ignoreExprs:
1650 e.streamline()
1651
1652 if not self.keepTabs:
1653 instring = _ustr(instring).expandtabs()
1654 instrlen = len(instring)
1655 loc = 0
1656 preparseFn = self.preParse
1657 parseFn = self._parse
1658 ParserElement.resetCache()
1659 matches = 0
1660 try:
1661 while loc <= instrlen and matches < maxMatches:
1662 try:
1663 preloc = preparseFn( instring, loc )
1664 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1665 except ParseException:
1666 loc = preloc+1
1667 else:
1668 if nextLoc > loc:
1669 matches += 1
1670 yield tokens, preloc, nextLoc
1671 if overlap:
1672 nextloc = preparseFn( instring, loc )
1673 if nextloc > loc:
1674 loc = nextLoc
1675 else:
1676 loc += 1
1677 else:
1678 loc = nextLoc
1679 else:
1680 loc = preloc+1
1681 except ParseBaseException as exc:
1682 if ParserElement.verbose_stacktrace:
1683 raise
1684 else:
1685
1686 raise exc
1687
1730
1732 """
1733 Another extension to C{L{scanString}}, simplifying the access to the tokens found
1734 to match the given parse expression. May be called with optional
1735 C{maxMatches} argument, to clip searching after 'n' matches are found.
1736
1737 Example::
1738 # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1739 cap_word = Word(alphas.upper(), alphas.lower())
1740
1741 print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
1742 prints::
1743 ['More', 'Iron', 'Lead', 'Gold', 'I']
1744 """
1745 try:
1746 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1747 except ParseBaseException as exc:
1748 if ParserElement.verbose_stacktrace:
1749 raise
1750 else:
1751
1752 raise exc
1753
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text before each match found by C{L{scanString}} (and, when
    C{includeSeparators} is true, the first token of that match), then the
    text remaining after the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # NOTE(review): the offsets come from scanString, which expands tabs unless
    # keepTabs is set, but the slices below are taken from the unexpanded
    # instring -- confirm behavior for input containing tabs.
    resume = 0
    for toks, start, end in self.scanString(instring, maxMatches=maxsplit):
        yield instring[resume:start]
        if includeSeparators:
            yield toks[0]
        resume = end
    yield instring[resume:]
1775
1777 """
1778 Implementation of + operator - returns C{L{And}}
1779 """
1780 if isinstance( other, basestring ):
1781 other = ParserElement._literalStringClass( other )
1782 if not isinstance( other, ParserElement ):
1783 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1784 SyntaxWarning, stacklevel=2)
1785 return None
1786 return And( [ self, other ] )
1787
1789 """
1790 Implementation of + operator when left operand is not a C{L{ParserElement}}
1791 """
1792 if isinstance( other, basestring ):
1793 other = ParserElement._literalStringClass( other )
1794 if not isinstance( other, ParserElement ):
1795 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1796 SyntaxWarning, stacklevel=2)
1797 return None
1798 return other + self
1799
1801 """
1802 Implementation of - operator, returns C{L{And}} with error stop
1803 """
1804 if isinstance( other, basestring ):
1805 other = ParserElement._literalStringClass( other )
1806 if not isinstance( other, ParserElement ):
1807 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1808 SyntaxWarning, stacklevel=2)
1809 return None
1810 return And( [ self, And._ErrorStop(), other ] )
1811
1813 """
1814 Implementation of - operator when left operand is not a C{L{ParserElement}}
1815 """
1816 if isinstance( other, basestring ):
1817 other = ParserElement._literalStringClass( other )
1818 if not isinstance( other, ParserElement ):
1819 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1820 SyntaxWarning, stacklevel=2)
1821 return None
1822 return other - self
1823
def __mul__(self, other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for a negative count, a maximum below
    the minimum, or a multiplier of C{0} / C{(0,0)}.
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None and ignore anything past the second item
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements is the count of optional repetitions
            optElements -= minElements
        else:
            # apply the % formatting; passing the values as extra exception
            # arguments left the placeholders unfilled in the message
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % (type(other).__name__,))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1891
1894
1896 """
1897 Implementation of | operator - returns C{L{MatchFirst}}
1898 """
1899 if isinstance( other, basestring ):
1900 other = ParserElement._literalStringClass( other )
1901 if not isinstance( other, ParserElement ):
1902 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1903 SyntaxWarning, stacklevel=2)
1904 return None
1905 return MatchFirst( [ self, other ] )
1906
1908 """
1909 Implementation of | operator when left operand is not a C{L{ParserElement}}
1910 """
1911 if isinstance( other, basestring ):
1912 other = ParserElement._literalStringClass( other )
1913 if not isinstance( other, ParserElement ):
1914 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1915 SyntaxWarning, stacklevel=2)
1916 return None
1917 return other | self
1918
1920 """
1921 Implementation of ^ operator - returns C{L{Or}}
1922 """
1923 if isinstance( other, basestring ):
1924 other = ParserElement._literalStringClass( other )
1925 if not isinstance( other, ParserElement ):
1926 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1927 SyntaxWarning, stacklevel=2)
1928 return None
1929 return Or( [ self, other ] )
1930
1932 """
1933 Implementation of ^ operator when left operand is not a C{L{ParserElement}}
1934 """
1935 if isinstance( other, basestring ):
1936 other = ParserElement._literalStringClass( other )
1937 if not isinstance( other, ParserElement ):
1938 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1939 SyntaxWarning, stacklevel=2)
1940 return None
1941 return other ^ self
1942
1944 """
1945 Implementation of & operator - returns C{L{Each}}
1946 """
1947 if isinstance( other, basestring ):
1948 other = ParserElement._literalStringClass( other )
1949 if not isinstance( other, ParserElement ):
1950 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1951 SyntaxWarning, stacklevel=2)
1952 return None
1953 return Each( [ self, other ] )
1954
1956 """
1957 Implementation of & operator when left operand is not a C{L{ParserElement}}
1958 """
1959 if isinstance( other, basestring ):
1960 other = ParserElement._literalStringClass( other )
1961 if not isinstance( other, ParserElement ):
1962 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1963 SyntaxWarning, stacklevel=2)
1964 return None
1965 return other & self
1966
1968 """
1969 Implementation of ~ operator - returns C{L{NotAny}}
1970 """
1971 return NotAny( self )
1972
1974 """
1975 Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}.
1976
1977 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1978 passed as C{True}.
1979
1980 If C{name} is omitted, same as calling C{L{copy}}.
1981
1982 Example::
1983 # these are equivalent
1984 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1985 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1986 """
1987 if name is not None:
1988 return self.setResultsName(name)
1989 else:
1990 return self.copy()
1991
1993 """
1994 Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1995 cluttering up returned output.
1996 """
1997 return Suppress( self )
1998
2000 """
2001 Disables the skipping of whitespace before matching the characters in the
2002 C{ParserElement}'s defined pattern. This is normally only used internally by
2003 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
2004 """
2005 self.skipWhitespace = False
2006 return self
2007
2009 """
2010 Overrides the default whitespace chars
2011 """
2012 self.skipWhitespace = True
2013 self.whiteChars = chars
2014 self.copyDefaultWhiteChars = False
2015 return self
2016
2018 """
2019 Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
2020 Must be called before C{parseString} when the input grammar contains elements that
2021 match C{<TAB>} characters.
2022 """
2023 self.keepTabs = True
2024 return self
2025
2027 """
2028 Define expression to be ignored (e.g., comments) while doing pattern
2029 matching; may be called repeatedly, to define multiple comment or other
2030 ignorable patterns.
2031
2032 Example::
2033 patt = OneOrMore(Word(alphas))
2034 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2035
2036 patt.ignore(cStyleComment)
2037 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2038 """
2039 if isinstance(other, basestring):
2040 other = Suppress(other)
2041
2042 if isinstance( other, Suppress ):
2043 if other not in self.ignoreExprs:
2044 self.ignoreExprs.append(other)
2045 else:
2046 self.ignoreExprs.append( Suppress( other.copy() ) )
2047 return self
2048
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Stores the three callables as C{self.debugActions} in the order
    (start, success, exception) and turns on C{self.debug}.  Any action
    passed as a false value (e.g. C{None}) is replaced by the corresponding
    module default debug action.

    Returns C{self}, so calls can be chained.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
2058
2060 """
2061 Enable display of debugging messages while doing pattern matching.
2062 Set C{flag} to True to enable, False to disable.
2063
2064 Example::
2065 wd = Word(alphas).setName("alphaword")
2066 integer = Word(nums).setName("numword")
2067 term = wd | integer
2068
2069 # turn on debugging for wd
2070 wd.setDebug()
2071
2072 OneOrMore(term).parseString("abc 123 xyz 890")
2073
2074 prints::
2075 Match alphaword at loc 0(1,1)
2076 Matched alphaword -> ['abc']
2077 Match alphaword at loc 3(1,4)
2078 Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2079 Match alphaword at loc 7(1,8)
2080 Matched alphaword -> ['xyz']
2081 Match alphaword at loc 11(1,12)
2082 Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2083 Match alphaword at loc 15(1,16)
2084 Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2085
2086 The output shown is that produced by the default debug actions. Prior to attempting
2087 to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
2088 is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
2089 message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
2090 which makes debugging and exception messages easier to understand - for instance, the default
2091 name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
2092 """
2093 if flag:
2094 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2095 else:
2096 self.debug = False
2097 return self
2098
2101
2104
2106 self.streamlined = True
2107 self.strRepr = None
2108 return self
2109
2112
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for signature compatibility but is not used by
    this implementation: the recursion check is always started with a fresh,
    empty list.  (The default is C{None} rather than a shared mutable list.)
    """
    self.checkRecursion([])
2118
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The argument is first treated as a file object by calling its C{read()}
    method; if that raises C{AttributeError} it is passed to C{open()} in
    text mode instead.  The contents are then handed to C{L{parseString}}
    together with C{parseAll}, and its result is returned.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()

    try:
        results = self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return results
2138
2140 if isinstance(other, ParserElement):
2141 return self is other or vars(self) == vars(other)
2142 elif isinstance(other, basestring):
2143 return self.matches(other)
2144 else:
2145 return super(ParserElement,self)==other
2146
2148 return not (self == other)
2149
2151 return hash(id(self))
2152
2154 return self == other
2155
2157 return not (self == other)
2158
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up a larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} completes without raising a
    C{ParseBaseException}, otherwise C{False}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2177
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of
    C{(test string, result)} pairs; each result is either the value returned by
    C{parseString} or the exception that was raised for that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success
    """
    if isinstance(tests, basestring):
        # Strip each line through its own bound method: the unbound ``str.strip``
        # only accepts ``str`` instances, so it rejects unicode lines on Python 2.
        tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # A comment line, or a blank line that follows pending comment lines, is
        # buffered and emitted together with the next real test.
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show only the offending line, caret at its column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2300
2301
2302 -class Token(ParserElement):
2303 """
2304 Abstract C{ParserElement} subclass, for defining atomic matching patterns.
2305 """
2308
2309
2310 -class Empty(Token):
2311 """
2312 An empty token, will always match.
2313 """
2315 super(Empty,self).__init__()
2316 self.name = "Empty"
2317 self.mayReturnEmpty = True
2318 self.mayIndexError = False
2319
2322 """
2323 A token that will never match.
2324 """
2326 super(NoMatch,self).__init__()
2327 self.name = "NoMatch"
2328 self.mayReturnEmpty = True
2329 self.mayIndexError = False
2330 self.errmsg = "Unmatchable token"
2331
2332 - def parseImpl( self, instring, loc, doActions=True ):
2334
2337 """
2338 Token to exactly match a specified string.
2339
2340 Example::
2341 Literal('blah').parseString('blah') # -> ['blah']
2342 Literal('blah').parseString('blahfooblah') # -> ['blah']
2343 Literal('blah').parseString('bla') # -> Exception: Expected "blah"
2344
2345 For case-insensitive matching, use L{CaselessLiteral}.
2346
2347 For keyword matching (force word break before and after the matched string),
2348 use L{Keyword} or L{CaselessKeyword}.
2349 """
2351 super(Literal,self).__init__()
2352 self.match = matchString
2353 self.matchLen = len(matchString)
2354 try:
2355 self.firstMatchChar = matchString[0]
2356 except IndexError:
2357 warnings.warn("null string passed to Literal; use Empty() instead",
2358 SyntaxWarning, stacklevel=2)
2359 self.__class__ = Empty
2360 self.name = '"%s"' % _ustr(self.match)
2361 self.errmsg = "Expected " + self.name
2362 self.mayReturnEmpty = False
2363 self.mayIndexError = False
2364
2365
2366
2367
2368
def parseImpl(self, instring, loc, doActions=True):
    """
    Match C{self.match} exactly at position C{loc} of C{instring}.

    Returns C{(loc + self.matchLen, self.match)} on success; raises
    C{ParseException} when the text at C{loc} is not the match string.
    """
    # cheap first-character test before the full comparison
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    # a one-character literal is fully confirmed by the test above
    if self.matchLen != 1 and not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.match
2374 _L = Literal
2375 ParserElement._literalStringClass = Literal
2378 """
2379 Token to exactly match a specified string as a keyword, that is, it must be
2380 immediately followed by a non-keyword character. Compare with C{L{Literal}}:
2381 - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
2382 - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
2383 Accepts two optional constructor arguments in addition to the keyword string:
2384 - C{identChars} is a string of characters that would be valid identifier characters,
2385 defaulting to all alphanumerics + "_" and "$"
2386 - C{caseless} allows case-insensitive matching, default is C{False}.
2387
2388 Example::
2389 Keyword("start").parseString("start") # -> ['start']
2390 Keyword("start").parseString("starting") # -> Exception
2391
2392 For case-insensitive matching, use L{CaselessKeyword}.
2393 """
2394 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
2395
2397 super(Keyword,self).__init__()
2398 self.match = matchString
2399 self.matchLen = len(matchString)
2400 try:
2401 self.firstMatchChar = matchString[0]
2402 except IndexError:
2403 warnings.warn("null string passed to Keyword; use Empty() instead",
2404 SyntaxWarning, stacklevel=2)
2405 self.name = '"%s"' % self.match
2406 self.errmsg = "Expected " + self.name
2407 self.mayReturnEmpty = False
2408 self.mayIndexError = False
2409 self.caseless = caseless
2410 if caseless:
2411 self.caselessmatch = matchString.upper()
2412 identChars = identChars.upper()
2413 self.identChars = set(identChars)
2414
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at C{loc}, requiring that it is neither preceded nor
    followed by a character from C{self.identChars}.

    When C{self.caseless} is set, the input is upper-cased before being compared
    with C{self.caselessmatch} and with the identifier characters.

    Returns C{(loc + self.matchLen, self.match)}; raises C{ParseException} otherwise.
    """
    stop = loc + self.matchLen
    if self.caseless:
        hit = instring[loc:stop].upper() == self.caselessmatch
        if hit:
            # keyword must end the input or be followed by a non-identifier character
            hit = loc >= len(instring) - self.matchLen or instring[stop].upper() not in self.identChars
        if hit:
            # keyword must start the input or follow a non-identifier character
            hit = loc == 0 or instring[loc-1].upper() not in self.identChars
    else:
        hit = instring[loc] == self.firstMatchChar
        if hit:
            hit = self.matchLen == 1 or instring.startswith(self.match, loc)
        if hit:
            hit = loc >= len(instring) - self.matchLen or instring[stop] not in self.identChars
        if hit:
            hit = loc == 0 or instring[loc-1] not in self.identChars
    if hit:
        return stop, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2428
2433
2434 @staticmethod
2439
2441 """
2442 Token to match a specified string, ignoring case of letters.
2443 Note: the matched results will always be in the case of the given
2444 match string, NOT the case of the input text.
2445
2446 Example::
2447 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
2448
2449 (Contrast with example for L{CaselessKeyword}.)
2450 """
2452 super(CaselessLiteral,self).__init__( matchString.upper() )
2453
2454 self.returnString = matchString
2455 self.name = "'%s'" % self.returnString
2456 self.errmsg = "Expected " + self.name
2457
def parseImpl(self, instring, loc, doActions=True):
    """
    Compare the upper-cased slice of C{instring} at C{loc} with C{self.match}.

    Returns C{(loc + self.matchLen, self.returnString)} - the result is
    C{self.returnString}, not the input text; raises C{ParseException} on mismatch.
    """
    stop = loc + self.matchLen
    if instring[loc:stop].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return stop, self.returnString
2462
2464 """
2465 Caseless version of L{Keyword}.
2466
2467 Example::
2468 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
2469
2470 (Contrast with example for L{CaselessLiteral}.)
2471 """
2474
def parseImpl(self, instring, loc, doActions=True):
    """
    Case-insensitive keyword match at C{loc}.

    The upper-cased input slice must equal C{self.caselessmatch}, and the match
    must either reach the end of the input or be followed by a character whose
    upper-case form is not in C{self.identChars}.

    Returns C{(loc + self.matchLen, self.match)}; raises C{ParseException} otherwise.
    """
    stop = loc + self.matchLen
    if instring[loc:stop].upper() == self.caselessmatch:
        at_end = loc >= len(instring) - self.matchLen
        if at_end or instring[stop].upper() not in self.identChars:
            return stop, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2480
2482 """
2483 Token for matching words composed of allowed character sets.
2484 Defined with string containing all allowed initial characters,
2485 an optional string containing allowed body characters (if omitted,
2486 defaults to the initial character set), and an optional minimum,
2487 maximum, and/or exact length. The default value for C{min} is 1 (a
2488 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2489 are 0, meaning no maximum or exact length restriction. An optional
2490 C{excludeChars} parameter can list characters that might be found in
2491 the input C{bodyChars} string; useful to define a word of all printables
2492 except for one or two characters, for instance.
2493
2494 L{srange} is useful for defining custom character set strings for defining
2495 C{Word} expressions, using range notation from regular expression character sets.
2496
2497 A common mistake is to use C{Word} to match a specific literal string, as in
2498 C{Word("Address")}. Remember that C{Word} uses the string argument to define
2499 I{sets} of matchable characters. This expression would match "Add", "AAA",
2500 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
2501 To match an exact literal string, use L{Literal} or L{Keyword}.
2502
2503 pyparsing includes helper strings for building Words:
2504 - L{alphas}
2505 - L{nums}
2506 - L{alphanums}
2507 - L{hexnums}
2508 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
2509 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2510 - L{printables} (any non-whitespace character)
2511
2512 Example::
2513 # a word composed of digits
2514 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2515
2516 # a word with a leading capital, and zero or more lowercase
2517 capital_word = Word(alphas.upper(), alphas.lower())
2518
2519 # hostnames are alphanumeric, with leading alpha, and '-'
2520 hostname = Word(alphas, alphanums+'-')
2521
2522 # roman numeral (not a strict parser, accepts invalid mix of characters)
2523 roman = Word("IVXLCDM")
2524
2525 # any string of non-whitespace characters, except for ','
2526 csv_value = Word(printables, excludeChars=",")
2527 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """
    Build a word matcher from sets of allowed characters.

    Parameters:
     - initChars - string of characters allowed as the first character
     - bodyChars - (default=C{None}) string of characters allowed after the first;
       when empty or omitted, C{initChars} is used for the body as well
     - min - (default=1) minimum word length; values below 1 raise C{ValueError}
     - max - (default=0) maximum word length; 0 means no maximum
     - exact - (default=0) when positive, overrides both C{min} and C{max}
     - asKeyword - (default=C{False}) require that the word is not adjacent to
       further body characters
     - excludeChars - (default=C{None}) characters removed from C{initChars}
       and C{bodyChars} before the sets are built
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # Fast path: with default length limits and no space among the allowed
    # characters, the word can be matched by one precompiled regular expression.
    # NOTE(review): self.re is only assigned on this path - presumably the base
    # initializer supplies a default for the other case; confirm.
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # Best effort: if the generated pattern cannot be compiled, fall back
            # to the character-by-character matcher. A bare ``except:`` would also
            # swallow KeyboardInterrupt and SystemExit.
            self.re = None
2581
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a word at C{loc}.

    When a compiled expression is available in C{self.re} it performs the whole
    match; otherwise the input is scanned character by character against
    C{self.initChars} and C{self.bodyChars}.

    Returns C{(end location, matched text)}; raises C{ParseException} on failure.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    body = self.bodyChars
    total = len(instring)
    limit = min(begin + self.maxLen, total)
    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    # too few characters consumed
    too_short = loc - begin < self.minLen
    # an explicit maximum was given but more body characters follow
    overrun = self.maxSpecified and loc < total and instring[loc] in body
    # keyword mode: the word may not touch body characters on either side
    glued = self.asKeyword and (
        (begin > 0 and instring[begin-1] in body) or
        (loc < total and instring[loc] in body)
    )
    if too_short or overrun or glued:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2616
2618 try:
2619 return super(Word,self).__str__()
2620 except:
2621 pass
2622
2623
2624 if self.strRepr is None:
2625
2626 def charsAsStr(s):
2627 if len(s)>4:
2628 return s[:4]+"..."
2629 else:
2630 return s
2631
2632 if ( self.initCharsOrig != self.bodyCharsOrig ):
2633 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
2634 else:
2635 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
2636
2637 return self.strRepr
2638
2639
2640 -class Regex(Token):
2641 """
2642 Token for matching strings that match a given regular expression.
2643 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
2644 If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
2645 named parse results.
2646
2647 Example::
2648 realnum = Regex(r"[+-]?\d+\.\d*")
2649 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)')
2650 # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
2651 roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
2652 """
2653 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object; it is
    then used directly, and C{flags} is only recorded on the instance.

    Raises C{ValueError} for any other C{pattern} type, and re-raises the
    compilation error (after issuing a warning) for an invalid pattern string.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # Keep the regex source text. ``str()`` of a compiled pattern yields the
        # object's repr (e.g. ``re.compile('...')``), not the expression itself.
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2687
def parseImpl(self, instring, loc, doActions=True):
    """
    Apply the compiled expression in C{self.re} at C{loc}.

    Returns C{(end location, ParseResults)}; the results hold the full matched
    text, and every named group of the match is stored under its group name.
    Raises C{ParseException} when the expression does not match at C{loc}.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(found.group())
    for group_name, group_text in found.groupdict().items():
        tokens[group_name] = group_text
    return found.end(), tokens
2700
2702 try:
2703 return super(Regex,self).__str__()
2704 except:
2705 pass
2706
2707 if self.strRepr is None:
2708 self.strRepr = "Re:(%s)" % repr(self.pattern)
2709
2710 return self.strRepr
2711
2714 r"""
2715 Token for matching strings that are delimited by quoting characters.
2716
2717 Defined with the following parameters:
2718 - quoteChar - string of one or more characters defining the quote delimiting string
2719 - escChar - character to escape quotes, typically backslash (default=C{None})
2720 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
2721 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
2722 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
2723 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
2724 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
2725
2726 Example::
2727 qs = QuotedString('"')
2728 print(qs.searchString('lsjdf "This is the quote" sldjf'))
2729 complex_qs = QuotedString('{{', endQuoteChar='}}')
2730 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
2731 sql_qs = QuotedString('"', escQuote='""')
2732 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
2733 prints::
2734 [['This is the quote']]
2735 [['This is the "quote"']]
2736 [['This is the quote with "embedded" quotes']]
2737 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """
    Build the regular expression used to recognize the quoted string.

    C{quoteChar} and C{endQuoteChar} are stripped of surrounding whitespace; if
    either is empty afterwards a warning is issued and C{SyntaxError} is raised.
    C{endQuoteChar} defaults to C{quoteChar}. See the class documentation for
    the meaning of the remaining parameters.
    """
    super(QuotedString,self).__init__()

    # leading/trailing whitespace is not part of a quote delimiter
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # the escape character, if any, is excluded from the "ordinary character" class
    if escChar is not None:
        esc_in_class = _escapeRegexRangeChars(escChar) or ''
    else:
        esc_in_class = ''

    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        line_breaks = ''
    else:
        self.flags = 0
        # single-line strings additionally exclude line breaks
        line_breaks = r'\n\r'

    pieces = [
        r'%s(?:[^%s%s%s]' % (
            re.escape(quoteChar),
            _escapeRegexRangeChars(endQuoteChar[0]),
            line_breaks,
            esc_in_class,
        )
    ]
    if len(endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote inside the body
        partials = [
            "%s[^%s]" % (re.escape(endQuoteChar[:i]),
                         _escapeRegexRangeChars(endQuoteChar[i]))
            for i in range(len(endQuoteChar)-1, 0, -1)
        ]
        pieces.append('|(?:' + ')|(?:'.join(partials) + ')')
    if escQuote:
        pieces.append(r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        pieces.append(r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(escChar)+"(.)"
    pieces.append(r')*%s' % re.escape(endQuoteChar))
    self.pattern = ''.join(pieces)

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2802
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a quoted string at C{loc}.

    Returns C{(end location, text)}. When C{self.unquoteResults} is set, the
    quote delimiters are removed and - in this order - escaped whitespace is
    converted (if C{self.convertWhitespaceEscapes}), the escape character is
    dropped from escaped characters, and C{self.escQuote} sequences are replaced
    by C{self.endQuoteChar}. Otherwise the matched text is returned unchanged.

    Raises C{ParseException} when no quoted string starts at C{loc}.
    """
    # cheap first-character test before running the regular expression
    result = self.re.match(instring,loc) if instring[loc] == self.firstQuoteChar else None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off the quote delimiters
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped whitespace
            if '\\' in ret and self.convertWhitespaceEscapes:
                ws_map = {
                    r'\t' : '\t',
                    r'\n' : '\n',
                    r'\f' : '\f',
                    r'\r' : '\r',
                }
                for wslit,wschar in ws_map.items():
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters; the template must be a raw string,
            # because ``\g`` is not a valid escape in an ordinary string literal
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2837
2839 try:
2840 return super(QuotedString,self).__str__()
2841 except:
2842 pass
2843
2844 if self.strRepr is None:
2845 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2846
2847 return self.strRepr
2848
2851 """
2852 Token for matching words composed of characters I{not} in a given set (will
2853 include whitespace in matched characters if not listed in the provided exclusion set - see example).
2854 Defined with string containing all disallowed characters, and an optional
2855 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2856 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2857 are 0, meaning no maximum or exact length restriction.
2858
2859 Example::
2860 # define a comma-separated-value as anything that is not a ','
2861 csv_value = CharsNotIn(',')
2862 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
2863 prints::
2864 ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
2865 """
def __init__(self, notChars, min=1, max=0, exact=0):
    """
    Parameters:
     - notChars - string of characters that end the match
     - min - (default=1) minimum length; values below 1 raise C{ValueError}
     - max - (default=0) maximum length; 0 means no maximum
     - exact - (default=0) when positive, overrides both C{min} and C{max}

    Whitespace skipping is switched off for this expression.
    """
    super(CharsNotIn, self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length fixes both bounds
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = (self.minLen == 0)
    self.mayIndexError = False
2889
def parseImpl(self, instring, loc, doActions=True):
    """
    Consume characters from C{loc} until one from C{self.notChars}, the maximum
    length, or the end of the input is reached.

    Returns C{(end location, matched text)}; raises C{ParseException} if the
    first character is excluded or fewer than C{self.minLen} characters match.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    limit = min(begin + self.maxLen, len(instring))
    loc += 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2906
2908 try:
2909 return super(CharsNotIn, self).__str__()
2910 except:
2911 pass
2912
2913 if self.strRepr is None:
2914 if len(self.notChars) > 4:
2915 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2916 else:
2917 self.strRepr = "!W:(%s)" % self.notChars
2918
2919 return self.strRepr
2920
2922 """
2923 Special matching class for matching whitespace. Normally, whitespace is ignored
2924 by pyparsing grammars. This class is included when some whitespace structures
2925 are significant. Define with a string containing the whitespace characters to be
2926 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2927 as defined for the C{L{Word}} class.
2928 """
2929 whiteStrs = {
2930 " " : "<SPC>",
2931 "\t": "<TAB>",
2932 "\n": "<LF>",
2933 "\r": "<CR>",
2934 "\f": "<FF>",
2935 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """
    Parameters:
     - ws - (default=C{" \\t\\r\\n"}) the whitespace characters to match
     - min - (default=1) minimum number of characters
     - max - (default=0) maximum number of characters; 0 means no maximum
     - exact - (default=0) when positive, overrides both C{min} and C{max}

    The characters in C{ws} are removed from this expression's skippable
    whitespace so that they can be matched instead of skipped.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    skippable = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars("".join(skippable))

    # display name is built from the readable tags in White.whiteStrs
    self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
2955
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a run of characters from C{self.matchWhite} starting at C{loc}.

    Returns C{(end location, matched whitespace)}; raises C{ParseException} if
    the first character is not in C{self.matchWhite} or the run is shorter than
    C{self.minLen}.
    """
    wanted = self.matchWhite
    if instring[loc] not in wanted:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    limit = min(begin + self.maxLen, len(instring))
    loc += 1
    while loc < limit and instring[loc] in wanted:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2970
2974 super(_PositionToken,self).__init__()
2975 self.name=self.__class__.__name__
2976 self.mayReturnEmpty = True
2977 self.mayIndexError = False
2978
2980 """
2981 Token to advance to a specific column of input text; useful for tabular report scraping.
2982 """
2986
2988 if col(loc,instring) != self.col:
2989 instrlen = len(instring)
2990 if self.ignoreExprs:
2991 loc = self._skipIgnorables( instring, loc )
2992 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2993 loc += 1
2994 return loc
2995
def parseImpl(self, instring, loc, doActions=True):
    """
    Advance from C{loc} to the column stored in C{self.col}.

    Returns C{(new location, skipped text)}; raises C{ParseException} if the
    current column is already past the target column.
    """
    current = col(loc, instring)
    if current > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - current
    return target, instring[loc:target]
3003
3005 """
3006 Matches if current position is at the beginning of a line within the parse string
3007 """
3012
3014 preloc = super(LineStart,self).preParse(instring,loc)
3015 if instring[preloc] == "\n":
3016 loc += 1
3017 return loc
3018
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed with an empty token list when C{loc} is at the beginning of a line.

    That is the case at position 0, at the position C{preParse} reports for the
    start of the input, or directly after a newline character. Raises
    C{ParseException} otherwise.
    """
    if loc == 0 or loc == self.preParse(instring, 0) or instring[loc-1] == "\n":
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
3025
3027 """
3028 Matches if current position is at the end of a line within the parse string
3029 """
3034
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the end of a line at C{loc}.

    At a newline character, returns C{(loc+1, "\\n")}; exactly at the end of
    the input, returns C{(loc+1, [])}. Raises C{ParseException} in every other case.
    """
    end = len(instring)
    if loc == end:
        return loc + 1, []
    if loc < end and instring[loc] == "\n":
        return loc + 1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
3045
3047 """
3048 Matches if current position is at the beginning of the parse string
3049 """
3053
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed with an empty token list when C{loc} is the start of the input.

    Position 0 always qualifies; any other position qualifies only if it equals
    what C{preParse} returns when started at position 0. Raises
    C{ParseException} otherwise.
    """
    if loc != 0 and loc != self.preParse(instring, 0):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3060
3062 """
3063 Matches if current position is at the end of the parse string
3064 """
3068
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed with an empty token list when C{loc} is at or beyond the end of the input.

    Exactly at the end the returned location is C{loc+1}; beyond the end it is
    C{loc} unchanged. Raises C{ParseException} while input remains.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    # loc is now either exactly at the end or already past it
    return (loc + 1 if loc == end else loc), []
3078
3080 """
3081 Matches if the current position is at the beginning of a Word, and
3082 is not preceded by any character in a given set of C{wordChars}
3083 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3084 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
3085 the string being parsed, or at the beginning of a line.
3086 """
3088 super(WordStart,self).__init__()
3089 self.wordChars = set(wordChars)
3090 self.errmsg = "Not at the start of a word"
3091
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed with an empty token list when a word starts at C{loc}.

    Position 0 always qualifies. Elsewhere the preceding character must not be
    in C{self.wordChars} and the character at C{loc} must be. Raises
    C{ParseException} otherwise.
    """
    if loc == 0:
        return loc, []
    chars = self.wordChars
    if instring[loc-1] in chars or instring[loc] not in chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3098
3100 """
3101 Matches if the current position is at the end of a Word, and
3102 is not followed by any character in a given set of C{wordChars}
3103 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3104 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
3105 the string being parsed, or at the end of a line.
3106 """
3108 super(WordEnd,self).__init__()
3109 self.wordChars = set(wordChars)
3110 self.skipWhitespace = False
3111 self.errmsg = "Not at the end of a word"
3112
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed with an empty token list when a word ends at C{loc}.

    A position at or beyond the end of the input always qualifies. Elsewhere
    the character at C{loc} must not be in C{self.wordChars} and the preceding
    character must be. Raises C{ParseException} otherwise.
    """
    size = len(instring)
    if size == 0 or loc >= size:
        return loc, []
    chars = self.wordChars
    if instring[loc] in chars or instring[loc-1] not in chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3120
3123 """
3124 Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
3125 """
def __init__( self, exprs, savelist = False ):
    """
    Store the contained expressions in C{self.exprs}.

    Parameters:
     - exprs - a single string, an iterable (including a generator) of
       expressions or strings, or a single expression object
     - savelist - (default=C{False}) forwarded to the base class initializer

    A single string is wrapped with C{ParserElement._literalStringClass}; an
    iterable consisting only of strings has each element wrapped the same way.
    A non-iterable argument becomes a one-element list.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back for interpreters that predate that module.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ ParserElement._literalStringClass( exprs ) ]
    elif isinstance( exprs, Iterable ):
        exprs = list(exprs)
        # if a sequence of strings is provided, wrap each one as a literal
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(ParserElement._literalStringClass, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            self.exprs = [ exprs ]
    self.callPreparse = False
3145
3147 return self.exprs[i]
3148
3150 self.exprs.append( other )
3151 self.strRepr = None
3152 return self
3153
3155 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
3156 all contained expressions."""
3157 self.skipWhitespace = False
3158 self.exprs = [ e.copy() for e in self.exprs ]
3159 for e in self.exprs:
3160 e.leaveWhitespace()
3161 return self
3162
3164 if isinstance( other, Suppress ):
3165 if other not in self.ignoreExprs:
3166 super( ParseExpression, self).ignore( other )
3167 for e in self.exprs:
3168 e.ignore( self.ignoreExprs[-1] )
3169 else:
3170 super( ParseExpression, self).ignore( other )
3171 for e in self.exprs:
3172 e.ignore( self.ignoreExprs[-1] )
3173 return self
3174
3176 try:
3177 return super(ParseExpression,self).__str__()
3178 except:
3179 pass
3180
3181 if self.strRepr is None:
3182 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
3183 return self.strRepr
3184
3186 super(ParseExpression,self).streamline()
3187
3188 for e in self.exprs:
3189 e.streamline()
3190
3191
3192
3193
3194 if ( len(self.exprs) == 2 ):
3195 other = self.exprs[0]
3196 if ( isinstance( other, self.__class__ ) and
3197 not(other.parseAction) and
3198 other.resultsName is None and
3199 not other.debug ):
3200 self.exprs = other.exprs[:] + [ self.exprs[1] ]
3201 self.strRepr = None
3202 self.mayReturnEmpty |= other.mayReturnEmpty
3203 self.mayIndexError |= other.mayIndexError
3204
3205 other = self.exprs[-1]
3206 if ( isinstance( other, self.__class__ ) and
3207 not(other.parseAction) and
3208 other.resultsName is None and
3209 not other.debug ):
3210 self.exprs = self.exprs[:-1] + other.exprs[:]
3211 self.strRepr = None
3212 self.mayReturnEmpty |= other.mayReturnEmpty
3213 self.mayIndexError |= other.mayIndexError
3214
3215 self.errmsg = "Expected " + _ustr(self)
3216
3217 return self
3218
3222
def validate(self, validateTrace=None):
    """
    Validate every contained expression, then check this expression for
    recursion.

    C{validateTrace} is the list of expressions already on the validation
    path; it is copied, never modified.  It defaults to an empty path.
    """
    # None instead of a shared mutable [] default
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:] + [self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion([])
3228
3233
3234 -class And(ParseExpression):
3235 """
3236 Requires all given C{ParseExpression}s to be found in the given order.
3237 Expressions may be separated by whitespace.
3238 May be constructed using the C{'+'} operator.
3239 May also be constructed using the C{'-'} operator, which will suppress backtracking.
3240
3241 Example::
3242 integer = Word(nums)
3243 name_expr = OneOrMore(Word(alphas))
3244
3245 expr = And([integer("id"),name_expr("name"),integer("age")])
3246 # more easily written as:
3247 expr = integer("id") + name_expr("name") + integer("age")
3248 """
3249
3255
def __init__(self, exprs, savelist=True):
    """
    Build a sequence expression from C{exprs}.

    The sequence may return empty only if every contained expression may.
    Whitespace handling is copied from the first contained expression;
    when C{exprs} is empty the base-class settings are left in place
    instead of failing on C{self.exprs[0]}.
    """
    super(And, self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    if self.exprs:
        first = self.exprs[0]
        self.setWhitespaceChars(first.whiteChars)
        # assigned after setWhitespaceChars, same order as before
        self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
3262
def parseImpl(self, instring, loc, doActions=True):
    """
    Parse each contained expression in order, accumulating their tokens.

    Once an C{And._ErrorStop} marker has been passed, a failure of any
    later expression is re-raised as C{ParseSyntaxException}.

    Returns C{(loc, tokens)} after the last expression.
    """
    # the first element is parsed with callPreParse=False
    loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)

    past_error_stop = False
    for expr in self.exprs[1:]:
        if isinstance(expr, And._ErrorStop):
            past_error_stop = True
            continue

        if not past_error_stop:
            loc, exprtokens = expr._parse(instring, loc, doActions)
        else:
            try:
                loc, exprtokens = expr._parse(instring, loc, doActions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException._from_exception(pe)
            except IndexError:
                raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

        # keep results that carry tokens or named values
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
3287
3289 if isinstance( other, basestring ):
3290 other = ParserElement._literalStringClass( other )
3291 return self.append( other )
3292
3294 subRecCheckList = parseElementList[:] + [ self ]
3295 for e in self.exprs:
3296 e.checkRecursion( subRecCheckList )
3297 if not e.mayReturnEmpty:
3298 break
3299
3301 if hasattr(self,"name"):
3302 return self.name
3303
3304 if self.strRepr is None:
3305 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
3306
3307 return self.strRepr
3308
3309
3310 -class Or(ParseExpression):
3311 """
3312 Requires that at least one C{ParseExpression} is found.
3313 If two expressions match, the expression that matches the longest string will be used.
3314 May be constructed using the C{'^'} operator.
3315
3316 Example::
3317 # construct Or using '^' operator
3318
3319 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
3320 print(number.searchString("123 3.1416 789"))
3321 prints::
3322 [['123'], ['3.1416'], ['789']]
3323 """
def __init__(self, exprs, savelist=False):
    """
    Build a longest-match alternation from C{exprs}.

    The alternation may return empty if any alternative may, or if there
    are no alternatives at all.
    """
    super(Or, self).__init__(exprs, savelist)
    self.mayReturnEmpty = (not self.exprs) or any(e.mayReturnEmpty for e in self.exprs)
3330
def parseImpl(self, instring, loc, doActions=True):
    """
    Try every alternative with C{tryParse}, then parse for real with the
    alternatives that matched, longest match first.

    Returns the result of the first alternative whose real parse
    succeeds.  If none succeeds, re-raises the failure that occurred
    furthest into the input with its message replaced by C{self.errmsg},
    or raises a fresh C{ParseException} if nothing failed at all.
    """
    furthest_loc = -1
    furthest_exc = None
    candidates = []

    for alt in self.exprs:
        try:
            end_loc = alt.tryParse(instring, loc)
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_exc = ParseException(instring, len(instring), alt.errmsg, self)
                furthest_loc = len(instring)
        else:
            candidates.append((end_loc, alt))

    # longest lookahead first; the sort is stable, so alternatives that
    # end at the same position keep their declaration order
    for _, alt in sorted(candidates, key=lambda c: c[0], reverse=True):
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc

    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3367
3368
3370 if isinstance( other, basestring ):
3371 other = ParserElement._literalStringClass( other )
3372 return self.append( other )
3373
3375 if hasattr(self,"name"):
3376 return self.name
3377
3378 if self.strRepr is None:
3379 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
3380
3381 return self.strRepr
3382
3384 subRecCheckList = parseElementList[:] + [ self ]
3385 for e in self.exprs:
3386 e.checkRecursion( subRecCheckList )
3387
3390 """
3391 Requires that at least one C{ParseExpression} is found.
3392 If two expressions match, the first one listed is the one that will match.
3393 May be constructed using the C{'|'} operator.
3394
3395 Example::
3396 # construct MatchFirst using '|' operator
3397
3398 # watch the order of expressions to match
3399 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
3400 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
3401
3402 # put more selective expression first
3403 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
3404 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
3405 """
def __init__(self, exprs, savelist=False):
    """
    Build a first-match alternation from C{exprs}.

    The alternation may return empty if any alternative may, or if there
    are no alternatives at all.
    """
    super(MatchFirst, self).__init__(exprs, savelist)
    self.mayReturnEmpty = (not self.exprs) or any(e.mayReturnEmpty for e in self.exprs)
3412
def parseImpl(self, instring, loc, doActions=True):
    """
    Return the result of the first alternative that parses at C{loc}.

    If every alternative fails, re-raises the failure that occurred
    furthest into the input with its message replaced by C{self.errmsg},
    or raises a fresh C{ParseException} when there were no alternatives.
    """
    best_loc = -1
    best_exc = None

    for alt in self.exprs:
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as err:
            if err.loc > best_loc:
                best_exc = err
                best_loc = err.loc
        except IndexError:
            if len(instring) > best_loc:
                best_exc = ParseException(instring, len(instring), alt.errmsg, self)
                best_loc = len(instring)

    # reached only when no alternative matched
    if best_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    best_exc.msg = self.errmsg
    raise best_exc
3436
3438 if isinstance( other, basestring ):
3439 other = ParserElement._literalStringClass( other )
3440 return self.append( other )
3441
3443 if hasattr(self,"name"):
3444 return self.name
3445
3446 if self.strRepr is None:
3447 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
3448
3449 return self.strRepr
3450
3452 subRecCheckList = parseElementList[:] + [ self ]
3453 for e in self.exprs:
3454 e.checkRecursion( subRecCheckList )
3455
3456
3457 -class Each(ParseExpression):
3458 """
3459 Requires all given C{ParseExpression}s to be found, but in any order.
3460 Expressions may be separated by whitespace.
3461 May be constructed using the C{'&'} operator.
3462
3463 Example::
3464 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
3465 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
3466 integer = Word(nums)
3467 shape_attr = "shape:" + shape_type("shape")
3468 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
3469 color_attr = "color:" + color("color")
3470 size_attr = "size:" + integer("size")
3471
3472 # use Each (using operator '&') to accept attributes in any order
3473 # (shape and posn are required, color and size are optional)
3474 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
3475
3476 shape_spec.runTests('''
3477 shape: SQUARE color: BLACK posn: 100, 120
3478 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3479 color:GREEN size:20 shape:TRIANGLE posn:20,40
3480 '''
3481 )
3482 prints::
3483 shape: SQUARE color: BLACK posn: 100, 120
3484 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
3485 - color: BLACK
3486 - posn: ['100', ',', '120']
3487 - x: 100
3488 - y: 120
3489 - shape: SQUARE
3490
3491
3492 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3493 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
3494 - color: BLUE
3495 - posn: ['50', ',', '80']
3496 - x: 50
3497 - y: 80
3498 - shape: CIRCLE
3499 - size: 50
3500
3501
3502 color: GREEN size: 20 shape: TRIANGLE posn: 20,40
3503 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
3504 - color: GREEN
3505 - posn: ['20', ',', '40']
3506 - x: 20
3507 - y: 40
3508 - shape: TRIANGLE
3509 - size: 20
3510 """
def __init__(self, exprs, savelist=True):
    """
    Build an any-order expression from C{exprs}.

    The expression may return empty only if every contained expression
    may.  C{initExprGroups} is set so that C{parseImpl} builds its
    expression groups on first use.
    """
    super(Each, self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(expr.mayReturnEmpty for expr in self.exprs)
3516
def parseImpl(self, instring, loc, doActions=True):
    """
    Match all contained expressions in whatever order they appear.

    On first use the contained expressions are sorted into required,
    optional and repeating groups.  A lookahead pass with C{tryParse}
    then records the order in which expressions match; finally the
    expressions are parsed for real in that order and their results are
    summed.

    Raises C{ParseException} naming the required expressions that never
    matched.
    """
    if self.initExprGroups:
        exprs = self.exprs
        optional_wrappers = [e for e in exprs if isinstance(e, Optional)]
        # maps the id of an Optional's inner expression back to the Optional
        self.opt1map = dict((id(e.expr), e) for e in optional_wrappers)
        may_be_empty = [e for e in exprs if e.mayReturnEmpty and not isinstance(e, Optional)]
        self.optionals = [e.expr for e in optional_wrappers] + may_be_empty
        self.multioptionals = [e.expr for e in exprs if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in exprs if isinstance(e, OneOrMore)]
        self.required = [e for e in exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required += self.multirequired
        self.initExprGroups = False

    scan_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pending_required + pending_optional + self.multioptionals + self.multirequired
        failures = 0
        for cand in candidates:
            try:
                scan_loc = cand.tryParse(instring, scan_loc)
            except ParseException:
                failures += 1
                continue
            matchOrder.append(self.opt1map.get(id(cand), cand))
            if cand in pending_required:
                pending_required.remove(cand)
            elif cand in pending_optional:
                pending_optional.remove(cand)
        if failures == len(candidates):
            break

    if pending_required:
        missing = ", ".join(_ustr(e) for e in pending_required)
        raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pending_optional]

    resultlist = []
    for matched in matchOrder:
        loc, results = matched._parse(instring, loc, doActions)
        resultlist.append(results)

    return loc, sum(resultlist, ParseResults([]))
3565
3567 if hasattr(self,"name"):
3568 return self.name
3569
3570 if self.strRepr is None:
3571 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
3572
3573 return self.strRepr
3574
3576 subRecCheckList = parseElementList[:] + [ self ]
3577 for e in self.exprs:
3578 e.checkRecursion( subRecCheckList )
3579
3582 """
3583 Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
3584 """
def __init__(self, expr, savelist=False):
    """
    Wrap a single expression.

    A string C{expr} is converted with
    C{ParserElement._literalStringClass}; when that class is not a
    C{Token} subclass the string is first wrapped in a C{Literal}.  When
    C{expr} is not C{None}, this element copies its parsing flags,
    whitespace settings and ignore expressions.
    """
    super(ParseElementEnhance, self).__init__(savelist)
    if isinstance(expr, basestring):
        literal_cls = ParserElement._literalStringClass
        if issubclass(literal_cls, Token):
            expr = literal_cls(expr)
        else:
            expr = literal_cls(Literal(expr))
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars(expr.whiteChars)
    # assigned after setWhitespaceChars, same order as before
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
3602
def parseImpl(self, instring, loc, doActions=True):
    """
    Delegate parsing to the wrapped expression.

    Returns whatever C{self.expr._parse} returns.  Raises
    C{ParseException} when no expression has been set.
    """
    if self.expr is None:
        # pass the real input string (previously "") so the exception can
        # report the failing line and column
        raise ParseException(instring, loc, self.errmsg, self)
    return self.expr._parse(instring, loc, doActions, callPreParse=False)
3608
3610 self.skipWhitespace = False
3611 self.expr = self.expr.copy()
3612 if self.expr is not None:
3613 self.expr.leaveWhitespace()
3614 return self
3615
3617 if isinstance( other, Suppress ):
3618 if other not in self.ignoreExprs:
3619 super( ParseElementEnhance, self).ignore( other )
3620 if self.expr is not None:
3621 self.expr.ignore( self.ignoreExprs[-1] )
3622 else:
3623 super( ParseElementEnhance, self).ignore( other )
3624 if self.expr is not None:
3625 self.expr.ignore( self.ignoreExprs[-1] )
3626 return self
3627
3633
3635 if self in parseElementList:
3636 raise RecursiveGrammarException( parseElementList+[self] )
3637 subRecCheckList = parseElementList[:] + [ self ]
3638 if self.expr is not None:
3639 self.expr.checkRecursion( subRecCheckList )
3640
def validate(self, validateTrace=None):
    """
    Validate the wrapped expression (if any), then check this expression
    for recursion.

    C{validateTrace} is the list of expressions already on the validation
    path; it is copied, never modified.  It defaults to an empty path.
    """
    # None instead of a shared mutable [] default
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:] + [self]
    if self.expr is not None:
        self.expr.validate(tmp)
    self.checkRecursion([])
3646
3648 try:
3649 return super(ParseElementEnhance,self).__str__()
3650 except:
3651 pass
3652
3653 if self.strRepr is None and self.expr is not None:
3654 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
3655 return self.strRepr
3656
3659 """
3660 Lookahead matching of the given parse expression. C{FollowedBy}
3661 does I{not} advance the parsing position within the input string, it only
3662 verifies that the specified parse expression matches at the current
3663 position. C{FollowedBy} always returns a null token list.
3664
3665 Example::
3666 # use FollowedBy to match a label only if it is followed by a ':'
3667 data_word = Word(alphas)
3668 label = data_word + FollowedBy(':')
3669 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3670
3671 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
3672 prints::
3673 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
3674 """
3678
def parseImpl(self, instring, loc, doActions=True):
    """
    Look ahead with the wrapped expression without advancing.

    Any exception raised by C{tryParse} propagates; on success the
    original C{loc} and an empty token list are returned.
    """
    lookahead = self.expr.tryParse
    lookahead(instring, loc)
    return loc, []
3682
3683
3684 -class NotAny(ParseElementEnhance):
3685 """
3686 Lookahead to disallow matching with the given parse expression. C{NotAny}
3687 does I{not} advance the parsing position within the input string, it only
3688 verifies that the specified parse expression does I{not} match at the current
3689 position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
3690 always returns a null token list. May be constructed using the '~' operator.
3691
3692 Example::
3693
3694 """
3696 super(NotAny,self).__init__(expr)
3697
3698 self.skipWhitespace = False
3699 self.mayReturnEmpty = True
3700 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
3701
3702 - def parseImpl( self, instring, loc, doActions=True ):
3706
3708 if hasattr(self,"name"):
3709 return self.name
3710
3711 if self.strRepr is None:
3712 self.strRepr = "~{" + _ustr(self.expr) + "}"
3713
3714 return self.strRepr
3715
def __init__(self, expr, stopOn=None):
    """
    Wrap C{expr} for repeated matching.

    C{stopOn}, when given, is an expression (or a string, converted with
    C{ParserElement._literalStringClass}) whose negation is stored in
    C{self.not_ender}; otherwise C{self.not_ender} is C{None}.
    """
    super(_MultipleMatch, self).__init__(expr)
    if isinstance(stopOn, basestring):
        stopOn = ParserElement._literalStringClass(stopOn)
    if stopOn is None:
        self.not_ender = None
    else:
        self.not_ender = ~stopOn
3723
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the wrapped expression one or more times.

    The first match is mandatory and its failure propagates.  Later
    repetitions are attempted until one raises C{ParseException} or
    C{IndexError}, which ends the repetition quietly.  When a stop
    expression is configured, C{self.not_ender} is tried before every
    match attempt.

    Returns C{(loc, tokens)} after the last successful repetition.
    """
    parse_one = self.expr._parse
    skip_ignorables = self._skipIgnorables
    reject_ender = self.not_ender.tryParse if self.not_ender is not None else None

    # must be at least one; the not_ender check runs first
    if reject_ender is not None:
        reject_ender(instring, loc)
    loc, tokens = parse_one(instring, loc, doActions, callPreParse=False)

    try:
        has_ignorables = bool(self.ignoreExprs)
        while True:
            if reject_ender is not None:
                reject_ender(instring, loc)
            start = skip_ignorables(instring, loc) if has_ignorables else loc
            loc, more = parse_one(instring, start, doActions)
            if more or more.haskeys():
                tokens += more
    except (ParseException, IndexError):
        pass

    return loc, tokens
3752
3754 """
3755 Repetition of one or more of the given expression.
3756
3757 Parameters:
3758 - expr - expression that must match one or more times
3759 - stopOn - (default=C{None}) - expression for a terminating sentinel
3760 (only required if the sentinel would ordinarily match the repetition
3761 expression)
3762
3763 Example::
3764 data_word = Word(alphas)
3765 label = data_word + FollowedBy(':')
3766 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
3767
3768 text = "shape: SQUARE posn: upper left color: BLACK"
3769 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
3770
3771 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
3772 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3773 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
3774
3775 # could also be written as
3776 (attr_expr * (1,)).parseString(text).pprint()
3777 """
3778
3780 if hasattr(self,"name"):
3781 return self.name
3782
3783 if self.strRepr is None:
3784 self.strRepr = "{" + _ustr(self.expr) + "}..."
3785
3786 return self.strRepr
3787
3792
3794 """
3795 Optional repetition of zero or more of the given expression.
3796
3797 Parameters:
3798 - expr - expression that must match zero or more times
3799 - stopOn - (default=C{None}) - expression for a terminating sentinel
3800 (only required if the sentinel would ordinarily match the repetition
3801 expression)
3802
3803 Example: similar to L{OneOrMore}
3804 """
3805 - def __init__( self, expr, stopOn=None):
3808
3809 - def parseImpl( self, instring, loc, doActions=True ):
3814
3816 if hasattr(self,"name"):
3817 return self.name
3818
3819 if self.strRepr is None:
3820 self.strRepr = "[" + _ustr(self.expr) + "]..."
3821
3822 return self.strRepr
3823
3830
3831 _optionalNotMatched = _NullToken()
3833 """
3834 Optional matching of the given expression.
3835
3836 Parameters:
3837 - expr - expression that must match zero or one times
3838 - default (optional) - value to be returned if the optional expression is not found.
3839
3840 Example::
3841 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
3842 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
3843 zip.runTests('''
3844 # traditional ZIP code
3845 12345
3846
3847 # ZIP+4 form
3848 12101-0001
3849
3850 # invalid ZIP
3851 98765-
3852 ''')
3853 prints::
3854 # traditional ZIP code
3855 12345
3856 ['12345']
3857
3858 # ZIP+4 form
3859 12101-0001
3860 ['12101-0001']
3861
3862 # invalid ZIP
3863 98765-
3864 ^
3865 FAIL: Expected end of text (at char 5), (line:1, col:6)
3866 """
3868 super(Optional,self).__init__( expr, savelist=False )
3869 self.defaultValue = default
3870 self.mayReturnEmpty = True
3871
def parseImpl(self, instring, loc, doActions=True):
    """
    Parse the wrapped expression if it matches; otherwise succeed at the
    same C{loc}.

    When the wrapped expression fails and a default value was configured,
    the default is returned as the single token (as a C{ParseResults}
    keyed by the wrapped expression's results name, if it has one).
    Without a default an empty token list is returned.
    """
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    except (ParseException, IndexError):
        default = self.defaultValue
        if default is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([default])
            tokens[self.expr.resultsName] = default
        else:
            tokens = [default]
    return loc, tokens
3885
3887 if hasattr(self,"name"):
3888 return self.name
3889
3890 if self.strRepr is None:
3891 self.strRepr = "[" + _ustr(self.expr) + "]"
3892
3893 return self.strRepr
3894
3895 -class SkipTo(ParseElementEnhance):
3896 """
3897 Token for skipping over all undefined text until the matched expression is found.
3898
3899 Parameters:
3900 - expr - target expression marking the end of the data to be skipped
3901 - include - (default=C{False}) if True, the target expression is also parsed
3902 (the skipped text and target expression are returned as a 2-element list).
3903 - ignore - (default=C{None}) used to define grammars (typically quoted strings and
3904 comments) that might contain false matches to the target expression
3905 - failOn - (default=C{None}) define expressions that are not allowed to be
3906 included in the skipped test; if found before the target expression is found,
3907 the SkipTo is not a match
3908
3909 Example::
3910 report = '''
3911 Outstanding Issues Report - 1 Jan 2000
3912
3913 # | Severity | Description | Days Open
3914 -----+----------+-------------------------------------------+-----------
3915 101 | Critical | Intermittent system crash | 6
3916 94 | Cosmetic | Spelling error on Login ('log|n') | 14
3917 79 | Minor | System slow when running too many reports | 47
3918 '''
3919 integer = Word(nums)
3920 SEP = Suppress('|')
3921 # use SkipTo to simply match everything up until the next SEP
3922 # - ignore quoted strings, so that a '|' character inside a quoted string does not match
3923 # - parse action will call token.strip() for each matched token, i.e., the description body
3924 string_data = SkipTo(SEP, ignore=quotedString)
3925 string_data.setParseAction(tokenMap(str.strip))
3926 ticket_expr = (integer("issue_num") + SEP
3927 + string_data("sev") + SEP
3928 + string_data("desc") + SEP
3929 + integer("days_open"))
3930
3931 for tkt in ticket_expr.searchString(report):
3932 print(tkt.dump())
3933 prints::
3934 ['101', 'Critical', 'Intermittent system crash', '6']
3935 - days_open: 6
3936 - desc: Intermittent system crash
3937 - issue_num: 101
3938 - sev: Critical
3939 ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
3940 - days_open: 14
3941 - desc: Spelling error on Login ('log|n')
3942 - issue_num: 94
3943 - sev: Cosmetic
3944 ['79', 'Minor', 'System slow when running too many reports', '47']
3945 - days_open: 47
3946 - desc: System slow when running too many reports
3947 - issue_num: 79
3948 - sev: Minor
3949 """
def __init__(self, other, include=False, ignore=None, failOn=None):
    """
    Configure a skip up to the target expression C{other}.

     - C{include} is stored as C{self.includeMatch}
     - C{ignore} is stored as C{self.ignoreExpr}
     - C{failOn} may be an expression or a string; a string is converted
       with C{ParserElement._literalStringClass}
    """
    super(SkipTo, self).__init__(other)
    self.includeMatch = include
    self.ignoreExpr = ignore
    self.failOn = (ParserElement._literalStringClass(failOn)
                   if isinstance(failOn, basestring) else failOn)
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.asList = False
    self.errmsg = "No match found for " + _ustr(self.expr)
3962
def parseImpl(self, instring, loc, doActions=True):
    """
    Scan forward from C{loc} until the target expression matches.

    At each candidate position:
     - if C{failOn} is set and matches there, the SkipTo fails
     - if C{ignore} is set, consecutive matches of it are stepped over
     - the target expression is tried without running parse actions

    Returns C{(loc, results)} where C{results} holds the skipped text
    and, when C{includeMatch} is set, the tokens of the target expression
    as well.

    Raises C{ParseException} if C{failOn} matches before the target is
    found, or if the end of the input is reached without a match.
    """
    startloc = loc
    instrlen = len(instring)
    expr_parse = self.expr._parse
    self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
    self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

    tmploc = loc
    while tmploc <= instrlen:
        if self_failOn_canParseNext is not None:
            # A failOn match means this SkipTo is not a match.  This must
            # raise: a plain `break` would skip the while/else failure
            # below and report a successful skip up to the failOn text.
            if self_failOn_canParseNext(instring, tmploc):
                raise ParseException(instring, loc, self.errmsg, self)

        if self_ignoreExpr_tryParse is not None:
            # advance past ignore expressions
            while 1:
                try:
                    tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                except ParseBaseException:
                    break

        try:
            expr_parse(instring, tmploc, doActions=False, callPreParse=False)
        except (ParseException, IndexError):
            # no match, advance loc in string
            tmploc += 1
        else:
            # matched skipto expr, done
            break

    else:
        # ran off the end of the input string without matching skipto expr, fail
        raise ParseException(instring, loc, self.errmsg, self)

    # build up return values
    loc = tmploc
    skiptext = instring[startloc:loc]
    skipresult = ParseResults(skiptext)

    if self.includeMatch:
        loc, mat = expr_parse(instring, loc, doActions, callPreParse=False)
        skipresult += mat

    return loc, skipresult
4009
4010 -class Forward(ParseElementEnhance):
4011 """
4012 Forward declaration of an expression to be defined later -
4013 used for recursive grammars, such as algebraic infix notation.
4014 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
4015
4016 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
4017 Specifically, '|' has a lower precedence than '<<', so that::
4018 fwdExpr << a | b | c
4019 will actually be evaluated as::
4020 (fwdExpr << a) | b | c
4021 thereby leaving b and c out as parseable alternatives. It is recommended that you
4022 explicitly group the values inserted into the C{Forward}::
4023 fwdExpr << (a | b | c)
4024 Converting to use the '<<=' operator instead will avoid this problem.
4025
4026 See L{ParseResults.pprint} for an example of a recursive parser created using
4027 C{Forward}.
4028 """
4031
4033 if isinstance( other, basestring ):
4034 other = ParserElement._literalStringClass(other)
4035 self.expr = other
4036 self.strRepr = None
4037 self.mayIndexError = self.expr.mayIndexError
4038 self.mayReturnEmpty = self.expr.mayReturnEmpty
4039 self.setWhitespaceChars( self.expr.whiteChars )
4040 self.skipWhitespace = self.expr.skipWhitespace
4041 self.saveAsList = self.expr.saveAsList
4042 self.ignoreExprs.extend(self.expr.ignoreExprs)
4043 return self
4044
4046 return self << other
4047
4049 self.skipWhitespace = False
4050 return self
4051
4053 if not self.streamlined:
4054 self.streamlined = True
4055 if self.expr is not None:
4056 self.expr.streamline()
4057 return self
4058
def validate(self, validateTrace=None):
    """
    Validate the forwarded expression, then check this expression for
    recursion.

    The forwarded expression is validated only when this element is not
    already present in C{validateTrace}, the list of expressions on the
    current validation path.  The list is copied, never modified, and
    defaults to an empty path.
    """
    # None instead of a shared mutable [] default
    if validateTrace is None:
        validateTrace = []
    if self not in validateTrace:
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
    self.checkRecursion([])
4065
4067 if hasattr(self,"name"):
4068 return self.name
4069 return self.__class__.__name__ + ": ..."
4070
4071
4072 self._revertClass = self.__class__
4073 self.__class__ = _ForwardNoRecurse
4074 try:
4075 if self.expr is not None:
4076 retString = _ustr(self.expr)
4077 else:
4078 retString = "None"
4079 finally:
4080 self.__class__ = self._revertClass
4081 return self.__class__.__name__ + ": " + retString
4082
4084 if self.expr is not None:
4085 return super(Forward,self).copy()
4086 else:
4087 ret = Forward()
4088 ret <<= self
4089 return ret
4090
4094
4096 """
4097 Abstract subclass of C{ParseExpression}, for converting parsed results.
4098 """
4099 - def __init__( self, expr, savelist=False ):
4102
4104 """
4105 Converter to concatenate all matching tokens to a single string.
4106 By default, the matching patterns must also be contiguous in the input string;
4107 this can be disabled by specifying C{'adjacent=False'} in the constructor.
4108
4109 Example::
4110 real = Word(nums) + '.' + Word(nums)
4111 print(real.parseString('3.1416')) # -> ['3', '.', '1416']
4112 # will also erroneously match the following
4113 print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
4114
4115 real = Combine(Word(nums) + '.' + Word(nums))
4116 print(real.parseString('3.1416')) # -> ['3.1416']
4117 # no match when there are internal spaces
4118 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
4119 """
def __init__(self, expr, joinString="", adjacent=True):
    """
    Wrap C{expr} so that its tokens are combined into one string.

     - C{joinString} is stored and used when the tokens are joined
     - C{adjacent}, when true, calls C{leaveWhitespace()} on this element

    Whitespace skipping is then switched back on for the Combine itself.
    """
    super(Combine, self).__init__(expr)
    self.joinString = joinString
    self.adjacent = adjacent
    # leaveWhitespace() must run before skipWhitespace is set to True
    if adjacent:
        self.leaveWhitespace()
    self.skipWhitespace = True
    self.callPreparse = True
4129
4136
def postParse(self, instring, loc, tokenlist):
    """
    Replace the matched tokens with a single combined string.

    The result is a copy of C{tokenlist} whose token list is emptied and
    refilled with one string built from
    C{tokenlist._asStringList(self.joinString)}.  It is wrapped in a list
    when this element has a results name and the result has keys.
    """
    combined = "".join(tokenlist._asStringList(self.joinString))

    # work on a copy with its tokens removed, then add the combined string
    retToks = tokenlist.copy()
    del retToks[:]
    retToks += ParseResults([combined], modal=self.modalResults)

    if self.resultsName and retToks.haskeys():
        return [retToks]
    return retToks
4146
4147 -class Group(TokenConverter):
4148 """
4149 Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
4150
4151 Example::
4152 ident = Word(alphas)
4153 num = Word(nums)
4154 term = ident | num
4155 func = ident + Optional(delimitedList(term))
4156 print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
4157
4158 func = ident + Group(Optional(delimitedList(term)))
4159 print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
4160 """
4162 super(Group,self).__init__( expr )
4163 self.saveAsList = True
4164
def postParse(self, instring, loc, tokenlist):
    """
    Nest the matched tokens: return a one-element list holding
    C{tokenlist} itself.
    """
    grouped = [tokenlist]
    return grouped
4167
4168 -class Dict(TokenConverter):
4169 """
4170 Converter to return a repetitive expression as a list, but also as a dictionary.
4171 Each element can also be referenced using the first token in the expression as its key.
4172 Useful for tabular report scraping when the first column can be used as a item key.
4173
4174 Example::
4175 data_word = Word(alphas)
4176 label = data_word + FollowedBy(':')
4177 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
4178
4179 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4180 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4181
4182 # print attributes as plain groups
4183 print(OneOrMore(attr_expr).parseString(text).dump())
4184
4185 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
4186 result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
4187 print(result.dump())
4188
4189 # access named fields as dict entries, or output as dict
4190 print(result['shape'])
4191 print(result.asDict())
4192 prints::
4193 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
4194
4195 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4196 - color: light blue
4197 - posn: upper left
4198 - shape: SQUARE
4199 - texture: burlap
4200 SQUARE
4201 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
4202 See more examples at L{ParseResults} of accessing fields by results name.
4203 """
4205 super(Dict,self).__init__( expr )
4206 self.saveAsList = True
4207
4208 - def postParse( self, instring, loc, tokenlist ):
4209 for i,tok in enumerate(tokenlist):
4210 if len(tok) == 0:
4211 continue
4212 ikey = tok[0]
4213 if isinstance(ikey,int):
4214 ikey = _ustr(tok[0]).strip()
4215 if len(tok)==1:
4216 tokenlist[ikey] = _ParseResultsWithOffset("",i)
4217 elif len(tok)==2 and not isinstance(tok[1],ParseResults):
4218 tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
4219 else:
4220 dictvalue = tok.copy()
4221 del dictvalue[0]
4222 if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
4223 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
4224 else:
4225 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
4226
4227 if self.resultsName:
4228 return [ tokenlist ]
4229 else:
4230 return tokenlist
4231
4234 """
4235 Converter for ignoring the results of a parsed expression.
4236
4237 Example::
4238 source = "a, b, c,d"
4239 wd = Word(alphas)
4240 wd_list1 = wd + ZeroOrMore(',' + wd)
4241 print(wd_list1.parseString(source))
4242
4243 # often, delimiters that are useful during parsing are just in the
4244 # way afterward - use Suppress to keep them out of the parsed output
4245 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
4246 print(wd_list2.parseString(source))
4247 prints::
4248 ['a', ',', 'b', ',', 'c', ',', 'd']
4249 ['a', 'b', 'c', 'd']
4250 (See also L{delimitedList}.)
4251 """
4252 - def postParse( self, instring, loc, tokenlist ):
4254
4257
4260 """
4261 Wrapper for parse actions, to ensure they are only called once.
4262 """
4264 self.callable = _trim_arity(methodCall)
4265 self.called = False
4267 if not self.called:
4268 results = self.callable(s,l,t)
4269 self.called = True
4270 return results
4271 raise ParseException(s,l,"")
4274
4276 """
4277 Decorator for debugging parse actions.
4278
4279 Example::
4280 wd = Word(alphas)
4281
4282 @traceParseAction
4283 def remove_duplicate_chars(tokens):
4284 return ''.join(sorted(set(''.join(tokens))))
4285
4286 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
4287 print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
4288 prints::
4289 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
4290 <<leaving remove_duplicate_chars (ret: 'dfjkls')
4291 ['dfjkls']
4292 """
4293 f = _trim_arity(f)
4294 def z(*paArgs):
4295 thisFunc = f.__name__
4296 s,l,t = paArgs[-3:]
4297 if len(paArgs)>3:
4298 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
4299 sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
4300 try:
4301 ret = f(*paArgs)
4302 except Exception as exc:
4303 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
4304 raise
4305 sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
4306 return ret
4307 try:
4308 z.__name__ = f.__name__
4309 except AttributeError:
4310 pass
4311 return z
4312
4313
4314
4315
def delimitedList( expr, delim=",", combine=False ):
    """
    Build an expression matching one or more C{expr} separated by C{delim}
    (a comma unless told otherwise).

    With C{combine=False} (the default) the result is
    C{expr + ZeroOrMore(Suppress(delim) + expr)}: the delimiters are suppressed
    and the matched elements come back as a list of tokens.
    With C{combine=True} the whole sequence is wrapped in C{L{Combine}}, so the
    elements and delimiters are returned together as a single token string.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    # display name of the form "expr [delim expr]..."
    dlName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
4334
4336 """
4337 Helper to define a counted list of expressions.
4338 This helper defines a pattern of the form::
4339 integer expr expr expr...
4340 where the leading integer tells how many expr expressions follow.
4341 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
4342
4343 Example::
4344 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
4345 """
4346 arrayExpr = Forward()
4347 def countFieldParseAction(s,l,t):
4348 n = t[0]
4349 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
4350 return []
4351 if intExpr is None:
4352 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
4353 else:
4354 intExpr = intExpr.copy()
4355 intExpr.setName("arrayLen")
4356 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
4357 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
4358
4360 ret = []
4361 for i in L:
4362 if isinstance(i,list):
4363 ret.extend(_flatten(i))
4364 else:
4365 ret.append(i)
4366 return ret
4367
4369 """
4370 Helper to define an expression that is indirectly defined from
4371 the tokens matched in a previous expression, that is, it looks
4372 for a 'repeat' of a previous expression. For example::
4373 first = Word(nums)
4374 second = matchPreviousLiteral(first)
4375 matchExpr = first + ":" + second
4376 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
4377 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
4378 If this is not desired, use C{matchPreviousExpr}.
4379 Do I{not} use with packrat parsing enabled.
4380 """
4381 rep = Forward()
4382 def copyTokenToRepeater(s,l,t):
4383 if t:
4384 if len(t) == 1:
4385 rep << t[0]
4386 else:
4387
4388 tflat = _flatten(t.asList())
4389 rep << And(Literal(tt) for tt in tflat)
4390 else:
4391 rep << Empty()
4392 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4393 rep.setName('(prev) ' + _ustr(expr))
4394 return rep
4395
4397 """
4398 Helper to define an expression that is indirectly defined from
4399 the tokens matched in a previous expression, that is, it looks
4400 for a 'repeat' of a previous expression. For example::
4401 first = Word(nums)
4402 second = matchPreviousExpr(first)
4403 matchExpr = first + ":" + second
4404 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
4405 expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
4406 the expressions are evaluated first, and then compared, so
4407 C{"1"} is compared with C{"10"}.
4408 Do I{not} use with packrat parsing enabled.
4409 """
4410 rep = Forward()
4411 e2 = expr.copy()
4412 rep <<= e2
4413 def copyTokenToRepeater(s,l,t):
4414 matchTokens = _flatten(t.asList())
4415 def mustMatchTheseTokens(s,l,t):
4416 theseTokens = _flatten(t.asList())
4417 if theseTokens != matchTokens:
4418 raise ParseException("",0,"")
4419 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
4420 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4421 rep.setName('(prev) ' + _ustr(expr))
4422 return rep
4423
4425
4426 for c in r"\^-]":
4427 s = s.replace(c,_bslash+c)
4428 s = s.replace("\n",r"\n")
4429 s = s.replace("\t",r"\t")
4430 return _ustr(s)
4431
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns C{NoMatch()} when no symbols are given; an argument that is neither a
    string nor an iterable additionally triggers a C{SyntaxWarning}.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level collections module were removed in
    # Python 3.10; fall back to the old location only for interpreters that
    # predate collections.abc.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates, and move any symbol that starts with an earlier symbol
    # in front of it, so the longer alternative is always tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # duplicate of cur - remove it and rescan from the same position
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other - hoist other ahead of cur and rescan
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing later in the list conflicts with symbols[i]
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character - a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort: fall through to the MatchFirst form, but do not
            # intercept KeyboardInterrupt/SystemExit as a bare except would
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4504
4506 """
4507 Helper to easily and clearly define a dictionary by specifying the respective patterns
4508 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
4509 in the proper order. The key pattern can include delimiting markers or punctuation,
4510 as long as they are suppressed, thereby leaving the significant key text. The value
4511 pattern can include named results, so that the C{Dict} results can include named token
4512 fields.
4513
4514 Example::
4515 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4516 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4517 print(OneOrMore(attr_expr).parseString(text).dump())
4518
4519 attr_label = label
4520 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
4521
4522 # similar to Dict, but simpler call format
4523 result = dictOf(attr_label, attr_value).parseString(text)
4524 print(result.dump())
4525 print(result['shape'])
4526 print(result.shape) # object attribute access works too
4527 print(result.asDict())
4528 prints::
4529 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4530 - color: light blue
4531 - posn: upper left
4532 - shape: SQUARE
4533 - texture: burlap
4534 SQUARE
4535 SQUARE
4536 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
4537 """
4538 return Dict( ZeroOrMore( Group ( key + value ) ) )
4539
def originalTextFor(expr, asString=True):
    """
    Helper that yields the raw slice of the input string matched by C{expr},
    instead of the tokens C{expr} would normally produce.  Handy for turning
    the parsed fields of an HTML start tag back into the tag text itself, or for
    recovering text whose tokens were separated by intervening whitespace.

    With C{asString=True} (the default) the parse action returns a string
    containing the original parsed text.  With C{asString=False} the tokens are
    replaced in place by a single token holding that text, and the result stays a
    C{L{ParseResults}} that keeps any results names matched inside C{expr} - so
    pass C{asString=False} if C{expr} defines results names you want preserved.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose only token is the location at which they matched
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s,l,t):
            # pop the start marker first, then the end marker, before the
            # token list is overwritten with the sliced text
            begin = t.pop('_original_start')
            finish = t.pop('_original_end')
            t[:] = [s[begin:finish]]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4576
4578 """
4579 Helper to undo pyparsing's default grouping of And expressions, even
4580 if all but one are non-empty.
4581 """
4582 return TokenConverter(expr).setParseAction(lambda t:t[0])
4583
4585 """
4586 Helper to decorate a returned token with its starting and ending locations in the input string.
4587 This helper adds the following results names:
4588 - locn_start = location where matched expression begins
4589 - locn_end = location where matched expression ends
4590 - value = the actual parsed results
4591
4592 Be careful if the input text contains C{<TAB>} characters, you may want to call
4593 C{L{ParserElement.parseWithTabs}}
4594
4595 Example::
4596 wd = Word(alphas)
4597 for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
4598 print(match)
4599 prints::
4600 [[0, 'ljsdf', 5]]
4601 [[8, 'lksdjjf', 15]]
4602 [[18, 'lkkjj', 23]]
4603 """
4604 locator = Empty().setParseAction(lambda s,l,t: l)
4605 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
4606
4607
4608
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the regexp-like "[...]" character set notation parsed via _reBracketExpr
# a backslash followed by one punctuation character stands for that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x##, \X## or \0x## hex escape.  The digits are everything after the x marker;
# they are taken by splitting on it rather than with lstrip(r'\0x'), because
# lstrip removes a *set* of characters: it left "\x00" empty and never removed
# an upper-case "X", both of which made int() raise ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# \0## octal escape - drop the backslash and read the rest as base 8
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# "a-z" style range, kept as a group of its two end characters
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4623 r"""
4624 Helper to easily define string ranges for use in Word construction. Borrows
4625 syntax from regexp '[]' string range definitions::
4626 srange("[0-9]") -> "0123456789"
4627 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
4628 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
4629 The input string must be enclosed in []'s, and the returned string is the expanded
4630 character set joined into a single string.
4631 The values enclosed in the []'s may be:
4632 - a single character
4633 - an escaped character with a leading backslash (such as C{\-} or C{\]})
4634 - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
4635 (C{\0x##} is also supported for backwards compatibility)
4636 - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
4637 - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
4638 - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
4639 """
4640 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
4641 try:
4642 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
4643 except:
4644 return ""
4645
4647 """
4648 Helper method for defining parse actions that require matching at a specific
4649 column in the input text.
4650 """
4651 def verifyCol(strg,locn,toks):
4652 if col(locn,strg) != n:
4653 raise ParseException(strg,locn,"matched token not at column %d" % n)
4654 return verifyCol
4655
4657 """
4658 Helper method for common parse actions that simply return a literal value. Especially
4659 useful when used with C{L{transformString<ParserElement.transformString>}()}.
4660
4661 Example::
4662 num = Word(nums).setParseAction(lambda toks: int(toks[0]))
4663 na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
4664 term = na | num
4665
4666 OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
4667 """
4668 return lambda s,l,t: [replStr]
4669
4671 """
4672 Helper parse action for removing quotation marks from parsed quoted strings.
4673
4674 Example::
4675 # by default, quotation marks are included in parsed results
4676 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
4677
4678 # use removeQuotes to strip quotation marks from parsed results
4679 quotedString.setParseAction(removeQuotes)
4680 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
4681 """
4682 return t[0][1:-1]
4683
4685 """
4686 Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
4687 args are passed, they are forwarded to the given function as additional arguments after
4688 the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
4689 parsed data to an integer using base 16.
4690
4691 Example (compare the last example to the one in L{ParserElement.transformString})::
4692 hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
4693 hex_ints.runTests('''
4694 00 11 22 aa FF 0a 0d 1a
4695 ''')
4696
4697 upperword = Word(alphas).setParseAction(tokenMap(str.upper))
4698 OneOrMore(upperword).runTests('''
4699 my kingdom for a horse
4700 ''')
4701
4702 wd = Word(alphas).setParseAction(tokenMap(str.title))
4703 OneOrMore(wd).setParseAction(' '.join).runTests('''
4704 now is the winter of our discontent made glorious summer by this sun of york
4705 ''')
4706 prints::
4707 00 11 22 aa FF 0a 0d 1a
4708 [0, 17, 34, 170, 255, 10, 13, 26]
4709
4710 my kingdom for a horse
4711 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
4712
4713 now is the winter of our discontent made glorious summer by this sun of york
4714 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
4715 """
4716 def pa(s,l,t):
4717 return [func(tokn, *args) for tokn in t]
4718
4719 try:
4720 func_name = getattr(func, '__name__',
4721 getattr(func, '__class__').__name__)
4722 except Exception:
4723 func_name = str(func)
4724 pa.__name__ = func_name
4725
4726 return pa
4727
# Ready-made parse actions built on tokenMap: each token is passed through
# _ustr and then case-converted.
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""Helper parse action to convert tokens to upper case."""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""Helper parse action to convert tokens to lower case."""
4762
4781
4790
4792 """
4793 Helper to create a validating parse action to be used with start tags created
4794 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
4795 with a required attribute value, to avoid false matches on common tags such as
4796 C{<TD>} or C{<DIV>}.
4797
4798 Call C{withAttribute} with a series of attribute names and values. Specify the list
4799 of filter attributes names and values as:
4800 - keyword arguments, as in C{(align="right")}, or
4801 - as an explicit dict with C{**} operator, when an attribute name is also a Python
4802 reserved word, as in C{**{"class":"Customer", "align":"right"}}
4803 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
4804 For attribute names with a namespace prefix, you must use the second form. Attribute
4805 names are matched insensitive to upper/lower case.
4806
4807 If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
4808
4809 To verify that the attribute exists, but without specifying a value, pass
4810 C{withAttribute.ANY_VALUE} as the value.
4811
4812 Example::
4813 html = '''
4814 <div>
4815 Some text
4816 <div type="grid">1 4 0 1 0</div>
4817 <div type="graph">1,3 2,3 1,1</div>
4818 <div>this has no type</div>
4819 </div>
4820
4821 '''
4822 div,div_end = makeHTMLTags("div")
4823
4824 # only match div tag having a type attribute with value "grid"
4825 div_grid = div().setParseAction(withAttribute(type="grid"))
4826 grid_expr = div_grid + SkipTo(div | div_end)("body")
4827 for grid_header in grid_expr.searchString(html):
4828 print(grid_header.body)
4829
4830 # construct a match with any div tag having a type attribute, regardless of the value
4831 div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
4832 div_expr = div_any_type + SkipTo(div | div_end)("body")
4833 for div_header in div_expr.searchString(html):
4834 print(div_header.body)
4835 prints::
4836 1 4 0 1 0
4837
4838 1 4 0 1 0
4839 1,3 2,3 1,1
4840 """
4841 if args:
4842 attrs = args[:]
4843 else:
4844 attrs = attrDict.items()
4845 attrs = [(k,v) for k,v in attrs]
4846 def pa(s,l,tokens):
4847 for attrName,attrValue in attrs:
4848 if attrName not in tokens:
4849 raise ParseException(s,l,"no matching attribute " + attrName)
4850 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
4851 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
4852 (attrName, tokens[attrName], attrValue))
4853 return pa
4854 withAttribute.ANY_VALUE = object()
4855
def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when the attribute to test is C{class},
    which cannot be written as a keyword argument because C{class} is a
    reserved word in Python.

    The attribute name checked is C{"class"}, or C{"<namespace>:class"} when a
    non-empty C{namespace} is given; C{classname} is the required value.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    # passed as a dict because the attribute name is not a valid keyword
    criteria = {classattr : classname}
    return withAttribute(**criteria)
4890
# Associativity markers: two distinct sentinel objects stored as attributes
# of a _Constants holder.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
4896 """
4897 Helper method for constructing grammars of expressions made up of
4898 operators working in a precedence hierarchy. Operators may be unary or
4899 binary, left- or right-associative. Parse actions can also be attached
4900 to operator expressions.
4901
4902 Parameters:
4903 - baseExpr - expression representing the most basic element for the nested
4904 - opList - list of tuples, one for each operator precedence level in the
4905 expression grammar; each tuple is of the form
4906 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
4907 - opExpr is the pyparsing expression for the operator;
4908 may also be a string, which will be converted to a Literal;
4909 if numTerms is 3, opExpr is a tuple of two expressions, for the
4910 two operators separating the 3 terms
4911 - numTerms is the number of terms for this operator (must
4912 be 1, 2, or 3)
4913 - rightLeftAssoc is the indicator whether the operator is
4914 right or left associative, using the pyparsing-defined
4915 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
4916 - parseAction is the parse action to be associated with
4917 expressions matching this operator expression (the
4918 parse action tuple member may be omitted)
4919 - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
4920 - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
4921
4922 Example::
4923 # simple example of four-function arithmetic with ints and variable names
4924 integer = pyparsing_common.signedInteger
4925 varname = pyparsing_common.identifier
4926
4927 arith_expr = infixNotation(integer | varname,
4928 [
4929 ('-', 1, opAssoc.RIGHT),
4930 (oneOf('* /'), 2, opAssoc.LEFT),
4931 (oneOf('+ -'), 2, opAssoc.LEFT),
4932 ])
4933
4934 arith_expr.runTests('''
4935 5+3*6
4936 (5+3)*6
4937 -2--11
4938 ''', fullDump=False)
4939 prints::
4940 5+3*6
4941 [[5, '+', [3, '*', 6]]]
4942
4943 (5+3)*6
4944 [[[5, '+', 3], '*', 6]]
4945
4946 -2--11
4947 [[['-', 2], '-', ['-', 11]]]
4948 """
4949 ret = Forward()
4950 lastExpr = baseExpr | ( lpar + ret + rpar )
4951 for i,operDef in enumerate(opList):
4952 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
4953 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
4954 if arity == 3:
4955 if opExpr is None or len(opExpr) != 2:
4956 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
4957 opExpr1, opExpr2 = opExpr
4958 thisExpr = Forward().setName(termName)
4959 if rightLeftAssoc == opAssoc.LEFT:
4960 if arity == 1:
4961 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
4962 elif arity == 2:
4963 if opExpr is not None:
4964 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
4965 else:
4966 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
4967 elif arity == 3:
4968 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
4969 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
4970 else:
4971 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
4972 elif rightLeftAssoc == opAssoc.RIGHT:
4973 if arity == 1:
4974
4975 if not isinstance(opExpr, Optional):
4976 opExpr = Optional(opExpr)
4977 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
4978 elif arity == 2:
4979 if opExpr is not None:
4980 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
4981 else:
4982 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
4983 elif arity == 3:
4984 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
4985 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
4986 else:
4987 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
4988 else:
4989 raise ValueError("operator must indicate right or left associativity")
4990 if pa:
4991 matchExpr.setParseAction( pa )
4992 thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
4993 lastExpr = thisExpr
4994 ret <<= lastExpr
4995 return ret
4996
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  Each regex matches the opening quote and the
# string body: any character other than the quote, a newline or a backslash;
# a doubled quote; or a backslash escape (a single non-"x" character, or "x"
# followed by hex digits).  The closing quote is matched as a separate literal
# and Combine joins the two pieces into one token.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")

sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")

quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("quotedString using single or double quotes")

# a quoted string carrying a leading "u" prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
5007 """
5008 Helper method for defining nested lists enclosed in opening and closing
5009 delimiters ("(" and ")" are the default).
5010
5011 Parameters:
5012 - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
5013 - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
5014 - content - expression for items within the nested lists (default=C{None})
5015 - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
5016
5017 If an expression is not provided for the content argument, the nested
5018 expression will capture all whitespace-delimited content between delimiters
5019 as a list of separate values.
5020
5021 Use the C{ignoreExpr} argument to define expressions that may contain
5022 opening or closing characters that should not be treated as opening
5023 or closing characters for nesting, such as quotedString or a comment
5024 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
5025 The default is L{quotedString}, but if no expressions are to be ignored,
5026 then pass C{None} for this argument.
5027
5028 Example::
5029 data_type = oneOf("void int short long char float double")
5030 decl_data_type = Combine(data_type + Optional(Word('*')))
5031 ident = Word(alphas+'_', alphanums+'_')
5032 number = pyparsing_common.number
5033 arg = Group(decl_data_type + ident)
5034 LPAR,RPAR = map(Suppress, "()")
5035
5036 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
5037
5038 c_function = (decl_data_type("type")
5039 + ident("name")
5040 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
5041 + code_body("body"))
5042 c_function.ignore(cStyleComment)
5043
5044 source_code = '''
5045 int is_odd(int x) {
5046 return (x%2);
5047 }
5048
5049 int dec_to_hex(char hchar) {
5050 if (hchar >= '0' && hchar <= '9') {
5051 return (ord(hchar)-ord('0'));
5052 } else {
5053 return (10+ord(hchar)-ord('A'));
5054 }
5055 }
5056 '''
5057 for func in c_function.searchString(source_code):
5058 print("%(name)s (%(type)s) args: %(args)s" % func)
5059
5060 prints::
5061 is_odd (int) args: [['int', 'x']]
5062 dec_to_hex (int) args: [['char', 'hchar']]
5063 """
5064 if opener == closer:
5065 raise ValueError("opening and closing strings cannot be the same")
5066 if content is None:
5067 if isinstance(opener,basestring) and isinstance(closer,basestring):
5068 if len(opener) == 1 and len(closer)==1:
5069 if ignoreExpr is not None:
5070 content = (Combine(OneOrMore(~ignoreExpr +
5071 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5072 ).setParseAction(lambda t:t[0].strip()))
5073 else:
5074 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
5075 ).setParseAction(lambda t:t[0].strip()))
5076 else:
5077 if ignoreExpr is not None:
5078 content = (Combine(OneOrMore(~ignoreExpr +
5079 ~Literal(opener) + ~Literal(closer) +
5080 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5081 ).setParseAction(lambda t:t[0].strip()))
5082 else:
5083 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
5084 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5085 ).setParseAction(lambda t:t[0].strip()))
5086 else:
5087 raise ValueError("opening and closing arguments must be strings if no content expression is given")
5088 ret = Forward()
5089 if ignoreExpr is not None:
5090 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
5091 else:
5092 ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
5093 ret.setName('nested %s%s expression' % (opener,closer))
5094 return ret
5095
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack); the parse actions defined here
            read its last entries, append to it and pop from it
     - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note: C{blockStatementExpr.ignore()} is called on the expression that is
    passed in, with C{_bslash + LineEnd()} as the expression to ignore.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # nothing to compare once the parse location is at/after the end of the input
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            # deeper than the current level: reported as a fatal error
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a deeper level: remember its column
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        # same test as not(stack and column < top and column <= next-to-top),
        # evaluated in the same left-to-right, short-circuit order
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    line_breaks = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    peer_marker = Empty().setParseAction(checkPeerIndent).setName('')
    indent_marker = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    dedent_marker = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more peer-level statements, each optionally followed by line ends
    statement_run = OneOrMore( peer_marker + Group(blockStatementExpr) + Optional(line_breaks) )

    if indent:
        block_expr = Group( Optional(line_breaks) + indent_marker + statement_run + dedent_marker )
    else:
        block_expr = Group( Optional(line_breaks) + statement_run )

    # must stay after the Group(blockStatementExpr) construction above
    blockStatementExpr.ignore(_bslash + LineEnd())
    return block_expr.setName('indented block')
5209
# character sets for code points 0xa1-0xff, expanded by srange:
# alphas8bit covers 0xc0-0xff except 0xd7 and 0xf7; punc8bit covers 0xa1-0xbf plus 0xd7 and 0xf7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for any tag name made of alphas, alphanums, '_' and ':'
anyOpenTag,anyCloseTag = makeHTMLTags(
    Word(alphas,alphanums+"_:").setName('any tag')
)

# entity name -> replacement character, paired up positionally
_htmlEntityMap = dict(zip(
    "gt lt amp nbsp quot apos".split(),
    '><& "\''
))

# matches '&<name>;' for any name in _htmlEntityMap; the name is captured as group 'entity'
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap) + ");"
).setName("common HTML entity")
5217 """Helper parser action to replace common HTML entities with their special characters"""
5218 return _htmlEntityMap.get(t.entity)
5219
5220
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to (not including) the end of the current line; leading whitespace is kept
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")

# a backslash immediately followed by a newline is consumed as part of the comment
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# '+' binds tighter than '|': (block comment body + '*/') or a '//' comment
cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/') | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one unquoted list item: runs of printable non-comma characters, where a run of
# blanks/tabs is only accepted when it is not followed by a comma or the end of line
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
5248 """
5249 Here are some common low-level expressions that may be useful in jump-starting parser development:
5250 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
5251 - common L{programming identifiers<identifier>}
5252 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
5253 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
5254 - L{UUID<uuid>}
5255 Parse actions:
5256 - C{L{convertToInteger}}
5257 - C{L{convertToFloat}}
5258 - C{L{convertToDate}}
5259 - C{L{convertToDatetime}}
5260 - C{L{stripHTMLTags}}
5261
5262 Example::
5263 pyparsing_common.number.runTests('''
5264 # any int or real number, returned as the appropriate type
5265 100
5266 -100
5267 +100
5268 3.14159
5269 6.02e23
5270 1e-12
5271 ''')
5272
5273 pyparsing_common.fnumber.runTests('''
5274 # any int or real number, returned as float
5275 100
5276 -100
5277 +100
5278 3.14159
5279 6.02e23
5280 1e-12
5281 ''')
5282
5283 pyparsing_common.hex_integer.runTests('''
5284 # hex numbers
5285 100
5286 FF
5287 ''')
5288
5289 pyparsing_common.fraction.runTests('''
5290 # fractions
5291 1/2
5292 -3/4
5293 ''')
5294
5295 pyparsing_common.mixed_integer.runTests('''
5296 # mixed fractions
5297 1
5298 1/2
5299 -3/4
5300 1-3/4
5301 ''')
5302
5303 import uuid
5304 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
5305 pyparsing_common.uuid.runTests('''
5306 # uuid
5307 12345678-1234-5678-1234-567812345678
5308 ''')
5309 prints::
5310 # any int or real number, returned as the appropriate type
5311 100
5312 [100]
5313
5314 -100
5315 [-100]
5316
5317 +100
5318 [100]
5319
5320 3.14159
5321 [3.14159]
5322
5323 6.02e23
5324 [6.02e+23]
5325
5326 1e-12
5327 [1e-12]
5328
5329 # any int or real number, returned as float
5330 100
5331 [100.0]
5332
5333 -100
5334 [-100.0]
5335
5336 +100
5337 [100.0]
5338
5339 3.14159
5340 [3.14159]
5341
5342 6.02e23
5343 [6.02e+23]
5344
5345 1e-12
5346 [1e-12]
5347
5348 # hex numbers
5349 100
5350 [256]
5351
5352 FF
5353 [255]
5354
5355 # fractions
5356 1/2
5357 [0.5]
5358
5359 -3/4
5360 [-0.75]
5361
5362 # mixed fractions
5363 1
5364 [1]
5365
5366 1/2
5367 [0.5]
5368
5369 -3/4
5370 [-0.75]
5371
5372 1-3/4
5373 [1.75]
5374
5375 # uuid
5376 12345678-1234-5678-1234-567812345678
5377 [UUID('12345678-1234-5678-1234-567812345678')]
5378 """
5379
# Numeric and network-address members of the pyparsing_common namespace
# (the lambda on _short_ipv6_address below looks _ipv6_part up as
# pyparsing_common._ipv6_part).

convertToInteger = tokenMap(int)
"""
Parse action for converting parsed integers to Python int
"""

convertToFloat = tokenMap(float)
"""
Parse action for converting parsed numbers to Python float
"""

integer = Word(nums).setName("integer").setParseAction(convertToInteger)
"""expression that parses an unsigned integer, returns an int"""

hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
"""expression that parses a hexadecimal integer, returns an int"""

signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
"""expression that parses an integer with optional leading sign, returns an int"""

# numerator and denominator are copies of signedInteger converted to float,
# so the division in the added parse action is a float division
fraction = (signedInteger().setParseAction(convertToFloat) + '/' + signedInteger().setParseAction(convertToFloat)).setName("fraction")
"""fractional expression of an integer divided by an integer, returns a float"""
fraction.addParseAction(lambda t: t[0]/t[-1])

mixed_integer = (fraction | signedInteger + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
"""fraction, integer, or mixed integer of the form 'integer - fraction' (the '-' is optional);
returns the sum of the parts: a float when a fraction is present, otherwise the int itself"""
mixed_integer.addParseAction(sum)

real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
"""expression that parses a floating point number and returns a float"""

sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
"""expression that parses a floating point number with optional scientific notation and returns a float"""

# alternatives are tried in order, so the float forms are attempted before the plain integer
number = (sciReal | real | signedInteger).streamline()
"""any numeric expression, returns the corresponding Python type"""

fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
"""any int or real number, returned as float"""

identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
"IPv4 address (C{0.0.0.0 - 255.255.255.255})"

_ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
_full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
_short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
# the short form is only accepted when fewer than 8 of its tokens are hex groups
_short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
_mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
"IPv6 address (long, short, or mixed form)"

# the first delimiter is captured and must be repeated (\1) between all six hex pairs
mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
"MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
5436
5437 @staticmethod
5439 """
5440 Helper to create a parse action for converting parsed date string to Python datetime.date
5441
5442 Params -
5443 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
5444
5445 Example::
5446 date_expr = pyparsing_common.iso8601_date.copy()
5447 date_expr.setParseAction(pyparsing_common.convertToDate())
5448 print(date_expr.parseString("1999-12-31"))
5449 prints::
5450 [datetime.date(1999, 12, 31)]
5451 """
5452 def cvt_fn(s,l,t):
5453 try:
5454 return datetime.strptime(t[0], fmt).date()
5455 except ValueError as ve:
5456 raise ParseException(s, l, str(ve))
5457 return cvt_fn
5458
5459 @staticmethod
5461 """
5462 Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
5463
5464 Params -
5465 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
5466
5467 Example::
5468 dt_expr = pyparsing_common.iso8601_datetime.copy()
5469 dt_expr.setParseAction(pyparsing_common.convertToDatetime())
5470 print(dt_expr.parseString("1999-12-31T23:59:59.999"))
5471 prints::
5472 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
5473 """
5474 def cvt_fn(s,l,t):
5475 try:
5476 return datetime.strptime(t[0], fmt)
5477 except ValueError as ve:
5478 raise ParseException(s, l, str(ve))
5479 return cvt_fn
5480
# month and day are optional in the pattern; the fields are captured as
# named groups 'year', 'month' and 'day'
iso8601_date = Regex(
    r'(?P<year>\d{4})'
    r'(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
).setName("ISO8601 date")
"ISO8601 date (C{yyyy-mm-dd})"

# the adjacent raw literals concatenate into one pattern: date part, time part, optional zone
iso8601_datetime = Regex(
    r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)'
    r'[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?'
    r'(?P<tz>Z|[+-]\d\d:?\d\d)?'
).setName("ISO8601 datetime")
"ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

uuid = Regex(
    r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
).setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

# matches any opening or closing tag and drops it from the results
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
5491 @staticmethod
5505
if __name__ == "__main__":
    # Self-test, executed only when the module is run as a script: a small
    # SQL SELECT grammar followed by a few pyparsing_common expressions.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and passed through upcaseTokens
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)

    columnNameList = Group(delimitedList(columnName)).setName("columns")
    tableNameList = Group(delimitedList(tableName)).setName("tables")
    columnSpec = ('*' | columnNameList)

    simpleSQL = (selectToken("command")
                 + columnSpec("columns")
                 + fromToken
                 + tableNameList("tables"))

    sql_samples = """
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """
    simpleSQL.runTests(sql_samples)

    # number and fnumber are exercised with the same sample lines, in that order
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    for numeric_expr in (pyparsing_common.number, pyparsing_common.fnumber):
        numeric_expr.runTests(numeric_samples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # convert matched UUID text into uuid.UUID objects before running the sample
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5576