1 | # Copyright 2006 Google, Inc. All Rights Reserved.
|
2 | # Licensed to PSF under a Contributor Agreement.
|
3 | from __future__ import print_function
|
4 | """
|
5 | Python parse tree definitions.
|
6 |
|
7 | This is a very concrete parse tree; we need to keep every token and
|
8 | even the comments and whitespace between tokens.
|
9 |
|
10 | There's also a pattern matching implementation here.
|
11 | """
|
12 |
|
13 | __author__ = "Guido van Rossum <guido@python.org>"
|
14 |
|
15 | import sys
|
16 | import warnings
|
17 | from io import StringIO
|
18 |
|
19 | HUGE = 0x7FFFFFFF # maximum repeat count, default max
|
20 |
|
21 | _type_reprs = {}
|
22 |
|
23 | def type_repr(type_num):
|
24 | return _type_reprs[type_num]
|
25 |
|
26 |
|
27 | def Init(python_symbols):
|
28 | # printing tokens is possible but not as useful
|
29 | # from .pgen2 import token // token.__dict__.items():
|
30 |
|
31 | # Fill in the dict.
|
32 | global _type_reprs
|
33 | for name, val in python_symbols.__dict__.items():
|
34 | if type(val) == int:
|
35 | _type_reprs[val] = name
|
36 |
|
37 |
|
38 | class Base(object):
|
39 |
|
40 | """
|
41 | Abstract base class for Node and Leaf.
|
42 |
|
43 | This provides some default functionality and boilerplate using the
|
44 | template pattern.
|
45 |
|
46 | A node may be a subnode of at most one parent.
|
47 | """
|
48 |
|
49 | # Default values for instance variables
|
50 | type = None # int: token number (< 256) or symbol number (>= 256)
|
51 | parent = None # Parent node pointer, or None
|
52 | children = () # Tuple of subnodes
|
53 | was_changed = False
|
54 | was_checked = False
|
55 |
|
56 | def __new__(cls, *args, **kwds):
|
57 | """Constructor that prevents Base from being instantiated."""
|
58 | assert cls is not Base, "Cannot instantiate Base"
|
59 | return object.__new__(cls)
|
60 |
|
61 | def __eq__(self, other):
|
62 | """
|
63 | Compare two nodes for equality.
|
64 |
|
65 | This calls the method _eq().
|
66 | """
|
67 | if self.__class__ is not other.__class__:
|
68 | return NotImplemented
|
69 | return self._eq(other)
|
70 |
|
71 | __hash__ = None # For Py3 compatibility.
|
72 |
|
73 | def _eq(self, other):
|
74 | """
|
75 | Compare two nodes for equality.
|
76 |
|
77 | This is called by __eq__ and __ne__. It is only called if the two nodes
|
78 | have the same type. This must be implemented by the concrete subclass.
|
79 | Nodes should be considered equal if they have the same structure,
|
80 | ignoring the prefix string and other context information.
|
81 | """
|
82 | raise NotImplementedError
|
83 |
|
84 | def clone(self):
|
85 | """
|
86 | Return a cloned (deep) copy of self.
|
87 |
|
88 | This must be implemented by the concrete subclass.
|
89 | """
|
90 | raise NotImplementedError
|
91 |
|
92 | def replace(self, new):
|
93 | """Replace this node with a new one in the parent."""
|
94 | assert self.parent is not None, str(self)
|
95 | assert new is not None
|
96 | if not isinstance(new, list):
|
97 | new = [new]
|
98 | l_children = []
|
99 | found = False
|
100 | for ch in self.parent.children:
|
101 | if ch is self:
|
102 | assert not found, (self.parent.children, self, new)
|
103 | if new is not None:
|
104 | l_children.extend(new)
|
105 | found = True
|
106 | else:
|
107 | l_children.append(ch)
|
108 | assert found, (self.children, self, new)
|
109 | self.parent.changed()
|
110 | self.parent.children = l_children
|
111 | for x in new:
|
112 | x.parent = self.parent
|
113 | self.parent = None
|
114 |
|
115 | def get_lineno(self):
|
116 | """Return the line number which generated the invocant node."""
|
117 | node = self
|
118 | while not isinstance(node, Leaf):
|
119 | if not node.children:
|
120 | return
|
121 | node = node.children[0]
|
122 | return node.lineno
|
123 |
|
124 | def changed(self):
|
125 | if self.parent:
|
126 | self.parent.changed()
|
127 | self.was_changed = True
|
128 |
|
129 | def remove(self):
|
130 | """
|
131 | Remove the node from the tree. Returns the position of the node in its
|
132 | parent's children before it was removed.
|
133 | """
|
134 | if self.parent:
|
135 | for i, node in enumerate(self.parent.children):
|
136 | if node is self:
|
137 | self.parent.changed()
|
138 | del self.parent.children[i]
|
139 | self.parent = None
|
140 | return i
|
141 |
|
142 | @property
|
143 | def next_sibling(self):
|
144 | """
|
145 | The node immediately following the invocant in their parent's children
|
146 | list. If the invocant does not have a next sibling, it is None
|
147 | """
|
148 | if self.parent is None:
|
149 | return None
|
150 |
|
151 | # Can't use index(); we need to test by identity
|
152 | for i, child in enumerate(self.parent.children):
|
153 | if child is self:
|
154 | try:
|
155 | return self.parent.children[i+1]
|
156 | except IndexError:
|
157 | return None
|
158 |
|
159 | @property
|
160 | def prev_sibling(self):
|
161 | """
|
162 | The node immediately preceding the invocant in their parent's children
|
163 | list. If the invocant does not have a previous sibling, it is None.
|
164 | """
|
165 | if self.parent is None:
|
166 | return None
|
167 |
|
168 | # Can't use index(); we need to test by identity
|
169 | for i, child in enumerate(self.parent.children):
|
170 | if child is self:
|
171 | if i == 0:
|
172 | return None
|
173 | return self.parent.children[i-1]
|
174 |
|
175 | def depth(self):
|
176 | if self.parent is None:
|
177 | return 0
|
178 | return 1 + self.parent.depth()
|
179 |
|
180 | def get_suffix(self):
|
181 | """
|
182 | Return the string immediately following the invocant node. This is
|
183 | effectively equivalent to node.next_sibling.prefix
|
184 | """
|
185 | next_sib = self.next_sibling
|
186 | if next_sib is None:
|
187 | return ""
|
188 | return next_sib.prefix
|
189 |
|
190 | if sys.version_info < (3, 0):
|
191 | def __str__(self):
|
192 | return str(self).encode("ascii")
|
193 |
|
194 | class Node(Base):
|
195 |
|
196 | """Concrete implementation for interior nodes."""
|
197 |
|
198 | def __init__(self, type, children,
|
199 | context=None,
|
200 | prefix=None,
|
201 | fixers_applied=None):
|
202 | """
|
203 | Initializer.
|
204 |
|
205 | Takes a type constant (a symbol number >= 256), a sequence of
|
206 | child nodes, and an optional context keyword argument.
|
207 |
|
208 | As a side effect, the parent pointers of the children are updated.
|
209 | """
|
210 | assert type >= 256, type
|
211 | self.type = type
|
212 | self.children = list(children)
|
213 | for ch in self.children:
|
214 | assert ch.parent is None, repr(ch)
|
215 | ch.parent = self
|
216 | if prefix is not None:
|
217 | self.prefix = prefix
|
218 | if fixers_applied:
|
219 | self.fixers_applied = fixers_applied[:]
|
220 | else:
|
221 | self.fixers_applied = None
|
222 |
|
223 | def __repr__(self):
|
224 | """Return a canonical string representation."""
|
225 | return "%s(%s, %r)" % (self.__class__.__name__,
|
226 | type_repr(self.type),
|
227 | self.children)
|
228 |
|
229 | def PrettyPrint(self, f, depth=0):
|
230 | indent = ' ' * depth
|
231 | print(indent + type_repr(self.type), file=f)
|
232 | for c in self.children:
|
233 | c.PrettyPrint(f, depth+1)
|
234 |
|
235 | def __unicode__(self):
|
236 | """
|
237 | Return a pretty string representation.
|
238 |
|
239 | This reproduces the input source exactly.
|
240 | """
|
241 | return "".join(map(str, self.children))
|
242 |
|
243 | if sys.version_info > (3, 0):
|
244 | __str__ = __unicode__
|
245 |
|
246 | def _eq(self, other):
|
247 | """Compare two nodes for equality."""
|
248 | return (self.type, self.children) == (other.type, other.children)
|
249 |
|
250 | def clone(self):
|
251 | """Return a cloned (deep) copy of self."""
|
252 | return Node(self.type, [ch.clone() for ch in self.children],
|
253 | fixers_applied=self.fixers_applied)
|
254 |
|
255 | def _prefix_getter(self):
|
256 | """
|
257 | The whitespace and comments preceding this node in the input.
|
258 | """
|
259 | if not self.children:
|
260 | return ""
|
261 | return self.children[0].prefix
|
262 |
|
263 | def _prefix_setter(self, prefix):
|
264 | if self.children:
|
265 | self.children[0].prefix = prefix
|
266 |
|
267 | prefix = property(_prefix_getter, _prefix_setter)
|
268 |
|
269 | def set_child(self, i, child):
|
270 | """
|
271 | Equivalent to 'node.children[i] = child'. This method also sets the
|
272 | child's parent attribute appropriately.
|
273 | """
|
274 | child.parent = self
|
275 | self.children[i].parent = None
|
276 | self.children[i] = child
|
277 | self.changed()
|
278 |
|
279 | def insert_child(self, i, child):
|
280 | """
|
281 | Equivalent to 'node.children.insert(i, child)'. This method also sets
|
282 | the child's parent attribute appropriately.
|
283 | """
|
284 | child.parent = self
|
285 | self.children.insert(i, child)
|
286 | self.changed()
|
287 |
|
288 | def append_child(self, child):
|
289 | """
|
290 | Equivalent to 'node.children.append(child)'. This method also sets the
|
291 | child's parent attribute appropriately.
|
292 | """
|
293 | child.parent = self
|
294 | self.children.append(child)
|
295 | self.changed()
|
296 |
|
297 |
|
298 | class Leaf(Base):
|
299 |
|
300 | """Concrete implementation for leaf nodes."""
|
301 |
|
302 | # Default values for instance variables
|
303 | _prefix = "" # Whitespace and comments preceding this token in the input
|
304 | lineno = 0 # Line where this token starts in the input
|
305 | column = 0 # Column where this token tarts in the input
|
306 |
|
307 | def __init__(self, type, value,
|
308 | context=None,
|
309 | prefix=None,
|
310 | fixers_applied=[]):
|
311 | """
|
312 | Initializer.
|
313 |
|
314 | Takes a type constant (a token number < 256), a string value, and an
|
315 | optional context keyword argument.
|
316 | """
|
317 | assert 0 <= type < 256, type
|
318 | if context is not None:
|
319 | self._prefix, (self.lineno, self.column) = context
|
320 | self.type = type
|
321 | self.value = value
|
322 | if prefix is not None:
|
323 | self._prefix = prefix
|
324 | self.fixers_applied = fixers_applied[:]
|
325 |
|
326 | def __repr__(self):
|
327 | """Return a canonical string representation."""
|
328 | return "%s(%r, %r)" % (self.__class__.__name__,
|
329 | self.type,
|
330 | self.value)
|
331 |
|
332 | def PrettyPrint(self, f, depth=0):
|
333 | indent = ' ' * depth
|
334 | print("%s(%r, %r)" % (indent, self.type, self.value), file=f)
|
335 |
|
336 | def __unicode__(self):
|
337 | """
|
338 | Return a pretty string representation.
|
339 |
|
340 | This reproduces the input source exactly.
|
341 | """
|
342 | return self.prefix + str(self.value)
|
343 |
|
344 | if sys.version_info > (3, 0):
|
345 | __str__ = __unicode__
|
346 |
|
347 | def _eq(self, other):
|
348 | """Compare two nodes for equality."""
|
349 | return (self.type, self.value) == (other.type, other.value)
|
350 |
|
351 | def clone(self):
|
352 | """Return a cloned (deep) copy of self."""
|
353 | return Leaf(self.type, self.value,
|
354 | (self.prefix, (self.lineno, self.column)),
|
355 | fixers_applied=self.fixers_applied)
|
356 |
|
357 | def _prefix_getter(self):
|
358 | """
|
359 | The whitespace and comments preceding this token in the input.
|
360 | """
|
361 | return self._prefix
|
362 |
|
363 | def _prefix_setter(self, prefix):
|
364 | self.changed()
|
365 | self._prefix = prefix
|
366 |
|
367 | prefix = property(_prefix_getter, _prefix_setter)
|
368 |
|
369 | def convert(gr, raw_node):
|
370 | """
|
371 | Convert raw node information to a Node or Leaf instance.
|
372 |
|
373 | This is passed to the parser driver which calls it whenever a reduction of a
|
374 | grammar rule produces a new complete node, so that the tree is build
|
375 | strictly bottom-up.
|
376 | """
|
377 | type, value, context, children = raw_node
|
378 | if children or type in gr.number2symbol:
|
379 | # If there's exactly one child, return that child instead of
|
380 | # creating a new node.
|
381 | if len(children) == 1:
|
382 | return children[0]
|
383 | return Node(type, children, context=context)
|
384 | else:
|
385 | return Leaf(type, value, context=context)
|
386 |
|
387 |
|
388 | class BasePattern(object):
|
389 |
|
390 | """
|
391 | A pattern is a tree matching pattern.
|
392 |
|
393 | It looks for a specific node type (token or symbol), and
|
394 | optionally for a specific content.
|
395 |
|
396 | This is an abstract base class. There are three concrete
|
397 | subclasses:
|
398 |
|
399 | - LeafPattern matches a single leaf node;
|
400 | - NodePattern matches a single node (usually non-leaf);
|
401 | - WildcardPattern matches a sequence of nodes of variable length.
|
402 | """
|
403 |
|
404 | # Defaults for instance variables
|
405 | type = None # Node type (token if < 256, symbol if >= 256)
|
406 | content = None # Optional content matching pattern
|
407 | name = None # Optional name used to store match in results dict
|
408 |
|
409 | def __new__(cls, *args, **kwds):
|
410 | """Constructor that prevents BasePattern from being instantiated."""
|
411 | assert cls is not BasePattern, "Cannot instantiate BasePattern"
|
412 | return object.__new__(cls)
|
413 |
|
414 | def __repr__(self):
|
415 | args = [type_repr(self.type), self.content, self.name]
|
416 | while args and args[-1] is None:
|
417 | del args[-1]
|
418 | return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
|
419 |
|
420 | def optimize(self):
|
421 | """
|
422 | A subclass can define this as a hook for optimizations.
|
423 |
|
424 | Returns either self or another node with the same effect.
|
425 | """
|
426 | return self
|
427 |
|
428 | def match(self, node, results=None):
|
429 | """
|
430 | Does this pattern exactly match a node?
|
431 |
|
432 | Returns True if it matches, False if not.
|
433 |
|
434 | If results is not None, it must be a dict which will be
|
435 | updated with the nodes matching named subpatterns.
|
436 |
|
437 | Default implementation for non-wildcard patterns.
|
438 | """
|
439 | if self.type is not None and node.type != self.type:
|
440 | return False
|
441 | if self.content is not None:
|
442 | r = None
|
443 | if results is not None:
|
444 | r = {}
|
445 | if not self._submatch(node, r):
|
446 | return False
|
447 | if r:
|
448 | results.update(r)
|
449 | if results is not None and self.name:
|
450 | results[self.name] = node
|
451 | return True
|
452 |
|
453 | def match_seq(self, nodes, results=None):
|
454 | """
|
455 | Does this pattern exactly match a sequence of nodes?
|
456 |
|
457 | Default implementation for non-wildcard patterns.
|
458 | """
|
459 | if len(nodes) != 1:
|
460 | return False
|
461 | return self.match(nodes[0], results)
|
462 |
|
463 | def generate_matches(self, nodes):
|
464 | """
|
465 | Generator yielding all matches for this pattern.
|
466 |
|
467 | Default implementation for non-wildcard patterns.
|
468 | """
|
469 | r = {}
|
470 | if nodes and self.match(nodes[0], r):
|
471 | yield 1, r
|
472 |
|
473 |
|
474 | class LeafPattern(BasePattern):
|
475 |
|
476 | def __init__(self, type=None, content=None, name=None):
|
477 | """
|
478 | Initializer. Takes optional type, content, and name.
|
479 |
|
480 | The type, if given must be a token type (< 256). If not given,
|
481 | this matches any *leaf* node; the content may still be required.
|
482 |
|
483 | The content, if given, must be a string.
|
484 |
|
485 | If a name is given, the matching node is stored in the results
|
486 | dict under that key.
|
487 | """
|
488 | if type is not None:
|
489 | assert 0 <= type < 256, type
|
490 | if content is not None:
|
491 | assert isinstance(content, str), repr(content)
|
492 | self.type = type
|
493 | self.content = content
|
494 | self.name = name
|
495 |
|
496 | def match(self, node, results=None):
|
497 | """Override match() to insist on a leaf node."""
|
498 | if not isinstance(node, Leaf):
|
499 | return False
|
500 | return BasePattern.match(self, node, results)
|
501 |
|
502 | def _submatch(self, node, results=None):
|
503 | """
|
504 | Match the pattern's content to the node's children.
|
505 |
|
506 | This assumes the node type matches and self.content is not None.
|
507 |
|
508 | Returns True if it matches, False if not.
|
509 |
|
510 | If results is not None, it must be a dict which will be
|
511 | updated with the nodes matching named subpatterns.
|
512 |
|
513 | When returning False, the results dict may still be updated.
|
514 | """
|
515 | return self.content == node.value
|
516 |
|
517 |
|
518 | class NodePattern(BasePattern):
|
519 |
|
520 | wildcards = False
|
521 |
|
522 | def __init__(self, type=None, content=None, name=None):
|
523 | """
|
524 | Initializer. Takes optional type, content, and name.
|
525 |
|
526 | The type, if given, must be a symbol type (>= 256). If the
|
527 | type is None this matches *any* single node (leaf or not),
|
528 | except if content is not None, in which it only matches
|
529 | non-leaf nodes that also match the content pattern.
|
530 |
|
531 | The content, if not None, must be a sequence of Patterns that
|
532 | must match the node's children exactly. If the content is
|
533 | given, the type must not be None.
|
534 |
|
535 | If a name is given, the matching node is stored in the results
|
536 | dict under that key.
|
537 | """
|
538 | if type is not None:
|
539 | assert type >= 256, type
|
540 | if content is not None:
|
541 | assert not isinstance(content, str), repr(content)
|
542 | content = list(content)
|
543 | for i, item in enumerate(content):
|
544 | assert isinstance(item, BasePattern), (i, item)
|
545 | if isinstance(item, WildcardPattern):
|
546 | self.wildcards = True
|
547 | self.type = type
|
548 | self.content = content
|
549 | self.name = name
|
550 |
|
551 | def _submatch(self, node, results=None):
|
552 | """
|
553 | Match the pattern's content to the node's children.
|
554 |
|
555 | This assumes the node type matches and self.content is not None.
|
556 |
|
557 | Returns True if it matches, False if not.
|
558 |
|
559 | If results is not None, it must be a dict which will be
|
560 | updated with the nodes matching named subpatterns.
|
561 |
|
562 | When returning False, the results dict may still be updated.
|
563 | """
|
564 | if self.wildcards:
|
565 | for c, r in generate_matches(self.content, node.children):
|
566 | if c == len(node.children):
|
567 | if results is not None:
|
568 | results.update(r)
|
569 | return True
|
570 | return False
|
571 | if len(self.content) != len(node.children):
|
572 | return False
|
573 | for subpattern, child in zip(self.content, node.children):
|
574 | if not subpattern.match(child, results):
|
575 | return False
|
576 | return True
|
577 |
|
578 |
|
579 | class WildcardPattern(BasePattern):
|
580 |
|
581 | """
|
582 | A wildcard pattern can match zero or more nodes.
|
583 |
|
584 | This has all the flexibility needed to implement patterns like:
|
585 |
|
586 | .* .+ .? .{m,n}
|
587 | (a b c | d e | f)
|
588 | (...)* (...)+ (...)? (...){m,n}
|
589 |
|
590 | except it always uses non-greedy matching.
|
591 | """
|
592 |
|
593 | def __init__(self, content=None, min=0, max=HUGE, name=None):
|
594 | """
|
595 | Initializer.
|
596 |
|
597 | Args:
|
598 | content: optional sequence of subsequences of patterns;
|
599 | if absent, matches one node;
|
600 | if present, each subsequence is an alternative [*]
|
601 | min: optional minimum number of times to match, default 0
|
602 | max: optional maximum number of times to match, default HUGE
|
603 | name: optional name assigned to this match
|
604 |
|
605 | [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
|
606 | equivalent to (a b c | d e | f g h); if content is None,
|
607 | this is equivalent to '.' in regular expression terms.
|
608 | The min and max parameters work as follows:
|
609 | min=0, max=maxint: .*
|
610 | min=1, max=maxint: .+
|
611 | min=0, max=1: .?
|
612 | min=1, max=1: .
|
613 | If content is not None, replace the dot with the parenthesized
|
614 | list of alternatives, e.g. (a b c | d e | f g h)*
|
615 | """
|
616 | assert 0 <= min <= max <= HUGE, (min, max)
|
617 | if content is not None:
|
618 | content = tuple(map(tuple, content)) # Protect against alterations
|
619 | # Check sanity of alternatives
|
620 | assert len(content), repr(content) # Can't have zero alternatives
|
621 | for alt in content:
|
622 | assert len(alt), repr(alt) # Can have empty alternatives
|
623 | self.content = content
|
624 | self.min = min
|
625 | self.max = max
|
626 | self.name = name
|
627 |
|
628 | def optimize(self):
|
629 | """Optimize certain stacked wildcard patterns."""
|
630 | subpattern = None
|
631 | if (self.content is not None and
|
632 | len(self.content) == 1 and len(self.content[0]) == 1):
|
633 | subpattern = self.content[0][0]
|
634 | if self.min == 1 and self.max == 1:
|
635 | if self.content is None:
|
636 | return NodePattern(name=self.name)
|
637 | if subpattern is not None and self.name == subpattern.name:
|
638 | return subpattern.optimize()
|
639 | if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
|
640 | subpattern.min <= 1 and self.name == subpattern.name):
|
641 | return WildcardPattern(subpattern.content,
|
642 | self.min*subpattern.min,
|
643 | self.max*subpattern.max,
|
644 | subpattern.name)
|
645 | return self
|
646 |
|
647 | def match(self, node, results=None):
|
648 | """Does this pattern exactly match a node?"""
|
649 | return self.match_seq([node], results)
|
650 |
|
651 | def match_seq(self, nodes, results=None):
|
652 | """Does this pattern exactly match a sequence of nodes?"""
|
653 | for c, r in self.generate_matches(nodes):
|
654 | if c == len(nodes):
|
655 | if results is not None:
|
656 | results.update(r)
|
657 | if self.name:
|
658 | results[self.name] = list(nodes)
|
659 | return True
|
660 | return False
|
661 |
|
662 | def generate_matches(self, nodes):
|
663 | """
|
664 | Generator yielding matches for a sequence of nodes.
|
665 |
|
666 | Args:
|
667 | nodes: sequence of nodes
|
668 |
|
669 | Yields:
|
670 | (count, results) tuples where:
|
671 | count: the match comprises nodes[:count];
|
672 | results: dict containing named submatches.
|
673 | """
|
674 | if self.content is None:
|
675 | # Shortcut for special case (see __init__.__doc__)
|
676 | for count in range(self.min, 1 + min(len(nodes), self.max)):
|
677 | r = {}
|
678 | if self.name:
|
679 | r[self.name] = nodes[:count]
|
680 | yield count, r
|
681 | elif self.name == "bare_name":
|
682 | yield self._bare_name_matches(nodes)
|
683 | else:
|
684 | # The reason for this is that hitting the recursion limit usually
|
685 | # results in some ugly messages about how RuntimeErrors are being
|
686 | # ignored. We only have to do this on CPython, though, because other
|
687 | # implementations don't have this nasty bug in the first place.
|
688 | if hasattr(sys, "getrefcount"):
|
689 | save_stderr = sys.stderr
|
690 | sys.stderr = StringIO()
|
691 | try:
|
692 | for count, r in self._recursive_matches(nodes, 0):
|
693 | if self.name:
|
694 | r[self.name] = nodes[:count]
|
695 | yield count, r
|
696 | except RuntimeError:
|
697 | # We fall back to the iterative pattern matching scheme if the recursive
|
698 | # scheme hits the recursion limit.
|
699 | for count, r in self._iterative_matches(nodes):
|
700 | if self.name:
|
701 | r[self.name] = nodes[:count]
|
702 | yield count, r
|
703 | finally:
|
704 | if hasattr(sys, "getrefcount"):
|
705 | sys.stderr = save_stderr
|
706 |
|
707 | def _iterative_matches(self, nodes):
|
708 | """Helper to iteratively yield the matches."""
|
709 | nodelen = len(nodes)
|
710 | if 0 >= self.min:
|
711 | yield 0, {}
|
712 |
|
713 | results = []
|
714 | # generate matches that use just one alt from self.content
|
715 | for alt in self.content:
|
716 | for c, r in generate_matches(alt, nodes):
|
717 | yield c, r
|
718 | results.append((c, r))
|
719 |
|
720 | # for each match, iterate down the nodes
|
721 | while results:
|
722 | new_results = []
|
723 | for c0, r0 in results:
|
724 | # stop if the entire set of nodes has been matched
|
725 | if c0 < nodelen and c0 <= self.max:
|
726 | for alt in self.content:
|
727 | for c1, r1 in generate_matches(alt, nodes[c0:]):
|
728 | if c1 > 0:
|
729 | r = {}
|
730 | r.update(r0)
|
731 | r.update(r1)
|
732 | yield c0 + c1, r
|
733 | new_results.append((c0 + c1, r))
|
734 | results = new_results
|
735 |
|
736 | def _bare_name_matches(self, nodes):
|
737 | """Special optimized matcher for bare_name."""
|
738 | count = 0
|
739 | r = {}
|
740 | done = False
|
741 | max = len(nodes)
|
742 | while not done and count < max:
|
743 | done = True
|
744 | for leaf in self.content:
|
745 | if leaf[0].match(nodes[count], r):
|
746 | count += 1
|
747 | done = False
|
748 | break
|
749 | r[self.name] = nodes[:count]
|
750 | return count, r
|
751 |
|
752 | def _recursive_matches(self, nodes, count):
|
753 | """Helper to recursively yield the matches."""
|
754 | assert self.content is not None
|
755 | if count >= self.min:
|
756 | yield 0, {}
|
757 | if count < self.max:
|
758 | for alt in self.content:
|
759 | for c0, r0 in generate_matches(alt, nodes):
|
760 | for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
|
761 | r = {}
|
762 | r.update(r0)
|
763 | r.update(r1)
|
764 | yield c0 + c1, r
|
765 |
|
766 |
|
767 | class NegatedPattern(BasePattern):
|
768 |
|
769 | def __init__(self, content=None):
|
770 | """
|
771 | Initializer.
|
772 |
|
773 | The argument is either a pattern or None. If it is None, this
|
774 | only matches an empty sequence (effectively '$' in regex
|
775 | lingo). If it is not None, this matches whenever the argument
|
776 | pattern doesn't have any matches.
|
777 | """
|
778 | if content is not None:
|
779 | assert isinstance(content, BasePattern), repr(content)
|
780 | self.content = content
|
781 |
|
782 | def match(self, node):
|
783 | # We never match a node in its entirety
|
784 | return False
|
785 |
|
786 | def match_seq(self, nodes):
|
787 | # We only match an empty sequence of nodes in its entirety
|
788 | return len(nodes) == 0
|
789 |
|
790 | def generate_matches(self, nodes):
|
791 | if self.content is None:
|
792 | # Return a match if there is an empty sequence
|
793 | if len(nodes) == 0:
|
794 | yield 0, {}
|
795 | else:
|
796 | # Return a match if the argument pattern has no matches
|
797 | for c, r in self.content.generate_matches(nodes):
|
798 | return
|
799 | yield 0, {}
|
800 |
|
801 |
|
802 | def generate_matches(patterns, nodes):
|
803 | """
|
804 | Generator yielding matches for a sequence of patterns and nodes.
|
805 |
|
806 | Args:
|
807 | patterns: a sequence of patterns
|
808 | nodes: a sequence of nodes
|
809 |
|
810 | Yields:
|
811 | (count, results) tuples where:
|
812 | count: the entire sequence of patterns matches nodes[:count];
|
813 | results: dict containing named submatches.
|
814 | """
|
815 | if not patterns:
|
816 | yield 0, {}
|
817 | else:
|
818 | p, rest = patterns[0], patterns[1:]
|
819 | for c0, r0 in p.generate_matches(nodes):
|
820 | if not rest:
|
821 | yield c0, r0
|
822 | else:
|
823 | for c1, r1 in generate_matches(rest, nodes[c0:]):
|
824 | r = {}
|
825 | r.update(r0)
|
826 | r.update(r1)
|
827 | yield c0 + c1, r
|