OILS / opy / _regtest / src / opy / pytree.py View on Github | oilshell.org

827 lines, 431 significant
1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3from __future__ import print_function
4"""
5Python parse tree definitions.
6
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
15import sys
16import warnings
17from io import StringIO
18
19HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
21_type_reprs = {}
22
23def type_repr(type_num):
24 return _type_reprs[type_num]
25
26
27def Init(python_symbols):
28 # printing tokens is possible but not as useful
29 # from .pgen2 import token // token.__dict__.items():
30
31 # Fill in the dict.
32 global _type_reprs
33 for name, val in python_symbols.__dict__.items():
34 if type(val) == int:
35 _type_reprs[val] = name
36
37
38class Base(object):
39
40 """
41 Abstract base class for Node and Leaf.
42
43 This provides some default functionality and boilerplate using the
44 template pattern.
45
46 A node may be a subnode of at most one parent.
47 """
48
49 # Default values for instance variables
50 type = None # int: token number (< 256) or symbol number (>= 256)
51 parent = None # Parent node pointer, or None
52 children = () # Tuple of subnodes
53 was_changed = False
54 was_checked = False
55
56 def __new__(cls, *args, **kwds):
57 """Constructor that prevents Base from being instantiated."""
58 assert cls is not Base, "Cannot instantiate Base"
59 return object.__new__(cls)
60
61 def __eq__(self, other):
62 """
63 Compare two nodes for equality.
64
65 This calls the method _eq().
66 """
67 if self.__class__ is not other.__class__:
68 return NotImplemented
69 return self._eq(other)
70
71 __hash__ = None # For Py3 compatibility.
72
73 def _eq(self, other):
74 """
75 Compare two nodes for equality.
76
77 This is called by __eq__ and __ne__. It is only called if the two nodes
78 have the same type. This must be implemented by the concrete subclass.
79 Nodes should be considered equal if they have the same structure,
80 ignoring the prefix string and other context information.
81 """
82 raise NotImplementedError
83
84 def clone(self):
85 """
86 Return a cloned (deep) copy of self.
87
88 This must be implemented by the concrete subclass.
89 """
90 raise NotImplementedError
91
92 def replace(self, new):
93 """Replace this node with a new one in the parent."""
94 assert self.parent is not None, str(self)
95 assert new is not None
96 if not isinstance(new, list):
97 new = [new]
98 l_children = []
99 found = False
100 for ch in self.parent.children:
101 if ch is self:
102 assert not found, (self.parent.children, self, new)
103 if new is not None:
104 l_children.extend(new)
105 found = True
106 else:
107 l_children.append(ch)
108 assert found, (self.children, self, new)
109 self.parent.changed()
110 self.parent.children = l_children
111 for x in new:
112 x.parent = self.parent
113 self.parent = None
114
115 def get_lineno(self):
116 """Return the line number which generated the invocant node."""
117 node = self
118 while not isinstance(node, Leaf):
119 if not node.children:
120 return
121 node = node.children[0]
122 return node.lineno
123
124 def changed(self):
125 if self.parent:
126 self.parent.changed()
127 self.was_changed = True
128
129 def remove(self):
130 """
131 Remove the node from the tree. Returns the position of the node in its
132 parent's children before it was removed.
133 """
134 if self.parent:
135 for i, node in enumerate(self.parent.children):
136 if node is self:
137 self.parent.changed()
138 del self.parent.children[i]
139 self.parent = None
140 return i
141
142 @property
143 def next_sibling(self):
144 """
145 The node immediately following the invocant in their parent's children
146 list. If the invocant does not have a next sibling, it is None
147 """
148 if self.parent is None:
149 return None
150
151 # Can't use index(); we need to test by identity
152 for i, child in enumerate(self.parent.children):
153 if child is self:
154 try:
155 return self.parent.children[i+1]
156 except IndexError:
157 return None
158
159 @property
160 def prev_sibling(self):
161 """
162 The node immediately preceding the invocant in their parent's children
163 list. If the invocant does not have a previous sibling, it is None.
164 """
165 if self.parent is None:
166 return None
167
168 # Can't use index(); we need to test by identity
169 for i, child in enumerate(self.parent.children):
170 if child is self:
171 if i == 0:
172 return None
173 return self.parent.children[i-1]
174
175 def depth(self):
176 if self.parent is None:
177 return 0
178 return 1 + self.parent.depth()
179
180 def get_suffix(self):
181 """
182 Return the string immediately following the invocant node. This is
183 effectively equivalent to node.next_sibling.prefix
184 """
185 next_sib = self.next_sibling
186 if next_sib is None:
187 return ""
188 return next_sib.prefix
189
190 if sys.version_info < (3, 0):
191 def __str__(self):
192 return str(self).encode("ascii")
193
194class Node(Base):
195
196 """Concrete implementation for interior nodes."""
197
198 def __init__(self, type, children,
199 context=None,
200 prefix=None,
201 fixers_applied=None):
202 """
203 Initializer.
204
205 Takes a type constant (a symbol number >= 256), a sequence of
206 child nodes, and an optional context keyword argument.
207
208 As a side effect, the parent pointers of the children are updated.
209 """
210 assert type >= 256, type
211 self.type = type
212 self.children = list(children)
213 for ch in self.children:
214 assert ch.parent is None, repr(ch)
215 ch.parent = self
216 if prefix is not None:
217 self.prefix = prefix
218 if fixers_applied:
219 self.fixers_applied = fixers_applied[:]
220 else:
221 self.fixers_applied = None
222
223 def __repr__(self):
224 """Return a canonical string representation."""
225 return "%s(%s, %r)" % (self.__class__.__name__,
226 type_repr(self.type),
227 self.children)
228
229 def PrettyPrint(self, f, depth=0):
230 indent = ' ' * depth
231 print(indent + type_repr(self.type), file=f)
232 for c in self.children:
233 c.PrettyPrint(f, depth+1)
234
235 def __unicode__(self):
236 """
237 Return a pretty string representation.
238
239 This reproduces the input source exactly.
240 """
241 return "".join(map(str, self.children))
242
243 if sys.version_info > (3, 0):
244 __str__ = __unicode__
245
246 def _eq(self, other):
247 """Compare two nodes for equality."""
248 return (self.type, self.children) == (other.type, other.children)
249
250 def clone(self):
251 """Return a cloned (deep) copy of self."""
252 return Node(self.type, [ch.clone() for ch in self.children],
253 fixers_applied=self.fixers_applied)
254
255 def _prefix_getter(self):
256 """
257 The whitespace and comments preceding this node in the input.
258 """
259 if not self.children:
260 return ""
261 return self.children[0].prefix
262
263 def _prefix_setter(self, prefix):
264 if self.children:
265 self.children[0].prefix = prefix
266
267 prefix = property(_prefix_getter, _prefix_setter)
268
269 def set_child(self, i, child):
270 """
271 Equivalent to 'node.children[i] = child'. This method also sets the
272 child's parent attribute appropriately.
273 """
274 child.parent = self
275 self.children[i].parent = None
276 self.children[i] = child
277 self.changed()
278
279 def insert_child(self, i, child):
280 """
281 Equivalent to 'node.children.insert(i, child)'. This method also sets
282 the child's parent attribute appropriately.
283 """
284 child.parent = self
285 self.children.insert(i, child)
286 self.changed()
287
288 def append_child(self, child):
289 """
290 Equivalent to 'node.children.append(child)'. This method also sets the
291 child's parent attribute appropriately.
292 """
293 child.parent = self
294 self.children.append(child)
295 self.changed()
296
297
298class Leaf(Base):
299
300 """Concrete implementation for leaf nodes."""
301
302 # Default values for instance variables
303 _prefix = "" # Whitespace and comments preceding this token in the input
304 lineno = 0 # Line where this token starts in the input
305 column = 0 # Column where this token tarts in the input
306
307 def __init__(self, type, value,
308 context=None,
309 prefix=None,
310 fixers_applied=[]):
311 """
312 Initializer.
313
314 Takes a type constant (a token number < 256), a string value, and an
315 optional context keyword argument.
316 """
317 assert 0 <= type < 256, type
318 if context is not None:
319 self._prefix, (self.lineno, self.column) = context
320 self.type = type
321 self.value = value
322 if prefix is not None:
323 self._prefix = prefix
324 self.fixers_applied = fixers_applied[:]
325
326 def __repr__(self):
327 """Return a canonical string representation."""
328 return "%s(%r, %r)" % (self.__class__.__name__,
329 self.type,
330 self.value)
331
332 def PrettyPrint(self, f, depth=0):
333 indent = ' ' * depth
334 print("%s(%r, %r)" % (indent, self.type, self.value), file=f)
335
336 def __unicode__(self):
337 """
338 Return a pretty string representation.
339
340 This reproduces the input source exactly.
341 """
342 return self.prefix + str(self.value)
343
344 if sys.version_info > (3, 0):
345 __str__ = __unicode__
346
347 def _eq(self, other):
348 """Compare two nodes for equality."""
349 return (self.type, self.value) == (other.type, other.value)
350
351 def clone(self):
352 """Return a cloned (deep) copy of self."""
353 return Leaf(self.type, self.value,
354 (self.prefix, (self.lineno, self.column)),
355 fixers_applied=self.fixers_applied)
356
357 def _prefix_getter(self):
358 """
359 The whitespace and comments preceding this token in the input.
360 """
361 return self._prefix
362
363 def _prefix_setter(self, prefix):
364 self.changed()
365 self._prefix = prefix
366
367 prefix = property(_prefix_getter, _prefix_setter)
368
369def convert(gr, raw_node):
370 """
371 Convert raw node information to a Node or Leaf instance.
372
373 This is passed to the parser driver which calls it whenever a reduction of a
374 grammar rule produces a new complete node, so that the tree is build
375 strictly bottom-up.
376 """
377 type, value, context, children = raw_node
378 if children or type in gr.number2symbol:
379 # If there's exactly one child, return that child instead of
380 # creating a new node.
381 if len(children) == 1:
382 return children[0]
383 return Node(type, children, context=context)
384 else:
385 return Leaf(type, value, context=context)
386
387
388class BasePattern(object):
389
390 """
391 A pattern is a tree matching pattern.
392
393 It looks for a specific node type (token or symbol), and
394 optionally for a specific content.
395
396 This is an abstract base class. There are three concrete
397 subclasses:
398
399 - LeafPattern matches a single leaf node;
400 - NodePattern matches a single node (usually non-leaf);
401 - WildcardPattern matches a sequence of nodes of variable length.
402 """
403
404 # Defaults for instance variables
405 type = None # Node type (token if < 256, symbol if >= 256)
406 content = None # Optional content matching pattern
407 name = None # Optional name used to store match in results dict
408
409 def __new__(cls, *args, **kwds):
410 """Constructor that prevents BasePattern from being instantiated."""
411 assert cls is not BasePattern, "Cannot instantiate BasePattern"
412 return object.__new__(cls)
413
414 def __repr__(self):
415 args = [type_repr(self.type), self.content, self.name]
416 while args and args[-1] is None:
417 del args[-1]
418 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
419
420 def optimize(self):
421 """
422 A subclass can define this as a hook for optimizations.
423
424 Returns either self or another node with the same effect.
425 """
426 return self
427
428 def match(self, node, results=None):
429 """
430 Does this pattern exactly match a node?
431
432 Returns True if it matches, False if not.
433
434 If results is not None, it must be a dict which will be
435 updated with the nodes matching named subpatterns.
436
437 Default implementation for non-wildcard patterns.
438 """
439 if self.type is not None and node.type != self.type:
440 return False
441 if self.content is not None:
442 r = None
443 if results is not None:
444 r = {}
445 if not self._submatch(node, r):
446 return False
447 if r:
448 results.update(r)
449 if results is not None and self.name:
450 results[self.name] = node
451 return True
452
453 def match_seq(self, nodes, results=None):
454 """
455 Does this pattern exactly match a sequence of nodes?
456
457 Default implementation for non-wildcard patterns.
458 """
459 if len(nodes) != 1:
460 return False
461 return self.match(nodes[0], results)
462
463 def generate_matches(self, nodes):
464 """
465 Generator yielding all matches for this pattern.
466
467 Default implementation for non-wildcard patterns.
468 """
469 r = {}
470 if nodes and self.match(nodes[0], r):
471 yield 1, r
472
473
474class LeafPattern(BasePattern):
475
476 def __init__(self, type=None, content=None, name=None):
477 """
478 Initializer. Takes optional type, content, and name.
479
480 The type, if given must be a token type (< 256). If not given,
481 this matches any *leaf* node; the content may still be required.
482
483 The content, if given, must be a string.
484
485 If a name is given, the matching node is stored in the results
486 dict under that key.
487 """
488 if type is not None:
489 assert 0 <= type < 256, type
490 if content is not None:
491 assert isinstance(content, str), repr(content)
492 self.type = type
493 self.content = content
494 self.name = name
495
496 def match(self, node, results=None):
497 """Override match() to insist on a leaf node."""
498 if not isinstance(node, Leaf):
499 return False
500 return BasePattern.match(self, node, results)
501
502 def _submatch(self, node, results=None):
503 """
504 Match the pattern's content to the node's children.
505
506 This assumes the node type matches and self.content is not None.
507
508 Returns True if it matches, False if not.
509
510 If results is not None, it must be a dict which will be
511 updated with the nodes matching named subpatterns.
512
513 When returning False, the results dict may still be updated.
514 """
515 return self.content == node.value
516
517
518class NodePattern(BasePattern):
519
520 wildcards = False
521
522 def __init__(self, type=None, content=None, name=None):
523 """
524 Initializer. Takes optional type, content, and name.
525
526 The type, if given, must be a symbol type (>= 256). If the
527 type is None this matches *any* single node (leaf or not),
528 except if content is not None, in which it only matches
529 non-leaf nodes that also match the content pattern.
530
531 The content, if not None, must be a sequence of Patterns that
532 must match the node's children exactly. If the content is
533 given, the type must not be None.
534
535 If a name is given, the matching node is stored in the results
536 dict under that key.
537 """
538 if type is not None:
539 assert type >= 256, type
540 if content is not None:
541 assert not isinstance(content, str), repr(content)
542 content = list(content)
543 for i, item in enumerate(content):
544 assert isinstance(item, BasePattern), (i, item)
545 if isinstance(item, WildcardPattern):
546 self.wildcards = True
547 self.type = type
548 self.content = content
549 self.name = name
550
551 def _submatch(self, node, results=None):
552 """
553 Match the pattern's content to the node's children.
554
555 This assumes the node type matches and self.content is not None.
556
557 Returns True if it matches, False if not.
558
559 If results is not None, it must be a dict which will be
560 updated with the nodes matching named subpatterns.
561
562 When returning False, the results dict may still be updated.
563 """
564 if self.wildcards:
565 for c, r in generate_matches(self.content, node.children):
566 if c == len(node.children):
567 if results is not None:
568 results.update(r)
569 return True
570 return False
571 if len(self.content) != len(node.children):
572 return False
573 for subpattern, child in zip(self.content, node.children):
574 if not subpattern.match(child, results):
575 return False
576 return True
577
578
579class WildcardPattern(BasePattern):
580
581 """
582 A wildcard pattern can match zero or more nodes.
583
584 This has all the flexibility needed to implement patterns like:
585
586 .* .+ .? .{m,n}
587 (a b c | d e | f)
588 (...)* (...)+ (...)? (...){m,n}
589
590 except it always uses non-greedy matching.
591 """
592
593 def __init__(self, content=None, min=0, max=HUGE, name=None):
594 """
595 Initializer.
596
597 Args:
598 content: optional sequence of subsequences of patterns;
599 if absent, matches one node;
600 if present, each subsequence is an alternative [*]
601 min: optional minimum number of times to match, default 0
602 max: optional maximum number of times to match, default HUGE
603 name: optional name assigned to this match
604
605 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
606 equivalent to (a b c | d e | f g h); if content is None,
607 this is equivalent to '.' in regular expression terms.
608 The min and max parameters work as follows:
609 min=0, max=maxint: .*
610 min=1, max=maxint: .+
611 min=0, max=1: .?
612 min=1, max=1: .
613 If content is not None, replace the dot with the parenthesized
614 list of alternatives, e.g. (a b c | d e | f g h)*
615 """
616 assert 0 <= min <= max <= HUGE, (min, max)
617 if content is not None:
618 content = tuple(map(tuple, content)) # Protect against alterations
619 # Check sanity of alternatives
620 assert len(content), repr(content) # Can't have zero alternatives
621 for alt in content:
622 assert len(alt), repr(alt) # Can have empty alternatives
623 self.content = content
624 self.min = min
625 self.max = max
626 self.name = name
627
628 def optimize(self):
629 """Optimize certain stacked wildcard patterns."""
630 subpattern = None
631 if (self.content is not None and
632 len(self.content) == 1 and len(self.content[0]) == 1):
633 subpattern = self.content[0][0]
634 if self.min == 1 and self.max == 1:
635 if self.content is None:
636 return NodePattern(name=self.name)
637 if subpattern is not None and self.name == subpattern.name:
638 return subpattern.optimize()
639 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
640 subpattern.min <= 1 and self.name == subpattern.name):
641 return WildcardPattern(subpattern.content,
642 self.min*subpattern.min,
643 self.max*subpattern.max,
644 subpattern.name)
645 return self
646
647 def match(self, node, results=None):
648 """Does this pattern exactly match a node?"""
649 return self.match_seq([node], results)
650
651 def match_seq(self, nodes, results=None):
652 """Does this pattern exactly match a sequence of nodes?"""
653 for c, r in self.generate_matches(nodes):
654 if c == len(nodes):
655 if results is not None:
656 results.update(r)
657 if self.name:
658 results[self.name] = list(nodes)
659 return True
660 return False
661
662 def generate_matches(self, nodes):
663 """
664 Generator yielding matches for a sequence of nodes.
665
666 Args:
667 nodes: sequence of nodes
668
669 Yields:
670 (count, results) tuples where:
671 count: the match comprises nodes[:count];
672 results: dict containing named submatches.
673 """
674 if self.content is None:
675 # Shortcut for special case (see __init__.__doc__)
676 for count in range(self.min, 1 + min(len(nodes), self.max)):
677 r = {}
678 if self.name:
679 r[self.name] = nodes[:count]
680 yield count, r
681 elif self.name == "bare_name":
682 yield self._bare_name_matches(nodes)
683 else:
684 # The reason for this is that hitting the recursion limit usually
685 # results in some ugly messages about how RuntimeErrors are being
686 # ignored. We only have to do this on CPython, though, because other
687 # implementations don't have this nasty bug in the first place.
688 if hasattr(sys, "getrefcount"):
689 save_stderr = sys.stderr
690 sys.stderr = StringIO()
691 try:
692 for count, r in self._recursive_matches(nodes, 0):
693 if self.name:
694 r[self.name] = nodes[:count]
695 yield count, r
696 except RuntimeError:
697 # We fall back to the iterative pattern matching scheme if the recursive
698 # scheme hits the recursion limit.
699 for count, r in self._iterative_matches(nodes):
700 if self.name:
701 r[self.name] = nodes[:count]
702 yield count, r
703 finally:
704 if hasattr(sys, "getrefcount"):
705 sys.stderr = save_stderr
706
707 def _iterative_matches(self, nodes):
708 """Helper to iteratively yield the matches."""
709 nodelen = len(nodes)
710 if 0 >= self.min:
711 yield 0, {}
712
713 results = []
714 # generate matches that use just one alt from self.content
715 for alt in self.content:
716 for c, r in generate_matches(alt, nodes):
717 yield c, r
718 results.append((c, r))
719
720 # for each match, iterate down the nodes
721 while results:
722 new_results = []
723 for c0, r0 in results:
724 # stop if the entire set of nodes has been matched
725 if c0 < nodelen and c0 <= self.max:
726 for alt in self.content:
727 for c1, r1 in generate_matches(alt, nodes[c0:]):
728 if c1 > 0:
729 r = {}
730 r.update(r0)
731 r.update(r1)
732 yield c0 + c1, r
733 new_results.append((c0 + c1, r))
734 results = new_results
735
736 def _bare_name_matches(self, nodes):
737 """Special optimized matcher for bare_name."""
738 count = 0
739 r = {}
740 done = False
741 max = len(nodes)
742 while not done and count < max:
743 done = True
744 for leaf in self.content:
745 if leaf[0].match(nodes[count], r):
746 count += 1
747 done = False
748 break
749 r[self.name] = nodes[:count]
750 return count, r
751
752 def _recursive_matches(self, nodes, count):
753 """Helper to recursively yield the matches."""
754 assert self.content is not None
755 if count >= self.min:
756 yield 0, {}
757 if count < self.max:
758 for alt in self.content:
759 for c0, r0 in generate_matches(alt, nodes):
760 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
761 r = {}
762 r.update(r0)
763 r.update(r1)
764 yield c0 + c1, r
765
766
767class NegatedPattern(BasePattern):
768
769 def __init__(self, content=None):
770 """
771 Initializer.
772
773 The argument is either a pattern or None. If it is None, this
774 only matches an empty sequence (effectively '$' in regex
775 lingo). If it is not None, this matches whenever the argument
776 pattern doesn't have any matches.
777 """
778 if content is not None:
779 assert isinstance(content, BasePattern), repr(content)
780 self.content = content
781
782 def match(self, node):
783 # We never match a node in its entirety
784 return False
785
786 def match_seq(self, nodes):
787 # We only match an empty sequence of nodes in its entirety
788 return len(nodes) == 0
789
790 def generate_matches(self, nodes):
791 if self.content is None:
792 # Return a match if there is an empty sequence
793 if len(nodes) == 0:
794 yield 0, {}
795 else:
796 # Return a match if the argument pattern has no matches
797 for c, r in self.content.generate_matches(nodes):
798 return
799 yield 0, {}
800
801
802def generate_matches(patterns, nodes):
803 """
804 Generator yielding matches for a sequence of patterns and nodes.
805
806 Args:
807 patterns: a sequence of patterns
808 nodes: a sequence of nodes
809
810 Yields:
811 (count, results) tuples where:
812 count: the entire sequence of patterns matches nodes[:count];
813 results: dict containing named submatches.
814 """
815 if not patterns:
816 yield 0, {}
817 else:
818 p, rest = patterns[0], patterns[1:]
819 for c0, r0 in p.generate_matches(nodes):
820 if not rest:
821 yield c0, r0
822 else:
823 for c1, r1 in generate_matches(rest, nodes[c0:]):
824 r = {}
825 r.update(r0)
826 r.update(r1)
827 yield c0 + c1, r