OILS / opy / pytree.py View on Github | oilshell.org

808 lines, 422 significant
1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3from __future__ import print_function
4"""
5Python parse tree definitions.
6
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
15import sys
16import cStringIO
17
18HUGE = 0x7FFFFFFF # maximum repeat count, default max
19
20_type_reprs = {}
21
22def type_repr(type_num):
23 return _type_reprs[type_num]
24
25
26def Init(python_symbols):
27 # printing tokens is possible but not as useful
28 # from .pgen2 import token // token.__dict__.items():
29
30 # Fill in the dict.
31 global _type_reprs
32 for name, val in python_symbols.__dict__.items():
33 if type(val) == int:
34 _type_reprs[val] = name
35
36
37class Base(object):
38
39 """
40 Abstract base class for Node and Leaf.
41
42 This provides some default functionality and boilerplate using the
43 template pattern.
44
45 A node may be a subnode of at most one parent.
46 """
47
48 # Default values for instance variables
49 type = None # int: token number (< 256) or symbol number (>= 256)
50 parent = None # Parent node pointer, or None
51 children = () # Tuple of subnodes
52 was_changed = False
53 was_checked = False
54
55 def __new__(cls, *args, **kwds):
56 """Constructor that prevents Base from being instantiated."""
57 assert cls is not Base, "Cannot instantiate Base"
58 return object.__new__(cls)
59
60 def __eq__(self, other):
61 """
62 Compare two nodes for equality.
63
64 This calls the method _eq().
65 """
66 if self.__class__ is not other.__class__:
67 return NotImplemented
68 return self._eq(other)
69
70 __hash__ = None # For Py3 compatibility.
71
72 def _eq(self, other):
73 """
74 Compare two nodes for equality.
75
76 This is called by __eq__ and __ne__. It is only called if the two nodes
77 have the same type. This must be implemented by the concrete subclass.
78 Nodes should be considered equal if they have the same structure,
79 ignoring the prefix string and other context information.
80 """
81 raise NotImplementedError()
82
83 def clone(self):
84 """
85 Return a cloned (deep) copy of self.
86
87 This must be implemented by the concrete subclass.
88 """
89 raise NotImplementedError()
90
91 def replace(self, new):
92 """Replace this node with a new one in the parent."""
93 assert self.parent is not None, str(self)
94 assert new is not None
95 if not isinstance(new, list):
96 new = [new]
97 l_children = []
98 found = False
99 for ch in self.parent.children:
100 if ch is self:
101 assert not found, (self.parent.children, self, new)
102 if new is not None:
103 l_children.extend(new)
104 found = True
105 else:
106 l_children.append(ch)
107 assert found, (self.children, self, new)
108 self.parent.changed()
109 self.parent.children = l_children
110 for x in new:
111 x.parent = self.parent
112 self.parent = None
113
114 def get_lineno(self):
115 """Return the line number which generated the invocant node."""
116 node = self
117 while not isinstance(node, Leaf):
118 if not node.children:
119 return
120 node = node.children[0]
121 return node.lineno
122
123 def changed(self):
124 if self.parent:
125 self.parent.changed()
126 self.was_changed = True
127
128 def remove(self):
129 """
130 Remove the node from the tree. Returns the position of the node in its
131 parent's children before it was removed.
132 """
133 if self.parent:
134 for i, node in enumerate(self.parent.children):
135 if node is self:
136 self.parent.changed()
137 del self.parent.children[i]
138 self.parent = None
139 return i
140
141 @property
142 def next_sibling(self):
143 """
144 The node immediately following the invocant in their parent's children
145 list. If the invocant does not have a next sibling, it is None
146 """
147 if self.parent is None:
148 return None
149
150 # Can't use index(); we need to test by identity
151 for i, child in enumerate(self.parent.children):
152 if child is self:
153 try:
154 return self.parent.children[i+1]
155 except IndexError:
156 return None
157
158 @property
159 def prev_sibling(self):
160 """
161 The node immediately preceding the invocant in their parent's children
162 list. If the invocant does not have a previous sibling, it is None.
163 """
164 if self.parent is None:
165 return None
166
167 # Can't use index(); we need to test by identity
168 for i, child in enumerate(self.parent.children):
169 if child is self:
170 if i == 0:
171 return None
172 return self.parent.children[i-1]
173
174 def depth(self):
175 if self.parent is None:
176 return 0
177 return 1 + self.parent.depth()
178
179 def get_suffix(self):
180 """
181 Return the string immediately following the invocant node. This is
182 effectively equivalent to node.next_sibling.prefix
183 """
184 next_sib = self.next_sibling
185 if next_sib is None:
186 return ""
187 return next_sib.prefix
188
189 if sys.version_info < (3, 0):
190 def __str__(self):
191 return str(self).encode("ascii")
192
193class Node(Base):
194
195 """Concrete implementation for interior nodes."""
196
197 def __init__(self, type, children,
198 context=None,
199 prefix=None,
200 fixers_applied=None):
201 """
202 Initializer.
203
204 Takes a type constant (a symbol number >= 256), a sequence of
205 child nodes, and an optional context keyword argument.
206
207 As a side effect, the parent pointers of the children are updated.
208 """
209 assert type >= 256, type
210 self.type = type
211 self.children = list(children)
212 for ch in self.children:
213 assert ch.parent is None, repr(ch)
214 ch.parent = self
215 if prefix is not None:
216 self.prefix = prefix
217 if fixers_applied:
218 self.fixers_applied = fixers_applied[:]
219 else:
220 self.fixers_applied = None
221
222 def __repr__(self):
223 """Return a canonical string representation."""
224 return "%s(%s, %r)" % (self.__class__.__name__,
225 type_repr(self.type),
226 self.children)
227
228 def PrettyPrint(self, f, depth=0):
229 indent = ' ' * depth
230 print(indent + type_repr(self.type), file=f)
231 for c in self.children:
232 c.PrettyPrint(f, depth+1)
233
234 def __unicode__(self):
235 """
236 Return a pretty string representation.
237
238 This reproduces the input source exactly.
239 """
240 return "".join(map(str, self.children))
241
242 if sys.version_info > (3, 0):
243 __str__ = __unicode__
244
245 def _eq(self, other):
246 """Compare two nodes for equality."""
247 return (self.type, self.children) == (other.type, other.children)
248
249 def clone(self):
250 """Return a cloned (deep) copy of self."""
251 return Node(self.type, [ch.clone() for ch in self.children],
252 fixers_applied=self.fixers_applied)
253
254 def _prefix_getter(self):
255 """
256 The whitespace and comments preceding this node in the input.
257 """
258 if not self.children:
259 return ""
260 return self.children[0].prefix
261
262 def _prefix_setter(self, prefix):
263 if self.children:
264 self.children[0].prefix = prefix
265
266 prefix = property(_prefix_getter, _prefix_setter)
267
268 def set_child(self, i, child):
269 """
270 Equivalent to 'node.children[i] = child'. This method also sets the
271 child's parent attribute appropriately.
272 """
273 child.parent = self
274 self.children[i].parent = None
275 self.children[i] = child
276 self.changed()
277
278 def insert_child(self, i, child):
279 """
280 Equivalent to 'node.children.insert(i, child)'. This method also sets
281 the child's parent attribute appropriately.
282 """
283 child.parent = self
284 self.children.insert(i, child)
285 self.changed()
286
287 def append_child(self, child):
288 """
289 Equivalent to 'node.children.append(child)'. This method also sets the
290 child's parent attribute appropriately.
291 """
292 child.parent = self
293 self.children.append(child)
294 self.changed()
295
296
297class Leaf(Base):
298
299 """Concrete implementation for leaf nodes."""
300
301 # Default values for instance variables
302 _prefix = "" # Whitespace and comments preceding this token in the input
303 lineno = 0 # Line where this token starts in the input
304 column = 0 # Column where this token tarts in the input
305
306 def __init__(self, type, value,
307 context=None,
308 prefix=None,
309 fixers_applied=[]):
310 """
311 Initializer.
312
313 Takes a type constant (a token number < 256), a string value, and an
314 optional context keyword argument.
315 """
316 assert 0 <= type < 256, type
317 if context is not None:
318 self._prefix, (self.lineno, self.column) = context
319 self.type = type
320 self.value = value
321 if prefix is not None:
322 self._prefix = prefix
323 self.fixers_applied = fixers_applied[:]
324
325 def __repr__(self):
326 """Return a canonical string representation."""
327 return "%s(%r, %r)" % (self.__class__.__name__,
328 self.type,
329 self.value)
330
331 def PrettyPrint(self, f, depth=0):
332 indent = ' ' * depth
333 print("%s(%r, %r)" % (indent, self.type, self.value), file=f)
334
335 def __unicode__(self):
336 """
337 Return a pretty string representation.
338
339 This reproduces the input source exactly.
340 """
341 return self.prefix + str(self.value)
342
343 if sys.version_info > (3, 0):
344 __str__ = __unicode__
345
346 def _eq(self, other):
347 """Compare two nodes for equality."""
348 return (self.type, self.value) == (other.type, other.value)
349
350 def clone(self):
351 """Return a cloned (deep) copy of self."""
352 return Leaf(self.type, self.value,
353 (self.prefix, (self.lineno, self.column)),
354 fixers_applied=self.fixers_applied)
355
356 def _prefix_getter(self):
357 """
358 The whitespace and comments preceding this token in the input.
359 """
360 return self._prefix
361
362 def _prefix_setter(self, prefix):
363 self.changed()
364 self._prefix = prefix
365
366 prefix = property(_prefix_getter, _prefix_setter)
367
368
369class BasePattern(object):
370
371 """
372 A pattern is a tree matching pattern.
373
374 It looks for a specific node type (token or symbol), and
375 optionally for a specific content.
376
377 This is an abstract base class. There are three concrete
378 subclasses:
379
380 - LeafPattern matches a single leaf node;
381 - NodePattern matches a single node (usually non-leaf);
382 - WildcardPattern matches a sequence of nodes of variable length.
383 """
384
385 # Defaults for instance variables
386 type = None # Node type (token if < 256, symbol if >= 256)
387 content = None # Optional content matching pattern
388 name = None # Optional name used to store match in results dict
389
390 def __new__(cls, *args, **kwds):
391 """Constructor that prevents BasePattern from being instantiated."""
392 assert cls is not BasePattern, "Cannot instantiate BasePattern"
393 return object.__new__(cls)
394
395 def __repr__(self):
396 args = [type_repr(self.type), self.content, self.name]
397 while args and args[-1] is None:
398 del args[-1]
399 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
400
401 def optimize(self):
402 """
403 A subclass can define this as a hook for optimizations.
404
405 Returns either self or another node with the same effect.
406 """
407 return self
408
409 def match(self, node, results=None):
410 """
411 Does this pattern exactly match a node?
412
413 Returns True if it matches, False if not.
414
415 If results is not None, it must be a dict which will be
416 updated with the nodes matching named subpatterns.
417
418 Default implementation for non-wildcard patterns.
419 """
420 if self.type is not None and node.type != self.type:
421 return False
422 if self.content is not None:
423 r = None
424 if results is not None:
425 r = {}
426 if not self._submatch(node, r):
427 return False
428 if r:
429 results.update(r)
430 if results is not None and self.name:
431 results[self.name] = node
432 return True
433
434 def match_seq(self, nodes, results=None):
435 """
436 Does this pattern exactly match a sequence of nodes?
437
438 Default implementation for non-wildcard patterns.
439 """
440 if len(nodes) != 1:
441 return False
442 return self.match(nodes[0], results)
443
444 def generate_matches(self, nodes):
445 """
446 Generator yielding all matches for this pattern.
447
448 Default implementation for non-wildcard patterns.
449 """
450 r = {}
451 if nodes and self.match(nodes[0], r):
452 yield 1, r
453
454
455class LeafPattern(BasePattern):
456
457 def __init__(self, type=None, content=None, name=None):
458 """
459 Initializer. Takes optional type, content, and name.
460
461 The type, if given must be a token type (< 256). If not given,
462 this matches any *leaf* node; the content may still be required.
463
464 The content, if given, must be a string.
465
466 If a name is given, the matching node is stored in the results
467 dict under that key.
468 """
469 if type is not None:
470 assert 0 <= type < 256, type
471 if content is not None:
472 assert isinstance(content, str), repr(content)
473 self.type = type
474 self.content = content
475 self.name = name
476
477 def match(self, node, results=None):
478 """Override match() to insist on a leaf node."""
479 if not isinstance(node, Leaf):
480 return False
481 return BasePattern.match(self, node, results)
482
483 def _submatch(self, node, results=None):
484 """
485 Match the pattern's content to the node's children.
486
487 This assumes the node type matches and self.content is not None.
488
489 Returns True if it matches, False if not.
490
491 If results is not None, it must be a dict which will be
492 updated with the nodes matching named subpatterns.
493
494 When returning False, the results dict may still be updated.
495 """
496 return self.content == node.value
497
498
499class NodePattern(BasePattern):
500
501 wildcards = False
502
503 def __init__(self, type=None, content=None, name=None):
504 """
505 Initializer. Takes optional type, content, and name.
506
507 The type, if given, must be a symbol type (>= 256). If the
508 type is None this matches *any* single node (leaf or not),
509 except if content is not None, in which it only matches
510 non-leaf nodes that also match the content pattern.
511
512 The content, if not None, must be a sequence of Patterns that
513 must match the node's children exactly. If the content is
514 given, the type must not be None.
515
516 If a name is given, the matching node is stored in the results
517 dict under that key.
518 """
519 if type is not None:
520 assert type >= 256, type
521 if content is not None:
522 assert not isinstance(content, str), repr(content)
523 content = list(content)
524 for i, item in enumerate(content):
525 assert isinstance(item, BasePattern), (i, item)
526 if isinstance(item, WildcardPattern):
527 self.wildcards = True
528 self.type = type
529 self.content = content
530 self.name = name
531
532 def _submatch(self, node, results=None):
533 """
534 Match the pattern's content to the node's children.
535
536 This assumes the node type matches and self.content is not None.
537
538 Returns True if it matches, False if not.
539
540 If results is not None, it must be a dict which will be
541 updated with the nodes matching named subpatterns.
542
543 When returning False, the results dict may still be updated.
544 """
545 if self.wildcards:
546 for c, r in generate_matches(self.content, node.children):
547 if c == len(node.children):
548 if results is not None:
549 results.update(r)
550 return True
551 return False
552 if len(self.content) != len(node.children):
553 return False
554 for subpattern, child in zip(self.content, node.children):
555 if not subpattern.match(child, results):
556 return False
557 return True
558
559
560class WildcardPattern(BasePattern):
561
562 """
563 A wildcard pattern can match zero or more nodes.
564
565 This has all the flexibility needed to implement patterns like:
566
567 .* .+ .? .{m,n}
568 (a b c | d e | f)
569 (...)* (...)+ (...)? (...){m,n}
570
571 except it always uses non-greedy matching.
572 """
573
574 def __init__(self, content=None, min=0, max=HUGE, name=None):
575 """
576 Initializer.
577
578 Args:
579 content: optional sequence of subsequences of patterns;
580 if absent, matches one node;
581 if present, each subsequence is an alternative [*]
582 min: optional minimum number of times to match, default 0
583 max: optional maximum number of times to match, default HUGE
584 name: optional name assigned to this match
585
586 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
587 equivalent to (a b c | d e | f g h); if content is None,
588 this is equivalent to '.' in regular expression terms.
589 The min and max parameters work as follows:
590 min=0, max=maxint: .*
591 min=1, max=maxint: .+
592 min=0, max=1: .?
593 min=1, max=1: .
594 If content is not None, replace the dot with the parenthesized
595 list of alternatives, e.g. (a b c | d e | f g h)*
596 """
597 assert 0 <= min <= max <= HUGE, (min, max)
598 if content is not None:
599 content = tuple(map(tuple, content)) # Protect against alterations
600 # Check sanity of alternatives
601 assert len(content), repr(content) # Can't have zero alternatives
602 for alt in content:
603 assert len(alt), repr(alt) # Can have empty alternatives
604 self.content = content
605 self.min = min
606 self.max = max
607 self.name = name
608
609 def optimize(self):
610 """Optimize certain stacked wildcard patterns."""
611 subpattern = None
612 if (self.content is not None and
613 len(self.content) == 1 and len(self.content[0]) == 1):
614 subpattern = self.content[0][0]
615 if self.min == 1 and self.max == 1:
616 if self.content is None:
617 return NodePattern(name=self.name)
618 if subpattern is not None and self.name == subpattern.name:
619 return subpattern.optimize()
620 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
621 subpattern.min <= 1 and self.name == subpattern.name):
622 return WildcardPattern(subpattern.content,
623 self.min*subpattern.min,
624 self.max*subpattern.max,
625 subpattern.name)
626 return self
627
628 def match(self, node, results=None):
629 """Does this pattern exactly match a node?"""
630 return self.match_seq([node], results)
631
632 def match_seq(self, nodes, results=None):
633 """Does this pattern exactly match a sequence of nodes?"""
634 for c, r in self.generate_matches(nodes):
635 if c == len(nodes):
636 if results is not None:
637 results.update(r)
638 if self.name:
639 results[self.name] = list(nodes)
640 return True
641 return False
642
643 def generate_matches(self, nodes):
644 """
645 Generator yielding matches for a sequence of nodes.
646
647 Args:
648 nodes: sequence of nodes
649
650 Yields:
651 (count, results) tuples where:
652 count: the match comprises nodes[:count];
653 results: dict containing named submatches.
654 """
655 if self.content is None:
656 # Shortcut for special case (see __init__.__doc__)
657 for count in range(self.min, 1 + min(len(nodes), self.max)):
658 r = {}
659 if self.name:
660 r[self.name] = nodes[:count]
661 yield count, r
662 elif self.name == "bare_name":
663 yield self._bare_name_matches(nodes)
664 else:
665 # The reason for this is that hitting the recursion limit usually
666 # results in some ugly messages about how RuntimeErrors are being
667 # ignored. We only have to do this on CPython, though, because other
668 # implementations don't have this nasty bug in the first place.
669 if hasattr(sys, "getrefcount"):
670 save_stderr = sys.stderr
671 sys.stderr = cStringIO.StringIO()
672 try:
673 for count, r in self._recursive_matches(nodes, 0):
674 if self.name:
675 r[self.name] = nodes[:count]
676 yield count, r
677 except RuntimeError:
678 # We fall back to the iterative pattern matching scheme if the recursive
679 # scheme hits the recursion limit.
680 for count, r in self._iterative_matches(nodes):
681 if self.name:
682 r[self.name] = nodes[:count]
683 yield count, r
684 finally:
685 if hasattr(sys, "getrefcount"):
686 sys.stderr = save_stderr
687
688 def _iterative_matches(self, nodes):
689 """Helper to iteratively yield the matches."""
690 nodelen = len(nodes)
691 if 0 >= self.min:
692 yield 0, {}
693
694 results = []
695 # generate matches that use just one alt from self.content
696 for alt in self.content:
697 for c, r in generate_matches(alt, nodes):
698 yield c, r
699 results.append((c, r))
700
701 # for each match, iterate down the nodes
702 while results:
703 new_results = []
704 for c0, r0 in results:
705 # stop if the entire set of nodes has been matched
706 if c0 < nodelen and c0 <= self.max:
707 for alt in self.content:
708 for c1, r1 in generate_matches(alt, nodes[c0:]):
709 if c1 > 0:
710 r = {}
711 r.update(r0)
712 r.update(r1)
713 yield c0 + c1, r
714 new_results.append((c0 + c1, r))
715 results = new_results
716
717 def _bare_name_matches(self, nodes):
718 """Special optimized matcher for bare_name."""
719 count = 0
720 r = {}
721 done = False
722 max = len(nodes)
723 while not done and count < max:
724 done = True
725 for leaf in self.content:
726 if leaf[0].match(nodes[count], r):
727 count += 1
728 done = False
729 break
730 r[self.name] = nodes[:count]
731 return count, r
732
733 def _recursive_matches(self, nodes, count):
734 """Helper to recursively yield the matches."""
735 assert self.content is not None
736 if count >= self.min:
737 yield 0, {}
738 if count < self.max:
739 for alt in self.content:
740 for c0, r0 in generate_matches(alt, nodes):
741 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
742 r = {}
743 r.update(r0)
744 r.update(r1)
745 yield c0 + c1, r
746
747
748class NegatedPattern(BasePattern):
749
750 def __init__(self, content=None):
751 """
752 Initializer.
753
754 The argument is either a pattern or None. If it is None, this
755 only matches an empty sequence (effectively '$' in regex
756 lingo). If it is not None, this matches whenever the argument
757 pattern doesn't have any matches.
758 """
759 if content is not None:
760 assert isinstance(content, BasePattern), repr(content)
761 self.content = content
762
763 def match(self, node):
764 # We never match a node in its entirety
765 return False
766
767 def match_seq(self, nodes):
768 # We only match an empty sequence of nodes in its entirety
769 return len(nodes) == 0
770
771 def generate_matches(self, nodes):
772 if self.content is None:
773 # Return a match if there is an empty sequence
774 if len(nodes) == 0:
775 yield 0, {}
776 else:
777 # Return a match if the argument pattern has no matches
778 for c, r in self.content.generate_matches(nodes):
779 return
780 yield 0, {}
781
782
783def generate_matches(patterns, nodes):
784 """
785 Generator yielding matches for a sequence of patterns and nodes.
786
787 Args:
788 patterns: a sequence of patterns
789 nodes: a sequence of nodes
790
791 Yields:
792 (count, results) tuples where:
793 count: the entire sequence of patterns matches nodes[:count];
794 results: dict containing named submatches.
795 """
796 if not patterns:
797 yield 0, {}
798 else:
799 p, rest = patterns[0], patterns[1:]
800 for c0, r0 in p.generate_matches(nodes):
801 if not rest:
802 yield c0, r0
803 else:
804 for c1, r1 in generate_matches(rest, nodes[c0:]):
805 r = {}
806 r.update(r0)
807 r.update(r1)
808 yield c0 + c1, r