| 1 | # Copyright 2006 Google, Inc. All Rights Reserved.
 | 
| 2 | # Licensed to PSF under a Contributor Agreement.
 | 
| 3 | from __future__ import print_function
 | 
| 4 | """
 | 
| 5 | Python parse tree definitions.
 | 
| 6 | 
 | 
| 7 | This is a very concrete parse tree; we need to keep every token and
 | 
| 8 | even the comments and whitespace between tokens.
 | 
| 9 | 
 | 
| 10 | There's also a pattern matching implementation here.
 | 
| 11 | """
 | 
| 12 | 
 | 
| 13 | __author__ = "Guido van Rossum <guido@python.org>"
 | 
| 14 | 
 | 
| 15 | import sys
 | 
| 16 | import cStringIO
 | 
| 17 | 
 | 
| 18 | HUGE = 0x7FFFFFFF  # maximum repeat count, default max
 | 
| 19 | 
 | 
| 20 | _type_reprs = {}
 | 
| 21 | 
 | 
| 22 | def type_repr(type_num):
 | 
| 23 |     return _type_reprs[type_num]
 | 
| 24 | 
 | 
| 25 | 
 | 
| 26 | def Init(python_symbols):
 | 
| 27 |   # printing tokens is possible but not as useful
 | 
| 28 |   # from .pgen2 import token // token.__dict__.items():
 | 
| 29 | 
 | 
| 30 |   # Fill in the dict.
 | 
| 31 |   global _type_reprs
 | 
| 32 |   for name, val in python_symbols.__dict__.items():
 | 
| 33 |       if type(val) == int:
 | 
| 34 |           _type_reprs[val] = name
 | 
| 35 | 
 | 
| 36 | 
 | 
| 37 | class Base(object):
 | 
| 38 | 
 | 
| 39 |     """
 | 
| 40 |     Abstract base class for Node and Leaf.
 | 
| 41 | 
 | 
| 42 |     This provides some default functionality and boilerplate using the
 | 
| 43 |     template pattern.
 | 
| 44 | 
 | 
| 45 |     A node may be a subnode of at most one parent.
 | 
| 46 |     """
 | 
| 47 | 
 | 
| 48 |     # Default values for instance variables
 | 
| 49 |     type = None    # int: token number (< 256) or symbol number (>= 256)
 | 
| 50 |     parent = None  # Parent node pointer, or None
 | 
| 51 |     children = ()  # Tuple of subnodes
 | 
| 52 |     was_changed = False
 | 
| 53 |     was_checked = False
 | 
| 54 | 
 | 
| 55 |     def __new__(cls, *args, **kwds):
 | 
| 56 |         """Constructor that prevents Base from being instantiated."""
 | 
| 57 |         assert cls is not Base, "Cannot instantiate Base"
 | 
| 58 |         return object.__new__(cls)
 | 
| 59 | 
 | 
| 60 |     def __eq__(self, other):
 | 
| 61 |         """
 | 
| 62 |         Compare two nodes for equality.
 | 
| 63 | 
 | 
| 64 |         This calls the method _eq().
 | 
| 65 |         """
 | 
| 66 |         if self.__class__ is not other.__class__:
 | 
| 67 |             return NotImplemented
 | 
| 68 |         return self._eq(other)
 | 
| 69 | 
 | 
| 70 |     __hash__ = None # For Py3 compatibility.
 | 
| 71 | 
 | 
| 72 |     def _eq(self, other):
 | 
| 73 |         """
 | 
| 74 |         Compare two nodes for equality.
 | 
| 75 | 
 | 
| 76 |         This is called by __eq__ and __ne__.  It is only called if the two nodes
 | 
| 77 |         have the same type.  This must be implemented by the concrete subclass.
 | 
| 78 |         Nodes should be considered equal if they have the same structure,
 | 
| 79 |         ignoring the prefix string and other context information.
 | 
| 80 |         """
 | 
| 81 |         raise NotImplementedError()
 | 
| 82 | 
 | 
| 83 |     def clone(self):
 | 
| 84 |         """
 | 
| 85 |         Return a cloned (deep) copy of self.
 | 
| 86 | 
 | 
| 87 |         This must be implemented by the concrete subclass.
 | 
| 88 |         """
 | 
| 89 |         raise NotImplementedError()
 | 
| 90 | 
 | 
| 91 |     def replace(self, new):
 | 
| 92 |         """Replace this node with a new one in the parent."""
 | 
| 93 |         assert self.parent is not None, str(self)
 | 
| 94 |         assert new is not None
 | 
| 95 |         if not isinstance(new, list):
 | 
| 96 |             new = [new]
 | 
| 97 |         l_children = []
 | 
| 98 |         found = False
 | 
| 99 |         for ch in self.parent.children:
 | 
| 100 |             if ch is self:
 | 
| 101 |                 assert not found, (self.parent.children, self, new)
 | 
| 102 |                 if new is not None:
 | 
| 103 |                     l_children.extend(new)
 | 
| 104 |                 found = True
 | 
| 105 |             else:
 | 
| 106 |                 l_children.append(ch)
 | 
| 107 |         assert found, (self.children, self, new)
 | 
| 108 |         self.parent.changed()
 | 
| 109 |         self.parent.children = l_children
 | 
| 110 |         for x in new:
 | 
| 111 |             x.parent = self.parent
 | 
| 112 |         self.parent = None
 | 
| 113 | 
 | 
| 114 |     def get_lineno(self):
 | 
| 115 |         """Return the line number which generated the invocant node."""
 | 
| 116 |         node = self
 | 
| 117 |         while not isinstance(node, Leaf):
 | 
| 118 |             if not node.children:
 | 
| 119 |                 return
 | 
| 120 |             node = node.children[0]
 | 
| 121 |         return node.lineno
 | 
| 122 | 
 | 
| 123 |     def changed(self):
 | 
| 124 |         if self.parent:
 | 
| 125 |             self.parent.changed()
 | 
| 126 |         self.was_changed = True
 | 
| 127 | 
 | 
| 128 |     def remove(self):
 | 
| 129 |         """
 | 
| 130 |         Remove the node from the tree. Returns the position of the node in its
 | 
| 131 |         parent's children before it was removed.
 | 
| 132 |         """
 | 
| 133 |         if self.parent:
 | 
| 134 |             for i, node in enumerate(self.parent.children):
 | 
| 135 |                 if node is self:
 | 
| 136 |                     self.parent.changed()
 | 
| 137 |                     del self.parent.children[i]
 | 
| 138 |                     self.parent = None
 | 
| 139 |                     return i
 | 
| 140 | 
 | 
| 141 |     @property
 | 
| 142 |     def next_sibling(self):
 | 
| 143 |         """
 | 
| 144 |         The node immediately following the invocant in their parent's children
 | 
| 145 |         list. If the invocant does not have a next sibling, it is None
 | 
| 146 |         """
 | 
| 147 |         if self.parent is None:
 | 
| 148 |             return None
 | 
| 149 | 
 | 
| 150 |         # Can't use index(); we need to test by identity
 | 
| 151 |         for i, child in enumerate(self.parent.children):
 | 
| 152 |             if child is self:
 | 
| 153 |                 try:
 | 
| 154 |                     return self.parent.children[i+1]
 | 
| 155 |                 except IndexError:
 | 
| 156 |                     return None
 | 
| 157 | 
 | 
| 158 |     @property
 | 
| 159 |     def prev_sibling(self):
 | 
| 160 |         """
 | 
| 161 |         The node immediately preceding the invocant in their parent's children
 | 
| 162 |         list. If the invocant does not have a previous sibling, it is None.
 | 
| 163 |         """
 | 
| 164 |         if self.parent is None:
 | 
| 165 |             return None
 | 
| 166 | 
 | 
| 167 |         # Can't use index(); we need to test by identity
 | 
| 168 |         for i, child in enumerate(self.parent.children):
 | 
| 169 |             if child is self:
 | 
| 170 |                 if i == 0:
 | 
| 171 |                     return None
 | 
| 172 |                 return self.parent.children[i-1]
 | 
| 173 | 
 | 
| 174 |     def depth(self):
 | 
| 175 |         if self.parent is None:
 | 
| 176 |             return 0
 | 
| 177 |         return 1 + self.parent.depth()
 | 
| 178 | 
 | 
| 179 |     def get_suffix(self):
 | 
| 180 |         """
 | 
| 181 |         Return the string immediately following the invocant node. This is
 | 
| 182 |         effectively equivalent to node.next_sibling.prefix
 | 
| 183 |         """
 | 
| 184 |         next_sib = self.next_sibling
 | 
| 185 |         if next_sib is None:
 | 
| 186 |             return ""
 | 
| 187 |         return next_sib.prefix
 | 
| 188 | 
 | 
| 189 |     if sys.version_info < (3, 0):
 | 
| 190 |         def __str__(self):
 | 
| 191 |             return str(self).encode("ascii")
 | 
| 192 | 
 | 
| 193 | class Node(Base):
 | 
| 194 | 
 | 
| 195 |     """Concrete implementation for interior nodes."""
 | 
| 196 | 
 | 
| 197 |     def __init__(self, type, children,
 | 
| 198 |                  context=None,
 | 
| 199 |                  prefix=None,
 | 
| 200 |                  fixers_applied=None):
 | 
| 201 |         """
 | 
| 202 |         Initializer.
 | 
| 203 | 
 | 
| 204 |         Takes a type constant (a symbol number >= 256), a sequence of
 | 
| 205 |         child nodes, and an optional context keyword argument.
 | 
| 206 | 
 | 
| 207 |         As a side effect, the parent pointers of the children are updated.
 | 
| 208 |         """
 | 
| 209 |         assert type >= 256, type
 | 
| 210 |         self.type = type
 | 
| 211 |         self.children = list(children)
 | 
| 212 |         for ch in self.children:
 | 
| 213 |             assert ch.parent is None, repr(ch)
 | 
| 214 |             ch.parent = self
 | 
| 215 |         if prefix is not None:
 | 
| 216 |             self.prefix = prefix
 | 
| 217 |         if fixers_applied:
 | 
| 218 |             self.fixers_applied = fixers_applied[:]
 | 
| 219 |         else:
 | 
| 220 |             self.fixers_applied = None
 | 
| 221 | 
 | 
| 222 |     def __repr__(self):
 | 
| 223 |         """Return a canonical string representation."""
 | 
| 224 |         return "%s(%s, %r)" % (self.__class__.__name__,
 | 
| 225 |                                type_repr(self.type),
 | 
| 226 |                                self.children)
 | 
| 227 | 
 | 
| 228 |     def PrettyPrint(self, f, depth=0):
 | 
| 229 |       indent = '  ' * depth
 | 
| 230 |       print(indent + type_repr(self.type), file=f)
 | 
| 231 |       for c in self.children:
 | 
| 232 |         c.PrettyPrint(f, depth+1)
 | 
| 233 | 
 | 
| 234 |     def __unicode__(self):
 | 
| 235 |         """
 | 
| 236 |         Return a pretty string representation.
 | 
| 237 | 
 | 
| 238 |         This reproduces the input source exactly.
 | 
| 239 |         """
 | 
| 240 |         return "".join(map(str, self.children))
 | 
| 241 | 
 | 
| 242 |     if sys.version_info > (3, 0):
 | 
| 243 |         __str__ = __unicode__
 | 
| 244 | 
 | 
| 245 |     def _eq(self, other):
 | 
| 246 |         """Compare two nodes for equality."""
 | 
| 247 |         return (self.type, self.children) == (other.type, other.children)
 | 
| 248 | 
 | 
| 249 |     def clone(self):
 | 
| 250 |         """Return a cloned (deep) copy of self."""
 | 
| 251 |         return Node(self.type, [ch.clone() for ch in self.children],
 | 
| 252 |                     fixers_applied=self.fixers_applied)
 | 
| 253 | 
 | 
| 254 |     def _prefix_getter(self):
 | 
| 255 |         """
 | 
| 256 |         The whitespace and comments preceding this node in the input.
 | 
| 257 |         """
 | 
| 258 |         if not self.children:
 | 
| 259 |             return ""
 | 
| 260 |         return self.children[0].prefix
 | 
| 261 | 
 | 
| 262 |     def _prefix_setter(self, prefix):
 | 
| 263 |         if self.children:
 | 
| 264 |             self.children[0].prefix = prefix
 | 
| 265 | 
 | 
| 266 |     prefix = property(_prefix_getter, _prefix_setter)
 | 
| 267 | 
 | 
| 268 |     def set_child(self, i, child):
 | 
| 269 |         """
 | 
| 270 |         Equivalent to 'node.children[i] = child'. This method also sets the
 | 
| 271 |         child's parent attribute appropriately.
 | 
| 272 |         """
 | 
| 273 |         child.parent = self
 | 
| 274 |         self.children[i].parent = None
 | 
| 275 |         self.children[i] = child
 | 
| 276 |         self.changed()
 | 
| 277 | 
 | 
| 278 |     def insert_child(self, i, child):
 | 
| 279 |         """
 | 
| 280 |         Equivalent to 'node.children.insert(i, child)'. This method also sets
 | 
| 281 |         the child's parent attribute appropriately.
 | 
| 282 |         """
 | 
| 283 |         child.parent = self
 | 
| 284 |         self.children.insert(i, child)
 | 
| 285 |         self.changed()
 | 
| 286 | 
 | 
| 287 |     def append_child(self, child):
 | 
| 288 |         """
 | 
| 289 |         Equivalent to 'node.children.append(child)'. This method also sets the
 | 
| 290 |         child's parent attribute appropriately.
 | 
| 291 |         """
 | 
| 292 |         child.parent = self
 | 
| 293 |         self.children.append(child)
 | 
| 294 |         self.changed()
 | 
| 295 | 
 | 
| 296 | 
 | 
| 297 | class Leaf(Base):
 | 
| 298 | 
 | 
| 299 |     """Concrete implementation for leaf nodes."""
 | 
| 300 | 
 | 
| 301 |     # Default values for instance variables
 | 
| 302 |     _prefix = ""  # Whitespace and comments preceding this token in the input
 | 
| 303 |     lineno = 0    # Line where this token starts in the input
 | 
| 304 |     column = 0    # Column where this token tarts in the input
 | 
| 305 | 
 | 
| 306 |     def __init__(self, type, value,
 | 
| 307 |                  context=None,
 | 
| 308 |                  prefix=None,
 | 
| 309 |                  fixers_applied=[]):
 | 
| 310 |         """
 | 
| 311 |         Initializer.
 | 
| 312 | 
 | 
| 313 |         Takes a type constant (a token number < 256), a string value, and an
 | 
| 314 |         optional context keyword argument.
 | 
| 315 |         """
 | 
| 316 |         assert 0 <= type < 256, type
 | 
| 317 |         if context is not None:
 | 
| 318 |             self._prefix, (self.lineno, self.column) = context
 | 
| 319 |         self.type = type
 | 
| 320 |         self.value = value
 | 
| 321 |         if prefix is not None:
 | 
| 322 |             self._prefix = prefix
 | 
| 323 |         self.fixers_applied = fixers_applied[:]
 | 
| 324 | 
 | 
| 325 |     def __repr__(self):
 | 
| 326 |         """Return a canonical string representation."""
 | 
| 327 |         return "%s(%r, %r)" % (self.__class__.__name__,
 | 
| 328 |                                self.type,
 | 
| 329 |                                self.value)
 | 
| 330 | 
 | 
| 331 |     def PrettyPrint(self, f, depth=0):
 | 
| 332 |       indent = '  ' * depth
 | 
| 333 |       print("%s(%r, %r)" % (indent, self.type, self.value), file=f)
 | 
| 334 | 
 | 
| 335 |     def __unicode__(self):
 | 
| 336 |         """
 | 
| 337 |         Return a pretty string representation.
 | 
| 338 | 
 | 
| 339 |         This reproduces the input source exactly.
 | 
| 340 |         """
 | 
| 341 |         return self.prefix + str(self.value)
 | 
| 342 | 
 | 
| 343 |     if sys.version_info > (3, 0):
 | 
| 344 |         __str__ = __unicode__
 | 
| 345 | 
 | 
| 346 |     def _eq(self, other):
 | 
| 347 |         """Compare two nodes for equality."""
 | 
| 348 |         return (self.type, self.value) == (other.type, other.value)
 | 
| 349 | 
 | 
| 350 |     def clone(self):
 | 
| 351 |         """Return a cloned (deep) copy of self."""
 | 
| 352 |         return Leaf(self.type, self.value,
 | 
| 353 |                     (self.prefix, (self.lineno, self.column)),
 | 
| 354 |                     fixers_applied=self.fixers_applied)
 | 
| 355 | 
 | 
| 356 |     def _prefix_getter(self):
 | 
| 357 |         """
 | 
| 358 |         The whitespace and comments preceding this token in the input.
 | 
| 359 |         """
 | 
| 360 |         return self._prefix
 | 
| 361 | 
 | 
| 362 |     def _prefix_setter(self, prefix):
 | 
| 363 |         self.changed()
 | 
| 364 |         self._prefix = prefix
 | 
| 365 | 
 | 
| 366 |     prefix = property(_prefix_getter, _prefix_setter)
 | 
| 367 | 
 | 
| 368 | 
 | 
| 369 | class BasePattern(object):
 | 
| 370 | 
 | 
| 371 |     """
 | 
| 372 |     A pattern is a tree matching pattern.
 | 
| 373 | 
 | 
| 374 |     It looks for a specific node type (token or symbol), and
 | 
| 375 |     optionally for a specific content.
 | 
| 376 | 
 | 
| 377 |     This is an abstract base class.  There are three concrete
 | 
| 378 |     subclasses:
 | 
| 379 | 
 | 
| 380 |     - LeafPattern matches a single leaf node;
 | 
| 381 |     - NodePattern matches a single node (usually non-leaf);
 | 
| 382 |     - WildcardPattern matches a sequence of nodes of variable length.
 | 
| 383 |     """
 | 
| 384 | 
 | 
| 385 |     # Defaults for instance variables
 | 
| 386 |     type = None     # Node type (token if < 256, symbol if >= 256)
 | 
| 387 |     content = None  # Optional content matching pattern
 | 
| 388 |     name = None     # Optional name used to store match in results dict
 | 
| 389 | 
 | 
| 390 |     def __new__(cls, *args, **kwds):
 | 
| 391 |         """Constructor that prevents BasePattern from being instantiated."""
 | 
| 392 |         assert cls is not BasePattern, "Cannot instantiate BasePattern"
 | 
| 393 |         return object.__new__(cls)
 | 
| 394 | 
 | 
| 395 |     def __repr__(self):
 | 
| 396 |         args = [type_repr(self.type), self.content, self.name]
 | 
| 397 |         while args and args[-1] is None:
 | 
| 398 |             del args[-1]
 | 
| 399 |         return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
 | 
| 400 | 
 | 
| 401 |     def optimize(self):
 | 
| 402 |         """
 | 
| 403 |         A subclass can define this as a hook for optimizations.
 | 
| 404 | 
 | 
| 405 |         Returns either self or another node with the same effect.
 | 
| 406 |         """
 | 
| 407 |         return self
 | 
| 408 | 
 | 
| 409 |     def match(self, node, results=None):
 | 
| 410 |         """
 | 
| 411 |         Does this pattern exactly match a node?
 | 
| 412 | 
 | 
| 413 |         Returns True if it matches, False if not.
 | 
| 414 | 
 | 
| 415 |         If results is not None, it must be a dict which will be
 | 
| 416 |         updated with the nodes matching named subpatterns.
 | 
| 417 | 
 | 
| 418 |         Default implementation for non-wildcard patterns.
 | 
| 419 |         """
 | 
| 420 |         if self.type is not None and node.type != self.type:
 | 
| 421 |             return False
 | 
| 422 |         if self.content is not None:
 | 
| 423 |             r = None
 | 
| 424 |             if results is not None:
 | 
| 425 |                 r = {}
 | 
| 426 |             if not self._submatch(node, r):
 | 
| 427 |                 return False
 | 
| 428 |             if r:
 | 
| 429 |                 results.update(r)
 | 
| 430 |         if results is not None and self.name:
 | 
| 431 |             results[self.name] = node
 | 
| 432 |         return True
 | 
| 433 | 
 | 
| 434 |     def match_seq(self, nodes, results=None):
 | 
| 435 |         """
 | 
| 436 |         Does this pattern exactly match a sequence of nodes?
 | 
| 437 | 
 | 
| 438 |         Default implementation for non-wildcard patterns.
 | 
| 439 |         """
 | 
| 440 |         if len(nodes) != 1:
 | 
| 441 |             return False
 | 
| 442 |         return self.match(nodes[0], results)
 | 
| 443 | 
 | 
| 444 |     def generate_matches(self, nodes):
 | 
| 445 |         """
 | 
| 446 |         Generator yielding all matches for this pattern.
 | 
| 447 | 
 | 
| 448 |         Default implementation for non-wildcard patterns.
 | 
| 449 |         """
 | 
| 450 |         r = {}
 | 
| 451 |         if nodes and self.match(nodes[0], r):
 | 
| 452 |             yield 1, r
 | 
| 453 | 
 | 
| 454 | 
 | 
| 455 | class LeafPattern(BasePattern):
 | 
| 456 | 
 | 
| 457 |     def __init__(self, type=None, content=None, name=None):
 | 
| 458 |         """
 | 
| 459 |         Initializer.  Takes optional type, content, and name.
 | 
| 460 | 
 | 
| 461 |         The type, if given must be a token type (< 256).  If not given,
 | 
| 462 |         this matches any *leaf* node; the content may still be required.
 | 
| 463 | 
 | 
| 464 |         The content, if given, must be a string.
 | 
| 465 | 
 | 
| 466 |         If a name is given, the matching node is stored in the results
 | 
| 467 |         dict under that key.
 | 
| 468 |         """
 | 
| 469 |         if type is not None:
 | 
| 470 |             assert 0 <= type < 256, type
 | 
| 471 |         if content is not None:
 | 
| 472 |             assert isinstance(content, str), repr(content)
 | 
| 473 |         self.type = type
 | 
| 474 |         self.content = content
 | 
| 475 |         self.name = name
 | 
| 476 | 
 | 
| 477 |     def match(self, node, results=None):
 | 
| 478 |         """Override match() to insist on a leaf node."""
 | 
| 479 |         if not isinstance(node, Leaf):
 | 
| 480 |             return False
 | 
| 481 |         return BasePattern.match(self, node, results)
 | 
| 482 | 
 | 
| 483 |     def _submatch(self, node, results=None):
 | 
| 484 |         """
 | 
| 485 |         Match the pattern's content to the node's children.
 | 
| 486 | 
 | 
| 487 |         This assumes the node type matches and self.content is not None.
 | 
| 488 | 
 | 
| 489 |         Returns True if it matches, False if not.
 | 
| 490 | 
 | 
| 491 |         If results is not None, it must be a dict which will be
 | 
| 492 |         updated with the nodes matching named subpatterns.
 | 
| 493 | 
 | 
| 494 |         When returning False, the results dict may still be updated.
 | 
| 495 |         """
 | 
| 496 |         return self.content == node.value
 | 
| 497 | 
 | 
| 498 | 
 | 
| 499 | class NodePattern(BasePattern):
 | 
| 500 | 
 | 
| 501 |     wildcards = False
 | 
| 502 | 
 | 
| 503 |     def __init__(self, type=None, content=None, name=None):
 | 
| 504 |         """
 | 
| 505 |         Initializer.  Takes optional type, content, and name.
 | 
| 506 | 
 | 
| 507 |         The type, if given, must be a symbol type (>= 256).  If the
 | 
| 508 |         type is None this matches *any* single node (leaf or not),
 | 
| 509 |         except if content is not None, in which it only matches
 | 
| 510 |         non-leaf nodes that also match the content pattern.
 | 
| 511 | 
 | 
| 512 |         The content, if not None, must be a sequence of Patterns that
 | 
| 513 |         must match the node's children exactly.  If the content is
 | 
| 514 |         given, the type must not be None.
 | 
| 515 | 
 | 
| 516 |         If a name is given, the matching node is stored in the results
 | 
| 517 |         dict under that key.
 | 
| 518 |         """
 | 
| 519 |         if type is not None:
 | 
| 520 |             assert type >= 256, type
 | 
| 521 |         if content is not None:
 | 
| 522 |             assert not isinstance(content, str), repr(content)
 | 
| 523 |             content = list(content)
 | 
| 524 |             for i, item in enumerate(content):
 | 
| 525 |                 assert isinstance(item, BasePattern), (i, item)
 | 
| 526 |                 if isinstance(item, WildcardPattern):
 | 
| 527 |                     self.wildcards = True
 | 
| 528 |         self.type = type
 | 
| 529 |         self.content = content
 | 
| 530 |         self.name = name
 | 
| 531 | 
 | 
| 532 |     def _submatch(self, node, results=None):
 | 
| 533 |         """
 | 
| 534 |         Match the pattern's content to the node's children.
 | 
| 535 | 
 | 
| 536 |         This assumes the node type matches and self.content is not None.
 | 
| 537 | 
 | 
| 538 |         Returns True if it matches, False if not.
 | 
| 539 | 
 | 
| 540 |         If results is not None, it must be a dict which will be
 | 
| 541 |         updated with the nodes matching named subpatterns.
 | 
| 542 | 
 | 
| 543 |         When returning False, the results dict may still be updated.
 | 
| 544 |         """
 | 
| 545 |         if self.wildcards:
 | 
| 546 |             for c, r in generate_matches(self.content, node.children):
 | 
| 547 |                 if c == len(node.children):
 | 
| 548 |                     if results is not None:
 | 
| 549 |                         results.update(r)
 | 
| 550 |                     return True
 | 
| 551 |             return False
 | 
| 552 |         if len(self.content) != len(node.children):
 | 
| 553 |             return False
 | 
| 554 |         for subpattern, child in zip(self.content, node.children):
 | 
| 555 |             if not subpattern.match(child, results):
 | 
| 556 |                 return False
 | 
| 557 |         return True
 | 
| 558 | 
 | 
| 559 | 
 | 
| 560 | class WildcardPattern(BasePattern):
 | 
| 561 | 
 | 
| 562 |     """
 | 
| 563 |     A wildcard pattern can match zero or more nodes.
 | 
| 564 | 
 | 
| 565 |     This has all the flexibility needed to implement patterns like:
 | 
| 566 | 
 | 
| 567 |     .*      .+      .?      .{m,n}
 | 
| 568 |     (a b c | d e | f)
 | 
| 569 |     (...)*  (...)+  (...)?  (...){m,n}
 | 
| 570 | 
 | 
| 571 |     except it always uses non-greedy matching.
 | 
| 572 |     """
 | 
| 573 | 
 | 
| 574 |     def __init__(self, content=None, min=0, max=HUGE, name=None):
 | 
| 575 |         """
 | 
| 576 |         Initializer.
 | 
| 577 | 
 | 
| 578 |         Args:
 | 
| 579 |             content: optional sequence of subsequences of patterns;
 | 
| 580 |                      if absent, matches one node;
 | 
| 581 |                      if present, each subsequence is an alternative [*]
 | 
| 582 |             min: optional minimum number of times to match, default 0
 | 
| 583 |             max: optional maximum number of times to match, default HUGE
 | 
| 584 |             name: optional name assigned to this match
 | 
| 585 | 
 | 
| 586 |         [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
 | 
| 587 |             equivalent to (a b c | d e | f g h); if content is None,
 | 
| 588 |             this is equivalent to '.' in regular expression terms.
 | 
| 589 |             The min and max parameters work as follows:
 | 
| 590 |                 min=0, max=maxint: .*
 | 
| 591 |                 min=1, max=maxint: .+
 | 
| 592 |                 min=0, max=1: .?
 | 
| 593 |                 min=1, max=1: .
 | 
| 594 |             If content is not None, replace the dot with the parenthesized
 | 
| 595 |             list of alternatives, e.g. (a b c | d e | f g h)*
 | 
| 596 |         """
 | 
| 597 |         assert 0 <= min <= max <= HUGE, (min, max)
 | 
| 598 |         if content is not None:
 | 
| 599 |             content = tuple(map(tuple, content))  # Protect against alterations
 | 
| 600 |             # Check sanity of alternatives
 | 
| 601 |             assert len(content), repr(content)  # Can't have zero alternatives
 | 
| 602 |             for alt in content:
 | 
| 603 |                 assert len(alt), repr(alt)  # Can have empty alternatives
 | 
| 604 |         self.content = content
 | 
| 605 |         self.min = min
 | 
| 606 |         self.max = max
 | 
| 607 |         self.name = name
 | 
| 608 | 
 | 
| 609 |     def optimize(self):
 | 
| 610 |         """Optimize certain stacked wildcard patterns."""
 | 
| 611 |         subpattern = None
 | 
| 612 |         if (self.content is not None and
 | 
| 613 |             len(self.content) == 1 and len(self.content[0]) == 1):
 | 
| 614 |             subpattern = self.content[0][0]
 | 
| 615 |         if self.min == 1 and self.max == 1:
 | 
| 616 |             if self.content is None:
 | 
| 617 |                 return NodePattern(name=self.name)
 | 
| 618 |             if subpattern is not None and  self.name == subpattern.name:
 | 
| 619 |                 return subpattern.optimize()
 | 
| 620 |         if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
 | 
| 621 |             subpattern.min <= 1 and self.name == subpattern.name):
 | 
| 622 |             return WildcardPattern(subpattern.content,
 | 
| 623 |                                    self.min*subpattern.min,
 | 
| 624 |                                    self.max*subpattern.max,
 | 
| 625 |                                    subpattern.name)
 | 
| 626 |         return self
 | 
| 627 | 
 | 
| 628 |     def match(self, node, results=None):
 | 
| 629 |         """Does this pattern exactly match a node?"""
 | 
| 630 |         return self.match_seq([node], results)
 | 
| 631 | 
 | 
| 632 |     def match_seq(self, nodes, results=None):
 | 
| 633 |         """Does this pattern exactly match a sequence of nodes?"""
 | 
| 634 |         for c, r in self.generate_matches(nodes):
 | 
| 635 |             if c == len(nodes):
 | 
| 636 |                 if results is not None:
 | 
| 637 |                     results.update(r)
 | 
| 638 |                     if self.name:
 | 
| 639 |                         results[self.name] = list(nodes)
 | 
| 640 |                 return True
 | 
| 641 |         return False
 | 
| 642 | 
 | 
| 643 |     def generate_matches(self, nodes):
 | 
| 644 |         """
 | 
| 645 |         Generator yielding matches for a sequence of nodes.
 | 
| 646 | 
 | 
| 647 |         Args:
 | 
| 648 |             nodes: sequence of nodes
 | 
| 649 | 
 | 
| 650 |         Yields:
 | 
| 651 |             (count, results) tuples where:
 | 
| 652 |             count: the match comprises nodes[:count];
 | 
| 653 |             results: dict containing named submatches.
 | 
| 654 |         """
 | 
| 655 |         if self.content is None:
 | 
| 656 |             # Shortcut for special case (see __init__.__doc__)
 | 
| 657 |             for count in range(self.min, 1 + min(len(nodes), self.max)):
 | 
| 658 |                 r = {}
 | 
| 659 |                 if self.name:
 | 
| 660 |                     r[self.name] = nodes[:count]
 | 
| 661 |                 yield count, r
 | 
| 662 |         elif self.name == "bare_name":
 | 
| 663 |             yield self._bare_name_matches(nodes)
 | 
| 664 |         else:
 | 
| 665 |             # The reason for this is that hitting the recursion limit usually
 | 
| 666 |             # results in some ugly messages about how RuntimeErrors are being
 | 
| 667 |             # ignored. We only have to do this on CPython, though, because other
 | 
| 668 |             # implementations don't have this nasty bug in the first place.
 | 
| 669 |             if hasattr(sys, "getrefcount"):
 | 
| 670 |                 save_stderr = sys.stderr
 | 
| 671 |                 sys.stderr = cStringIO.StringIO()
 | 
| 672 |             try:
 | 
| 673 |                 for count, r in self._recursive_matches(nodes, 0):
 | 
| 674 |                     if self.name:
 | 
| 675 |                         r[self.name] = nodes[:count]
 | 
| 676 |                     yield count, r
 | 
| 677 |             except RuntimeError:
 | 
| 678 |                 # We fall back to the iterative pattern matching scheme if the recursive
 | 
| 679 |                 # scheme hits the recursion limit.
 | 
| 680 |                 for count, r in self._iterative_matches(nodes):
 | 
| 681 |                     if self.name:
 | 
| 682 |                         r[self.name] = nodes[:count]
 | 
| 683 |                     yield count, r
 | 
| 684 |             finally:
 | 
| 685 |                 if hasattr(sys, "getrefcount"):
 | 
| 686 |                     sys.stderr = save_stderr
 | 
| 687 | 
 | 
| 688 |     def _iterative_matches(self, nodes):
 | 
| 689 |         """Helper to iteratively yield the matches."""
 | 
| 690 |         nodelen = len(nodes)
 | 
| 691 |         if 0 >= self.min:
 | 
| 692 |             yield 0, {}
 | 
| 693 | 
 | 
| 694 |         results = []
 | 
| 695 |         # generate matches that use just one alt from self.content
 | 
| 696 |         for alt in self.content:
 | 
| 697 |             for c, r in generate_matches(alt, nodes):
 | 
| 698 |                 yield c, r
 | 
| 699 |                 results.append((c, r))
 | 
| 700 | 
 | 
| 701 |         # for each match, iterate down the nodes
 | 
| 702 |         while results:
 | 
| 703 |             new_results = []
 | 
| 704 |             for c0, r0 in results:
 | 
| 705 |                 # stop if the entire set of nodes has been matched
 | 
| 706 |                 if c0 < nodelen and c0 <= self.max:
 | 
| 707 |                     for alt in self.content:
 | 
| 708 |                         for c1, r1 in generate_matches(alt, nodes[c0:]):
 | 
| 709 |                             if c1 > 0:
 | 
| 710 |                                 r = {}
 | 
| 711 |                                 r.update(r0)
 | 
| 712 |                                 r.update(r1)
 | 
| 713 |                                 yield c0 + c1, r
 | 
| 714 |                                 new_results.append((c0 + c1, r))
 | 
| 715 |             results = new_results
 | 
| 716 | 
 | 
| 717 |     def _bare_name_matches(self, nodes):
 | 
| 718 |         """Special optimized matcher for bare_name."""
 | 
| 719 |         count = 0
 | 
| 720 |         r = {}
 | 
| 721 |         done = False
 | 
| 722 |         max = len(nodes)
 | 
| 723 |         while not done and count < max:
 | 
| 724 |             done = True
 | 
| 725 |             for leaf in self.content:
 | 
| 726 |                 if leaf[0].match(nodes[count], r):
 | 
| 727 |                     count += 1
 | 
| 728 |                     done = False
 | 
| 729 |                     break
 | 
| 730 |         r[self.name] = nodes[:count]
 | 
| 731 |         return count, r
 | 
| 732 | 
 | 
| 733 |     def _recursive_matches(self, nodes, count):
 | 
| 734 |         """Helper to recursively yield the matches."""
 | 
| 735 |         assert self.content is not None
 | 
| 736 |         if count >= self.min:
 | 
| 737 |             yield 0, {}
 | 
| 738 |         if count < self.max:
 | 
| 739 |             for alt in self.content:
 | 
| 740 |                 for c0, r0 in generate_matches(alt, nodes):
 | 
| 741 |                     for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
 | 
| 742 |                         r = {}
 | 
| 743 |                         r.update(r0)
 | 
| 744 |                         r.update(r1)
 | 
| 745 |                         yield c0 + c1, r
 | 
| 746 | 
 | 
| 747 | 
 | 
| 748 | class NegatedPattern(BasePattern):
 | 
| 749 | 
 | 
| 750 |     def __init__(self, content=None):
 | 
| 751 |         """
 | 
| 752 |         Initializer.
 | 
| 753 | 
 | 
| 754 |         The argument is either a pattern or None.  If it is None, this
 | 
| 755 |         only matches an empty sequence (effectively '$' in regex
 | 
| 756 |         lingo).  If it is not None, this matches whenever the argument
 | 
| 757 |         pattern doesn't have any matches.
 | 
| 758 |         """
 | 
| 759 |         if content is not None:
 | 
| 760 |             assert isinstance(content, BasePattern), repr(content)
 | 
| 761 |         self.content = content
 | 
| 762 | 
 | 
| 763 |     def match(self, node):
 | 
| 764 |         # We never match a node in its entirety
 | 
| 765 |         return False
 | 
| 766 | 
 | 
| 767 |     def match_seq(self, nodes):
 | 
| 768 |         # We only match an empty sequence of nodes in its entirety
 | 
| 769 |         return len(nodes) == 0
 | 
| 770 | 
 | 
| 771 |     def generate_matches(self, nodes):
 | 
| 772 |         if self.content is None:
 | 
| 773 |             # Return a match if there is an empty sequence
 | 
| 774 |             if len(nodes) == 0:
 | 
| 775 |                 yield 0, {}
 | 
| 776 |         else:
 | 
| 777 |             # Return a match if the argument pattern has no matches
 | 
| 778 |             for c, r in self.content.generate_matches(nodes):
 | 
| 779 |                 return
 | 
| 780 |             yield 0, {}
 | 
| 781 | 
 | 
| 782 | 
 | 
| 783 | def generate_matches(patterns, nodes):
 | 
| 784 |     """
 | 
| 785 |     Generator yielding matches for a sequence of patterns and nodes.
 | 
| 786 | 
 | 
| 787 |     Args:
 | 
| 788 |         patterns: a sequence of patterns
 | 
| 789 |         nodes: a sequence of nodes
 | 
| 790 | 
 | 
| 791 |     Yields:
 | 
| 792 |         (count, results) tuples where:
 | 
| 793 |         count: the entire sequence of patterns matches nodes[:count];
 | 
| 794 |         results: dict containing named submatches.
 | 
| 795 |         """
 | 
| 796 |     if not patterns:
 | 
| 797 |         yield 0, {}
 | 
| 798 |     else:
 | 
| 799 |         p, rest = patterns[0], patterns[1:]
 | 
| 800 |         for c0, r0 in p.generate_matches(nodes):
 | 
| 801 |             if not rest:
 | 
| 802 |                 yield c0, r0
 | 
| 803 |             else:
 | 
| 804 |                 for c1, r1 in generate_matches(rest, nodes[c0:]):
 | 
| 805 |                     r = {}
 | 
| 806 |                     r.update(r0)
 | 
| 807 |                     r.update(r1)
 | 
| 808 |                     yield c0 + c1, r
 |