| 1 | """
 | 
| 2 | tdop.py
 | 
| 3 | """
 | 
| 4 | 
 | 
| 5 | from _devbuild.gen.typed_arith_asdl import arith_expr_t
 | 
| 6 | from typing import (Dict, List, Callable, Optional, Iterator, Tuple, NoReturn)
 | 
| 7 | from typing import TYPE_CHECKING
 | 
| 8 | 
 | 
| 9 | class ParseError(Exception):
 | 
| 10 |   pass
 | 
| 11 | 
 | 
| 12 | 
 | 
| 13 | #
 | 
| 14 | # Default parsing functions give errors
 | 
| 15 | #
 | 
| 16 | 
 | 
| 17 | def NullError(p, token, bp):
 | 
| 18 |   # type: (Parser, Token, int) -> NoReturn
 | 
| 19 |   raise ParseError("%s can't be used in prefix position" % token)
 | 
| 20 | 
 | 
| 21 | 
 | 
| 22 | def LeftError(p, token, left, rbp):
 | 
| 23 |   # type: (Parser, Token, arith_expr_t, int) -> NoReturn
 | 
| 24 |   # Hm is this not called because of binding power?
 | 
| 25 |   raise ParseError("%s can't be used in infix position" % token)
 | 
| 26 | 
 | 
| 27 | 
 | 
| 28 | #
 | 
| 29 | # Input
 | 
| 30 | #
 | 
| 31 | 
 | 
| 32 | class Token(object):
 | 
| 33 |   def __init__(self, type, val, loc=None):
 | 
| 34 |     # type: (str, str, Optional[Tuple[int, int]]) -> None
 | 
| 35 |     self.type = type
 | 
| 36 |     self.val = val
 | 
| 37 | 
 | 
| 38 |   def __repr__(self):
 | 
| 39 |     # type: () -> str
 | 
| 40 |     return '<Token %s %s>' % (self.type, self.val)
 | 
| 41 | 
 | 
| 42 | 
 | 
| 43 | #
 | 
| 44 | # Parser definition
 | 
| 45 | #
 | 
| 46 | 
 | 
| 47 | class LeftInfo(object):
 | 
| 48 |   """Row for operator.
 | 
| 49 | 
 | 
| 50 |   In C++ this should be a big array.
 | 
| 51 |   """
 | 
| 52 |   def __init__(self, led=None, lbp=0, rbp=0):
 | 
| 53 |     # type: (Optional[LeftFunc], int, int) -> None
 | 
| 54 |     self.led = led or LeftError
 | 
| 55 |     self.lbp = lbp
 | 
| 56 |     self.rbp = rbp
 | 
| 57 | 
 | 
| 58 | 
 | 
| 59 | class NullInfo(object):
 | 
| 60 |   """Row for operator.
 | 
| 61 | 
 | 
| 62 |   In C++ this should be a big array.
 | 
| 63 |   """
 | 
| 64 |   def __init__(self, nud=None, bp=0):
 | 
| 65 |     # type: (Optional[NullFunc], int) -> None
 | 
| 66 |     self.nud = nud or NullError
 | 
| 67 |     self.bp = bp
 | 
| 68 | 
 | 
| 69 | 
 | 
| 70 | class ParserSpec(object):
 | 
| 71 |   """Specification for a TDOP parser."""
 | 
| 72 | 
 | 
| 73 |   def __init__(self):
 | 
| 74 |     # type: () -> None
 | 
| 75 |     self.null_lookup = {}  # type: Dict[str, NullInfo]
 | 
| 76 |     self.left_lookup = {}  # type: Dict[str, LeftInfo]
 | 
| 77 | 
 | 
| 78 |   def Null(self, bp, nud, tokens):
 | 
| 79 |     # type: (int, NullFunc, List[str]) -> None
 | 
| 80 |     """Register a token that doesn't take anything on the left.
 | 
| 81 | 
 | 
| 82 |     Examples: constant, prefix operator, error.
 | 
| 83 |     """
 | 
| 84 |     for token in tokens:
 | 
| 85 |       self.null_lookup[token] = NullInfo(nud=nud, bp=bp)
 | 
| 86 |       if token not in self.left_lookup:
 | 
| 87 |         self.left_lookup[token] = LeftInfo()  # error
 | 
| 88 | 
 | 
| 89 |   def _RegisterLed(self, lbp, rbp, led, tokens):
 | 
| 90 |     # type: (int, int, LeftFunc, List[str]) -> None
 | 
| 91 |     for token in tokens:
 | 
| 92 |       if token not in self.null_lookup:
 | 
| 93 |         self.null_lookup[token] = NullInfo(NullError)
 | 
| 94 |       self.left_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
 | 
| 95 | 
 | 
| 96 |   def Left(self, bp, led, tokens):
 | 
| 97 |     # type: (int, LeftFunc, List[str]) -> None
 | 
| 98 |     """Register a token that takes an expression on the left."""
 | 
| 99 |     self._RegisterLed(bp, bp, led, tokens)
 | 
| 100 | 
 | 
| 101 |   def LeftRightAssoc(self, bp, led, tokens):
 | 
| 102 |     # type: (int, LeftFunc, List[str]) -> None
 | 
| 103 |     """Register a right associative operator."""
 | 
| 104 |     self._RegisterLed(bp, bp-1, led, tokens)
 | 
| 105 | 
 | 
| 106 |   def LookupNull(self, token):
 | 
| 107 |     # type: (str) -> NullInfo
 | 
| 108 |     """Get the parsing function and precedence for a null position token."""
 | 
| 109 |     try:
 | 
| 110 |       nud = self.null_lookup[token]
 | 
| 111 |     except KeyError:
 | 
| 112 |       raise ParseError('Unexpected token %r' % token)
 | 
| 113 |     return nud
 | 
| 114 | 
 | 
| 115 |   def LookupLeft(self, token):
 | 
| 116 |     # type: (str) -> LeftInfo
 | 
| 117 |     """Get the parsing function and precedence for a left position token."""
 | 
| 118 |     try:
 | 
| 119 |       led = self.left_lookup[token]
 | 
| 120 |     except KeyError:
 | 
| 121 |       raise ParseError('Unexpected token %r' % token)
 | 
| 122 |     return led
 | 
| 123 | 
 | 
| 124 | 
 | 
| 125 | EOF_TOKEN = Token('eof', 'eof')
 | 
| 126 | 
 | 
| 127 | 
 | 
| 128 | class Parser(object):
 | 
| 129 |   """Recursive TDOP parser."""
 | 
| 130 | 
 | 
| 131 |   def __init__(self, spec, lexer):
 | 
| 132 |     # type: (ParserSpec, Iterator[Token]) -> None
 | 
| 133 |     self.spec = spec
 | 
| 134 |     self.lexer = lexer  # iterable
 | 
| 135 |     self.token = Token('undefined', '')  # current token
 | 
| 136 | 
 | 
| 137 |   def AtToken(self, token_type):
 | 
| 138 |     # type: (str) -> bool
 | 
| 139 |     """Test if we are looking at a token."""
 | 
| 140 |     return self.token.type == token_type
 | 
| 141 | 
 | 
| 142 |   def Next(self):
 | 
| 143 |     # type: () -> None
 | 
| 144 |     """Move to the next token."""
 | 
| 145 |     try:
 | 
| 146 |       t = self.lexer.next()
 | 
| 147 |     except StopIteration:
 | 
| 148 |       t = EOF_TOKEN
 | 
| 149 |     self.token = t
 | 
| 150 | 
 | 
| 151 |   def Eat(self, val):
 | 
| 152 |     # type: (str) -> None
 | 
| 153 |     """Assert the value of the current token, then move to the next token."""
 | 
| 154 |     if val and not self.AtToken(val):
 | 
| 155 |       raise ParseError('expected %s, got %s' % (val, self.token))
 | 
| 156 |     self.Next()
 | 
| 157 | 
 | 
| 158 |   def ParseUntil(self, rbp):
 | 
| 159 |     # type: (int) -> arith_expr_t
 | 
| 160 |     """
 | 
| 161 |     Parse to the right, eating tokens until we encounter a token with binding
 | 
| 162 |     power LESS THAN OR EQUAL TO rbp.
 | 
| 163 |     """
 | 
| 164 |     if self.AtToken('eof'):
 | 
| 165 |       raise ParseError('Unexpected end of input')
 | 
| 166 | 
 | 
| 167 |     t = self.token
 | 
| 168 |     self.Next()  # skip over the token, e.g. ! ~ + -
 | 
| 169 | 
 | 
| 170 |     null_info = self.spec.LookupNull(t.type)
 | 
| 171 |     node = null_info.nud(self, t, null_info.bp)
 | 
| 172 | 
 | 
| 173 |     while True:
 | 
| 174 |       t = self.token
 | 
| 175 |       left_info = self.spec.LookupLeft(t.type)
 | 
| 176 | 
 | 
| 177 |       # Examples:
 | 
| 178 |       # If we see 1*2+  , rbp = 27 and lbp = 25, so stop.
 | 
| 179 |       # If we see 1+2+  , rbp = 25 and lbp = 25, so stop.
 | 
| 180 |       # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
 | 
| 181 |       if rbp >= left_info.lbp:
 | 
| 182 |         break
 | 
| 183 |       self.Next()  # skip over the token, e.g. / *
 | 
| 184 | 
 | 
| 185 |       node = left_info.led(self, t, node, left_info.rbp)
 | 
| 186 | 
 | 
| 187 |     return node
 | 
| 188 | 
 | 
| 189 |   def Parse(self):
 | 
| 190 |     # type: () -> arith_expr_t
 | 
| 191 |     self.Next()
 | 
| 192 |     return self.ParseUntil(0)
 | 
| 193 | 
 | 
| 194 | 
 | 
| 195 | # Must define these aliases AFTER Parser and Token are defined.
 | 
| 196 | if TYPE_CHECKING:
 | 
| 197 |   NullFunc = Callable[[Parser, Token, int], arith_expr_t]
 | 
| 198 |   LeftFunc = Callable[[Parser, Token, arith_expr_t, int], arith_expr_t]
 | 
| 199 | 
 | 
| 200 | 
 |