OILS / asdl / examples / tdop.py View on Github | oilshell.org

200 lines, 93 significant
1"""
2tdop.py
3"""
4
5from _devbuild.gen.typed_arith_asdl import arith_expr_t
6from typing import (Dict, List, Callable, Optional, Iterator, Tuple, NoReturn)
7from typing import TYPE_CHECKING
8
9class ParseError(Exception):
10 pass
11
12
13#
14# Default parsing functions give errors
15#
16
17def NullError(p, token, bp):
18 # type: (Parser, Token, int) -> NoReturn
19 raise ParseError("%s can't be used in prefix position" % token)
20
21
22def LeftError(p, token, left, rbp):
23 # type: (Parser, Token, arith_expr_t, int) -> NoReturn
24 # Hm is this not called because of binding power?
25 raise ParseError("%s can't be used in infix position" % token)
26
27
28#
29# Input
30#
31
32class Token(object):
33 def __init__(self, type, val, loc=None):
34 # type: (str, str, Optional[Tuple[int, int]]) -> None
35 self.type = type
36 self.val = val
37
38 def __repr__(self):
39 # type: () -> str
40 return '<Token %s %s>' % (self.type, self.val)
41
42
43#
44# Parser definition
45#
46
47class LeftInfo(object):
48 """Row for operator.
49
50 In C++ this should be a big array.
51 """
52 def __init__(self, led=None, lbp=0, rbp=0):
53 # type: (Optional[LeftFunc], int, int) -> None
54 self.led = led or LeftError
55 self.lbp = lbp
56 self.rbp = rbp
57
58
59class NullInfo(object):
60 """Row for operator.
61
62 In C++ this should be a big array.
63 """
64 def __init__(self, nud=None, bp=0):
65 # type: (Optional[NullFunc], int) -> None
66 self.nud = nud or NullError
67 self.bp = bp
68
69
70class ParserSpec(object):
71 """Specification for a TDOP parser."""
72
73 def __init__(self):
74 # type: () -> None
75 self.null_lookup = {} # type: Dict[str, NullInfo]
76 self.left_lookup = {} # type: Dict[str, LeftInfo]
77
78 def Null(self, bp, nud, tokens):
79 # type: (int, NullFunc, List[str]) -> None
80 """Register a token that doesn't take anything on the left.
81
82 Examples: constant, prefix operator, error.
83 """
84 for token in tokens:
85 self.null_lookup[token] = NullInfo(nud=nud, bp=bp)
86 if token not in self.left_lookup:
87 self.left_lookup[token] = LeftInfo() # error
88
89 def _RegisterLed(self, lbp, rbp, led, tokens):
90 # type: (int, int, LeftFunc, List[str]) -> None
91 for token in tokens:
92 if token not in self.null_lookup:
93 self.null_lookup[token] = NullInfo(NullError)
94 self.left_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
95
96 def Left(self, bp, led, tokens):
97 # type: (int, LeftFunc, List[str]) -> None
98 """Register a token that takes an expression on the left."""
99 self._RegisterLed(bp, bp, led, tokens)
100
101 def LeftRightAssoc(self, bp, led, tokens):
102 # type: (int, LeftFunc, List[str]) -> None
103 """Register a right associative operator."""
104 self._RegisterLed(bp, bp-1, led, tokens)
105
106 def LookupNull(self, token):
107 # type: (str) -> NullInfo
108 """Get the parsing function and precedence for a null position token."""
109 try:
110 nud = self.null_lookup[token]
111 except KeyError:
112 raise ParseError('Unexpected token %r' % token)
113 return nud
114
115 def LookupLeft(self, token):
116 # type: (str) -> LeftInfo
117 """Get the parsing function and precedence for a left position token."""
118 try:
119 led = self.left_lookup[token]
120 except KeyError:
121 raise ParseError('Unexpected token %r' % token)
122 return led
123
124
125EOF_TOKEN = Token('eof', 'eof')
126
127
128class Parser(object):
129 """Recursive TDOP parser."""
130
131 def __init__(self, spec, lexer):
132 # type: (ParserSpec, Iterator[Token]) -> None
133 self.spec = spec
134 self.lexer = lexer # iterable
135 self.token = Token('undefined', '') # current token
136
137 def AtToken(self, token_type):
138 # type: (str) -> bool
139 """Test if we are looking at a token."""
140 return self.token.type == token_type
141
142 def Next(self):
143 # type: () -> None
144 """Move to the next token."""
145 try:
146 t = self.lexer.next()
147 except StopIteration:
148 t = EOF_TOKEN
149 self.token = t
150
151 def Eat(self, val):
152 # type: (str) -> None
153 """Assert the value of the current token, then move to the next token."""
154 if val and not self.AtToken(val):
155 raise ParseError('expected %s, got %s' % (val, self.token))
156 self.Next()
157
158 def ParseUntil(self, rbp):
159 # type: (int) -> arith_expr_t
160 """
161 Parse to the right, eating tokens until we encounter a token with binding
162 power LESS THAN OR EQUAL TO rbp.
163 """
164 if self.AtToken('eof'):
165 raise ParseError('Unexpected end of input')
166
167 t = self.token
168 self.Next() # skip over the token, e.g. ! ~ + -
169
170 null_info = self.spec.LookupNull(t.type)
171 node = null_info.nud(self, t, null_info.bp)
172
173 while True:
174 t = self.token
175 left_info = self.spec.LookupLeft(t.type)
176
177 # Examples:
178 # If we see 1*2+ , rbp = 27 and lbp = 25, so stop.
179 # If we see 1+2+ , rbp = 25 and lbp = 25, so stop.
180 # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
181 if rbp >= left_info.lbp:
182 break
183 self.Next() # skip over the token, e.g. / *
184
185 node = left_info.led(self, t, node, left_info.rbp)
186
187 return node
188
189 def Parse(self):
190 # type: () -> arith_expr_t
191 self.Next()
192 return self.ParseUntil(0)
193
194
195# Must define these aliases AFTER Parser and Token are defined.
196if TYPE_CHECKING:
197 NullFunc = Callable[[Parser, Token, int], arith_expr_t]
198 LeftFunc = Callable[[Parser, Token, arith_expr_t, int], arith_expr_t]
199
200