1 | #!/usr/bin/env python2
|
2 | """
|
3 | arith_parse.py: Parse shell-like and C-like arithmetic.
|
4 | """
|
5 | from __future__ import print_function
|
6 |
|
7 | import sys
|
8 |
|
9 | import tdop
|
10 | from tdop import CompositeNode
|
11 |
|
12 | import demo_asdl
|
13 |
|
14 | arith_expr = demo_asdl.arith_expr
|
15 | op_id_e = demo_asdl.op_id_e
|
16 |
|
17 |
|
18 | #
|
19 | # Null Denotation -- token that takes nothing on the left
|
20 | #
|
21 |
|
def NullConstant(p, token, bp):
    """Null denotation for leaf tokens: numeric literals and variable names.

    Args:
        p: the parser (not used here).
        token: the current token; ``token.type`` selects the node kind and
            ``token.val`` is passed to the node constructor.
        bp: binding power (not used here; a leaf parses no operand).

    Returns:
        ``arith_expr.Const(token.val)`` for a 'number' token, or
        ``arith_expr.ArithVar(token.val)`` for a 'name' token.

    Raises:
        AssertionError: for any other token type, carrying that type.
    """
    kind = token.type
    if kind == 'name':
        # A bare string has to be wrapped in some variant of arith_expr.
        return arith_expr.ArithVar(token.val)
    if kind == 'number':
        return arith_expr.Const(token.val)
    raise AssertionError(kind)
|
30 |
|
31 |
|
def NullParen(p, token, bp):
    """Null denotation for '(' used as arithmetic grouping.

    Parses the enclosed expression with binding power ``bp``, then consumes
    the closing ')' via ``p.Eat``.

    Returns:
        The node parsed between the parentheses; no wrapper node is added.
    """
    inner = p.ParseUntil(bp)
    p.Eat(')')
    return inner
|
37 |
|
38 |
|
def NullPrefixOp(p, token, bp):
    """Null denotation for a prefix operator.

    The binding power decides how much of what follows becomes the operand:

    Low precedence (return, raise, etc.):
        return x+y  is  return (x+y), not (return x) + y

    High precedence (logical negation, bitwise complement, etc.):
        !x && y  is  (!x) && y, not !(x && y)

    Returns:
        A CompositeNode for ``token`` with the parsed operand as its only
        child.
    """
    return CompositeNode(token, [p.ParseUntil(bp)])
|
50 |
|
51 |
|
def NullIncDec(p, token, bp):
    """Null denotation for pre-increment/decrement: ++x or ++x[1].

    Returns:
        A CompositeNode for ``token`` with the operand as its only child.

    Raises:
        tdop.ParseError: if the operand's token type is neither 'name' nor
            'get', i.e. it is not something that can be assigned to.
    """
    operand = p.ParseUntil(bp)
    # NOTE(review): this reads operand.token, so it presumably expects a
    # CompositeNode-like operand -- confirm the ASDL nodes expose .token.
    operand_type = operand.token.type
    if operand_type != 'name' and operand_type != 'get':
        raise tdop.ParseError(
            "Can't assign to %r (%s)" % (operand, operand.token))
    return CompositeNode(token, [operand])
|
58 |
|
59 |
|
60 | #
|
61 | # Left Denotation -- token that takes an expression on the left
|
62 | #
|
63 |
|
def LeftIncDec(p, token, left, rbp):
    """Left denotation for post-increment/decrement: i++ and i--.

    The token's type is rewritten in place with a 'post' prefix before the
    node is built.

    Returns:
        A CompositeNode for the retagged ``token`` with ``left`` as its only
        child.

    Raises:
        tdop.ParseError: if ``left``'s token type is neither 'name' nor
            'get', i.e. it is not something that can be assigned to.
    """
    target_type = left.token.type
    if target_type != 'name' and target_type != 'get':
        raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
    token.type = 'post' + token.type
    return CompositeNode(token, [left])
|
71 |
|
72 |
|
def LeftIndex(p, token, left, unused_bp):
    """Left denotation for '[': an index ``f[x+1]`` or a slice ``f[1:4]``.

    Args:
        p: the parser, positioned just after '['.
        token: the '[' token (not used here).
        left: the expression being indexed; must be an ArithVar.
        unused_bp: binding power (ignored; the bracket contents are parsed
            with binding power 0).

    Returns:
        ``demo_asdl.Slice(left, index, end, None)`` when a ':' and an end
        expression are present, otherwise ``demo_asdl.Index(left, index)``.

    Raises:
        tdop.ParseError: if ``left`` is not an ArithVar.
    """
    # Only a plain variable may be indexed.
    # NOTE(review): this presumably rejects chained forms such as f[x][y],
    # since the result of the first index is not an ArithVar -- confirm.
    if not isinstance(left, demo_asdl.ArithVar):
        raise tdop.ParseError("%s can't be indexed" % left)

    index = p.ParseUntil(0)
    end = None
    if p.AtToken(':'):
        p.Next()
        end = p.ParseUntil(0)

    p.Eat(']')

    # TODO: Both end and step should be optional, e.g. 1:4 and 1:4:2.

    # Compare against None rather than relying on the truthiness of a node.
    if end is not None:
        return demo_asdl.Slice(left, index, end, None)
    return demo_asdl.Index(left, index)
|
96 |
|
97 |
|
def LeftTernary(p, token, left, bp):
    """Left denotation for the conditional operator, e.g. a > 1 ? x : y.

    Args:
        p: the parser, positioned just after '?'.
        token: the '?' token.
        left: the already-parsed condition.
        bp: right binding power used for the false branch.

    Returns:
        A CompositeNode for ``token`` whose children are
        [condition, true_expr, false_expr].
    """
    # In C the operand between '?' and ':' is parsed as if parenthesized:
    # its precedence relative to ?: is ignored.  So parse it with binding
    # power 0, which allows e.g. `a ? b = 1 : c` and `a ? b, c : d`.
    true_expr = p.ParseUntil(0)
    p.Eat(':')
    false_expr = p.ParseUntil(bp)
    return CompositeNode(token, [left, true_expr, false_expr])
|
105 |
|
106 |
|
def LeftBinaryOp(p, token, left, rbp):
    """Left denotation for an ordinary binary operator like 1+2 or 2*3.

    Args:
        p: the parser, positioned just after the operator.
        token: the operator token; ``token.val`` selects the op_id_e member.
        left: the already-parsed left operand.
        rbp: right binding power used to parse the right operand.

    Returns:
        ``arith_expr.ArithBinary(op_id, left, right)``.

    Raises:
        AssertionError: if ``token.val`` is not '+', '-' or '*'.  The check
            happens before the right operand is parsed.
    """
    symbol = token.val
    if symbol not in ('+', '-', '*'):
        raise AssertionError(symbol)

    if symbol == '+':
        operator = op_id_e.Plus
    elif symbol == '-':
        operator = op_id_e.Minus
    else:
        operator = op_id_e.Star

    right = p.ParseUntil(rbp)
    return arith_expr.ArithBinary(operator, left, right)
|
118 |
|
119 |
|
def LeftAssign(p, token, left, rbp):
    """Left denotation for assignment operators: x += 1, or a[i] += 1.

    Args:
        p: the parser, positioned just after the operator.
        token: the assignment operator token.
        left: the already-parsed assignment target.
        rbp: right binding power used to parse the assigned value.

    Returns:
        A CompositeNode for ``token`` with children [target, value].

    Raises:
        tdop.ParseError: if ``left``'s token type is neither 'name' nor
            'get'.  The check happens before the value is parsed.
    """
    target_type = left.token.type
    if target_type != 'name' and target_type != 'get':
        raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
    value = p.ParseUntil(rbp)
    return CompositeNode(token, [left, value])
|
126 |
|
127 |
|
def LeftComma(p, token, left, rbp):
    """Left denotation for ',' as in: foo, bar, baz

    Could be the sequencing operator, or a tuple without parens.

    Returns:
        ``left`` itself, with the new item appended to its children, when
        ``left`` is already a ',' node; otherwise a new CompositeNode for
        ``token`` with children [left, item].
    """
    item = p.ParseUntil(rbp)
    if left.token.type != ',':
        return CompositeNode(token, [left, item])
    # Flatten: keep adding children to the existing comma node.
    left.children.append(item)
    return left
|
139 |
|
140 |
|
# Binding power of ','.  It is a named constant because ',' is overloaded:
# LeftFuncCall parses each call argument with this value so that a comma
# inside a call is not consumed as the comma operator.
COMMA_PREC = 1
|
143 |
|
def LeftFuncCall(p, token, left, unused_bp):
    """Left denotation for '(': a function call such as f(a, b).

    Args:
        p: the parser, positioned just after '('.
        token: the '(' token (not used here).
        left: the callee; must be an ArithVar.
        unused_bp: binding power (ignored; arguments are parsed with
            COMMA_PREC).

    Returns:
        ``demo_asdl.FuncCall(func_name, args)`` where ``func_name`` is
        ``left.name`` and ``args`` is the list of parsed argument nodes.

    Raises:
        tdop.ParseError: if ``left`` is not an ArithVar, or if an argument
            is followed by something other than ',' or ')'.
    """
    # Only a plain variable may be called.
    if not isinstance(left, demo_asdl.ArithVar):
        raise tdop.ParseError("%s can't be called" % left)
    func_name = left.name  # the callee's name as a string

    args = []
    while not p.AtToken(')'):
        # We don't want to grab the comma, i.e. it is NOT the sequence
        # operator here, so parse each argument with COMMA_PREC.
        args.append(p.ParseUntil(COMMA_PREC))
        if p.AtToken(','):
            p.Next()
        elif not p.AtToken(')'):
            # Without this check `f(a b)` was silently accepted as two
            # arguments.
            raise tdop.ParseError(
                "Expected ',' or ')' in call to %s" % func_name)
    p.Eat(")")
    return demo_asdl.FuncCall(func_name, args)
|
160 |
|
161 |
|
def MakeShellParserSpec():
    """
    Build the tdop.ParserSpec for shell-like / C-like arithmetic.

    Each call below registers a list of tokens with a binding power and the
    handler function that parses them; larger binding powers bind tighter.
    Null* handlers are for tokens that start an expression, Left* handlers
    are for tokens that follow an already-parsed left operand.

    Compare the code below with this table of C operator precedence:
    http://en.cppreference.com/w/c/language/operator_precedence

    Returns:
        The populated tdop.ParserSpec.
    """
    spec = tdop.ParserSpec()

    # 31 -- postfix ++/--, function call and indexing: the highest binding
    # power in this table.
    spec.Left(31, LeftIncDec, ['++', '--'])
    spec.Left(31, LeftFuncCall, ['('])
    spec.Left(31, LeftIndex, ['['])

    # 29 -- binds to everything except function call, indexing, postfix ops
    spec.Null(29, NullIncDec, ['++', '--'])
    spec.Null(29, NullPrefixOp, ['+', '!', '~', '-'])

    # NOTE(review): LeftBinaryOp in this file only maps '+', '-' and '*' and
    # raises AssertionError for any other operator registered to it below.

    # Right associative: 2 ** 3 ** 2 == 2 ** (3 ** 2)
    spec.LeftRightAssoc(27, LeftBinaryOp, ['**'])
    spec.Left(25, LeftBinaryOp, ['*', '/', '%'])

    spec.Left(23, LeftBinaryOp, ['+', '-'])
    spec.Left(21, LeftBinaryOp, ['<<', '>>'])
    spec.Left(19, LeftBinaryOp, ['<', '>', '<=', '>='])
    spec.Left(17, LeftBinaryOp, ['!=', '=='])

    spec.Left(15, LeftBinaryOp, ['&'])
    spec.Left(13, LeftBinaryOp, ['^'])
    spec.Left(11, LeftBinaryOp, ['|'])
    spec.Left(9, LeftBinaryOp, ['&&'])
    spec.Left(7, LeftBinaryOp, ['||'])

    # Right associative, like the assignment operators below.
    spec.LeftRightAssoc(5, LeftTernary, ['?'])

    # Right associative: a = b = 2 is a = (b = 2)
    spec.LeftRightAssoc(3, LeftAssign, [
        '=',
        '+=', '-=', '*=', '/=', '%=',
        '<<=', '>>=', '&=', '^=', '|='])

    # The comma shares COMMA_PREC with LeftFuncCall's argument parsing.
    spec.Left(COMMA_PREC, LeftComma, [','])

    # 0 precedence -- doesn't bind until )
    spec.Null(0, NullParen, ['('])  # for grouping

    # -1 precedence -- never used
    spec.Null(-1, NullConstant, ['name', 'number'])
    # These tokens cannot start an expression; tdop.NullError handles them.
    spec.Null(-1, tdop.NullError, [')', ']', ':', 'eof'])

    return spec
|
212 |
|
213 |
|
def MakeParser(s):
    """Return a tdop.Parser over the tokens of string ``s``. Used by tests.

    The parser is built from a fresh MakeShellParserSpec() and the lexer
    returned by tdop.Tokenize(s).
    """
    return tdop.Parser(MakeShellParserSpec(), tdop.Tokenize(s))
|
220 |
|
221 |
|
def ParseShell(s, expected=None):
    """Parse ``s`` and optionally check the tree's repr. Used by tests.

    Args:
        s: the arithmetic expression to parse.
        expected: if not None, the string that ``repr(tree)`` must equal.

    Returns:
        The tree returned by the parser's Parse().

    Raises:
        AssertionError: if ``expected`` is given and differs from the repr
            of the parsed tree.
    """
    tree = MakeParser(s).Parse()

    if expected is not None:
        sexpr = repr(tree)
        # Raise explicitly instead of using an `assert` statement, which is
        # stripped under `python -O` and would let a mismatch pass silently.
        if sexpr != expected:
            raise AssertionError('%r != %r' % (sexpr, expected))

    return tree
|
233 |
|
234 |
|
def main(argv):
    """Command-line entry point: parse argv[1] and print the resulting tree.

    Args:
        argv: the full argument vector; argv[1] is the expression to parse.

    Prints a usage line to stdout when no expression is given, and an error
    message to stderr when the expression fails to parse.
    """
    try:
        s = argv[1]
    except IndexError:
        print('Usage: ./arith_parse.py EXPRESSION')
        return

    try:
        tree = ParseShell(s)
    except tdop.ParseError as e:
        print('Error parsing %r: %s' % (s, e), file=sys.stderr)
    else:
        # Print only on success: after a ParseError `tree` is unbound, and
        # printing it would raise NameError.
        print(tree)
|
246 |
|
247 |
|
# When run as a script, hand the full argument vector to main().
if __name__ == '__main__':
    main(sys.argv)
|