| 1 | #!/usr/bin/env python
|
| 2 | from __future__ import print_function
|
| 3 | """
|
| 4 | arith_parse.py: Parse shell-like and C-like arithmetic.
|
| 5 | """
|
| 6 |
|
| 7 | import sys
|
| 8 |
|
| 9 | from asdl import tdop
|
| 10 | from asdl.tdop import CompositeNode
|
| 11 | from asdl import arith_ast
|
| 12 |
|
| 13 | op_id = arith_ast.op_id_e # TODO: Rename this back.
|
| 14 |
|
| 15 |
|
| 16 | #
|
| 17 | # Null Denotation -- token that takes nothing on the left
|
| 18 | #
|
| 19 |
|
| 20 | def NullConstant(p, token, bp):
|
| 21 | if token.type == 'number':
|
| 22 | return arith_ast.Const(token.val)
|
| 23 | # We have to wrap a string in some kind of variant.
|
| 24 | if token.type == 'name':
|
| 25 | return arith_ast.ArithVar(token.val)
|
| 26 |
|
| 27 | raise AssertionError(token.type)
|
| 28 |
|
| 29 |
|
| 30 | def NullParen(p, token, bp):
|
| 31 | """ Arithmetic grouping """
|
| 32 | r = p.ParseUntil(bp)
|
| 33 | p.Eat(')')
|
| 34 | return r
|
| 35 |
|
| 36 |
|
| 37 | def NullPrefixOp(p, token, bp):
|
| 38 | """Prefix operator.
|
| 39 |
|
| 40 | Low precedence: return, raise, etc.
|
| 41 | return x+y is return (x+y), not (return x) + y
|
| 42 |
|
| 43 | High precedence: logical negation, bitwise complement, etc.
|
| 44 | !x && y is (!x) && y, not !(x && y)
|
| 45 | """
|
| 46 | r = p.ParseUntil(bp)
|
| 47 | return CompositeNode(token, [r])
|
| 48 |
|
| 49 |
|
| 50 | def NullIncDec(p, token, bp):
|
| 51 | """ ++x or ++x[1] """
|
| 52 | right = p.ParseUntil(bp)
|
| 53 | if right.token.type not in ('name', 'get'):
|
| 54 | raise tdop.ParseError("Can't assign to %r (%s)" % (right, right.token))
|
| 55 | return CompositeNode(token, [right])
|
| 56 |
|
| 57 |
|
| 58 | #
|
| 59 | # Left Denotation -- token that takes an expression on the left
|
| 60 | #
|
| 61 |
|
| 62 | def LeftIncDec(p, token, left, rbp):
|
| 63 | """ For i++ and i--
|
| 64 | """
|
| 65 | if left.token.type not in ('name', 'get'):
|
| 66 | raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
|
| 67 | token.type = 'post' + token.type
|
| 68 | return CompositeNode(token, [left])
|
| 69 |
|
| 70 |
|
| 71 | def LeftIndex(p, token, left, unused_bp):
|
| 72 | """ index f[x+1] """
|
| 73 | # f[x] or f[x][y]
|
| 74 | if not isinstance(left, arith_ast.ArithVar):
|
| 75 | raise tdop.ParseError("%s can't be indexed" % left)
|
| 76 | index = p.ParseUntil(0)
|
| 77 | if p.AtToken(':'):
|
| 78 | p.Next()
|
| 79 | end = p.ParseUntil(0)
|
| 80 | else:
|
| 81 | end = None
|
| 82 |
|
| 83 | p.Eat(']')
|
| 84 |
|
| 85 | # TODO: If you see ], then
|
| 86 | # 1:4
|
| 87 | # 1:4:2
|
| 88 | # Both end and step are optional
|
| 89 |
|
| 90 | if end:
|
| 91 | return arith_ast.Slice(left, index, end, None)
|
| 92 | else:
|
| 93 | return arith_ast.Index(left, index)
|
| 94 |
|
| 95 |
|
| 96 | def LeftTernary(p, token, left, bp):
|
| 97 | """ e.g. a > 1 ? x : y """
|
| 98 | true_expr = p.ParseUntil(bp)
|
| 99 | p.Eat(':')
|
| 100 | false_expr = p.ParseUntil(bp)
|
| 101 | children = [left, true_expr, false_expr]
|
| 102 | return CompositeNode(token, children)
|
| 103 |
|
| 104 |
|
| 105 | def LeftBinaryOp(p, token, left, rbp):
|
| 106 | """ Normal binary operator like 1+2 or 2*3, etc. """
|
| 107 | if token.val == '+':
|
| 108 | op_id_ = op_id.Plus
|
| 109 | elif token.val == '-':
|
| 110 | op_id_ = op_id.Minus
|
| 111 | elif token.val == '*':
|
| 112 | op_id_ = op_id.Star
|
| 113 | else:
|
| 114 | raise AssertionError(token.val)
|
| 115 | return arith_ast.ArithBinary(op_id_, left, p.ParseUntil(rbp))
|
| 116 |
|
| 117 |
|
| 118 | def LeftAssign(p, token, left, rbp):
|
| 119 | """ Normal binary operator like 1+2 or 2*3, etc. """
|
| 120 | # x += 1, or a[i] += 1
|
| 121 | if left.token.type not in ('name', 'get'):
|
| 122 | raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
|
| 123 | return CompositeNode(token, [left, p.ParseUntil(rbp)])
|
| 124 |
|
| 125 |
|
| 126 | def LeftComma(p, token, left, rbp):
|
| 127 | """ foo, bar, baz
|
| 128 |
|
| 129 | Could be sequencing operator, or tuple without parens
|
| 130 | """
|
| 131 | r = p.ParseUntil(rbp)
|
| 132 | if left.token.type == ',': # Keep adding more children
|
| 133 | left.children.append(r)
|
| 134 | return left
|
| 135 | children = [left, r]
|
| 136 | return CompositeNode(token, children)
|
| 137 |
|
| 138 |
|
| 139 | # For overloading of , inside function calls
|
| 140 | COMMA_PREC = 1
|
| 141 |
|
| 142 | def LeftFuncCall(p, token, left, unused_bp):
|
| 143 | """ Function call f(a, b). """
|
| 144 | args = []
|
| 145 | # f(x) or f[i](x)
|
| 146 | if not isinstance(left, arith_ast.ArithVar):
|
| 147 | raise tdop.ParseError("%s can't be called" % left)
|
| 148 | func_name = left.name # get a string
|
| 149 |
|
| 150 | while not p.AtToken(')'):
|
| 151 | # We don't want to grab the comma, e.g. it is NOT a sequence operator. So
|
| 152 | # set the precedence to 5.
|
| 153 | args.append(p.ParseUntil(COMMA_PREC))
|
| 154 | if p.AtToken(','):
|
| 155 | p.Next()
|
| 156 | p.Eat(")")
|
| 157 | return arith_ast.FuncCall(func_name, args)
|
| 158 |
|
| 159 |
|
| 160 | def MakeShellParserSpec():
|
| 161 | """
|
| 162 | Create a parser.
|
| 163 |
|
| 164 | Compare the code below with this table of C operator precedence:
|
| 165 | http://en.cppreference.com/w/c/language/operator_precedence
|
| 166 | """
|
| 167 | spec = tdop.ParserSpec()
|
| 168 |
|
| 169 | spec.Left(31, LeftIncDec, ['++', '--'])
|
| 170 | spec.Left(31, LeftFuncCall, ['('])
|
| 171 | spec.Left(31, LeftIndex, ['['])
|
| 172 |
|
| 173 | # 29 -- binds to everything except function call, indexing, postfix ops
|
| 174 | spec.Null(29, NullIncDec, ['++', '--'])
|
| 175 | spec.Null(29, NullPrefixOp, ['+', '!', '~', '-'])
|
| 176 |
|
| 177 | # Right associative: 2 ** 3 ** 2 == 2 ** (3 ** 2)
|
| 178 | spec.LeftRightAssoc(27, LeftBinaryOp, ['**'])
|
| 179 | spec.Left(25, LeftBinaryOp, ['*', '/', '%'])
|
| 180 |
|
| 181 | spec.Left(23, LeftBinaryOp, ['+', '-'])
|
| 182 | spec.Left(21, LeftBinaryOp, ['<<', '>>'])
|
| 183 | spec.Left(19, LeftBinaryOp, ['<', '>', '<=', '>='])
|
| 184 | spec.Left(17, LeftBinaryOp, ['!=', '=='])
|
| 185 |
|
| 186 | spec.Left(15, LeftBinaryOp, ['&'])
|
| 187 | spec.Left(13, LeftBinaryOp, ['^'])
|
| 188 | spec.Left(11, LeftBinaryOp, ['|'])
|
| 189 | spec.Left(9, LeftBinaryOp, ['&&'])
|
| 190 | spec.Left(7, LeftBinaryOp, ['||'])
|
| 191 |
|
| 192 | spec.LeftRightAssoc(5, LeftTernary, ['?'])
|
| 193 |
|
| 194 | # Right associative: a = b = 2 is a = (b = 2)
|
| 195 | spec.LeftRightAssoc(3, LeftAssign, [
|
| 196 | '=',
|
| 197 | '+=', '-=', '*=', '/=', '%=',
|
| 198 | '<<=', '>>=', '&=', '^=', '|='])
|
| 199 |
|
| 200 | spec.Left(COMMA_PREC, LeftComma, [','])
|
| 201 |
|
| 202 | # 0 precedence -- doesn't bind until )
|
| 203 | spec.Null(0, NullParen, ['(']) # for grouping
|
| 204 |
|
| 205 | # -1 precedence -- never used
|
| 206 | spec.Null(-1, NullConstant, ['name', 'number'])
|
| 207 | spec.Null(-1, tdop.NullError, [')', ']', ':', 'eof'])
|
| 208 |
|
| 209 | return spec
|
| 210 |
|
| 211 |
|
| 212 | def MakeParser(s):
|
| 213 | """Used by tests."""
|
| 214 | spec = MakeShellParserSpec()
|
| 215 | lexer = tdop.Tokenize(s)
|
| 216 | p = tdop.Parser(spec, lexer)
|
| 217 | return p
|
| 218 |
|
| 219 |
|
| 220 | def ParseShell(s, expected=None):
|
| 221 | """Used by tests."""
|
| 222 | p = MakeParser(s)
|
| 223 | tree = p.Parse()
|
| 224 |
|
| 225 | sexpr = repr(tree)
|
| 226 | if expected is not None:
|
| 227 | assert sexpr == expected, '%r != %r' % (sexpr, expected)
|
| 228 |
|
| 229 | #print('%-40s %s' % (s, sexpr))
|
| 230 | return tree
|
| 231 |
|
| 232 |
|
| 233 | def main(argv):
|
| 234 | try:
|
| 235 | s = argv[1]
|
| 236 | except IndexError:
|
| 237 | print('Usage: ./arith_parse.py EXPRESSION')
|
| 238 | else:
|
| 239 | try:
|
| 240 | tree = ParseShell(s)
|
| 241 | except tdop.ParseError as e:
|
| 242 | print('Error parsing %r: %s' % (s, e), file=sys.stderr)
|
| 243 |
|
| 244 |
|
| 245 | if __name__ == '__main__':
|
| 246 | main(sys.argv)
|