| 1 | """expr_to_ast.py."""
 | 
| 2 | from __future__ import print_function
 | 
| 3 | 
 | 
| 4 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
 | 
| 5 | from _devbuild.gen.syntax_asdl import (
 | 
| 6 |     Token,
 | 
| 7 |     SimpleVarSub,
 | 
| 8 |     loc,
 | 
| 9 |     loc_t,
 | 
| 10 |     DoubleQuoted,
 | 
| 11 |     SingleQuoted,
 | 
| 12 |     BracedVarSub,
 | 
| 13 |     CommandSub,
 | 
| 14 |     ShArrayLiteral,
 | 
| 15 |     command,
 | 
| 16 |     expr,
 | 
| 17 |     expr_e,
 | 
| 18 |     expr_t,
 | 
| 19 |     expr_context_e,
 | 
| 20 |     re,
 | 
| 21 |     re_t,
 | 
| 22 |     re_repeat,
 | 
| 23 |     re_repeat_t,
 | 
| 24 |     class_literal_term,
 | 
| 25 |     class_literal_term_t,
 | 
| 26 |     PosixClass,
 | 
| 27 |     PerlClass,
 | 
| 28 |     NameType,
 | 
| 29 |     y_lhs_t,
 | 
| 30 |     Comprehension,
 | 
| 31 |     Subscript,
 | 
| 32 |     Attribute,
 | 
| 33 |     proc_sig,
 | 
| 34 |     proc_sig_t,
 | 
| 35 |     Param,
 | 
| 36 |     RestParam,
 | 
| 37 |     ParamGroup,
 | 
| 38 |     NamedArg,
 | 
| 39 |     ArgList,
 | 
| 40 |     pat,
 | 
| 41 |     pat_t,
 | 
| 42 |     TypeExpr,
 | 
| 43 |     Func,
 | 
| 44 |     Eggex,
 | 
| 45 |     EggexFlag,
 | 
| 46 |     CharCode,
 | 
| 47 |     CharRange,
 | 
| 48 | )
 | 
| 49 | from _devbuild.gen.value_asdl import value, value_t
 | 
| 50 | from _devbuild.gen import grammar_nt
 | 
| 51 | from core.error import p_die
 | 
| 52 | from data_lang import j8
 | 
| 53 | from frontend import consts
 | 
| 54 | from frontend import lexer
 | 
| 55 | from frontend import location
 | 
| 56 | from mycpp import mops
 | 
| 57 | from mycpp import mylib
 | 
| 58 | from mycpp.mylib import log, tagswitch
 | 
| 59 | from osh import word_compile
 | 
| 60 | from ysh import expr_parse
 | 
| 61 | from ysh import regex_translate
 | 
| 62 | 
 | 
| 63 | from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
 | 
| 64 | if TYPE_CHECKING:
 | 
| 65 |     from pgen2.grammar import Grammar
 | 
| 66 |     from pgen2.pnode import PNode
 | 
| 67 | 
 | 
| 68 | _ = log
 | 
| 69 | 
 | 
| 70 | PERL_CLASSES = {
 | 
| 71 |     'd': 'd',
 | 
| 72 |     'w': 'w',
 | 
| 73 |     'word': 'w',
 | 
| 74 |     's': 's',
 | 
| 75 | }
 | 
| 76 | # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
 | 
| 77 | POSIX_CLASSES = [
 | 
| 78 |     'alnum',
 | 
| 79 |     'cntrl',
 | 
| 80 |     'lower',
 | 
| 81 |     'space',
 | 
| 82 |     'alpha',
 | 
| 83 |     'digit',
 | 
| 84 |     'print',
 | 
| 85 |     'upper',
 | 
| 86 |     'blank',
 | 
| 87 |     'graph',
 | 
| 88 |     'punct',
 | 
| 89 |     'xdigit',
 | 
| 90 | ]
 | 
| 91 | # NOTE: There are also things like \p{Greek} that we could put in the
 | 
| 92 | # "non-sigil" namespace.
 | 
| 93 | 
 | 
| 94 | RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
 | 
| 95 | 
 | 
| 96 | POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"
 | 
| 97 | 
 | 
| 98 | # Copied from pgen2/token.py to avoid dependency.
 | 
| 99 | NT_OFFSET = 256
 | 
| 100 | 
 | 
| 101 | if mylib.PYTHON:
 | 
| 102 | 
 | 
| 103 |     def MakeGrammarNames(ysh_grammar):
 | 
| 104 |         # type: (Grammar) -> Dict[int, str]
 | 
| 105 | 
 | 
| 106 |         # TODO: Break this dependency
 | 
| 107 |         from frontend import lexer_def
 | 
| 108 | 
 | 
| 109 |         names = {}
 | 
| 110 | 
 | 
| 111 |         for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
 | 
| 112 |             # Hm some are out of range
 | 
| 113 |             #assert k < 256, (k, id_name)
 | 
| 114 | 
 | 
| 115 |             # TODO: Some tokens have values greater than NT_OFFSET
 | 
| 116 |             if k < NT_OFFSET:
 | 
| 117 |                 names[k] = id_name
 | 
| 118 | 
 | 
| 119 |         for k, v in ysh_grammar.number2symbol.items():
 | 
| 120 |             assert k >= NT_OFFSET, (k, v)
 | 
| 121 |             names[k] = v
 | 
| 122 | 
 | 
| 123 |         return names
 | 
| 124 | 
 | 
| 125 | 
 | 
| 126 | class Transformer(object):
 | 
| 127 |     """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
 | 
| 128 | 
 | 
| 129 |     pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
 | 
| 130 |     so this "transformer" is the equivalent.
 | 
| 131 | 
 | 
| 132 |     Files to refer to when modifying this class:
 | 
| 133 | 
 | 
| 134 |       ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
 | 
| 135 |       frontend/syntax.asdl   (generates _devbuild/gen/syntax_asdl.py)
 | 
| 136 | 
 | 
| 137 |     Related examples:
 | 
| 138 | 
 | 
| 139 |       opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
 | 
| 140 |       Python-2.7.13/Python/ast.c   (the "real" CPython version, ~3600 lines)
 | 
| 141 | 
 | 
| 142 |     Other:
 | 
| 143 |       frontend/parse_lib.py  (turn on print_parse_tree)
 | 
| 144 | 
 | 
| 145 |     Public methods:
 | 
| 146 |       Expr, VarDecl
 | 
| 147 |       atom, trailer, etc. are private, named after productions in grammar.pgen2.
 | 
| 148 |     """
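    # Rough usage sketch (hypothetical driver, for illustration only; see
    # frontend/parse_lib.py for the real wiring):
    #
    #   tr = Transformer(ysh_grammar)   # ysh_grammar: a pgen2 Grammar
    #   ast_node = tr.Expr(pnode)       # pnode: PNode from the pgen2 parser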
 | 
| 149 | 
 | 
| 150 |     def __init__(self, gr):
 | 
| 151 |         # type: (Grammar) -> None
 | 
| 152 |         self.number2symbol = gr.number2symbol
 | 
| 153 |         if mylib.PYTHON:
 | 
| 154 |             names = MakeGrammarNames(gr)
 | 
| 155 |             # print raw nodes
 | 
| 156 |             self.p_printer = expr_parse.ParseTreePrinter(names)
 | 
| 157 | 
 | 
| 158 |     def _LeftAssoc(self, p_node):
 | 
| 159 |         # type: (PNode) -> expr_t
 | 
| 160 |         """For an associative binary operation.
 | 
| 161 | 
 | 
| 162 |         Examples:
 | 
| 163 |           xor_expr: and_expr ('xor' and_expr)*
 | 
| 164 |           term: factor (('*'|'/'|'%'|'div') factor)*
 | 
| 165 | 
 | 
| 166 |         3 - 1 - 2 must be grouped as ((3 - 1) - 2).
 | 
| 167 |         """
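        # Informal sketch of the shape this produces (names abbreviated):
        #
        #   3 - 1 - 2  =>  Binary('-', Binary('-', Const 3, Const 1), Const 2)
        #
        # i.e. the leftmost operator is reduced first.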
 | 
| 168 |         # Note: Compare the iterative com_binary() method in
 | 
| 169 |         # opy/compiler2/transformer.py.
 | 
| 170 | 
 | 
| 171 |         # Examples:
 | 
| 172 |         # - The PNode for '3 - 1' will have 3 children
 | 
| 173 |         # - The PNode for '3 - 1 - 2' will have 5 children
 | 
| 174 | 
 | 
| 175 |         #self.p_printer.Print(p_node)
 | 
| 176 | 
 | 
| 177 |         i = 1  # index of the operator
 | 
| 178 |         n = p_node.NumChildren()
 | 
| 179 | 
 | 
| 180 |         left = self.Expr(p_node.GetChild(0))
 | 
| 181 |         while i < n:
 | 
| 182 |             op = p_node.GetChild(i)
 | 
| 183 |             right = self.Expr(p_node.GetChild(i + 1))
 | 
| 184 | 
 | 
| 185 |             # create a new left node
 | 
| 186 |             left = expr.Binary(op.tok, left, right)
 | 
| 187 |             i += 2
 | 
| 188 | 
 | 
| 189 |         return left
 | 
| 190 | 
 | 
| 191 |     def _Trailer(self, base, p_trailer):
 | 
| 192 |         # type: (expr_t, PNode) -> expr_t
 | 
| 193 |         """
 | 
| 194 |         trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
 | 
| 195 |                  | '.' NAME | '->' NAME | '::' NAME
 | 
| 196 |                  )
 | 
| 197 |         """
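        # Roughly, each trailer alternative maps to one AST node:
        #
        #   f(x, y)    =>  expr.FuncCall(base, ArgList(...))
        #   a[i]       =>  Subscript node
        #   obj.field  =>  Attribute node
        #
        # (sketch only; see the branches below for the exact constructor args)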
 | 
| 198 |         tok0 = p_trailer.GetChild(0).tok
 | 
| 199 |         typ0 = p_trailer.GetChild(0).typ
 | 
| 200 | 
 | 
| 201 |         if typ0 == Id.Op_LParen:
 | 
| 202 |             lparen = tok0
 | 
| 203 |             rparen = p_trailer.GetChild(-1).tok
 | 
| 204 |             arglist = ArgList(lparen, [], None, [], None, None, rparen)
 | 
| 205 |             if p_trailer.NumChildren() == 2:  # ()
 | 
| 206 |                 return expr.FuncCall(base, arglist)
 | 
| 207 | 
 | 
| 208 |             p = p_trailer.GetChild(1)  # the X in ( X )
 | 
| 209 |             assert p.typ == grammar_nt.arglist  # f(x, y)
 | 
| 210 |             self._ArgList(p, arglist)
 | 
| 211 |             return expr.FuncCall(base, arglist)
 | 
| 212 | 
 | 
| 213 |         if typ0 == Id.Op_LBracket:
 | 
| 214 |             p_args = p_trailer.GetChild(1)
 | 
| 215 |             assert p_args.typ == grammar_nt.subscriptlist
 | 
| 216 |             n = p_args.NumChildren()
 | 
| 217 |             if n > 1:
 | 
| 218 |                 p_die("Only 1 subscript is accepted", p_args.GetChild(1).tok)
 | 
| 219 | 
 | 
| 220 |             a = p_args.GetChild(0)
 | 
| 221 |             return Subscript(tok0, base, self._Subscript(a))
 | 
| 222 | 
 | 
| 223 |         if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
 | 
| 224 |             attr = p_trailer.GetChild(1).tok  # will be Id.Expr_Name
 | 
| 225 |             return Attribute(base, tok0, attr, lexer.TokenVal(attr),
 | 
| 226 |                              expr_context_e.Store)
 | 
| 227 | 
 | 
| 228 |         raise AssertionError(typ0)
 | 
| 229 | 
 | 
| 230 |     def _DictPair(self, p_node):
 | 
| 231 |         # type: (PNode) -> Tuple[expr_t, expr_t]
 | 
| 232 |         """
 | 
| 233 |         dict_pair: ( Expr_Name [':' test]
 | 
| 234 |                    | '[' testlist ']' ':' test
 | 
| 235 |                    | sq_string ':' test 
 | 
| 236 |                    | dq_string ':' test )
 | 
| 237 |         """
 | 
| 238 |         assert p_node.typ == grammar_nt.dict_pair
 | 
| 239 | 
 | 
| 240 |         typ = p_node.GetChild(0).typ
 | 
| 241 | 
 | 
| 242 |         if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
 | 
| 243 |             key = self.Expr(p_node.GetChild(0))  # type: expr_t
 | 
| 244 |             val = self.Expr(p_node.GetChild(2))
 | 
| 245 |             return key, val
 | 
| 246 | 
 | 
| 247 |         tok0 = p_node.GetChild(0).tok
 | 
| 248 |         id_ = tok0.id
 | 
| 249 | 
 | 
| 250 |         if id_ == Id.Expr_Name:
 | 
| 251 |             key_str = value.Str(lexer.TokenVal(tok0))
 | 
| 252 |             key = expr.Const(tok0, key_str)
 | 
| 253 |             if p_node.NumChildren() >= 3:
 | 
| 254 |                 val = self.Expr(p_node.GetChild(2))
 | 
| 255 |             else:
 | 
| 256 |                 val = expr.Implicit
 | 
| 257 | 
 | 
| 258 |         if id_ == Id.Op_LBracket:  # {[x+y]: 'val'}
 | 
| 259 |             key = self.Expr(p_node.GetChild(1))
 | 
| 260 |             val = self.Expr(p_node.GetChild(4))
 | 
| 261 |             return key, val
 | 
| 262 | 
 | 
| 263 |         return key, val
 | 
| 264 | 
 | 
| 265 |     def _Dict(self, parent, p_node):
 | 
| 266 |         # type: (PNode, PNode) -> expr.Dict
 | 
| 267 |         """
 | 
| 268 |         dict: dict_pair (comma_newline dict_pair)* [comma_newline]
 | 
| 269 |         """
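        # For example (informal): {name: 42, [k]: v} produces
        #   expr.Dict(keys=[Const 'name', Var k], values=[Const 42, Var v])
        # with keys[i] paired with values[i].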
 | 
| 270 |         if p_node.typ == Id.Op_RBrace:  # {}
 | 
| 271 |             return expr.Dict(parent.tok, [], [])
 | 
| 272 | 
 | 
| 273 |         assert p_node.typ == grammar_nt.dict
 | 
| 274 | 
 | 
| 275 |         keys = []  # type: List[expr_t]
 | 
| 276 |         values = []  # type: List[expr_t]
 | 
| 277 | 
 | 
| 278 |         n = p_node.NumChildren()
 | 
| 279 |         for i in xrange(0, n, 2):
 | 
| 280 |             key, val = self._DictPair(p_node.GetChild(i))
 | 
| 281 |             keys.append(key)
 | 
| 282 |             values.append(val)
 | 
| 283 | 
 | 
| 284 |         return expr.Dict(parent.tok, keys, values)
 | 
| 285 | 
 | 
| 286 |     def _Tuple(self, parent):
 | 
| 287 |         # type: (PNode) -> expr_t
 | 
| 288 | 
 | 
| 289 |         n = parent.NumChildren()
 | 
| 290 | 
 | 
| 291 |         # (x) -- not a tuple
 | 
| 292 |         if n == 1:
 | 
| 293 |             return self.Expr(parent.GetChild(0))
 | 
| 294 | 
 | 
| 295 |         # x, and (x,) aren't allowed
 | 
| 296 |         if n == 2:
 | 
| 297 |             p_die('Invalid trailing comma', parent.GetChild(1).tok)
 | 
| 298 | 
 | 
| 299 |         elts = []  # type: List[expr_t]
 | 
| 300 |         for i in xrange(0, n, 2):  # skip commas
 | 
| 301 |             p_node = parent.GetChild(i)
 | 
| 302 |             elts.append(self.Expr(p_node))
 | 
| 303 | 
 | 
| 304 |         return expr.Tuple(parent.tok, elts,
 | 
| 305 |                           expr_context_e.Store)  # unused expr_context_e
 | 
| 306 | 
 | 
| 307 |     def _TestlistComp(self, parent, p_node, id0):
 | 
| 308 |         # type: (PNode, PNode, Id_t) -> expr_t
 | 
| 309 |         """
 | 
| 310 |         testlist_comp:
 | 
| 311 |           (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
 | 
| 312 |         """
 | 
| 313 |         assert p_node.typ == grammar_nt.testlist_comp
 | 
| 314 | 
 | 
| 315 |         n = p_node.NumChildren()
 | 
| 316 |         if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
 | 
| 317 |             elt = self.Expr(p_node.GetChild(0))
 | 
| 318 |             comp = self._CompFor(p_node.GetChild(1))
 | 
| 319 |             if id0 == Id.Op_LParen:  # (x+1 for x in y)
 | 
| 320 |                 return expr.GeneratorExp(elt, [comp])
 | 
| 321 |             if id0 == Id.Op_LBracket:  # [x+1 for x in y]
 | 
| 322 |                 return expr.ListComp(parent.tok, elt, [comp])
 | 
| 323 |             raise AssertionError()
 | 
| 324 | 
 | 
| 325 |         if id0 == Id.Op_LParen:
 | 
| 326 |             # Parenthesized expression like (x+1) or (x)
 | 
| 327 |             if n == 1:
 | 
| 328 |                 return self.Expr(p_node.GetChild(0))
 | 
| 329 | 
 | 
| 330 |             # Tuples (1,)  (1, 2)  etc. - TODO: should be a list literal?
 | 
| 331 |             if p_node.GetChild(1).typ == Id.Arith_Comma:
 | 
| 332 |                 return self._Tuple(p_node)
 | 
| 333 | 
 | 
| 334 |             raise AssertionError()
 | 
| 335 | 
 | 
| 336 |         if id0 == Id.Op_LBracket:  # List [1,2,3]
 | 
| 337 |             elts = []  # type: List[expr_t]
 | 
| 338 |             for i in xrange(0, n, 2):  # skip commas
 | 
| 339 |                 elts.append(self.Expr(p_node.GetChild(i)))
 | 
| 340 | 
 | 
| 341 |             return expr.List(parent.tok, elts,
 | 
| 342 |                              expr_context_e.Store)  # unused expr_context_e
 | 
| 343 | 
 | 
| 344 |         raise AssertionError(Id_str(id0))
 | 
| 345 | 
 | 
| 346 |     def _Atom(self, parent):
 | 
| 347 |         # type: (PNode) -> expr_t
 | 
| 348 |         """Handle alternatives of 'atom' where there's more than one child."""
 | 
| 349 | 
 | 
| 350 |         tok = parent.GetChild(0).tok
 | 
| 351 |         id_ = tok.id
 | 
| 352 |         n = parent.NumChildren()
 | 
| 353 | 
 | 
| 354 |         if id_ == Id.Op_LParen:
 | 
| 355 |             # atom: '(' [yield_expr|testlist_comp] ')' | ...
 | 
| 356 |             if n == 2:  # () is a tuple
 | 
| 357 |                 assert (
 | 
| 358 |                     parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
 | 
| 359 |                 return expr.Tuple(tok, [], expr_context_e.Store)
 | 
| 360 | 
 | 
| 361 |             return self._TestlistComp(parent, parent.GetChild(1), id_)
 | 
| 362 | 
 | 
| 363 |         if id_ == Id.Op_LBracket:
 | 
| 364 |             # atom: ... | '[' [testlist_comp] ']' | ...
 | 
| 365 | 
 | 
| 366 |             if n == 2:  # []
 | 
| 367 |                 assert (parent.GetChild(1).typ == Id.Op_RBracket
 | 
| 368 |                         ), parent.GetChild(1)
 | 
| 369 |                 return expr.List(tok, [],
 | 
| 370 |                                  expr_context_e.Store)  # unused expr_context_e
 | 
| 371 | 
 | 
| 372 |             return self._TestlistComp(parent, parent.GetChild(1), id_)
 | 
| 373 | 
 | 
| 374 |         if id_ == Id.Left_CaretBracket:  # ^[42 + x]
 | 
| 375 |             child = self.Expr(parent.GetChild(1))
 | 
| 376 |             return expr.Literal(child)
 | 
| 377 | 
 | 
| 378 |         if id_ == Id.Op_LBrace:
 | 
| 379 |             # atom: ... | '{' [Op_Newline] [dict] '}'
 | 
| 380 |             i = 1
 | 
| 381 |             if parent.GetChild(i).typ == Id.Op_Newline:
 | 
| 382 |                 i += 1
 | 
| 383 |             return self._Dict(parent, parent.GetChild(i))
 | 
| 384 | 
 | 
| 385 |         if id_ == Id.Arith_Amp:
 | 
| 386 |             n = parent.NumChildren()
 | 
| 387 |             if n >= 3:
 | 
| 388 |                 p_die("Places in containers not implemented yet",
 | 
| 389 |                       parent.GetChild(2).tok)
 | 
| 390 | 
 | 
| 391 |             name_tok = parent.GetChild(1).tok
 | 
| 392 |             return expr.Place(name_tok, lexer.TokenVal(name_tok), [])
 | 
| 393 | 
 | 
| 394 |         if id_ == Id.Expr_Func:
 | 
| 395 |             # STUB.  This should really be a Func, not Lambda.
 | 
| 396 |             return expr.Lambda([], expr.Implicit)
 | 
| 397 | 
 | 
| 398 |         # 100 M
 | 
| 399 |         # Ignoring the suffix for now
 | 
| 400 |         if id_ == Id.Expr_DecInt:
 | 
| 401 |             assert n > 1
 | 
| 402 |             p_die("Units suffix not implemented", parent.GetChild(1).tok)
 | 
| 403 |             #return self.Expr(parent.GetChild(0))
 | 
| 404 | 
 | 
| 405 |         # 100.5 M
 | 
| 406 |         # Ignoring the suffix for now
 | 
| 407 |         if id_ == Id.Expr_Float:
 | 
| 408 |             assert n > 1
 | 
| 409 |             p_die("Units suffix not implemented", parent.GetChild(1).tok)
 | 
| 410 |             #return self.Expr(parent.GetChild(0))
 | 
| 411 | 
 | 
| 412 |         raise AssertionError(Id_str(id_))
 | 
| 413 | 
 | 
| 414 |     def _NameType(self, p_node):
 | 
| 415 |         # type: (PNode) -> NameType
 | 
| 416 |         """ name_type: Expr_Name [':'] [type_expr] """
 | 
| 417 |         name_tok = p_node.GetChild(0).tok
 | 
| 418 |         typ = None  # type: Optional[TypeExpr]
 | 
| 419 | 
 | 
| 420 |         n = p_node.NumChildren()
 | 
| 421 |         if n == 2:
 | 
| 422 |             typ = self._TypeExpr(p_node.GetChild(1))
 | 
| 423 |         if n == 3:
 | 
| 424 |             typ = self._TypeExpr(p_node.GetChild(2))
 | 
| 425 | 
 | 
| 426 |         return NameType(name_tok, lexer.TokenVal(name_tok), typ)
 | 
| 427 | 
 | 
| 428 |     def _NameTypeList(self, p_node):
 | 
| 429 |         # type: (PNode) -> List[NameType]
 | 
| 430 |         """ name_type_list: name_type (',' name_type)* """
 | 
| 431 |         assert p_node.typ == grammar_nt.name_type_list
 | 
| 432 |         results = []  # type: List[NameType]
 | 
| 433 | 
 | 
| 434 |         n = p_node.NumChildren()
 | 
| 435 |         for i in xrange(0, n, 2):  # was children[::2]
 | 
| 436 |             results.append(self._NameType(p_node.GetChild(i)))
 | 
| 437 |         return results
 | 
| 438 | 
 | 
| 439 |     def _CompFor(self, p_node):
 | 
| 440 |         # type: (PNode) -> Comprehension
 | 
| 441 |         """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
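        # e.g. the 'for x in y if x > 0' part of a comprehension becomes
        # (roughly) Comprehension(lhs=[x], iterable=y, cond=(x > 0)); cond is
        # None when there's no 'if' clause.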
 | 
| 442 |         lhs = self._NameTypeList(p_node.GetChild(1))
 | 
| 443 |         iterable = self.Expr(p_node.GetChild(3))
 | 
| 444 | 
 | 
| 445 |         if p_node.NumChildren() >= 6:
 | 
| 446 |             cond = self.Expr(p_node.GetChild(5))
 | 
| 447 |         else:
 | 
| 448 |             cond = None
 | 
| 449 | 
 | 
| 450 |         return Comprehension(lhs, iterable, cond)
 | 
| 451 | 
 | 
| 452 |     def _CompareChain(self, parent):
 | 
| 453 |         # type: (PNode) -> expr_t
 | 
| 454 |         """comparison: expr (comp_op expr)*"""
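        # e.g. x < y <= z becomes (roughly)
        #   expr.Compare(left=x, ops=['<', '<='], comparators=[y, z])
        # Note that 'not in' and 'is not' are collapsed into single operator
        # tokens (Id.Node_NotIn, Id.Node_IsNot) below.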
 | 
| 455 |         cmp_ops = []  # type: List[Token]
 | 
| 456 |         comparators = []  # type: List[expr_t]
 | 
| 457 |         left = self.Expr(parent.GetChild(0))
 | 
| 458 | 
 | 
| 459 |         i = 1
 | 
| 460 |         n = parent.NumChildren()
 | 
| 461 |         while i < n:
 | 
| 462 |             p = parent.GetChild(i)
 | 
| 463 |             op = p.GetChild(0).tok
 | 
| 464 |             if p.NumChildren() == 2:
 | 
| 465 |                 # Blame the first token, and change its type
 | 
| 466 |                 if op.id == Id.Expr_Not:  # not in
 | 
| 467 |                     op.id = Id.Node_NotIn
 | 
| 468 |                 elif op.id == Id.Expr_Is:  # is not
 | 
| 469 |                     op.id = Id.Node_IsNot
 | 
| 470 |                 else:
 | 
| 471 |                     raise AssertionError()
 | 
| 472 |             else:
 | 
| 473 |                 # is, <, ==, etc.
 | 
| 474 |                 pass
 | 
| 475 | 
 | 
| 476 |             cmp_ops.append(op)
 | 
| 477 |             i += 1
 | 
| 478 |             comparators.append(self.Expr(parent.GetChild(i)))
 | 
| 479 |             i += 1
 | 
| 480 |         return expr.Compare(left, cmp_ops, comparators)
 | 
| 481 | 
 | 
| 482 |     def _Subscript(self, parent):
 | 
| 483 |         # type: (PNode) -> expr_t
 | 
| 484 |         """subscript: expr | [expr] ':' [expr]"""
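        # e.g. (informally)  a[1]  => the expr for 1 (no Slice node)
        #                    a[1:] => expr.Slice(lower=1, upper=None)
        #                    a[:3] => expr.Slice(lower=None, upper=3)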
 | 
| 485 |         typ0 = parent.GetChild(0).typ
 | 
| 486 | 
 | 
| 487 |         n = parent.NumChildren()
 | 
| 488 | 
 | 
| 489 |         if typ0 == grammar_nt.expr:
 | 
| 490 |             if n == 3:  # a[1:2]
 | 
| 491 |                 lower = self.Expr(parent.GetChild(0))
 | 
| 492 |                 upper = self.Expr(parent.GetChild(2))
 | 
| 493 |             elif n == 2:  # a[1:]
 | 
| 494 |                 lower = self.Expr(parent.GetChild(0))
 | 
| 495 |                 upper = None
 | 
| 496 |             else:  # a[1]
 | 
| 497 |                 return self.Expr(parent.GetChild(0))
 | 
| 498 |         else:
 | 
| 499 |             assert typ0 == Id.Arith_Colon
 | 
| 500 |             lower = None
 | 
| 501 |             if n == 1:  # a[:]
 | 
| 502 |                 upper = None
 | 
| 503 |             else:  # a[:3]
 | 
| 504 |                 upper = self.Expr(parent.GetChild(1))
 | 
| 505 | 
 | 
| 506 |         return expr.Slice(lower, parent.GetChild(0).tok, upper)
 | 
| 507 | 
 | 
| 508 |     def Expr(self, pnode):
 | 
| 509 |         # type: (PNode) -> expr_t
 | 
| 510 |         """Transform expressions (as opposed to statements)"""
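        # A few informal examples of what this returns:
        #   x + 1      =>  expr.Binary('+', Var x, Const 1)
        #   [1, 2]     =>  expr.List(...)
        #   1_000_000  =>  expr.Const with value.Int (underscores removed below)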
 | 
| 511 |         typ = pnode.typ
 | 
| 512 | 
 | 
| 513 |         #
 | 
| 514 |         # YSH Entry Points / Additions
 | 
| 515 |         #
 | 
| 516 | 
 | 
| 517 |         if typ == grammar_nt.ysh_expr:  # for if/while
 | 
| 518 |             # ysh_expr: '(' testlist ')'
 | 
| 519 |             return self.Expr(pnode.GetChild(1))
 | 
| 520 | 
 | 
| 521 |         if typ == grammar_nt.command_expr:
 | 
| 522 |             # command_expr: testlist end_stmt
 | 
| 523 |             return self.Expr(pnode.GetChild(0))
 | 
| 524 | 
 | 
| 525 |         #
 | 
| 526 |         # Python-like Expressions / Operators
 | 
| 527 |         #
 | 
| 528 | 
 | 
| 529 |         if typ == grammar_nt.atom:
 | 
| 530 |             if pnode.NumChildren() == 1:
 | 
| 531 |                 return self.Expr(pnode.GetChild(0))
 | 
| 532 |             return self._Atom(pnode)
 | 
| 533 | 
 | 
| 534 |         if typ == grammar_nt.testlist:
 | 
| 535 |             # testlist: test (',' test)* [',']
 | 
| 536 |             return self._Tuple(pnode)
 | 
| 537 | 
 | 
| 538 |         if typ == grammar_nt.test:
 | 
| 539 |             # test: or_test ['if' or_test 'else' test] | lambdef
 | 
| 540 |             if pnode.NumChildren() == 1:
 | 
| 541 |                 return self.Expr(pnode.GetChild(0))
 | 
| 542 | 
 | 
| 543 |             # TODO: Handle lambdef
 | 
| 544 | 
 | 
| 545 |             test = self.Expr(pnode.GetChild(2))
 | 
| 546 |             body = self.Expr(pnode.GetChild(0))
 | 
| 547 |             orelse = self.Expr(pnode.GetChild(4))
 | 
| 548 |             return expr.IfExp(test, body, orelse)
 | 
| 549 | 
 | 
| 550 |         if typ == grammar_nt.lambdef:
 | 
| 551 |             # lambdef: '|' [name_type_list] '|' test
 | 
| 552 | 
 | 
| 553 |             n = pnode.NumChildren()
 | 
| 554 |             if n == 4:
 | 
| 555 |                 params = self._NameTypeList(pnode.GetChild(1))
 | 
| 556 |             else:
 | 
| 557 |                 params = []
 | 
| 558 | 
 | 
| 559 |             body = self.Expr(pnode.GetChild(n - 1))
 | 
| 560 |             return expr.Lambda(params, body)
 | 
| 561 | 
 | 
| 562 |         #
 | 
| 563 |         # Operators with Precedence
 | 
| 564 |         #
 | 
| 565 | 
 | 
| 566 |         if typ == grammar_nt.or_test:
 | 
| 567 |             # or_test: and_test ('or' and_test)*
 | 
| 568 |             return self._LeftAssoc(pnode)
 | 
| 569 | 
 | 
| 570 |         if typ == grammar_nt.and_test:
 | 
| 571 |             # and_test: not_test ('and' not_test)*
 | 
| 572 |             return self._LeftAssoc(pnode)
 | 
| 573 | 
 | 
| 574 |         if typ == grammar_nt.not_test:
 | 
| 575 |             # not_test: 'not' not_test | comparison
 | 
| 576 |             if pnode.NumChildren() == 1:
 | 
| 577 |                 return self.Expr(pnode.GetChild(0))
 | 
| 578 | 
 | 
| 579 |             op_tok = pnode.GetChild(0).tok  # not
 | 
| 580 |             return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))
 | 
| 581 | 
 | 
| 582 |         elif typ == grammar_nt.comparison:
 | 
| 583 |             if pnode.NumChildren() == 1:
 | 
| 584 |                 return self.Expr(pnode.GetChild(0))
 | 
| 585 | 
 | 
| 586 |             return self._CompareChain(pnode)
 | 
| 587 | 
 | 
| 588 |         elif typ == grammar_nt.range_expr:
 | 
| 589 |             n = pnode.NumChildren()
 | 
| 590 |             if n == 1:
 | 
| 591 |                 return self.Expr(pnode.GetChild(0))
 | 
| 592 | 
 | 
| 593 |             if n == 3:
 | 
| 594 |                 return expr.Range(self.Expr(pnode.GetChild(0)),
 | 
| 595 |                                   pnode.GetChild(1).tok,
 | 
| 596 |                                   self.Expr(pnode.GetChild(2)))
 | 
| 597 | 
 | 
| 598 |             raise AssertionError(n)
 | 
| 599 | 
 | 
| 600 |         elif typ == grammar_nt.expr:
 | 
| 601 |             # expr: xor_expr ('|' xor_expr)*
 | 
| 602 |             return self._LeftAssoc(pnode)
 | 
| 603 | 
 | 
| 604 |         if typ == grammar_nt.xor_expr:
 | 
| 605 |             # xor_expr: and_expr ('xor' and_expr)*
 | 
| 606 |             return self._LeftAssoc(pnode)
 | 
| 607 | 
 | 
| 608 |         if typ == grammar_nt.and_expr:  # a & b
 | 
| 609 |             # and_expr: shift_expr ('&' shift_expr)*
 | 
| 610 |             return self._LeftAssoc(pnode)
 | 
| 611 | 
 | 
| 612 |         elif typ == grammar_nt.shift_expr:
 | 
| 613 |             # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
 | 
| 614 |             return self._LeftAssoc(pnode)
 | 
| 615 | 
 | 
| 616 |         elif typ == grammar_nt.arith_expr:
 | 
| 617 |             # arith_expr: term (('+'|'-') term)*
 | 
| 618 |             return self._LeftAssoc(pnode)
 | 
| 619 | 
 | 
| 620 |         elif typ == grammar_nt.term:
 | 
| 621 |             # term: factor (('*'|'/'|'div'|'mod') factor)*
 | 
| 622 |             return self._LeftAssoc(pnode)
 | 
| 623 | 
 | 
| 624 |         elif typ == grammar_nt.factor:
 | 
| 625 |             # factor: ('+'|'-'|'~') factor | power
 | 
| 626 |             # the power would have already been reduced
 | 
| 627 |             if pnode.NumChildren() == 1:
 | 
| 628 |                 return self.Expr(pnode.GetChild(0))
 | 
| 629 | 
 | 
| 630 |             assert pnode.NumChildren() == 2
 | 
| 631 |             op = pnode.GetChild(0)
 | 
| 632 |             e = pnode.GetChild(1)
 | 
| 633 | 
 | 
| 634 |             assert isinstance(op.tok, Token)
 | 
| 635 |             return expr.Unary(op.tok, self.Expr(e))
 | 
| 636 | 
 | 
| 637 |         elif typ == grammar_nt.power:
 | 
| 638 |             # power: atom trailer* ['**' factor]
 | 
| 639 | 
 | 
| 640 |             node = self.Expr(pnode.GetChild(0))
 | 
| 641 |             if pnode.NumChildren() == 1:  # No trailers
 | 
| 642 |                 return node
 | 
| 643 | 
 | 
| 644 |             # Support a->startswith(b) and mydict.key
 | 
| 645 |             n = pnode.NumChildren()
 | 
| 646 |             i = 1
 | 
| 647 |             while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
 | 
| 648 |                 node = self._Trailer(node, pnode.GetChild(i))
 | 
| 649 |                 i += 1
 | 
| 650 | 
 | 
| 651 |             if i != n:  # ['**' factor]
 | 
| 652 |                 op_tok = pnode.GetChild(i).tok
 | 
| 653 |                 assert op_tok.id == Id.Arith_DStar, op_tok
 | 
| 654 |                 factor = self.Expr(pnode.GetChild(i + 1))
 | 
| 655 |                 node = expr.Binary(op_tok, node, factor)
 | 
| 656 | 
 | 
| 657 |             return node
 | 
| 658 | 
 | 
| 659 |         elif typ == grammar_nt.eggex:
 | 
| 660 |             return self._Eggex(pnode)
 | 
| 661 | 
 | 
| 662 |         elif typ == grammar_nt.ysh_expr_sub:
 | 
| 663 |             return self.Expr(pnode.GetChild(0))
 | 
| 664 | 
 | 
| 665 |         #
 | 
| 666 |         # YSH Lexer Modes
 | 
| 667 |         #
 | 
| 668 | 
 | 
| 669 |         elif typ == grammar_nt.sh_array_literal:
 | 
| 670 |             return cast(ShArrayLiteral, pnode.GetChild(1).tok)
 | 
| 671 | 
 | 
| 672 |         elif typ == grammar_nt.old_sh_array_literal:
 | 
| 673 |             return cast(ShArrayLiteral, pnode.GetChild(1).tok)
 | 
| 674 | 
 | 
| 675 |         elif typ == grammar_nt.sh_command_sub:
 | 
| 676 |             return cast(CommandSub, pnode.GetChild(1).tok)
 | 
| 677 | 
 | 
| 678 |         elif typ == grammar_nt.braced_var_sub:
 | 
| 679 |             return cast(BracedVarSub, pnode.GetChild(1).tok)
 | 
| 680 | 
 | 
| 681 |         elif typ == grammar_nt.dq_string:
 | 
| 682 |             dq = cast(DoubleQuoted, pnode.GetChild(1).tok)
 | 
| 683 |             # sugar: ^"..." is short for ^["..."]
 | 
| 684 |             if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
 | 
| 685 |                 return expr.Literal(dq)
 | 
| 686 |             return dq
 | 
| 687 | 
 | 
| 688 |         elif typ == grammar_nt.sq_string:
 | 
| 689 |             return cast(SingleQuoted, pnode.GetChild(1).tok)
 | 
| 690 | 
 | 
| 691 |         elif typ == grammar_nt.simple_var_sub:
 | 
| 692 |             tok = pnode.GetChild(0).tok
 | 
| 693 | 
 | 
| 694 |             if tok.id == Id.VSub_DollarName:  # $foo is disallowed
 | 
| 695 |                 bare = lexer.TokenSliceLeft(tok, 1)
 | 
| 696 |                 p_die(
 | 
| 697 |                     'In expressions, remove $ and use `%s`, or sometimes "$%s"'
 | 
| 698 |                     % (bare, bare), tok)
 | 
| 699 | 
 | 
| 700 |             # $? is allowed
 | 
| 701 |             return SimpleVarSub(tok)
 | 
| 702 | 
 | 
| 703 |         #
 | 
| 704 |         # Terminals
 | 
| 705 |         #
 | 
| 706 | 
 | 
| 707 |         tok = pnode.tok
 | 
| 708 |         if typ == Id.Expr_Name:
 | 
| 709 |             return expr.Var(tok, lexer.TokenVal(tok))
 | 
| 710 | 
 | 
| 711 |         # Everything else is an expr.Const
 | 
| 712 |         tok_str = lexer.TokenVal(tok)
 | 
| 713 |         # Remove underscores from 1_000_000.  The lexer is responsible for
 | 
| 714 |         # validation.
 | 
| 715 |         c_under = tok_str.replace('_', '')
 | 
| 716 | 
 | 
| 717 |         if typ == Id.Expr_DecInt:
 | 
| 718 |             try:
 | 
| 719 |                 cval = value.Int(mops.FromStr(c_under))  # type: value_t
 | 
| 720 |             except ValueError:
 | 
| 721 |                 p_die('Decimal int constant is too large', tok)
 | 
| 722 | 
 | 
| 723 |         elif typ == Id.Expr_BinInt:
 | 
| 724 |             assert c_under[:2] in ('0b', '0B'), c_under
 | 
| 725 |             try:
 | 
| 726 |                 cval = value.Int(mops.FromStr(c_under[2:], 2))
 | 
| 727 |             except ValueError:
 | 
| 728 |                 p_die('Binary int constant is too large', tok)
 | 
| 729 | 
 | 
| 730 |         elif typ == Id.Expr_OctInt:
 | 
| 731 |             assert c_under[:2] in ('0o', '0O'), c_under
 | 
| 732 |             try:
 | 
| 733 |                 cval = value.Int(mops.FromStr(c_under[2:], 8))
 | 
| 734 |             except ValueError:
 | 
| 735 |                 p_die('Octal int constant is too large', tok)
 | 
| 736 | 
 | 
| 737 |         elif typ == Id.Expr_HexInt:
 | 
| 738 |             assert c_under[:2] in ('0x', '0X'), c_under
 | 
| 739 |             try:
 | 
| 740 |                 cval = value.Int(mops.FromStr(c_under[2:], 16))
 | 
| 741 |             except ValueError:
 | 
| 742 |                 p_die('Hex int constant is too large', tok)
 | 
| 743 | 
 | 
| 744 |         elif typ == Id.Expr_Float:
 | 
| 745 |             # Note: float() in mycpp/gc_builtins.cc currently uses strtod
 | 
| 746 |             # I think this never raises ValueError, because the lexer
 | 
| 747 |             # should only accept strings that strtod() does?
 | 
| 748 |             cval = value.Float(float(c_under))
 | 
| 749 | 
 | 
| 750 |         elif typ == Id.Expr_Null:
 | 
| 751 |             cval = value.Null
 | 
| 752 | 
 | 
| 753 |         elif typ == Id.Expr_True:
 | 
| 754 |             cval = value.Bool(True)
 | 
| 755 | 
 | 
| 756 |         elif typ == Id.Expr_False:
 | 
| 757 |             cval = value.Bool(False)
 | 
| 758 | 
 | 
| 759 |         elif typ == Id.Char_OneChar:  # \n
 | 
| 760 |             assert len(tok_str) == 2, tok_str
 | 
| 761 |             s = consts.LookupCharC(lexer.TokenSliceLeft(tok, 1))
 | 
| 762 |             cval = value.Str(s)
 | 
| 763 | 
 | 
| 764 |         elif typ == Id.Char_YHex:  # \yff
 | 
| 765 |             assert len(tok_str) == 4, tok_str
 | 
| 766 |             hex_str = lexer.TokenSliceLeft(tok, 2)
 | 
| 767 |             s = chr(int(hex_str, 16))
 | 
| 768 |             cval = value.Str(s)
 | 
| 769 | 
 | 
| 770 |         elif typ == Id.Char_UBraced:  # \u{123}
 | 
| 771 |             hex_str = lexer.TokenSlice(tok, 3, -1)
 | 
| 772 |             code_point = int(hex_str, 16)
 | 
| 773 |             s = j8.Utf8Encode(code_point)
 | 
| 774 |             cval = value.Str(s)
 | 
| 775 | 
 | 
| 776 |         else:
 | 
| 777 |             raise AssertionError(typ)
 | 
| 778 | 
 | 
| 779 |         return expr.Const(tok, cval)
 | 
| 780 | 
 | 
| 781 |     def _CheckLhs(self, lhs):
 | 
| 782 |         # type: (expr_t) -> None
 | 
| 783 | 
 | 
| 784 |         UP_lhs = lhs
 | 
| 785 |         with tagswitch(lhs) as case:
 | 
| 786 |             if case(expr_e.Var):
 | 
| 787 |                 # OK - e.g. setvar a.b.c[i] = 42
 | 
| 788 |                 pass
 | 
| 789 | 
 | 
| 790 |             elif case(expr_e.Subscript):
 | 
| 791 |                 lhs = cast(Subscript, UP_lhs)
 | 
| 792 |                 self._CheckLhs(lhs.obj)  # recurse on LHS
 | 
| 793 | 
 | 
| 794 |             elif case(expr_e.Attribute):
 | 
| 795 |                 lhs = cast(Attribute, UP_lhs)
 | 
| 796 |                 self._CheckLhs(lhs.obj)  # recurse on LHS
 | 
| 797 | 
 | 
| 798 |             else:
 | 
| 799 |                 # Illegal - e.g. setglobal {}["key"] = 42
 | 
| 800 |                 p_die("Subscript/Attribute not allowed on this LHS expression",
 | 
| 801 |                       location.TokenForExpr(lhs))
 | 
| 802 | 
 | 
| 803 |     def _LhsExprList(self, p_node):
 | 
| 804 |         # type: (PNode) -> List[y_lhs_t]
 | 
| 805 |         """lhs_list: expr (',' expr)*"""
 | 
| 806 |         assert p_node.typ == grammar_nt.lhs_list
 | 
| 807 | 
 | 
| 808 |         lhs_list = []  # type: List[y_lhs_t]
 | 
| 809 |         n = p_node.NumChildren()
 | 
| 810 |         for i in xrange(0, n, 2):
 | 
| 811 |             p = p_node.GetChild(i)
 | 
| 812 |             #self.p_printer.Print(p)
 | 
| 813 | 
 | 
| 814 |             e = self.Expr(p)
 | 
| 815 |             UP_e = e
 | 
| 816 |             with tagswitch(e) as case:
 | 
| 817 |                 if case(expr_e.Var):
 | 
| 818 |                     e = cast(expr.Var, UP_e)
 | 
| 819 |                     lhs_list.append(e.left)
 | 
| 820 | 
 | 
| 821 |                 elif case(expr_e.Subscript):
 | 
| 822 |                     e = cast(Subscript, UP_e)
 | 
| 823 |                     self._CheckLhs(e)
 | 
| 824 |                     lhs_list.append(e)
 | 
| 825 | 
 | 
| 826 |                 elif case(expr_e.Attribute):
 | 
| 827 |                     e = cast(Attribute, UP_e)
 | 
| 828 |                     self._CheckLhs(e)
 | 
| 829 |                     if e.op.id != Id.Expr_Dot:
 | 
| 830 |                         # e.g. setvar obj->method is not valid
 | 
| 831 |                         p_die("Can't assign to this attribute expr", e.op)
 | 
| 832 |                     lhs_list.append(e)
 | 
| 833 | 
 | 
| 834 |                 else:
 | 
| 835 |                     pass  # work around mycpp bug
 | 
| 836 | 
 | 
| 837 |                     # TODO: could blame an arbitrary expr_t, but this works most of
 | 
| 838 |                     # the time
 | 
| 839 |                     if p.tok:
 | 
| 840 |                         blame = p.tok  # type: loc_t
 | 
| 841 |                     else:
 | 
| 842 |                         blame = loc.Missing
 | 
| 843 |                     p_die("Can't assign to this expression", blame)
 | 
| 844 | 
 | 
| 845 |         return lhs_list
 | 
| 846 | 
 | 
| 847 |     def MakeVarDecl(self, p_node):
 | 
| 848 |         # type: (PNode) -> command.VarDecl
 | 
| 849 |         """
 | 
| 850 |         ysh_var_decl: name_type_list ['=' testlist] end_stmt
 | 
| 851 |         """
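        # e.g. 'var flag, i = parseArgs(spec, argv)' becomes (roughly)
        #   command.VarDecl(keyword=None, lhs=[flag, i], rhs=FuncCall parseArgs)
        # The keyword token is filled in by the caller (see the return below).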
 | 
| 852 |         assert p_node.typ == grammar_nt.ysh_var_decl
 | 
| 853 | 
 | 
| 854 |         lhs = self._NameTypeList(p_node.GetChild(0))  # could be a tuple
 | 
| 855 | 
 | 
| 856 |         # This syntax is confusing, and different than JavaScript
 | 
| 857 |         #   var x, y = 1, 2
 | 
| 858 |         # But this is useful:
 | 
| 859 |         #   var flag, i = parseArgs(spec, argv)
 | 
| 860 | 
 | 
| 861 |         n = p_node.NumChildren()
 | 
| 862 |         if n >= 3:
 | 
| 863 |             rhs = self.Expr(p_node.GetChild(2))
 | 
| 864 |         else:
 | 
| 865 |             rhs = None
 | 
| 866 | 
 | 
| 867 |         # The caller should fill in the keyword token.
 | 
| 868 |         return command.VarDecl(None, lhs, rhs)
 | 
| 869 | 
 | 
| 870 |     def MakeMutation(self, p_node):
 | 
| 871 |         # type: (PNode) -> command.Mutation
 | 
| 872 |         """
 | 
| 873 |         ysh_mutation: lhs_list (augassign | '=') testlist end_stmt
 | 
| 874 |         """
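        # e.g. 'setvar x = f()' becomes (roughly)
        #   command.Mutation(keyword=None, lhs=[x], op='=', rhs=FuncCall f)
        # Augmented ops like += are allowed only with a single LHS (checked below).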
 | 
| 875 |         typ = p_node.typ
 | 
| 876 |         assert typ == grammar_nt.ysh_mutation
 | 
| 877 | 
 | 
| 878 |         lhs_list = self._LhsExprList(p_node.GetChild(0))  # could be a tuple
 | 
| 879 |         op_tok = p_node.GetChild(1).tok
 | 
| 880 |         if len(lhs_list) > 1 and op_tok.id != Id.Arith_Equal:
 | 
| 881 |             p_die('Multiple assignment must use =', op_tok)
 | 
| 882 |         rhs = self.Expr(p_node.GetChild(2))
 | 
| 883 |         return command.Mutation(None, lhs_list, op_tok, rhs)
 | 
| 884 | 
 | 
| 885 |     def _EggexFlag(self, p_node):
 | 
| 886 |         # type: (PNode) -> EggexFlag
 | 
| 887 |         n = p_node.NumChildren()
 | 
| 888 |         if n == 1:
 | 
| 889 |             return EggexFlag(False, p_node.GetChild(0).tok)
 | 
| 890 |         elif n == 2:
 | 
| 891 |             return EggexFlag(True, p_node.GetChild(1).tok)
 | 
| 892 |         else:
 | 
| 893 |             raise AssertionError()
 | 
| 894 | 
 | 
| 895 |     def _Eggex(self, p_node):
 | 
| 896 |         # type: (PNode) -> Eggex
 | 
| 897 |         """
 | 
| 898 |         eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'
 | 
| 899 |         """
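        # e.g. / d+ / becomes (roughly) Eggex(left='/', regex=Repeat(d, +),
        # flags=[], trans_pref=None); flags and a translation preference may
        # follow ';' as in the production above.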
 | 
| 900 |         left = p_node.GetChild(0).tok
 | 
| 901 |         regex = self._Regex(p_node.GetChild(1))
 | 
| 902 | 
 | 
| 903 |         flags = []  # type: List[EggexFlag]
 | 
| 904 |         trans_pref = None  # type: Optional[Token]
 | 
| 905 | 
 | 
| 906 |         i = 2
 | 
| 907 |         current = p_node.GetChild(i)
 | 
| 908 |         if current.typ == Id.Op_Semi:
 | 
| 909 |             i += 1
 | 
| 910 |             while True:
 | 
| 911 |                 current = p_node.GetChild(i)
 | 
| 912 |                 if current.typ != grammar_nt.re_flag:
 | 
| 913 |                     break
 | 
| 914 |                 flags.append(self._EggexFlag(current))
 | 
| 915 |                 i += 1
 | 
| 916 | 
 | 
| 917 |             if current.typ == Id.Op_Semi:
 | 
| 918 |                 i += 1
 | 
| 919 |                 trans_pref = p_node.GetChild(i).tok
 | 
| 920 | 
 | 
| 921 |         # Canonicalize and validate flags for ERE only.  Default is ERE.
 | 
| 922 |         if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
 | 
| 923 |             canonical_flags = regex_translate.CanonicalFlags(flags)
 | 
| 924 |         else:
 | 
| 925 |             canonical_flags = None
 | 
| 926 | 
 | 
| 927 |         return Eggex(left, regex, flags, trans_pref, canonical_flags)
 | 
| 928 | 
 | 
| 929 |     def YshCasePattern(self, pnode):
 | 
| 930 |         # type: (PNode) -> pat_t
 | 
| 931 |         assert pnode.typ == grammar_nt.ysh_case_pat, pnode
 | 
| 932 | 
 | 
| 933 |         pattern = pnode.GetChild(0)
 | 
| 934 |         typ = pattern.typ
 | 
| 935 |         if typ == Id.Op_LParen:
 | 
| 936 |             # pat_expr or pat_else
 | 
| 937 |             pattern = pnode.GetChild(1)
 | 
| 938 |             typ = pattern.typ
 | 
| 939 | 
 | 
| 940 |             if typ == grammar_nt.pat_else:
 | 
| 941 |                 return pat.Else
 | 
| 942 | 
 | 
| 943 |             if typ == grammar_nt.pat_exprs:
 | 
| 944 |                 exprs = []  # type: List[expr_t]
 | 
| 945 |                 for i in xrange(pattern.NumChildren()):
 | 
| 946 |                     child = pattern.GetChild(i)
 | 
| 947 |                     if child.typ == grammar_nt.expr:
 | 
| 948 |                         expr = self.Expr(child)
 | 
| 949 |                         exprs.append(expr)
 | 
| 950 |                 return pat.YshExprs(exprs)
 | 
| 951 | 
 | 
| 952 |         if typ == grammar_nt.eggex:
 | 
| 953 |             return self._Eggex(pattern)
 | 
| 954 | 
 | 
| 955 |         raise AssertionError()
 | 
| 956 | 
 | 
| 957 |     def _BlockArg(self, p_node):
 | 
| 958 |         # type: (PNode) -> expr_t
 | 
| 959 | 
 | 
| 960 |         n = p_node.NumChildren()
 | 
| 961 |         if n == 1:
 | 
| 962 |             child = p_node.GetChild(0)
 | 
| 963 |             return self.Expr(child)
 | 
| 964 | 
 | 
| 965 |         # It can only be an expression, not a=42, or ...expr
 | 
| 966 |         p_die('Invalid block expression argument', p_node.tok)
 | 
| 967 | 
 | 
| 968 |     def _Argument(self, p_node, after_semi, arglist):
 | 
| 969 |         # type: (PNode, bool, ArgList) -> None
 | 
| 970 |         """
 | 
| 971 |         argument: (
 | 
| 972 |           test [comp_for]
 | 
| 973 |         | test '=' test  # named arg
 | 
| 974 |         | '...' test  # var args
 | 
| 975 |         )
 | 
| 976 |         """
 | 
| 977 |         pos_args = arglist.pos_args
 | 
| 978 |         named_args = arglist.named_args
 | 
| 979 | 
 | 
| 980 |         assert p_node.typ == grammar_nt.argument, p_node
 | 
| 981 |         n = p_node.NumChildren()
 | 
| 982 |         if n == 1:
 | 
| 983 |             child = p_node.GetChild(0)
 | 
| 984 |             if after_semi:
 | 
| 985 |                 p_die(POS_ARG_MISPLACED, child.tok)
 | 
| 986 |             arg = self.Expr(child)
 | 
| 987 |             pos_args.append(arg)
 | 
| 988 |             return
 | 
| 989 | 
 | 
| 990 |         if n == 2:
 | 
| 991 |             # Note: We allow multiple spreads, just like Julia.  They are
 | 
| 992 |             # concatenated as in lists and dicts.
 | 
| 993 |             tok0 = p_node.GetChild(0).tok
 | 
| 994 |             if tok0.id == Id.Expr_Ellipsis:
 | 
| 995 |                 spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
 | 
| 996 |                 if after_semi:  # f(; ... named)
 | 
| 997 |                     named_args.append(NamedArg(None, spread_expr))
 | 
| 998 |                 else:  # f(...named)
 | 
| 999 |                     pos_args.append(spread_expr)
 | 
| 1000 |                 return
 | 
| 1001 | 
 | 
| 1002 |             # Note: generator expression not implemented
 | 
| 1003 |             if p_node.GetChild(1).typ == grammar_nt.comp_for:
 | 
| 1004 |                 child = p_node.GetChild(0)
 | 
| 1005 |                 if after_semi:
 | 
| 1006 |                     p_die(POS_ARG_MISPLACED, child.tok)
 | 
| 1007 | 
 | 
| 1008 |                 elt = self.Expr(child)
 | 
| 1009 |                 comp = self._CompFor(p_node.GetChild(1))
 | 
| 1010 |                 arg = expr.GeneratorExp(elt, [comp])
 | 
| 1011 |                 pos_args.append(arg)
 | 
| 1012 |                 return
 | 
| 1013 | 
 | 
| 1014 |             raise AssertionError()
 | 
| 1015 | 
 | 
| 1016 |         if n == 3:  # named args can come before or after the semicolon
 | 
| 1017 |             n1 = NamedArg(
 | 
| 1018 |                 p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
 | 
| 1019 |             named_args.append(n1)
 | 
| 1020 |             return
 | 
| 1021 | 
 | 
| 1022 |         raise AssertionError()
 | 
| 1023 | 
 | 
| 1024 |     def _ArgGroup(self, p_node, after_semi, arglist):
 | 
| 1025 |         # type: (PNode, bool, ArgList) -> None
 | 
| 1026 |         """
 | 
| 1027 |         arg_group: argument (',' argument)* [',']
 | 
| 1028 |         """
 | 
| 1029 |         for i in xrange(p_node.NumChildren()):
 | 
| 1030 |             p_child = p_node.GetChild(i)
 | 
| 1031 |             if p_child.typ == grammar_nt.argument:
 | 
| 1032 |                 self._Argument(p_child, after_semi, arglist)
 | 
| 1033 | 
 | 
| 1034 |     def _ArgList(self, p_node, arglist):
 | 
| 1035 |         # type: (PNode, ArgList) -> None
 | 
| 1036 |         """For both funcs and procs
 | 
| 1037 | 
 | 
| 1038 |         arglist: (
 | 
| 1039 |                [arg_group]
 | 
| 1040 |           [';' [arg_group]]
 | 
| 1041 |         )
 | 
| 1042 | 
 | 
| 1043 |         arglist3: ...
 | 
| 1044 |         """
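        # e.g. f(1, 2; name=3) fills arglist as (roughly):
        #   pos_args=[1, 2], semi_tok=';', named_args=[NamedArg('name', 3)]
        # arglist3 (procs) may additionally have a trailing block expression.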
 | 
| 1045 |         n = p_node.NumChildren()
 | 
| 1046 |         if n == 0:
 | 
| 1047 |             return
 | 
| 1048 | 
 | 
| 1049 |         i = 0
 | 
| 1050 | 
 | 
| 1051 |         if i >= n:
 | 
| 1052 |             return
 | 
| 1053 |         child = p_node.GetChild(i)
 | 
| 1054 |         if child.typ == grammar_nt.arg_group:
 | 
| 1055 |             self._ArgGroup(child, False, arglist)
 | 
| 1056 |             i += 1
 | 
| 1057 | 
 | 
| 1058 |         if i >= n:
 | 
| 1059 |             return
 | 
| 1060 |         child = p_node.GetChild(i)
 | 
| 1061 |         if child.typ == Id.Op_Semi:
 | 
| 1062 |             arglist.semi_tok = child.tok
 | 
| 1063 |             i += 1
 | 
| 1064 | 
 | 
| 1065 |         # Named args after first semi-colon
 | 
| 1066 |         if i >= n:
 | 
| 1067 |             return
 | 
| 1068 |         child = p_node.GetChild(i)
 | 
| 1069 |         if child.typ == grammar_nt.arg_group:
 | 
| 1070 |             self._ArgGroup(child, True, arglist)
 | 
| 1071 |             i += 1
 | 
| 1072 | 
 | 
| 1073 |         #
 | 
| 1074 |         # Special third group may have block expression - only for arglist3,
 | 
| 1075 |         # used for procs!
 | 
| 1076 |         #
 | 
| 1077 | 
 | 
| 1078 |         if i >= n:
 | 
| 1079 |             return
 | 
| 1080 |         assert p_node.typ == grammar_nt.arglist3, p_node
 | 
| 1081 | 
 | 
| 1082 |         child = p_node.GetChild(i)
 | 
| 1083 |         if child.typ == Id.Op_Semi:
 | 
| 1084 |             arglist.semi_tok2 = child.tok
 | 
| 1085 |             i += 1
 | 
| 1086 | 
 | 
| 1087 |         if i >= n:
 | 
| 1088 |             return
 | 
| 1089 |         child = p_node.GetChild(i)
 | 
| 1090 |         if child.typ == grammar_nt.argument:
 | 
| 1091 |             arglist.block_expr = self._BlockArg(child)
 | 
| 1092 |             i += 1
 | 
| 1093 | 
 | 
| 1094 |     def ProcCallArgs(self, pnode, arglist):
 | 
| 1095 |         # type: (PNode, ArgList) -> None
 | 
| 1096 |         """
 | 
| 1097 |         ysh_eager_arglist: '(' [arglist3] ')'
 | 
| 1098 |         ysh_lazy_arglist: '[' [arglist] ']'
 | 
| 1099 |         """
 | 
| 1100 |         n = pnode.NumChildren()
 | 
| 1101 |         if n == 2:  # f()
 | 
| 1102 |             return
 | 
| 1103 | 
 | 
| 1104 |         if n == 3:
 | 
| 1105 |             child1 = pnode.GetChild(1)  # the X in '( X )'
 | 
| 1106 | 
 | 
| 1107 |             self._ArgList(child1, arglist)
 | 
| 1108 |             return
 | 
| 1109 | 
 | 
| 1110 |         raise AssertionError()
 | 
| 1111 | 
 | 
| 1112 |     def _TypeExpr(self, pnode):
 | 
| 1113 |         # type: (PNode) -> TypeExpr
 | 
| 1114 |         """
 | 
| 1115 |         type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
 | 
| 1116 |         """
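        # e.g. 'Dict[Str, Int]' becomes (roughly)
        #   TypeExpr(name='Dict', params=[TypeExpr 'Str', TypeExpr 'Int'])
        # and a bare name like 'Int' has no params.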
 | 
| 1117 |         assert pnode.typ == grammar_nt.type_expr, pnode.typ
 | 
| 1118 | 
 | 
| 1119 |         ty = TypeExpr.CreateNull()  # don't allocate children
 | 
| 1120 | 
 | 
| 1121 |         ty.tok = pnode.GetChild(0).tok
 | 
| 1122 |         ty.name = lexer.TokenVal(ty.tok)
 | 
| 1123 | 
 | 
| 1124 |         n = pnode.NumChildren()
 | 
| 1125 |         if n == 1:
 | 
| 1126 |             return ty
 | 
| 1127 | 
 | 
| 1128 |         ty.params = []
 | 
| 1129 |         i = 2
 | 
| 1130 |         while i < n:
 | 
| 1131 |             p = self._TypeExpr(pnode.GetChild(i))
 | 
| 1132 |             ty.params.append(p)
 | 
| 1133 |             i += 2  # skip comma
 | 
| 1134 | 
 | 
| 1135 |         return ty
 | 
| 1136 | 
 | 
| 1137 |     def _Param(self, pnode):
 | 
| 1138 |         # type: (PNode) -> Param
 | 
| 1139 |         """
 | 
| 1140 |         param: Expr_Name [type_expr] ['=' expr]
 | 
| 1141 |         """
 | 
| 1142 |         assert pnode.typ == grammar_nt.param
 | 
| 1143 | 
 | 
| 1144 |         name_tok = pnode.GetChild(0).tok
 | 
| 1145 |         n = pnode.NumChildren()
 | 
| 1146 | 
 | 
| 1147 |         assert name_tok.id == Id.Expr_Name, name_tok
 | 
| 1148 | 
 | 
| 1149 |         default_val = None  # type: expr_t
 | 
| 1150 |         type_ = None  # type: TypeExpr
 | 
| 1151 | 
 | 
| 1152 |         if n == 1:
 | 
| 1153 |             # proc p(a)
 | 
| 1154 |             pass
 | 
| 1155 | 
 | 
| 1156 |         elif n == 2:
 | 
| 1157 |             # proc p(a Int)
 | 
| 1158 |             type_ = self._TypeExpr(pnode.GetChild(1))
 | 
| 1159 | 
 | 
| 1160 |         elif n == 3:
 | 
| 1161 |             # proc p(a = 3)
 | 
| 1162 |             default_val = self.Expr(pnode.GetChild(2))
 | 
| 1163 | 
 | 
| 1164 |         elif n == 4:
 | 
| 1165 |             # proc p(a Int = 3)
 | 
| 1166 |             type_ = self._TypeExpr(pnode.GetChild(1))
 | 
| 1167 |             default_val = self.Expr(pnode.GetChild(3))
 | 
| 1168 | 
 | 
| 1169 |         return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
 | 
| 1170 | 
 | 
| 1171 |     def _ParamGroup(self, p_node):
 | 
| 1172 |         # type: (PNode) -> ParamGroup
 | 
| 1173 |         """
 | 
| 1174 |         param_group:
 | 
| 1175 |           (param ',')*
 | 
| 1176 |           [ (param | '...' Expr_Name) [,] ]
 | 
| 1177 |         """
 | 
| 1178 |         assert p_node.typ == grammar_nt.param_group, p_node
 | 
| 1179 | 
 | 
| 1180 |         params = []  # type: List[Param]
 | 
| 1181 |         rest_of = None  # type: Optional[RestParam]
 | 
| 1182 | 
 | 
| 1183 |         n = p_node.NumChildren()
 | 
| 1184 |         i = 0
 | 
| 1185 |         while i < n:
 | 
| 1186 |             child = p_node.GetChild(i)
 | 
| 1187 |             if child.typ == grammar_nt.param:
 | 
| 1188 |                 params.append(self._Param(child))
 | 
| 1189 | 
 | 
| 1190 |             elif child.typ == Id.Expr_Ellipsis:
 | 
| 1191 |                 tok = p_node.GetChild(i + 1).tok
 | 
| 1192 |                 rest_of = RestParam(tok, lexer.TokenVal(tok))
 | 
| 1193 | 
 | 
| 1194 |             i += 2
 | 
| 1195 | 
 | 
| 1196 |         return ParamGroup(params, rest_of)
 | 
| 1197 | 
 | 
| 1198 |     def Proc(self, p_node):
 | 
| 1199 |         # type: (PNode) -> proc_sig_t
 | 
| 1200 |         """
 | 
| 1201 |         ysh_proc: (
 | 
| 1202 |           [ '(' 
 | 
| 1203 |                   [ param_group ]         # word params, with defaults
 | 
| 1204 |             [ ';' [ param_group ] ]       # positional typed params, with defaults
 | 
| 1205 |             [ ';' [ param_group ] ]       # named params, with defaults
 | 
| 1206 |             [ ';' Expr_Name ]             # optional block param, with no type or default
 | 
| 1207 |             ')'  
 | 
| 1208 |           ]
 | 
| 1209 |           '{'  # opening { for pgen2
 | 
| 1210 |         )
 | 
| 1211 |         """
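        # e.g. 'proc p (w; p1; n1; block) {' yields (roughly) a
        # proc_sig.Closed with word, positional, and named ParamGroups plus a
        # block_param, while 'proc p {' yields proc_sig.Open.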
 | 
| 1212 |         typ = p_node.typ
 | 
| 1213 |         assert typ == grammar_nt.ysh_proc
 | 
| 1214 | 
 | 
| 1215 |         n = p_node.NumChildren()
 | 
| 1216 |         if n == 1:  # proc f {
 | 
| 1217 |             return proc_sig.Open
 | 
| 1218 | 
 | 
| 1219 |         if n == 3:  # proc f () {
 | 
| 1220 |             return proc_sig.Closed.CreateNull(alloc_lists=True)  # no params
 | 
| 1221 | 
 | 
| 1222 |         # proc f( three param groups, and block group )
 | 
| 1223 |         sig = proc_sig.Closed.CreateNull(alloc_lists=True)  # params filled in below
 | 
| 1224 | 
 | 
| 1225 |         # Word args
 | 
| 1226 |         i = 1
 | 
| 1227 |         child = p_node.GetChild(i)
 | 
| 1228 |         if child.typ == grammar_nt.param_group:
 | 
| 1229 |             sig.word = self._ParamGroup(p_node.GetChild(i))
 | 
| 1230 | 
 | 
| 1231 |             # Validate word args
 | 
| 1232 |             for word in sig.word.params:
 | 
| 1233 |                 if word.type:
 | 
| 1234 |                     if word.type.name not in ('Str', 'Ref'):
 | 
| 1235 |                         p_die('Word params may only have type Str or Ref',
 | 
| 1236 |                               word.type.tok)
 | 
| 1237 |                     if word.type.params is not None:
 | 
| 1238 |                         p_die('Unexpected type parameters', word.type.tok)
 | 
| 1239 | 
 | 
| 1240 |             i += 2
 | 
| 1241 |         else:
 | 
| 1242 |             i += 1
 | 
| 1243 | 
 | 
| 1244 |         #log('i %d n %d', i, n)
 | 
| 1245 |         if i >= n:
 | 
| 1246 |             return sig
 | 
| 1247 | 
 | 
| 1248 |         # Positional args
 | 
| 1249 |         child = p_node.GetChild(i)
 | 
| 1250 |         if child.typ == grammar_nt.param_group:
 | 
| 1251 |             sig.positional = self._ParamGroup(p_node.GetChild(i))
 | 
| 1252 |             i += 2
 | 
| 1253 |         else:
 | 
| 1254 |             i += 1
 | 
| 1255 | 
 | 
| 1256 |         #log('i %d n %d', i, n)
 | 
| 1257 |         if i >= n:
 | 
| 1258 |             return sig
 | 
| 1259 | 
 | 
| 1260 |         # Keyword args
 | 
| 1261 |         child = p_node.GetChild(i)
 | 
| 1262 |         if child.typ == grammar_nt.param_group:
 | 
| 1263 |             sig.named = self._ParamGroup(p_node.GetChild(i))
 | 
| 1264 |             i += 2
 | 
| 1265 |         else:
 | 
| 1266 |             i += 1
 | 
| 1267 | 
 | 
| 1268 |         #log('i %d n %d', i, n)
 | 
| 1269 |         if i >= n:
 | 
| 1270 |             return sig
 | 
| 1271 | 
 | 
| 1272 |         child = p_node.GetChild(i)
 | 
| 1273 |         if child.typ == grammar_nt.param_group:
 | 
| 1274 |             group = self._ParamGroup(p_node.GetChild(i))
 | 
| 1275 |             params = group.params
 | 
| 1276 |             if len(params) > 1:
 | 
| 1277 |                 p_die('Only 1 block param is allowed', params[1].blame_tok)
 | 
| 1278 |             if group.rest_of:
 | 
| 1279 |                 p_die("Rest param isn't allowed for blocks",
 | 
| 1280 |                       group.rest_of.blame_tok)
 | 
| 1281 | 
 | 
| 1282 |             if len(params) == 1:
 | 
| 1283 |                 if params[0].type:
 | 
| 1284 |                     if params[0].type.name != 'Command':
 | 
| 1285 |                         p_die('Block param must have type Command',
 | 
| 1286 |                               params[0].type.tok)
 | 
| 1287 |                     if params[0].type.params is not None:
 | 
| 1288 |                         p_die('Unexpected type parameters', params[0].type.tok)
 | 
| 1289 | 
 | 
| 1290 |                 sig.block_param = params[0]
 | 
| 1291 | 
 | 
| 1292 |         return sig
 | 
| 1293 | 
 | 
| 1294 |     def YshFunc(self, p_node, out):
 | 
| 1295 |         # type: (PNode, Func) -> None
 | 
| 1296 |         """
 | 
| 1297 |         ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
 | 
| 1298 |         """
 | 
| 1299 |         assert p_node.typ == grammar_nt.ysh_func
 | 
| 1300 | 
 | 
| 1301 |         #self.p_printer.Print(p_node)
 | 
| 1302 | 
 | 
| 1303 |         out.name = p_node.GetChild(0).tok
 | 
| 1304 | 
 | 
| 1305 |         n = p_node.NumChildren()
 | 
| 1306 |         i = 2  # after (
 | 
| 1307 | 
 | 
| 1308 |         child = p_node.GetChild(i)
 | 
| 1309 |         if child.typ == grammar_nt.param_group:
 | 
| 1310 |             out.positional = self._ParamGroup(child)
 | 
| 1311 |             i += 2  # skip past ;
 | 
| 1312 |         else:
 | 
| 1313 |             i += 1
 | 
| 1314 | 
 | 
| 1315 |         if i >= n:
 | 
| 1316 |             return
 | 
| 1317 | 
 | 
| 1318 |         child = p_node.GetChild(i)
 | 
| 1319 |         if child.typ == grammar_nt.param_group:
 | 
| 1320 |             out.named = self._ParamGroup(child)
 | 
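    # A rough sketch of a func signature matching the ysh_func production
    # above, assuming typical YSH syntax:
    #
    #   func add(x, y; verbose=false) { return (x + y) }
    #
    # The group before ';' fills out.positional; the group after it fills
    # out.named.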
| 1321 | 
 | 
| 1322 |     #
 | 
| 1323 |     # Eggex Language
 | 
| 1324 |     #
 | 
| 1325 | 
 | 
| 1326 |     def _RangeCharSingleQuoted(self, p_node):
 | 
| 1327 |         # type: (PNode) -> Optional[CharCode]
 | 
| 1328 | 
 | 
| 1329 |         assert p_node.typ == grammar_nt.range_char, p_node
 | 
| 1330 | 
 | 
| 1331 |         # 'a' in 'a'-'b'
 | 
| 1332 | 
 | 
| 1333 |         child0 = p_node.GetChild(0)
 | 
| 1334 |         if child0.typ == grammar_nt.sq_string:
 | 
| 1335 |             sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
 | 
| 1336 |             n = len(sq_part.sval)
 | 
| 1337 |             if n == 0:
 | 
| 1338 |                 p_die("Quoted range char can't be empty",
 | 
| 1339 |                       loc.WordPart(sq_part))
 | 
| 1340 |             elif n == 1:
 | 
| 1341 |                 return CharCode(sq_part.left, ord(sq_part.sval[0]), False)
 | 
| 1342 |             else:
 | 
| 1343 |                 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
 | 
| 1344 |         return None
 | 
| 1345 | 
 | 
| 1346 |     def _OtherRangeToken(self, p_node):
 | 
| 1347 |         # type: (PNode) -> Token
 | 
| 1348 |         """An endpoint of a range (single char)
 | 
| 1349 | 
 | 
| 1350 |         range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
 | 
| 1351 |                     a-z         0-9           'a'-'z'     \x00-\xff
 | 
| 1352 |         """
 | 
| 1353 |         assert p_node.typ == grammar_nt.range_char, p_node
 | 
| 1354 | 
 | 
| 1355 |         child0 = p_node.GetChild(0)
 | 
| 1356 |         if child0.typ == grammar_nt.char_literal:
 | 
| 1357 |             # \x00 in /[\x00 - \x20]/
 | 
| 1358 |             tok = child0.GetChild(0).tok
 | 
| 1359 |             return tok
 | 
| 1360 | 
 | 
| 1361 |         tok = p_node.tok
 | 
| 1362 |         # a in a-z is Expr_Name
 | 
| 1363 |         # 0 in 0-9 is Expr_DecInt
 | 
| 1364 |         assert tok.id in (Id.Expr_Name, Id.Expr_DecInt), tok
 | 
| 1365 | 
 | 
| 1366 |         if tok.length != 1:
 | 
| 1367 |             p_die(RANGE_POINT_TOO_LONG, tok)
 | 
| 1368 |         return tok
 | 
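    # Range endpoints come in the four forms listed in the docstring above,
    # e.g. (a rough sketch of the class literal syntax):
    #
    #   / [ a-z  0-9  'A'-'Z'  \x00-\x7f ] /
    #
    # Quoted endpoints go through _RangeCharSingleQuoted; names, decimal
    # ints, and char literals go through _OtherRangeToken.  Multi-character
    # names, ints, or quoted strings are rejected with RANGE_POINT_TOO_LONG.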
| 1369 | 
 | 
| 1370 |     def _NonRangeChars(self, p_node):
 | 
| 1371 |         # type: (PNode) -> class_literal_term_t
 | 
| 1372 |         """
 | 
| 1373 |         \" \u1234 '#'
 | 
| 1374 |         """
 | 
| 1375 |         assert p_node.typ == grammar_nt.range_char, p_node
 | 
| 1376 | 
 | 
| 1377 |         child0 = p_node.GetChild(0)
 | 
| 1378 |         typ0 = p_node.GetChild(0).typ
 | 
| 1379 | 
 | 
| 1380 |         if typ0 == grammar_nt.sq_string:
 | 
| 1381 |             return cast(SingleQuoted, child0.GetChild(1).tok)
 | 
| 1382 | 
 | 
| 1383 |         if typ0 == grammar_nt.char_literal:
 | 
| 1384 |             return word_compile.EvalCharLiteralForRegex(child0.tok)
 | 
| 1385 | 
 | 
| 1386 |         if typ0 == Id.Expr_Name:
 | 
| 1387 |             # Look up PerlClass and PosixClass
 | 
| 1388 |             return self._NameInClass(None, child0.tok)
 | 
| 1389 | 
 | 
| 1390 |         raise AssertionError()
 | 
| 1391 | 
 | 
| 1392 |     def _ClassLiteralTerm(self, p_node):
 | 
| 1393 |         # type: (PNode) -> class_literal_term_t
 | 
| 1394 |         """
 | 
| 1395 |         class_literal_term:
 | 
| 1396 |           range_char ['-' range_char ] 
 | 
| 1397 |         | '@' Expr_Name  # splice
 | 
| 1398 |         | '!' Expr_Name  # negate char class
 | 
| 1399 |           ...
 | 
| 1400 |         """
 | 
| 1401 |         assert p_node.typ == grammar_nt.class_literal_term, p_node
 | 
| 1402 | 
 | 
| 1403 |         typ0 = p_node.GetChild(0).typ
 | 
| 1404 | 
 | 
| 1405 |         if typ0 == grammar_nt.range_char:
 | 
| 1406 |             n = p_node.NumChildren()
 | 
| 1407 | 
 | 
| 1408 |             if n == 1:
 | 
| 1409 |                 return self._NonRangeChars(p_node.GetChild(0))
 | 
| 1410 | 
 | 
| 1411 |             # 'a'-'z' etc.
 | 
| 1412 |             if n == 3:
 | 
| 1413 |                 assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
 | 
| 1414 | 
 | 
| 1415 |                 left = p_node.GetChild(0)
 | 
| 1416 |                 right = p_node.GetChild(2)
 | 
| 1417 | 
 | 
| 1418 |                 code1 = self._RangeCharSingleQuoted(left)
 | 
| 1419 |                 if code1 is None:
 | 
| 1420 |                     tok1 = self._OtherRangeToken(left)
 | 
| 1421 |                     code1 = word_compile.EvalCharLiteralForRegex(tok1)
 | 
| 1422 | 
 | 
| 1423 |                 code2 = self._RangeCharSingleQuoted(right)
 | 
| 1424 |                 if code2 is None:
 | 
| 1425 |                     tok2 = self._OtherRangeToken(right)
 | 
| 1426 |                     code2 = word_compile.EvalCharLiteralForRegex(tok2)
 | 
| 1427 |                 return CharRange(code1, code2)
 | 
| 1428 | 
 | 
| 1429 |             raise AssertionError()
 | 
| 1430 | 
 | 
| 1431 |         if typ0 == Id.Expr_At:
 | 
| 1432 |             tok1 = p_node.GetChild(1).tok
 | 
| 1433 |             return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
 | 
| 1434 | 
 | 
| 1435 |         if typ0 == Id.Expr_Bang:
 | 
| 1436 |             return self._NameInClass(
 | 
| 1437 |                 p_node.GetChild(0).tok,
 | 
| 1438 |                 p_node.GetChild(1).tok)
 | 
| 1439 | 
 | 
| 1440 |         p_die("This kind of class literal term isn't implemented",
 | 
| 1441 |               p_node.GetChild(0).tok)
 | 
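    # A rough sketch of the class literal terms handled above:
    #
    #   / [ a-z  'x'  \n  @chars  !digit ] /
    #
    # i.e. ranges and single chars, a spliced value (@chars), and a negated
    # name (!digit).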
| 1442 | 
 | 
| 1443 |     def _ClassLiteral(self, p_node):
 | 
| 1444 |         # type: (PNode) -> List[class_literal_term_t]
 | 
| 1445 |         """class_literal: '[' class_literal_term+ ']'."""
 | 
| 1446 |         assert p_node.typ == grammar_nt.class_literal
 | 
| 1447 |         # skip [ and ]
 | 
| 1448 |         terms = []  # type: List[class_literal_term_t]
 | 
| 1449 |         for i in xrange(1, p_node.NumChildren() - 1):
 | 
| 1450 |             terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
 | 
| 1451 | 
 | 
| 1452 |         return terms
 | 
| 1453 | 
 | 
| 1454 |     def _NameInRegex(self, negated_tok, tok):
 | 
| 1455 |         # type: (Token, Token) -> re_t
 | 
| 1456 |         tok_str = lexer.TokenVal(tok)
 | 
| 1457 |         if tok_str == 'dot':
 | 
| 1458 |             if negated_tok:
 | 
| 1459 |                 p_die("Can't negate this symbol", tok)
 | 
| 1460 |             return re.Primitive(tok, Id.Eggex_Dot)
 | 
| 1461 | 
 | 
| 1462 |         if tok_str in POSIX_CLASSES:
 | 
| 1463 |             return PosixClass(negated_tok, tok_str)
 | 
| 1464 | 
 | 
| 1465 |         perl = PERL_CLASSES.get(tok_str)
 | 
| 1466 |         if perl is not None:
 | 
| 1467 |             return PerlClass(negated_tok, perl)
 | 
| 1468 | 
 | 
| 1469 |         if tok_str[0].isupper():  # e.g. HexDigit
 | 
| 1470 |             return re.Splice(tok, lexer.TokenVal(tok))
 | 
| 1471 | 
 | 
| 1472 |         p_die("%r isn't a character class" % tok_str, tok)
 | 
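    # A rough sketch of how bare names resolve in regex position:
    #
    #   dot              -> re.Primitive with Id.Eggex_Dot
    #   d, w, word, s    -> PerlClass
    #   alpha, digit ... -> PosixClass
    #   HexDigit         -> re.Splice (capitalized names refer to other
    #                       eggex values)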
| 1473 | 
 | 
| 1474 |     def _NameInClass(self, negated_tok, tok):
 | 
| 1475 |         # type: (Token, Token) -> class_literal_term_t
 | 
| 1476 |         """Like the above, but 'dot' and 'd' don't mean anything within []"""
 | 
| 1477 |         tok_str = lexer.TokenVal(tok)
 | 
| 1478 | 
 | 
| 1479 |         # A bare, unquoted character literal.  In the grammar, this is expressed as
 | 
| 1480 |         # range_char without an ending.
 | 
| 1481 | 
 | 
| 1482 |         # d is NOT 'digit', it's a literal 'd'!
 | 
| 1483 |         if len(tok_str) == 1:
 | 
| 1484 |             # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
 | 
| 1485 |             assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
 | 
| 1486 | 
 | 
| 1487 |             if negated_tok:  # [!d] is not allowed, only [!digit]
 | 
| 1488 |                 p_die("Can't negate this symbol", tok)
 | 
| 1489 |             return word_compile.EvalCharLiteralForRegex(tok)
 | 
| 1490 | 
 | 
| 1491 |         # digit, word, but not d, w, etc.
 | 
| 1492 |         if tok_str in POSIX_CLASSES:
 | 
| 1493 |             return PosixClass(negated_tok, tok_str)
 | 
| 1494 | 
 | 
| 1495 |         perl = PERL_CLASSES.get(tok_str)
 | 
| 1496 |         if perl is not None:
 | 
| 1497 |             return PerlClass(negated_tok, perl)
 | 
| 1498 |         p_die("%r isn't a character class" % tok_str, tok)
 | 
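    # Inside [ ], the rules differ, as the docstring above notes.  A rough
    # sketch:
    #
    #   [ d ]      -> the literal character 'd', not the digit class
    #   [ digit ]  -> PosixClass
    #   [ !d ]     -> error; only full class names like !digit can be negated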
| 1499 | 
 | 
| 1500 |     def _ReAtom(self, p_atom):
 | 
| 1501 |         # type: (PNode) -> re_t
 | 
| 1502 |         """
 | 
| 1503 |         re_atom: ( char_literal | ...
 | 
| 1504 |         """
 | 
| 1505 |         assert p_atom.typ == grammar_nt.re_atom, p_atom.typ
 | 
| 1506 | 
 | 
| 1507 |         child0 = p_atom.GetChild(0)
 | 
| 1508 | 
 | 
| 1509 |         typ0 = p_atom.GetChild(0).typ
 | 
| 1510 |         tok0 = p_atom.GetChild(0).tok
 | 
| 1511 | 
 | 
| 1512 |         # Non-terminals
 | 
| 1513 | 
 | 
| 1514 |         if typ0 == grammar_nt.class_literal:
 | 
| 1515 |             return re.CharClassLiteral(False, self._ClassLiteral(child0))
 | 
| 1516 | 
 | 
| 1517 |         if typ0 == grammar_nt.sq_string:
 | 
| 1518 |             return cast(SingleQuoted, child0.GetChild(1).tok)
 | 
| 1519 | 
 | 
| 1520 |         if typ0 == grammar_nt.char_literal:
 | 
| 1521 |             # Note: ERE doesn't seem to support escapes like Python
 | 
| 1522 |             #    https://docs.python.org/3/library/re.html
 | 
| 1523 |             # We might want to do a translation like this:
 | 
| 1524 |             #
 | 
| 1525 |             # \u{03bc} -> \u03bc
 | 
| 1526 |             # \x00 -> \x00
 | 
| 1527 |             # \n -> \n
 | 
| 1528 | 
 | 
| 1529 |             # Must be Id.Char_{OneChar,Hex,UBraced}
 | 
| 1530 |             assert consts.GetKind(tok0.id) == Kind.Char
 | 
| 1531 |             s = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
 | 
| 1532 |             return re.LiteralChars(tok0, s)
 | 
| 1533 | 
 | 
| 1534 |         # Special punctuation
 | 
| 1535 |         if typ0 == Id.Expr_Dot:  # .
 | 
| 1536 |             return re.Primitive(tok0, Id.Eggex_Dot)
 | 
| 1537 | 
 | 
| 1538 |         if typ0 == Id.Arith_Caret:  # ^
 | 
| 1539 |             return re.Primitive(tok0, Id.Eggex_Start)
 | 
| 1540 | 
 | 
| 1541 |         if typ0 == Id.Expr_Dollar:  # $
 | 
| 1542 |             return re.Primitive(tok0, Id.Eggex_End)
 | 
| 1543 | 
 | 
| 1544 |         if typ0 == Id.Expr_Name:
 | 
| 1545 |             # d digit -> PosixClass PerlClass etc.
 | 
| 1546 |             return self._NameInRegex(None, tok0)
 | 
| 1547 | 
 | 
| 1548 |         if typ0 == Id.Expr_Symbol:
 | 
| 1549 |             # Validate symbols here, like we validate PerlClass, etc.
 | 
| 1550 |             tok_str = lexer.TokenVal(tok0)
 | 
| 1551 |             if tok_str == '%start':
 | 
| 1552 |                 return re.Primitive(tok0, Id.Eggex_Start)
 | 
| 1553 |             if tok_str == '%end':
 | 
| 1554 |                 return re.Primitive(tok0, Id.Eggex_End)
 | 
| 1555 |             p_die("Unexpected token %r in regex" % tok_str, tok0)
 | 
| 1556 | 
 | 
| 1557 |         if typ0 == Id.Expr_At:
 | 
| 1558 |             # | '@' Expr_Name
 | 
| 1559 |             tok1 = p_atom.GetChild(1).tok
 | 
| 1560 |             return re.Splice(tok1, lexer.TokenVal(tok1))
 | 
| 1561 | 
 | 
| 1562 |         if typ0 == Id.Expr_Bang:
 | 
| 1563 |             # | '!' (Expr_Name | class_literal)
 | 
| 1564 |             # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
 | 
| 1565 |             n = p_atom.NumChildren()
 | 
| 1566 |             if n == 2:
 | 
| 1567 |                 child1 = p_atom.GetChild(1)
 | 
| 1568 |                 if child1.typ == grammar_nt.class_literal:
 | 
| 1569 |                     return re.CharClassLiteral(True,
 | 
| 1570 |                                                self._ClassLiteral(child1))
 | 
| 1571 |                 else:
 | 
| 1572 |                     return self._NameInRegex(tok0, p_atom.GetChild(1).tok)
 | 
| 1573 |             else:
 | 
| 1574 |                 # Note: !! conflicts with shell history
 | 
| 1575 |                 p_die(
 | 
| 1576 |                     "Backtracking with !! isn't implemented (requires Python/PCRE)",
 | 
| 1577 |                     p_atom.GetChild(1).tok)
 | 
| 1578 | 
 | 
| 1579 |         if typ0 == Id.Op_LParen:
 | 
| 1580 |             # | '(' regex ')'
 | 
| 1581 | 
 | 
| 1582 |             # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
 | 
| 1583 |             # Capture.
 | 
| 1584 |             return re.Group(self._Regex(p_atom.GetChild(1)))
 | 
| 1585 | 
 | 
| 1586 |         if typ0 == Id.Arith_Less:
 | 
| 1587 |             # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'
 | 
| 1588 | 
 | 
| 1589 |             n = p_atom.NumChildren()
 | 
| 1590 |             assert n == 4 or n == 6 or n == 8, n
 | 
| 1591 | 
 | 
| 1592 |             # < capture d+ >
 | 
| 1593 |             regex = self._Regex(p_atom.GetChild(2))
 | 
| 1594 | 
 | 
| 1595 |             as_name = None  # type: Optional[Token]
 | 
| 1596 |             func_name = None  # type: Optional[Token]
 | 
| 1597 | 
 | 
| 1598 |             i = 3  # points at any of   >   as   :
 | 
| 1599 | 
 | 
| 1600 |             typ = p_atom.GetChild(i).typ
 | 
| 1601 |             if typ == Id.Expr_As:
 | 
| 1602 |                 as_name = p_atom.GetChild(i + 1).tok
 | 
| 1603 |                 i += 2
 | 
| 1604 | 
 | 
| 1605 |             typ = p_atom.GetChild(i).typ
 | 
| 1606 |             if typ == Id.Arith_Colon:
 | 
| 1607 |                 func_name = p_atom.GetChild(i + 1).tok
 | 
| 1608 | 
 | 
| 1609 |             return re.Capture(regex, as_name, func_name)
 | 
| 1610 | 
 | 
| 1611 |         raise AssertionError(typ0)
 | 
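    # A rough sketch of the atoms handled above, assuming typical eggex
    # syntax:
    #
    #   .  ^  $  %start  %end        primitives
    #   'foo'  \n  \u{3bc}           quoted strings and char literals
    #   [ a-z ]  ![ a-z ]  !digit    class literals and negation
    #   @other_pattern               splice of another eggex
    #   (d+)                         group
    #   < capture d+ as name >       capture, optionally ': func' to convert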
| 1612 | 
 | 
| 1613 |     def _RepeatOp(self, p_repeat):
 | 
| 1614 |         # type: (PNode) -> re_repeat_t
 | 
| 1615 |         """
 | 
| 1616 |         repeat_op: '+' | '*' | '?' 
 | 
| 1617 |                  | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
 | 
| 1618 |         """
 | 
| 1619 |         assert p_repeat.typ == grammar_nt.repeat_op, p_repeat
 | 
| 1620 | 
 | 
| 1621 |         tok = p_repeat.GetChild(0).tok
 | 
| 1622 |         id_ = tok.id
 | 
| 1623 | 
 | 
| 1624 |         if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
 | 
| 1625 |             return tok  # a+  a*  a?
 | 
| 1626 | 
 | 
| 1627 |         if id_ == Id.Op_LBrace:
 | 
| 1628 |             child1 = p_repeat.GetChild(1)
 | 
| 1629 |             if child1.typ != grammar_nt.repeat_range:
 | 
| 1630 |                 # e.g. dot{N *} is .*?
 | 
| 1631 |                 p_die("Perl-style repetition isn't implemented with libc",
 | 
| 1632 |                       child1.tok)
 | 
| 1633 | 
 | 
| 1634 |             # repeat_range: (
 | 
| 1635 |             #     Expr_DecInt [',']
 | 
| 1636 |             #   | ',' Expr_DecInt
 | 
| 1637 |             #   | Expr_DecInt ',' Expr_DecInt
 | 
| 1638 |             # )
 | 
| 1639 | 
 | 
| 1640 |             n = child1.NumChildren()
 | 
| 1641 |             if n == 1:  # {3}
 | 
| 1642 |                 tok = child1.GetChild(0).tok
 | 
| 1643 |                 return tok  # different operator than + * ?
 | 
| 1644 | 
 | 
| 1645 |             if n == 2:
 | 
| 1646 |                 if child1.GetChild(0).typ == Id.Expr_DecInt:  # {3,}
 | 
| 1647 |                     left = child1.GetChild(0).tok
 | 
| 1648 |                     return re_repeat.Range(left, lexer.TokenVal(left), '',
 | 
| 1649 |                                            None)
 | 
| 1650 |                 else:  # {,3}
 | 
| 1651 |                     right = child1.GetChild(1).tok
 | 
| 1652 |                     return re_repeat.Range(None, '', lexer.TokenVal(right),
 | 
| 1653 |                                            right)
 | 
| 1654 | 
 | 
| 1655 |             if n == 3:  # {1,3}
 | 
| 1656 |                 left = child1.GetChild(0).tok
 | 
| 1657 |                 right = child1.GetChild(2).tok
 | 
| 1658 |                 return re_repeat.Range(left, lexer.TokenVal(left),
 | 
| 1659 |                                        lexer.TokenVal(right), right)
 | 
| 1660 | 
 | 
| 1661 |             raise AssertionError(n)
 | 
| 1662 | 
 | 
| 1663 |         raise AssertionError(id_)
 | 
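    # A rough sketch of the repetition operators handled above:
    #
    #   d+  d*  d?                     single-token operators
    #   d{3}  d{3,}  d{,3}  d{1,3}     bounded repetition (repeat_range)
    #   d{N *}                         Perl-style repetition, rejected here
    #                                  ("isn't implemented with libc")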
| 1664 | 
 | 
| 1665 |     def _ReAlt(self, p_node):
 | 
| 1666 |         # type: (PNode) -> re_t
 | 
| 1667 |         """
 | 
| 1668 |         re_alt: (re_atom [repeat_op])+
 | 
| 1669 |         """
 | 
| 1670 |         assert p_node.typ == grammar_nt.re_alt
 | 
| 1671 | 
 | 
| 1672 |         i = 0
 | 
| 1673 |         n = p_node.NumChildren()
 | 
| 1674 |         seq = []  # type: List[re_t]
 | 
| 1675 |         while i < n:
 | 
| 1676 |             r = self._ReAtom(p_node.GetChild(i))
 | 
| 1677 |             i += 1
 | 
| 1678 |             if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
 | 
| 1679 |                 repeat_op = self._RepeatOp(p_node.GetChild(i))
 | 
| 1680 |                 r = re.Repeat(r, repeat_op)
 | 
| 1681 |                 i += 1
 | 
| 1682 |             seq.append(r)
 | 
| 1683 | 
 | 
| 1684 |         if len(seq) == 1:
 | 
| 1685 |             return seq[0]
 | 
| 1686 |         else:
 | 
| 1687 |             return re.Seq(seq)
 | 
| 1688 | 
 | 
| 1689 |     def _Regex(self, p_node):
 | 
| 1690 |         # type: (PNode) -> re_t
 | 
| 1691 |         """
 | 
| 1692 |         regex: [re_alt] (('|'|'or') re_alt)*
 | 
| 1693 |         """
 | 
| 1694 |         assert p_node.typ == grammar_nt.regex
 | 
| 1695 | 
 | 
| 1696 |         n = p_node.NumChildren()
 | 
| 1697 |         alts = []  # type: List[re_t]
 | 
| 1698 |         for i in xrange(0, n, 2):  # was children[::2]
 | 
| 1699 |             c = p_node.GetChild(i)
 | 
| 1700 |             alts.append(self._ReAlt(c))
 | 
| 1701 | 
 | 
| 1702 |         if len(alts) == 1:
 | 
| 1703 |             return alts[0]
 | 
| 1704 |         else:
 | 
| 1705 |             return re.Alt(alts)
 | 
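    # A rough sketch of a full regex, assuming typical eggex syntax: the
    # pattern
    #
    #   / d+ ('.' d+)? | 'NA' /
    #
    # is alternation over two re_alt sequences, so _Regex returns re.Alt,
    # and the first alternative is a re.Seq of repeated/grouped atoms.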
| 1706 | 
 | 
| 1707 | 
 | 
| 1708 | # vim: sw=4
 |