OILS / ysh / expr_to_ast.py View on Github | oilshell.org

1702 lines, 1021 significant
1"""expr_to_ast.py."""
2from __future__ import print_function
3
4from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
5from _devbuild.gen.syntax_asdl import (
6 Token,
7 SimpleVarSub,
8 loc,
9 loc_t,
10 DoubleQuoted,
11 SingleQuoted,
12 BracedVarSub,
13 CommandSub,
14 ShArrayLiteral,
15 command,
16 expr,
17 expr_e,
18 expr_t,
19 expr_context_e,
20 re,
21 re_t,
22 re_repeat,
23 re_repeat_t,
24 class_literal_term,
25 class_literal_term_t,
26 PosixClass,
27 PerlClass,
28 NameType,
29 y_lhs_t,
30 Comprehension,
31 Subscript,
32 Attribute,
33 proc_sig,
34 proc_sig_t,
35 Param,
36 RestParam,
37 ParamGroup,
38 NamedArg,
39 ArgList,
40 pat,
41 pat_t,
42 TypeExpr,
43 Func,
44 Eggex,
45 EggexFlag,
46 CharCode,
47 CharRange,
48)
49from _devbuild.gen.value_asdl import value, value_t
50from _devbuild.gen import grammar_nt
51from core.error import p_die
52from core import num
53from data_lang import j8
54from frontend import consts
55from frontend import lexer
56from frontend import location
57from mycpp import mops
58from mycpp import mylib
59from mycpp.mylib import log, tagswitch
60from osh import word_compile
61from ysh import expr_parse
62from ysh import regex_translate
63
64from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
65if TYPE_CHECKING:
66 from pgen2.grammar import Grammar
67 from pgen2.pnode import PNode
68
_ = log  # keep the 'log' import referenced; it's used ad-hoc while debugging

# Perl-like character class names accepted in eggexes, mapped to their
# single-letter canonical form (e.g. \word and \w both mean 'w').
PERL_CLASSES = {
    'd': 'd',
    'w': 'w',
    'word': 'w',
    's': 's',
}
# POSIX bracket-expression class names, e.g. the 'alnum' in [[:alnum:]]
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
POSIX_CLASSES = [
    'alnum',
    'cntrl',
    'lower',
    'space',
    'alpha',
    'digit',
    'print',
    'upper',
    'blank',
    'graph',
    'punct',
    'xdigit',
]
# NOTE: There are also things like \p{Greek} that we could put in the
# "non-sigil" namespace.

# Shared parse-error messages
RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"

POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"

# Copied from pgen2/token.py to avoid dependency.
# Values below NT_OFFSET are terminal tokens; values at or above it are
# grammar nonterminals.
NT_OFFSET = 256
101
if mylib.PYTHON:

    def MakeGrammarNames(ysh_grammar):
        # type: (Grammar) -> Dict[int, str]
        """Build a debug map from token/nonterminal integer to its name."""

        # TODO: Break this dependency
        from frontend import lexer_def

        # Terminal tokens.  Some token values exceed NT_OFFSET, so only
        # keep the ones that fit below the nonterminal range.
        number_to_name = {
            int_id: id_name
            for id_name, int_id in lexer_def.ID_SPEC.id_str2int.items()
            if int_id < NT_OFFSET
        }

        # Nonterminals from the grammar itself; all of them must be at or
        # above NT_OFFSET.
        for number, symbol in ysh_grammar.number2symbol.items():
            assert number >= NT_OFFSET, (number, symbol)
            number_to_name[number] = symbol

        return number_to_name
126
127class Transformer(object):
128 """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
129
130 pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
131 so this "transformer" is the equivalent.
132
133 Files to refer to when modifying this function:
134
135 ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
136 frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
137
138 Related examples:
139
140 opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
141 Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
142
143 Other:
144 frontend/parse_lib.py (turn on print_parse_tree)
145
146 Public methods:
147 Expr, VarDecl
148 atom, trailer, etc. are private, named after productions in grammar.pgen2.
149 """
150
151 def __init__(self, gr):
152 # type: (Grammar) -> None
153 self.number2symbol = gr.number2symbol
154 if mylib.PYTHON:
155 names = MakeGrammarNames(gr)
156 # print raw nodes
157 self.p_printer = expr_parse.ParseTreePrinter(names)
158
159 def _LeftAssoc(self, p_node):
160 # type: (PNode) -> expr_t
161 """For an associative binary operation.
162
163 Examples:
164 xor_expr: and_expr ('xor' and_expr)*
165 term: factor (('*'|'/'|'%'|'div') factor)*
166
167 3 - 1 - 2 must be grouped as ((3 - 1) - 2).
168 """
169 # Note: Compare the iteractive com_binary() method in
170 # opy/compiler2/transformer.py.
171
172 # Examples:
173 # - The PNode for '3 - 1' will have 3 children
174 # - The PNode for '3 - 1 - 2' will have 5 children
175
176 #self.p_printer.Print(p_node)
177
178 i = 1 # index of the operator
179 n = p_node.NumChildren()
180
181 left = self.Expr(p_node.GetChild(0))
182 while i < n:
183 op = p_node.GetChild(i)
184 right = self.Expr(p_node.GetChild(i + 1))
185
186 # create a new left node
187 left = expr.Binary(op.tok, left, right)
188 i += 2
189
190 return left
191
192 def _Trailer(self, base, p_trailer):
193 # type: (expr_t, PNode) -> expr_t
194 """
195 trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
196 | '.' NAME | '->' NAME | '::' NAME
197 )
198 """
199 tok0 = p_trailer.GetChild(0).tok
200 typ0 = p_trailer.GetChild(0).typ
201
202 if typ0 == Id.Op_LParen:
203 lparen = tok0
204 rparen = p_trailer.GetChild(-1).tok
205 arglist = ArgList(lparen, [], None, [], None, None, rparen)
206 if p_trailer.NumChildren() == 2: # ()
207 return expr.FuncCall(base, arglist)
208
209 p = p_trailer.GetChild(1) # the X in ( X )
210 assert p.typ == grammar_nt.arglist # f(x, y)
211 self._ArgList(p, arglist)
212 return expr.FuncCall(base, arglist)
213
214 if typ0 == Id.Op_LBracket:
215 p_args = p_trailer.GetChild(1)
216 assert p_args.typ == grammar_nt.subscriptlist
217 n = p_args.NumChildren()
218 if n > 1:
219 p_die("Only 1 subscript is accepted", p_args.GetChild(1).tok)
220
221 a = p_args.GetChild(0)
222 return Subscript(tok0, base, self._Subscript(a))
223
224 if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
225 attr = p_trailer.GetChild(1).tok # will be Id.Expr_Name
226 return Attribute(base, tok0, attr, lexer.TokenVal(attr),
227 expr_context_e.Store)
228
229 raise AssertionError(typ0)
230
231 def _DictPair(self, p_node):
232 # type: (PNode) -> Tuple[expr_t, expr_t]
233 """
234 dict_pair: ( Expr_Name [':' test]
235 | '[' testlist ']' ':' test )
236 | sq_string ':' test
237 | dq_string ':' test )
238 """
239 assert p_node.typ == grammar_nt.dict_pair
240
241 typ = p_node.GetChild(0).typ
242
243 if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
244 key = self.Expr(p_node.GetChild(0)) # type: expr_t
245 val = self.Expr(p_node.GetChild(2))
246 return key, val
247
248 tok0 = p_node.GetChild(0).tok
249 id_ = tok0.id
250
251 if id_ == Id.Expr_Name:
252 key_str = value.Str(lexer.TokenVal(tok0))
253 key = expr.Const(tok0, key_str)
254 if p_node.NumChildren() >= 3:
255 val = self.Expr(p_node.GetChild(2))
256 else:
257 val = expr.Implicit
258
259 if id_ == Id.Op_LBracket: # {[x+y]: 'val'}
260 key = self.Expr(p_node.GetChild(1))
261 val = self.Expr(p_node.GetChild(4))
262 return key, val
263
264 return key, val
265
266 def _Dict(self, parent, p_node):
267 # type: (PNode, PNode) -> expr.Dict
268 """
269 dict: dict_pair (comma_newline dict_pair)* [comma_newline]
270 """
271 if p_node.typ == Id.Op_RBrace: # {}
272 return expr.Dict(parent.tok, [], [])
273
274 assert p_node.typ == grammar_nt.dict
275
276 keys = [] # type: List[expr_t]
277 values = [] # type: List[expr_t]
278
279 n = p_node.NumChildren()
280 for i in xrange(0, n, 2):
281 key, val = self._DictPair(p_node.GetChild(i))
282 keys.append(key)
283 values.append(val)
284
285 return expr.Dict(parent.tok, keys, values)
286
287 def _Tuple(self, parent):
288 # type: (PNode) -> expr_t
289
290 n = parent.NumChildren()
291
292 # (x) -- not a tuple
293 if n == 1:
294 return self.Expr(parent.GetChild(0))
295
296 # x, and (x,) aren't allowed
297 if n == 2:
298 p_die('Invalid trailing comma', parent.GetChild(1).tok)
299
300 elts = [] # type: List[expr_t]
301 for i in xrange(0, n, 2): # skip commas
302 p_node = parent.GetChild(i)
303 elts.append(self.Expr(p_node))
304
305 return expr.Tuple(parent.tok, elts,
306 expr_context_e.Store) # unused expr_context_e
307
308 def _TestlistComp(self, parent, p_node, id0):
309 # type: (PNode, PNode, Id_t) -> expr_t
310 """
311 testlist_comp:
312 (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
313 """
314 assert p_node.typ == grammar_nt.testlist_comp
315
316 n = p_node.NumChildren()
317 if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
318 elt = self.Expr(p_node.GetChild(0))
319 comp = self._CompFor(p_node.GetChild(1))
320 if id0 == Id.Op_LParen: # (x+1 for x in y)
321 return expr.GeneratorExp(elt, [comp])
322 if id0 == Id.Op_LBracket: # [x+1 for x in y]
323 return expr.ListComp(parent.tok, elt, [comp])
324 raise AssertionError()
325
326 if id0 == Id.Op_LParen:
327 # Parenthesized expression like (x+1) or (x)
328 if n == 1:
329 return self.Expr(p_node.GetChild(0))
330
331 # Tuples (1,) (1, 2) etc. - TODO: should be a list literal?
332 if p_node.GetChild(1).typ == Id.Arith_Comma:
333 return self._Tuple(p_node)
334
335 raise AssertionError()
336
337 if id0 == Id.Op_LBracket: # List [1,2,3]
338 elts = [] # type: List[expr_t]
339 for i in xrange(0, n, 2): # skip commas
340 elts.append(self.Expr(p_node.GetChild(i)))
341
342 return expr.List(parent.tok, elts,
343 expr_context_e.Store) # unused expr_context_e
344
345 raise AssertionError(Id_str(id0))
346
347 def _Atom(self, parent):
348 # type: (PNode) -> expr_t
349 """Handle alternatives of 'atom' where there's more than one child."""
350
351 tok = parent.GetChild(0).tok
352 id_ = tok.id
353 n = parent.NumChildren()
354
355 if id_ == Id.Op_LParen:
356 # atom: '(' [yield_expr|testlist_comp] ')' | ...
357 if n == 2: # () is a tuple
358 assert (
359 parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
360 return expr.Tuple(tok, [], expr_context_e.Store)
361
362 return self._TestlistComp(parent, parent.GetChild(1), id_)
363
364 if id_ == Id.Op_LBracket:
365 # atom: ... | '[' [testlist_comp] ']' | ...
366
367 if n == 2: # []
368 assert (parent.GetChild(1).typ == Id.Op_RBracket
369 ), parent.GetChild(1)
370 return expr.List(tok, [],
371 expr_context_e.Store) # unused expr_context_e
372
373 return self._TestlistComp(parent, parent.GetChild(1), id_)
374
375 if id_ == Id.Left_CaretBracket: # ^[42 + x]
376 child = self.Expr(parent.GetChild(1))
377 return expr.Literal(child)
378
379 if id_ == Id.Op_LBrace:
380 # atom: ... | '{' [Op_Newline] [dict] '}'
381 i = 1
382 if parent.GetChild(i).typ == Id.Op_Newline:
383 i += 1
384 return self._Dict(parent, parent.GetChild(i))
385
386 if id_ == Id.Arith_Amp:
387 n = parent.NumChildren()
388 if n >= 3:
389 p_die("Places in containers not implemented yet",
390 parent.GetChild(2).tok)
391
392 name_tok = parent.GetChild(1).tok
393 return expr.Place(name_tok, lexer.TokenVal(name_tok), [])
394
395 if id_ == Id.Expr_Func:
396 # STUB. This should really be a Func, not Lambda.
397 return expr.Lambda([], expr.Implicit)
398
399 # 100 M
400 # Ignoring the suffix for now
401 if id_ == Id.Expr_DecInt:
402 assert n > 1
403 p_die("Units suffix not implemented", parent.GetChild(1).tok)
404 #return self.Expr(parent.GetChild(0))
405
406 # 100.5 M
407 # Ignoring the suffix for now
408 if id_ == Id.Expr_Float:
409 assert n > 1
410 p_die("unix suffix implemented", parent.GetChild(1).tok)
411 #return self.Expr(parent.GetChild(0))
412
413 raise AssertionError(Id_str(id_))
414
415 def _NameType(self, p_node):
416 # type: (PNode) -> NameType
417 """ name_type: Expr_Name [':'] [type_expr] """
418 name_tok = p_node.GetChild(0).tok
419 typ = None # type: Optional[TypeExpr]
420
421 n = p_node.NumChildren()
422 if n == 2:
423 typ = self._TypeExpr(p_node.GetChild(1))
424 if n == 3:
425 typ = self._TypeExpr(p_node.GetChild(2))
426
427 return NameType(name_tok, lexer.TokenVal(name_tok), typ)
428
429 def _NameTypeList(self, p_node):
430 # type: (PNode) -> List[NameType]
431 """ name_type_list: name_type (',' name_type)* """
432 assert p_node.typ == grammar_nt.name_type_list
433 results = [] # type: List[NameType]
434
435 n = p_node.NumChildren()
436 for i in xrange(0, n, 2): # was children[::2]
437 results.append(self._NameType(p_node.GetChild(i)))
438 return results
439
440 def _CompFor(self, p_node):
441 # type: (PNode) -> Comprehension
442 """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
443 lhs = self._NameTypeList(p_node.GetChild(1))
444 iterable = self.Expr(p_node.GetChild(3))
445
446 if p_node.NumChildren() >= 6:
447 cond = self.Expr(p_node.GetChild(5))
448 else:
449 cond = None
450
451 return Comprehension(lhs, iterable, cond)
452
453 def _CompareChain(self, parent):
454 # type: (PNode) -> expr_t
455 """comparison: expr (comp_op expr)*"""
456 cmp_ops = [] # type: List[Token]
457 comparators = [] # type: List[expr_t]
458 left = self.Expr(parent.GetChild(0))
459
460 i = 1
461 n = parent.NumChildren()
462 while i < n:
463 p = parent.GetChild(i)
464 op = p.GetChild(0).tok
465 if p.NumChildren() == 2:
466 # Blame the first token, and change its type
467 if op.id == Id.Expr_Not: # not in
468 op.id = Id.Node_NotIn
469 elif op.id == Id.Expr_Is: # is not
470 op.id = Id.Node_IsNot
471 else:
472 raise AssertionError()
473 else:
474 # is, <, ==, etc.
475 pass
476
477 cmp_ops.append(op)
478 i += 1
479 comparators.append(self.Expr(parent.GetChild(i)))
480 i += 1
481 return expr.Compare(left, cmp_ops, comparators)
482
483 def _Subscript(self, parent):
484 # type: (PNode) -> expr_t
485 """subscript: expr | [expr] ':' [expr]"""
486 typ0 = parent.GetChild(0).typ
487
488 n = parent.NumChildren()
489
490 if typ0 == grammar_nt.expr:
491 if n == 3: # a[1:2]
492 lower = self.Expr(parent.GetChild(0))
493 upper = self.Expr(parent.GetChild(2))
494 elif n == 2: # a[1:]
495 lower = self.Expr(parent.GetChild(0))
496 upper = None
497 else: # a[1]
498 return self.Expr(parent.GetChild(0))
499 else:
500 assert typ0 == Id.Arith_Colon
501 lower = None
502 if n == 1: # a[:]
503 upper = None
504 else: # a[:3]
505 upper = self.Expr(parent.GetChild(1))
506
507 return expr.Slice(lower, parent.GetChild(0).tok, upper)
508
    def Expr(self, pnode):
        # type: (PNode) -> expr_t
        """Transform expressions (as opposed to statements).

        Dispatches on pnode.typ: first grammar nonterminals (mirroring
        productions in ysh/grammar.pgen2), then terminal tokens, which all
        become expr.Const.
        """
        typ = pnode.typ

        #
        # YSH Entry Points / Additions
        #

        if typ == grammar_nt.ysh_expr:  # for if/while
            # ysh_expr: '(' testlist ')'
            return self.Expr(pnode.GetChild(1))

        if typ == grammar_nt.command_expr:
            # return_expr: testlist end_stmt
            return self.Expr(pnode.GetChild(0))

        #
        # Python-like Expressions / Operators
        #

        if typ == grammar_nt.atom:
            if pnode.NumChildren() == 1:
                return self.Expr(pnode.GetChild(0))
            return self._Atom(pnode)

        if typ == grammar_nt.testlist:
            # testlist: test (',' test)* [',']
            return self._Tuple(pnode)

        if typ == grammar_nt.test:
            # test: or_test ['if' or_test 'else' test] | lambdef
            if pnode.NumChildren() == 1:
                return self.Expr(pnode.GetChild(0))

            # TODO: Handle lambdef

            # Ternary: child 0 is the body, 2 the condition, 4 the else arm.
            test = self.Expr(pnode.GetChild(2))
            body = self.Expr(pnode.GetChild(0))
            orelse = self.Expr(pnode.GetChild(4))
            return expr.IfExp(test, body, orelse)

        if typ == grammar_nt.lambdef:
            # lambdef: '|' [name_type_list] '|' test

            n = pnode.NumChildren()
            if n == 4:
                params = self._NameTypeList(pnode.GetChild(1))
            else:
                params = []

            body = self.Expr(pnode.GetChild(n - 1))
            return expr.Lambda(params, body)

        #
        # Operators with Precedence
        #

        if typ == grammar_nt.or_test:
            # or_test: and_test ('or' and_test)*
            return self._LeftAssoc(pnode)

        if typ == grammar_nt.and_test:
            # and_test: not_test ('and' not_test)*
            return self._LeftAssoc(pnode)

        if typ == grammar_nt.not_test:
            # not_test: 'not' not_test | comparison
            if pnode.NumChildren() == 1:
                return self.Expr(pnode.GetChild(0))

            op_tok = pnode.GetChild(0).tok  # not
            return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))

        elif typ == grammar_nt.comparison:
            if pnode.NumChildren() == 1:
                return self.Expr(pnode.GetChild(0))

            return self._CompareChain(pnode)

        elif typ == grammar_nt.range_expr:
            n = pnode.NumChildren()
            if n == 1:
                return self.Expr(pnode.GetChild(0))

            if n == 3:
                return expr.Range(self.Expr(pnode.GetChild(0)),
                                  pnode.GetChild(1).tok,
                                  self.Expr(pnode.GetChild(2)))

            raise AssertionError(n)

        elif typ == grammar_nt.expr:
            # expr: xor_expr ('|' xor_expr)*
            return self._LeftAssoc(pnode)

        if typ == grammar_nt.xor_expr:
            # xor_expr: and_expr ('xor' and_expr)*
            return self._LeftAssoc(pnode)

        if typ == grammar_nt.and_expr:  # a & b
            # and_expr: shift_expr ('&' shift_expr)*
            return self._LeftAssoc(pnode)

        elif typ == grammar_nt.shift_expr:
            # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
            return self._LeftAssoc(pnode)

        elif typ == grammar_nt.arith_expr:
            # arith_expr: term (('+'|'-') term)*
            return self._LeftAssoc(pnode)

        elif typ == grammar_nt.term:
            # term: factor (('*'|'/'|'div'|'mod') factor)*
            return self._LeftAssoc(pnode)

        elif typ == grammar_nt.factor:
            # factor: ('+'|'-'|'~') factor | power
            # the power would have already been reduced
            if pnode.NumChildren() == 1:
                return self.Expr(pnode.GetChild(0))

            assert pnode.NumChildren() == 2
            op = pnode.GetChild(0)
            e = pnode.GetChild(1)

            assert isinstance(op.tok, Token)
            return expr.Unary(op.tok, self.Expr(e))

        elif typ == grammar_nt.power:
            # power: atom trailer* ['**' factor]

            node = self.Expr(pnode.GetChild(0))
            if pnode.NumChildren() == 1:  # No trailers
                return node

            # Support a->startswith(b) and mydict.key
            n = pnode.NumChildren()
            i = 1
            while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
                node = self._Trailer(node, pnode.GetChild(i))
                i += 1

            if i != n:  # ['**' factor]
                op_tok = pnode.GetChild(i).tok
                assert op_tok.id == Id.Arith_DStar, op_tok
                factor = self.Expr(pnode.GetChild(i + 1))
                node = expr.Binary(op_tok, node, factor)

            return node

        elif typ == grammar_nt.eggex:
            return self._Eggex(pnode)

        elif typ == grammar_nt.ysh_expr_sub:
            return self.Expr(pnode.GetChild(0))

        #
        # YSH Lexer Modes
        #
        # These nodes carry an already-built syntax_asdl node in the token
        # slot of child 1, so they're cast rather than transformed.

        elif typ == grammar_nt.sh_array_literal:
            return cast(ShArrayLiteral, pnode.GetChild(1).tok)

        elif typ == grammar_nt.old_sh_array_literal:
            return cast(ShArrayLiteral, pnode.GetChild(1).tok)

        elif typ == grammar_nt.sh_command_sub:
            return cast(CommandSub, pnode.GetChild(1).tok)

        elif typ == grammar_nt.braced_var_sub:
            return cast(BracedVarSub, pnode.GetChild(1).tok)

        elif typ == grammar_nt.dq_string:
            s = cast(DoubleQuoted, pnode.GetChild(1).tok)
            # sugar: ^"..." is short for ^["..."]
            if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
                return expr.Literal(s)
            return s

        elif typ == grammar_nt.sq_string:
            return cast(SingleQuoted, pnode.GetChild(1).tok)

        elif typ == grammar_nt.simple_var_sub:
            tok = pnode.GetChild(0).tok

            if tok.id == Id.VSub_DollarName:  # $foo is disallowed
                bare = lexer.TokenSliceLeft(tok, 1)
                p_die(
                    'In expressions, remove $ and use `%s`, or sometimes "$%s"'
                    % (bare, bare), tok)

            # $? is allowed
            return SimpleVarSub(tok)

        #
        # Terminals
        #

        tok = pnode.tok
        if typ == Id.Expr_Name:
            return expr.Var(tok, lexer.TokenVal(tok))

        # Everything else is an expr.Const
        tok_str = lexer.TokenVal(tok)
        # Remove underscores from 1_000_000. The lexer is responsible for
        # validation.
        c_under = tok_str.replace('_', '')

        if typ == Id.Expr_DecInt:
            try:
                cval = value.Int(mops.FromStr(c_under))  # type: value_t
            except ValueError:
                p_die('Decimal int constant is too large', tok)

        elif typ == Id.Expr_BinInt:
            assert c_under[:2] in ('0b', '0B'), c_under
            try:
                cval = value.Int(mops.FromStr(c_under[2:], 2))
            except ValueError:
                p_die('Binary int constant is too large', tok)

        elif typ == Id.Expr_OctInt:
            assert c_under[:2] in ('0o', '0O'), c_under
            try:
                cval = value.Int(mops.FromStr(c_under[2:], 8))
            except ValueError:
                p_die('Octal int constant is too large', tok)

        elif typ == Id.Expr_HexInt:
            assert c_under[:2] in ('0x', '0X'), c_under
            try:
                cval = value.Int(mops.FromStr(c_under[2:], 16))
            except ValueError:
                p_die('Hex int constant is too large', tok)

        elif typ == Id.Expr_Float:
            # Note: float() in mycpp/gc_builtins.cc currently uses strtod
            # I think this never raises ValueError, because the lexer
            # should only accept strings that strtod() does?
            cval = value.Float(float(c_under))

        elif typ == Id.Expr_Null:
            cval = value.Null

        elif typ == Id.Expr_True:
            cval = value.Bool(True)

        elif typ == Id.Expr_False:
            cval = value.Bool(False)

        # What to do with the char constants?
        # \n \u{3bc} #'a'
        # Are they integers or strings?
        #
        # Integers could be ord(\n), or strings could chr(\n)
        # Or just remove them, with ord(u'\n') and chr(u'\n')
        #
        # I think this relies on small string optimization. If we have it,
        # then 1-4 byte characters are efficient, and don't require heap
        # allocation.

        elif typ == Id.Char_OneChar:
            cval = value.Str(consts.LookupCharC(tok_str[1]))

        elif typ == Id.Char_UBraced:
            hex_str = tok_str[3:-1]  # \u{123}
            code_point = int(hex_str, 16)
            cval = value.Str(j8.Utf8Encode(code_point))
        else:
            raise AssertionError(typ)

        return expr.Const(tok, cval)
782
    def _CheckLhs(self, lhs):
        # type: (expr_t) -> None
        # Validate that an assignment LHS is a chain of Var / Subscript /
        # Attribute nodes.  Anything else (e.g. setglobal {}["key"] = 42)
        # is rejected with a parse error.

        UP_lhs = lhs
        with tagswitch(lhs) as case:
            if case(expr_e.Var):
                # OK - e.g. setvar a.b.c[i] = 42
                pass

            elif case(expr_e.Subscript):
                lhs = cast(Subscript, UP_lhs)
                self._CheckLhs(lhs.obj)  # recurse on LHS

            elif case(expr_e.Attribute):
                lhs = cast(Attribute, UP_lhs)
                self._CheckLhs(lhs.obj)  # recurse on LHS

            else:
                # Illegal - e.g. setglobal {}["key"] = 42
                p_die("Subscript/Attribute not allowed on this LHS expression",
                      location.TokenForExpr(lhs))
804
    def _LhsExprList(self, p_node):
        # type: (PNode) -> List[y_lhs_t]
        """Transform assignment targets, rejecting invalid ones.

        lhs_list: expr (',' expr)*
        """
        assert p_node.typ == grammar_nt.lhs_list

        lhs_list = []  # type: List[y_lhs_t]
        n = p_node.NumChildren()
        for i in xrange(0, n, 2):  # step over commas
            p = p_node.GetChild(i)
            #self.p_printer.Print(p)

            e = self.Expr(p)
            UP_e = e
            with tagswitch(e) as case:
                if case(expr_e.Var):
                    e = cast(expr.Var, UP_e)
                    lhs_list.append(e.left)

                elif case(expr_e.Subscript):
                    e = cast(Subscript, UP_e)
                    self._CheckLhs(e)
                    lhs_list.append(e)

                elif case(expr_e.Attribute):
                    e = cast(Attribute, UP_e)
                    self._CheckLhs(e)
                    if e.op.id != Id.Expr_Dot:
                        # e.g. setvar obj->method is not valid
                        p_die("Can't assign to this attribute expr", e.op)
                    lhs_list.append(e)

                else:
                    pass  # work around mycpp bug

                    # TODO: could blame an arbitrary expr_t, but this works
                    # most of the time
                    if p.tok:
                        blame = p.tok  # type: loc_t
                    else:
                        blame = loc.Missing
                    p_die("Can't assign to this expression", blame)

        return lhs_list
848
849 def MakeVarDecl(self, p_node):
850 # type: (PNode) -> command.VarDecl
851 """
852 ysh_var_decl: name_type_list ['=' testlist] end_stmt
853 """
854 assert p_node.typ == grammar_nt.ysh_var_decl
855
856 lhs = self._NameTypeList(p_node.GetChild(0)) # could be a tuple
857
858 # This syntax is confusing, and different than JavaScript
859 # var x, y = 1, 2
860 # But this is useful:
861 # var flag, i = parseArgs(spec, argv)
862
863 n = p_node.NumChildren()
864 if n >= 3:
865 rhs = self.Expr(p_node.GetChild(2))
866 else:
867 rhs = None
868
869 # The caller should fill in the keyword token.
870 return command.VarDecl(None, lhs, rhs)
871
872 def MakeMutation(self, p_node):
873 # type: (PNode) -> command.Mutation
874 """
875 ysh_mutation: lhs_list (augassign | '=') testlist end_stmt
876 """
877 typ = p_node.typ
878 assert typ == grammar_nt.ysh_mutation
879
880 lhs_list = self._LhsExprList(p_node.GetChild(0)) # could be a tuple
881 op_tok = p_node.GetChild(1).tok
882 if len(lhs_list) > 1 and op_tok.id != Id.Arith_Equal:
883 p_die('Multiple assignment must use =', op_tok)
884 rhs = self.Expr(p_node.GetChild(2))
885 return command.Mutation(None, lhs_list, op_tok, rhs)
886
887 def _EggexFlag(self, p_node):
888 # type: (PNode) -> EggexFlag
889 n = p_node.NumChildren()
890 if n == 1:
891 return EggexFlag(False, p_node.GetChild(0).tok)
892 elif n == 2:
893 return EggexFlag(True, p_node.GetChild(1).tok)
894 else:
895 raise AssertionError()
896
    def _Eggex(self, p_node):
        # type: (PNode) -> Eggex
        """Transform an eggex literal.

        eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'
        """
        left = p_node.GetChild(0).tok
        regex = self._Regex(p_node.GetChild(1))

        flags = []  # type: List[EggexFlag]
        trans_pref = None  # type: Optional[Token]

        # Walk the optional tail: the first ';' introduces zero or more
        # re_flag nodes; a second ';' introduces the translation preference
        # name (an Expr_Name token).
        i = 2
        current = p_node.GetChild(i)
        if current.typ == Id.Op_Semi:
            i += 1
            while True:
                current = p_node.GetChild(i)
                if current.typ != grammar_nt.re_flag:
                    break
                flags.append(self._EggexFlag(current))
                i += 1

            if current.typ == Id.Op_Semi:
                i += 1
                trans_pref = p_node.GetChild(i).tok

        # Canonicalize and validate flags for ERE only. Default is ERE.
        if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
            canonical_flags = regex_translate.CanonicalFlags(flags)
        else:
            canonical_flags = None

        return Eggex(left, regex, flags, trans_pref, canonical_flags)
930
931 def YshCasePattern(self, pnode):
932 # type: (PNode) -> pat_t
933 assert pnode.typ == grammar_nt.ysh_case_pat, pnode
934
935 pattern = pnode.GetChild(0)
936 typ = pattern.typ
937 if typ == Id.Op_LParen:
938 # pat_expr or pat_else
939 pattern = pnode.GetChild(1)
940 typ = pattern.typ
941
942 if typ == grammar_nt.pat_else:
943 return pat.Else
944
945 if typ == grammar_nt.pat_exprs:
946 exprs = [] # type: List[expr_t]
947 for i in xrange(pattern.NumChildren()):
948 child = pattern.GetChild(i)
949 if child.typ == grammar_nt.expr:
950 expr = self.Expr(child)
951 exprs.append(expr)
952 return pat.YshExprs(exprs)
953
954 if typ == grammar_nt.eggex:
955 return self._Eggex(pattern)
956
957 raise AssertionError()
958
959 def _BlockArg(self, p_node):
960 # type: (PNode) -> expr_t
961
962 n = p_node.NumChildren()
963 if n == 1:
964 child = p_node.GetChild(0)
965 return self.Expr(child)
966
967 # It can only be an expression, not a=42, or ...expr
968 p_die('Invalid block expression argument', p_node.tok)
969
970 def _Argument(self, p_node, after_semi, arglist):
971 # type: (PNode, bool, ArgList) -> None
972 """
973 argument: (
974 test [comp_for]
975 | test '=' test # named arg
976 | '...' test # var args
977 )
978 """
979 pos_args = arglist.pos_args
980 named_args = arglist.named_args
981
982 assert p_node.typ == grammar_nt.argument, p_node
983 n = p_node.NumChildren()
984 if n == 1:
985 child = p_node.GetChild(0)
986 if after_semi:
987 p_die(POS_ARG_MISPLACED, child.tok)
988 arg = self.Expr(child)
989 pos_args.append(arg)
990 return
991
992 if n == 2:
993 # Note: We allow multiple spreads, just like Julia. They are
994 # concatenated as in lists and dicts.
995 tok0 = p_node.GetChild(0).tok
996 if tok0.id == Id.Expr_Ellipsis:
997 spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
998 if after_semi: # f(; ... named)
999 named_args.append(NamedArg(None, spread_expr))
1000 else: # f(...named)
1001 pos_args.append(spread_expr)
1002 return
1003
1004 # Note: generator expression not implemented
1005 if p_node.GetChild(1).typ == grammar_nt.comp_for:
1006 child = p_node.GetChild(0)
1007 if after_semi:
1008 p_die(POS_ARG_MISPLACED, child.tok)
1009
1010 elt = self.Expr(child)
1011 comp = self._CompFor(p_node.GetChild(1))
1012 arg = expr.GeneratorExp(elt, [comp])
1013 pos_args.append(arg)
1014 return
1015
1016 raise AssertionError()
1017
1018 if n == 3: # named args can come before or after the semicolon
1019 n1 = NamedArg(
1020 p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
1021 named_args.append(n1)
1022 return
1023
1024 raise AssertionError()
1025
1026 def _ArgGroup(self, p_node, after_semi, arglist):
1027 # type: (PNode, bool, ArgList) -> None
1028 """
1029 arg_group: argument (',' argument)* [',']
1030 """
1031 for i in xrange(p_node.NumChildren()):
1032 p_child = p_node.GetChild(i)
1033 if p_child.typ == grammar_nt.argument:
1034 self._Argument(p_child, after_semi, arglist)
1035
1036 def _ArgList(self, p_node, arglist):
1037 # type: (PNode, ArgList) -> None
1038 """For both funcs and procs
1039
1040 arglist: (
1041 [arg_group]
1042 [';' [arg_group]]
1043 )
1044
1045 arglist3: ...
1046 """
1047 n = p_node.NumChildren()
1048 if n == 0:
1049 return
1050
1051 i = 0
1052
1053 if i >= n:
1054 return
1055 child = p_node.GetChild(i)
1056 if child.typ == grammar_nt.arg_group:
1057 self._ArgGroup(child, False, arglist)
1058 i += 1
1059
1060 if i >= n:
1061 return
1062 child = p_node.GetChild(i)
1063 if child.typ == Id.Op_Semi:
1064 arglist.semi_tok = child.tok
1065 i += 1
1066
1067 # Named args after first semi-colon
1068 if i >= n:
1069 return
1070 child = p_node.GetChild(i)
1071 if child.typ == grammar_nt.arg_group:
1072 self._ArgGroup(child, True, arglist)
1073 i += 1
1074
1075 #
1076 # Special third group may have block expression - only for arglist3,
1077 # used for procs!
1078 #
1079
1080 if i >= n:
1081 return
1082 assert p_node.typ == grammar_nt.arglist3, p_node
1083
1084 child = p_node.GetChild(i)
1085 if child.typ == Id.Op_Semi:
1086 arglist.semi_tok2 = child.tok
1087 i += 1
1088
1089 if i >= n:
1090 return
1091 child = p_node.GetChild(i)
1092 if child.typ == grammar_nt.argument:
1093 arglist.block_expr = self._BlockArg(child)
1094 i += 1
1095
1096 def ProcCallArgs(self, pnode, arglist):
1097 # type: (PNode, ArgList) -> None
1098 """
1099 ysh_eager_arglist: '(' [arglist3] ')'
1100 ysh_lazy_arglist: '[' [arglist] ']'
1101 """
1102 n = pnode.NumChildren()
1103 if n == 2: # f()
1104 return
1105
1106 if n == 3:
1107 child1 = pnode.GetChild(1) # the X in '( X )'
1108
1109 self._ArgList(child1, arglist)
1110 return
1111
1112 raise AssertionError()
1113
1114 def _TypeExpr(self, pnode):
1115 # type: (PNode) -> TypeExpr
1116 """
1117 type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
1118 """
1119 assert pnode.typ == grammar_nt.type_expr, pnode.typ
1120
1121 ty = TypeExpr.CreateNull() # don't allocate children
1122
1123 ty.tok = pnode.GetChild(0).tok
1124 ty.name = lexer.TokenVal(ty.tok)
1125
1126 n = pnode.NumChildren()
1127 if n == 1:
1128 return ty
1129
1130 ty.params = []
1131 i = 2
1132 while i < n:
1133 p = self._TypeExpr(pnode.GetChild(i))
1134 ty.params.append(p)
1135 i += 2 # skip comma
1136
1137 return ty
1138
1139 def _Param(self, pnode):
1140 # type: (PNode) -> Param
1141 """
1142 param: Expr_Name [type_expr] ['=' expr]
1143 """
1144 assert pnode.typ == grammar_nt.param
1145
1146 name_tok = pnode.GetChild(0).tok
1147 n = pnode.NumChildren()
1148
1149 assert name_tok.id == Id.Expr_Name, name_tok
1150
1151 default_val = None # type: expr_t
1152 type_ = None # type: TypeExpr
1153
1154 if n == 1:
1155 # proc p(a)
1156 pass
1157
1158 elif n == 2:
1159 # proc p(a Int)
1160 type_ = self._TypeExpr(pnode.GetChild(1))
1161
1162 elif n == 3:
1163 # proc p(a = 3)
1164 default_val = self.Expr(pnode.GetChild(2))
1165
1166 elif n == 4:
1167 # proc p(a Int = 3)
1168 type_ = self._TypeExpr(pnode.GetChild(1))
1169 default_val = self.Expr(pnode.GetChild(3))
1170
1171 return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
1172
1173 def _ParamGroup(self, p_node):
1174 # type: (PNode) -> ParamGroup
1175 """
1176 param_group:
1177 (param ',')*
1178 [ (param | '...' Expr_Name) [,] ]
1179 """
1180 assert p_node.typ == grammar_nt.param_group, p_node
1181
1182 params = [] # type: List[Param]
1183 rest_of = None # type: Optional[RestParam]
1184
1185 n = p_node.NumChildren()
1186 i = 0
1187 while i < n:
1188 child = p_node.GetChild(i)
1189 if child.typ == grammar_nt.param:
1190 params.append(self._Param(child))
1191
1192 elif child.typ == Id.Expr_Ellipsis:
1193 tok = p_node.GetChild(i + 1).tok
1194 rest_of = RestParam(tok, lexer.TokenVal(tok))
1195
1196 i += 2
1197
1198 return ParamGroup(params, rest_of)
1199
1200 def Proc(self, p_node):
1201 # type: (PNode) -> proc_sig_t
1202 """
1203 ysh_proc: (
1204 [ '('
1205 [ param_group ] # word params, with defaults
1206 [ ';' [ param_group ] ] # positional typed params, with defaults
1207 [ ';' [ param_group ] ] # named params, with defaults
1208 [ ';' Expr_Name ] # optional block param, with no type or default
1209 ')'
1210 ]
1211 '{' # opening { for pgen2
1212 )
1213 """
1214 typ = p_node.typ
1215 assert typ == grammar_nt.ysh_proc
1216
1217 n = p_node.NumChildren()
1218 if n == 1: # proc f {
1219 return proc_sig.Open
1220
1221 if n == 3: # proc f () {
1222 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1223
1224 # proc f( three param groups, and block group )
1225 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1226
1227 # Word args
1228 i = 1
1229 child = p_node.GetChild(i)
1230 if child.typ == grammar_nt.param_group:
1231 sig.word = self._ParamGroup(p_node.GetChild(i))
1232
1233 # Validate word args
1234 for word in sig.word.params:
1235 if word.type:
1236 if word.type.name not in ('Str', 'Ref'):
1237 p_die('Word params may only have type Str or Ref',
1238 word.type.tok)
1239 if word.type.params is not None:
1240 p_die('Unexpected type parameters', word.type.tok)
1241
1242 i += 2
1243 else:
1244 i += 1
1245
1246 #log('i %d n %d', i, n)
1247 if i >= n:
1248 return sig
1249
1250 # Positional args
1251 child = p_node.GetChild(i)
1252 if child.typ == grammar_nt.param_group:
1253 sig.positional = self._ParamGroup(p_node.GetChild(i))
1254 i += 2
1255 else:
1256 i += 1
1257
1258 #log('i %d n %d', i, n)
1259 if i >= n:
1260 return sig
1261
1262 # Keyword args
1263 child = p_node.GetChild(i)
1264 if child.typ == grammar_nt.param_group:
1265 sig.named = self._ParamGroup(p_node.GetChild(i))
1266 i += 2
1267 else:
1268 i += 1
1269
1270 #log('i %d n %d', i, n)
1271 if i >= n:
1272 return sig
1273
1274 child = p_node.GetChild(i)
1275 if child.typ == grammar_nt.param_group:
1276 group = self._ParamGroup(p_node.GetChild(i))
1277 params = group.params
1278 if len(params) > 1:
1279 p_die('Only 1 block param is allowed', params[1].blame_tok)
1280 if group.rest_of:
1281 p_die("Rest param isn't allowed for blocks",
1282 group.rest_of.blame_tok)
1283
1284 if len(params) == 1:
1285 if params[0].type:
1286 if params[0].type.name != 'Command':
1287 p_die('Block param must have type Command',
1288 params[0].type.tok)
1289 if params[0].type.params is not None:
1290 p_die('Unexpected type parameters', params[0].type.tok)
1291
1292 sig.block_param = params[0]
1293
1294 return sig
1295
1296 def YshFunc(self, p_node, out):
1297 # type: (PNode, Func) -> None
1298 """
1299 ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
1300 """
1301 assert p_node.typ == grammar_nt.ysh_func
1302
1303 #self.p_printer.Print(p_node)
1304
1305 out.name = p_node.GetChild(0).tok
1306
1307 n = p_node.NumChildren()
1308 i = 2 # after (
1309
1310 child = p_node.GetChild(i)
1311 if child.typ == grammar_nt.param_group:
1312 out.positional = self._ParamGroup(child)
1313 i += 2 # skip past ;
1314 else:
1315 i += 1
1316
1317 if i >= n:
1318 return
1319
1320 child = p_node.GetChild(i)
1321 if child.typ == grammar_nt.param_group:
1322 out.named = self._ParamGroup(child)
1323
1324 #
1325 # Eggex Language
1326 #
1327
1328 def _RangeCharSingleQuoted(self, p_node):
1329 # type: (PNode) -> Optional[CharCode]
1330
1331 assert p_node.typ == grammar_nt.range_char, p_node
1332
1333 # 'a' in 'a'-'b'
1334
1335 child0 = p_node.GetChild(0)
1336 if child0.typ == grammar_nt.sq_string:
1337 sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
1338 n = len(sq_part.sval)
1339 if n == 0:
1340 p_die("Quoted range char can't be empty",
1341 loc.WordPart(sq_part))
1342 elif n == 1:
1343 return CharCode(sq_part.left, ord(sq_part.sval[0]), False)
1344 else:
1345 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
1346 return None
1347
1348 def _OtherRangeToken(self, p_node):
1349 # type: (PNode) -> Token
1350 """An endpoint of a range (single char)
1351
1352 range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
1353 a-z 0-9 'a'-'z' \x00-\xff
1354 """
1355 assert p_node.typ == grammar_nt.range_char, p_node
1356
1357 child0 = p_node.GetChild(0)
1358 if child0.typ == grammar_nt.char_literal:
1359 # \x00 in /[\x00 - \x20]/
1360 tok = child0.GetChild(0).tok
1361 return tok
1362
1363 tok = p_node.tok
1364 # a in a-z is Expr_Name
1365 # 0 in 0-9 is Expr_DecInt
1366 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt), tok
1367
1368 if tok.length != 1:
1369 p_die(RANGE_POINT_TOO_LONG, tok)
1370 return tok
1371
1372 def _NonRangeChars(self, p_node):
1373 # type: (PNode) -> class_literal_term_t
1374 """
1375 \" \u1234 '#'
1376 """
1377 assert p_node.typ == grammar_nt.range_char, p_node
1378
1379 child0 = p_node.GetChild(0)
1380 typ0 = p_node.GetChild(0).typ
1381
1382 if typ0 == grammar_nt.sq_string:
1383 return cast(SingleQuoted, child0.GetChild(1).tok)
1384
1385 if typ0 == grammar_nt.char_literal:
1386 return word_compile.EvalCharLiteralForRegex(child0.tok)
1387
1388 if typ0 == Id.Expr_Name:
1389 # Look up PerlClass and PosixClass
1390 return self._NameInClass(None, child0.tok)
1391
1392 raise AssertionError()
1393
1394 def _ClassLiteralTerm(self, p_node):
1395 # type: (PNode) -> class_literal_term_t
1396 """
1397 class_literal_term:
1398 range_char ['-' range_char ]
1399 | '@' Expr_Name # splice
1400 | '!' Expr_Name # negate char class
1401 ...
1402 """
1403 assert p_node.typ == grammar_nt.class_literal_term, p_node
1404
1405 typ0 = p_node.GetChild(0).typ
1406
1407 if typ0 == grammar_nt.range_char:
1408 n = p_node.NumChildren()
1409
1410 if n == 1:
1411 return self._NonRangeChars(p_node.GetChild(0))
1412
1413 # 'a'-'z' etc.
1414 if n == 3:
1415 assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
1416
1417 left = p_node.GetChild(0)
1418 right = p_node.GetChild(2)
1419
1420 code1 = self._RangeCharSingleQuoted(left)
1421 if code1 is None:
1422 tok1 = self._OtherRangeToken(left)
1423 code1 = word_compile.EvalCharLiteralForRegex(tok1)
1424
1425 code2 = self._RangeCharSingleQuoted(right)
1426 if code2 is None:
1427 tok2 = self._OtherRangeToken(right)
1428 code2 = word_compile.EvalCharLiteralForRegex(tok2)
1429 return CharRange(code1, code2)
1430
1431 raise AssertionError()
1432
1433 if typ0 == Id.Expr_At:
1434 tok1 = p_node.GetChild(1).tok
1435 return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
1436
1437 if typ0 == Id.Expr_Bang:
1438 return self._NameInClass(
1439 p_node.GetChild(0).tok,
1440 p_node.GetChild(1).tok)
1441
1442 p_die("This kind of class literal term isn't implemented",
1443 p_node.GetChild(0).tok)
1444
1445 def _ClassLiteral(self, p_node):
1446 # type: (PNode) -> List[class_literal_term_t]
1447 """class_literal: '[' class_literal_term+ ']'."""
1448 assert p_node.typ == grammar_nt.class_literal
1449 # skip [ and ]
1450 terms = [] # type: List[class_literal_term_t]
1451 for i in xrange(1, p_node.NumChildren() - 1):
1452 terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
1453
1454 return terms
1455
1456 def _NameInRegex(self, negated_tok, tok):
1457 # type: (Token, Token) -> re_t
1458 tok_str = lexer.TokenVal(tok)
1459 if tok_str == 'dot':
1460 if negated_tok:
1461 p_die("Can't negate this symbol", tok)
1462 return re.Primitive(tok, Id.Eggex_Dot)
1463
1464 if tok_str in POSIX_CLASSES:
1465 return PosixClass(negated_tok, tok_str)
1466
1467 perl = PERL_CLASSES.get(tok_str)
1468 if perl is not None:
1469 return PerlClass(negated_tok, perl)
1470
1471 if tok_str[0].isupper(): # e.g. HexDigit
1472 return re.Splice(tok, lexer.TokenVal(tok))
1473
1474 p_die("%r isn't a character class" % tok_str, tok)
1475
1476 def _NameInClass(self, negated_tok, tok):
1477 # type: (Token, Token) -> class_literal_term_t
1478 """Like the above, but 'dot' and 'd' don't mean anything within []"""
1479 tok_str = lexer.TokenVal(tok)
1480
1481 # A bare, unquoted character literal. In the grammar, this is expressed as
1482 # range_char without an ending.
1483
1484 # d is NOT 'digit', it's a literal 'd'!
1485 if len(tok_str) == 1:
1486 # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
1487 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
1488
1489 if negated_tok: # [~d] is not allowed, only [~digit]
1490 p_die("Can't negate this symbol", tok)
1491 return word_compile.EvalCharLiteralForRegex(tok)
1492
1493 # digit, word, but not d, w, etc.
1494 if tok_str in POSIX_CLASSES:
1495 return PosixClass(negated_tok, tok_str)
1496
1497 perl = PERL_CLASSES.get(tok_str)
1498 if perl is not None:
1499 return PerlClass(negated_tok, perl)
1500 p_die("%r isn't a character class" % tok_str, tok)
1501
    def _ReAtom(self, p_atom):
        # type: (PNode) -> re_t
        """Translate one re_atom parse node into an re_t.

        re_atom: ( char_literal | ...

        Dispatches on the type of the atom's first child: non-terminals
        (class literals, quoted strings, char literals), then punctuation,
        names, symbols, splices, negation, groups, and captures.
        """
        assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

        child0 = p_atom.GetChild(0)

        typ0 = p_atom.GetChild(0).typ
        tok0 = p_atom.GetChild(0).tok

        # Non-terminals

        if typ0 == grammar_nt.class_literal:
            # [a-z], not negated
            return re.CharClassLiteral(False, self._ClassLiteral(child0))

        if typ0 == grammar_nt.sq_string:
            # 'foo' -- literal chars; the SingleQuoted part is stored in
            # the tok slot of the middle child
            return cast(SingleQuoted, child0.GetChild(1).tok)

        if typ0 == grammar_nt.char_literal:
            # Must be Id.Char_{OneChar,Hex,UBraced}
            assert consts.GetKind(tok0.id) == Kind.Char
            s = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
            return re.LiteralChars(tok0, s)

        # Special punctuation
        if typ0 == Id.Expr_Dot:  # .
            return re.Primitive(tok0, Id.Eggex_Dot)

        if typ0 == Id.Arith_Caret:  # ^
            return re.Primitive(tok0, Id.Eggex_Start)

        if typ0 == Id.Expr_Dollar:  # $
            return re.Primitive(tok0, Id.Eggex_End)

        if typ0 == Id.Expr_Name:
            # d digit -> PosixClass PerlClass etc.
            return self._NameInRegex(None, tok0)

        if typ0 == Id.Expr_Symbol:
            # Validate symbols here, like we validate PerlClass, etc.
            tok_str = lexer.TokenVal(tok0)
            if tok_str == '%start':
                return re.Primitive(tok0, Id.Eggex_Start)
            if tok_str == '%end':
                return re.Primitive(tok0, Id.Eggex_End)
            p_die("Unexpected token %r in regex" % tok_str, tok0)

        if typ0 == Id.Expr_At:
            # | '@' Expr_Name -- splice another eggex by name
            tok1 = p_atom.GetChild(1).tok
            return re.Splice(tok0, lexer.TokenVal(tok1))

        if typ0 == Id.Expr_Bang:
            # | '!' (Expr_Name | class_literal)
            # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
            n = p_atom.NumChildren()
            if n == 2:
                child1 = p_atom.GetChild(1)
                if child1.typ == grammar_nt.class_literal:
                    # negated class literal, e.g. ![a-z]
                    return re.CharClassLiteral(True,
                                               self._ClassLiteral(child1))
                else:
                    return self._NameInRegex(tok0, p_atom.GetChild(1).tok)
            else:
                # Note: !! conflicts with shell history
                p_die(
                    "Backtracking with !! isn't implemented (requires Python/PCRE)",
                    p_atom.GetChild(1).tok)

        if typ0 == Id.Op_LParen:
            # | '(' regex ')'

            # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
            # Capture.
            return re.Group(self._Regex(p_atom.GetChild(1)))

        if typ0 == Id.Arith_Less:
            # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'

            n = p_atom.NumChildren()
            assert n == 4 or n == 6 or n == 8, n

            # < capture d+ >
            regex = self._Regex(p_atom.GetChild(2))

            as_name = None  # type: Optional[Token]
            func_name = None  # type: Optional[Token]

            i = 3  # points at any of > as :

            typ = p_atom.GetChild(i).typ
            if typ == Id.Expr_As:
                # 'as name' -- bind the capture to a name
                as_name = p_atom.GetChild(i + 1).tok
                i += 2

            typ = p_atom.GetChild(i).typ
            if typ == Id.Arith_Colon:
                # ': func' -- conversion function for the captured string
                func_name = p_atom.GetChild(i + 1).tok

            return re.Capture(regex, as_name, func_name)

        raise AssertionError(typ0)
1606
    def _RepeatOp(self, p_repeat):
        # type: (PNode) -> re_repeat_t
        """Translate a repetition operator: + * ? or a {m,n}-style range.

        repeat_op: '+' | '*' | '?'
                 | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
        """
        assert p_repeat.typ == grammar_nt.repeat_op, p_repeat

        tok = p_repeat.GetChild(0).tok
        id_ = tok.id

        if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
            return tok  # a+ a* a?

        if id_ == Id.Op_LBrace:
            child1 = p_repeat.GetChild(1)
            if child1.typ != grammar_nt.repeat_range:
                # e.g. dot{N *} is .*?
                p_die("Perl-style repetition isn't implemented with libc",
                      child1.tok)

            # repeat_range: (
            #     Expr_DecInt [',']
            #   | ',' Expr_DecInt
            #   | Expr_DecInt ',' Expr_DecInt
            # )

            n = child1.NumChildren()
            if n == 1:  # {3}
                tok = child1.GetChild(0).tok
                return tok  # different operator than + * ?

            if n == 2:
                if child1.GetChild(0).typ == Id.Expr_DecInt:  # {3,} lower bound only
                    left = child1.GetChild(0).tok
                    return re_repeat.Range(left, lexer.TokenVal(left), '',
                                           None)
                else:  # {,3} upper bound only
                    right = child1.GetChild(1).tok
                    return re_repeat.Range(None, '', lexer.TokenVal(right),
                                           right)

            if n == 3:  # {1,3}
                left = child1.GetChild(0).tok
                right = child1.GetChild(2).tok
                return re_repeat.Range(left, lexer.TokenVal(left),
                                       lexer.TokenVal(right), right)

            raise AssertionError(n)

        raise AssertionError(id_)
1658
1659 def _ReAlt(self, p_node):
1660 # type: (PNode) -> re_t
1661 """
1662 re_alt: (re_atom [repeat_op])+
1663 """
1664 assert p_node.typ == grammar_nt.re_alt
1665
1666 i = 0
1667 n = p_node.NumChildren()
1668 seq = [] # type: List[re_t]
1669 while i < n:
1670 r = self._ReAtom(p_node.GetChild(i))
1671 i += 1
1672 if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
1673 repeat_op = self._RepeatOp(p_node.GetChild(i))
1674 r = re.Repeat(r, repeat_op)
1675 i += 1
1676 seq.append(r)
1677
1678 if len(seq) == 1:
1679 return seq[0]
1680 else:
1681 return re.Seq(seq)
1682
1683 def _Regex(self, p_node):
1684 # type: (PNode) -> re_t
1685 """
1686 regex: [re_alt] (('|'|'or') re_alt)*
1687 """
1688 assert p_node.typ == grammar_nt.regex
1689
1690 n = p_node.NumChildren()
1691 alts = [] # type: List[re_t]
1692 for i in xrange(0, n, 2): # was children[::2]
1693 c = p_node.GetChild(i)
1694 alts.append(self._ReAlt(c))
1695
1696 if len(alts) == 1:
1697 return alts[0]
1698 else:
1699 return re.Alt(alts)
1700
1701
1702# vim: sw=4