OILS / ysh / expr_to_ast.py View on Github | oilshell.org

1702 lines, 1019 significant
1"""expr_to_ast.py."""
2from __future__ import print_function
3
4from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
5from _devbuild.gen.syntax_asdl import (
6 Token,
7 SimpleVarSub,
8 loc,
9 loc_t,
10 DoubleQuoted,
11 SingleQuoted,
12 BracedVarSub,
13 CommandSub,
14 ShArrayLiteral,
15 command,
16 expr,
17 expr_e,
18 expr_t,
19 expr_context_e,
20 re,
21 re_t,
22 re_repeat,
23 re_repeat_t,
24 class_literal_term,
25 class_literal_term_t,
26 PosixClass,
27 PerlClass,
28 NameType,
29 y_lhs_t,
30 Comprehension,
31 Subscript,
32 Attribute,
33 proc_sig,
34 proc_sig_t,
35 Param,
36 RestParam,
37 ParamGroup,
38 NamedArg,
39 ArgList,
40 pat,
41 pat_t,
42 TypeExpr,
43 Func,
44 Eggex,
45 EggexFlag,
46 CharCode,
47 CharRange,
48)
49from _devbuild.gen.value_asdl import value, value_t
50from _devbuild.gen import grammar_nt
51from core.error import p_die
52from core import num
53from frontend import consts
54from frontend import lexer
55from frontend import location
56from mycpp import mops
57from mycpp import mylib
58from mycpp.mylib import log, tagswitch
59from osh import word_compile
60from ysh import expr_parse
61from ysh import regex_translate
62
63from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
64if TYPE_CHECKING:
65 from pgen2.grammar import Grammar
66 from pgen2.pnode import PNode
67
68_ = log
69
70PERL_CLASSES = {
71 'd': 'd',
72 'w': 'w',
73 'word': 'w',
74 's': 's',
75}
76# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
77POSIX_CLASSES = [
78 'alnum',
79 'cntrl',
80 'lower',
81 'space',
82 'alpha',
83 'digit',
84 'print',
85 'upper',
86 'blank',
87 'graph',
88 'punct',
89 'xdigit',
90]
91# NOTE: There are also things like \p{Greek} that we could put in the
92# "non-sigil" namespace.
93
94RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
95
96POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"
97
98# Copied from pgen2/token.py to avoid dependency.
99NT_OFFSET = 256
100
101if mylib.PYTHON:
102
103 def MakeGrammarNames(ysh_grammar):
104 # type: (Grammar) -> Dict[int, str]
105
106 # TODO: Break this dependency
107 from frontend import lexer_def
108
109 names = {}
110
111 for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
112 # Hm some are out of range
113 #assert k < 256, (k, id_name)
114
115 # TODO: Some tokens have values greater than NT_OFFSET
116 if k < NT_OFFSET:
117 names[k] = id_name
118
119 for k, v in ysh_grammar.number2symbol.items():
120 assert k >= NT_OFFSET, (k, v)
121 names[k] = v
122
123 return names
124
125
126class Transformer(object):
127 """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
128
129 pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
130 so this "transformer" is the equivalent.
131
132 Files to refer to when modifying this function:
133
134 ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
135 frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
136
137 Related examples:
138
139 opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
140 Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
141
142 Other:
143 frontend/parse_lib.py (turn on print_parse_tree)
144
145 Public methods:
146 Expr, VarDecl
147 atom, trailer, etc. are private, named after productions in grammar.pgen2.
148 """
149
150 def __init__(self, gr):
151 # type: (Grammar) -> None
152 self.number2symbol = gr.number2symbol
153 if mylib.PYTHON:
154 names = MakeGrammarNames(gr)
155 # print raw nodes
156 self.p_printer = expr_parse.ParseTreePrinter(names)
157
158 def _LeftAssoc(self, p_node):
159 # type: (PNode) -> expr_t
160 """For an associative binary operation.
161
162 Examples:
163 xor_expr: and_expr ('xor' and_expr)*
164 term: factor (('*'|'/'|'%'|'div') factor)*
165
166 3 - 1 - 2 must be grouped as ((3 - 1) - 2).
167 """
168 # Note: Compare the iteractive com_binary() method in
169 # opy/compiler2/transformer.py.
170
171 # Examples:
172 # - The PNode for '3 - 1' will have 3 children
173 # - The PNode for '3 - 1 - 2' will have 5 children
174
175 #self.p_printer.Print(p_node)
176
177 i = 1 # index of the operator
178 n = p_node.NumChildren()
179
180 left = self.Expr(p_node.GetChild(0))
181 while i < n:
182 op = p_node.GetChild(i)
183 right = self.Expr(p_node.GetChild(i + 1))
184
185 # create a new left node
186 left = expr.Binary(op.tok, left, right)
187 i += 2
188
189 return left
190
191 def _Trailer(self, base, p_trailer):
192 # type: (expr_t, PNode) -> expr_t
193 """
194 trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
195 | '.' NAME | '->' NAME | '::' NAME
196 )
197 """
198 tok0 = p_trailer.GetChild(0).tok
199 typ0 = p_trailer.GetChild(0).typ
200
201 if typ0 == Id.Op_LParen:
202 lparen = tok0
203 rparen = p_trailer.GetChild(-1).tok
204 arglist = ArgList(lparen, [], None, [], None, None, rparen)
205 if p_trailer.NumChildren() == 2: # ()
206 return expr.FuncCall(base, arglist)
207
208 p = p_trailer.GetChild(1) # the X in ( X )
209 assert p.typ == grammar_nt.arglist # f(x, y)
210 self._ArgList(p, arglist)
211 return expr.FuncCall(base, arglist)
212
213 if typ0 == Id.Op_LBracket:
214 p_args = p_trailer.GetChild(1)
215 assert p_args.typ == grammar_nt.subscriptlist
216 n = p_args.NumChildren()
217 if n > 1:
218 p_die("Only 1 subscript is accepted", p_args.GetChild(1).tok)
219
220 a = p_args.GetChild(0)
221 return Subscript(tok0, base, self._Subscript(a))
222
223 if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
224 attr = p_trailer.GetChild(1).tok # will be Id.Expr_Name
225 return Attribute(base, tok0, attr, lexer.TokenVal(attr),
226 expr_context_e.Store)
227
228 raise AssertionError(typ0)
229
230 def _DictPair(self, p_node):
231 # type: (PNode) -> Tuple[expr_t, expr_t]
232 """
233 dict_pair: ( Expr_Name [':' test]
234 | '[' testlist ']' ':' test )
235 | sq_string ':' test
236 | dq_string ':' test )
237 """
238 assert p_node.typ == grammar_nt.dict_pair
239
240 typ = p_node.GetChild(0).typ
241
242 if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
243 key = self.Expr(p_node.GetChild(0)) # type: expr_t
244 val = self.Expr(p_node.GetChild(2))
245 return key, val
246
247 tok0 = p_node.GetChild(0).tok
248 id_ = tok0.id
249
250 if id_ == Id.Expr_Name:
251 key_str = value.Str(lexer.TokenVal(tok0))
252 key = expr.Const(tok0, key_str)
253 if p_node.NumChildren() >= 3:
254 val = self.Expr(p_node.GetChild(2))
255 else:
256 val = expr.Implicit
257
258 if id_ == Id.Op_LBracket: # {[x+y]: 'val'}
259 key = self.Expr(p_node.GetChild(1))
260 val = self.Expr(p_node.GetChild(4))
261 return key, val
262
263 return key, val
264
265 def _Dict(self, parent, p_node):
266 # type: (PNode, PNode) -> expr.Dict
267 """
268 dict: dict_pair (comma_newline dict_pair)* [comma_newline]
269 """
270 if p_node.typ == Id.Op_RBrace: # {}
271 return expr.Dict(parent.tok, [], [])
272
273 assert p_node.typ == grammar_nt.dict
274
275 keys = [] # type: List[expr_t]
276 values = [] # type: List[expr_t]
277
278 n = p_node.NumChildren()
279 for i in xrange(0, n, 2):
280 key, val = self._DictPair(p_node.GetChild(i))
281 keys.append(key)
282 values.append(val)
283
284 return expr.Dict(parent.tok, keys, values)
285
286 def _Tuple(self, parent):
287 # type: (PNode) -> expr_t
288
289 n = parent.NumChildren()
290
291 # (x) -- not a tuple
292 if n == 1:
293 return self.Expr(parent.GetChild(0))
294
295 # x, and (x,) aren't allowed
296 if n == 2:
297 p_die('Invalid trailing comma', parent.GetChild(1).tok)
298
299 elts = [] # type: List[expr_t]
300 for i in xrange(0, n, 2): # skip commas
301 p_node = parent.GetChild(i)
302 elts.append(self.Expr(p_node))
303
304 return expr.Tuple(parent.tok, elts,
305 expr_context_e.Store) # unused expr_context_e
306
307 def _TestlistComp(self, parent, p_node, id0):
308 # type: (PNode, PNode, Id_t) -> expr_t
309 """
310 testlist_comp:
311 (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
312 """
313 assert p_node.typ == grammar_nt.testlist_comp
314
315 n = p_node.NumChildren()
316 if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
317 elt = self.Expr(p_node.GetChild(0))
318 comp = self._CompFor(p_node.GetChild(1))
319 if id0 == Id.Op_LParen: # (x+1 for x in y)
320 return expr.GeneratorExp(elt, [comp])
321 if id0 == Id.Op_LBracket: # [x+1 for x in y]
322 return expr.ListComp(parent.tok, elt, [comp])
323 raise AssertionError()
324
325 if id0 == Id.Op_LParen:
326 # Parenthesized expression like (x+1) or (x)
327 if n == 1:
328 return self.Expr(p_node.GetChild(0))
329
330 # Tuples (1,) (1, 2) etc. - TODO: should be a list literal?
331 if p_node.GetChild(1).typ == Id.Arith_Comma:
332 return self._Tuple(p_node)
333
334 raise AssertionError()
335
336 if id0 == Id.Op_LBracket: # List [1,2,3]
337 elts = [] # type: List[expr_t]
338 for i in xrange(0, n, 2): # skip commas
339 elts.append(self.Expr(p_node.GetChild(i)))
340
341 return expr.List(parent.tok, elts,
342 expr_context_e.Store) # unused expr_context_e
343
344 raise AssertionError(Id_str(id0))
345
346 def _Atom(self, parent):
347 # type: (PNode) -> expr_t
348 """Handle alternatives of 'atom' where there's more than one child."""
349
350 tok = parent.GetChild(0).tok
351 id_ = tok.id
352 n = parent.NumChildren()
353
354 if id_ == Id.Op_LParen:
355 # atom: '(' [yield_expr|testlist_comp] ')' | ...
356 if n == 2: # () is a tuple
357 assert (
358 parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
359 return expr.Tuple(tok, [], expr_context_e.Store)
360
361 return self._TestlistComp(parent, parent.GetChild(1), id_)
362
363 if id_ == Id.Op_LBracket:
364 # atom: ... | '[' [testlist_comp] ']' | ...
365
366 if n == 2: # []
367 assert (parent.GetChild(1).typ == Id.Op_RBracket
368 ), parent.GetChild(1)
369 return expr.List(tok, [],
370 expr_context_e.Store) # unused expr_context_e
371
372 return self._TestlistComp(parent, parent.GetChild(1), id_)
373
374 if id_ == Id.Left_CaretBracket: # ^[42 + x]
375 child = self.Expr(parent.GetChild(1))
376 return expr.Literal(child)
377
378 if id_ == Id.Op_LBrace:
379 # atom: ... | '{' [Op_Newline] [dict] '}'
380 i = 1
381 if parent.GetChild(i).typ == Id.Op_Newline:
382 i += 1
383 return self._Dict(parent, parent.GetChild(i))
384
385 if id_ == Id.Arith_Amp:
386 n = parent.NumChildren()
387 if n >= 3:
388 p_die("Places in containers not implemented yet",
389 parent.GetChild(2).tok)
390
391 name_tok = parent.GetChild(1).tok
392 return expr.Place(name_tok, lexer.TokenVal(name_tok), [])
393
394 if id_ == Id.Expr_Func:
395 # STUB. This should really be a Func, not Lambda.
396 return expr.Lambda([], expr.Implicit)
397
398 # 100 M
399 # Ignoring the suffix for now
400 if id_ == Id.Expr_DecInt:
401 assert n > 1
402 p_die("Units suffix not implemented", parent.GetChild(1).tok)
403 #return self.Expr(parent.GetChild(0))
404
405 # 100.5 M
406 # Ignoring the suffix for now
407 if id_ == Id.Expr_Float:
408 assert n > 1
409 p_die("unix suffix implemented", parent.GetChild(1).tok)
410 #return self.Expr(parent.GetChild(0))
411
412 raise AssertionError(Id_str(id_))
413
414 def _NameType(self, p_node):
415 # type: (PNode) -> NameType
416 """ name_type: Expr_Name [':'] [type_expr] """
417 name_tok = p_node.GetChild(0).tok
418 typ = None # type: Optional[TypeExpr]
419
420 n = p_node.NumChildren()
421 if n == 2:
422 typ = self._TypeExpr(p_node.GetChild(1))
423 if n == 3:
424 typ = self._TypeExpr(p_node.GetChild(2))
425
426 return NameType(name_tok, lexer.TokenVal(name_tok), typ)
427
428 def _NameTypeList(self, p_node):
429 # type: (PNode) -> List[NameType]
430 """ name_type_list: name_type (',' name_type)* """
431 assert p_node.typ == grammar_nt.name_type_list
432 results = [] # type: List[NameType]
433
434 n = p_node.NumChildren()
435 for i in xrange(0, n, 2): # was children[::2]
436 results.append(self._NameType(p_node.GetChild(i)))
437 return results
438
439 def _CompFor(self, p_node):
440 # type: (PNode) -> Comprehension
441 """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
442 lhs = self._NameTypeList(p_node.GetChild(1))
443 iterable = self.Expr(p_node.GetChild(3))
444
445 if p_node.NumChildren() >= 6:
446 cond = self.Expr(p_node.GetChild(5))
447 else:
448 cond = None
449
450 return Comprehension(lhs, iterable, cond)
451
452 def _CompareChain(self, parent):
453 # type: (PNode) -> expr_t
454 """comparison: expr (comp_op expr)*"""
455 cmp_ops = [] # type: List[Token]
456 comparators = [] # type: List[expr_t]
457 left = self.Expr(parent.GetChild(0))
458
459 i = 1
460 n = parent.NumChildren()
461 while i < n:
462 p = parent.GetChild(i)
463 op = p.GetChild(0).tok
464 if p.NumChildren() == 2:
465 # Blame the first token, and change its type
466 if op.id == Id.Expr_Not: # not in
467 op.id = Id.Node_NotIn
468 elif op.id == Id.Expr_Is: # is not
469 op.id = Id.Node_IsNot
470 else:
471 raise AssertionError()
472 else:
473 # is, <, ==, etc.
474 pass
475
476 cmp_ops.append(op)
477 i += 1
478 comparators.append(self.Expr(parent.GetChild(i)))
479 i += 1
480 return expr.Compare(left, cmp_ops, comparators)
481
482 def _Subscript(self, parent):
483 # type: (PNode) -> expr_t
484 """subscript: expr | [expr] ':' [expr]"""
485 typ0 = parent.GetChild(0).typ
486
487 n = parent.NumChildren()
488
489 if typ0 == grammar_nt.expr:
490 if n == 3: # a[1:2]
491 lower = self.Expr(parent.GetChild(0))
492 upper = self.Expr(parent.GetChild(2))
493 elif n == 2: # a[1:]
494 lower = self.Expr(parent.GetChild(0))
495 upper = None
496 else: # a[1]
497 return self.Expr(parent.GetChild(0))
498 else:
499 assert typ0 == Id.Arith_Colon
500 lower = None
501 if n == 1: # a[:]
502 upper = None
503 else: # a[:3]
504 upper = self.Expr(parent.GetChild(1))
505
506 return expr.Slice(lower, parent.GetChild(0).tok, upper)
507
508 def Expr(self, pnode):
509 # type: (PNode) -> expr_t
510 """Transform expressions (as opposed to statements)"""
511 typ = pnode.typ
512
513 #
514 # YSH Entry Points / Additions
515 #
516
517 if typ == grammar_nt.ysh_expr: # for if/while
518 # ysh_expr: '(' testlist ')'
519 return self.Expr(pnode.GetChild(1))
520
521 if typ == grammar_nt.command_expr:
522 # return_expr: testlist end_stmt
523 return self.Expr(pnode.GetChild(0))
524
525 #
526 # Python-like Expressions / Operators
527 #
528
529 if typ == grammar_nt.atom:
530 if pnode.NumChildren() == 1:
531 return self.Expr(pnode.GetChild(0))
532 return self._Atom(pnode)
533
534 if typ == grammar_nt.testlist:
535 # testlist: test (',' test)* [',']
536 return self._Tuple(pnode)
537
538 if typ == grammar_nt.test:
539 # test: or_test ['if' or_test 'else' test] | lambdef
540 if pnode.NumChildren() == 1:
541 return self.Expr(pnode.GetChild(0))
542
543 # TODO: Handle lambdef
544
545 test = self.Expr(pnode.GetChild(2))
546 body = self.Expr(pnode.GetChild(0))
547 orelse = self.Expr(pnode.GetChild(4))
548 return expr.IfExp(test, body, orelse)
549
550 if typ == grammar_nt.lambdef:
551 # lambdef: '|' [name_type_list] '|' test
552
553 n = pnode.NumChildren()
554 if n == 4:
555 params = self._NameTypeList(pnode.GetChild(1))
556 else:
557 params = []
558
559 body = self.Expr(pnode.GetChild(n - 1))
560 return expr.Lambda(params, body)
561
562 #
563 # Operators with Precedence
564 #
565
566 if typ == grammar_nt.or_test:
567 # or_test: and_test ('or' and_test)*
568 return self._LeftAssoc(pnode)
569
570 if typ == grammar_nt.and_test:
571 # and_test: not_test ('and' not_test)*
572 return self._LeftAssoc(pnode)
573
574 if typ == grammar_nt.not_test:
575 # not_test: 'not' not_test | comparison
576 if pnode.NumChildren() == 1:
577 return self.Expr(pnode.GetChild(0))
578
579 op_tok = pnode.GetChild(0).tok # not
580 return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))
581
582 elif typ == grammar_nt.comparison:
583 if pnode.NumChildren() == 1:
584 return self.Expr(pnode.GetChild(0))
585
586 return self._CompareChain(pnode)
587
588 elif typ == grammar_nt.range_expr:
589 n = pnode.NumChildren()
590 if n == 1:
591 return self.Expr(pnode.GetChild(0))
592
593 if n == 3:
594 return expr.Range(self.Expr(pnode.GetChild(0)),
595 pnode.GetChild(1).tok,
596 self.Expr(pnode.GetChild(2)))
597
598 raise AssertionError(n)
599
600 elif typ == grammar_nt.expr:
601 # expr: xor_expr ('|' xor_expr)*
602 return self._LeftAssoc(pnode)
603
604 if typ == grammar_nt.xor_expr:
605 # xor_expr: and_expr ('xor' and_expr)*
606 return self._LeftAssoc(pnode)
607
608 if typ == grammar_nt.and_expr: # a & b
609 # and_expr: shift_expr ('&' shift_expr)*
610 return self._LeftAssoc(pnode)
611
612 elif typ == grammar_nt.shift_expr:
613 # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
614 return self._LeftAssoc(pnode)
615
616 elif typ == grammar_nt.arith_expr:
617 # arith_expr: term (('+'|'-') term)*
618 return self._LeftAssoc(pnode)
619
620 elif typ == grammar_nt.term:
621 # term: factor (('*'|'/'|'div'|'mod') factor)*
622 return self._LeftAssoc(pnode)
623
624 elif typ == grammar_nt.factor:
625 # factor: ('+'|'-'|'~') factor | power
626 # the power would have already been reduced
627 if pnode.NumChildren() == 1:
628 return self.Expr(pnode.GetChild(0))
629
630 assert pnode.NumChildren() == 2
631 op = pnode.GetChild(0)
632 e = pnode.GetChild(1)
633
634 assert isinstance(op.tok, Token)
635 return expr.Unary(op.tok, self.Expr(e))
636
637 elif typ == grammar_nt.power:
638 # power: atom trailer* ['**' factor]
639
640 node = self.Expr(pnode.GetChild(0))
641 if pnode.NumChildren() == 1: # No trailers
642 return node
643
644 # Support a->startswith(b) and mydict.key
645 n = pnode.NumChildren()
646 i = 1
647 while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
648 node = self._Trailer(node, pnode.GetChild(i))
649 i += 1
650
651 if i != n: # ['**' factor]
652 op_tok = pnode.GetChild(i).tok
653 assert op_tok.id == Id.Arith_DStar, op_tok
654 factor = self.Expr(pnode.GetChild(i + 1))
655 node = expr.Binary(op_tok, node, factor)
656
657 return node
658
659 elif typ == grammar_nt.eggex:
660 return self._Eggex(pnode)
661
662 elif typ == grammar_nt.ysh_expr_sub:
663 return self.Expr(pnode.GetChild(0))
664
665 #
666 # YSH Lexer Modes
667 #
668
669 elif typ == grammar_nt.sh_array_literal:
670 return cast(ShArrayLiteral, pnode.GetChild(1).tok)
671
672 elif typ == grammar_nt.old_sh_array_literal:
673 return cast(ShArrayLiteral, pnode.GetChild(1).tok)
674
675 elif typ == grammar_nt.sh_command_sub:
676 return cast(CommandSub, pnode.GetChild(1).tok)
677
678 elif typ == grammar_nt.braced_var_sub:
679 return cast(BracedVarSub, pnode.GetChild(1).tok)
680
681 elif typ == grammar_nt.dq_string:
682 s = cast(DoubleQuoted, pnode.GetChild(1).tok)
683 # sugar: ^"..." is short for ^["..."]
684 if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
685 return expr.Literal(s)
686 return s
687
688 elif typ == grammar_nt.sq_string:
689 return cast(SingleQuoted, pnode.GetChild(1).tok)
690
691 elif typ == grammar_nt.simple_var_sub:
692 tok = pnode.GetChild(0).tok
693
694 if tok.id == Id.VSub_DollarName: # $foo is disallowed
695 bare = lexer.TokenSliceLeft(tok, 1)
696 p_die(
697 'In expressions, remove $ and use `%s`, or sometimes "$%s"'
698 % (bare, bare), tok)
699
700 # $? is allowed
701 return SimpleVarSub(tok)
702
703 #
704 # Terminals
705 #
706
707 tok = pnode.tok
708 if typ == Id.Expr_Name:
709 return expr.Var(tok, lexer.TokenVal(tok))
710
711 # Everything else is an expr.Const
712 tok_str = lexer.TokenVal(tok)
713 # Remove underscores from 1_000_000. The lexer is responsible for
714 # validation.
715 c_under = tok_str.replace('_', '')
716
717 if typ == Id.Expr_DecInt:
718 try:
719 cval = value.Int(mops.FromStr(c_under)) # type: value_t
720 except ValueError:
721 p_die('Decimal int constant is too large', tok)
722
723 elif typ == Id.Expr_BinInt:
724 assert c_under[:2] in ('0b', '0B'), c_under
725 try:
726 cval = value.Int(mops.FromStr(c_under[2:], 2))
727 except ValueError:
728 p_die('Binary int constant is too large', tok)
729
730 elif typ == Id.Expr_OctInt:
731 assert c_under[:2] in ('0o', '0O'), c_under
732 try:
733 cval = value.Int(mops.FromStr(c_under[2:], 8))
734 except ValueError:
735 p_die('Octal int constant is too large', tok)
736
737 elif typ == Id.Expr_HexInt:
738 assert c_under[:2] in ('0x', '0X'), c_under
739 try:
740 cval = value.Int(mops.FromStr(c_under[2:], 16))
741 except ValueError:
742 p_die('Hex int constant is too large', tok)
743
744 elif typ == Id.Expr_Float:
745 # Note: float() in mycpp/gc_builtins.cc currently uses strtod
746 # I think this never raises ValueError, because the lexer
747 # should only accept strings that strtod() does?
748 cval = value.Float(float(c_under))
749
750 elif typ == Id.Expr_Null:
751 cval = value.Null
752
753 elif typ == Id.Expr_True:
754 cval = value.Bool(True)
755
756 elif typ == Id.Expr_False:
757 cval = value.Bool(False)
758
759 # What to do with the char constants?
760 # \n \u{3bc} #'a'
761 # Are they integers or strings?
762 #
763 # Integers could be ord(\n), or strings could chr(\n)
764 # Or just remove them, with ord(u'\n') and chr(u'\n')
765 #
766 # I think this relies on small string optimization. If we have it,
767 # then 1-4 byte characters are efficient, and don't require heap
768 # allocation.
769
770 elif typ == Id.Char_OneChar:
771 # TODO: look up integer directly?
772 cval = num.ToBig(ord(consts.LookupCharC(tok_str[1])))
773 elif typ == Id.Char_UBraced:
774 hex_str = tok_str[3:-1] # \u{123}
775 # ValueError shouldn't happen because lexer validates
776 cval = value.Int(mops.FromStr(hex_str, 16))
777
778 else:
779 raise AssertionError(typ)
780
781 return expr.Const(tok, cval)
782
783 def _CheckLhs(self, lhs):
784 # type: (expr_t) -> None
785
786 UP_lhs = lhs
787 with tagswitch(lhs) as case:
788 if case(expr_e.Var):
789 # OK - e.g. setvar a.b.c[i] = 42
790 pass
791
792 elif case(expr_e.Subscript):
793 lhs = cast(Subscript, UP_lhs)
794 self._CheckLhs(lhs.obj) # recurse on LHS
795
796 elif case(expr_e.Attribute):
797 lhs = cast(Attribute, UP_lhs)
798 self._CheckLhs(lhs.obj) # recurse on LHS
799
800 else:
801 # Illegal - e.g. setglobal {}["key"] = 42
802 p_die("Subscript/Attribute not allowed on this LHS expression",
803 location.TokenForExpr(lhs))
804
805 def _LhsExprList(self, p_node):
806 # type: (PNode) -> List[y_lhs_t]
807 """lhs_list: expr (',' expr)*"""
808 assert p_node.typ == grammar_nt.lhs_list
809
810 lhs_list = [] # type: List[y_lhs_t]
811 n = p_node.NumChildren()
812 for i in xrange(0, n, 2):
813 p = p_node.GetChild(i)
814 #self.p_printer.Print(p)
815
816 e = self.Expr(p)
817 UP_e = e
818 with tagswitch(e) as case:
819 if case(expr_e.Var):
820 e = cast(expr.Var, UP_e)
821 lhs_list.append(e.left)
822
823 elif case(expr_e.Subscript):
824 e = cast(Subscript, UP_e)
825 self._CheckLhs(e)
826 lhs_list.append(e)
827
828 elif case(expr_e.Attribute):
829 e = cast(Attribute, UP_e)
830 self._CheckLhs(e)
831 if e.op.id != Id.Expr_Dot:
832 # e.g. setvar obj->method is not valid
833 p_die("Can't assign to this attribute expr", e.op)
834 lhs_list.append(e)
835
836 else:
837 pass # work around mycpp bug
838
839 # TODO: could blame arbitary expr_t, bu this works most of
840 # the time
841 if p.tok:
842 blame = p.tok # type: loc_t
843 else:
844 blame = loc.Missing
845 p_die("Can't assign to this expression", blame)
846
847 return lhs_list
848
849 def MakeVarDecl(self, p_node):
850 # type: (PNode) -> command.VarDecl
851 """
852 ysh_var_decl: name_type_list ['=' testlist] end_stmt
853 """
854 assert p_node.typ == grammar_nt.ysh_var_decl
855
856 lhs = self._NameTypeList(p_node.GetChild(0)) # could be a tuple
857
858 # This syntax is confusing, and different than JavaScript
859 # var x, y = 1, 2
860 # But this is useful:
861 # var flag, i = parseArgs(spec, argv)
862
863 n = p_node.NumChildren()
864 if n >= 3:
865 rhs = self.Expr(p_node.GetChild(2))
866 else:
867 rhs = None
868
869 # The caller should fill in the keyword token.
870 return command.VarDecl(None, lhs, rhs)
871
872 def MakeMutation(self, p_node):
873 # type: (PNode) -> command.Mutation
874 """
875 ysh_mutation: lhs_list (augassign | '=') testlist end_stmt
876 """
877 typ = p_node.typ
878 assert typ == grammar_nt.ysh_mutation
879
880 lhs_list = self._LhsExprList(p_node.GetChild(0)) # could be a tuple
881 op_tok = p_node.GetChild(1).tok
882 if len(lhs_list) > 1 and op_tok.id != Id.Arith_Equal:
883 p_die('Multiple assignment must use =', op_tok)
884 rhs = self.Expr(p_node.GetChild(2))
885 return command.Mutation(None, lhs_list, op_tok, rhs)
886
887 def _EggexFlag(self, p_node):
888 # type: (PNode) -> EggexFlag
889 n = p_node.NumChildren()
890 if n == 1:
891 return EggexFlag(False, p_node.GetChild(0).tok)
892 elif n == 2:
893 return EggexFlag(True, p_node.GetChild(1).tok)
894 else:
895 raise AssertionError()
896
897 def _Eggex(self, p_node):
898 # type: (PNode) -> Eggex
899 """
900 eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'
901 """
902 left = p_node.GetChild(0).tok
903 regex = self._Regex(p_node.GetChild(1))
904
905 flags = [] # type: List[EggexFlag]
906 trans_pref = None # type: Optional[Token]
907
908 i = 2
909 current = p_node.GetChild(i)
910 if current.typ == Id.Op_Semi:
911 i += 1
912 while True:
913 current = p_node.GetChild(i)
914 if current.typ != grammar_nt.re_flag:
915 break
916 flags.append(self._EggexFlag(current))
917 i += 1
918
919 if current.typ == Id.Op_Semi:
920 i += 1
921 trans_pref = p_node.GetChild(i).tok
922
923 # Canonicalize and validate flags for ERE only. Default is ERE.
924 if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
925 canonical_flags = regex_translate.CanonicalFlags(flags)
926 else:
927 canonical_flags = None
928
929 return Eggex(left, regex, flags, trans_pref, canonical_flags)
930
931 def YshCasePattern(self, pnode):
932 # type: (PNode) -> pat_t
933 assert pnode.typ == grammar_nt.ysh_case_pat, pnode
934
935 pattern = pnode.GetChild(0)
936 typ = pattern.typ
937 if typ == Id.Op_LParen:
938 # pat_expr or pat_else
939 pattern = pnode.GetChild(1)
940 typ = pattern.typ
941
942 if typ == grammar_nt.pat_else:
943 return pat.Else
944
945 if typ == grammar_nt.pat_exprs:
946 exprs = [] # type: List[expr_t]
947 for i in xrange(pattern.NumChildren()):
948 child = pattern.GetChild(i)
949 if child.typ == grammar_nt.expr:
950 expr = self.Expr(child)
951 exprs.append(expr)
952 return pat.YshExprs(exprs)
953
954 if typ == grammar_nt.eggex:
955 return self._Eggex(pattern)
956
957 raise AssertionError()
958
959 def _BlockArg(self, p_node):
960 # type: (PNode) -> expr_t
961
962 n = p_node.NumChildren()
963 if n == 1:
964 child = p_node.GetChild(0)
965 return self.Expr(child)
966
967 # It can only be an expression, not a=42, or ...expr
968 p_die('Invalid block expression argument', p_node.tok)
969
970 def _Argument(self, p_node, after_semi, arglist):
971 # type: (PNode, bool, ArgList) -> None
972 """
973 argument: (
974 test [comp_for]
975 | test '=' test # named arg
976 | '...' test # var args
977 )
978 """
979 pos_args = arglist.pos_args
980 named_args = arglist.named_args
981
982 assert p_node.typ == grammar_nt.argument, p_node
983 n = p_node.NumChildren()
984 if n == 1:
985 child = p_node.GetChild(0)
986 if after_semi:
987 p_die(POS_ARG_MISPLACED, child.tok)
988 arg = self.Expr(child)
989 pos_args.append(arg)
990 return
991
992 if n == 2:
993 # Note: We allow multiple spreads, just like Julia. They are
994 # concatenated as in lists and dicts.
995 tok0 = p_node.GetChild(0).tok
996 if tok0.id == Id.Expr_Ellipsis:
997 spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
998 if after_semi: # f(; ... named)
999 named_args.append(NamedArg(None, spread_expr))
1000 else: # f(...named)
1001 pos_args.append(spread_expr)
1002 return
1003
1004 # Note: generator expression not implemented
1005 if p_node.GetChild(1).typ == grammar_nt.comp_for:
1006 child = p_node.GetChild(0)
1007 if after_semi:
1008 p_die(POS_ARG_MISPLACED, child.tok)
1009
1010 elt = self.Expr(child)
1011 comp = self._CompFor(p_node.GetChild(1))
1012 arg = expr.GeneratorExp(elt, [comp])
1013 pos_args.append(arg)
1014 return
1015
1016 raise AssertionError()
1017
1018 if n == 3: # named args can come before or after the semicolon
1019 n1 = NamedArg(
1020 p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
1021 named_args.append(n1)
1022 return
1023
1024 raise AssertionError()
1025
1026 def _ArgGroup(self, p_node, after_semi, arglist):
1027 # type: (PNode, bool, ArgList) -> None
1028 """
1029 arg_group: argument (',' argument)* [',']
1030 """
1031 for i in xrange(p_node.NumChildren()):
1032 p_child = p_node.GetChild(i)
1033 if p_child.typ == grammar_nt.argument:
1034 self._Argument(p_child, after_semi, arglist)
1035
1036 def _ArgList(self, p_node, arglist):
1037 # type: (PNode, ArgList) -> None
1038 """For both funcs and procs
1039
1040 arglist: (
1041 [arg_group]
1042 [';' [arg_group]]
1043 )
1044
1045 arglist3: ...
1046 """
1047 n = p_node.NumChildren()
1048 if n == 0:
1049 return
1050
1051 i = 0
1052
1053 if i >= n:
1054 return
1055 child = p_node.GetChild(i)
1056 if child.typ == grammar_nt.arg_group:
1057 self._ArgGroup(child, False, arglist)
1058 i += 1
1059
1060 if i >= n:
1061 return
1062 child = p_node.GetChild(i)
1063 if child.typ == Id.Op_Semi:
1064 arglist.semi_tok = child.tok
1065 i += 1
1066
1067 # Named args after first semi-colon
1068 if i >= n:
1069 return
1070 child = p_node.GetChild(i)
1071 if child.typ == grammar_nt.arg_group:
1072 self._ArgGroup(child, True, arglist)
1073 i += 1
1074
1075 #
1076 # Special third group may have block expression - only for arglist3,
1077 # used for procs!
1078 #
1079
1080 if i >= n:
1081 return
1082 assert p_node.typ == grammar_nt.arglist3, p_node
1083
1084 child = p_node.GetChild(i)
1085 if child.typ == Id.Op_Semi:
1086 arglist.semi_tok2 = child.tok
1087 i += 1
1088
1089 if i >= n:
1090 return
1091 child = p_node.GetChild(i)
1092 if child.typ == grammar_nt.argument:
1093 arglist.block_expr = self._BlockArg(child)
1094 i += 1
1095
1096 def ProcCallArgs(self, pnode, arglist):
1097 # type: (PNode, ArgList) -> None
1098 """
1099 ysh_eager_arglist: '(' [arglist3] ')'
1100 ysh_lazy_arglist: '[' [arglist] ']'
1101 """
1102 n = pnode.NumChildren()
1103 if n == 2: # f()
1104 return
1105
1106 if n == 3:
1107 child1 = pnode.GetChild(1) # the X in '( X )'
1108
1109 self._ArgList(child1, arglist)
1110 return
1111
1112 raise AssertionError()
1113
1114 def _TypeExpr(self, pnode):
1115 # type: (PNode) -> TypeExpr
1116 """
1117 type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
1118 """
1119 assert pnode.typ == grammar_nt.type_expr, pnode.typ
1120
1121 ty = TypeExpr.CreateNull() # don't allocate children
1122
1123 ty.tok = pnode.GetChild(0).tok
1124 ty.name = lexer.TokenVal(ty.tok)
1125
1126 n = pnode.NumChildren()
1127 if n == 1:
1128 return ty
1129
1130 ty.params = []
1131 i = 2
1132 while i < n:
1133 p = self._TypeExpr(pnode.GetChild(i))
1134 ty.params.append(p)
1135 i += 2 # skip comma
1136
1137 return ty
1138
1139 def _Param(self, pnode):
1140 # type: (PNode) -> Param
1141 """
1142 param: Expr_Name [type_expr] ['=' expr]
1143 """
1144 assert pnode.typ == grammar_nt.param
1145
1146 name_tok = pnode.GetChild(0).tok
1147 n = pnode.NumChildren()
1148
1149 assert name_tok.id == Id.Expr_Name, name_tok
1150
1151 default_val = None # type: expr_t
1152 type_ = None # type: TypeExpr
1153
1154 if n == 1:
1155 # proc p(a)
1156 pass
1157
1158 elif n == 2:
1159 # proc p(a Int)
1160 type_ = self._TypeExpr(pnode.GetChild(1))
1161
1162 elif n == 3:
1163 # proc p(a = 3)
1164 default_val = self.Expr(pnode.GetChild(2))
1165
1166 elif n == 4:
1167 # proc p(a Int = 3)
1168 type_ = self._TypeExpr(pnode.GetChild(1))
1169 default_val = self.Expr(pnode.GetChild(3))
1170
1171 return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
1172
1173 def _ParamGroup(self, p_node):
1174 # type: (PNode) -> ParamGroup
1175 """
1176 param_group:
1177 (param ',')*
1178 [ (param | '...' Expr_Name) [,] ]
1179 """
1180 assert p_node.typ == grammar_nt.param_group, p_node
1181
1182 params = [] # type: List[Param]
1183 rest_of = None # type: Optional[RestParam]
1184
1185 n = p_node.NumChildren()
1186 i = 0
1187 while i < n:
1188 child = p_node.GetChild(i)
1189 if child.typ == grammar_nt.param:
1190 params.append(self._Param(child))
1191
1192 elif child.typ == Id.Expr_Ellipsis:
1193 tok = p_node.GetChild(i + 1).tok
1194 rest_of = RestParam(tok, lexer.TokenVal(tok))
1195
1196 i += 2
1197
1198 return ParamGroup(params, rest_of)
1199
1200 def Proc(self, p_node):
1201 # type: (PNode) -> proc_sig_t
1202 """
1203 ysh_proc: (
1204 [ '('
1205 [ param_group ] # word params, with defaults
1206 [ ';' [ param_group ] ] # positional typed params, with defaults
1207 [ ';' [ param_group ] ] # named params, with defaults
1208 [ ';' Expr_Name ] # optional block param, with no type or default
1209 ')'
1210 ]
1211 '{' # opening { for pgen2
1212 )
1213 """
1214 typ = p_node.typ
1215 assert typ == grammar_nt.ysh_proc
1216
1217 n = p_node.NumChildren()
1218 if n == 1: # proc f {
1219 return proc_sig.Open
1220
1221 if n == 3: # proc f () {
1222 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1223
1224 # proc f( three param groups, and block group )
1225 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1226
1227 # Word args
1228 i = 1
1229 child = p_node.GetChild(i)
1230 if child.typ == grammar_nt.param_group:
1231 sig.word = self._ParamGroup(p_node.GetChild(i))
1232
1233 # Validate word args
1234 for word in sig.word.params:
1235 if word.type:
1236 if word.type.name not in ('Str', 'Ref'):
1237 p_die('Word params may only have type Str or Ref',
1238 word.type.tok)
1239 if word.type.params is not None:
1240 p_die('Unexpected type parameters', word.type.tok)
1241
1242 i += 2
1243 else:
1244 i += 1
1245
1246 #log('i %d n %d', i, n)
1247 if i >= n:
1248 return sig
1249
1250 # Positional args
1251 child = p_node.GetChild(i)
1252 if child.typ == grammar_nt.param_group:
1253 sig.positional = self._ParamGroup(p_node.GetChild(i))
1254 i += 2
1255 else:
1256 i += 1
1257
1258 #log('i %d n %d', i, n)
1259 if i >= n:
1260 return sig
1261
1262 # Keyword args
1263 child = p_node.GetChild(i)
1264 if child.typ == grammar_nt.param_group:
1265 sig.named = self._ParamGroup(p_node.GetChild(i))
1266 i += 2
1267 else:
1268 i += 1
1269
1270 #log('i %d n %d', i, n)
1271 if i >= n:
1272 return sig
1273
1274 child = p_node.GetChild(i)
1275 if child.typ == grammar_nt.param_group:
1276 group = self._ParamGroup(p_node.GetChild(i))
1277 params = group.params
1278 if len(params) > 1:
1279 p_die('Only 1 block param is allowed', params[1].blame_tok)
1280 if group.rest_of:
1281 p_die("Rest param isn't allowed for blocks",
1282 group.rest_of.blame_tok)
1283
1284 if len(params) == 1:
1285 if params[0].type:
1286 if params[0].type.name != 'Command':
1287 p_die('Block param must have type Command',
1288 params[0].type.tok)
1289 if params[0].type.params is not None:
1290 p_die('Unexpected type parameters', params[0].type.tok)
1291
1292 sig.block_param = params[0]
1293
1294 return sig
1295
1296 def YshFunc(self, p_node, out):
1297 # type: (PNode, Func) -> None
1298 """
1299 ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
1300 """
1301 assert p_node.typ == grammar_nt.ysh_func
1302
1303 #self.p_printer.Print(p_node)
1304
1305 out.name = p_node.GetChild(0).tok
1306
1307 n = p_node.NumChildren()
1308 i = 2 # after (
1309
1310 child = p_node.GetChild(i)
1311 if child.typ == grammar_nt.param_group:
1312 out.positional = self._ParamGroup(child)
1313 i += 2 # skip past ;
1314 else:
1315 i += 1
1316
1317 if i >= n:
1318 return
1319
1320 child = p_node.GetChild(i)
1321 if child.typ == grammar_nt.param_group:
1322 out.named = self._ParamGroup(child)
1323
1324 #
1325 # Eggex Language
1326 #
1327
1328 def _RangeCharSingleQuoted(self, p_node):
1329 # type: (PNode) -> Optional[CharCode]
1330
1331 assert p_node.typ == grammar_nt.range_char, p_node
1332
1333 # 'a' in 'a'-'b'
1334
1335 child0 = p_node.GetChild(0)
1336 if child0.typ == grammar_nt.sq_string:
1337 sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
1338 n = len(sq_part.sval)
1339 if n == 0:
1340 p_die("Quoted range char can't be empty",
1341 loc.WordPart(sq_part))
1342 elif n == 1:
1343 return CharCode(sq_part.left, ord(sq_part.sval[0]), False)
1344 else:
1345 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
1346 return None
1347
1348 def _OtherRangeToken(self, p_node):
1349 # type: (PNode) -> Token
1350 """An endpoint of a range (single char)
1351
1352 range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
1353 a-z 0-9 'a'-'z' \x00-\xff
1354 """
1355 assert p_node.typ == grammar_nt.range_char, p_node
1356
1357 child0 = p_node.GetChild(0)
1358 if child0.typ == grammar_nt.char_literal:
1359 # \x00 in /[\x00 - \x20]/
1360 tok = child0.GetChild(0).tok
1361 return tok
1362
1363 tok = p_node.tok
1364 # a in a-z is Expr_Name
1365 # 0 in 0-9 is Expr_DecInt
1366 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt), tok
1367
1368 if tok.length != 1:
1369 p_die(RANGE_POINT_TOO_LONG, tok)
1370 return tok
1371
1372 def _NonRangeChars(self, p_node):
1373 # type: (PNode) -> class_literal_term_t
1374 """
1375 \" \u1234 '#'
1376 """
1377 assert p_node.typ == grammar_nt.range_char, p_node
1378
1379 child0 = p_node.GetChild(0)
1380 typ0 = p_node.GetChild(0).typ
1381
1382 if typ0 == grammar_nt.sq_string:
1383 return cast(SingleQuoted, child0.GetChild(1).tok)
1384
1385 if typ0 == grammar_nt.char_literal:
1386 return word_compile.EvalCharLiteralForRegex(child0.tok)
1387
1388 if typ0 == Id.Expr_Name:
1389 # Look up PerlClass and PosixClass
1390 return self._NameInClass(None, child0.tok)
1391
1392 raise AssertionError()
1393
1394 def _ClassLiteralTerm(self, p_node):
1395 # type: (PNode) -> class_literal_term_t
1396 """
1397 class_literal_term:
1398 range_char ['-' range_char ]
1399 | '@' Expr_Name # splice
1400 | '!' Expr_Name # negate char class
1401 ...
1402 """
1403 assert p_node.typ == grammar_nt.class_literal_term, p_node
1404
1405 typ0 = p_node.GetChild(0).typ
1406
1407 if typ0 == grammar_nt.range_char:
1408 n = p_node.NumChildren()
1409
1410 if n == 1:
1411 return self._NonRangeChars(p_node.GetChild(0))
1412
1413 # 'a'-'z' etc.
1414 if n == 3:
1415 assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
1416
1417 left = p_node.GetChild(0)
1418 right = p_node.GetChild(2)
1419
1420 code1 = self._RangeCharSingleQuoted(left)
1421 if code1 is None:
1422 tok1 = self._OtherRangeToken(left)
1423 code1 = word_compile.EvalCharLiteralForRegex(tok1)
1424
1425 code2 = self._RangeCharSingleQuoted(right)
1426 if code2 is None:
1427 tok2 = self._OtherRangeToken(right)
1428 code2 = word_compile.EvalCharLiteralForRegex(tok2)
1429 return CharRange(code1, code2)
1430
1431 raise AssertionError()
1432
1433 if typ0 == Id.Expr_At:
1434 tok1 = p_node.GetChild(1).tok
1435 return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
1436
1437 if typ0 == Id.Expr_Bang:
1438 return self._NameInClass(
1439 p_node.GetChild(0).tok,
1440 p_node.GetChild(1).tok)
1441
1442 p_die("This kind of class literal term isn't implemented",
1443 p_node.GetChild(0).tok)
1444
1445 def _ClassLiteral(self, p_node):
1446 # type: (PNode) -> List[class_literal_term_t]
1447 """class_literal: '[' class_literal_term+ ']'."""
1448 assert p_node.typ == grammar_nt.class_literal
1449 # skip [ and ]
1450 terms = [] # type: List[class_literal_term_t]
1451 for i in xrange(1, p_node.NumChildren() - 1):
1452 terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
1453
1454 return terms
1455
1456 def _NameInRegex(self, negated_tok, tok):
1457 # type: (Token, Token) -> re_t
1458 tok_str = lexer.TokenVal(tok)
1459 if tok_str == 'dot':
1460 if negated_tok:
1461 p_die("Can't negate this symbol", tok)
1462 return re.Primitive(tok, Id.Eggex_Dot)
1463
1464 if tok_str in POSIX_CLASSES:
1465 return PosixClass(negated_tok, tok_str)
1466
1467 perl = PERL_CLASSES.get(tok_str)
1468 if perl is not None:
1469 return PerlClass(negated_tok, perl)
1470
1471 if tok_str[0].isupper(): # e.g. HexDigit
1472 return re.Splice(tok, lexer.TokenVal(tok))
1473
1474 p_die("%r isn't a character class" % tok_str, tok)
1475
1476 def _NameInClass(self, negated_tok, tok):
1477 # type: (Token, Token) -> class_literal_term_t
1478 """Like the above, but 'dot' and 'd' don't mean anything within []"""
1479 tok_str = lexer.TokenVal(tok)
1480
1481 # A bare, unquoted character literal. In the grammar, this is expressed as
1482 # range_char without an ending.
1483
1484 # d is NOT 'digit', it's a literal 'd'!
1485 if len(tok_str) == 1:
1486 # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
1487 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
1488
1489 if negated_tok: # [~d] is not allowed, only [~digit]
1490 p_die("Can't negate this symbol", tok)
1491 return word_compile.EvalCharLiteralForRegex(tok)
1492
1493 # digit, word, but not d, w, etc.
1494 if tok_str in POSIX_CLASSES:
1495 return PosixClass(negated_tok, tok_str)
1496
1497 perl = PERL_CLASSES.get(tok_str)
1498 if perl is not None:
1499 return PerlClass(negated_tok, perl)
1500 p_die("%r isn't a character class" % tok_str, tok)
1501
1502 def _ReAtom(self, p_atom):
1503 # type: (PNode) -> re_t
1504 """
1505 re_atom: ( char_literal | ...
1506 """
1507 assert p_atom.typ == grammar_nt.re_atom, p_atom.typ
1508
1509 child0 = p_atom.GetChild(0)
1510
1511 typ0 = p_atom.GetChild(0).typ
1512 tok0 = p_atom.GetChild(0).tok
1513
1514 # Non-terminals
1515
1516 if typ0 == grammar_nt.class_literal:
1517 return re.CharClassLiteral(False, self._ClassLiteral(child0))
1518
1519 if typ0 == grammar_nt.sq_string:
1520 return cast(SingleQuoted, child0.GetChild(1).tok)
1521
1522 if typ0 == grammar_nt.char_literal:
1523 # Must be Id.Char_{OneChar,Hex,UBraced}
1524 assert consts.GetKind(tok0.id) == Kind.Char
1525 s = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
1526 return re.LiteralChars(tok0, s)
1527
1528 # Special punctuation
1529 if typ0 == Id.Expr_Dot: # .
1530 return re.Primitive(tok0, Id.Eggex_Dot)
1531
1532 if typ0 == Id.Arith_Caret: # ^
1533 return re.Primitive(tok0, Id.Eggex_Start)
1534
1535 if typ0 == Id.Expr_Dollar: # $
1536 return re.Primitive(tok0, Id.Eggex_End)
1537
1538 if typ0 == Id.Expr_Name:
1539 # d digit -> PosixClass PerlClass etc.
1540 return self._NameInRegex(None, tok0)
1541
1542 if typ0 == Id.Expr_Symbol:
1543 # Validate symbols here, like we validate PerlClass, etc.
1544 tok_str = lexer.TokenVal(tok0)
1545 if tok_str == '%start':
1546 return re.Primitive(tok0, Id.Eggex_Start)
1547 if tok_str == '%end':
1548 return re.Primitive(tok0, Id.Eggex_End)
1549 p_die("Unexpected token %r in regex" % tok_str, tok0)
1550
1551 if typ0 == Id.Expr_At:
1552 # | '@' Expr_Name
1553 tok1 = p_atom.GetChild(1).tok
1554 return re.Splice(tok0, lexer.TokenVal(tok1))
1555
1556 if typ0 == Id.Expr_Bang:
1557 # | '!' (Expr_Name | class_literal)
1558 # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
1559 n = p_atom.NumChildren()
1560 if n == 2:
1561 child1 = p_atom.GetChild(1)
1562 if child1.typ == grammar_nt.class_literal:
1563 return re.CharClassLiteral(True,
1564 self._ClassLiteral(child1))
1565 else:
1566 return self._NameInRegex(tok0, p_atom.GetChild(1).tok)
1567 else:
1568 # Note: !! conflicts with shell history
1569 p_die(
1570 "Backtracking with !! isn't implemented (requires Python/PCRE)",
1571 p_atom.GetChild(1).tok)
1572
1573 if typ0 == Id.Op_LParen:
1574 # | '(' regex ')'
1575
1576 # Note: in ERE (d+) is the same as <d+>. That is, Group becomes
1577 # Capture.
1578 return re.Group(self._Regex(p_atom.GetChild(1)))
1579
1580 if typ0 == Id.Arith_Less:
1581 # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'
1582
1583 n = p_atom.NumChildren()
1584 assert n == 4 or n == 6 or n == 8, n
1585
1586 # < capture d+ >
1587 regex = self._Regex(p_atom.GetChild(2))
1588
1589 as_name = None # type: Optional[Token]
1590 func_name = None # type: Optional[Token]
1591
1592 i = 3 # points at any of > as :
1593
1594 typ = p_atom.GetChild(i).typ
1595 if typ == Id.Expr_As:
1596 as_name = p_atom.GetChild(i + 1).tok
1597 i += 2
1598
1599 typ = p_atom.GetChild(i).typ
1600 if typ == Id.Arith_Colon:
1601 func_name = p_atom.GetChild(i + 1).tok
1602
1603 return re.Capture(regex, as_name, func_name)
1604
1605 raise AssertionError(typ0)
1606
1607 def _RepeatOp(self, p_repeat):
1608 # type: (PNode) -> re_repeat_t
1609 """
1610 repeat_op: '+' | '*' | '?'
1611 | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
1612 """
1613 assert p_repeat.typ == grammar_nt.repeat_op, p_repeat
1614
1615 tok = p_repeat.GetChild(0).tok
1616 id_ = tok.id
1617
1618 if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
1619 return tok # a+ a* a?
1620
1621 if id_ == Id.Op_LBrace:
1622 child1 = p_repeat.GetChild(1)
1623 if child1.typ != grammar_nt.repeat_range:
1624 # e.g. dot{N *} is .*?
1625 p_die("Perl-style repetition isn't implemented with libc",
1626 child1.tok)
1627
1628 # repeat_range: (
1629 # Expr_DecInt [',']
1630 # | ',' Expr_DecInt
1631 # | Expr_DecInt ',' Expr_DecInt
1632 # )
1633
1634 n = child1.NumChildren()
1635 if n == 1: # {3}
1636 tok = child1.GetChild(0).tok
1637 return tok # different operator than + * ?
1638
1639 if n == 2:
1640 if child1.GetChild(0).typ == Id.Expr_DecInt: # {,3}
1641 left = child1.GetChild(0).tok
1642 return re_repeat.Range(left, lexer.TokenVal(left), '',
1643 None)
1644 else: # {1,}
1645 right = child1.GetChild(1).tok
1646 return re_repeat.Range(None, '', lexer.TokenVal(right),
1647 right)
1648
1649 if n == 3: # {1,3}
1650 left = child1.GetChild(0).tok
1651 right = child1.GetChild(2).tok
1652 return re_repeat.Range(left, lexer.TokenVal(left),
1653 lexer.TokenVal(right), right)
1654
1655 raise AssertionError(n)
1656
1657 raise AssertionError(id_)
1658
1659 def _ReAlt(self, p_node):
1660 # type: (PNode) -> re_t
1661 """
1662 re_alt: (re_atom [repeat_op])+
1663 """
1664 assert p_node.typ == grammar_nt.re_alt
1665
1666 i = 0
1667 n = p_node.NumChildren()
1668 seq = [] # type: List[re_t]
1669 while i < n:
1670 r = self._ReAtom(p_node.GetChild(i))
1671 i += 1
1672 if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
1673 repeat_op = self._RepeatOp(p_node.GetChild(i))
1674 r = re.Repeat(r, repeat_op)
1675 i += 1
1676 seq.append(r)
1677
1678 if len(seq) == 1:
1679 return seq[0]
1680 else:
1681 return re.Seq(seq)
1682
1683 def _Regex(self, p_node):
1684 # type: (PNode) -> re_t
1685 """
1686 regex: [re_alt] (('|'|'or') re_alt)*
1687 """
1688 assert p_node.typ == grammar_nt.regex
1689
1690 n = p_node.NumChildren()
1691 alts = [] # type: List[re_t]
1692 for i in xrange(0, n, 2): # was children[::2]
1693 c = p_node.GetChild(i)
1694 alts.append(self._ReAlt(c))
1695
1696 if len(alts) == 1:
1697 return alts[0]
1698 else:
1699 return re.Alt(alts)
1700
1701
1702# vim: sw=4