| 1 | # Grammar for YSH.
 | 
| 2 | # Adapted from the Python 3.7 expression grammar, with several changes!
 | 
| 3 | # 
 | 
| 4 | # TODO:
 | 
| 5 | # - List comprehensions
 | 
| 6 | #   - There's also chaining => and maybe implicit vectorization ==>
 | 
| 7 | #   - But list comprehensions are more familiar, and they are concise
 | 
| 8 | # - Generator expressions?
 | 
| 9 | # - Do we need lambdas?
 | 
| 10 | 
 | 
| 11 | # Note: trailing commas are allowed:
 | 
| 12 | #   {k: mydict,}
 | 
| 13 | #   [mylist,]
 | 
| 14 | #   mytuple,
 | 
| 15 | #   f(args,)
 | 
| 16 | #   func f(params,)
 | 
| 17 | # 
 | 
| 18 | # Kinds used:
 | 
| 19 | #   VSub, Left, Right, Expr, Op, Arith, Char, Eof, Unknown
 | 
| 20 | 
 | 
| 21 | # Augmented assignment operators, used by ysh_mutation.  YSH patch: removed @=
 | 
| 22 | augassign: (
 | 
| 23 |     '+=' | '-=' | '*=' | '/=' |
 | 
| 24 |     '**=' | '//=' | '%=' |
 | 
| 25 |     '&=' | '|=' | '^=' | '<<=' | '>>='
 | 
| 26 | )
 | 
| 27 | 
 | 
| 28 | test: or_test ['if' or_test 'else' test] | lambdef   # conditional expression (x if c else y), or a lambda
 | 
| 29 | 
 | 
| 30 | # Lambdas follow the same rules as Python:
 | 
| 31 | #
 | 
| 32 | # |x| 1, 2                ==    (|x| 1), 2
 | 
| 33 | # |x| x if True else 42   ==    |x| (x if True else 42)
 | 
| 34 | #
 | 
| 35 | # Python also had a test_nocond production for cases like the one below.  We
 | 
| 36 | # don't need it because we can't have multiple ifs.
 | 
| 37 | # [x for x in range(3) if lambda x: x if 1]
 | 
| 38 | # 
 | 
| 39 | # The zero arg syntax like || 1  annoys me -- but this also works:
 | 
| 40 | # func() { return 1 }
 | 
| 41 | # 
 | 
| 42 | # We used name_type_list rather than param_group because a default value like
 | 
| 43 | # x|y (bitwise or) conflicts with the | delimiter!
 | 
| 44 | #
 | 
| 45 | # TODO: consider this syntax:
 | 
| 46 | # fn (x) x            # expression
 | 
| 47 | # fn (x) ^( echo hi ) # statement
 | 
| 48 | 
 | 
| 49 | lambdef: '|' [name_type_list] '|' test   # the parameter list between the bars is optional
 | 
| 50 | 
 | 
| 51 | or_test: and_test ('or' and_test)*   # 'or' binds more loosely than 'and'
 | 
| 52 | and_test: not_test ('and' not_test)*   # 'and' binds more loosely than 'not'
 | 
| 53 | not_test: 'not' not_test | comparison   # prefix 'not' can be stacked: not not x
 | 
| 54 | comparison: range_expr (comp_op range_expr)*   # operands are range_expr, so '..' binds tighter than comp_op
 | 
| 55 | 
 | 
| 56 | # A range like 1 .. n.  Unlike slice, both the beginning and the end are required
 | 
| 57 | range_expr: expr ['..' expr]
 | 
| 58 | 
 | 
| 59 | # YSH patch: remove legacy <>, add === !== and the ~ family (no == or != here)
 | 
| 60 | comp_op: (
 | 
| 61 |     '<'|'>'|'==='|'>='|'<='|'!=='|'in'|'not' 'in'|'is'|'is' 'not'|
 | 
| 62 |     '~' | '!~' | '~~' | '!~~' | '~=='
 | 
| 63 | )
 | 
| 64 | 
 | 
| 65 | # Splat inside ( ) and [ ] (used by testlist_comp; 'dict' does not reference it).  Note: In Python this was star_expr *foo
 | 
| 66 | splat_expr: '...' expr
 | 
| 67 | 
 | 
| 68 | expr: xor_expr ('|' xor_expr)*   # bitwise or; loosest of the binary operators below
 | 
| 69 | xor_expr: and_expr ('^' and_expr)*
 | 
| 70 | and_expr: shift_expr ('&' shift_expr)*
 | 
| 71 | shift_expr: arith_expr (('<<'|'>>') arith_expr)*
 | 
| 72 | # YSH: add concatenation ++ with same precedence as + and -
 | 
| 73 | arith_expr: term (('+'|'-'|'++') term)*
 | 
| 74 | # YSH: removed '@' matrix mul
 | 
| 75 | term: factor (('*'|'/'|'//'|'%') factor)*
 | 
| 76 | factor: ('+'|'-'|'~') factor | power   # unary prefix operators; can be stacked
 | 
| 77 | # YSH: removed Python 3 'await'
 | 
| 78 | power: atom trailer* ['**' factor]   # the right operand is a 'factor', so 2 ** -1 is accepted
 | 
| 79 | 
 | 
| 80 | testlist_comp: (test|splat_expr) ( comp_for | (',' (test|splat_expr))* [','] )   # one item + comp_for, or a comma-separated list with optional trailing comma
 | 
| 81 | 
 | 
| 82 | atom: (
 | 
| 83 |     '(' [testlist_comp] ')'   # empty tuple/list, or parenthesized expression
 | 
| 84 |   | '[' [testlist_comp] ']'   # empty list or list comprehension
 | 
| 85 |   | '^[' testlist ']'         # expression literal
 | 
| 86 |                               # note: ^[x for x in y] is invalid
 | 
| 87 |                               #   but ^[[x for x in y]] is a list comprehension
 | 
| 88 | 
 | 
| 89 |     # Note: newlines are significant inside {}, unlike inside () and []
 | 
| 90 |   | '{' [Op_Newline] [dict] '}'
 | 
| 91 |   | '&' Expr_Name place_trailer*   # '&' name, then zero or more [..] / .name trailers
 | 
| 92 | 
 | 
| 93 |     # NOTE: These atoms are allowed in typed array literals
 | 
| 94 |   | Expr_Name | Expr_Null | Expr_True | Expr_False 
 | 
| 95 | 
 | 
| 96 |     # Allow an optional name suffix on floats and decimal integers
 | 
| 97 |     # e.g. 100 M is a function M which multiplies by 1_000_000
 | 
| 98 |     # e.g. 100 Mi is a function Mi which multiplies by 1024 * 1024
 | 
| 99 |   | Expr_Float [Expr_Name]
 | 
| 100 |   | Expr_DecInt [Expr_Name]
 | 
| 101 | 
 | 
| 102 |   | Expr_BinInt | Expr_OctInt | Expr_HexInt 
 | 
| 103 | 
 | 
| 104 |   | Char_OneChar  # char literal \n \\ etc.
 | 
| 105 |   | Char_YHex
 | 
| 106 |   | Char_UBraced  # char literal \u{3bc}
 | 
| 107 | 
 | 
| 108 |   | dq_string | sq_string
 | 
| 109 |     # Expr_Symbol could be %mykey
 | 
| 110 | 
 | 
| 111 |   | eggex
 | 
| 112 | 
 | 
| 113 |   # $foo is disallowed, but $? is allowed.  Should be "$foo" to indicate a
 | 
| 114 |   # string, or ${foo:-}
 | 
| 115 |   | simple_var_sub
 | 
| 116 |   | sh_command_sub | braced_var_sub
 | 
| 117 |   | sh_array_literal
 | 
| 118 |   | old_sh_array_literal
 | 
| 119 | )
 | 
| 120 | 
 | 
| 121 | place_trailer: (   # trailers allowed after '&' Expr_Name in atom
 | 
| 122 |     '[' subscriptlist ']'
 | 
| 123 |   | '.' Expr_Name
 | 
| 124 | )
 | 
| 125 | 
 | 
| 126 | # Trailers follow an atom in 'power', e.g. the call in:  var f = f(x)
 | 
| 127 | trailer: (
 | 
| 128 |     '(' [arglist] ')'
 | 
| 129 |   | '[' subscriptlist ']'
 | 
| 130 | 
 | 
| 131 |   # Is a trailing {} useful for anything?  It's not in Python or JS
 | 
| 132 | 
 | 
| 133 |   | '.' Expr_Name
 | 
| 134 |   | '->' Expr_Name
 | 
| 135 |   | '=>' Expr_Name
 | 
| 136 | )
 | 
| 137 | 
 | 
| 138 | # YSH patch: this is 'expr' instead of 'test'
 | 
| 139 | # - 1:(3<4) doesn't make sense.
 | 
| 140 | # - TODO: could we revert this?  I think it might have been because we wanted
 | 
| 141 | #   first class slices like var x = 1:n, but we have ranges var x = 1 .. n instead.
 | 
| 142 | # - There was also the colon conflict for :symbol
 | 
| 143 | 
 | 
| 144 | subscriptlist: subscript (',' subscript)* [',']   # one or more subscripts; trailing comma allowed
 | 
| 145 | 
 | 
| 146 | # TODO: Add => as low precedence operator, for Func[Str, Int => Str]
 | 
| 147 | subscript: expr | [expr] ':' [expr]   # a single index, or a slice whose bounds are both optional
 | 
| 148 | 
 | 
| 149 | # TODO: => should be even lower precedence here too
 | 
| 150 | testlist: test (',' test)* [',']   # one or more tests; trailing comma allowed
 | 
| 151 | 
 | 
| 152 | # Dict syntax resembles JavaScript
 | 
| 153 | # https://stackoverflow.com/questions/38948306/what-is-javascript-shorthand-property
 | 
| 154 | #
 | 
| 155 | # Examples:
 | 
| 156 | # {age: 20} is like {'age': 20}
 | 
| 157 | # 
 | 
| 158 | # x = 'age'
 | 
| 159 | # d = %{[x]: 20}  # Evaluate x as a variable
 | 
| 160 | # d = %{["foo$x"]: 20}  # Another expression
 | 
| 161 | # d = %{[x, y]: 20}  # Tuple key
 | 
| 162 | # d = %{key1, key2: 123}  # key1 has no value (see Notes)
 | 
| 163 | # Notes:
 | 
| 164 | # - Value is optional when the key is a name, because it can be taken from the
 | 
| 165 | # environment.
 | 
| 166 | # - We don't have:
 | 
| 167 | #   - dict comprehensions.  Maybe wait until LR parsing?
 | 
| 168 | #   - Splatting with **
 | 
| 169 | 
 | 
| 170 | dict_pair: (
 | 
| 171 |     Expr_Name [':' test]         # bare name key; the value is optional
 | 
| 172 |   | '[' testlist ']' ':' test    # bracketed (evaluated) key
 | 
| 173 |   | sq_string ':' test 
 | 
| 174 |   | dq_string ':' test
 | 
| 175 | )
 | 
| 176 | 
 | 
| 177 | comma_newline: ',' [Op_Newline] | Op_Newline   # separator: a comma, a newline, or a comma then a newline
 | 
| 178 | 
 | 
| 179 | dict: dict_pair (comma_newline dict_pair)* [comma_newline]   # a trailing separator is allowed
 | 
| 180 | 
 | 
| 181 | # This is how Python implemented dict comprehensions.  We can probably do the
 | 
| 182 | # same.
 | 
| 183 | #
 | 
| 184 | # dictorsetmaker: ( ((test ':' test | '**' expr)
 | 
| 185 | #                    (comp_for | (',' (test ':' test | '**' expr))* [','])) |
 | 
| 186 | #                   ((test | splat_expr)
 | 
| 187 | #                    (comp_for | (',' (test | splat_expr))* [','])) )
 | 
| 188 | 
 | 
| 189 | # The reason that keywords are test nodes instead of NAME is that using NAME
 | 
| 190 | # results in an ambiguity. ast.c makes sure it's a NAME.
 | 
| 191 | # "test '=' test" is really "keyword '=' test", but we have no such token.
 | 
| 192 | # These need to be in a single rule to avoid grammar that is ambiguous
 | 
| 193 | # to our LL(1) parser. (In Python, 'test' included '*expr' via star_expr and
 | 
| 194 | # '*' was also matched explicitly here for precedence; this rule matches '...'.)
 | 
| 195 | # Illegal combinations and orderings are blocked in ast.c:
 | 
| 196 | # multiple (test comp_for) arguments are blocked; keyword unpackings
 | 
| 197 | # that precede iterable unpackings are blocked; etc.
 | 
| 198 | 
 | 
| 199 | argument: (
 | 
| 200 |     test [comp_for]
 | 
| 201 |     # named arg
 | 
| 202 |   | test '=' test
 | 
| 203 |     # splat.  The ... goes before, not after, to be consistent with Python, JS,
 | 
| 204 |     # and the prefix @ operator.
 | 
| 205 |   | '...' test
 | 
| 206 | )
 | 
| 207 | 
 | 
| 208 | # The grammar at call sites is less restrictive than at declaration sites.
 | 
| 209 | # '...' (splat) can appear anywhere.  Keyword args can appear anywhere too.
 | 
| 210 | arg_group: argument (',' argument)* [',']
 | 
| 211 | arglist: (   # up to two ';'-separated groups; used by 'trailer' and ysh_lazy_arglist
 | 
| 212 |        [arg_group]
 | 
| 213 |   [';' [arg_group]]
 | 
| 214 | )
 | 
| 215 | arglist3: (   # like arglist plus a third section; used by ysh_eager_arglist
 | 
| 216 |        [arg_group]
 | 
| 217 |   [';' [arg_group]]
 | 
| 218 |   [';' [argument]]   # procs have an extra block argument
 | 
| 219 | )
 | 
| 220 | 
 | 
| 221 | 
 | 
| 222 | # YSH patch: test_nocond -> or_test.  I believe this was trying to prevent the
 | 
| 223 | # "double if" ambiguity here:
 | 
| 224 | #
 | 
| 225 | # [x for x in range(3) if lambda x: x if 1]
 | 
| 226 | # 
 | 
| 227 | # but YSH doesn't support "nested loops", so we don't have this problem.
 | 
| 228 | comp_for: 'for' name_type_list 'in' or_test ['if' or_test]
 | 
| 229 | 
 | 
| 230 | 
 | 
| 231 | #
 | 
| 232 | # Expressions that are New in YSH
 | 
| 233 | #
 | 
| 234 | 
 | 
| 235 | # Notes:
 | 
| 236 | # - Most of these occur in 'atom' above
 | 
| 237 | # - There is no bare Expr_Name here: write $mystr or (mystr), not mystr
 | 
| 238 | array_item: (
 | 
| 239 |    Expr_Null | Expr_True | Expr_False 
 | 
| 240 |  | Expr_Float | Expr_DecInt | Expr_BinInt | Expr_OctInt | Expr_HexInt
 | 
| 241 |  | dq_string | sq_string
 | 
| 242 |  | sh_command_sub | braced_var_sub | simple_var_sub
 | 
| 243 |  | '(' test ')'   # any other expression has to be parenthesized
 | 
| 244 | )
 | 
| 245 | sh_array_literal: ':|' Expr_CastedDummy Op_Pipe   # NOTE(review): Expr_CastedDummy presumably stands in for content parsed outside this grammar -- confirm
 | 
| 246 | 
 | 
| 247 | # TODO: remove the old %( array syntax
 | 
| 248 | old_sh_array_literal: '%(' Expr_CastedDummy Right_ShArrayLiteral
 | 
| 249 | sh_command_sub: ( '$(' | '@(' | '^(' ) Expr_CastedDummy Eof_RParen   # three opening forms, one closing token
 | 
| 250 | 
 | 
| 251 | # Opening delimiters, in the order listed below:  "   $"   """   $"""   ^"
 | 
| 252 | dq_string: (
 | 
| 253 |   Left_DoubleQuote | Left_DollarDoubleQuote |
 | 
| 254 |   Left_TDoubleQuote | Left_DollarTDoubleQuote |
 | 
| 255 |   Left_CaretDoubleQuote
 | 
| 256 |   ) Expr_CastedDummy Right_DoubleQuote
 | 
| 257 | 
 | 
| 258 | # Opening delimiters:  '   '''   r'   r'''
 | 
| 259 | # $'  for "refactoring" property
 | 
| 260 | # u'   u'''   b'   b'''
 | 
| 261 | sq_string: (
 | 
| 262 |     Left_SingleQuote | Left_TSingleQuote      # '   '''
 | 
| 263 |   | Left_RSingleQuote | Left_RTSingleQuote    # r'   r'''
 | 
| 264 |   | Left_DollarSingleQuote                    # $'
 | 
| 265 |   | Left_USingleQuote | Left_UTSingleQuote    # u'   u'''
 | 
| 266 |   | Left_BSingleQuote | Left_BTSingleQuote    # b'   b'''
 | 
| 267 | ) Expr_CastedDummy Right_SingleQuote
 | 
| 268 | 
 | 
| 269 | braced_var_sub: '${' Expr_CastedDummy Right_DollarBrace   # the braced form, e.g. ${foo}
 | 
| 270 | 
 | 
| 271 | simple_var_sub: (
 | 
| 272 |   # This is everything in Kind.VSub except VSub_Name, which is braced: ${foo}
 | 
| 273 |   #
 | 
| 274 |   # Note: we could allow $foo and $0, but disallow the rest in favor of ${@}
 | 
| 275 |   # and ${-}?  Meh it's too inconsistent.
 | 
| 276 |     VSub_DollarName | VSub_Number   # like $foo and $0
 | 
| 277 |   | VSub_Bang | VSub_At | VSub_Pound | VSub_Dollar | VSub_Star | VSub_Hyphen
 | 
| 278 |   | VSub_QMark   # $?
 | 
| 279 |   # NOTE: $? should be STATUS because it's an integer.
 | 
| 280 | )
 | 
| 281 | 
 | 
| 282 | #
 | 
| 283 | # Assignment / Type Variables
 | 
| 284 | #
 | 
| 285 | # Several differences vs. Python:
 | 
| 286 | #
 | 
| 287 | # - no yield expression on RHS
 | 
| 288 | # - no star expressions on either side (Python 3)    *x, y = 2, *b
 | 
| 289 | # - no multiple assignments like: var x = y = 3
 | 
| 290 | # - type annotation syntax is more restrictive    # a: (1+2) = 3 is OK in python
 | 
| 291 | # - We're validating the lvalue here, instead of doing it in the "transformer".
 | 
| 292 | #   We have the 'var' prefix which helps.
 | 
| 293 | 
 | 
| 294 | # name_type use cases:
 | 
| 295 | #   var x Int, y Int = 3, 5
 | 
| 296 | #   / <capture d+ as date: int> /
 | 
| 297 | #
 | 
| 298 | #   for x Int, y Int
 | 
| 299 | #   [x for x Int, y Int in ...]
 | 
| 300 | #
 | 
| 301 | #   func(x Int, y Int) - this is separate
 | 
| 302 | 
 | 
| 303 | # Optional colon because we want both
 | 
| 304 | 
 | 
| 305 | #   var x: Int = 42                             # colon looks nicer
 | 
| 306 | #   proc p (; x Int, y Int; z Int) { echo hi }  # colon gets in the way of ;
 | 
| 307 | 
 | 
| 308 | name_type: Expr_Name [':'] [type_expr]   # x   x Int   x: Int
 | 
| 309 | name_type_list: name_type (',' name_type)*   # no trailing comma here
 | 
| 310 | 
 | 
| 311 | type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]   # Name, or Name[T, ...]; parameters nest, no trailing comma
 | 
| 312 | 
 | 
| 313 | # NOTE: Eof_RParen and Eof_Backtick aren't allowed because we don't want 'var'
 | 
| 314 | # in command subs.
 | 
| 315 | end_stmt: '}' | ';' | Op_Newline | Eof_Real 
 | 
| 316 | 
 | 
| 317 | # TODO: allow -> to denote aliasing/mutation
 | 
| 318 | ysh_var_decl: name_type_list ['=' testlist] end_stmt   # the initializer is optional
 | 
| 319 | 
 | 
| 320 | # Note: this is a more precise way of writing ysh_mutation, but it's ambiguous :(
 | 
| 321 | # ysh_mutation: lhs augassign testlist end_stmt
 | 
| 322 | #             | lhs_list '=' testlist end_stmt
 | 
| 323 | 
 | 
| 324 | # Note: for YSH (not Tea), we could accept [':'] expr for setvar :out = 'foo'
 | 
| 325 | lhs_list: expr (',' expr)*
 | 
| 326 | 
 | 
| 327 | # TODO: allow -> to denote aliasing/mutation
 | 
| 328 | ysh_mutation: lhs_list (augassign | '=') testlist end_stmt   # looser than the commented-out form: augassign is accepted after a multi-item lhs_list
 | 
| 329 | 
 | 
| 330 | # proc arg lists, like:
 | 
| 331 | #     json write (x, indent=1)
 | 
| 332 | #     cd /tmp ( ; ; ^(echo hi))
 | 
| 333 | #
 | 
| 334 | # What about:
 | 
| 335 | #      myproc /tmp [ ; ; ^(echo hi)] - I guess this doesn't make sense?
 | 
| 336 | ysh_eager_arglist: '(' [arglist3] ')'   # up to three ';'-separated sections
 | 
| 337 | ysh_lazy_arglist: '[' [arglist] ']'     # up to two ';'-separated sections
 | 
| 338 | 
 | 
| 339 | #
 | 
| 340 | # Other Entry Points
 | 
| 341 | #
 | 
| 342 | 
 | 
| 343 | # Parenthesized expression, as in:  if (x > 0) etc.
 | 
| 344 | ysh_expr: '(' testlist ')'
 | 
| 345 | 
 | 
| 346 | #    = 42 + a[i]     (the rule starts at the testlist, after the = or call)
 | 
| 347 | # call f(x)
 | 
| 348 | command_expr: testlist end_stmt
 | 
| 349 | 
 | 
| 350 | # $[d->key] etc.  The rule includes the closing ] but not the opening $[
 | 
| 351 | ysh_expr_sub: testlist ']'
 | 
| 352 | 
 | 
| 353 | # Signatures for proc and func.
 | 
| 354 | 
 | 
| 355 | # Note: 'proc name-with-hyphens' is allowed, so we can't parse the name in
 | 
| 356 | # expression mode.  This rule therefore begins after the name.
 | 
| 357 | ysh_proc: (
 | 
| 358 |   [ '('                      # the whole parenthesized signature is optional
 | 
| 359 |           [ param_group ]    # word params, with defaults
 | 
| 360 |     [ ';' [ param_group ] ]  # positional typed params, with defaults
 | 
| 361 |     [ ';' [ param_group ] ]  # named params, with defaults
 | 
| 362 |     [ ';' [ param_group ] ]  # optional block param; param_group is used, so "no type or default" is not enforced here
 | 
| 363 | 
 | 
| 364 |     # This causes a pgen2 error?  It doesn't know which branch to take
 | 
| 365 |     # So we have the extra {block} syntax
 | 
| 366 |     #[ ';' Expr_Name ]        # optional block param, with no type or default
 | 
| 367 |     ')'  
 | 
| 368 |   ]
 | 
| 369 |   '{'  # opening { for pgen2
 | 
| 370 | )
 | 
| 371 | 
 | 
| 372 | ysh_func: (   # unlike ysh_proc, the name is parsed here and the parentheses are required
 | 
| 373 |     Expr_Name '(' [param_group] [';' param_group] ')' ['=>' type_expr] '{'
 | 
| 374 | )
 | 
| 375 | 
 | 
| 376 | param: Expr_Name [type_expr] ['=' expr]   # name, optional type, optional default (an 'expr', not a 'test')
 | 
| 377 | 
 | 
| 378 | # This is an awkward way of writing that '...' has to come last.
 | 
| 379 | param_group: (
 | 
| 380 |    (param ',')*                           # zero or more params, each followed by a comma
 | 
| 381 |    [ (param | '...' Expr_Name) [','] ]    # optional last item: a param or '...' name, then an optional comma
 | 
| 382 | )
 | 
| 383 | 
 | 
| 384 | #
 | 
| 385 | # Regex Sublanguage
 | 
| 386 | #
 | 
| 387 | 
 | 
| 388 | char_literal: Char_OneChar | Char_Hex | Char_UBraced   # NOTE(review): 'atom' lists Char_YHex where this lists Char_Hex -- confirm intended
 | 
| 389 | 
 | 
| 390 | # We allow a-z A-Z 0-9 as ranges, but otherwise they have to be quoted.
 | 
| 391 | # The parser enforces that they are single strings
 | 
| 392 | range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
 | 
| 393 | 
 | 
| 394 | # A single item like digit, or a range like a-z
 | 
| 395 | # We have to do further validation of ranges later.
 | 
| 396 | class_literal_term: (
 | 
| 397 |     # NOTE: range_char has sq_string
 | 
| 398 |     range_char ['-' range_char ]
 | 
| 399 |     # splice a literal set of characters
 | 
| 400 |   | '@' Expr_Name
 | 
| 401 |   | '!' Expr_Name   # NOTE(review): presumably a negated name, like re_atom's !digit -- confirm
 | 
| 402 |     # Reserved for [[.collating sequences.]] (Unicode)
 | 
| 403 |   | '.' Expr_Name
 | 
| 404 |     # Reserved for [[=character equivalents=]] (Unicode)
 | 
| 405 |   | '=' Expr_Name
 | 
| 406 |     # TODO: Do these char classes actually work in bash/awk/egrep/sed/etc.?
 | 
| 407 | 
 | 
| 408 | )
 | 
| 409 | class_literal: '[' class_literal_term+ ']'   # one or more terms; an empty [] does not match
 | 
| 410 | 
 | 
| 411 | # NOTE: Here is an example of where you can put ^ in the middle of a pattern in
 | 
| 412 | # Python, and it matters!
 | 
| 413 | # >>> r = re.compile('.f[a-z]*', re.DOTALL|re.MULTILINE)
 | 
| 414 | # >>> r.findall('z\nfoo\nbeef\nfood\n')
 | 
| 415 | # ['\nfoo', 'ef', '\nfood']
 | 
| 416 | # >>> r = re.compile('.^f[a-z]*', re.DOTALL|re.MULTILINE)
 | 
| 417 | # r.findall('z\nfoo\nbeef\nfood\n')
 | 
| 418 | # ['\nfoo', '\nfood']
 | 
| 419 | 
 | 
| 420 | re_atom: (
 | 
| 421 |     char_literal
 | 
| 422 |     # builtin regex like 'digit' or a regex reference like 'D'
 | 
| 423 |   | Expr_Name
 | 
| 424 |     # %begin or %end  (NOTE(review): other comments in this file say %start -- confirm the name)
 | 
| 425 |   | Expr_Symbol
 | 
| 426 |   | class_literal
 | 
| 427 |     # !digit or ![a-f].  Note ! %boundary could be \B in Python, but ERE
 | 
| 428 |     # doesn't have anything like that
 | 
| 429 |   | '!' (Expr_Name | class_literal)
 | 
| 430 | 
 | 
| 431 |     # syntactic space for Perl-style backtracking
 | 
| 432 |     # !!REF 1   !!REF name
 | 
| 433 |     # !!AHEAD(d+)   !!BEHIND(d+)  !!NOT_AHEAD(d+)  !!NOT_BEHIND(d+)
 | 
| 434 |     #
 | 
| 435 |     # Note: !! conflicts with history
 | 
| 436 |   | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
 | 
| 437 | 
 | 
| 438 |     # Splice another expression
 | 
| 439 |   | '@' Expr_Name
 | 
| 440 |     # Legacy syntax; any %start %end are preferred
 | 
| 441 |   | '.' | '^' | '$'
 | 
| 442 |     # In a language-independent spec, backslashes are disallowed within 'sq'.
 | 
| 443 |     # Write it with char literals outside strings: 'foo' \\ 'bar' \n
 | 
| 444 |     # 
 | 
| 445 |     # No double-quoted strings because you can write "x = $x" with 'x = ' @x
 | 
| 446 |   | sq_string
 | 
| 447 | 
 | 
| 448 |     # grouping (non-capturing in Perl; capturing in ERE although < > is preferred)
 | 
| 449 |   | '(' regex ')'
 | 
| 450 | 
 | 
| 451 |     # Capturing group, with optional name and conversion function
 | 
| 452 |     #   <capture d+ as date>
 | 
| 453 |     #   <capture d+ as date: int>
 | 
| 454 |     #   <capture d+ : int>
 | 
| 455 |   | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'
 | 
| 456 | 
 | 
| 457 |     # Might want this obscure conditional construct.  Can't use C-style ternary
 | 
| 458 |     # because '?' is a regex operator.
 | 
| 459 |   #| '{' regex 'if' regex 'else' regex '}'
 | 
| 460 | 
 | 
| 461 |   # Others:
 | 
| 462 |   # PCRE has (?R ) for recursion?  That could be !RECURSE()
 | 
| 463 |   # Note: .NET has && in character classes, making it a recursive language
 | 
| 464 | )
 | 
| 465 | 
 | 
| 466 | # e.g.   a{3}   a{3,4}  a{3,}   a{,4} but not a{,}
 | 
| 467 | repeat_range: (
 | 
| 468 |     Expr_DecInt [',']             # {3} or {3,}
 | 
| 469 |   | ',' Expr_DecInt               # {,4}
 | 
| 470 |   | Expr_DecInt ',' Expr_DecInt   # {3,4}
 | 
| 471 | )
 | 
| 472 | 
 | 
| 473 | repeat_op: (
 | 
| 474 |     '+' | '*' | '?' 
 | 
| 475 |   # In PCRE, ?? *? +? {}? is lazy/nongreedy and ?+ *+ ++ {}+ is "possessive"
 | 
| 476 |   # We use N and P modifiers within {}.
 | 
| 477 |   # a{N +}  a{P ?}  a{P 3,4}  a{P ,4}
 | 
| 478 |   | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'   # the grammar accepts any Expr_Name as the modifier
 | 
| 479 | )
 | 
| 480 | 
 | 
| 481 | re_alt: (re_atom [repeat_op])+   # a sequence of atoms, each optionally followed by one repeat_op
 | 
| 482 | 
 | 
| 483 | regex: [re_alt] (('|'|'or') re_alt)*   # alternation; the first branch may be empty
 | 
| 484 | 
 | 
| 485 | # e.g. /digit+ ; multiline !ignorecase/
 | 
| 486 | #
 | 
| 487 | # This can express translation preferences:
 | 
| 488 | #
 | 
| 489 | # / d+ ; ; ERE /                is   '[[:digit:]]+'
 | 
| 490 | # / d+ ; ; PCRE /               is   '\d+'
 | 
| 491 | # / d+ ; ignorecase ; python /  is   '(?i)\d+'
 | 
| 492 | 
 | 
| 493 | # Python has the syntax
 | 
| 494 | #    (?i:myre)  to set a flag
 | 
| 495 | #    (?-i:myre) to remove a flag
 | 
| 496 | #
 | 
| 497 | # They can apply to portions of the expression, which we don't have here.
 | 
| 498 | re_flag: ['!'] Expr_Name   # optional '!' prefix, as in !ignorecase
 | 
| 499 | eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'   # / pattern [; flags [; translation preference]] /
 | 
| 500 | 
 | 
| 501 | # Patterns are the start of a case arm, i.e.
 | 
| 502 | #
 | 
| 503 | # case (foo) {
 | 
| 504 | #   (40 + 2) | (0) { echo number }
 | 
| 505 | #   ^^^^^^^^^^^^^^-- This is pattern
 | 
| 506 | # }
 | 
| 507 | #
 | 
| 508 | # Due to limitations created from pgen2/cmd_parser interactions, we also parse
 | 
| 509 | # the leading '{' token of the case arm body in pgen2. We do this to help pgen2
 | 
| 510 | # figure out when to transfer control back to the cmd_parser. For more details
 | 
| 511 | # see #oil-dev > Dev Friction / Smells.
 | 
| 512 | #
 | 
| 513 | # case (foo) {
 | 
| 514 | #   (40 + 2) | (0) { echo number }
 | 
| 515 | #                  ^-- End of pattern/beginning of case arm body
 | 
| 516 | # }
 | 
| 517 | 
 | 
| 518 | ysh_case_pat: (
 | 
| 519 |     '(' (pat_else | pat_exprs)   # the closing ')' is matched inside pat_else / pat_exprs
 | 
| 520 |   | eggex
 | 
| 521 | ) [Op_Newline] '{'   # leading '{' of the case arm body
 | 
| 522 | 
 | 
| 523 | pat_else: 'else' ')'
 | 
| 524 | pat_exprs: expr ')' [Op_Newline] ('|' [Op_Newline] '(' expr ')' [Op_Newline])*   # (e1) | (e2) | ...; the first '(' comes from ysh_case_pat
 | 
| 525 | 
 | 
| 526 | 
 | 
| 527 | # Syntax reserved for PCRE/Python, but that's not in ERE:
 | 
| 528 | # 
 | 
| 529 | #   non-greedy     a{N *}
 | 
| 530 | #   non-capturing  ( digit+ )
 | 
| 531 | #   backtracking   !!REF 1  !!AHEAD(d+)
 | 
| 532 | # 
 | 
| 533 | # Legacy syntax:
 | 
| 534 | # 
 | 
| 535 | #   ^ and $ instead of %start and %end
 | 
| 536 | #   < and > instead of %start_word and %end_word
 | 
| 537 | #   . instead of dot
 | 
| 538 | #   | instead of 'or'
 |