| 1 | # Data types for the Oils AST, aka "Lossless Syntax Tree".
 | 
| 2 | #
 | 
| 3 | # Invariant: the source text can be reconstructed byte-for-byte from this tree.
 | 
| 4 | # The test/lossless.sh suite verifies this.
 | 
| 5 | 
 | 
| 6 | # We usually try to preserve the physical order of the source in the ASDL
 | 
| 7 | # fields.  One exception is the order of redirects:
 | 
| 8 | #
 | 
| 9 | #     echo >out.txt hi   
 | 
| 10 | #     # versus
 | 
| 11 | #     echo hi >out.txt
 | 
| 12 | 
 | 
| 13 | # Unrepresented:
 | 
| 14 | # - let arithmetic (rarely used)
 | 
| 15 | # - coprocesses # one with arg and one without
 | 
| 16 | # - select block
 | 
| 17 | 
 | 
| 18 | # Possible refactorings:
 | 
| 19 | #
 | 
| 20 | #   # %CompoundWord as first class variant:
 | 
| 21 | #   bool_expr = WordTest %CompoundWord | ...
 | 
| 22 | #
 | 
| 23 | #   # Can DoubleQuoted have a subset of parts compared with CompoundWord?
 | 
| 24 | #   string_part = ...  # subset of word_part
 | 
| 25 | #
 | 
| 26 | #   - Distinguish word_t with BracedTree vs. those without?  seq_word_t?
 | 
| 27 | 
 | 
| 28 | module syntax
 | 
| 29 | {
 | 
| 30 |   use core value {
 | 
| 31 |     value
 | 
| 32 |   }
 | 
| 33 | 
 | 
| 34 |   # More efficient than the List[bool] pattern we've been using
 | 
| 35 |   BoolParamBox = (bool b)
 | 
| 36 |   IntParamBox = (int i)
 | 
| 37 | 
 | 
| 38 |   # core/main_loop.py
 | 
| 39 |   parse_result = EmptyLine | Eof | Node(command cmd)
 | 
| 40 |  
 | 
| 41 |   # 'source' represents the location of a line / token.
 | 
| 42 |   source = 
 | 
| 43 |     Interactive
 | 
| 44 |   | Headless
 | 
| 45 |   | Unused(str comment)     # completion and history never show parse errors?
 | 
| 46 |   | CFlag
 | 
| 47 |   | Stdin(str comment)
 | 
| 48 | 
 | 
| 49 |     # TODO: if it's not the main script, it's sourced, and you could provide
 | 
| 50 |     # a chain of locations back to the sourced script!
 | 
| 51 |     # MainFile(str path) or SourcedFile(str path, loc location)
 | 
| 52 |   | MainFile(str path)
 | 
| 53 |   | SourcedFile(str path, loc location)
 | 
| 54 | 
 | 
| 55 |     # code parsed from a word
 | 
| 56 |     # used for 'eval', 'trap', 'printf', 'complete -W', etc.
 | 
| 57 |   | ArgvWord(str what, loc location)
 | 
| 58 | 
 | 
| 59 |     # code parsed from the value of a variable
 | 
| 60 |     # used for $PS1 $PROMPT_COMMAND
 | 
| 61 |   | Variable(str var_name, loc location)
 | 
| 62 | 
 | 
| 63 |     # Point to the original variable reference
 | 
| 64 |   | VarRef(Token orig_tok)
 | 
| 65 | 
 | 
| 66 |     # alias expansion (location of first word)
 | 
| 67 |   | Alias(str argv0, loc argv0_loc)
 | 
| 68 | 
 | 
| 69 |     # 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
 | 
| 70 |     # TODO: use this for eval_unsafe_arith instead of Variable
 | 
| 71 |   | Reparsed(str what, Token left_token, Token right_token)
 | 
| 72 | 
 | 
| 73 |     # For --location-str
 | 
| 74 |   | Synthetic(str s)
 | 
| 75 | 
 | 
| 76 |   SourceLine = (int line_num, str content, source src)
 | 
| 77 | 
 | 
| 78 |   # Note that ASDL generates:
 | 
| 79 |   #    typedef uint16_t Id_t;
 | 
| 80 |   # So Token is
 | 
| 81 |   #    8 bytes GC header + 2 + 2 + 4 + 8 + 8 = 32 bytes on 64-bit machines
 | 
| 82 |   #
 | 
| 83 |   # We transpose (id, col, length) -> (id, length, col) for C struct packing.
 | 
| 84 |   Token = (id id, uint16 length, int col, SourceLine? line, str? tval)
 | 
| 85 | 
 | 
| 86 |   # I wanted to get rid of Token.tval with this separate WideToken type, but it
 | 
| 87 |   # is more efficient if word_part.Literal %Token literally is the same thing
 | 
| 88 |   # that comes out of the lexer.  Otherwise we have extra garbage.
 | 
| 89 | 
 | 
| 90 |   # WideToken = (id id, int length, int col, SourceLine? line, str? tval)
 | 
| 91 | 
 | 
| 92 |   # Slight ASDL bug: CompoundWord has to be defined before using it as a shared
 | 
| 93 |   # variant.  The _product_counter algorithm should be moved into a separate
 | 
| 94 |   # tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
 | 
| 95 |   CompoundWord = (List[word_part] parts)
 | 
| 96 | 
 | 
| 97 |   # Source location for errors
 | 
| 98 |   loc = 
 | 
| 99 |     Missing  # equivalent of runtime.NO_SPID
 | 
| 100 |   | Token %Token
 | 
| 101 |     # Very common case: argv arrays need original location
 | 
| 102 |   | ArgWord %CompoundWord
 | 
| 103 |   | WordPart(word_part p)
 | 
| 104 |   | Word(word w)
 | 
| 105 |   | Arith(arith_expr a)
 | 
| 106 |     # e.g. for errexit blaming
 | 
| 107 |   | Command(command c)
 | 
| 108 |     # the location of a token that's too long
 | 
| 109 |   | TokenTooLong(SourceLine line, id id, int length, int col)
 | 
| 110 | 
 | 
| 111 |   debug_frame = 
 | 
| 112 |     Main(str dollar0)
 | 
| 113 |     # call_tok => BASH_LINENO
 | 
| 114 |     # call_tok may be None with new --source flag?
 | 
| 115 |   | Source(Token? call_tok, str source_name)
 | 
| 116 |     # def_tok => BASH_SOURCE
 | 
| 117 |     # call_tok may be None if invoked via RunFuncForCompletion?
 | 
| 118 |   | Call(Token? call_tok, Token def_tok, str func_name)
 | 
| 119 | 
 | 
| 120 |   #
 | 
| 121 |   # Shell language
 | 
| 122 |   #
 | 
| 123 | 
 | 
| 124 |   bracket_op = 
 | 
| 125 |     WholeArray(id op_id)  # * or @
 | 
| 126 |   | ArrayIndex(arith_expr expr)
 | 
| 127 | 
 | 
| 128 |   suffix_op = 
 | 
| 129 |     Nullary %Token  # ${x@Q} or ${!prefix@} (which also has prefix_op)
 | 
| 130 |   | Unary(Token op, rhs_word arg_word)  # e.g. ${v:-default}
 | 
| 131 |     # TODO: Implement YSH ${x|html} and ${x %.3f}
 | 
| 132 |   | Static(Token tok, str arg)
 | 
| 133 |   | PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
 | 
| 134 |     # optional begin is arith_expr.EmptyZero
 | 
| 135 |     # optional length is None, because it's handled in a special way
 | 
| 136 |   | Slice(arith_expr begin, arith_expr? length)
 | 
| 137 | 
 | 
| 138 |   BracedVarSub = (
 | 
| 139 |       Token left,        # in dynamic ParseVarRef, same as name_tok
 | 
| 140 |       Token token,       # location for the name
 | 
| 141 |       str var_name,      # the name - TODO: remove this, use LazyStr() instead
 | 
| 142 |       Token? prefix_op,  # prefix # or ! operators
 | 
| 143 |       bracket_op? bracket_op,
 | 
| 144 |       suffix_op? suffix_op,
 | 
| 145 |       Token right        # in dynamic ParseVarRef, same as name_tok
 | 
| 146 |   )
 | 
| 147 | 
 | 
| 148 |   # Variants:
 | 
| 149 |   # - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
 | 
| 150 |   # - And """ and ''' e.g. Id.Left_TDoubleQuote
 | 
| 151 |   DoubleQuoted = (Token left, List[word_part] parts, Token right)
 | 
| 152 | 
 | 
| 153 |   # Consider making str? sval LAZY, like lexer.LazyStr(tok)
 | 
| 154 |   SingleQuoted = (Token left, str sval, Token right)
 | 
| 155 | 
 | 
| 156 |   # e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
 | 
| 157 |   SimpleVarSub = (Token tok)
 | 
| 158 | 
 | 
| 159 |   CommandSub = (Token left_token, command child, Token right)
 | 
| 160 | 
 | 
| 161 |   # - can contain word.BracedTree
 | 
| 162 |   # - has 'Token right' (an older note here said it was omitted as unused)
 | 
| 163 |   ShArrayLiteral = (Token left, List[word] words, Token right)
 | 
| 164 | 
 | 
| 165 |   # Unevaluated, typed arguments for func and proc.
 | 
| 166 |   # Note that ...arg is expr.Spread.
 | 
| 167 |   ArgList = (
 | 
| 168 |      Token left, List[expr] pos_args,
 | 
| 169 |      Token? semi_tok, List[NamedArg] named_args,
 | 
| 170 |      Token? semi_tok2, expr? block_expr,
 | 
| 171 |      Token right
 | 
| 172 |   )
 | 
| 173 | 
 | 
| 174 |   AssocPair = (CompoundWord key, CompoundWord value)
 | 
| 175 | 
 | 
| 176 |   word_part = 
 | 
| 177 |     ShArrayLiteral %ShArrayLiteral
 | 
| 178 |   | BashAssocLiteral(Token left, List[AssocPair] pairs, Token right)
 | 
| 179 |   | Literal %Token
 | 
| 180 |     # escaped case is separate so the evaluator doesn't have to check token ID
 | 
| 181 |   | EscapedLiteral(Token token, str ch)
 | 
| 182 |   | SingleQuoted %SingleQuoted
 | 
| 183 |   | DoubleQuoted %DoubleQuoted
 | 
| 184 |     # Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
 | 
| 185 |     # confuse it with the common word_part.Literal case for now
 | 
| 186 |   | SimpleVarSub %SimpleVarSub
 | 
| 187 |   | BracedVarSub %BracedVarSub
 | 
| 188 |   | ZshVarSub (Token left, CompoundWord ignored, Token right)
 | 
| 189 |     # For command sub and process sub: $(...)  <(...)  >(...)
 | 
| 190 |   | CommandSub %CommandSub
 | 
| 191 |     # ~ or ~bob
 | 
| 192 |   | TildeSub(Token left, # always the tilde
 | 
| 193 |              Token? name, str? user_name)
 | 
| 194 |   | ArithSub(Token left, arith_expr anode, Token right)
 | 
| 195 |     # {a,b,c}
 | 
| 196 |   | BracedTuple(List[CompoundWord] words)
 | 
| 197 |     # {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
 | 
| 198 |     # {a..f} or {a..f..2} or {a..f..-2}
 | 
| 199 |     # the whole range is one Token,
 | 
| 200 |   | BracedRange(Token blame_tok, id kind, str start, str end, int step)
 | 
| 201 |     # extended globs are parsed statically, unlike globs
 | 
| 202 |   | ExtGlob(Token op, List[CompoundWord] arms, Token right)
 | 
| 203 |     # a regex group is similar to an extended glob part
 | 
| 204 |   | BashRegexGroup(Token left, CompoundWord? child, Token right)
 | 
| 205 | 
 | 
| 206 |     # YSH word_part extensions
 | 
| 207 | 
 | 
| 208 |     # @myarray - Id.Lit_Splice (could be optimized to %Token)
 | 
| 209 |   | Splice(Token blame_tok, str var_name)
 | 
| 210 |     # $[d.key], etc.
 | 
| 211 |   | ExprSub(Token left, expr child, Token right)
 | 
| 212 | 
 | 
| 213 |   # Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
 | 
| 214 |   # The latter is semantically necessary.  (See osh/word_parse.py). 
 | 
| 215 |   # At runtime: RHS of 'declare x='.
 | 
| 216 |   rhs_word = Empty | Compound %CompoundWord
 | 
| 217 | 
 | 
| 218 |   word = 
 | 
| 219 |     # Returns from WordParser, but not generally stored in LST
 | 
| 220 |     Operator %Token
 | 
| 221 |     # A Compound word can contain any word_part except the Braced*Part.
 | 
| 222 |     # We could model this with another variant type but it incurs runtime
 | 
| 223 |     # overhead and seems like overkill.  Note that DoubleQuoted can't
 | 
| 224 |     # contain a SingleQuoted, etc. either.
 | 
| 225 |   | Compound %CompoundWord
 | 
| 226 |     # For word sequences command.Simple, ShArrayLiteral, for_iter.Words
 | 
| 227 |     # Could be its own type
 | 
| 228 |   | BracedTree(List[word_part] parts)
 | 
| 229 |     # For dynamic parsing of test aka [ - the string is already evaluated.
 | 
| 230 |   | String(id id, str s, CompoundWord? blame_loc)
 | 
| 231 | 
 | 
| 232 |   # Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
 | 
| 233 |   sh_lhs =
 | 
| 234 |     Name(Token left, str name)  # Lit_VarLike foo=
 | 
| 235 |                                 # TODO: Could be Name %Token
 | 
| 236 |   | IndexedName(Token left, str name, arith_expr index)
 | 
| 237 |   | UnparsedIndex(Token left, str name, str index)  # for translation
 | 
| 238 | 
 | 
| 239 |   arith_expr =
 | 
| 240 |     EmptyZero              # these are valid:  $(( ))  (( ))  ${a[@]: : }
 | 
| 241 |   | EmptyOne               # condition is 1 for infinite loop:  for (( ; ; ))
 | 
| 242 |   | VarSub %Token          # e.g. $(( x ))  Id.Arith_VarLike
 | 
| 243 |   | Word %CompoundWord     # e.g. $(( 123'456'$y ))
 | 
| 244 | 
 | 
| 245 |   | UnaryAssign(id op_id, arith_expr child)
 | 
| 246 |   | BinaryAssign(id op_id, arith_expr left, arith_expr right)
 | 
| 247 | 
 | 
| 248 |   | Unary(id op_id, arith_expr child)
 | 
| 249 |   | Binary(Token op, arith_expr left, arith_expr right)
 | 
| 250 |   | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)
 | 
| 251 | 
 | 
| 252 |   bool_expr =
 | 
| 253 |     WordTest(word w)  # e.g. [[ myword ]]
 | 
| 254 |   | Binary(id op_id, word left, word right)
 | 
| 255 |   | Unary(id op_id, word child)
 | 
| 256 |   | LogicalNot(bool_expr child)
 | 
| 257 |   | LogicalAnd(bool_expr left, bool_expr right)
 | 
| 258 |   | LogicalOr(bool_expr left, bool_expr right)
 | 
| 259 | 
 | 
| 260 |   redir_loc =
 | 
| 261 |     Fd(int fd) | VarName(str name)
 | 
| 262 | 
 | 
| 263 |   redir_param =
 | 
| 264 |     Word %CompoundWord
 | 
| 265 |   | HereDoc(word here_begin,  # e.g. EOF or 'EOF'
 | 
| 266 |             Token? here_end_tok,  # Token consisting of the whole line
 | 
| 267 |                                   # It's always filled in AFTER creation, but
 | 
| 268 |                                   # temporarily so optional
 | 
| 269 |             List[word_part] stdin_parts  # one for each line
 | 
| 270 |            )
 | 
| 271 | 
 | 
| 272 |   Redir = (Token op, redir_loc loc, redir_param arg)
 | 
| 273 | 
 | 
| 274 |   assign_op = Equal | PlusEqual
 | 
| 275 |   AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
 | 
| 276 |   # TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
 | 
| 277 |   EnvPair = (Token left, str name, rhs_word val)
 | 
| 278 | 
 | 
| 279 |   condition = 
 | 
| 280 |     Shell(List[command] commands)  # if false; true; then echo hi; fi
 | 
| 281 |   | YshExpr(expr e)                # if (x > 0) { echo hi }
 | 
| 282 |                                    # TODO: add more specific blame location
 | 
| 283 | 
 | 
| 284 |   # Each arm tests one word against multiple words
 | 
| 285 |   # shell:  *.cc|*.h) echo C++ ;;
 | 
| 286 |   # YSH:    *.cc|*.h { echo C++ }
 | 
| 287 |   #
 | 
| 288 |   # Three location tokens:
 | 
| 289 |   # 1. left   - shell has ( or *.cc    ysh has *.cc
 | 
| 290 |   # 2. middle - shell has )            ysh has {
 | 
| 291 |   # 3. right  - shell has optional ;;  ysh has required }
 | 
| 292 |   #
 | 
| 293 |   # For YSH typed case, left can be ( and /
 | 
| 294 |   # And the 'pat' in the pattern field may contain more details
 | 
| 295 |   CaseArm = (
 | 
| 296 |       Token left, pat pattern, Token middle, List[command] action,
 | 
| 297 |       Token? right
 | 
| 298 |   )
 | 
| 299 | 
 | 
| 300 |   # The argument to match against in a case command
 | 
| 301 |   # In YSH-style case commands we match against an `expr`, but in sh-style case
 | 
| 302 |   # commands we match against a word.
 | 
| 303 |   case_arg =
 | 
| 304 |     Word(word w)
 | 
| 305 |   | YshExpr(expr e)
 | 
| 306 | 
 | 
| 307 |   EggexFlag = (bool negated, Token flag)
 | 
| 308 | 
 | 
| 309 |   # canonical_flags can be compared for equality.  This is needed to splice
 | 
| 310 |   # eggexes correctly, e.g.  / 'abc' @pat ; i /
 | 
| 311 |   Eggex = (
 | 
| 312 |       Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
 | 
| 313 |       str? canonical_flags)
 | 
| 314 | 
 | 
| 315 |   pat =
 | 
| 316 |     Else
 | 
| 317 |   | Words(List[word] words)
 | 
| 318 |   | YshExprs(List[expr] exprs)
 | 
| 319 |   | Eggex %Eggex
 | 
| 320 |   
 | 
| 321 |   # Each if arm starts with either an "if" or "elif" keyword
 | 
| 322 |   # In YSH, the then keyword is not used (replaced by braces {})
 | 
| 323 |   IfArm = (
 | 
| 324 |       Token keyword, condition cond, Token? then_kw, List[command] action,
 | 
| 325 |       # then_tok used in ysh-ify
 | 
| 326 |       Token? then_tok)
 | 
| 327 | 
 | 
| 328 |   for_iter =
 | 
| 329 |     Args                          # for x; do echo $x; done # implicit "$@"
 | 
| 330 |   | Words(List[word] words)       # for x in 'foo' *.py { echo $x }
 | 
| 331 |                                   # like ShArrayLiteral, but no location for %(
 | 
| 332 |   | YshExpr(expr e, Token blame)  # for x in (mylist) { echo $x }
 | 
| 333 |   | Files(Token left, List[word] words)
 | 
| 334 |                                   # for x in <> {
 | 
| 335 |                                   # for x in < @myfiles > {
 | 
| 336 | 
 | 
| 337 |   BraceGroup = (
 | 
| 338 |       Token left, Token? doc_token, List[command] children, Token right
 | 
| 339 |   )
 | 
| 340 | 
 | 
| 341 |   Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
 | 
| 342 |   RestParam = (Token blame_tok, str name)
 | 
| 343 | 
 | 
| 344 |   ParamGroup = (List[Param] params, RestParam? rest_of)
 | 
| 345 | 
 | 
| 346 |   # 'open' is for proc p { }; closed is for proc p () { }
 | 
| 347 |   proc_sig =
 | 
| 348 |     Open
 | 
| 349 |   | Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
 | 
| 350 |            Param? block_param)
 | 
| 351 | 
 | 
| 352 |   Proc = (Token keyword, Token name, proc_sig sig, command body)
 | 
| 353 | 
 | 
| 354 |   Func = (
 | 
| 355 |       Token keyword, Token name,
 | 
| 356 |       ParamGroup? positional, ParamGroup? named,
 | 
| 357 |       command body
 | 
| 358 |   )
 | 
| 359 | 
 | 
| 360 |   # Retain references to lines
 | 
| 361 |   LiteralBlock = (BraceGroup brace_group, List[SourceLine] lines)
 | 
| 362 | 
 | 
| 363 |   # Represents all of these cases:  s=1  s+=1  s[x]=1 ...
 | 
| 364 |   ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)
 | 
| 365 | 
 | 
| 366 |   command =
 | 
| 367 |     NoOp
 | 
| 368 | 
 | 
| 369 |     # can wrap many children, e.g. { }, loops, functions
 | 
| 370 |   | Redirect(command child, List[Redir] redirects)
 | 
| 371 | 
 | 
| 372 |   | Simple(Token? blame_tok,  # TODO: make required (BracedTuple?)
 | 
| 373 |            List[EnvPair] more_env,
 | 
| 374 |            List[word] words,
 | 
| 375 |            ArgList? typed_args, LiteralBlock? block,
 | 
| 376 |            # do_fork is semantic, not syntactic
 | 
| 377 |            bool do_fork)
 | 
| 378 | 
 | 
| 379 |     # This doesn't technically belong in the LST, but it's convenient for
 | 
| 380 |     # execution
 | 
| 381 |   | ExpandedAlias(command child, List[EnvPair] more_env)
 | 
| 382 |   | Sentence(command child, Token terminator)
 | 
| 383 |     # Represents "bare assignment"
 | 
| 384 |     # Token left is redundant with pairs[0].left
 | 
| 385 |   | ShAssignment(Token left, List[AssignPair] pairs)
 | 
| 386 | 
 | 
| 387 |   | ControlFlow(Token keyword, word? arg_word)
 | 
| 388 | 
 | 
| 389 |     # ops are |  |&
 | 
| 390 |   | Pipeline(Token? negated, List[command] children, List[Token] ops)
 | 
| 391 |     # ops are &&  ||
 | 
| 392 |   | AndOr(List[command] children, List[Token] ops)
 | 
| 393 | 
 | 
| 394 |     # Part of for, while, until (but not if, case, ShFunction).  No redirects.
 | 
| 395 |   | DoGroup(Token left, List[command] children, Token right)
 | 
| 396 |     # A brace group is a compound command, with redirects.
 | 
| 397 |   | BraceGroup %BraceGroup
 | 
| 398 |     # Contains a single child, like CommandSub
 | 
| 399 |   | Subshell(Token left, command child, Token right)
 | 
| 400 |   | DParen(Token left, arith_expr child, Token right)
 | 
| 401 |   | DBracket(Token left, bool_expr expr, Token right)
 | 
| 402 | 
 | 
| 403 |     # up to 3 iteration variables
 | 
| 404 |   | ForEach(Token keyword, List[str] iter_names, for_iter iterable,
 | 
| 405 |             Token? semi_tok, command body)
 | 
| 406 |     # C-style for loop.  Any of the 3 expressions can be omitted.
 | 
| 407 |     # Note: body is required, but only optional here because of initialization
 | 
| 408 |     # order.
 | 
| 409 |   | ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
 | 
| 410 |             arith_expr? update, command? body)
 | 
| 411 |   | WhileUntil(Token keyword, condition cond, command body)
 | 
| 412 | 
 | 
| 413 |   | If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
 | 
| 414 |        Token? fi_kw)
 | 
| 415 |   | Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
 | 
| 416 |          Token arms_end)
 | 
| 417 | 
 | 
| 418 |     # The keyword is optional in the case of bash-style functions
 | 
| 419 |     # (ie. "foo() { ... }") which do not have one.
 | 
| 420 |   | ShFunction(Token? keyword, Token name_tok, str name, command body)
 | 
| 421 | 
 | 
| 422 |   | TimeBlock(Token keyword, command pipeline)
 | 
| 423 |     # Some nodes optimize it out as List[command], but we use CommandList for
 | 
| 424 |     # 1. the top level
 | 
| 425 |     # 2. ls ; ls & ls  (same line)
 | 
| 426 |     # 3. CommandSub  # single child that's a CommandList
 | 
| 427 |     # 4. Subshell  # single child that's a CommandList
 | 
| 428 |   | CommandList(List[command] children)
 | 
| 429 | 
 | 
| 430 |     # YSH command constructs
 | 
| 431 | 
 | 
| 432 |     # var, const.
 | 
| 433 |     # - Keyword is None for hay blocks
 | 
| 434 |     # - RHS is None, for use with value.Place
 | 
| 435 |     # - TODO: consider using BareDecl
 | 
| 436 |   | VarDecl(Token? keyword, List[NameType] lhs, expr? rhs)
 | 
| 437 | 
 | 
| 438 |     # this can behave like 'var', can be desugared
 | 
| 439 |   | BareDecl(Token lhs, expr rhs)
 | 
| 440 | 
 | 
| 441 |     # setvar, maybe 'auto' later
 | 
| 442 |   | Mutation(Token keyword, List[y_lhs] lhs, Token op, expr rhs)
 | 
| 443 |     # = keyword
 | 
| 444 |   | Expr(Token keyword, expr e)
 | 
| 445 |   | Proc %Proc
 | 
| 446 |   | Func %Func
 | 
| 447 |   | Retval(Token keyword, expr val)
 | 
| 448 | 
 | 
| 449 |   #
 | 
| 450 |   # Glob representation, for converting ${x//} to extended regexes.
 | 
| 451 |   #
 | 
| 452 | 
 | 
| 453 |   # Example: *.[ch] is:
 | 
| 454 |   #   Operator(Glob_Star),              # '*'
 | 
| 455 |   #   Literal(Glob_OtherLiteral, '.'),
 | 
| 456 |   #   CharClass(False, ['ch'])  # from Glob_CleanLiterals token
 | 
| 457 | 
 | 
| 458 |   glob_part =
 | 
| 459 |     Literal(id id, str s)
 | 
| 460 |   | Operator(id op_id)  # * or ?
 | 
| 461 |   | CharClass(bool negated, List[str] strs)
 | 
| 462 | 
 | 
| 463 |   # Char classes are opaque for now.  If we ever need them:
 | 
| 464 |   # - Collating symbols are [. .]
 | 
| 465 |   # - Equivalence classes are [= =]
 | 
| 466 | 
 | 
| 467 |   printf_part =
 | 
| 468 |     Literal %Token
 | 
| 469 |     # flags are 0 hyphen space + #
 | 
| 470 |     # type is 's' for %s, etc.
 | 
| 471 |   | Percent(List[Token] flags, Token? width, Token? precision, Token type)
 | 
| 472 | 
 | 
| 473 |   #
 | 
| 474 |   # YSH Language
 | 
| 475 |   #
 | 
| 476 |   # Copied and modified from Python-3.7/Parser/Python.asdl !
 | 
| 477 | 
 | 
| 478 |   expr_context = Load | Store | Del | AugLoad | AugStore | Param
 | 
| 479 | 
 | 
| 480 |   # Type expressions:   Int   List[Int]   Dict[Str, Any]
 | 
| 481 |   # Do we have Func[Int, Int => Int] ?  I guess we can parse that into this
 | 
| 482 |   # system.
 | 
| 483 |   TypeExpr = (Token tok, str name, List[TypeExpr] params)
 | 
| 484 | 
 | 
| 485 |   # LHS bindings in var/const, and eggex
 | 
| 486 |   NameType = (Token left, str name, TypeExpr? typ)
 | 
| 487 | 
 | 
| 488 |   # TODO: Inline this into GenExp and ListComp?  Just use a flag there?
 | 
| 489 |   Comprehension = (List[NameType] lhs, expr iter, expr? cond)
 | 
| 490 | 
 | 
| 491 |   # Named arguments supplied to call.  Token is null for f(; ...named).
 | 
| 492 |   NamedArg = (Token? name, expr value)
 | 
| 493 | 
 | 
| 494 |   # Subscripts are lists of expressions
 | 
| 495 |   #   a[:i, n]      (we don't have matrices, but we have data frames)
 | 
| 496 |   Subscript = (Token left, expr obj, expr index)
 | 
| 497 | 
 | 
| 498 |   # Attributes are obj.attr, d->key, name::scope,
 | 
| 499 |   Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)
 | 
| 500 | 
 | 
| 501 |   y_lhs = 
 | 
| 502 |     Var %Token  # Id.Expr_Name
 | 
| 503 |   | Subscript %Subscript
 | 
| 504 |   | Attribute %Attribute
 | 
| 505 | 
 | 
| 506 |   place_op = 
 | 
| 507 |     # &a[i+1]
 | 
| 508 |     Subscript(Token op, expr index)
 | 
| 509 |     # &d.mykey
 | 
| 510 |   | Attribute(Token op, Token attr)
 | 
| 511 | 
 | 
| 512 |   expr =
 | 
| 513 |     Var(Token left, str name)  # a variable name to evaluate
 | 
| 514 |     # Constants are typically Null, Bool, Int, Float
 | 
| 515 |     #           and also Str for key in {key: 42}
 | 
| 516 |     # But string literals are SingleQuoted or DoubleQuoted
 | 
| 517 |     # Python uses Num(object n), which doesn't respect our "LST" invariant.
 | 
| 518 |   | Const(Token c, value val)
 | 
| 519 | 
 | 
| 520 |     # read(&x)  json read (&x[0])
 | 
| 521 |   | Place(Token blame_tok, str var_name, place_op* ops)
 | 
| 522 | 
 | 
| 523 |     # :| one 'two' "$three" |
 | 
| 524 |   | ShArrayLiteral %ShArrayLiteral
 | 
| 525 | 
 | 
| 526 |     # / d+ ; ignorecase; %python /
 | 
| 527 |   | Eggex %Eggex
 | 
| 528 | 
 | 
| 529 |     # $name is not an expr, but $? is, e.g. Id.VSub_QMark
 | 
| 530 |   | SimpleVarSub %SimpleVarSub
 | 
| 531 |   | BracedVarSub %BracedVarSub
 | 
| 532 |   | CommandSub %CommandSub
 | 
| 533 |   | SingleQuoted %SingleQuoted
 | 
| 534 |   | DoubleQuoted %DoubleQuoted
 | 
| 535 | 
 | 
| 536 |   | Literal(expr inner)
 | 
| 537 |   | Lambda(List[NameType] params, expr body)
 | 
| 538 | 
 | 
| 539 |   | Unary(Token op, expr child)
 | 
| 540 |   | Binary(Token op, expr left, expr right)
 | 
| 541 |     # x < 4 < 3 and (x < 4) < 3
 | 
| 542 |   | Compare(expr left, List[Token] ops, List[expr] comparators)
 | 
| 543 |   | FuncCall(expr func, ArgList args)
 | 
| 544 | 
 | 
| 545 |     # TODO: Need a representation for method call.  We don't just want
 | 
| 546 |     # Attribute() and then Call()
 | 
| 547 | 
 | 
| 548 |   | IfExp(expr test, expr body, expr orelse)
 | 
| 549 |   | Tuple(Token left, List[expr] elts, expr_context ctx)
 | 
| 550 | 
 | 
| 551 |   | List(Token left, List[expr] elts, expr_context ctx)
 | 
| 552 |   | Dict(Token left, List[expr] keys, List[expr] values)
 | 
| 553 |     # For the values in {n1, n2}
 | 
| 554 |   | Implicit
 | 
| 555 | 
 | 
| 556 |   | ListComp(Token left, expr elt, List[Comprehension] generators)
 | 
| 557 |     # not implemented
 | 
| 558 |   | DictComp(Token left, expr key, expr value, List[Comprehension] generators)
 | 
| 559 |   | GeneratorExp(expr elt, List[Comprehension] generators)
 | 
| 560 | 
 | 
| 561 |     # Ranges are written 1:2, with first class expression syntax. There is no
 | 
| 562 |     # step as in Python. Use range(0, 10, step=2) for that.
 | 
| 563 |   | Range(expr lower, Token op, expr upper)
 | 
| 564 | 
 | 
| 565 |     # Slices occur within [] only.  Unlike ranges, the start/end can be
 | 
| 566 |     # implicit.  Like ranges, denote a step with slice(0, 10, step=2).
 | 
| 567 |     #   a[3:]   a[:i]
 | 
| 568 |   | Slice(expr? lower, Token op, expr? upper)
 | 
| 569 | 
 | 
| 570 |   | Subscript %Subscript
 | 
| 571 |   | Attribute %Attribute
 | 
| 572 | 
 | 
| 573 |     # Ellipsis is like 'Starred' within Python, which are valid on the LHS in
 | 
| 574 |     # Python for unpacking, and within list literals for splicing.
 | 
| 575 |     # (Starred is NOT used for {k:v, **a}.  That used a blank "keys"
 | 
| 576 |     # attribute.)
 | 
| 577 | 
 | 
| 578 |     # I think we can use { **pairs } like Python
 | 
| 579 |   | Spread(Token left, expr child)
 | 
| 580 | 
 | 
| 581 |   #
 | 
| 582 |   # Regex Language (Eggex)
 | 
| 583 |   #
 | 
| 584 | 
 | 
| 585 |   # e.g. alnum digit
 | 
| 586 |   PosixClass = (Token? negated, str name)
 | 
| 587 |   # e.g. d w s
 | 
| 588 |   PerlClass = (Token? negated, str name)
 | 
| 589 | 
 | 
| 590 |   # Char Sets and Ranges both use Char Codes
 | 
| 591 |   # with u_braced == true : \u{ff}
 | 
| 592 |   # with u_braced == false: \xff \\ 'a' a '0' 0
 | 
| 593 |   # ERE doesn't make a distinction, but compiling to Python/PCRE can use it
 | 
| 594 |   CharCode = (Token blame_tok, int i, bool u_braced)
 | 
| 595 |   CharRange = (CharCode start, CharCode end)
 | 
| 596 | 
 | 
| 597 |   # Note: .NET has && in character classes, making it a recursive language
 | 
| 598 | 
 | 
| 599 |   class_literal_term = 
 | 
| 600 |     PosixClass %PosixClass
 | 
| 601 |   | PerlClass %PerlClass
 | 
| 602 |   | CharRange %CharRange
 | 
| 603 |   | CharCode %CharCode
 | 
| 604 | 
 | 
| 605 |   | SingleQuoted %SingleQuoted
 | 
| 606 |     # @chars
 | 
| 607 |   | Splice(Token name, str var_name)  # could be Splice %Token
 | 
| 608 | 
 | 
| 609 |   # evaluated version of class_literal_term (could be in runtime.asdl)
 | 
| 610 |   char_class_term =
 | 
| 611 |     PosixClass %PosixClass
 | 
| 612 |   | PerlClass %PerlClass
 | 
| 613 | 
 | 
| 614 |   | CharRange %CharRange
 | 
| 615 |     # For [ \x00 \\ ]
 | 
| 616 |   | CharCode %CharCode
 | 
| 617 | 
 | 
| 618 |   # NOTE: modifier is unused now, can represent L or P
 | 
| 619 |   re_repeat =
 | 
| 620 |     Op %Token  # + * ? or Expr_DecInt for x{3}
 | 
| 621 |   | Range(Token? left, str lower, str upper, Token? right)  # dot{1,2}
 | 
| 622 |   # Haven't implemented the modifier, e.g. x{+ P}
 | 
| 623 |   # | Num(Token times, id modifier)
 | 
| 624 |   # | Range(Token? lower, Token? upper, id modifier)
 | 
| 625 | 
 | 
| 626 |   re = 
 | 
| 627 |     Primitive(Token blame_tok, id id)  # . ^ $   dot %start %end
 | 
| 628 |   | PosixClass %PosixClass
 | 
| 629 |   | PerlClass %PerlClass
 | 
| 630 |     # syntax [ $x \n ]
 | 
| 631 |   | CharClassLiteral(bool negated, List[class_literal_term] terms)
 | 
| 632 |     # evaluated [ 'abc' \n ]
 | 
| 633 |   | CharClass(bool negated, List[char_class_term] terms)
 | 
| 634 | 
 | 
| 635 |     # @D
 | 
| 636 |   | Splice(Token name, str var_name)  # TODO: Splice %Token ?
 | 
| 637 | 
 | 
| 638 |   | SingleQuoted %SingleQuoted
 | 
| 639 | 
 | 
| 640 |     # Compound:
 | 
| 641 |   | Repeat(re child, re_repeat op)
 | 
| 642 |   | Seq(List[re] children)
 | 
| 643 |   | Alt(List[re] children)
 | 
| 644 | 
 | 
| 645 |   | Group(re child)
 | 
| 646 |     # convert_func is filled in on evaluation (NOTE(review): no such field below; presumably func_name - confirm)
 | 
| 647 |     # TODO: name and func_name can be expanded to strings
 | 
| 648 |   | Capture(re child, Token? name, Token? func_name)
 | 
| 649 |   | Backtracking(bool negated, Token name, re child)
 | 
| 650 | 
 | 
| 651 |     # \u{ff} is parsed as this, but SingleQuoted also evaluates to it
 | 
| 652 |   | LiteralChars(Token blame_tok, str s)
 | 
| 653 | }
 |