# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
"""
word_parse.py - Parse the shell word language.

Hairy example:

    hi$((1 + 2))"$(echo hi)"${var:-__"$(echo default)"__}

Substitutions can be nested, but which inner subs are allowed depends on the
outer sub.  Notes:

lex_mode_e.ShCommand (_ReadUnquotedLeftParts)
  All subs and quotes are allowed:
  $v ${v}   $() ``   $(())   '' ""   $'' $""  <()  >()

lex_mode_e.DQ  (_ReadDoubleQuotedLeftParts)
  Var, Command, and Arith subs, but no quotes.
  $v ${v}   $() ``   $(())
  No process substitution.

lex_mode_e.Arith
  Similar to DQ: Var, Command, and Arith sub, but no process sub.  bash doesn't
  allow quotes, but OSH does.  We allow ALL FOUR kinds of quotes, because we
  need those for associative array indexing.

lex_mode_e.VSub_ArgUnquoted
  Like ShCommand, everything is allowed (even process substitutions), but we
  stop at }, and space is SIGNIFICANT.

  Example: ${a:-  b   }

  ${X:-$v}   ${X:-${v}}  ${X:-$(echo hi)}  ${X:-`echo hi`}  ${X:-$((1+2))}
  ${X:-'single'}  ${X:-"double"}  ${X:-$'\n'}  ${X:-<(echo hi)}

lex_mode_e.VSub_ArgDQ
  In contrast to DQ, VSub_ArgDQ accepts nested "" and $'' and $"", e.g.
  "${x:-"default"}".

  In contrast, VSub_ArgUnquoted respects single quotes and process
  substitution.

  It's weird that double quotes are allowed.  Space is also significant here,
  e.g. "${x:-a  "b"}".
"""

from _devbuild.gen import grammar_nt
from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
from _devbuild.gen.types_asdl import (lex_mode_t, lex_mode_e)
from _devbuild.gen.syntax_asdl import (
    BoolParamBox,
    Token,
    SimpleVarSub,
    loc,
    source,
    DoubleQuoted,
    SingleQuoted,
    BracedVarSub,
    CommandSub,
    ShArrayLiteral,
    AssocPair,
    bracket_op,
    bracket_op_t,
    suffix_op,
    suffix_op_t,
    rhs_word,
    rhs_word_e,
    rhs_word_t,
    word_e,
    word_t,
    CompoundWord,
    word_part,
    word_part_t,
    y_lhs_e,
    arith_expr_t,
    command,
    expr,
    expr_e,
    expr_t,
    pat_t,
    ArgList,
    Proc,
    Func,
    Subscript,
    Attribute,
    arith_expr,
)
from core import alloc
from core.error import p_die
from mycpp.mylib import log
from core import pyutil
from core import ui
from frontend import consts
from frontend import lexer
from frontend import reader
from osh import tdop
from osh import arith_parse
from osh import braces
from osh import word_
from osh import word_compile
from mycpp.mylib import tagswitch

from typing import List, Optional, Tuple, cast
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from frontend.lexer import Lexer
    from frontend.parse_lib import ParseContext
    from frontend.reader import _Reader
    from osh.cmd_parse import VarChecker

unused1 = log
unused2 = Id_str

KINDS_THAT_END_WORDS = [Kind.Eof, Kind.WS, Kind.Op, Kind.Right]
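
# Example: in `echo foo;`, the word `foo` is terminated by the `;` (Kind.Op);
# a space (Kind.WS), a closing token like ) (Kind.Right), or EOF also ends a
# word.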


class WordEmitter(object):
    """Common interface for [ and [["""

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def ReadWord(self, lex_mode):
        # type: (lex_mode_t) -> word_t
        raise NotImplementedError()


class WordParser(WordEmitter):

    def __init__(self, parse_ctx, lexer, line_reader):
        # type: (ParseContext, Lexer, _Reader) -> None
        self.parse_ctx = parse_ctx
        self.lexer = lexer
        self.line_reader = line_reader
        self.arena = line_reader.arena

        self.parse_opts = parse_ctx.parse_opts
        self.a_parser = tdop.TdopParser(arith_parse.Spec(), self,
                                        self.parse_opts)
        self.Reset()

    def Init(self, lex_mode):
        # type: (lex_mode_t) -> None
        """Used to parse arithmetic, see ParseContext."""
        self.next_lex_mode = lex_mode

    def Reset(self):
        # type: () -> None
        """Called by the interactive loop."""
        # For _GetToken()
        self.cur_token = None  # type: Token
        self.token_kind = Kind.Undefined
        self.token_type = Id.Undefined_Tok

        self.next_lex_mode = lex_mode_e.ShCommand

        # Boolean mutated by CommandParser via word_.ctx_EmitDocToken.  For ### doc
        # comments.
        self.emit_doc_token = False
        # Boolean mutated by CommandParser via word_.ctx_Multiline.  '...' starts
        # multiline mode.
        self.multiline = False

        # For detecting invalid \n\n in multiline mode.  Counts what we got
        # directly from the lexer.
        self.newline_state = 0
        # For consolidating \n\n -> \n for the CALLER.  This simplifies the parsers
        # that consume words.
        self.returned_newline = False

        # For integration with pgen2
        self.buffered_word = None  # type: word_t

    def _GetToken(self):
        # type: () -> None
        """Call this when you need to make a decision based on any of:

        self.token_type
        self.token_kind
        self.cur_token
        """
        if self.next_lex_mode == lex_mode_e.Undefined:
            return  # _SetNext() not called, so do nothing

        is_fake = self.next_lex_mode == lex_mode_e.BashRegexFakeInner
        real_mode = (lex_mode_e.BashRegex if is_fake else self.next_lex_mode)

        self.cur_token = self.lexer.Read(real_mode)

        # MUTATE TOKEN for fake lexer mode.
        # This is for crazy stuff bash allows, like [[ s =~ (< >) ]]
        if (is_fake and self.cur_token.id
                in (Id.WS_Space, Id.BashRegex_AllowedInParens)):
            self.cur_token.id = Id.Lit_Chars

        self.token_type = self.cur_token.id
        self.token_kind = consts.GetKind(self.token_type)

        # number of consecutive newlines, ignoring whitespace
        if self.token_type == Id.Op_Newline:
            self.newline_state += 1
        elif self.token_kind != Kind.WS:
            self.newline_state = 0

        self.parse_ctx.trail.AppendToken(self.cur_token)  # For completion
        self.next_lex_mode = lex_mode_e.Undefined

    def _SetNext(self, lex_mode):
        # type: (lex_mode_t) -> None
        """Set the next lex state, but don't actually read a token.

        We need this for proper interactive parsing.
        """
        self.next_lex_mode = lex_mode

    def _ReadVarOpArg(self, arg_lex_mode):
        # type: (lex_mode_t) -> rhs_word_t

        # NOTE: Operators like | and < are not treated as special, so ${a:- | >} is
        # valid, even when unquoted.
        self._SetNext(arg_lex_mode)
        self._GetToken()

        w = self._ReadVarOpArg2(arg_lex_mode, Id.Undefined_Tok,
                                True)  # empty_ok

        # If the Compound has no parts, and we're in a double-quoted VarSub
        # arg, and empty_ok, then return Empty.  This is so it can evaluate to
        # the empty string and not get elided.
        #
        # Examples:
        # - "${s:-}", "${s/%pat/}"
        # It's similar to LooksLikeShAssignment, where we turn x= into x=''.  And it
        # has the same potential problem of not having Token location info.
        #
        # NOTE: empty_ok is False only for the PatSub pattern, which means we'll
        # return a Compound with no parts, which is explicitly checked with a
        # custom error message.
        if len(w.parts) == 0 and arg_lex_mode == lex_mode_e.VSub_ArgDQ:
            return rhs_word.Empty

        return w

    def _ReadVarOpArg2(self, arg_lex_mode, eof_type, empty_ok):
        # type: (lex_mode_t, Id_t, bool) -> CompoundWord
        """Return a CompoundWord.

        Helper function for _ReadVarOpArg, and used directly by
        _ReadPatSubVarOp.
        """
        w = self._ReadCompoundWord3(arg_lex_mode, eof_type, empty_ok)
        #log('w %s', w)
        tilde = word_.TildeDetect(w)
        if tilde:
            w = tilde
        return w

    def _ReadSliceVarOp(self):
        # type: () -> suffix_op.Slice
        """
        Looking at the token after the first ':'

        ArithExpr? (':' ArithExpr? )? '}'
        """
        self._NextNonSpace()

        cur_id = self.token_type

        if cur_id in (Id.Arith_RBrace, Id.Arith_Colon):  #  ${a:} or ${a::}
            begin = arith_expr.EmptyZero  # type: arith_expr_t
        else:
            begin = self.a_parser.Parse()
            cur_id = self.a_parser.CurrentId()  # advance

        if cur_id == Id.Arith_RBrace:  #  ${a:1} or ${@:1}
            # No length specified, so it's N
            no_length = None  # type: Optional[arith_expr_t]
            return suffix_op.Slice(begin, no_length)

        elif cur_id == Id.Arith_Colon:  # ${a:1:} or ${@:1:}
            colon_tok = self.cur_token
            self._NextNonSpace()

            if self.token_type == Id.Arith_RBrace:
                # Quirky bash behavior:
                # ${a:1:} or ${a::} means length ZERO,
                # but ${a:1} or ${a:} means length N
                if self.parse_opts.strict_parse_slice():
                    p_die(
                        "Slice length: Add explicit zero, or omit : for N (strict_parse_slice)",
                        colon_tok)

                length = arith_expr.EmptyZero  # type: arith_expr_t
            else:
                length = self._ReadArithExpr(Id.Arith_RBrace)

            return suffix_op.Slice(begin, length)

        else:
            p_die("Expected : or } in slice", self.cur_token)

        raise AssertionError()  # for MyPy
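
    # Slice examples handled by _ReadSliceVarOp (a sketch of the cases above):
    #   ${a:1:2}  -> Slice(begin=1, length=2)
    #   ${a:1}    -> Slice(begin=1, length=None)  # to the end
    #   ${a::}    -> begin and length both EmptyZero (the bash quirk noted above)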

    def _ReadPatSubVarOp(self):
        # type: () -> suffix_op.PatSub
        """Looking at the first '/' after VarOf:

        VarSub    = ...
                  | VarOf '/' Match ( '/' WORD? )?
        Match     = '/' WORD   # can't be empty
                  | '#' WORD?  # may be empty
                  | '%' WORD?
        """
        slash_tok = self.cur_token  # location info
        replace_mode = Id.Undefined_Tok  # bizarre syntax / # %

        self._SetNext(lex_mode_e.VSub_ArgUnquoted)  # advance past /

        self._GetToken()
        if self.token_type == Id.Right_DollarBrace:
            pat = CompoundWord([])
            return suffix_op.PatSub(pat, rhs_word.Empty, replace_mode,
                                    slash_tok)

        if self.token_type in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
            replace_mode = self.token_type
            self._SetNext(lex_mode_e.VSub_ArgUnquoted)

        # Bash quirk:
        # echo ${x/#/replace} has an empty pattern
        # echo ${x////replace} is non-empty; it means echo ${x//'/'/replace}
        empty_ok = replace_mode != Id.Lit_Slash
        pat = self._ReadVarOpArg2(lex_mode_e.VSub_ArgUnquoted, Id.Lit_Slash,
                                  empty_ok)
        #log('pat 1 %r', pat)

        if self.token_type == Id.Lit_Slash:
            # read until }
            replace = self._ReadVarOpArg(
                lex_mode_e.VSub_ArgUnquoted)  # type: rhs_word_t
            #log('r 1 %r', replace)
        else:
            # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
            replace = rhs_word.Empty

        self._GetToken()
        if self.token_type != Id.Right_DollarBrace:
            # This happens on invalid code
            p_die(
                "Expected } after replacement string, got %s" %
                ui.PrettyId(self.token_type), self.cur_token)

        return suffix_op.PatSub(pat, replace, replace_mode, slash_tok)
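
    # PatSub examples, per the grammar above (illustrative):
    #   ${x/pat/repl}   replace the first match
    #   ${x//pat/repl}  replace_mode Lit_Slash: replace all matches
    #   ${x/#pat/repl}  replace_mode Lit_Pound: anchor at the start
    #   ${x/%pat/repl}  replace_mode Lit_Percent: anchor at the end
    #   ${v/a}          same as ${v/a/} -- empty replacement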

    def _ReadSubscript(self):
        # type: () -> bracket_op_t
        """ Subscript = '[' ('@' | '*' | ArithExpr) ']' """
        # Lookahead to see if we get @ or *.  Otherwise read a full arithmetic
        # expression.
        next_id = self.lexer.LookPastSpace(lex_mode_e.Arith)
        if next_id in (Id.Lit_At, Id.Arith_Star):
            op = bracket_op.WholeArray(next_id)  # type: bracket_op_t

            self._SetNext(lex_mode_e.Arith)  # skip past [
            self._GetToken()
            self._SetNext(lex_mode_e.Arith)  # skip past @
            self._GetToken()
        else:
            self._SetNext(lex_mode_e.Arith)  # skip past [
            anode = self._ReadArithExpr(Id.Arith_RBracket)
            op = bracket_op.ArrayIndex(anode)

        if self.token_type != Id.Arith_RBracket:  # Should be looking at ]
            p_die('Expected ] to close subscript', self.cur_token)

        self._SetNext(lex_mode_e.VSub_2)  # skip past ]
        self._GetToken()  # Needed to be in the same spot as no subscript

        return op
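
    # Subscript examples (illustrative): ${a[i+1]} produces
    # bracket_op.ArrayIndex with a parsed arithmetic expression, while ${a[@]}
    # and ${a[*]} produce bracket_op.WholeArray.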

    def _ParseVarOf(self):
        # type: () -> BracedVarSub
        """
        VarOf     = NAME Subscript?
                  | NUMBER      # no subscript allowed, none of these are arrays
                                # ${@[1]} doesn't work, even though slicing does
                  | VarSymbol
        """
        self._GetToken()
        name_token = self.cur_token
        self._SetNext(lex_mode_e.VSub_2)

        self._GetToken()  # Check for []
        if self.token_type == Id.VOp2_LBracket:
            bracket_op = self._ReadSubscript()
        else:
            bracket_op = None

        part = BracedVarSub.CreateNull()
        part.token = name_token
        part.var_name = lexer.TokenVal(name_token)
        part.bracket_op = bracket_op
        return part

    def _ParseVarExpr(self, arg_lex_mode, allow_query=False):
        # type: (lex_mode_t, bool) -> BracedVarSub
        """Start parsing at the op -- we already skipped past the name."""
        part = self._ParseVarOf()

        self._GetToken()
        if self.token_type == Id.Right_DollarBrace:
            return part  # no ops

        op_kind = self.token_kind

        if op_kind == Kind.VTest:
            tok = self.cur_token
            arg_word = self._ReadVarOpArg(arg_lex_mode)
            if self.token_type != Id.Right_DollarBrace:
                p_die('Expected } to close ${', self.cur_token)

            part.suffix_op = suffix_op.Unary(tok, arg_word)

        elif op_kind == Kind.VOpYsh:
            tok = self.cur_token
            arg_word = self._ReadVarOpArg(arg_lex_mode)
            if self.token_type != Id.Right_DollarBrace:
                p_die('Expected } to close ${', self.cur_token)

            UP_arg_word = arg_word
            with tagswitch(arg_word) as case:
                if case(rhs_word_e.Empty):
                    pass
                elif case(rhs_word_e.Compound):
                    arg_word = cast(CompoundWord, UP_arg_word)
                    # This handles ${x|html} and ${x %.3f} now.
                    # However, I think ${x %.3f} should be statically parsed?
                    # It can enter the printf lexer modes.
                    ok, arg, quoted = word_.StaticEval(arg_word)
                    if not ok or quoted:
                        p_die('Expected a constant argument',
                              loc.Word(arg_word))

            part.suffix_op = suffix_op.Static(tok, arg)

        elif op_kind == Kind.VOp0:
            part.suffix_op = self.cur_token  # Nullary
            self._SetNext(lex_mode_e.VSub_2)  # Expecting }
            self._GetToken()

        elif op_kind == Kind.VOp1:  # % %% # ## etc.
            tok = self.cur_token
            # Weird exception that all shells have: these operators take a glob
            # pattern, so they're lexed as VSub_ArgUnquoted, not VSub_ArgDQ
            arg_word = self._ReadVarOpArg(lex_mode_e.VSub_ArgUnquoted)
            if self.token_type != Id.Right_DollarBrace:
                p_die('Expected } to close ${', self.cur_token)

            part.suffix_op = suffix_op.Unary(tok, arg_word)

        elif op_kind == Kind.VOp2:  # / : [ ]
            if self.token_type == Id.VOp2_Slash:
                patsub_op = self._ReadPatSubVarOp()  # type: suffix_op_t
                part.suffix_op = patsub_op

                # Checked by the method above
                assert self.token_type == Id.Right_DollarBrace, self.cur_token

            elif self.token_type == Id.VOp2_Colon:
                part.suffix_op = self._ReadSliceVarOp()
                # NOTE: } is read in arithmetic mode.
                if self.token_type != Id.Arith_RBrace:
                    # The token seems off; it doesn't point to X in ${a:1:2 X
                    p_die('Expected } to close ${', self.cur_token)

            else:
                # TODO: Does this ever happen?
                p_die('Unexpected token in ${} (%s)' % 'VOp2', self.cur_token)

        elif op_kind == Kind.VOp3:  # ${prefix@} etc.
            if allow_query:
                part.suffix_op = self.cur_token  # Nullary
                self._SetNext(lex_mode_e.VSub_2)  # Expecting }
                self._GetToken()
            else:
                p_die("Unexpected token in ${} (%s)" % 'VOp3', self.cur_token)

        # NOTE: Arith_RBrace is for slicing, because it reads } in arithmetic
        # mode.  It's redundantly checked above.
        if self.token_type not in (Id.Right_DollarBrace, Id.Arith_RBrace):
            # ${a.} or ${!a.}
            p_die('Expected } to close ${', self.cur_token)

        # Now look for ops
        return part

    def _ReadZshVarSub(self, left_token):
        # type: (Token) -> word_part.ZshVarSub

        self._SetNext(lex_mode_e.VSub_Zsh)  # Move past ${(foo)

        # Can be empty
        w = self._ReadCompoundWord3(lex_mode_e.VSub_Zsh, Id.Right_DollarBrace,
                                    True)
        self._GetToken()
        return word_part.ZshVarSub(left_token, w, self.cur_token)

    def ReadBracedVarSub(self, left_token):
        # type: (Token) -> Tuple[BracedVarSub, Token]
        """For YSH expressions like var x = ${x:-"default"}."""
        part = self._ReadBracedVarSub(left_token, d_quoted=False)
        last_token = self.cur_token
        return part, last_token

    def _ReadBracedVarSub(self, left_token, d_quoted):
        # type: (Token, bool) -> BracedVarSub
        """For the ${} expression language.

        NAME        = [a-zA-Z_][a-zA-Z0-9_]*
        NUMBER      = [0-9]+                    # ${10}, ${11}, ...

        Subscript   = '[' ('@' | '*' | ArithExpr) ']'
        VarSymbol   = '!' | '@' | '#' | ...
        VarOf       = NAME Subscript?
                    | NUMBER      # no subscript allowed, none of these are arrays
                                  # ${@[1]} doesn't work, even though slicing does
                    | VarSymbol

        NULLARY_OP  = '@Q' | '@E' | '@P' | '@A' | '@a'  # VOp0

        TEST_OP     = '-' | ':-' | '=' | ':=' | '+' | ':+' | '?' | ':?'
        STRIP_OP    = '#' | '##' | '%' | '%%'
        CASE_OP     = ',' | ',,' | '^' | '^^'
        UnaryOp     = TEST_OP | STRIP_OP | CASE_OP

        YSH_UNARY   = '|' | ' '                 # ${x|html} and ${x %.3f}.
                                                # SPACE is the operator, not %
        Match       = ('/' | '#' | '%') WORD    # match all / prefix / suffix
        VarExpr     = VarOf
                    | VarOf NULLARY_OP
                    | VarOf UnaryOp WORD
                    | VarOf YSH_UNARY STATIC_WORD
                    | VarOf ':' ArithExpr (':' ArithExpr )?
                    | VarOf '/' Match '/' WORD

        LengthExpr  = '#' VarOf    # can't apply operators after length

        RefOrKeys   = '!' VarExpr  # CAN apply operators after a named ref
                                   # ${!ref[0]} vs ${!keys[@]} resolved later

        PrefixQuery = '!' NAME ('*' | '@')  # list variable names with a prefix

        BuiltinSub  = '.' WORD+    # ${.myproc 'builtin' $sub}

        VarSub      = LengthExpr
                    | RefOrKeys
                    | PrefixQuery
                    | VarExpr
                    | BuiltinSub

        NOTES:
        - Arithmetic expressions are used twice: inside subscripts ${a[x+1]} and
          in slicing ${a:x+1:y+2}
        - ${#} and ${!} need LL(2) lookahead (considering how my tokenizer works)
        - @ and * are technically arithmetic expressions in this implementation
        - We don't account for bash 4.4: ${param@operator} -- Q E P A a.  Note that
          it's also vectorized.

        Strictness over bash:
        - echo ${a[0][0]} doesn't do anything useful, so we disallow it from the
          grammar
        - ! and # prefixes can't be composed, even though named refs can be
          composed with other operators
        - '#' means 4 different things: length prefix, VarSymbol, UnaryOp to strip
          a prefix, and it can also be a literal part of WORD.

        From the parser's point of view, the prefix # can't be combined with
        UnaryOp/slicing/matching, while ! can.  However:

        - ${a[@]:1:2} is not allowed
        - ${#a[@]:1:2} is allowed, but gives the wrong answer
        """
        if d_quoted:
            arg_lex_mode = lex_mode_e.VSub_ArgDQ
        else:
            arg_lex_mode = lex_mode_e.VSub_ArgUnquoted

        self._SetNext(lex_mode_e.VSub_1)
        self._GetToken()

        ty = self.token_type
        first_tok = self.cur_token

        if ty == Id.VSub_Pound:
            # Disambiguate
            next_id = self.lexer.LookPastSpace(lex_mode_e.VSub_1)
            if next_id not in (Id.Unknown_Tok, Id.Right_DollarBrace):
                # e.g. a name, so '#' is the prefix
                self._SetNext(lex_mode_e.VSub_1)
                part = self._ParseVarOf()

                self._GetToken()
                if self.token_type != Id.Right_DollarBrace:
                    p_die('Expected } after length expression', self.cur_token)

                part.prefix_op = first_tok

            else:  # not a prefix; '#' is the variable
                part = self._ParseVarExpr(arg_lex_mode)

        elif ty == Id.VSub_Bang:
            next_id = self.lexer.LookPastSpace(lex_mode_e.VSub_1)
            if next_id not in (Id.Unknown_Tok, Id.Right_DollarBrace):
                # e.g. a name, so '!' is the prefix
                # ${!a} -- this is a ref
                # ${!3} -- this is a ref
                # ${!a[1]} -- this is a ref
                # ${!a[@]} -- this is a keys query
                # No lookahead -- do it in a second step, or at runtime
                self._SetNext(lex_mode_e.VSub_1)
                part = self._ParseVarExpr(arg_lex_mode, allow_query=True)

                part.prefix_op = first_tok

            else:  # not a prefix; '!' is the variable
                part = self._ParseVarExpr(arg_lex_mode)

        elif ty == Id.VSub_Dot:
            # Note: this will become a new builtin_sub type, so this method must
            # return word_part_t rather than BracedVarSub.  I don't think that
            # should cause problems.
            p_die('TODO: ${.myproc builtin sub}', self.cur_token)

        # VS_NAME, VS_NUMBER, or a symbol that isn't # or !
        elif self.token_kind == Kind.VSub:
            part = self._ParseVarExpr(arg_lex_mode)

        else:
            # e.g. ${^}
            p_die('Unexpected token in ${}', self.cur_token)

        part.left = left_token  # attach the argument
        part.right = self.cur_token
        return part
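
    # Prefix op examples, per the grammar above (illustrative):
    #   ${#x}     LengthExpr -- prefix_op is the '#' token, no suffix ops
    #   ${!ref}   RefOrKeys -- prefix_op is the '!' token
    #   ${!a[@]}  ref vs. keys query is resolved later (or at runtime)
    #   ${x:-d}   plain VarExpr with a Kind.VTest suffix op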

    def _ReadSingleQuoted(self, left_token, lex_mode):
        # type: (Token, lex_mode_t) -> SingleQuoted
        """Internal method to read a word_part."""
        tokens = []  # type: List[Token]
        # In command mode, we never disallow backslashes like '\'
        right_quote = self.ReadSingleQuoted(lex_mode, left_token, tokens,
                                            False)
        sval = word_compile.EvalSingleQuoted(left_token.id, tokens)
        node = SingleQuoted(left_token, sval, right_quote)
        return node

    def ReadSingleQuoted(self, lex_mode, left_token, out_tokens, is_ysh_expr):
        # type: (lex_mode_t, Token, List[Token], bool) -> Token
        """Appends to out_tokens; returns the last token.

        Used by expr_parse.py
        """
        # TODO: Remove and use out_tokens
        tokens = []  # type: List[Token]

        # echo '\' is allowed, but x = '\' is invalid, in favor of x = r'\'
        no_backslashes = is_ysh_expr and left_token.id == Id.Left_SingleQuote

        expected_end_tokens = 3 if left_token.id in (
            Id.Left_TSingleQuote, Id.Left_RTSingleQuote, Id.Left_UTSingleQuote,
            Id.Left_BTSingleQuote) else 1
        num_end_tokens = 0

        while num_end_tokens < expected_end_tokens:
            self._SetNext(lex_mode)
            self._GetToken()

            # Kind.Char is emitted in lex_mode_e.SQ_C
            if self.token_kind in (Kind.Lit, Kind.Char):
                tok = self.cur_token
                # Happens in lex_mode_e.SQ: 'one\two' is ambiguous; it should be
                # r'one\two' or c'one\\two'
                if no_backslashes and lexer.TokenContains(tok, '\\'):
                    p_die(
                        r"Strings with backslashes should look like r'\n' or u'\n' or b'\n'",
                        tok)

                if is_ysh_expr:
                    # Disallow var x = $'\001'.  Arguably we don't need these
                    # checks because u'\u{1}' is the way to write it.
                    if self.token_type == Id.Char_Octal3:
                        p_die(
                            r"Use \xhh or \u{...} instead of octal escapes in YSH strings",
                            tok)

                    if self.token_type == Id.Char_Hex and self.cur_token.length != 4:
                        # disallow \xH
                        p_die(
                            r'Invalid hex escape in YSH string (must be \xHH)',
                            tok)

                tokens.append(tok)

            elif self.token_kind == Kind.Unknown:
                tok = self.cur_token
                assert tok.id == Id.Unknown_Backslash, tok

                # x = $'\z' is disallowed; ditto for echo $'\z' if shopt -u parse_backslash
                if is_ysh_expr or not self.parse_opts.parse_backslash():
                    p_die(
                        "Invalid char escape in C-style string literal (OILS-ERR-11)",
                        tok)

                tokens.append(tok)

            elif self.token_kind == Kind.Eof:
                p_die('Unexpected EOF in single-quoted string that began here',
                      left_token)

            elif self.token_kind == Kind.Right:
                # assume Id.Right_SingleQuote
                num_end_tokens += 1
                tokens.append(self.cur_token)

            else:
                raise AssertionError(self.cur_token)

            if self.token_kind != Kind.Right:
                num_end_tokens = 0  # we need three in a ROW

        if expected_end_tokens == 1:
            tokens.pop()
        elif expected_end_tokens == 3:  # Get rid of spurious end tokens
            tokens.pop()
            tokens.pop()
            tokens.pop()

        # Remove space from '''  r'''  $''' in both expression mode and command mode
        if left_token.id in (Id.Left_TSingleQuote, Id.Left_RTSingleQuote,
                             Id.Left_UTSingleQuote, Id.Left_BTSingleQuote):
            word_compile.RemoveLeadingSpaceSQ(tokens)

        # Validation after lexing - the same 2 checks are in j8.LexerDecoder
        is_u_string = left_token.id in (Id.Left_USingleQuote,
                                        Id.Left_UTSingleQuote)

        for tok in tokens:
            # u'\yff' is not valid, but b'\yff' is
            if is_u_string and tok.id == Id.Char_YHex:
                p_die(
                    r"%s escapes not allowed in u'' strings" %
                    lexer.TokenVal(tok), tok)

        out_tokens.extend(tokens)
        return self.cur_token
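
    # Note on the loop above: for r''' strings, expected_end_tokens is 3, so
    # parsing only stops after three CONSECUTIVE Right_SingleQuote tokens; any
    # intervening token resets num_end_tokens to 0.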

    def _ReadDoubleQuotedLeftParts(self):
        # type: () -> word_part_t
        """Read substitution parts in a double quoted context."""
        if self.token_type in (Id.Left_DollarParen, Id.Left_Backtick):
            return self._ReadCommandSub(self.token_type, d_quoted=True)

        if self.token_type == Id.Left_DollarBrace:
            return self._ReadBracedVarSub(self.cur_token, d_quoted=True)

        if self.token_type == Id.Left_DollarDParen:
            return self._ReadArithSub()

        if self.token_type == Id.Left_DollarBracket:
            return self._ReadExprSub(lex_mode_e.DQ)

        raise AssertionError(self.cur_token)

    def _ReadYshSingleQuoted(self, left_id):
        # type: (Id_t) -> CompoundWord
        """Read YSH-style strings:

        r''        u''        b''
        r''' '''   u''' '''   b''' '''
        """
        #log('BEF self.cur_token %s', self.cur_token)
        if left_id == Id.Left_RSingleQuote:
            lexer_mode = lex_mode_e.SQ_Raw
            triple_left_id = Id.Left_RTSingleQuote
        elif left_id == Id.Left_USingleQuote:
            lexer_mode = lex_mode_e.J8_Str
            triple_left_id = Id.Left_UTSingleQuote
        elif left_id == Id.Left_BSingleQuote:
            lexer_mode = lex_mode_e.J8_Str
            triple_left_id = Id.Left_BTSingleQuote
        else:
            raise AssertionError(left_id)

        # Needed for syntax checks
        left_tok = self.cur_token
        left_tok.id = left_id

        sq_part = self._ReadSingleQuoted(left_tok, lexer_mode)

        if (len(sq_part.sval) == 0 and self.lexer.ByteLookAhead() == "'"):
            self._SetNext(lex_mode_e.ShCommand)
            self._GetToken()

            assert self.token_type == Id.Left_SingleQuote
            # HACK: magically transform the third ' in u''' to
            # Id.Left_UTSingleQuote, so that ''' is the terminator
            left_tok = self.cur_token
            left_tok.id = triple_left_id

            # Handles stripping leading whitespace
            sq_part = self._ReadSingleQuoted(left_tok, lexer_mode)

        # Advance and validate
        self._SetNext(lex_mode_e.ShCommand)

        self._GetToken()
        if self.token_kind not in KINDS_THAT_END_WORDS:
            p_die('Unexpected token after YSH single-quoted string',
                  self.cur_token)

        return CompoundWord([sq_part])
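
    # Illustration of the triple-quote hack above: given u'''x''', the u''
    # first parses as an empty string; since the next byte is another quote,
    # the third ' is re-tagged as Id.Left_UTSingleQuote and the body is
    # re-read as a triple-quoted string.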

    def _ReadUnquotedLeftParts(self, triple_out):
        # type: (Optional[BoolParamBox]) -> word_part_t
        """Read substitutions and quoted strings (for lex_mode_e.ShCommand).

        If triple_out is set, then we try parsing triple quoted strings,
        and set its value to True if we got one.
        """
        if self.token_type in (Id.Left_DoubleQuote, Id.Left_DollarDoubleQuote):
            # Note: $"" is a synonym for "".  It might make sense if it added
            # \n \0 \x00 \u{123} etc.  But that's not what bash does!
            dq_part = self._ReadDoubleQuoted(self.cur_token)
            # Got the empty word "" and there's a " after it
            if (triple_out and len(dq_part.parts) == 0 and
                    self.lexer.ByteLookAhead() == '"'):

                self._SetNext(lex_mode_e.ShCommand)
                self._GetToken()
                # HACK: magically transform the third " in """ to
                # Id.Left_TDoubleQuote, so that """ is the terminator
                left_dq_token = self.cur_token
                left_dq_token.id = Id.Left_TDoubleQuote
                triple_out.b = True  # let the caller know we got it
                return self._ReadDoubleQuoted(left_dq_token)

            return dq_part

        if self.token_type in (Id.Left_SingleQuote, Id.Left_RSingleQuote,
                               Id.Left_DollarSingleQuote):
            if self.token_type == Id.Left_SingleQuote:
                lexer_mode = lex_mode_e.SQ_Raw
                triple_left_id = Id.Left_TSingleQuote
            elif self.token_type == Id.Left_RSingleQuote:
                lexer_mode = lex_mode_e.SQ_Raw
                triple_left_id = Id.Left_RTSingleQuote
            else:
                lexer_mode = lex_mode_e.SQ_C
                # there is no such thing as $'''
                triple_left_id = Id.Undefined_Tok

            sq_part = self._ReadSingleQuoted(self.cur_token, lexer_mode)

            # Got empty '' or r'' and there's a ' after it
            # u'' and b'' are handled in _ReadYshSingleQuoted
            if (triple_left_id != Id.Undefined_Tok and
                    triple_out is not None and len(sq_part.sval) == 0 and
                    self.lexer.ByteLookAhead() == "'"):

                self._SetNext(lex_mode_e.ShCommand)
                self._GetToken()

                # HACK: magically transform the third ' in ''' to
                # Id.Left_TSingleQuote, so that ''' is the terminator
                left_sq_token = self.cur_token
                left_sq_token.id = triple_left_id

                triple_out.b = True  # let the caller know we got it
                return self._ReadSingleQuoted(left_sq_token, lexer_mode)

            return sq_part

        if self.token_type in (Id.Left_DollarParen, Id.Left_Backtick,
                               Id.Left_ProcSubIn, Id.Left_ProcSubOut):
            return self._ReadCommandSub(self.token_type, d_quoted=False)

        if self.token_type == Id.Left_DollarBrace:
            return self._ReadBracedVarSub(self.cur_token, d_quoted=False)

        if self.token_type == Id.Left_DollarDParen:
            return self._ReadArithSub()

        if self.token_type == Id.Left_DollarBracket:
            return self._ReadExprSub(lex_mode_e.ShCommand)

        if self.token_type == Id.Left_DollarBraceZsh:
            return self._ReadZshVarSub(self.cur_token)

        raise AssertionError(self.cur_token)

    def _ReadExtGlob(self):
        # type: () -> word_part.ExtGlob
        """
        Grammar:
          Item         = CompoundWord | EPSILON  # important: @(foo|) is allowed
          LEFT         = '@(' | '*(' | '+(' | '?(' | '!('
          RIGHT        = ')'
          ExtGlob      = LEFT (Item '|')* Item RIGHT  # ITEM may be empty
          Compound includes ExtGlob
        """
        left_token = self.cur_token
        right_token = None  # type: Token
        arms = []  # type: List[CompoundWord]

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
        self._SetNext(lex_mode_e.ExtGlob)  # advance past LEFT

        read_word = False  # did we just read a word?  To handle @(||).

        while True:
            self._GetToken()

            if self.token_type == Id.Right_ExtGlob:
                if not read_word:
                    arms.append(CompoundWord([]))
                right_token = self.cur_token
                break

            elif self.token_type == Id.Op_Pipe:
                if not read_word:
                    arms.append(CompoundWord([]))
                read_word = False
                self._SetNext(lex_mode_e.ExtGlob)

            # lex_mode_e.ExtGlob should only produce these 4 kinds of tokens
            elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub,
                                     Kind.ExtGlob):
                w = self._ReadCompoundWord(lex_mode_e.ExtGlob)
                arms.append(w)
                read_word = True

            elif self.token_kind == Kind.Eof:
                p_die('Unexpected EOF reading extended glob that began here',
                      left_token)

            else:
                raise AssertionError(self.cur_token)

        return word_part.ExtGlob(left_token, arms, right_token)
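
    # ExtGlob examples, per the grammar above (illustrative):
    #   @(foo|bar)  -> two arms
    #   @(foo|)     -> two arms; the second is an empty CompoundWord
    #   @(||)       -> three empty arms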

    def _ReadBashRegexGroup(self):
        # type: () -> word_part.BashRegexGroup
        """
        Grammar:
        BashRegexGroup = '(' WORD? ')'
        """
        left_token = self.cur_token
        assert left_token.id == Id.BashRegex_LParen, left_token

        right_token = None  # type: Token
        arms = []  # type: List[CompoundWord]

        self.lexer.PushHint(Id.Op_RParen, Id.Right_BashRegexGroup)
        self._SetNext(lex_mode_e.BashRegexFakeInner)  # advance past LEFT

        self._GetToken()
        if self.token_type == Id.Right_BashRegexGroup:  # empty ()
            return word_part.BashRegexGroup(left_token, None, self.cur_token)

        # lex_mode_e.BashRegex should only produce these 4 kinds of tokens
        if self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub, Kind.BashRegex):
            # Fake lexer mode that translates Id.WS_Space to Id.Lit_Chars,
            # to allow bash-style [[ s =~ (a b) ]]
            w = self._ReadCompoundWord(lex_mode_e.BashRegexFakeInner)
            arms.append(w)

            self._GetToken()
            if self.token_type != Id.Right_BashRegexGroup:
                p_die('Expected ) to close bash regex group', self.cur_token)

            return word_part.BashRegexGroup(left_token, w, self.cur_token)

        p_die('Expected word after ( opening bash regex group', self.cur_token)

    def _ReadLikeDQ(self, left_token, is_ysh_expr, out_parts):
        # type: (Optional[Token], bool, List[word_part_t]) -> None
        """
        Args:
          left_token: A token if we are reading a double quoted part, or None if
            we're reading a here doc.
          is_ysh_expr: Whether to disallow backticks and invalid char escapes
          out_parts: list of word_part to append to
        """
        if left_token:
            if left_token.id in (Id.Left_TDoubleQuote,
                                 Id.Left_DollarTDoubleQuote):
                expected_end_tokens = 3
            else:
                expected_end_tokens = 1
        else:
            expected_end_tokens = 1000  # here doc will break

        num_end_tokens = 0
        while num_end_tokens < expected_end_tokens:
            self._SetNext(lex_mode_e.DQ)
            self._GetToken()

            if self.token_kind == Kind.Lit:
                if self.token_type == Id.Lit_EscapedChar:
                    tok = self.cur_token
                    ch = lexer.TokenSliceLeft(tok, 1)
                    part = word_part.EscapedLiteral(tok,
                                                    ch)  # type: word_part_t
                else:
                    if self.token_type == Id.Lit_BadBackslash:
                        # echo "\z" is OK in shell, but x = "\z" is a syntax error in
                        # YSH.
                        # Slight hole: we don't catch x = ${undef:-"\z"} because of the
                        # recursion (unless parse_backslash)
                        if (is_ysh_expr or
                                not self.parse_opts.parse_backslash()):
                            p_die(
                                "Invalid char escape in double quoted string (OILS-ERR-12)",
                                self.cur_token)
                    elif self.token_type == Id.Lit_Dollar:
                        if is_ysh_expr or not self.parse_opts.parse_dollar():
                            p_die("Literal $ should be quoted like \$",
                                  self.cur_token)

                    part = self.cur_token
                out_parts.append(part)

            elif self.token_kind == Kind.Left:
                if self.token_type == Id.Left_Backtick and is_ysh_expr:
                    p_die("Invalid backtick: use $(cmd) or \\` in YSH strings",
                          self.cur_token)

                part = self._ReadDoubleQuotedLeftParts()
                out_parts.append(part)

            elif self.token_kind == Kind.VSub:
                tok = self.cur_token
                part = SimpleVarSub(tok)
                out_parts.append(part)
                # NOTE: parsing "$f(x)" would BREAK CODE.  Could add a mode for it
                # later.

            elif self.token_kind == Kind.Right:
                assert self.token_type == Id.Right_DoubleQuote, self.token_type
                if left_token:
                    num_end_tokens += 1

                # In a here doc, the right quote is literal!
                out_parts.append(self.cur_token)

            elif self.token_kind == Kind.Eof:
                if left_token:
                    p_die(
                        'Unexpected EOF reading double-quoted string that began here',
                        left_token)
                else:  # here docs will have an EOF in their token stream
                    break

            else:
                raise AssertionError(self.cur_token)

            if self.token_kind != Kind.Right:
                num_end_tokens = 0  # """ must be CONSECUTIVE

        if expected_end_tokens == 1:
            out_parts.pop()
        elif expected_end_tokens == 3:
            out_parts.pop()
            out_parts.pop()
            out_parts.pop()

        # Remove space from """ in both expression mode and command mode
        if (left_token and left_token.id
                in (Id.Left_TDoubleQuote, Id.Left_DollarTDoubleQuote)):
            word_compile.RemoveLeadingSpaceDQ(out_parts)

        # Return nothing, since we appended to 'out_parts'
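
        # Sketch of the result for "hi $name": out_parts gets a Lit_Chars
        # token for 'hi ' and a SimpleVarSub for $name; the closing
        # Right_DoubleQuote token is appended and then popped above.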
 | 
| 1089 | 
 | 
| 1090 |     def _ReadDoubleQuoted(self, left_token):
 | 
| 1091 |         # type: (Token) -> DoubleQuoted
 | 
| 1092 |         """Helper function for "hello $name".
 | 
| 1093 | 
 | 
| 1094 |         Args:
 | 
| 1095 |           eof_type: for stopping at }, Id.Lit_RBrace
 | 
| 1096 |           here_doc: Whether we are reading in a here doc context
 | 
| 1097 | 
 | 
| 1098 |         Also ${foo%%a b c}  # treat this as double quoted.  until you hit
 | 
| 1099 |         """
 | 
| 1100 |         parts = []  # type: List[word_part_t]
 | 
| 1101 |         self._ReadLikeDQ(left_token, False, parts)
 | 
| 1102 | 
 | 
| 1103 |         right_quote = self.cur_token
 | 
| 1104 |         return DoubleQuoted(left_token, parts, right_quote)
 | 
| 1105 | 
 | 
    def ReadDoubleQuoted(self, left_token, parts):
        # type: (Token, List[word_part_t]) -> Token
        """For expression mode.

        Read var x = "${dir:-}/$name"; etc.
        """
        self._ReadLikeDQ(left_token, True, parts)
        return self.cur_token

    def _ReadCommandSub(self, left_id, d_quoted=False):
        # type: (Id_t, bool) -> CommandSub
        """
        NOTE: This is not in the grammar, because word parts aren't in the grammar!

        command_sub = '$(' command_list ')'
                    | '@(' command_list ')'
                    | '<(' command_list ')'
                    | '>(' command_list ')'
                    | ` command_list `
        """
        left_token = self.cur_token

        # Set the lexer in a state so ) becomes the EOF token.
        if left_id in (Id.Left_DollarParen, Id.Left_AtParen, Id.Left_ProcSubIn,
                       Id.Left_ProcSubOut):
            self._SetNext(lex_mode_e.ShCommand)  # advance past $( etc.

            right_id = Id.Eof_RParen
            self.lexer.PushHint(Id.Op_RParen, right_id)
            c_parser = self.parse_ctx.MakeParserForCommandSub(
                self.line_reader, self.lexer, right_id)
            # NOTE: This doesn't use something like main_loop because we don't want
            # to interleave parsing and execution!  Unlike 'source' and 'eval'.
            node = c_parser.ParseCommandSub()

            right_token = c_parser.w_parser.cur_token

        elif left_id == Id.Left_Backtick and self.parse_ctx.do_lossless:
            # NOTE: This is an APPROXIMATE solution for translation ONLY.  See
            # test/osh2oil.

            right_id = Id.Eof_Backtick
            self.lexer.PushHint(Id.Left_Backtick, right_id)
            c_parser = self.parse_ctx.MakeParserForCommandSub(
                self.line_reader, self.lexer, right_id)
            node = c_parser.ParseCommandSub()
            right_token = c_parser.w_parser.cur_token

        elif left_id == Id.Left_Backtick:
            if not self.parse_opts.parse_backticks():
                p_die('Use $(cmd) instead of backticks (parse_backticks)',
                      left_token)

            self._SetNext(lex_mode_e.Backtick)  # advance past `

            parts = []  # type: List[str]
            while True:
                self._GetToken()
                #log("TOK %s", self.cur_token)

                if self.token_type == Id.Backtick_Quoted:
                    # Remove leading \
                    parts.append(lexer.TokenSliceLeft(self.cur_token, 1))

                elif self.token_type == Id.Backtick_DoubleQuote:
                    # Compatibility: If backticks are double quoted, then double quotes
                    # within them have to be \"
                    # Shells aren't smart enough to match nested " and ` quotes (but OSH
                    # is)
                    if d_quoted:
                        # Remove leading \
                        parts.append(lexer.TokenSliceLeft(self.cur_token, 1))
                    else:
                        parts.append(lexer.TokenVal(self.cur_token))

                elif self.token_type == Id.Backtick_Other:
                    parts.append(lexer.TokenVal(self.cur_token))

                elif self.token_type == Id.Backtick_Right:
                    break

                elif self.token_type == Id.Eof_Real:
                    # Note: this parse error is in the ORIGINAL context.  No code_str yet.
                    p_die('Unexpected EOF while looking for closing backtick',
                          left_token)

                else:
                    raise AssertionError(self.cur_token)

                self._SetNext(lex_mode_e.Backtick)

            # Calculate right SPID on CommandSub BEFORE re-parsing.
            right_token = self.cur_token

            code_str = ''.join(parts)
            #log('code %r', code_str)

            # NOTE: This is similar to how we parse aliases in osh/cmd_parse.py.  It
            # won't have the same location info as MakeParserForCommandSub(), because
            # the lexer is different.
            arena = self.parse_ctx.arena
            #arena = alloc.Arena()
            line_reader = reader.StringLineReader(code_str, arena)
            c_parser = self.parse_ctx.MakeOshParser(line_reader)
            src = source.Reparsed('backticks', left_token, right_token)
            with alloc.ctx_SourceCode(arena, src):
                node = c_parser.ParseCommandSub()

        else:
            raise AssertionError(left_id)

        return CommandSub(left_token, node, right_token)

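    # How the PushHint mechanism above plays out (illustrative, not normative):
    #
    #     echo $(echo hi)
    #              ^    ^-- lexed as Id.Eof_RParen, because of
    #              |        PushHint(Id.Op_RParen, Id.Eof_RParen)
    #              +------- parsed by a child parser, which stops at that
    #                       "EOF" token
    #
    # The same trick retargets the closing ` for backticks in lossless mode.
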
 | 
    def _ReadExprSub(self, lex_mode):
        # type: (lex_mode_t) -> word_part.ExprSub
        """$[d->key]  $[obj.method()]  etc."""
        left_token = self.cur_token

        self._SetNext(lex_mode_e.Expr)
        enode, right_token = self.parse_ctx.ParseYshExpr(
            self.lexer, grammar_nt.ysh_expr_sub)

        self._SetNext(lex_mode)  # Move past ]
        return word_part.ExprSub(left_token, enode, right_token)

 | 
    def ParseVarDecl(self, kw_token):
        # type: (Token) -> command.VarDecl
        """
        oil_var_decl: name_type_list '=' testlist end_stmt

        Note that assignments must end with \n  ;  }  or EOF.  Unlike shell
        assignments, we disallow:

        var x = 42 | wc -l
        var x = 42 && echo hi
        """
        self._SetNext(lex_mode_e.Expr)
        enode, last_token = self.parse_ctx.ParseVarDecl(kw_token, self.lexer)
        # Hack to move } from what the Expr lexer mode gives to what the
        # CommandParser wants
        if last_token.id == Id.Op_RBrace:
            last_token.id = Id.Lit_RBrace

        # Let the CommandParser see the Op_Semi or Op_Newline.
        self.buffered_word = last_token
        self._SetNext(lex_mode_e.ShCommand)  # always back to this
        return enode

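    # A sketch of the handoff above (illustrative): for
    #
    #     if (x) { var y = 42 }
    #
    # the expression parser reads through the final } and returns it as
    # Op_RBrace; retagging it to Lit_RBrace and stashing it in
    # self.buffered_word lets the CommandParser consume it as the end of
    # the { } block, exactly once.
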
 | 
    def ParseMutation(self, kw_token, var_checker):
        # type: (Token, VarChecker) -> command.Mutation
        """
        setvar i = 42
        setvar i += 1
        setvar a[i] = 42
        setvar a[i] += 1
        setvar d.key = 42
        setvar d.key += 1
        """
        self._SetNext(lex_mode_e.Expr)
        enode, last_token = self.parse_ctx.ParseMutation(kw_token, self.lexer)
        # Hack to move } from what the Expr lexer mode gives to what the
        # CommandParser wants
        if last_token.id == Id.Op_RBrace:
            last_token.id = Id.Lit_RBrace

        for lhs in enode.lhs:
            UP_lhs = lhs
            with tagswitch(lhs) as case:
                if case(y_lhs_e.Var):
                    lhs = cast(Token, UP_lhs)
                    var_checker.Check(kw_token.id, lexer.LazyStr(lhs), lhs)

                # Note: this does not cover cases like
                # setvar (a[0])[1] = v
                # setvar (d.key).other = v
                # This bleeds into the problem of catching all typos
                # statically, which may be possible if 'use' makes all names
                # explicit.
                elif case(y_lhs_e.Subscript):
                    lhs = cast(Subscript, UP_lhs)
                    if lhs.obj.tag() == expr_e.Var:
                        v = cast(expr.Var, lhs.obj)
                        var_checker.Check(kw_token.id, v.name, v.left)

                elif case(y_lhs_e.Attribute):
                    lhs = cast(Attribute, UP_lhs)
                    if lhs.obj.tag() == expr_e.Var:
                        v = cast(expr.Var, lhs.obj)
                        var_checker.Check(kw_token.id, v.name, v.left)

        # Let the CommandParser see the Op_Semi or Op_Newline.
        self.buffered_word = last_token
        self._SetNext(lex_mode_e.ShCommand)  # always back to this
        return enode

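    # Examples of the static check above (hedged; exact diagnostics may vary):
    #
    #     setvar i = 42      # var_checker.Check() can flag 'i' if undeclared
    #     setvar a[i] = 42   # checks 'a', the base variable of the subscript
    #     setvar d.key = 42  # checks 'd', the base variable of the attribute
    #
    # Only plain Var / Subscript / Attribute bases are checked, per the
    # tagswitch cases above.
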
 | 
    def ParseBareDecl(self):
        # type: () -> expr_t
        """
        x = {name: val}
        """
        self._SetNext(lex_mode_e.Expr)
        self._GetToken()
        enode, last_token = self.parse_ctx.ParseYshExpr(
            self.lexer, grammar_nt.command_expr)
        if last_token.id == Id.Op_RBrace:
            last_token.id = Id.Lit_RBrace
        self.buffered_word = last_token
        self._SetNext(lex_mode_e.ShCommand)
        return enode

    def ParseYshExprForCommand(self):
        # type: () -> expr_t

        # Fudge for this case
        #  for x in(y) {
        # versus
        #  for x in (y) {
        #
        # In the former case, ReadWord on 'in' puts the lexer past (.
        # Also see LookPastSpace in CommandParser.
        # A simpler solution would be nicer.

        if self.token_type == Id.Op_LParen:
            self.lexer.MaybeUnreadOne()

        enode, _ = self.parse_ctx.ParseYshExpr(self.lexer, grammar_nt.ysh_expr)

        self._SetNext(lex_mode_e.ShCommand)
        return enode

 | 
    def ParseCommandExpr(self):
        # type: () -> expr_t
        """
        = 1+2
        """
        enode, last_token = self.parse_ctx.ParseYshExpr(
            self.lexer, grammar_nt.command_expr)

        # In some cases, such as the case statement, we expect *the lexer* to be
        # pointing at the token right after the expression.  But the expression
        # parser must have read to the `last_token`.  Unreading places the lexer
        # back in the expected state.  I.e.:
        #
        # case (x) {                           case (x) {
        #   (else) { = x }                       (else) { = x }
        #                 ^ The lexer is here                 ^ Unread to here
        # }                                    }
        assert last_token.id in (Id.Op_Newline, Id.Eof_Real, Id.Op_Semi,
                                 Id.Op_RBrace), last_token
        if last_token.id != Id.Eof_Real:
            # Eof_Real is the only token we cannot unread
            self.lexer.MaybeUnreadOne()

        return enode

    def ParseProc(self, node):
        # type: (Proc) -> None

        # proc name-with-hyphens() must be accepted
        self._SetNext(lex_mode_e.ShCommand)
        self._GetToken()
        # example: 'proc f[' gets you Lit_ArrayLhsOpen
        if self.token_type != Id.Lit_Chars:
            p_die('Invalid proc name %s' % ui.PrettyToken(self.cur_token),
                  self.cur_token)

        # TODO: validate this more.  Disallow proc 123 { }, which isn't disallowed
        # for shell functions.  Similar to IsValidVarName().
        node.name = self.cur_token

        last_token = self.parse_ctx.ParseProc(self.lexer, node)

        # Translate from lex_mode_e.{Expr => ShCommand}, for CommandParser
        assert last_token.id == Id.Op_LBrace
        last_token.id = Id.Lit_LBrace
        self.buffered_word = last_token

        self._SetNext(lex_mode_e.ShCommand)

    def ParseFunc(self, node):
        # type: (Func) -> None
        last_token = self.parse_ctx.ParseFunc(self.lexer, node)

        # Translate from lex_mode_e.{Expr => ShCommand}, for CommandParser
        assert last_token.id == Id.Op_LBrace
        last_token.id = Id.Lit_LBrace
        self.buffered_word = last_token

        self._SetNext(lex_mode_e.ShCommand)

    def ParseYshCasePattern(self):
        # type: () -> Tuple[pat_t, Token]
        pat, left_tok, last_token = self.parse_ctx.ParseYshCasePattern(
            self.lexer)

        if last_token.id == Id.Op_LBrace:
            last_token.id = Id.Lit_LBrace
        self.buffered_word = last_token

        return pat, left_tok

    def NewlineOkForYshCase(self):
        # type: () -> Id_t
        """Check for an optional newline and consume it.

        This is a special case of `_NewlineOk` which fixes some "off-by-one"
        issues that crop up while parsing YSH case arms.  For more details,
        see #oil-dev > Progress On YSH Case Grammar on Zulip.

        Returns a token id indicating which of these alternatives comes next:

             word { echo word }
             (3)  { echo expr }
             /e/  { echo eggex }
           }        # right brace
        """
        while True:
            next_id = self.lexer.LookAheadOne(lex_mode_e.Expr)

            # Cannot look ahead past lines
            if next_id == Id.Unknown_Tok:
                self.lexer.MoveToNextLine()
                continue

            next_kind = consts.GetKind(next_id)
            if next_id != Id.Op_Newline and next_kind != Kind.Ignored:
                break

            self.lexer.Read(lex_mode_e.Expr)

        if next_id in (Id.Op_RBrace, Id.Op_LParen, Id.Arith_Slash):
            self._SetNext(lex_mode_e.Expr)  # Continue in expression mode
        else:
            # Consume the trailing Op_Newline
            self._SetNext(lex_mode_e.ShCommand)
            self._GetToken()

        return next_id

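    # Mapping from the lookahead above to case arm shapes (illustrative):
    #
    #     Id.Op_LParen    ->  (3)  { echo expr }    stay in expression mode
    #     Id.Arith_Slash  ->  /e/  { echo eggex }   stay in expression mode
    #     Id.Op_RBrace    ->  }                     stay in expression mode
    #     anything else   ->  word { echo word }    switch to ShCommand mode,
    #                                               consuming the newline
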
 | 
    def _ReadArithExpr(self, end_id):
        # type: (Id_t) -> arith_expr_t
        """Read and parse an arithmetic expression in various contexts.

        $(( 1+2 ))
        (( a=1+2 ))
        ${a[ 1+2 ]}
        ${a : 1+2 : 1+2}

        See tests/arith-context.test.sh for ambiguous cases.

        ${a[a[0]]} is valid  # VS_RBRACKET vs Id.Arith_RBracket

        ${s : a<b?0:1 : 1}  # VS_COLON vs Id.Arith_Colon

        See the assertion in ArithParser.Parse() -- unexpected extra input.
        """
        # calls self.ReadWord(lex_mode_e.Arith)
        anode = self.a_parser.Parse()
        cur_id = self.a_parser.CurrentId()
        if end_id != Id.Undefined_Tok and cur_id != end_id:
            p_die(
                'Unexpected token after arithmetic expression (%s != %s)' %
                (ui.PrettyId(cur_id), ui.PrettyId(end_id)),
                loc.Word(self.a_parser.cur_word))
        return anode

    def _ReadArithSub(self):
        # type: () -> word_part.ArithSub
        """Read an arith substitution, which contains an arith expression, e.g.

        $((a + 1)).
        """
        left_tok = self.cur_token

        # The second ) needs to be disambiguated, in cases like:
        # $(echo $(( 1+2 )) )
        self.lexer.PushHint(Id.Op_RParen, Id.Right_DollarDParen)

        # NOTE: To disambiguate $(( as arith sub vs. command sub and subshell, we
        # could save the lexer/reader state here, and retry if the arithmetic parse
        # fails.  But we can almost always catch this at parse time.  There could
        # be some exceptions like:
        # $((echo * foo))  # looks like multiplication
        # $((echo / foo))  # looks like division

        # $(( )) is valid
        anode = arith_expr.EmptyZero  # type: arith_expr_t

        self._NextNonSpace()
        if self.token_type != Id.Arith_RParen:
            anode = self._ReadArithExpr(Id.Arith_RParen)

        self._SetNext(lex_mode_e.ShCommand)

        # Ensure we get closing )
        self._GetToken()
        if self.token_type != Id.Right_DollarDParen:
            p_die('Expected second ) to end arith sub', self.cur_token)

        right_tok = self.cur_token
        return word_part.ArithSub(left_tok, anode, right_tok)

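    # Sketch of the token states above (illustrative):
    #
    #     $(( 1 + 2 ))
    #     ^^          ^-- Right_DollarDParen, via the PushHint
    #     +-------------- left_tok
    #
    #     $(( ))          # empty: anode stays arith_expr.EmptyZero
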
 | 
    def ReadDParen(self):
        # type: () -> Tuple[arith_expr_t, Token]
        """Read ((1+ 2))  -- command context.

        We're using the word parser because it's very similar to _ReadArithExpr
        above.

        This also returns the terminating Id.Op_DRightParen token for location
        info.
        """
        # (( )) is valid
        anode = arith_expr.EmptyZero  # type: arith_expr_t

        self.lexer.PushHint(Id.Op_RParen, Id.Op_DRightParen)

        self._NextNonSpace()
        if self.token_type != Id.Arith_RParen:
            anode = self._ReadArithExpr(Id.Arith_RParen)

        self._SetNext(lex_mode_e.ShCommand)

        # Ensure we get the second )
        self._GetToken()
        right = self.cur_token
        if right.id != Id.Op_DRightParen:
            p_die('Expected second ) to end arith statement', right)

        self._SetNext(lex_mode_e.ShCommand)

        return anode, right

    def _NextNonSpace(self):
        # type: () -> None
        """Advance in lex_mode_e.Arith until non-space token.

        Same logic as _ReadWord, but used in
           $(( ))
           (( ))
           for (( ))

        You can read self.token_type after this, without calling _GetToken.
        """
        while True:
            self._SetNext(lex_mode_e.Arith)
            self._GetToken()
            if self.token_kind not in (Kind.Ignored, Kind.WS):
                break

    def ReadForExpression(self):
        # type: () -> command.ForExpr
        """Read ((i=0; i<5; ++i)) -- part of command context."""
        self._NextNonSpace()  # skip over ((
        cur_id = self.token_type  # for end of arith expressions

        if cur_id == Id.Arith_Semi:  # for (( ; i < 10; i++ ))
            init_node = arith_expr.EmptyZero  # type: arith_expr_t
        else:
            init_node = self.a_parser.Parse()
            cur_id = self.a_parser.CurrentId()
        self._NextNonSpace()

        # It's odd to keep track of both cur_id and self.token_type in this
        # function, but it works, and is tested in 'test/parse_error.sh
        # arith-integration'
        if cur_id != Id.Arith_Semi:  # for (( x=0 b; ... ))
            p_die("Expected ; here", loc.Word(self.a_parser.cur_word))

        self._GetToken()
        cur_id = self.token_type

        if cur_id == Id.Arith_Semi:  # for (( ; ; i++ ))
            # empty condition is TRUE
            cond_node = arith_expr.EmptyOne  # type: arith_expr_t
        else:
            cond_node = self.a_parser.Parse()
            cur_id = self.a_parser.CurrentId()

        if cur_id != Id.Arith_Semi:  # for (( x=0; x<5 b ))
            p_die("Expected ; here", loc.Word(self.a_parser.cur_word))

        self._NextNonSpace()
        if self.token_type == Id.Arith_RParen:  # for (( ; ; ))
            update_node = arith_expr.EmptyZero  # type: arith_expr_t
        else:
            update_node = self._ReadArithExpr(Id.Arith_RParen)

        self._NextNonSpace()
        if self.token_type != Id.Arith_RParen:
            p_die('Expected ) to end for loop expression', self.cur_token)
        self._SetNext(lex_mode_e.ShCommand)

        # redirects is None, will be assigned in CommandEvaluator
        node = command.ForExpr.CreateNull()
        node.init = init_node
        node.cond = cond_node
        node.update = update_node
        return node

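    # Defaults chosen above for omitted sections (illustrative):
    #
    #     for (( ; ; ))      # init=EmptyZero  cond=EmptyOne  update=EmptyZero
    #     for (( i=0; ; ))   # empty condition is TRUE -> an infinite loop
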
 | 
    def _ReadArrayLiteral(self):
        # type: () -> word_part_t
        """a=(1 2 3)

        TODO: See osh/cmd_parse.py:164 for Id.Lit_ArrayLhsOpen, for a[x++]=1

        We want:

        A=(['x']=1 ["x"]=2 [$x$y]=3)

        Maybe allow this as a literal string?  Because I think I've seen it before?
        Or maybe force people to patch to learn the rule.

        A=([x]=4)

        Starts with Lit_Other '[', and then it has Lit_ArrayLhsClose
        Maybe enforce that ALL of them have keys, or NONE of them have keys.
        """
        self._SetNext(lex_mode_e.ShCommand)  # advance past (
        self._GetToken()
        if self.cur_token.id != Id.Op_LParen:
            p_die('Expected ( after =', self.cur_token)
        left_token = self.cur_token
        right_token = None  # type: Token

        # MUST use a new word parser (with same lexer).
        w_parser = self.parse_ctx.MakeWordParser(self.lexer, self.line_reader)
        words = []  # type: List[CompoundWord]
        done = False
        while not done:
            w = w_parser.ReadWord(lex_mode_e.ShCommand)
            with tagswitch(w) as case:
                if case(word_e.Operator):
                    tok = cast(Token, w)
                    if tok.id == Id.Right_ShArrayLiteral:
                        right_token = tok
                        done = True  # can't use break here
                    # Unlike command parsing, array parsing allows embedded \n.
                    elif tok.id == Id.Op_Newline:
                        continue
                    else:
                        p_die('Unexpected token in array literal', loc.Word(w))

                elif case(word_e.Compound):
                    words.append(cast(CompoundWord, w))

                else:
                    raise AssertionError()

        if len(words) == 0:  # a=() is empty indexed array
            # Needed for type safety, doh
            no_words = []  # type: List[word_t]
            node = ShArrayLiteral(left_token, no_words, right_token)
            return node

        pairs = []  # type: List[AssocPair]
        # If the first one is a key/value pair, then the rest are assumed to be.
        pair = word_.DetectAssocPair(words[0])
        if pair:
            pairs.append(pair)

            n = len(words)
            for i in xrange(1, n):
                w2 = words[i]
                pair = word_.DetectAssocPair(w2)
                if not pair:
                    p_die("Expected associative array pair", loc.Word(w2))

                pairs.append(pair)

            # invariant List?
            return word_part.BashAssocLiteral(left_token, pairs, right_token)

        # Brace detection for arrays but NOT associative arrays
        words2 = braces.BraceDetectAll(words)
        words3 = word_.TildeDetectAll(words2)
        return ShArrayLiteral(left_token, words3, right_token)

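    # How the first word decides the literal kind above (illustrative):
    #
    #     a=(1 2 3)        # no [k]= pair detected -> ShArrayLiteral
    #     A=([k]=v [j]=w)  # first word is a pair  -> BashAssocLiteral
    #     A=([k]=v 2)      # first is a pair, second isn't -> parse error
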
 | 
    def ParseProcCallArgs(self, start_symbol):
        # type: (int) -> ArgList
        """ json write (x) """
        self.lexer.MaybeUnreadOne()

        arg_list = ArgList.CreateNull(alloc_lists=True)
        arg_list.left = self.cur_token
        self.parse_ctx.ParseProcCallArgs(self.lexer, arg_list, start_symbol)
        return arg_list

    def _MaybeReadWordPart(self, is_first, lex_mode, parts):
        # type: (bool, lex_mode_t, List[word_part_t]) -> bool
        """Helper for _ReadCompoundWord3."""
        done = False

        if self.token_type == Id.Lit_EscapedChar:
            tok = self.cur_token
            assert tok.length == 2
            ch = lexer.TokenSliceLeft(tok, 1)
            if not self.parse_opts.parse_backslash():
                if not pyutil.IsValidCharEscape(ch):
                    p_die('Invalid char escape in unquoted word (OILS-ERR-13)',
                          self.cur_token)

            part = word_part.EscapedLiteral(self.cur_token,
                                            ch)  # type: word_part_t
        else:
            part = self.cur_token

        if is_first and self.token_type == Id.Lit_VarLike:  # foo=
            parts.append(part)
            # Unfortunately it's awkward to pull the check for a=(1 2) up to
            # _ReadWord.
            next_id = self.lexer.LookPastSpace(lex_mode)
            if next_id == Id.Op_LParen:
                self.lexer.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)
                part2 = self._ReadArrayLiteral()
                parts.append(part2)

                # Array literal must be the last part of the word.
                self._SetNext(lex_mode)
                self._GetToken()
                # EOF, whitespace, newline, Right_Subshell
                if self.token_kind not in KINDS_THAT_END_WORDS:
                    p_die('Unexpected token after array literal',
                          self.cur_token)
                done = True

        elif (is_first and self.parse_opts.parse_at() and
              self.token_type == Id.Lit_Splice):

            splice_tok = self.cur_token
            part2 = word_part.Splice(splice_tok,
                                     lexer.TokenSliceLeft(splice_tok, 1))

            parts.append(part2)

            # @words must be the last part of the word
            self._SetNext(lex_mode)
            self._GetToken()
            # EOF, whitespace, newline, Right_Subshell
            if self.token_kind not in KINDS_THAT_END_WORDS:
                p_die('Unexpected token after array splice', self.cur_token)
            done = True

        elif (is_first and self.parse_opts.parse_at() and
              self.token_type == Id.Lit_AtLBracket):  # @[split(x)]
            part2 = self._ReadExprSub(lex_mode_e.DQ)
            parts.append(part2)

            # @[split(x)]
            self._SetNext(lex_mode)
            self._GetToken()
            # EOF, whitespace, newline, Right_Subshell
            if self.token_kind not in KINDS_THAT_END_WORDS:
                p_die('Unexpected token after Expr splice', self.cur_token)
            done = True

        elif (is_first and self.parse_opts.parse_at() and
              self.token_type == Id.Lit_AtLBraceDot):
            p_die('TODO: @{.myproc builtin sub}', self.cur_token)

        elif (is_first and self.parse_opts.parse_at_all() and
              self.token_type == Id.Lit_At):
            # Because $[x] ${x} and perhaps $/x/ are reserved, it makes sense for @
            # at the beginning of a word to be reserved.

            # Although should we relax 'echo @' ?  I'm tempted to have a shortcut for
            # @_argv and the like.
            p_die('Literal @ starting a word must be quoted (parse_at_all)',
                  self.cur_token)

        else:
            # not a literal with lookahead; append it
            parts.append(part)

        return done

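    # Lookahead consequences of the cases above (illustrative):
    #
    #     a=(1 2)      # Lit_VarLike, then LookPastSpace sees ( -> array literal
    #     a=(1 2)x     # parse error: array literal must end the word
    #     @words"x"    # parse error: a splice must end the word too
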
 | 
    def _ReadCompoundWord(self, lex_mode):
        # type: (lex_mode_t) -> CompoundWord
        return self._ReadCompoundWord3(lex_mode, Id.Undefined_Tok, True)

    def _ReadCompoundWord3(self, lex_mode, eof_type, empty_ok):
        # type: (lex_mode_t, Id_t, bool) -> CompoundWord
        """
        Precondition: Looking at the first token of the first word part
        Postcondition: Looking at the token after, e.g. space or operator

        NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
        could be an operator delimiting a compound word.  Can we change lexer modes
        and remove this special case?
        """
        w = CompoundWord([])
        num_parts = 0
        brace_count = 0
        done = False
        is_triple_quoted = None  # type: Optional[BoolParamBox]

        while not done:
            self._GetToken()

            allow_done = empty_ok or num_parts != 0
            if allow_done and self.token_type == eof_type:
                done = True  # e.g. for ${foo//pat/replace}

            # Keywords like "for" are treated like literals
            elif self.token_kind in (Kind.Lit, Kind.History, Kind.KW,
                                     Kind.ControlFlow, Kind.BoolUnary,
                                     Kind.BoolBinary):

                # Syntax error for { and }
                if self.token_type == Id.Lit_LBrace:
                    brace_count += 1
                elif self.token_type == Id.Lit_RBrace:
                    brace_count -= 1
                elif self.token_type == Id.Lit_Dollar:
                    if not self.parse_opts.parse_dollar():
                        if num_parts == 0 and lex_mode == lex_mode_e.ShCommand:
                            next_byte = self.lexer.ByteLookAhead()
                            # TODO: switch lexer modes and parse $/d+/.  But not ${a:-$/d+/}
                            if next_byte == '/':
                                #log('next_byte %r', next_byte)
                                pass

                        p_die('Literal $ should be quoted like \$',
                              self.cur_token)

                done = self._MaybeReadWordPart(num_parts == 0, lex_mode,
                                               w.parts)

            elif self.token_kind == Kind.VSub:
                vsub_token = self.cur_token

                part = SimpleVarSub(vsub_token)  # type: word_part_t
                w.parts.append(part)

            elif self.token_kind == Kind.ExtGlob:
                # If parse_at, we can take over @( to start @(seq 3)
                # Users can also use ,(*.py|*.sh)
                if (self.parse_opts.parse_at() and
                        self.token_type == Id.ExtGlob_At and num_parts == 0):
                    cs_part = self._ReadCommandSub(Id.Left_AtParen,
                                                   d_quoted=False)
                    # RARE mutation of tok.id!
                    cs_part.left_token.id = Id.Left_AtParen
                    part = cs_part  # for type safety

                    # Same check as _MaybeReadWordPart.  @(seq 3)x is illegal, just like
                    # a=(one two)x and @arrayfunc(3)x.
                    self._GetToken()
                    if self.token_kind not in KINDS_THAT_END_WORDS:
                        p_die('Unexpected token after @()', self.cur_token)
                    done = True

                else:
                    part = self._ReadExtGlob()
                w.parts.append(part)

            elif self.token_kind == Kind.BashRegex:
                if self.token_type == Id.BashRegex_LParen:  # Opening (
                    part = self._ReadBashRegexGroup()
                    w.parts.append(part)
                else:
                    assert self.token_type == Id.BashRegex_AllowedInParens
                    p_die('Invalid token in bash regex', self.cur_token)

            elif self.token_kind == Kind.Left:
                try_triple_quote = (self.parse_opts.parse_triple_quote() and
                                    lex_mode == lex_mode_e.ShCommand and
                                    num_parts == 0)

                # Save allocation
                if try_triple_quote:
                    is_triple_quoted = BoolParamBox(False)

                part = self._ReadUnquotedLeftParts(is_triple_quoted)
                w.parts.append(part)

            # NOT done yet, will advance below
            elif self.token_kind == Kind.Right:
                # Still part of the word; will be done on the next iter.
                if self.token_type == Id.Right_DoubleQuote:
                    pass
                # Never happens, no PushHint for this case.
                #elif self.token_type == Id.Right_DollarParen:
                #  pass
                elif self.token_type == Id.Right_Subshell:
                    # LEXER HACK for (case x in x) ;; esac )
                    # Rewind before it's used
                    assert self.next_lex_mode == lex_mode_e.Undefined
                    if self.lexer.MaybeUnreadOne():
                        self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                        self._SetNext(lex_mode)
                    done = True
                else:
                    done = True

            elif self.token_kind == Kind.Ignored:
                done = True

            else:
                # LEXER HACK for unbalanced case clause.  'case foo in esac' is valid,
                # so to test for ESAC, we can read ) before getting a chance to
                # PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
                # token and do it again.

                # We get Id.Op_RParen at top level:      case x in x) ;; esac
                # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
                if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                    # Rewind before it's used
                    assert self.next_lex_mode == lex_mode_e.Undefined
                    if self.lexer.MaybeUnreadOne():
                        if self.token_type == Id.Eof_RParen:
                            # Redo translation
                            self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                        self._SetNext(lex_mode)

                done = True  # anything we don't recognize means we're done

            if not done:
                self._SetNext(lex_mode)
                num_parts += 1

        if (self.parse_opts.parse_brace() and num_parts > 1 and
                brace_count != 0):
            # accept { and }, but not foo{
            p_die(
                'Word has unbalanced { }.  Maybe add a space or quote it like \{',
                loc.Word(w))

        if is_triple_quoted and is_triple_quoted.b and num_parts > 1:
            p_die('Unexpected parts after triple quoted string',
                  loc.WordPart(w.parts[-1]))

        if 0:
            from _devbuild.gen.syntax_asdl import word_part_str
            word_key = ' '.join(word_part_str(p.tag()) for p in w.parts)
            WORD_HIST[word_key] += 1
        return w

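    # The brace_count check above in action (illustrative):
    #
    #     echo {a,b}   # balanced within the word, brace_count == 0 -> OK
    #     echo foo{    # parse error (parse_brace): unbalanced { } in a word
    #     echo \{      # OK: quoting makes the literal brace explicit
    #
    # Note that a bare { or } is its own single-part word, so the num_parts > 1
    # condition lets it through.
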
 | 
    def _ReadArithWord(self):
        # type: () -> Optional[word_t]
        """Helper for ReadArithWord()."""
        self._GetToken()

        if self.token_kind == Kind.Unknown:
            # e.g. happened during dynamic parsing of unset 'a[$foo]' in gherkin
            p_die(
                'Unexpected token while parsing arithmetic: %r' %
                lexer.TokenVal(self.cur_token), self.cur_token)

        elif self.token_kind == Kind.Eof:
            return self.cur_token

        elif self.token_kind == Kind.Ignored:
            # Space should be ignored.
            self._SetNext(lex_mode_e.Arith)
            return None

        elif self.token_kind in (Kind.Arith, Kind.Right):
            # Id.Right_DollarDParen IS just a normal token, handled by ArithParser
            self._SetNext(lex_mode_e.Arith)
            return self.cur_token

        elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub):
            return self._ReadCompoundWord(lex_mode_e.Arith)

        else:
            raise AssertionError(self.cur_token)

    def _ReadWord(self, word_mode):
        # type: (lex_mode_t) -> Optional[word_t]
        """Helper function for ReadWord()."""

        # Change the pseudo lexer mode to a real lexer mode
        if word_mode == lex_mode_e.ShCommandFakeBrack:
            lex_mode = lex_mode_e.ShCommand
        else:
            lex_mode = word_mode

        self._GetToken()

        if self.token_kind == Kind.Eof:
            # No advance
            return self.cur_token

        # Allow Arith for ) at end of for loop?
        elif self.token_kind in (Kind.Op, Kind.Redir, Kind.Arith):
            self._SetNext(lex_mode)

            # Newlines are complicated.  See 3x2 matrix in the comment about
            # self.multiline and self.newline_state above.
            if self.token_type == Id.Op_Newline:
                if self.multiline:
                    if self.newline_state > 1:
                        # This points at a blank line, but at least it gives the line number
                        p_die('Invalid blank line in multiline mode',
                              self.cur_token)
                    return None

                if self.returned_newline:  # skip
                    return None

            return self.cur_token

        elif self.token_kind == Kind.Right:
            if self.token_type not in (Id.Right_Subshell, Id.Right_ShFunction,
                                       Id.Right_CasePat,
                                       Id.Right_ShArrayLiteral):
                raise AssertionError(self.cur_token)

            self._SetNext(lex_mode)
            return self.cur_token

        elif self.token_kind in (Kind.Ignored, Kind.WS):
            self._SetNext(lex_mode)
            return None

        else:
            assert self.token_kind in (Kind.VSub, Kind.Lit, Kind.History,
                                       Kind.Left, Kind.KW, Kind.ControlFlow,
                                       Kind.BoolUnary, Kind.BoolBinary,
                                       Kind.ExtGlob,
                                       Kind.BashRegex), 'Unhandled token kind'

            if (word_mode == lex_mode_e.ShCommandFakeBrack and
                    self.parse_opts.parse_bracket() and
                    self.token_type == Id.Lit_LBracket):
                # Change [ from Kind.Lit -> Kind.Op
                # So CommandParser can treat
                #   assert [42 === x]
                # like
                #   json write (x)
                bracket_word = self.cur_token
                bracket_word.id = Id.Op_LBracket

                self._SetNext(lex_mode)
                return bracket_word

            # We're beginning a word.  If we see Id.Lit_Pound, change to
            # lex_mode_e.Comment and read until end of line.
            if self.token_type == Id.Lit_Pound:
                self._SetNext(lex_mode_e.Comment)
                self._GetToken()

                # NOTE: The # could be the last character in the file.  It can't be
                # Eof_{RParen,Backtick} because #) and #` are comments.
                assert self.token_type in (Id.Ignored_Comment, Id.Eof_Real), \
                    self.cur_token

                # The next iteration will go into Kind.Ignored and set lex state to
                # lex_mode_e.ShCommand/etc.
                return None  # tell ReadWord() to try again after comment

            elif self.token_type == Id.Lit_TPound:  ### doc comment
                self._SetNext(lex_mode_e.Comment)
                self._GetToken()

                if self.token_type == Id.Ignored_Comment and self.emit_doc_token:
                    return self.cur_token

                return None  # tell ReadWord() to try again after comment

            else:
                # r'' u'' b''
                if (self.token_type == Id.Lit_Chars and
                        self.lexer.LookAheadOne(
                            lex_mode_e.ShCommand) == Id.Left_SingleQuote):

                    # When shopt -s parse_raw_string:
                    #     echo r'hi' is like echo 'hi'
                    #
                    #     echo u'\u{3bc}' b'\yff' works

                    tok = self.cur_token
                    if self.parse_opts.parse_ysh_string():
                        if lexer.TokenEquals(tok, 'r'):
                            left_id = Id.Left_RSingleQuote
                        elif lexer.TokenEquals(tok, 'u'):
                            left_id = Id.Left_USingleQuote
                        elif lexer.TokenEquals(tok, 'b'):
                            left_id = Id.Left_BSingleQuote
                        else:
                            left_id = Id.Undefined_Tok

                        if left_id != Id.Undefined_Tok:
                            # skip the r, and then 'foo' will be read as normal
                            self._SetNext(lex_mode_e.ShCommand)

                            self._GetToken()
                            assert self.token_type == Id.Left_SingleQuote, self.token_type

                            # Read the word in a different lexer mode
                            return self._ReadYshSingleQuoted(left_id)

                return self._ReadCompoundWord(lex_mode)

    def ParseVarRef(self):
        # type: () -> BracedVarSub
        """DYNAMIC parsing of what's inside ${!ref}

        # Same as VarOf production
        VarRefExpr = VarOf EOF
        """
        self._SetNext(lex_mode_e.VSub_1)

        self._GetToken()
        if self.token_kind != Kind.VSub:
            p_die('Expected var name', self.cur_token)

        part = self._ParseVarOf()
        # NOTE: no ${ } means no part.left and part.right
        part.left = part.token  # cheat to make test pass
        part.right = part.token

        self._GetToken()
        if self.token_type != Id.Eof_Real:
            p_die('Expected end of var ref expression', self.cur_token)
        return part

    def LookPastSpace(self):
        # type: () -> Id_t
        """Look ahead to the next token.

        For the CommandParser to recognize
           array= (1 2 3)
           YSH for (  versus  bash for ((
           YSH if (  versus  if test
           YSH while (  versus  while test
           YSH bare assignment 'grep ='  versus 'grep foo'
        """
        assert self.token_type != Id.Undefined_Tok
        if self.cur_token.id == Id.WS_Space:
            id_ = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
        else:
            id_ = self.cur_token.id
        return id_

    def LookAheadFuncParens(self):
        # type: () -> bool
        """Special lookahead for f( ) { echo hi; } to check for ( )"""
        assert self.token_type != Id.Undefined_Tok

        # We have to handle 2 cases because we buffer a token
        if self.cur_token.id == Id.Op_LParen:  # saw funcname(
            return self.lexer.LookAheadFuncParens(1)  # go back one char

        elif self.cur_token.id == Id.WS_Space:  # saw funcname WHITESPACE
            return self.lexer.LookAheadFuncParens(0)

        else:
            return False

    def ReadWord(self, word_mode):
        # type: (lex_mode_t) -> word_t
        """Read the next word, using the given lexer mode.

        This is a stateful wrapper for the stateless _ReadWord function.
        """
        assert word_mode in (lex_mode_e.ShCommand,
                             lex_mode_e.ShCommandFakeBrack,
                             lex_mode_e.DBracket, lex_mode_e.BashRegex)

        if self.buffered_word:  # For integration with pgen2
            w = self.buffered_word
            self.buffered_word = None
        else:
            while True:
                w = self._ReadWord(word_mode)
                if w is not None:
                    break

        self.returned_newline = (word_.CommandId(w) == Id.Op_Newline)
        return w

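    # The buffered_word path above (illustrative): after ParseVarDecl() etc.
    # hand a trailing token like ; } or newline back via self.buffered_word,
    # the next ReadWord() call returns it directly instead of touching the
    # lexer, so the CommandParser sees that terminator exactly once.
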
 | 
    def ReadArithWord(self):
        # type: () -> word_t
        while True:
            w = self._ReadArithWord()
            if w is not None:
                break
        return w

    def ReadHereDocBody(self, parts):
        # type: (List[word_part_t]) -> None
        """
        A here doc is like a double quoted context, except " isn't special.
        """
        self._ReadLikeDQ(None, False, parts)
        # Returns nothing

    def ReadForPlugin(self):
        # type: () -> CompoundWord
        """For $PS1, $PS4, etc.

        This is just like reading a here doc line.  "\n" is allowed, as
        well as the typical substitutions ${x} $(echo hi) $((1 + 2)).
        """
        w = CompoundWord([])
        self._ReadLikeDQ(None, False, w.parts)
        return w

    def EmitDocToken(self, b):
        # type: (bool) -> None
        self.emit_doc_token = b

    def Multiline(self, b):
        # type: (bool) -> None
        self.multiline = b


if 0:
    import collections
    WORD_HIST = collections.Counter()

# vim: sw=4
 |