| 1 | # Copyright 2016 Andy Chu. All rights reserved.
|
| 2 | # Licensed under the Apache License, Version 2.0 (the "License");
|
| 3 | # you may not use this file except in compliance with the License.
|
| 4 | # You may obtain a copy of the License at
|
| 5 | #
|
| 6 | # http://www.apache.org/licenses/LICENSE-2.0
|
| 7 | """
|
| 8 | cmd_parse.py - Parse high level shell commands.
|
| 9 | """
|
| 10 | from __future__ import print_function
|
| 11 |
|
| 12 | from _devbuild.gen import grammar_nt
|
| 13 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
|
| 14 | from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
|
| 15 | from _devbuild.gen.syntax_asdl import (
|
| 16 | loc,
|
| 17 | SourceLine,
|
| 18 | source,
|
| 19 | parse_result,
|
| 20 | parse_result_t,
|
| 21 | command,
|
| 22 | command_t,
|
| 23 | condition,
|
| 24 | condition_t,
|
| 25 | for_iter,
|
| 26 | ArgList,
|
| 27 | BraceGroup,
|
| 28 | LiteralBlock,
|
| 29 | CaseArm,
|
| 30 | case_arg,
|
| 31 | IfArm,
|
| 32 | pat,
|
| 33 | pat_t,
|
| 34 | Redir,
|
| 35 | redir_param,
|
| 36 | redir_loc,
|
| 37 | redir_loc_t,
|
| 38 | word_e,
|
| 39 | word_t,
|
| 40 | CompoundWord,
|
| 41 | Token,
|
| 42 | word_part_e,
|
| 43 | word_part_t,
|
| 44 | rhs_word,
|
| 45 | rhs_word_t,
|
| 46 | sh_lhs,
|
| 47 | sh_lhs_t,
|
| 48 | AssignPair,
|
| 49 | EnvPair,
|
| 50 | ParsedAssignment,
|
| 51 | assign_op_e,
|
| 52 | NameType,
|
| 53 | proc_sig,
|
| 54 | proc_sig_e,
|
| 55 | Proc,
|
| 56 | Func,
|
| 57 | )
|
| 58 | from core import alloc
|
| 59 | from core import error
|
| 60 | from core.error import p_die
|
| 61 | from core import ui
|
| 62 | from frontend import consts
|
| 63 | from frontend import lexer
|
| 64 | from frontend import location
|
| 65 | from frontend import match
|
| 66 | from frontend import reader
|
| 67 | from mycpp.mylib import log
|
| 68 | from osh import braces
|
| 69 | from osh import bool_parse
|
| 70 | from osh import word_
|
| 71 |
|
| 72 | from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
|
| 73 | if TYPE_CHECKING:
|
| 74 | from core.alloc import Arena
|
| 75 | from core import optview
|
| 76 | from frontend.lexer import Lexer
|
| 77 | from frontend.parse_lib import ParseContext, AliasesInFlight
|
| 78 | from frontend.reader import _Reader
|
| 79 | from osh.word_parse import WordParser
|
| 80 |
|
_ = Kind_str  # for debug prints

# Byte values that _ScanSimpleCommand compares against the result of
# lexer.ByteLookBack() when it checks for whitespace before '('.
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
|
| 85 |
|
| 86 |
|
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Read raw lines up to and including the here doc terminator.

    All the lines are collected up front, rather than parsed one at a time,
    because a construct may span lines:

        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Args:
      line_reader: GetLine() is called on it until the terminator is found
      h: the here doc redirect; h.op is its << or <<- token
      delimiter: text of the terminator line

    Returns:
      (here_lines, last_line).  here_lines has one (SourceLine, start_offset)
      pair per body line, and last_line is the pair for the terminator line.
      start_offset is the count of leading tabs skipped for <<-, else 0.

    Raises:
      A parse error (via p_die) when EOF is reached before the terminator.
    """
    body = []  # type: List[Tuple[SourceLine, int]]
    terminator = None  # type: Tuple[SourceLine, int]
    skip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # bash only warns about an unterminated here doc.  It's fatal
            # here, to be strict and because it gets in the way of reporting
            # other errors.  Blame the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        text = src_line.content

        # For <<-, skip EVERY leading tab -- spaces don't count, and it's not
        # limited to the first tab.
        num_tabs = 0
        if skip_tabs:
            limit = len(text)
            while num_tabs < limit and text[num_tabs] == '\t':
                num_tabs += 1

        pair = (src_line, num_tabs)

        # Trailing whitespace is ignored when matching the terminator.
        if text[num_tabs:].rstrip() == delimiter:
            terminator = pair
            break

        body.append(pair)

    return body, terminator
|
| 137 |
|
| 138 |
|
| 139 | def _MakeLiteralHereLines(
|
| 140 | here_lines, # type: List[Tuple[SourceLine, int]]
|
| 141 | arena, # type: Arena
|
| 142 | do_lossless, # type: bool
|
| 143 | ):
|
| 144 | # type: (...) -> List[word_part_t]
|
| 145 | """Create a Token for each line.
|
| 146 |
|
| 147 | For <<'EOF' and <<-'EOF' - single quoted rule
|
| 148 |
|
| 149 | <<- has non-zero start_offset
|
| 150 | """
|
| 151 | # less precise type, because List[T] is an invariant type
|
| 152 | tokens = [] # type: List[word_part_t]
|
| 153 | for src_line, start_offset in here_lines:
|
| 154 |
|
| 155 | # Maintain lossless invariant for STRIPPED tabs: add a Token to the
|
| 156 | # arena invariant, but don't refer to it.
|
| 157 | #
|
| 158 | # Note: We could use Lit_CharsWithoutPrefix for 'single quoted' EOF
|
| 159 | # here docs, but it's more complex with double quoted EOF docs.
|
| 160 |
|
| 161 | if do_lossless: # avoid garbage, doesn't affect correctness
|
| 162 | arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
|
| 163 | src_line)
|
| 164 |
|
| 165 | t = arena.NewToken(Id.Lit_Chars, start_offset, len(src_line.content),
|
| 166 | src_line)
|
| 167 | tokens.append(t)
|
| 168 | return tokens
|
| 169 |
|
| 170 |
|
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Read the body of a pending here doc and fill in its node.

    Sets stdin_parts and here_end_tok on the redir_param.HereDoc stored in
    r.arg.

    Args:
      parse_ctx: supplies do_lossless and the word parser for unquoted docs
      r: the here doc redirect whose body hasn't been read yet
      line_reader: where the body and terminator lines are read from
      arena: new Tokens are created in it

    Raises:
      A parse error (via p_die) if the delimiter word can't be statically
      evaluated.  _ReadHereLines can also fail.
    """
    here_doc = cast(redir_param.HereDoc, r.arg)

    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded. Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(here_doc.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(here_doc.here_begin))

    body_lines, terminator = _ReadHereLines(line_reader, r, delimiter)

    if not delim_quoted:
        # <<EOF and <<-EOF - the body is parsed as a word
        body_reader = reader.VirtualLineReader(arena, body_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(body_reader)
        w_parser.ReadHereDocBody(here_doc.stdin_parts)  # appends to the list
    else:
        # <<'EOF' and <<-'EOF' - one literal per line
        here_doc.stdin_parts = _MakeLiteralHereLines(body_lines, arena,
                                                     parse_ctx.do_lossless)

    end_line, end_offset = terminator

    # Lossless invariant for STRIPPED tabs: this Token goes in the arena, but
    # nothing refers to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, end_offset, 0, end_line)

    # The terminator gets a Token too, so that the tokens still "add up".
    end_len = len(end_line.content)
    here_doc.here_end_tok = arena.NewToken(Id.Undefined_Tok, end_offset,
                                           end_len, end_line)
|
| 207 |
|
| 208 |
|
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment.

    Args:
      parse_ctx: supplies do_lossless and the arith parser for a[expr]=
      preparsed: left token, close token, part offset and word of the
                 assignment
      arena: gets a source.Reparsed entry while the array index is parsed

    Returns:
      AssignPair whose LHS is one of
        sh_lhs.Name           s=1  s+=1
        sh_lhs.UnparsedIndex  a[x++]=1, when parse_ctx.do_lossless
        sh_lhs.IndexedName    a[x++]=1, otherwise

    Raises:
      error.Parse may come out of the arith parser.  NotImplementedError if
      the brackets of a[...]= are on different lines (AssertionError in the
      do_lossless case).
    """
    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        # The name is the token minus its trailing '=' or '+='
        if lexer.IsPlusEquals(left_token):
            op = assign_op_e.PlusEqual
            var_name = lexer.TokenSliceRight(left_token, -2)
        else:
            op = assign_op_e.Equal
            var_name = lexer.TokenSliceRight(left_token, -1)

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)

        # For arrays, += is detected on the closing token
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        if parse_ctx.do_lossless:
            # The index is kept as an unparsed string
            assert left_token.line == close_token.line, \
                '%s and %s not on same line' % (left_token, close_token)

            index_start = left_token.col + left_token.length
            index_str = left_token.line.content[index_start:close_token.col]
            lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

        else:
            # Similar to SnipCodeString / SnipCodeBlock
            same_line = (left_token.line == close_token.line)
            if not same_line:
                raise NotImplementedError('%s != %s' %
                                          (left_token.line, close_token.line))

            # extract what's between brackets
            code_start = left_token.col + left_token.length
            code_str = left_token.line.content[code_start:close_token.col]
            a_parser = parse_ctx.MakeArithParser(code_str)

            # a[i+1]= is a LHS
            src = source.Reparsed('array LHS', left_token, close_token)
            with alloc.ctx_SourceCode(arena, src):
                index_node = a_parser.Parse()  # may raise error.Parse

            lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    # The RHS is whatever parts of the word come after the LHS.
    all_parts = preparsed.w.parts
    rhs_start = preparsed.part_offset

    rhs = rhs_word.Empty  # type: rhs_word_t
    if rhs_start != len(all_parts):
        rhs_w = CompoundWord(all_parts[rhs_start:])
        word_.TildeDetectAssign(rhs_w)
        rhs = rhs_w

    return AssignPair(left_token, lhs, op, rhs)
|
| 282 |
|
| 283 |
|
| 284 | def _AppendMoreEnv(preparsed_list, more_env):
|
| 285 | # type: (List[ParsedAssignment], List[EnvPair]) -> None
|
| 286 | """Helper to modify a SimpleCommand node.
|
| 287 |
|
| 288 | Args:
|
| 289 | preparsed: a list of 4-tuples from DetectShAssignment
|
| 290 | more_env: a list to append env_pairs to
|
| 291 | """
|
| 292 | for preparsed in preparsed_list:
|
| 293 | left_token = preparsed.left
|
| 294 |
|
| 295 | if left_token.id != Id.Lit_VarLike: # can't be a[x]=1
|
| 296 | p_die(
|
| 297 | "Environment binding shouldn't look like an array assignment",
|
| 298 | left_token)
|
| 299 |
|
| 300 | if lexer.IsPlusEquals(left_token):
|
| 301 | p_die('Expected = in environment binding, got +=', left_token)
|
| 302 |
|
| 303 | var_name = lexer.TokenSliceRight(left_token, -1)
|
| 304 |
|
| 305 | parts = preparsed.w.parts
|
| 306 | n = len(parts)
|
| 307 | offset = preparsed.part_offset
|
| 308 | if offset == n:
|
| 309 | rhs = rhs_word.Empty # type: rhs_word_t
|
| 310 | else:
|
| 311 | w = CompoundWord(parts[offset:])
|
| 312 | word_.TildeDetectAssign(w)
|
| 313 | rhs = w
|
| 314 |
|
| 315 | more_env.append(EnvPair(left_token, var_name, rhs))
|
| 316 |
|
| 317 |
|
| 318 | def _SplitSimpleCommandPrefix(words):
|
| 319 | # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
|
| 320 | """Second pass of SimpleCommand parsing: look for assignment words."""
|
| 321 | preparsed_list = [] # type: List[ParsedAssignment]
|
| 322 | suffix_words = [] # type: List[CompoundWord]
|
| 323 |
|
| 324 | done_prefix = False
|
| 325 | for w in words:
|
| 326 | if done_prefix:
|
| 327 | suffix_words.append(w)
|
| 328 | continue
|
| 329 |
|
| 330 | left_token, close_token, part_offset = word_.DetectShAssignment(w)
|
| 331 | if left_token:
|
| 332 | preparsed_list.append(
|
| 333 | ParsedAssignment(left_token, close_token, part_offset, w))
|
| 334 | else:
|
| 335 | done_prefix = True
|
| 336 | suffix_words.append(w)
|
| 337 |
|
| 338 | return preparsed_list, suffix_words
|
| 339 |
|
| 340 |
|
| 341 | def _MakeSimpleCommand(
|
| 342 | preparsed_list, # type: List[ParsedAssignment]
|
| 343 | suffix_words, # type: List[CompoundWord]
|
| 344 | typed_args, # type: Optional[ArgList]
|
| 345 | block, # type: Optional[LiteralBlock]
|
| 346 | ):
|
| 347 | # type: (...) -> command.Simple
|
| 348 | """Create a command.Simple"""
|
| 349 |
|
| 350 | # FOO=(1 2 3) ls is not allowed.
|
| 351 | for preparsed in preparsed_list:
|
| 352 | if word_.HasArrayPart(preparsed.w):
|
| 353 | p_die("Environment bindings can't contain array literals",
|
| 354 | loc.Word(preparsed.w))
|
| 355 |
|
| 356 | # NOTE: It would be possible to add this check back. But it already happens
|
| 357 | # at runtime in EvalWordSequence2.
|
| 358 | # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
|
| 359 | if 0:
|
| 360 | for w in suffix_words:
|
| 361 | if word_.HasArrayPart(w):
|
| 362 | p_die("Commands can't contain array literals", loc.Word(w))
|
| 363 |
|
| 364 | assert len(suffix_words) != 0
|
| 365 | # {a,b,c} # Use { before brace detection
|
| 366 | # ~/bin/ls # Use ~ before tilde detection
|
| 367 | part0 = suffix_words[0].parts[0]
|
| 368 | blame_tok = location.LeftTokenForWordPart(part0)
|
| 369 |
|
| 370 | # NOTE: We only do brace DETECTION here, not brace EXPANSION. Therefore we
|
| 371 | # can't implement bash's behavior of having say {~bob,~jane}/src work,
|
| 372 | # because we only have a BracedTree.
|
| 373 | # This is documented in spec/brace-expansion.
|
| 374 | # NOTE: Technically we could do expansion outside of 'oshc translate', but it
|
| 375 | # doesn't seem worth it.
|
| 376 | words2 = braces.BraceDetectAll(suffix_words)
|
| 377 | words3 = word_.TildeDetectAll(words2)
|
| 378 |
|
| 379 | more_env = [] # type: List[EnvPair]
|
| 380 | _AppendMoreEnv(preparsed_list, more_env)
|
| 381 |
|
| 382 | # do_fork by default
|
| 383 | return command.Simple(blame_tok, more_env, words3, typed_args, block, True)
|
| 384 |
|
| 385 |
|
class VarChecker(object):
    """Static checks for definition nesting and for var / setvar usage.

    Two parallel stacks are kept, with one entry for each shell function,
    proc, or func that encloses the code being parsed.
    """

    def __init__(self):
        # type: () -> None
        """Start with empty stacks, which means the global level."""
        # Names declared with 'var' so far, per enclosing definition
        self.names = []  # type: List[Dict[str, Id_t]]
        # For location info: the 'proc' token or another token, per
        # enclosing definition
        self.tokens = []  # type: List[Token]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Enter a shell function, proc, or func.

        Bash accepts nested definitions, but that's confusing, because the
        result is the same as two functions at the top level:

            f() {
              g() {
                echo 'top level function defined in another one'
              }
            }

        YSH disallows nested procs and funcs.

        Args:
          blame_tok: token that starts the definition; errors point at it

        Raises:
          A parse error (via p_die) for a proc or func that isn't at the top
          level, or a shell function inside a proc or func.
        """
        is_nested = len(self.tokens) != 0
        if is_nested:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)

            # Anything else is a shell function; look at the outermost
            # definition.
            outer_id = self.tokens[0].id
            if outer_id == Id.KW_Proc or outer_id == Id.KW_Func:
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        scope = {}  # type: Dict[str, Id_t]
        self.names.append(scope)
        self.tokens.append(blame_tok)

    def Pop(self):
        # type: () -> None
        """Leave the definition entered by the matching Push()."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check a declaration or mutation inside a proc or func.

        var x
          error if x was already declared; otherwise x is recorded
        setvar x
          error if x was not declared
        setglobal x
          No errors are possible.  Knowing the names statically would take
          all of these conditions:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  Those are thought to be dynamic
        checks -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

            Package {
              version = '3.11'
              version = '3.12'
            }

        Args:
          keyword_id: Id of the keyword, e.g. Id.KW_Var or Id.KW_SetVar
          var_name: the name being declared or mutated
          blame_tok: location for the error message

        Raises:
          A parse error (via p_die), as described above.
        """
        # There are no static checks at the global level!  Because of
        # 'source', var and setvar are essentially the same there.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        declared = var_name in scope

        if keyword_id == Id.KW_Var:
            if declared:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        elif keyword_id == Id.KW_SetVar:
            if not declared:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
|
| 474 |
|
| 475 |
|
class ctx_VarChecker(object):
    """Context manager that brackets a definition with Push() and Pop().

    Push() happens when the object is constructed, not in __enter__, so an
    error raised by Push() comes out of the constructor call.
    """

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        self.var_checker = var_checker
        var_checker.Push(blame_tok)

    def __enter__(self):
        # type: () -> None
        # Nothing to do; the work was done in __init__
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Returns None, so exceptions from the body still propagate
        self.var_checker.Pop()
|
| 490 |
|
| 491 |
|
class ctx_CmdMode(object):
    """Context manager that sets cmd_parse.cmd_mode, then restores it.

    The new mode is installed when the object is constructed, and the
    previous one is put back on exit.
    """

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.prev_cmd_mode = cmd_parse.cmd_mode
        self.cmd_parse = cmd_parse
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        # Nothing to do; the mode was switched in __init__
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Returns None, so exceptions from the body still propagate
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
|
| 507 |
|
| 508 |
|
# Keyword Ids that CommandParser._AtSecondaryKeyword() tests the current word
# against.
SECONDARY_KEYWORDS = [
    Id.KW_Do,
    Id.KW_Done,
    Id.KW_Then,
    Id.KW_Fi,
    Id.KW_Elif,
    Id.KW_Else,
    Id.KW_Esac,
]
|
| 513 |
|
| 514 |
|
| 515 | class CommandParser(object):
|
| 516 | """Recursive descent parser derived from POSIX shell grammar.
|
| 517 |
|
| 518 | This is a BNF grammar:
|
| 519 | https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
|
| 520 |
|
| 521 | - Augmented with both bash/OSH and YSH constructs.
|
| 522 |
|
| 523 | - We use regex-like iteration rather than recursive references
|
| 524 | ? means optional (0 or 1)
|
| 525 | * means 0 or more
|
| 526 | + means 1 or more
|
| 527 |
|
| 528 | - Keywords are spelled in Caps:
|
| 529 | If Elif Case
|
| 530 |
|
| 531 | - Operator tokens are quoted:
|
| 532 | '(' '|'
|
| 533 |
|
| 534 | or can be spelled directly if it matters:
|
| 535 |
|
| 536 | Op_LParen Op_Pipe
|
| 537 |
|
| 538 | - Non-terminals are snake_case:
|
| 539 | brace_group subshell
|
| 540 |
|
| 541 | Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
|
| 542 | the production should be in the method docstrings, e.g.
|
| 543 |
|
| 544 | def ParseSubshell():
|
| 545 | "
|
| 546 | subshell : '(' compound_list ')'
|
| 547 |
|
| 548 | Looking at Op_LParen # Comment to say how this method is called
|
| 549 | "
|
| 550 |
|
| 551 | The grammar may be factored to make parsing easier.
|
| 552 | """
|
| 553 |
|
def __init__(self,
             parse_ctx,
             parse_opts,
             w_parser,
             lexer,
             line_reader,
             eof_id=Id.Eof_Real):
    # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
    """Store the collaborators and put the parser in its initial state.

    Args:
      parse_ctx: shared parse context; its aliases are used too
      parse_opts: parse options, e.g. parse_brace() and parse_at()
      w_parser: word parser that _GetWord() reads words from
      lexer: used for pushing hints and for lookahead
      line_reader: used to read here doc bodies; also supplies the arena
      eof_id: stored as self.eof_id; defaults to Id.Eof_Real
    """
    self.parse_ctx = parse_ctx
    self.aliases = parse_ctx.aliases  # aliases to expand at parse time

    self.parse_opts = parse_opts
    self.w_parser = w_parser  # type: WordParser  # for normal parsing
    self.lexer = lexer  # for pushing hints, lookahead to (
    self.line_reader = line_reader  # for here docs
    self.eof_id = eof_id

    self.arena = line_reader.arena  # for adding here doc and alias spans
    self.aliases_in_flight = []  # type: AliasesInFlight

    # A hacky boolean to remove 'if cd / {' ambiguity.
    self.allow_block = True

    # Stack of booleans for nested Attr and SHELL nodes.
    #   Attr nodes allow bare assignment x = 42, but not shell x=42.
    #   SHELL nodes are the inverse.  'var x = 42' is preferred in shell
    #   nodes, but x=42 is still allowed.
    #
    # Note: this stack could be optimized by turning it into an integer and
    # binary encoding.
    self.hay_attrs_stack = []  # type: List[bool]

    # Note: VarChecker is instantiated with each CommandParser, which means
    # that two 'proc foo' -- inside a command sub and outside -- don't
    # conflict, because they use different CommandParser instances.  I think
    # this OK but you can imagine different behaviors.
    self.var_checker = VarChecker()

    self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

    # Sets the cursor state and the pending here doc list
    self.Reset()
|
| 595 |
|
| 596 | # Init_() function for "keyword arg"
|
def Init_AliasesInFlight(self, aliases_in_flight):
    # type: (AliasesInFlight) -> None
    """Replace the aliases_in_flight list that __init__ left empty.

    This is called after construction, in place of a keyword arg to
    __init__.
    """
    self.aliases_in_flight = aliases_in_flight
|
| 600 |
|
def Reset(self):
    # type: () -> None
    """Put the parser's own state back to its initial values.

    Called by the interactive loop, and by __init__.
    """
    # Here doc redirects whose bodies haven't been read yet
    self.pending_here_docs = []  # type: List[Redir]

    # Cursor state, which _GetWord() updates
    self.cur_word = None  # type: word_t  # current word
    self.c_id = Id.Undefined_Tok
    self.c_kind = Kind.Undefined

    # Anything other than Undefined makes the next _GetWord() read a word
    self.next_lex_mode = lex_mode_e.ShCommand
|
| 614 |
|
def ResetInputObjects(self):
    # type: () -> None
    """Reset the objects that input comes from.

    That's the word parser, the lexer, and the line reader, in that order.
    The parser's own state is handled separately, by Reset().

    Called by the interactive loop.
    """
    self.w_parser.Reset()
    self.lexer.ResetInputObjects()
    self.line_reader.Reset()
|
| 624 |
|
def _SetNext(self):
    # type: () -> None
    """Mark the current word as consumed.

    Nothing is read here.  This only records the lex mode, and the next call
    to _GetWord() does the actual read.  Calling it twice in a row is the
    same as calling it once.
    """
    self.next_lex_mode = lex_mode_e.ShCommand
|
| 633 |
|
def _SetNextBrack(self):
    # type: () -> None
    """Like _SetNext(), but the next word is read in ShCommandFakeBrack mode.

    _ScanSimpleCommand() uses this so that a bracket is allowed from the
    second word on.
    """
    self.next_lex_mode = lex_mode_e.ShCommandFakeBrack
|
| 637 |
|
def _GetWord(self):
    # type: () -> None
    """Make sure cur_word, c_id and c_kind describe the word at the cursor.

    Call this before making a decision based on Id or Kind.

    If _SetNext() or _SetNextBrack() was called since the last read, a word
    is read and the cursor state is updated.  Otherwise this does nothing.

    Reading a newline operator also reads the bodies of any pending here
    docs.
    """
    if self.next_lex_mode == lex_mode_e.Undefined:
        return  # the current word hasn't been consumed yet

    w = self.w_parser.ReadWord(self.next_lex_mode)
    #log("w %s", w)

    # Here docs only happen in command mode, so other kinds of newlines
    # don't count.
    if w.tag() == word_e.Operator:
        op_tok = cast(Token, w)
        if op_tok.id == Id.Op_Newline:
            for here_redir in self.pending_here_docs:
                _ParseHereDocBody(self.parse_ctx, here_redir,
                                  self.line_reader, self.arena)
            # No .clear() until Python 3.3.
            del self.pending_here_docs[:]

    self.cur_word = w
    self.c_kind = word_.CommandKind(w)
    # Has special case for Id.Lit_{LBrace,RBrace,Equals}
    self.c_id = word_.CommandId(w)

    # Further calls are no-ops until _SetNext()
    self.next_lex_mode = lex_mode_e.Undefined
|
| 667 |
|
def _Eat(self, c_id, msg=None):
    # type: (Id_t, Optional[str]) -> word_t
    """Consume the current word, which must have the given Id.

    Args:
      c_id: the Id we expected
      msg: error message to use instead of the generic one

    Returns:
      The word that was consumed.

    Raises:
      A parse error (via p_die) at the current word, if its Id isn't c_id.
    """
    self._GetWord()

    if self.c_id != c_id:
        err_msg = msg
        if err_msg is None:
            err_msg = 'Expected word type %s, got %s' % (
                ui.PrettyId(c_id), ui.PrettyId(self.c_id))
        p_die(err_msg, loc.Word(self.cur_word))

    eaten = self.cur_word
    self._SetNext()
    return eaten
|
| 686 |
|
def _NewlineOk(self):
    # type: () -> None
    """Consume the current word if it's a newline; otherwise leave it."""
    self._GetWord()
    if self.c_id != Id.Op_Newline:
        return
    self._SetNext()
|
| 693 |
|
def _AtSecondaryKeyword(self):
    # type: () -> bool
    """Return whether the current word is one of SECONDARY_KEYWORDS.

    The word is looked at but not consumed.
    """
    self._GetWord()
    return self.c_id in SECONDARY_KEYWORDS
|
| 700 |
|
def ParseRedirect(self):
    # type: () -> Redir
    """Parse one redirect: an operator token, then a word.

    The current word must have Kind.Redir.  Both the operator and the word
    after it are consumed.

    Returns:
      Redir node.  For << and <<- the argument is a redir_param.HereDoc
      with an empty body, and the node is also added to pending_here_docs.
      Otherwise the argument is the word, with tilde detection applied.

    Raises:
      A parse error (via p_die) if the operator isn't followed by a word.
    """
    self._GetWord()
    assert self.c_kind == Kind.Redir, self.cur_word
    op_tok = cast(Token, self.cur_word)  # for MyPy

    # Note: the lexer could distinguish between
    #  >out
    #  3>out
    #  {fd}>out
    #
    # which would make the code below faster.  But small string optimization
    # would also speed it up, since redirects are small.

    # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
    # possibly "unify" the IDs by subtracting a constant like 8 or 16?

    op_val = lexer.TokenVal(op_tok)
    first_ch = op_val[0]

    where = None  # type: redir_loc_t
    if first_ch == '{':  # {fd}>out
        close_pos = op_val.find('}')
        assert close_pos != -1  # lexer ensures this
        where = redir_loc.VarName(op_val[1:close_pos])

    elif first_ch.isdigit():  # 3>out
        # Only one or two leading digits are looked at
        num_digits = 1
        if op_val[1].isdigit():
            num_digits = 2
        where = redir_loc.Fd(int(op_val[:num_digits]))

    else:  # >out
        where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

    self._SetNext()  # done with the operator

    self._GetWord()
    if self.c_kind != Kind.Word:
        p_die('Invalid token after redirect operator',
              loc.Word(self.cur_word))

    # Here doc
    if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
        here_doc = redir_param.HereDoc.CreateNull()
        here_doc.here_begin = self.cur_word
        here_doc.stdin_parts = []

        here_redir = Redir(op_tok, where, here_doc)

        # will be filled on next newline.
        self.pending_here_docs.append(here_redir)

        self._SetNext()
        return here_redir

    # Other redirect
    target = self.cur_word
    tilde = word_.TildeDetect(target)
    if tilde:
        target = tilde
    self._SetNext()

    # We should never get Empty, Token, etc.
    assert target.tag() == word_e.Compound, target
    return Redir(op_tok, where, cast(CompoundWord, target))
|
| 763 |
|
def _ParseRedirectList(self):
    # type: () -> List[Redir]
    """Parse zero or more redirects at the cursor.

    This is used for blocks only, not commands.

    Returns:
      The redirects in source order; empty if the current word isn't a
      redirect operator.
    """
    redirects = []  # type: List[Redir]

    # This prediction needs to ONLY accept redirect operators.  Should we
    # make them a separate Kind?
    self._GetWord()
    while self.c_kind == Kind.Redir:
        redirects.append(self.ParseRedirect())
        # ParseRedirect() already ends with _SetNext(); repeating it has no
        # further effect.
        self._SetNext()
        self._GetWord()

    return redirects
|
| 783 |
|
def _MaybeParseRedirectList(self, node):
    # type: (command_t) -> command_t
    """Attach any redirects at the cursor to a command.

    Args:
      node: the command that was just parsed

    Returns:
      node itself if the current word isn't a redirect operator.  Otherwise
      a command.Redirect that wraps node and holds all the consecutive
      redirects.
    """
    self._GetWord()
    if self.c_kind != Kind.Redir:
        return node

    redirects = [self.ParseRedirect()]

    # This prediction needs to ONLY accept redirect operators.  Should we
    # make them a separate Kind?
    self._GetWord()
    while self.c_kind == Kind.Redir:
        redirects.append(self.ParseRedirect())
        # ParseRedirect() already ends with _SetNext(); repeating it has no
        # further effect.
        self._SetNext()
        self._GetWord()

    return command.Redirect(node, redirects)
|
| 808 |
|
| 809 | def _ScanSimpleCommand(self):
|
| 810 | # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
|
| 811 | """YSH extends simple commands with typed args and blocks.
|
| 812 |
|
| 813 | Shell has a recursive grammar, which awkwardly expresses
|
| 814 | non-grammatical rules:
|
| 815 |
|
| 816 | simple_command : cmd_prefix cmd_word cmd_suffix
|
| 817 | | cmd_prefix cmd_word
|
| 818 | | cmd_prefix
|
| 819 | | cmd_name cmd_suffix
|
| 820 | | cmd_name
|
| 821 | ;
|
| 822 | cmd_name : WORD /* Apply rule 7a */
|
| 823 | ;
|
| 824 | cmd_word : WORD /* Apply rule 7b */
|
| 825 | ;
|
| 826 | cmd_prefix : io_redirect
|
| 827 | | cmd_prefix io_redirect
|
| 828 | | ASSIGNMENT_WORD
|
| 829 | | cmd_prefix ASSIGNMENT_WORD
|
| 830 | ;
|
| 831 | cmd_suffix : io_redirect
|
| 832 | | cmd_suffix io_redirect
|
| 833 | | WORD
|
| 834 | | cmd_suffix WORD
|
| 835 |
|
| 836 | YSH grammar:
|
| 837 |
|
| 838 | redirect = redir_op WORD
|
| 839 | item = WORD | redirect
|
| 840 |
|
| 841 | typed_args =
|
| 842 | '(' arglist ')'
|
| 843 | | '[' arglist ']'
|
| 844 |
|
| 845 | simple_command =
|
| 846 | cmd_prefix* item+ typed_args? BraceGroup? cmd_suffix*
|
| 847 |
|
| 848 | Notably, redirects shouldn't appear after typed args, or after
|
| 849 | BraceGroup.
|
| 850 |
|
| 851 | Examples:
|
| 852 |
|
| 853 | This is an assignment:
|
| 854 | foo=1 >out
|
| 855 |
|
| 856 | This is a command.Simple
|
| 857 | >out
|
| 858 |
|
| 859 | What about
|
| 860 | >out (42)
|
| 861 | """
|
| 862 | redirects = [] # type: List[Redir]
|
| 863 | words = [] # type: List[CompoundWord]
|
| 864 | typed_args = None # type: Optional[ArgList]
|
| 865 | block = None # type: Optional[LiteralBlock]
|
| 866 |
|
| 867 | first_word_caps = False # does first word look like Caps, but not CAPS
|
| 868 |
|
| 869 | i = 0
|
| 870 | while True:
|
| 871 | self._GetWord()
|
| 872 |
|
| 873 | # If we got { }, change it to something that's not Kind.Word
|
| 874 | kind2 = self.c_kind
|
| 875 | if (kind2 == Kind.Word and self.parse_opts.parse_brace() and
|
| 876 | self.c_id in (Id.Lit_LBrace, Id.Lit_RBrace)):
|
| 877 | kind2 = Kind.Op
|
| 878 |
|
| 879 | if kind2 == Kind.Redir:
|
| 880 | node = self.ParseRedirect()
|
| 881 | redirects.append(node)
|
| 882 |
|
| 883 | elif kind2 == Kind.Word:
|
| 884 | w = cast(CompoundWord, self.cur_word) # Kind.Word ensures this
|
| 885 |
|
| 886 | if i == 0:
|
| 887 | # Disallow leading =a because it's confusing
|
| 888 | part0 = w.parts[0]
|
| 889 | if part0.tag() == word_part_e.Literal:
|
| 890 | tok = cast(Token, part0)
|
| 891 | if tok.id == Id.Lit_Equals:
|
| 892 | p_die(
|
| 893 | "=word isn't allowed. Hint: add a space after =, or quote it",
|
| 894 | tok)
|
| 895 |
|
| 896 | # Is the first word a Hay Attr word?
|
| 897 | #
|
| 898 | # Can we remove this StaticEval() call, and just look
|
| 899 | # inside Token? I think once we get rid of SHELL nodes,
|
| 900 | # this will be simpler.
|
| 901 |
|
| 902 | ok, word_str, quoted = word_.StaticEval(w)
|
| 903 | # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
|
| 904 | if (ok and len(word_str) and word_str[0].isupper() and
|
| 905 | not word_str.isupper()):
|
| 906 | first_word_caps = True
|
| 907 | #log('W %s', word_str)
|
| 908 |
|
| 909 | words.append(w)
|
| 910 |
|
| 911 | else:
|
| 912 | break
|
| 913 |
|
| 914 | self._SetNextBrack() # Allow bracket for SECOND word on
|
| 915 | i += 1
|
| 916 |
|
| 917 | # my-cmd (x) or my-cmd [x]
|
| 918 | self._GetWord()
|
| 919 | if self.c_id == Id.Op_LParen:
|
| 920 | # 1. Check that there's a preceding space
|
| 921 | prev_byte = self.lexer.ByteLookBack()
|
| 922 | if prev_byte not in (SPACE_CH, TAB_CH):
|
| 923 | if self.parse_opts.parse_at():
|
| 924 | p_die('Space required before (', loc.Word(self.cur_word))
|
| 925 | else:
|
| 926 | # inline func call like @sorted(x) is invalid in OSH, but the
|
| 927 | # solution isn't a space
|
| 928 | p_die(
|
| 929 | 'Unexpected left paren (might need a space before it)',
|
| 930 | loc.Word(self.cur_word))
|
| 931 |
|
| 932 | # 2. Check that it's not (). We disallow this because it's a no-op and
|
| 933 | # there could be confusion with shell func defs.
|
| 934 | # For some reason we need to call lexer.LookPastSpace, not
|
| 935 | # w_parser.LookPastSpace. I think this is because we're at (, which is
|
| 936 | # an operator token. All the other cases are like 'x=', which is PART
|
| 937 | # of a word, and we don't know if it will end.
|
| 938 | next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
|
| 939 | if next_id == Id.Op_RParen:
|
| 940 | p_die('Empty arg list not allowed', loc.Word(self.cur_word))
|
| 941 |
|
| 942 | typed_args = self.w_parser.ParseProcCallArgs(
|
| 943 | grammar_nt.ysh_eager_arglist)
|
| 944 |
|
| 945 | self._SetNext()
|
| 946 |
|
| 947 | elif self.c_id == Id.Op_LBracket: # only when parse_bracket set
|
| 948 | typed_args = self.w_parser.ParseProcCallArgs(
|
| 949 | grammar_nt.ysh_lazy_arglist)
|
| 950 |
|
| 951 | self._SetNext()
|
| 952 |
|
| 953 | self._GetWord()
|
| 954 |
|
| 955 | # Allow redirects after typed args, e.g.
|
| 956 | # json write (x) > out.txt
|
| 957 | if self.c_kind == Kind.Redir:
|
| 958 | redirects.extend(self._ParseRedirectList())
|
| 959 |
|
| 960 | # my-cmd { echo hi } my-cmd (x) { echo hi } ...
|
| 961 | if (self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace and
|
| 962 | # Disabled for if/while condition, etc.
|
| 963 | self.allow_block):
|
| 964 |
|
| 965 | # allow x = 42
|
| 966 | self.hay_attrs_stack.append(first_word_caps)
|
| 967 | brace_group = self.ParseBraceGroup()
|
| 968 |
|
| 969 | # So we can get the source code back later
|
| 970 | lines = self.arena.SaveLinesAndDiscard(brace_group.left,
|
| 971 | brace_group.right)
|
| 972 | block = LiteralBlock(brace_group, lines)
|
| 973 |
|
| 974 | self.hay_attrs_stack.pop()
|
| 975 |
|
| 976 | self._GetWord()
|
| 977 |
|
| 978 | # Allow redirects after block, e.g.
|
| 979 | # cd /tmp { echo $PWD } > out.txt
|
| 980 | if self.c_kind == Kind.Redir:
|
| 981 | redirects.extend(self._ParseRedirectList())
|
| 982 |
|
| 983 | return redirects, words, typed_args, block
|
| 984 |
|
def _MaybeExpandAliases(self, words):
    # type: (List[CompoundWord]) -> Optional[command_t]
    """Expand aliases at the front of a simple command and re-parse the result.

    Args:
      words: the CompoundWords of the command, after any env bindings

    Returns:
      A command node if at least one alias was expanded, or None otherwise
      (in which case the caller builds the command itself).

    Design notes:
    - Expansion is hooked in at the end of ParseSimpleCommand().
    - The expanded text is parsed with a fresh parser, using the
      _ParseCommandTerm() rule.

    This doesn't quite match what other shells do, but no better place has
    been found.  Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

    NOTE: dash handles aliases in a totally different way.  It has a global
    variable checkkwd in parser.c, assigned all over the grammar like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

    The readtoken() function checks (checkkwd & CHKALIAS) and then calls
    lookupalias().  This seems to provide a consistent behavior among shells,
    but it's less modular and testable.

    Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
    """
    # Reuse the list handed down from an enclosing expansion; start a fresh
    # one only when nothing is in flight.  It is passed on to the nested
    # parser below.
    in_flight = self.aliases_in_flight
    if len(in_flight) == 0:
        in_flight = []

    # Both of these are only used to describe the alias source (for errors).
    alias_name = None  # type: Optional[str]
    blame_loc = loc.Word(words[0])

    pieces = []  # type: List[str]
    num_words = len(words)
    pos = 0

    while pos < num_words:
        ok, name, quoted = word_.StaticEval(words[pos])
        if quoted or not ok:
            break

        replacement = self.aliases.get(name)
        if replacement is None:
            break

        # Guard against infinite expansion such as alias echo='echo x'.  The
        # word position is part of the key so that the second word of
        # 'echo echo' can still be expanded.
        key = (name, pos)
        if key in in_flight:
            break

        if pos == 0:
            alias_name = name

        in_flight.append(key)
        pieces.append(replacement)
        pos += 1

        if replacement.endswith(' '):
            # A trailing space means the next word is also a candidate.
            continue

        # alias e='echo [ ' is the same expansion as alias e='echo ['.  The
        # trailing space only signals whether to keep expanding, so supply
        # the separator ourselves and stop.
        pieces.append(' ')
        break

    if len(pieces) == 0:  # nothing expanded; the caller does the parsing
        return None

    # Append the source text of the words that were not expanded.
    if pos < num_words:
        first_tok = location.LeftTokenForWord(words[pos])
        last_tok = location.RightTokenForWord(words[-1])
        # These tokens used to be required to be on the same line; that
        # constraint no longer applies.
        pieces.append(self.arena.SnipCodeString(first_tok, last_tok))

    code_str = ''.join(pieces)

    # TODO: Aliases break static parsing (like backticks), so this should
    # use its own Arena (alloc.Arena()).  This matters for Hay, which calls
    # SaveLinesAndDiscard().
    arena = self.arena

    line_reader = reader.StringLineReader(code_str, arena)
    cp = self.parse_ctx.MakeOshParser(line_reader)
    cp.Init_AliasesInFlight(in_flight)

    # Imported here to break a circular dependency.
    from frontend import parse_lib

    # The interaction between COMPLETION and ALIASES requires special care.
    # See docstring of BeginAliasExpansion() in parse_lib.py.
    src = source.Alias(alias_name, blame_loc)
    with alloc.ctx_SourceCode(arena, src):
        with parse_lib.ctx_Alias(self.parse_ctx.trail):
            # _ParseCommandTerm() handles multiline commands, compound
            # commands, etc. as opposed to ParseLogicalLine().  A parse
            # failure in the expansion is fatal, so error.Parse simply
            # propagates to the caller.
            node = cp._ParseCommandTerm()

    if 0:
        log('AFTER expansion:')
        node.PrettyPrint()

    return node
| 1123 |
|
def ParseSimpleCommand(self):
    # type: () -> command_t
    """Parse a simple command: bindings, words, redirects, typed args, block.

    Fixed transcription of the POSIX grammar:

    io_file        : '<'       filename
                   | LESSAND   filename
                     ...

    io_here        : DLESS     here_end
                   | DLESSDASH here_end

    redirect       : IO_NUMBER (io_redirect | io_here)

    prefix_part    : ASSIGNMENT_WORD | redirect
    cmd_part       : WORD | redirect

    assign_kw      : Declare | Export | Local | Readonly

    # Without any words it is parsed as a command, not an assignment
    assign_listing : assign_kw

    # Now we have something to do (might be changing assignment flags too)
    # NOTE: any prefixes should be a warning, but they are allowed in shell.
    assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

    # an external command, a function call, or a builtin -- a "word_command"
    word_command   : prefix_part* cmd_part+

    # NOTE(review): 'proc_command' below presumably means the word_command
    # rule above -- confirm.
    simple_command : assign_listing
                   | assignment
                   | proc_command

    Simple imperative algorithm:

    1) Read a list of words and redirects.  Append them to separate lists.
    2) Look for the first non-assignment word.  If it's declare, etc., then
       keep parsing words AND assign words.  Otherwise, just parse words.
    3) If there are no non-assignment words, then it's a global assignment.

      { redirects, global assignments } OR
      { redirects, prefix_bindings, words } OR
      { redirects, ERROR_prefix_bindings, keyword, assignments, words }

    THEN CHECK that prefix bindings don't have any array literal parts!
    Global assignments and keyword assignments can have them, of course.
    Well, actually EXPORT shouldn't have them either -- WARNING

    3 cases we want to warn: prefix_bindings for assignment, and array literal
    in prefix bindings, or export

    A command can be an assignment word, word, or redirect on its own.

        ls
        >out.txt

        >out.txt FOO=bar   # this touches the file

    Or any sequence:
        ls foo bar
        <in.txt ls foo bar >out.txt
        <in.txt ls >out.txt foo bar

    Or add one or more environment bindings:
        VAR=val env
        >out.txt VAR=val env

    here_end vs filename is a matter of whether we test that it's quoted.  e.g.
    <<EOF vs <<'EOF'.

    Returns:
      One of the following, wrapped in command.Redirect when redirects were
      collected (control flow nodes are returned unwrapped):
      - command.Redirect(command.NoOp, ...) when there are no words at all
      - command.ShAssignment when there are only NAME=value words
      - command.Retval or command.ControlFlow for control flow keywords
      - command.ExpandedAlias when an alias was expanded
      - the node built by _MakeSimpleCommand() otherwise

    Invalid combinations are reported through p_die().
    """
    redirects, words, typed_args, block = self._ScanSimpleCommand()

    # Location used to blame typed args or a block in error messages.
    typed_loc = None  # type: Optional[Token]
    if block:
        typed_loc = block.brace_group.left
    if typed_args:
        typed_loc = typed_args.left  # preferred over block location

    if len(words) == 0:  # e.g.  >out.txt  # redirect without words
        assert len(redirects) != 0
        if typed_loc is not None:
            p_die("Unexpected typed args", typed_loc)
        return command.Redirect(command.NoOp, redirects)

    # Separate the leading NAME=value words from the rest of the command.
    preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
    if len(preparsed_list):
        # Disallow X=Y inside proc and func
        #   and inside Hay Attr blocks
        # But allow X=Y at the top level
        #   for interactive use foo=bar
        #   for global constants GLOBAL=~/src
        #     because YSH assignment doesn't have tilde sub
        if len(suffix_words) == 0:
            if (self.cmd_mode != cmd_mode_e.Shell or
                (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                p_die('Use var/setvar to assign in YSH',
                      preparsed_list[0].left)

    # Set a reference to words and redirects for completion.  We want to
    # inspect this state after a failed parse.
    self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

    if len(suffix_words) == 0:
        if typed_loc is not None:
            p_die("Unexpected typed args", typed_loc)

        # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
        pairs = []  # type: List[AssignPair]
        for preparsed in preparsed_list:
            pairs.append(
                _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

        left_tok = location.LeftTokenForCompoundWord(words[0])
        assign_node = command.ShAssignment(left_tok, pairs)
        if len(redirects):
            return command.Redirect(assign_node, redirects)
        else:
            return assign_node

    # From here on there is at least one non-assignment word.
    kind, kw_token = word_.IsControlFlow(suffix_words[0])

    if kind == Kind.ControlFlow:
        # Redirects on control flow are rejected unless parse_ignored is on.
        if not self.parse_opts.parse_ignored() and len(redirects):
            p_die("Control flow shouldn't have redirects", kw_token)
        if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
            p_die("Control flow shouldn't have environment bindings",
                  preparsed_list[0].left)

        if kw_token.id == Id.ControlFlow_Return:
            # return x - inside procs and shell functions
            # return (x) - inside funcs
            if typed_args is None:
                if self.cmd_mode not in (cmd_mode_e.Shell,
                                         cmd_mode_e.Proc):
                    p_die('Shell-style returns not allowed here', kw_token)
            else:
                if self.cmd_mode != cmd_mode_e.Func:
                    p_die('Typed return is only allowed inside func',
                          typed_loc)
                if len(typed_args.pos_args) != 1:
                    p_die("Typed return expects one argument", typed_loc)
                if len(typed_args.named_args) != 0:
                    p_die("Typed return doesn't take named arguments",
                          typed_loc)
                return command.Retval(kw_token, typed_args.pos_args[0])

        # Except for return (x), we shouldn't have typed args
        if typed_loc is not None:
            p_die("Unexpected typed args", typed_loc)

        # Attach the token for errors.  (ShAssignment may not need it.)
        # A control flow keyword takes at most one argument word.
        if len(suffix_words) == 1:
            arg_word = None  # type: Optional[word_t]
        elif len(suffix_words) == 2:
            arg_word = suffix_words[1]
        else:
            p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                  loc.Word(suffix_words[2]))

        return command.ControlFlow(kw_token, arg_word)

    # Alias expansion only understands words, not typed args ( ) or block { }
    if not typed_args and not block and self.parse_opts.expand_aliases():
        # If any expansions were detected, then parse again.
        expanded_node = self._MaybeExpandAliases(suffix_words)
        if expanded_node:
            # Attach env bindings and redirects to the expanded node.
            more_env = []  # type: List[EnvPair]
            _AppendMoreEnv(preparsed_list, more_env)
            exp = command.ExpandedAlias(expanded_node, more_env)
            if len(redirects):
                return command.Redirect(exp, redirects)
            else:
                return exp

    # TODO: check that we don't have env1=x x[1]=y env2=z here.

    # FOO=bar printenv.py FOO
    node = _MakeSimpleCommand(preparsed_list, suffix_words, typed_args,
                              block)
    if len(redirects):
        return command.Redirect(node, redirects)
    else:
        return node
| 1307 |
|
def ParseBraceGroup(self):
    # type: () -> BraceGroup
    """Parse a { ... } group and return it as a BraceGroup.

    Original:
      brace_group : LBrace command_list RBrace ;

    YSH:
      brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

    A doc comment is only looked for when a newline follows the left brace.
    """
    left = word_.BraceToken(self._Eat(Id.Lit_LBrace))

    # Stays None unless a doc comment directly follows the opening newline.
    doc_token = None  # type: Token

    self._GetWord()
    if self.c_id == Id.Op_Newline:
        self._SetNext()
        # Read the next word with the flag set, so ### isn't skipped over.
        with word_.ctx_EmitDocToken(self.w_parser):
            self._GetWord()

    if self.c_id == Id.Ignored_Comment:
        # Id.Ignored_Comment means the current word is a Token.
        doc_token = cast(Token, self.cur_word)
        self._SetNext()

    body = self._ParseCommandList()

    right = word_.BraceToken(self._Eat(Id.Lit_RBrace))

    # Note(andychu): Related ASDL bug #1216.  Choosing the Python [] behavior
    # would allow us to revert this back to None, which was changed in
    # https://github.com/oilshell/oil/pull/1211.  Choosing the C++ nullptr
    # behavior saves allocations, but is less type safe.
    return BraceGroup(left, doc_token, body.children, right)
| 1347 |
|
def ParseDoGroup(self):
    # type: () -> command.DoGroup
    """Parse 'do ... done'; used by ForEach, ForExpr, While, Until.

    Should this be a Do node?

      do_group : Do command_list Done ;          /* Apply rule 6 */

    Returns:
      command.DoGroup holding both keyword tokens and the body's commands.
    """
    do_kw = word_.AsKeywordToken(self._Eat(Id.KW_Do))

    body = self._ParseCommandList()  # could be anything

    done_kw = word_.AsKeywordToken(self._Eat(Id.KW_Done))

    return command.DoGroup(do_kw, body.children, done_kw)
| 1363 |
|
def ParseForWords(self):
    # type: () -> Tuple[List[CompoundWord], Optional[Token]]
    """Collect the words a for loop iterates over, up to the separator.

      for_words : WORD* for_sep
                ;
      for_sep   : ';' newline_ok
                | NEWLINES
                ;

    With parse_brace on, a left brace also ends the list; it is not consumed.

    Returns:
      (words, semi_tok), where semi_tok is the ';' token if that is what
      ended the list (so it can be removed later), and None otherwise.
    """
    semi_tok = None  # type: Optional[Token]
    collected = []  # type: List[CompoundWord]

    while True:
        self._GetWord()

        if self.c_id == Id.Op_Semi:
            semi_tok = cast(Token, self.cur_word)
            self._SetNext()
            self._NewlineOk()
            break

        if self.c_id == Id.Op_Newline:
            self._SetNext()
            break

        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            break

        cur = self.cur_word
        if cur.tag() != word_e.Compound:
            # TODO: Can we also show a pointer to the 'for' keyword?
            p_die('Invalid word in for loop', loc.Word(cur))

        collected.append(cast(CompoundWord, cur))
        self._SetNext()

    return collected, semi_tok
| 1399 |
|
def _ParseForExprLoop(self, for_kw):
    # type: (Token) -> command.ForExpr
    """Parse a C-style for loop; for_kw is the 'for' keyword token.

    Shell:
      for '((' init ';' cond ';' update '))' for_sep? do_group

    YSH:
      for '((' init ';' cond ';' update '))' for_sep? brace_group
    """
    loop = self.w_parser.ReadForExpression()
    loop.keyword = for_kw

    self._SetNext()
    self._GetWord()

    sep_id = self.c_id
    if sep_id == Id.Op_Semi:
        self._SetNext()
        self._NewlineOk()
    elif sep_id == Id.Op_Newline:
        self._SetNext()
    elif sep_id not in (Id.KW_Do, Id.Lit_LBrace):
        # 'do' may follow directly (missing semicolon/newline allowed), and
        # so may '{', which does NOT require parse_brace.  Anything else is
        # an error.
        p_die('Invalid word after for expression', loc.Word(self.cur_word))

    # Read self.c_id again here rather than reusing sep_id.
    if self.c_id == Id.Lit_LBrace:
        loop.body = self.ParseBraceGroup()
    else:
        loop.body = self.ParseDoGroup()

    return loop
| 1432 |
|
def _ParseForEachLoop(self, for_kw):
    # type: (Token) -> command.ForEach
    """Parse a for-each loop, starting at the first loop variable.

    Reads loop variable names starting from self.cur_word, then what to
    iterate over, then the body:

      in (expr)  -> for_iter.YshExpr; must be followed by '{'
      in WORDS   -> for_iter.Words; at most 2 loop variables
      do  or  ;  -> for_iter.Args

    The body is a brace group if the current word is '{', otherwise a
    do group.

    Args:
      for_kw: the 'for' keyword token; stored on the node and used to blame
        the "at most 2 loop variables" error.

    Returns:
      The populated command.ForEach node.
    """
    node = command.ForEach.CreateNull(alloc_lists=True)
    node.keyword = for_kw

    # Phase 1: loop variable names
    num_iter_names = 0
    while True:
        w = self.cur_word

        # Hack that makes the language more familiar:
        # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
        # - 'x y' is also accepted but not idiomatic.
        UP_w = w  # keep the un-narrowed reference for the cast below
        if w.tag() == word_e.Compound:
            w = cast(CompoundWord, UP_w)
            if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                # Drops the trailing comma by mutating the word in place.
                w.parts.pop()

        ok, iter_name, quoted = word_.StaticEval(w)
        if not ok or quoted:  # error: for $x
            p_die('Expected loop variable (a constant word)', loc.Word(w))

        if not match.IsValidVarName(iter_name):  # error: for -
            # TODO: consider commas?
            if ',' in iter_name:
                p_die('Loop variables look like x, y (fix spaces)',
                      loc.Word(w))
            p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

        node.iter_names.append(iter_name)
        num_iter_names += 1
        self._SetNext()

        self._GetWord()
        # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
        # Subtlety: 'var' is KW_Var and is a valid loop name
        if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
            break

        # No terminator seen yet; a 4th name is never accepted.
        if num_iter_names == 3:
            p_die('Unexpected word after 3 loop variables',
                  loc.Word(self.cur_word))

    self._NewlineOk()

    # Phase 2: what to iterate over
    self._GetWord()
    if self.c_id == Id.KW_In:
        # Ideally we would want ( not 'in'.  But we still have to fix the bug
        # where we require a SPACE between in and (
        # for x in(y)   # should be accepted, but isn't

        expr_blame = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip in
        if self.w_parser.LookPastSpace() == Id.Op_LParen:
            enode = self.w_parser.ParseYshExprForCommand()
            node.iterable = for_iter.YshExpr(enode, expr_blame)

            # For simplicity, we don't accept for x in (obj); do ...
            self._GetWord()
            if self.c_id != Id.Lit_LBrace:
                p_die('Expected { after iterable expression',
                      loc.Word(self.cur_word))
        else:
            semi_tok = None  # type: Optional[Token]
            iter_words, semi_tok = self.ParseForWords()
            node.semi_tok = semi_tok

            # With parse_bare_word off, a single unquoted word that looks
            # like a variable name is rejected.
            if not self.parse_opts.parse_bare_word() and len(
                    iter_words) == 1:
                ok, s, quoted = word_.StaticEval(iter_words[0])
                if ok and match.IsValidVarName(s) and not quoted:
                    p_die(
                        'Surround this word with either parens or quotes (parse_bare_word)',
                        loc.Word(iter_words[0]))

            words2 = braces.BraceDetectAll(iter_words)
            words3 = word_.TildeDetectAll(words2)
            node.iterable = for_iter.Words(words3)

            # Now that we know there are words, do an extra check
            if num_iter_names > 2:
                p_die('Expected at most 2 loop variables', for_kw)

    elif self.c_id == Id.KW_Do:
        node.iterable = for_iter.Args  # implicitly loop over "$@"
        # do not advance

    elif self.c_id == Id.Op_Semi:  # for x; do
        node.iterable = for_iter.Args  # implicitly loop over "$@"
        self._SetNext()

    else:  # for foo BAD
        p_die('Unexpected word after for loop variable',
              loc.Word(self.cur_word))

    # Phase 3: the loop body
    self._GetWord()
    if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
        node.body = self.ParseBraceGroup()
    else:
        node.body = self.ParseDoGroup()

    return node
| 1536 |
|
def ParseFor(self):
    # type: () -> command_t
    """Parse either kind of for loop, plus any redirects that follow it.

    TODO: Update the grammar

    for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
               | For '((' ... TODO
    """
    for_kw = word_.AsKeywordToken(self._Eat(Id.KW_For))

    self._GetWord()
    if self.c_id != Id.Op_DLeftParen:
        # for x in a b; do echo hi; done
        each_loop = self._ParseForEachLoop(for_kw)
        return self._MaybeParseRedirectList(each_loop)

    # for (( i = 0; i < 10; i++ ))
    if not self.parse_opts.parse_dparen():
        p_die("Bash for loops aren't allowed (parse_dparen)",
              loc.Word(self.cur_word))

    expr_loop = self._ParseForExprLoop(for_kw)
    return self._MaybeParseRedirectList(expr_loop)
| 1561 |
|
def _ParseConditionList(self):
    # type: () -> condition_t
    """Parse the condition of an if command or a while/until loop.

      condition_list: command_list

    Blocks are switched off (self.allow_block) while the condition's command
    list is parsed, and switched back on afterwards.  An empty condition is
    a parse error.

    Returns:
      condition.Shell wrapping the parsed commands.
    """
    self.allow_block = False
    cond_list = self._ParseCommandList()
    self.allow_block = True

    children = cond_list.children
    if len(children) == 0:
        p_die("Expected a condition", loc.Word(self.cur_word))

    return condition.Shell(children)
| 1578 |
|
def ParseWhileUntil(self, keyword):
    # type: (Token) -> command.WhileUntil
    """Parse a while or until loop; keyword is the token that introduced it.

      while_clause : While command_list do_group ;
      until_clause : Until command_list do_group ;

    With parse_paren on, a condition that starts with '(' is parsed as a YSH
    expression.  With parse_brace on, the body may be a brace group.
    """
    self._SetNext()  # skip keyword

    loop_cond = None  # type: condition_t
    is_ysh_expr = (self.parse_opts.parse_paren() and
                   self.w_parser.LookPastSpace() == Id.Op_LParen)
    if is_ysh_expr:
        loop_cond = condition.YshExpr(
            self.w_parser.ParseYshExprForCommand())
    else:
        loop_cond = self._ParseConditionList()

    # NOTE: The LSTs will be different for OSH and YSH, but the execution
    # should be unchanged.  To be sure we should desugar.
    self._GetWord()

    loop_body = None  # type: command_t
    has_brace_body = (self.parse_opts.parse_brace() and
                      self.c_id == Id.Lit_LBrace)
    if has_brace_body:
        # while test -f foo {
        loop_body = self.ParseBraceGroup()
    else:
        loop_body = self.ParseDoGroup()

    return command.WhileUntil(keyword, loop_cond, loop_body)
| 1604 |
|
def ParseCaseArm(self):
    # type: () -> CaseArm
    """Parse one arm of a shell-style case statement.

      case_item: '('? pattern ('|' pattern)* ')'
                 newline_ok command_term? trailer? ;

    Looking at '(' or pattern

    Returns:
      CaseArm whose last field is the ;; / ;& / ;;& token, or None when the
      arm is ended by 'esac' instead.
    """
    self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

    left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

    if self.c_id == Id.Op_LParen:  # Optional (
        self._SetNext()

    # One or more patterns separated by |
    patterns = []  # type: List[word_t]
    more_patterns = True
    while more_patterns:
        self._GetWord()
        if self.c_kind != Kind.Word:
            p_die('Expected case pattern', loc.Word(self.cur_word))
        patterns.append(self.cur_word)
        self._SetNext()

        self._GetWord()
        more_patterns = (self.c_id == Id.Op_Pipe)
        if more_patterns:
            self._SetNext()

    middle_tok = word_.AsOperatorToken(self._Eat(Id.Right_CasePat))

    self._NewlineOk()

    # The action is empty when a terminator or 'esac' follows immediately.
    actions = []  # type: List[command_t]
    self._GetWord()
    if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                         Id.KW_Esac):
        actions = self._ParseCommandTerm().children

    terminator = None  # type: Token
    self._GetWord()
    if self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
        terminator = word_.AsOperatorToken(self.cur_word)
        self._SetNext()
    elif self.c_id != Id.KW_Esac:
        # Happens on EOF.  (At 'esac', the last ;; is simply missing.)
        p_die('Expected ;; or esac', loc.Word(self.cur_word))

    self._NewlineOk()

    return CaseArm(left_tok, pat.Words(patterns), middle_tok, actions,
                   terminator)
| 1662 |
|
def ParseYshCaseArm(self, discriminant):
    # type: (Id_t) -> CaseArm
    """Parse one arm of a YSH case statement.

      case_item  : pattern newline_ok brace_group newline_ok
      pattern    : pat_words
                 | pat_exprs
                 | pat_eggex
                 | pat_else
      pat_words  : pat_word (newline_ok '|' newline_ok pat_word)*
      pat_exprs  : pat_expr (newline_ok '|' newline_ok pat_expr)*
      pat_word   : WORD
      pat_eggex  : '/' oil_eggex '/'
      pat_expr   : '(' oil_expr ')'
      pat_else   : '(' Id.KW_Else ')'

    Looking at: 'pattern'

    The trailing `newline_ok` in `case_item` is handled by `ParseYshCase`,
    because parsing it yields the "discriminant" for the next arm.

    Args:
      discriminant: Id of the token the arm starts with; it selects between
        word patterns and the expression / eggex / else patterns.
    """
    first_tok = None  # type: Token
    arm_pat = None  # type: pat_t

    if discriminant in (Id.Op_LParen, Id.Arith_Slash):
        # pat_exprs, pat_else or pat_eggex
        arm_pat, first_tok = self.w_parser.ParseYshCasePattern()
    else:
        # pat_words
        word_pats = []  # type: List[word_t]
        more_patterns = True
        while more_patterns:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            word_pats.append(self.cur_word)
            self._SetNext()

            # Only the first pattern sets the arm's left token.
            if not first_tok:
                first_tok = location.LeftTokenForWord(self.cur_word)

            self._NewlineOk()

            self._GetWord()
            more_patterns = (self.c_id == Id.Op_Pipe)
            if more_patterns:
                self._SetNext()
                self._NewlineOk()

        arm_pat = pat.Words(word_pats)

    self._NewlineOk()
    body = self.ParseBraceGroup()

    # The left token of the action is our "middle" token
    return CaseArm(first_tok, arm_pat, body.left, body.children, body.right)
| 1720 |
|
def ParseYshCase(self, case_kw):
    # type: (Token) -> command.Case
    """Parse a YSH case statement; case_kw is the 'case' keyword token.

      ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

    Looking at: token after 'case'
    """
    to_match = case_arg.YshExpr(self.w_parser.ParseYshExprForCommand())

    arms_start = word_.BraceToken(self._Eat(Id.Lit_LBrace))

    # Note: for now, zero arms are accepted, just like POSIX case $x in esac
    arms = []  # type: List[CaseArm]
    while True:
        # Each call yields the Id that the next arm (or the closing brace)
        # starts with.
        next_id = self.w_parser.NewlineOkForYshCase()
        if next_id == Id.Op_RBrace:
            break
        arms.append(self.ParseYshCaseArm(next_id))

    # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr.  So the '}'
    # token is read as an Id.Op_RBrace, but we need to store this as a
    # Id.Lit_RBrace.
    arms_end = word_.AsOperatorToken(self._Eat(Id.Op_RBrace))
    arms_end.id = Id.Lit_RBrace

    return command.Case(case_kw, to_match, arms_start, arms, arms_end)
| 1752 |
|
def ParseOldCase(self, case_kw):
    # type: (Token) -> command.Case
    """
    case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

    -> Looking at WORD

    FYI original POSIX case list, which takes pains for DSEMI

    case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;

    Args:
      case_kw: the 'case' keyword token, stored on the resulting node.
    """
    self._GetWord()
    subject_word = self.cur_word

    # Without parse_bare_word, an unquoted constant subject is rejected:
    # the author most likely meant a variable.
    if not self.parse_opts.parse_bare_word():
        ok, static_val, quoted = word_.StaticEval(subject_word)
        if ok and not quoted:
            p_die(
                "This is a constant string.  You may want a variable like $x (parse_bare_word)",
                loc.Word(subject_word))

    if subject_word.tag() != word_e.Compound:
        p_die("Expected a word to match against", loc.Word(subject_word))

    subject = case_arg.Word(subject_word)
    self._SetNext()  # past WORD

    self._NewlineOk()

    in_kw = word_.AsKeywordToken(self._Eat(Id.KW_In))

    self._NewlineOk()

    case_arms = []  # type: List[CaseArm]
    while True:
        self._GetWord()
        # Stop at 'esac', or at anything that cannot begin an arm: an arm
        # starts with a pattern word or '('.
        if (self.c_id == Id.KW_Esac or
            (self.c_kind != Kind.Word and self.c_id != Id.Op_LParen)):
            break

        case_arms.append(self.ParseCaseArm())

    esac_kw = word_.AsKeywordToken(self._Eat(Id.KW_Esac))

    # no redirects yet
    return command.Case(case_kw, subject, in_kw, case_arms, esac_kw)
|
| 1803 |
|
def ParseCase(self):
    # type: () -> command.Case
    """
    case_clause : old_case  # from POSIX
                | ysh_case
                ;

    Looking at 'Case'
    """
    case_kw = word_.AsKeywordToken(self.cur_word)
    self._SetNext()  # past 'case'

    # A '(' right after 'case' (ignoring spaces) selects the YSH form;
    # everything else is parsed as the POSIX form.
    if self.w_parser.LookPastSpace() != Id.Op_LParen:
        return self.ParseOldCase(case_kw)
    return self.ParseYshCase(case_kw)
|
| 1820 |
|
def _ParseYshElifElse(self, if_node):
    # type: (command.If) -> None
    """Parse the 'elif' arms and the optional 'else' of a brace-style if.

      if test -f foo {
        echo foo
      } elif test -f bar; test -f spam {
        echo bar
      } else {
        echo none
      }

    The current word is expected to be 'elif' or 'else'.  Each elif arm is
    appended to if_node.arms; the else body, if present, is stored in
    if_node.else_action.
    """
    while self.c_id == Id.KW_Elif:
        elif_kw = word_.AsKeywordToken(self.cur_word)
        self._SetNext()  # skip elif

        if (not self.parse_opts.parse_paren() or
                self.w_parser.LookPastSpace() != Id.Op_LParen):
            # Shell-style condition: blocks are switched off while the
            # condition list is parsed, then switched back on.
            self.allow_block = False
            cond_list = self._ParseCommandList()
            self.allow_block = True
            cond = condition.Shell(cond_list.children)  # type: condition_t
        else:
            # elif (x + 1) {
            cond = condition.YshExpr(self.w_parser.ParseYshExprForCommand())

        arm_body = self.ParseBraceGroup()
        self._GetWord()  # load the word that drives the loop test

        # YSH arms carry no 'then' token, so both token slots are None.
        if_node.arms.append(
            IfArm(elif_kw, cond, None, arm_body.children, None))

    self._GetWord()
    if self.c_id == Id.KW_Else:
        self._SetNext()
        else_body = self.ParseBraceGroup()
        if_node.else_action = else_body.children
|
| 1854 |
|
def _ParseYshIf(self, if_kw, cond):
    # type: (Token, condition_t) -> command.If
    """Parse the brace-style remainder of an 'if' after its condition.

      if test -f foo {
                   # ^ we parsed up to here
        echo foo
      } elif test -f bar; test -f spam {
        echo bar
      } else {
        echo none
      }

    NOTE: If you do something like if test -n foo{, the parser keeps going,
    and the error is confusing because it doesn't point to the right place.

    I think we might need strict_brace so that foo{ is disallowed.  It has
    to be foo\{ or foo{a,b}.  Or just turn that on with parse_brace?  After
    you form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
    Lit_RBrace?  Maybe this is pre-parsing step in the WordParser?

    Args:
      if_kw: the 'if' keyword token.
      cond: the already-parsed condition of the first arm.
    """
    node = command.If.CreateNull(alloc_lists=True)
    node.if_kw = if_kw

    first_body = self.ParseBraceGroup()
    # The arm records only the 'if' token; the two remaining token slots
    # stay None.
    # TODO: We could get the spids from the brace group.
    node.arms.append(IfArm(if_kw, cond, None, first_body.children, None))

    self._GetWord()
    if self.c_id == Id.KW_Elif or self.c_id == Id.KW_Else:
        self._ParseYshElifElse(node)

    # Unlike the shell-style form, no 'fi' token is recorded here: the
    # closing delimiter belongs to the BraceGroup.
    return node
|
| 1890 |
|
def _ParseElifElse(self, if_node):
    # type: (command.If) -> None
    """
    else_part: (Elif command_list Then command_list)* Else command_list ;

    Appends one IfArm per 'elif' to if_node.arms, then sets
    if_node.else_kw (None when there is no 'else') and, for an 'else',
    if_node.else_action.
    """
    self._GetWord()
    while self.c_id == Id.KW_Elif:
        elif_kw = word_.AsKeywordToken(self.cur_word)
        self._SetNext()  # past 'elif'

        arm_cond = self._ParseConditionList()
        then_kw = word_.AsKeywordToken(self._Eat(Id.KW_Then))
        arm_body = self._ParseCommandList()

        if_node.arms.append(
            IfArm(elif_kw, arm_cond, then_kw, arm_body.children, then_kw))

    self._GetWord()
    if self.c_id != Id.KW_Else:
        if_node.else_kw = None
        return

    else_tok = word_.AsKeywordToken(self.cur_word)
    self._SetNext()  # past 'else'
    else_body = self._ParseCommandList()
    if_node.else_action = else_body.children
    if_node.else_kw = else_tok
|
| 1923 |
|
def ParseIf(self):
    # type: () -> command.If
    """
    if_clause        : If command_list Then command_list else_part? Fi ;

    open             : '{' | Then
    close            : '}' | Fi

    ysh_if           : If ( command_list | '(' expr ')' )
                       open command_list else_part? close;

    There are 2 conditionals here: parse_paren, then parse_brace

    Looking at 'if'.
    """
    node = command.If.CreateNull(alloc_lists=True)
    if_kw = word_.AsKeywordToken(self.cur_word)
    node.if_kw = if_kw
    self._SetNext()  # past 'if'

    if (not self.parse_opts.parse_paren() or
            self.w_parser.LookPastSpace() != Id.Op_LParen):
        # if echo 1; echo 2; then
        # Remove ambiguity with if cd / {
        cond = self._ParseConditionList()  # type: condition_t
    else:
        # if (x + 1)
        cond = condition.YshExpr(self.w_parser.ParseYshExprForCommand())

    self._GetWord()
    if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
        # Brace-style body: the result is the node built by _ParseYshIf.
        return self._ParseYshIf(if_kw, cond)

    then_kw = word_.AsKeywordToken(self._Eat(Id.KW_Then))
    then_body = self._ParseCommandList()

    # First arm
    node.arms.append(IfArm(if_kw, cond, then_kw, then_body.children,
                           then_kw))

    # 2nd to Nth arm
    if self.c_id == Id.KW_Elif or self.c_id == Id.KW_Else:
        self._ParseElifElse(node)

    node.fi_kw = word_.AsKeywordToken(self._Eat(Id.KW_Fi))
    return node
|
| 1973 |
|
def ParseTime(self):
    # type: () -> command_t
    """Time [-p] pipeline.

    According to bash help.  This method consumes the 'time' keyword and
    hands everything after it to ParsePipeline.
    """
    kw = word_.AsKeywordToken(self.cur_word)
    self._SetNext()  # skip time
    return command.TimeBlock(kw, self.ParsePipeline())
|
| 1984 |
|
def ParseCompoundCommand(self):
    # type: () -> command_t
    """
    Refactoring: we put io_redirect* here instead of in function_body and
    command.

    compound_command : brace_group io_redirect*
                     | subshell io_redirect*
                     | for_clause io_redirect*
                     | while_clause io_redirect*
                     | until_clause io_redirect*
                     | if_clause io_redirect*
                     | case_clause io_redirect*

                     # bash extensions
                     | time_clause
                     | [[ BoolExpr ]]
                     | (( ArithExpr ))
    """
    self._GetWord()
    # Every branch below returns or dies, so the id can be read once.
    first_id = self.c_id

    if first_id == Id.Lit_LBrace:
        return self._MaybeParseRedirectList(self.ParseBraceGroup())

    if first_id == Id.Op_LParen:
        return self._MaybeParseRedirectList(self.ParseSubshell())

    if first_id == Id.KW_For:
        # Note: Redirects parsed in this call.  POSIX for and bash for ((
        # have different nodetypes.
        return self.ParseFor()

    if first_id == Id.KW_While or first_id == Id.KW_Until:
        loop_kw = word_.AsKeywordToken(self.cur_word)
        return self._MaybeParseRedirectList(self.ParseWhileUntil(loop_kw))

    if first_id == Id.KW_If:
        return self._MaybeParseRedirectList(self.ParseIf())

    if first_id == Id.KW_Case:
        return self._MaybeParseRedirectList(self.ParseCase())

    if first_id == Id.KW_DLeftBracket:
        if not self.parse_opts.parse_dbracket():
            p_die('Bash [[ not allowed in YSH (parse_dbracket)',
                  loc.Word(self.cur_word))
        return self._MaybeParseRedirectList(self.ParseDBracket())

    if first_id == Id.Op_DLeftParen:
        if not self.parse_opts.parse_dparen():
            p_die(
                'Bash (( not allowed in YSH (parse_dparen, see OILS-ERR-14 for wart)',
                loc.Word(self.cur_word))
        return self._MaybeParseRedirectList(self.ParseDParen())

    # bash extensions: no redirects
    if first_id == Id.KW_Time:
        return self.ParseTime()

    # Happens in function body, e.g. myfunc() oops
    p_die(
        'Unexpected word while parsing compound command (%s)' %
        Id_str(first_id), loc.Word(self.cur_word))
    assert False  # for MyPy
|
| 2052 |
|
def ParseFunctionDef(self):
    # type: () -> command.ShFunction
    """
    function_header : fname '(' ')'
    function_def     : function_header newline_ok function_body ;

    Precondition: Looking at the function name.

    NOTE: There is an ambiguity with:

    function foo ( echo hi ) and
    function foo () ( echo hi )

    Bash only accepts the latter, though it doesn't really follow a grammar.
    """
    name_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
    name = word_.ShFunctionName(name_word)
    if len(name) == 0:  # example: foo$x is invalid
        p_die('Invalid function name', loc.Word(name_word))

    first_part = name_word.parts[0]
    # A non-empty name from ShFunctionName should imply a literal first
    # part.
    assert first_part.tag() == word_part_e.Literal
    blame_tok = cast(Token, first_part)  # for ctx_VarChecker

    self._SetNext()  # move past function name

    # The caller's lookahead guarantees the '('.
    self._GetWord()
    assert self.c_id == Id.Op_LParen, self.cur_word

    # Ask the lexer to report the matching ')' as Right_ShFunction.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
    self._SetNext()

    self._GetWord()
    if self.c_id != Id.Right_ShFunction:
        p_die('Expected ) in function definition', loc.Word(self.cur_word))
        return None

    # 'f ()' implies a function definition, since invoking it with no args
    # would just be 'f'
    self._SetNext()

    self._NewlineOk()

    func = command.ShFunction.CreateNull()
    func.name = name
    with ctx_VarChecker(self.var_checker, blame_tok):
        func.body = self.ParseCompoundCommand()

    func.name_tok = location.LeftTokenForCompoundWord(name_word)
    return func
|
| 2105 |
|
def ParseKshFunctionDef(self):
    # type: () -> command.ShFunction
    """
    ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

    Looking at the 'function' keyword.
    """
    function_kw = word_.AsKeywordToken(self.cur_word)

    self._SetNext()  # skip past 'function'
    self._GetWord()

    fn_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
    name = word_.ShFunctionName(fn_word)
    if len(name) == 0:  # example: foo$x is invalid
        p_die('Invalid KSH-style function name', loc.Word(fn_word))

    self._SetNext()  # skip past the function name

    # The '()' after the name is optional in this form.
    self._GetWord()
    if self.c_id == Id.Op_LParen:
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()
        self._Eat(Id.Right_ShFunction)

    self._NewlineOk()

    func = command.ShFunction.CreateNull()
    func.name = name
    with ctx_VarChecker(self.var_checker, function_kw):
        func.body = self.ParseCompoundCommand()

    func.keyword = function_kw
    func.name_tok = location.LeftTokenForWord(fn_word)
    return func
|
| 2140 |
|
def ParseYshProc(self):
    # type: () -> Proc
    """Parse a YSH proc definition.

    The word parser fills in the name and signature.  For a closed
    signature, every declared parameter (word, positional, named, their
    rest params, and the block param) is registered with the variable
    checker as a 'var'.  The body is a brace group, parsed in
    cmd_mode_e.Proc.
    """
    proc_node = Proc.CreateNull(alloc_lists=True)

    proc_kw = word_.AsKeywordToken(self.cur_word)
    proc_node.keyword = proc_kw

    with ctx_VarChecker(self.var_checker, proc_kw):
        with ctx_CmdMode(self, cmd_mode_e.Proc):
            self.w_parser.ParseProc(proc_node)
            if proc_node.sig.tag() == proc_sig_e.Closed:  # Register params
                closed = cast(proc_sig.Closed, proc_node.sig)

                # Treat 3 kinds of params as variables.
                word_group = closed.word
                if word_group:
                    for p in word_group.params:
                        self.var_checker.Check(Id.KW_Var, p.name,
                                               p.blame_tok)
                    if word_group.rest_of:
                        rest = word_group.rest_of
                        self.var_checker.Check(Id.KW_Var, rest.name,
                                               rest.blame_tok)
                        # We COULD register __out here (for params with an
                        # Arith_Colon prefix) but it would require a
                        # different API.

                pos_group = closed.positional
                if pos_group:
                    for p in pos_group.params:
                        self.var_checker.Check(Id.KW_Var, p.name,
                                               p.blame_tok)
                    if pos_group.rest_of:
                        rest = pos_group.rest_of
                        self.var_checker.Check(Id.KW_Var, rest.name,
                                               rest.blame_tok)

                named_group = closed.named
                if named_group:
                    for p in named_group.params:
                        self.var_checker.Check(Id.KW_Var, p.name,
                                               p.blame_tok)
                    if named_group.rest_of:
                        rest = named_group.rest_of
                        self.var_checker.Check(Id.KW_Var, rest.name,
                                               rest.blame_tok)

                if closed.block_param:
                    block = closed.block_param
                    self.var_checker.Check(Id.KW_Var, block.name,
                                           block.blame_tok)

            self._SetNext()
            proc_node.body = self.ParseBraceGroup()
            # No redirects for YSH procs (only at call site)

    return proc_node
|
| 2197 |
|
def ParseYshFunc(self):
    # type: () -> Func
    """
    ysh_func: (
      Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
    )
    Looking at KW_Func

    Positional and named params (and their rest params) are registered with
    the variable checker as 'var'; the body is parsed in cmd_mode_e.Func.
    """
    func_node = Func.CreateNull(alloc_lists=True)

    func_kw = word_.AsKeywordToken(self.cur_word)
    func_node.keyword = func_kw

    with ctx_VarChecker(self.var_checker, func_kw):
        self.w_parser.ParseFunc(func_node)

        pos_group = func_node.positional
        if pos_group:
            for p in pos_group.params:
                self.var_checker.Check(Id.KW_Var, p.name, p.blame_tok)
            if pos_group.rest_of:
                rest = pos_group.rest_of
                self.var_checker.Check(Id.KW_Var, rest.name,
                                       rest.blame_tok)

        named_group = func_node.named
        if named_group:
            for p in named_group.params:
                self.var_checker.Check(Id.KW_Var, p.name, p.blame_tok)
            if named_group.rest_of:
                rest = named_group.rest_of
                self.var_checker.Check(Id.KW_Var, rest.name,
                                       rest.blame_tok)

        self._SetNext()
        with ctx_CmdMode(self, cmd_mode_e.Func):
            func_node.body = self.ParseBraceGroup()

    return func_node
|
| 2237 |
|
def ParseCoproc(self):
    # type: () -> command_t
    """Placeholder for coproc parsing: always raises NotImplementedError.

    TODO: command.Coproc?
    """
    raise NotImplementedError()
|
| 2244 |
|
def ParseSubshell(self):
    # type: () -> command.Subshell
    """
    subshell : '(' compound_list ')'

    Looking at Op_LParen
    """
    left_paren = word_.AsOperatorToken(self.cur_word)
    self._SetNext()  # skip past (

    # Ensure that something $( (cd / && pwd) ) works.  If ) is already on
    # the translation stack, we want to delay it.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)

    body = self._ParseCommandList()
    # A single command is stored directly instead of inside a list node.
    if len(body.children) == 1:
        child = body.children[0]
    else:
        child = body

    right_paren = word_.AsOperatorToken(self._Eat(Id.Right_Subshell))

    return command.Subshell(left_paren, child, right_paren)
|
| 2270 |
|
def ParseDBracket(self):
    # type: () -> command.DBracket
    """Parse [[ ... ]] by delegating to the boolean expression parser.

    Looking at '[['.  The BoolParser is built on top of this parser's word
    parser and returns both the expression and the closing token.
    """
    open_tok = word_.AsKeywordToken(self.cur_word)
    # TODO: Test interactive.  Without closing ]], you should get > prompt
    # (PS2)

    self._SetNext()  # skip [[
    bool_node, close_tok = bool_parse.BoolParser(
        self.w_parser).Parse()  # May raise
    return command.DBracket(open_tok, bool_node, close_tok)
|
| 2283 |
|
def ParseDParen(self):
    # type: () -> command.DParen
    """Parse (( ... )) by delegating to the word parser's ReadDParen.

    Looking at '(('.
    """
    open_tok = word_.AsOperatorToken(self.cur_word)

    self._SetNext()  # skip ((
    arith_node, close_tok = self.w_parser.ReadDParen()
    assert arith_node is not None

    return command.DParen(open_tok, arith_node, close_tok)
|
| 2293 |
|
def ParseCommand(self):
    # type: () -> command_t
    """
    command          : simple_command
                     | compound_command   # OSH edit: io_redirect* folded in
                     | function_def
                     | ksh_function_def

                     # YSH extensions
                     | proc NAME ...
                     | typed proc NAME ...
                     | func NAME ...
                     | const ...
                     | var ...
                     | setglobal ...
                     | setref ...
                     | setvar ...
                     | call EXPR
                     | = EXPR
                     ;

    Note: the reason const / var are not part of compound_command is because
    they can't be alone in a shell function body.

    Example:
      This is valid shell:   f() if true; then echo hi; fi
      This is invalid:       f() var x = 1

    The method dispatches on the current word (self.c_id / self.c_kind),
    which the caller must already have loaded.  The checks run in a fixed
    order: YSH keywords first, then ksh-style functions, compound commands,
    and finally plain words (function definitions, bare assignments in Hay
    blocks, simple commands).  Anything else is reported through p_die.
    """
    # Secondary keywords (the ones _AtSecondaryKeyword recognizes) cannot
    # begin a command.
    if self._AtSecondaryKeyword():
        p_die('Unexpected word when parsing command',
              loc.Word(self.cur_word))

    # YSH Extensions

    if self.c_id == Id.KW_Proc:  # proc p { ... }
        # proc is hidden because of the 'local reasoning' principle.  Code
        # inside procs should be YSH, full stop.  That means ysh:upgrade is
        # on.
        if self.parse_opts.parse_proc():
            return self.ParseYshProc()
        else:
            # 2024-02: This avoids bad syntax errors if you type YSH code
            # into OSH
            # proc p (x) { echo hi } would actually be parsed as a
            # command.Simple!  Shell compatibility: quote 'proc'
            p_die("proc is a YSH keyword, but this is OSH.",
                  loc.Word(self.cur_word))

    if self.c_id == Id.KW_Typed:  # typed proc p () { ... }
        # Advance past 'typed'; the word after it must be 'proc'.
        self._SetNext()
        self._GetWord()
        if self.c_id != Id.KW_Proc:
            p_die("Expected 'proc' after 'typed'", loc.Word(self.cur_word))

        if self.parse_opts.parse_proc():
            return self.ParseYshProc()
        else:
            # NOTE(review): cur_word is the 'proc' word at this point, so
            # the error location is 'proc' rather than 'typed'.
            p_die("typed is a YSH keyword, but this is OSH.",
                  loc.Word(self.cur_word))

    if self.c_id == Id.KW_Func:  # func f(x) { ... }
        if self.parse_opts.parse_func():
            return self.ParseYshFunc()
        else:
            # Same reasoning as above, for 'proc'
            p_die("func is a YSH keyword, but this is OSH.",
                  loc.Word(self.cur_word))

    # 'const' is only accepted while the command mode is Shell; this check
    # must precede the shared var/const branch below.
    if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
        p_die("const can't be inside proc or func.  Use var instead.",
              loc.Word(self.cur_word))

    if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
        keyword_id = self.c_id
        kw_token = word_.LiteralToken(self.cur_word)
        self._SetNext()
        n8 = self.w_parser.ParseVarDecl(kw_token)
        # Register each declared name with the variable checker.
        for lhs in n8.lhs:
            self.var_checker.Check(keyword_id, lhs.name, lhs.left)
        return n8

    if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
        kw_token = word_.LiteralToken(self.cur_word)
        self._SetNext()
        # The variable checker is handed to the word parser for mutations.
        n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
        return n9

    if self.c_id in (Id.KW_Call, Id.Lit_Equals):
        # = 42 + a[i]
        # call mylist->append('x')

        keyword = word_.LiteralToken(self.cur_word)
        assert keyword is not None
        self._SetNext()
        enode = self.w_parser.ParseCommandExpr()
        return command.Expr(keyword, enode)

    if self.c_id == Id.KW_Function:
        return self.ParseKshFunctionDef()

    # Every id that ParseCompoundCommand dispatches on.
    if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                     Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                     Id.KW_If, Id.KW_Case, Id.KW_Time):
        return self.ParseCompoundCommand()

    # Syntax error for '}' starting a line, which all shells disallow.
    if self.c_id == Id.Lit_RBrace:
        p_die('Unexpected right brace', loc.Word(self.cur_word))

    if self.c_kind == Kind.Redir:  # Leading redirect
        return self.ParseSimpleCommand()

    if self.c_kind == Kind.Word:
        # ensured by Kind.Word
        cur_word = cast(CompoundWord, self.cur_word)

        # NOTE: At the top level, only Token and Compound are possible.
        # Can this be modelled better in the type system, removing asserts?
        #
        # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
        # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
        # That requires 2 tokens of lookahead, which we don't have
        #
        # Or maybe we don't just have ParseSimpleCommand -- we will have
        # ParseYshCommand or something

        # A word followed by '()' is a function definition unless the word
        # looks like a variable (per word_.IsVarLike).
        if (self.w_parser.LookAheadFuncParens() and
                not word_.IsVarLike(cur_word)):
            return self.ParseFunctionDef()  # f() { echo; }  # function

        # Parse x = 1+2*3 when inside HayNode { } blocks
        parts = cur_word.parts
        if self.parse_opts.parse_equals() and len(parts) == 1:
            part0 = parts[0]
            if part0.tag() == word_part_e.Literal:
                tok = cast(Token, part0)
                # A single literal that is a valid variable name, followed
                # (past spaces) by '='.
                if (match.IsValidVarName(lexer.LazyStr(tok)) and
                        self.w_parser.LookPastSpace() == Id.Lit_Equals):
                    assert tok.id == Id.Lit_Chars, tok

                    # Bare assignment is only accepted when the top of
                    # hay_attrs_stack is true.
                    if (len(self.hay_attrs_stack) and
                            self.hay_attrs_stack[-1]):
                        # Note: no static var_checker.Check() for bare assignment
                        enode = self.w_parser.ParseBareDecl()
                        self._SetNext()  # Somehow this is necessary
                        # TODO: Use BareDecl here.  Well, do that when we
                        # treat it as const or lazy.
                        return command.VarDecl(
                            None,
                            [NameType(tok, lexer.TokenVal(tok), None)],
                            enode)
                    else:
                        # Advance so that the error location is the word
                        # after the name.
                        self._SetNext()
                        self._GetWord()
                        p_die(
                            'Unexpected = (Hint: use var/setvar, or quote it)',
                            loc.Word(self.cur_word))

        # echo foo
        # f=(a b c)  # array
        # array[1+2]+=1
        return self.ParseSimpleCommand()

    if self.c_kind == Kind.Eof:
        p_die("Unexpected EOF while parsing command",
              loc.Word(self.cur_word))

    # NOTE: This only happens in batch mode in the second turn of the loop!
    # e.g. )
    p_die("Invalid word while parsing command", loc.Word(self.cur_word))

    assert False  # for MyPy
|
| 2466 |
|
def ParsePipeline(self):
    # type: () -> command_t
    """
    pipeline         : Bang? command ( '|' newline_ok command )* ;

    Returns the lone command unchanged when there is neither a '!' nor a
    pipe operator; otherwise a command.Pipeline.
    """
    bang_tok = None  # type: Optional[Token]

    self._GetWord()
    if self.c_id == Id.KW_Bang:
        bang_tok = word_.AsKeywordToken(self.cur_word)
        self._SetNext()

    first = self.ParseCommand()
    assert first is not None

    stages = [first]

    self._GetWord()
    if self.c_id != Id.Op_Pipe and self.c_id != Id.Op_PipeAmp:
        if bang_tok is None:
            return first  # no pipeline
        # '! cmd' is a one-stage pipeline with no operators.
        return command.Pipeline(bang_tok, stages, [])

    # | or |&
    pipe_ops = []  # type: List[Token]
    while self.c_id == Id.Op_Pipe or self.c_id == Id.Op_PipeAmp:
        pipe_ops.append(word_.AsOperatorToken(self.cur_word))

        self._SetNext()  # skip past Id.Op_Pipe or Id.Op_PipeAmp
        self._NewlineOk()

        stages.append(self.ParseCommand())

        self._GetWord()

    return command.Pipeline(bang_tok, stages, pipe_ops)
|
| 2509 |
|
def ParseAndOr(self):
    # type: () -> command_t
    """Parse an and_or list, in multiline mode when it starts with '...'.

    The leading Lit_TDot word is consumed and the rest is parsed by
    _ParseAndOr under word_.ctx_Multiline; without it, _ParseAndOr runs
    directly.
    """
    self._GetWord()
    if self.c_id != Id.Lit_TDot:
        # Parse in normal mode, not multiline
        return self._ParseAndOr()

    # We got '...', so parse in multiline mode
    self._SetNext()
    with word_.ctx_Multiline(self.w_parser):
        return self._ParseAndOr()
|
| 2521 |
|
def _ParseAndOr(self):
    # type: () -> command_t
    """
    and_or           : and_or ( AND_IF | OR_IF ) newline_ok pipeline
                     | pipeline

    Note that it is left recursive and left associative.  We parse it
    iteratively with a token of lookahead.

    Returns the lone pipeline unchanged when no && / || follows it;
    otherwise a command.AndOr holding the pipelines and operator tokens.
    """
    first = self.ParsePipeline()
    assert first is not None

    self._GetWord()
    if self.c_id != Id.Op_DPipe and self.c_id != Id.Op_DAmp:
        return first

    logic_ops = []  # type: List[Token]
    pipelines = [first]

    while self.c_id == Id.Op_DPipe or self.c_id == Id.Op_DAmp:
        logic_ops.append(word_.AsOperatorToken(self.cur_word))

        self._SetNext()  # skip past || &&
        self._NewlineOk()

        pipelines.append(self.ParsePipeline())

        self._GetWord()

    return command.AndOr(pipelines, logic_ops)
|
| 2555 |
|
| 2556 | # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
|
| 2557 |
|
| 2558 | # At the top level, we execute after every line, e.g. to
|
| 2559 | # - process alias (a form of dynamic parsing)
|
| 2560 | # - process 'exit', because invalid syntax might appear after it
|
| 2561 |
|
| 2562 | # On the other hand, for a while loop body, we parse the whole thing at once,
|
| 2563 | # and then execute it. We don't want to parse it over and over again!
|
| 2564 |
|
| 2565 | # COMPARE
|
| 2566 | # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
|
| 2567 | # command_term : and_or (trailer and_or)* ; # CHILDREN
|
| 2568 |
|
def _ParseCommandLine(self):
    # type: () -> command_t
    """
    command_line     : and_or (sync_op and_or)* trailer? ;
    trailer          : sync_op newline_ok
                     | NEWLINES;
    sync_op          : '&' | ';';

    NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see
    if there is another command word after the sync op.

    But it's easier to express imperatively.  Do the following in a loop:
    1. ParseAndOr
    2. Peek.
       a. If there's a newline, then return.  (We're only parsing a single
          line.)
       b. If there's a sync_op, process it.  Then look for a newline and
          return.  Otherwise, parse another AndOr.

    Returns the single parsed command, or a command.CommandList when the
    line holds more than one.
    """
    # This list is slightly different than END_LIST in _ParseCommandTerm.
    # I don't think we should add anything else here; otherwise it will be
    # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
    line_enders = [Id.Op_Newline, Id.Eof_Real]

    parsed = []  # type: List[command_t]
    finished = False
    while not finished:
        node = self.ParseAndOr()

        self._GetWord()
        if self.c_id == Id.Op_Semi or self.c_id == Id.Op_Amp:
            sync_tok = cast(Token, self.cur_word)  # for MyPy
            node = command.Sentence(node, sync_tok)
            self._SetNext()

            # The line is over if a newline or EOF follows the sync op.
            self._GetWord()
            if self.c_id in line_enders:
                finished = True

        elif self.c_id in line_enders:
            finished = True

        else:
            # e.g. echo a(b)
            p_die(
                'Invalid word while parsing command line (%s)' %
                Id_str(self.c_id), loc.Word(self.cur_word))

        parsed.append(node)

    # Simplify the AST.
    if len(parsed) <= 1:
        return parsed[0]
    return command.CommandList(parsed)
|
| 2624 |
|
def _ParseCommandTerm(self):
    # type: () -> command.CommandList
    """Parse a sequence of and_or commands inside a block.

    Grammar:
        command_term : and_or (trailer and_or)* ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

    This is handled in imperative style, like _ParseCommandLine.
    Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
    which is slightly different.  (HOW?  Is it the DSEMI?)

    Returns:
        A command.CommandList holding every parsed child.  Unlike
        _ParseCommandLine, a single child is NOT unwrapped here.
    """
    # Token types that will end the command term.
    stop_ids = [
        self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
        Id.Op_SemiAmp, Id.Op_DSemiAmp
    ]

    # NOTE: This is similar to _ParseCommandLine.
    #
    # - Why aren't we using these stop tokens in _ParseCommandLine?
    #   - Because you will never be inside $() at the top level.
    #   - We also know it will end in a newline.  It can't end in "fi"!
    #   - example: if true; then { echo hi; } fi

    nodes = []  # type: List[command_t]
    while True:
        # Most keywords are valid "first words".  But do/done/then do not
        # BEGIN commands, so they are not valid.  Nothing is appended in
        # this case.
        if self._AtSecondaryKeyword():
            break

        node = self.ParseAndOr()
        finished = False

        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()

            self._GetWord()
            finished = self.c_id in stop_ids

        elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
            sync_tok = cast(Token, self.cur_word)  # for MyPy
            node = command.Sentence(node, sync_tok)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()  # skip over newline
                self._GetWord()

            # Test if we should keep going.  There might be another command
            # after the semi (and the optional newline).  This covers both
            # the '; \n EOF' and the '; EOF' cases.
            finished = self.c_id in stop_ids

        elif self.c_id in stop_ids:  # EOF
            finished = True

        # For if test -f foo; test -f bar {
        elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            finished = True

        elif self.c_kind != Kind.Word:
            # e.g. f() { echo (( x )) ; }
            # but can't fail on 'fi fi', see osh/cmd_parse_test.py

            #log("Invalid %s", self.cur_word)
            p_die("Invalid word while parsing command list",
                  loc.Word(self.cur_word))

        nodes.append(node)
        if finished:
            break

    return command.CommandList(nodes)
|
| 2707 |
|
def _ParseCommandList(self):
    # type: () -> command.CommandList
    """Parse a command block: optional newlines, then a command term.

    Grammar:
        command_list : newline_ok command_term trailer? ;

    This one is called by all the compound commands.  It's basically a
    command block.

    NOTE: Rather than translating the CFG directly, the code follows a style
    more like this: (and_or trailer)+.  It makes capture easier.

    Returns:
        Whatever _ParseCommandTerm() returns, after _NewlineOk() has run.
    """
    self._NewlineOk()
    term = self._ParseCommandTerm()
    return term
|
| 2722 |
|
def ParseLogicalLine(self):
    # type: () -> command_t
    """Parse a single logical line for main_loop.

    A wrapper around _ParseCommandLine() that first calls _NewlineOk().
    Similar but not identical to _ParseCommandList() and ParseCommandSub().

    Returns:
        The parsed command, or None when the current word is Id.Eof_Real.

    Raises:
        ParseError
    """
    self._NewlineOk()
    self._GetWord()
    if self.c_id != Id.Eof_Real:
        return self._ParseCommandLine()

    # Nothing left to parse; the main loop checks for here docs.
    return None
|
| 2739 |
|
def ParseInteractiveLine(self):
    # type: () -> parse_result_t
    """Parse a single line for the interactive main_loop.

    Different from ParseLogicalLine because newlines are handled differently:
    there is no _NewlineOk() call here, and a leading newline is reported to
    the caller as an empty line.

    Returns:
        parse_result.EmptyLine if the first word is a newline,
        parse_result.Eof if it is Id.Eof_Real, otherwise
        parse_result.Node wrapping the result of _ParseCommandLine().

    Raises:
        ParseError
    """
    self._GetWord()
    first_id = self.c_id

    if first_id == Id.Op_Newline:
        return parse_result.EmptyLine
    if first_id == Id.Eof_Real:
        return parse_result.Eof

    return parse_result.Node(self._ParseCommandLine())
|
| 2757 |
|
def ParseCommandSub(self):
    # type: () -> command_t
    """Parse $(echo hi) and `echo hi` for word_parse.py.

    They can have multiple lines, like this:

        echo $(
          echo one
          echo two
        )

    Returns:
        command.NoOp for an empty substitution, the lone child when the body
        holds exactly one command, otherwise the whole command.CommandList.
    """
    self._NewlineOk()

    self._GetWord()
    if self.c_kind == Kind.Eof:  # e.g. $()
        return command.NoOp

    term = self._ParseCommandTerm()
    kids = term.children

    # Simplify the AST: only unwrap when there is exactly one child.
    if len(kids) != 1:
        return term
    return kids[0]
|
| 2776 |
|
def CheckForPendingHereDocs(self):
    # type: () -> None
    """Report an error if any here doc is still waiting for its body.

    NOTE: This happens when there is no newline at the end of a file, like
        osh -c 'cat <<EOF'

    Does nothing when self.pending_here_docs is empty.  Otherwise calls
    p_die() pointing at where the first pending here doc began.
    """
    pending = self.pending_here_docs
    if len(pending) == 0:
        return

    first_redir = pending[0]  # Just show the first one?
    here_doc = cast(redir_param.HereDoc, first_redir.arg)
    p_die('Unterminated here doc began here', loc.Word(here_doc.here_begin))
|
| 2785 |
|
| 2786 |
|
| 2787 | # vim: sw=4
|