| 1 | #!/usr/bin/env python
 | 
| 2 | # Copyright 2016 Andy Chu. All rights reserved.
 | 
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 | 
| 4 | # you may not use this file except in compliance with the License.
 | 
| 5 | # You may obtain a copy of the License at
 | 
| 6 | #
 | 
| 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 | 
| 8 | from __future__ import print_function
 | 
| 9 | """
 | 
| 10 | cmd_parse.py - Parse high level shell commands.
 | 
| 11 | """
 | 
| 12 | 
 | 
| 13 | from asdl import const
 | 
| 14 | 
 | 
| 15 | from core import braces
 | 
| 16 | from core import word
 | 
| 17 | from core import util
 | 
| 18 | 
 | 
| 19 | from osh.meta import ast, Id, Kind, types
 | 
| 20 | from osh.lex import VAR_NAME_RE
 | 
| 21 | from osh.bool_parse import BoolParser
 | 
| 22 | 
 | 
# Short module-level aliases for names used throughout the parser.
log = util.log

lex_mode_e = types.lex_mode_e

# Enum-like namespaces from the generated AST module.
assign_op_e = ast.assign_op_e
command_e = ast.command_e
word_e = ast.word_e
 | 
| 28 | 
 | 
| 29 | 
 | 
| 30 | class CommandParser(object):
 | 
  """
  Args:
    w_parser: to get a stream of words
    lexer: for lookahead in function def, PushHint of ()
    line_reader: for here doc
    arena: passed to the word parser that reads here doc bodies
  """
 | 
| 37 |   def __init__(self, w_parser, lexer, line_reader, arena):
 | 
| 38 |     self.w_parser = w_parser  # for normal parsing
 | 
| 39 |     self.lexer = lexer  # for fast lookahead to (, for function defs
 | 
| 40 |     self.line_reader = line_reader  # for here docs
 | 
| 41 |     self.arena = arena
 | 
| 42 | 
 | 
| 43 |     self.Reset()
 | 
| 44 | 
 | 
| 45 |   def Reset(self):
 | 
| 46 |     self.error_stack = []
 | 
| 47 |     self.completion_stack = []
 | 
| 48 | 
 | 
| 49 |     # Cursor state set by _Peek()
 | 
| 50 |     self.next_lex_mode = lex_mode_e.OUTER
 | 
| 51 |     self.cur_word = None  # current word
 | 
| 52 |     self.c_kind = Kind.Undefined
 | 
| 53 |     self.c_id = Id.Undefined_Tok
 | 
| 54 | 
 | 
| 55 |     self.pending_here_docs = []
 | 
| 56 | 
 | 
| 57 |   def Error(self):
 | 
| 58 |     return self.error_stack
 | 
| 59 | 
 | 
| 60 |   def _BadWord(self, msg, w):
 | 
| 61 |     """Helper function for errors involving a word.
 | 
| 62 | 
 | 
| 63 |     Args:
 | 
| 64 |       msg: format string with a single %s token
 | 
| 65 |       w: Word
 | 
| 66 |     """
 | 
| 67 |     self.AddErrorContext(msg, w, word=w)
 | 
| 68 | 
 | 
| 69 |   def AddErrorContext(self, msg, *args, **kwargs):
 | 
| 70 |     err = util.ParseError(msg, *args, **kwargs)
 | 
| 71 |     self.error_stack.append(err)
 | 
| 72 | 
 | 
| 73 |   def GetCompletionState(self):
 | 
| 74 |     return self.completion_stack
 | 
| 75 | 
 | 
| 76 |   def _MaybeReadHereDocs(self):
 | 
| 77 |     for h in self.pending_here_docs:
 | 
| 78 |       lines = []
 | 
| 79 |       #log('HERE %r' % h.here_end)
 | 
| 80 |       while True:
 | 
| 81 |         # If op is <<-, strip off all leading tabs (NOT spaces).
 | 
| 82 |         # (in C++, just bump the start?)
 | 
| 83 |         line_id, line = self.line_reader.GetLine()
 | 
| 84 | 
 | 
| 85 |         #print("LINE %r %r" % (line, h.here_end))
 | 
| 86 |         if not line:  # EOF
 | 
| 87 |           # An unterminated here doc is just a warning in bash.  We make it
 | 
| 88 |           # fatal because we want to be strict, and because it causes problems
 | 
| 89 |           # reporting other errors.
 | 
| 90 |           # Attribute it to the << in <<EOF for now.
 | 
| 91 |           self.AddErrorContext('Unterminated here doc', span_id=h.spids[0])
 | 
| 92 |           return False
 | 
| 93 | 
 | 
| 94 |         # NOTE: Could do this runtime to preserve LST.
 | 
| 95 |         if h.op_id == Id.Redir_DLessDash:
 | 
| 96 |           line = line.lstrip('\t')
 | 
| 97 |         if line.rstrip() == h.here_end:
 | 
| 98 |           break
 | 
| 99 | 
 | 
| 100 |         lines.append((line_id, line))
 | 
| 101 | 
 | 
| 102 |       parts = []
 | 
| 103 |       if h.do_expansion:
 | 
| 104 |         # NOTE: We read all lines at once, instead of doing it line-by-line,
 | 
| 105 |         # because of cases like this:
 | 
| 106 |         # cat <<EOF
 | 
| 107 |         # 1 $(echo 2
 | 
| 108 |         # echo 3) 4
 | 
| 109 |         # EOF
 | 
| 110 | 
 | 
| 111 |         from osh import parse_lib  # Avoid circular import
 | 
| 112 |         w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
 | 
| 113 |         word = w_parser.ReadHereDocBody()
 | 
| 114 |         if not word:
 | 
| 115 |           self.AddErrorContext(
 | 
| 116 |               'Error reading here doc body: %s', w_parser.Error())
 | 
| 117 |           return False
 | 
| 118 |         h.body = word
 | 
| 119 |         h.was_filled = True
 | 
| 120 |       else:
 | 
| 121 |         # Each line is a single span.  TODO: Add span_id to token.
 | 
| 122 |         tokens = [
 | 
| 123 |             ast.token(Id.Lit_Chars, line, const.NO_INTEGER)
 | 
| 124 |             for _, line in lines]
 | 
| 125 |         parts = [ast.LiteralPart(t) for t in tokens]
 | 
| 126 |         h.body = ast.CompoundWord(parts)
 | 
| 127 |         h.was_filled = True
 | 
| 128 | 
 | 
| 129 |     # No .clear() until Python 3.3.
 | 
| 130 |     del self.pending_here_docs[:]
 | 
| 131 | 
 | 
| 132 |     return True
 | 
| 133 | 
 | 
  def _Next(self, lex_mode=lex_mode_e.OUTER):
    """Mark the current word as consumed.

    No word is read here; this only records the lexer mode that the next
    _Peek() call will pass to the word parser.

    Args:
      lex_mode: mode for reading the next word; defaults to lex_mode_e.OUTER
    """
    self.next_lex_mode = lex_mode
 | 
| 137 | 
 | 
| 138 |   def Peek(self):
 | 
| 139 |     """Public method for REPL."""
 | 
| 140 |     if not self._Peek():
 | 
| 141 |       return None
 | 
| 142 |     return self.cur_word
 | 
| 143 | 
 | 
| 144 |   def _Peek(self):
 | 
| 145 |     """Helper method.
 | 
| 146 | 
 | 
| 147 |     Returns True for success and False on error.  Error examples: bad command
 | 
| 148 |     sub word, or unterminated quoted string, etc.
 | 
| 149 |     """
 | 
| 150 |     if self.next_lex_mode != lex_mode_e.NONE:
 | 
| 151 |       w = self.w_parser.ReadWord(self.next_lex_mode)
 | 
| 152 |       if w is None:
 | 
| 153 |         error_stack = self.w_parser.Error()
 | 
| 154 |         self.error_stack.extend(error_stack)
 | 
| 155 |         return False
 | 
| 156 | 
 | 
| 157 |       # Here docs only happen in command mode, so other kinds of newlines don't
 | 
| 158 |       # count.
 | 
| 159 |       if w.tag == word_e.TokenWord and w.token.id == Id.Op_Newline:
 | 
| 160 |         if not self._MaybeReadHereDocs():
 | 
| 161 |           return False
 | 
| 162 | 
 | 
| 163 |       self.cur_word = w
 | 
| 164 | 
 | 
| 165 |       self.c_kind = word.CommandKind(self.cur_word)
 | 
| 166 |       self.c_id = word.CommandId(self.cur_word)
 | 
| 167 |       self.next_lex_mode = lex_mode_e.NONE
 | 
| 168 |     #print('_Peek', self.cur_word)
 | 
| 169 |     return True
 | 
| 170 | 
 | 
| 171 |   def _Eat(self, c_id):
 | 
| 172 |     """Consume a word of a type.  If it doesn't match, return False.
 | 
| 173 | 
 | 
| 174 |     Args:
 | 
| 175 |       c_id: either EKeyword.* or a token type like Id.Right_Subshell.
 | 
| 176 |       TODO: Rationalize / type check this.
 | 
| 177 |     """
 | 
| 178 |     if not self._Peek():
 | 
| 179 |       return False
 | 
| 180 |     # TODO: It would be nicer to print the word type, right now we get a number
 | 
| 181 |     if self.c_id != c_id:
 | 
| 182 |       self.AddErrorContext(
 | 
| 183 |           "Expected word type %s, got %s", c_id, self.cur_word,
 | 
| 184 |           word=self.cur_word)
 | 
| 185 |       return False
 | 
| 186 |     self._Next()
 | 
| 187 |     return True
 | 
| 188 | 
 | 
| 189 |   def _NewlineOk(self):
 | 
| 190 |     """Check for optional newline and consume it."""
 | 
| 191 |     if not self._Peek():
 | 
| 192 |       return False
 | 
| 193 |     if self.c_id == Id.Op_Newline:
 | 
| 194 |       self._Next()
 | 
| 195 |       if not self._Peek():
 | 
| 196 |         return False
 | 
| 197 |     return True
 | 
| 198 | 
 | 
| 199 |   def ParseRedirect(self):
 | 
| 200 |     """
 | 
| 201 |     Problem: You don't know which kind of redir_node to instantiate before
 | 
| 202 |     this?  You could stuff them all in one node, and then have a switch() on
 | 
| 203 |     the type.
 | 
| 204 | 
 | 
| 205 |     You need different types.
 | 
| 206 |     """
 | 
| 207 |     if not self._Peek(): return None
 | 
| 208 |     assert self.c_kind == Kind.Redir, self.cur_word
 | 
| 209 | 
 | 
| 210 |     left_spid = self.cur_word.token.span_id
 | 
| 211 | 
 | 
| 212 |     # For now only supporting single digit descriptor
 | 
| 213 |     first_char = self.cur_word.token.val[0]
 | 
| 214 |     if first_char.isdigit():
 | 
| 215 |       fd = int(first_char)
 | 
| 216 |     else:
 | 
| 217 |       fd = const.NO_INTEGER
 | 
| 218 | 
 | 
| 219 |     if self.c_id in (Id.Redir_DLess, Id.Redir_DLessDash):  # here doc
 | 
| 220 |       node = ast.HereDoc()
 | 
| 221 |       node.op_id = self.c_id
 | 
| 222 |       node.body = None  # not read yet
 | 
| 223 |       node.fd = fd
 | 
| 224 |       node.was_filled = False
 | 
| 225 |       node.spids.append(left_spid)
 | 
| 226 |       self._Next()
 | 
| 227 | 
 | 
| 228 |       if not self._Peek(): return None
 | 
| 229 |       # "If any character in word is quoted, the delimiter shall be formed by
 | 
| 230 |       # performing quote removal on word, and the here-document lines shall not
 | 
| 231 |       # be expanded. Otherwise, the delimiter shall be the word itself."
 | 
| 232 |       # NOTE: \EOF counts, or even E\OF
 | 
| 233 |       ok, node.here_end, quoted = word.StaticEval(self.cur_word)
 | 
| 234 |       if not ok:
 | 
| 235 |         self._BadWord('Error evaluating here doc delimiter: %s', self.cur_word)
 | 
| 236 |         return None
 | 
| 237 |       node.do_expansion = not quoted
 | 
| 238 |       self._Next()
 | 
| 239 | 
 | 
| 240 |       self.pending_here_docs.append(node)  # will be filled on next newline.
 | 
| 241 | 
 | 
| 242 |     else:
 | 
| 243 |       node = ast.Redir()
 | 
| 244 |       node.op_id = self.c_id
 | 
| 245 |       node.fd = fd
 | 
| 246 |       node.spids.append(left_spid)
 | 
| 247 |       self._Next()
 | 
| 248 | 
 | 
| 249 |       if not self._Peek(): return None
 | 
| 250 |       if self.c_kind != Kind.Word:
 | 
| 251 |         self.AddErrorContext(
 | 
| 252 |             'Expected word after redirect operator', word=self.cur_word)
 | 
| 253 |         return None
 | 
| 254 | 
 | 
| 255 |       new_word = word.TildeDetect(self.cur_word)
 | 
| 256 |       node.arg_word = new_word or self.cur_word
 | 
| 257 |       self._Next()
 | 
| 258 | 
 | 
| 259 |     return node
 | 
| 260 | 
 | 
| 261 |   def _ParseRedirectList(self):
 | 
| 262 |     """Try parsing any redirects at the cursor.
 | 
| 263 | 
 | 
| 264 |     This is used for blocks only, not commands.
 | 
| 265 | 
 | 
| 266 |     Return None on error.
 | 
| 267 |     """
 | 
| 268 |     redirects = []
 | 
| 269 |     while True:
 | 
| 270 |       if not self._Peek(): return None
 | 
| 271 | 
 | 
| 272 |       # This prediction needs to ONLY accept redirect operators.  Should we
 | 
| 273 |       # make them a separate TokeNkind?
 | 
| 274 |       if self.c_kind != Kind.Redir:
 | 
| 275 |         break
 | 
| 276 | 
 | 
| 277 |       node = self.ParseRedirect()
 | 
| 278 |       if not node:
 | 
| 279 |         return None
 | 
| 280 |       redirects.append(node)
 | 
| 281 |       self._Next()
 | 
| 282 |     return redirects
 | 
| 283 | 
 | 
| 284 |   def _ScanSimpleCommand(self):
 | 
| 285 |     """First pass: Split into redirects and words."""
 | 
| 286 |     redirects = []
 | 
| 287 |     words = []
 | 
| 288 |     while True:
 | 
| 289 |       if not self._Peek(): return None
 | 
| 290 |       if self.c_kind == Kind.Redir:
 | 
| 291 |         node = self.ParseRedirect()
 | 
| 292 |         if not node: return None  # e.g. EOF
 | 
| 293 |         redirects.append(node)
 | 
| 294 | 
 | 
| 295 |       elif self.c_kind == Kind.Word:
 | 
| 296 |         words.append(self.cur_word)
 | 
| 297 | 
 | 
| 298 |       else:
 | 
| 299 |         break
 | 
| 300 |       self._Next()
 | 
| 301 |     return redirects, words
 | 
| 302 | 
 | 
| 303 |   def _SplitSimpleCommandPrefix(self, words):
 | 
| 304 |     """
 | 
| 305 |     Second pass of SimpleCommand parsing: look for assignment words.
 | 
| 306 |     """
 | 
| 307 |     prefix_bindings = []
 | 
| 308 |     suffix_words = []
 | 
| 309 | 
 | 
| 310 |     done_prefix = False
 | 
| 311 |     for w in words:
 | 
| 312 |       if done_prefix:
 | 
| 313 |         suffix_words.append(w)
 | 
| 314 |         continue
 | 
| 315 | 
 | 
| 316 |       left_spid = word.LeftMostSpanForWord(w)
 | 
| 317 | 
 | 
| 318 |       kov = word.LooksLikeAssignment(w)
 | 
| 319 |       if kov:
 | 
| 320 |         k, op, v = kov
 | 
| 321 |         t = word.TildeDetect(v)
 | 
| 322 |         if t:
 | 
| 323 |           # t is an unevaluated word with TildeSubPart
 | 
| 324 |           prefix_bindings.append((k, op, t, left_spid))
 | 
| 325 |         else:
 | 
| 326 |           prefix_bindings.append((k, op, v, left_spid))  # v is unevaluated word
 | 
| 327 |       else:
 | 
| 328 |         done_prefix = True
 | 
| 329 |         suffix_words.append(w)
 | 
| 330 | 
 | 
| 331 |     return prefix_bindings, suffix_words
 | 
| 332 | 
 | 
| 333 |   def _MakeSimpleCommand(self, prefix_bindings, suffix_words, redirects):
 | 
| 334 |     # FOO=(1 2 3) ls is not allowed
 | 
| 335 |     for k, _, v, _ in prefix_bindings:
 | 
| 336 |       if word.HasArrayPart(v):
 | 
| 337 |         self.AddErrorContext(
 | 
| 338 |             'Unexpected array literal in binding: %s', v, word=v)
 | 
| 339 |         return None
 | 
| 340 | 
 | 
| 341 |     # echo FOO=(1 2 3) is not allowed
 | 
| 342 |     # NOTE: Other checks can be inserted here.  Can resolve builtins,
 | 
| 343 |     # functions, aliases, static PATH, etc.
 | 
| 344 |     for w in suffix_words:
 | 
| 345 |       kov = word.LooksLikeAssignment(w)
 | 
| 346 |       if kov:
 | 
| 347 |         _, _, v = kov
 | 
| 348 |         if word.HasArrayPart(v):
 | 
| 349 |           self.AddErrorContext('Unexpected array literal: %s', v, word=w)
 | 
| 350 |           return None
 | 
| 351 | 
 | 
| 352 |     # NOTE: # In bash, {~bob,~jane}/src works, even though ~ isn't the leading
 | 
| 353 |     # character of the initial word.
 | 
| 354 |     # However, this means we must do tilde detection AFTER brace EXPANSION, not
 | 
| 355 |     # just after brace DETECTION like we're doing here.
 | 
| 356 |     # The BracedWordTree instances have to be expanded into CompoundWord
 | 
| 357 |     # instances for the tilde detection to work.
 | 
| 358 |     words2 = braces.BraceDetectAll(suffix_words)
 | 
| 359 |     words3 = word.TildeDetectAll(words2)
 | 
| 360 | 
 | 
| 361 |     node = ast.SimpleCommand()
 | 
| 362 |     node.words = words3
 | 
| 363 |     node.redirects = redirects
 | 
| 364 |     for name, op, val, left_spid in prefix_bindings:
 | 
| 365 |       if op != assign_op_e.Equal:
 | 
| 366 |         # NOTE: Using spid of RHS for now, since we don't have one for op.
 | 
| 367 |         self.AddErrorContext('Expected = in environment binding, got +=',
 | 
| 368 |             word=val)
 | 
| 369 |         return None
 | 
| 370 |       pair = ast.env_pair(name, val)
 | 
| 371 |       pair.spids.append(left_spid)
 | 
| 372 |       node.more_env.append(pair)
 | 
| 373 |     return node
 | 
| 374 | 
 | 
| 375 |   def _MakeAssignment(self, assign_kw, suffix_words):
 | 
| 376 |     # First parse flags, e.g. -r -x -a -A.  None of the flags have arguments.
 | 
| 377 |     flags = []
 | 
| 378 |     n = len(suffix_words)
 | 
| 379 |     i = 1
 | 
| 380 |     while i < n:
 | 
| 381 |       w = suffix_words[i]
 | 
| 382 |       ok, static_val, quoted = word.StaticEval(w)
 | 
| 383 |       if not ok or quoted:
 | 
| 384 |         break  # can't statically evaluate
 | 
| 385 | 
 | 
| 386 |       if static_val.startswith('-'):
 | 
| 387 |         flags.append(static_val)
 | 
| 388 |       else:
 | 
| 389 |         break  # not a flag, rest are args
 | 
| 390 |       i += 1
 | 
| 391 | 
 | 
| 392 |     # Now parse bindings or variable names
 | 
| 393 |     assignments = []
 | 
| 394 |     while i < n:
 | 
| 395 |       w = suffix_words[i]
 | 
| 396 |       left_spid = word.LeftMostSpanForWord(w)
 | 
| 397 |       kov = word.LooksLikeAssignment(w)
 | 
| 398 |       if kov:
 | 
| 399 |         k, op, v = kov
 | 
| 400 |         t = word.TildeDetect(v)
 | 
| 401 |         if t:
 | 
| 402 |           # t is an unevaluated word with TildeSubPart
 | 
| 403 |           a = (k, op, t, left_spid)
 | 
| 404 |         else:
 | 
| 405 |           a = (k, op, v, left_spid)  # v is unevaluated word
 | 
| 406 |       else:
 | 
| 407 |         # In aboriginal in variables/sources: export_if_blank does export "$1".
 | 
| 408 |         # We should allow that.
 | 
| 409 | 
 | 
| 410 |         # Parse this differently then?
 | 
| 411 |         # dynamic-export?
 | 
| 412 |         # It sets global variables.
 | 
| 413 |         ok, static_val, quoted = word.StaticEval(w)
 | 
| 414 |         if not ok or quoted:
 | 
| 415 |            self.AddErrorContext(
 | 
| 416 |                'Variable names must be constant strings, got %s', w, word=w)
 | 
| 417 |            return None
 | 
| 418 | 
 | 
| 419 |         # No value is equivalent to ''
 | 
| 420 |         m = VAR_NAME_RE.match(static_val)
 | 
| 421 |         if not m:
 | 
| 422 |           self.AddErrorContext('Invalid variable name %r', static_val, word=w)
 | 
| 423 |           return None
 | 
| 424 |         a = (static_val, assign_op_e.Equal, None, left_spid)
 | 
| 425 | 
 | 
| 426 |       assignments.append(a)
 | 
| 427 |       i += 1
 | 
| 428 | 
 | 
| 429 |     # TODO: Also make with LhsIndexedName
 | 
| 430 |     pairs = []
 | 
| 431 |     for lhs, op, rhs, spid in assignments:
 | 
| 432 |       p = ast.assign_pair(ast.LhsName(lhs), op, rhs)
 | 
| 433 |       p.spids.append(spid)
 | 
| 434 |       pairs.append(p)
 | 
| 435 | 
 | 
| 436 |     node = ast.Assignment(assign_kw, flags, pairs)
 | 
| 437 | 
 | 
| 438 |     return node
 | 
| 439 | 
 | 
| 440 |   # Flags that indicate an assignment should be parsed like a command.
 | 
| 441 |   _ASSIGN_COMMANDS = set([
 | 
| 442 |       (Id.Assign_Declare, '-f'),  # function defs
 | 
| 443 |       (Id.Assign_Declare, '-F'),  # function names
 | 
| 444 |       (Id.Assign_Declare, '-p'),  # print
 | 
| 445 | 
 | 
| 446 |       (Id.Assign_Typeset, '-f'),
 | 
| 447 |       (Id.Assign_Typeset, '-F'),
 | 
| 448 |       (Id.Assign_Typeset, '-p'),
 | 
| 449 | 
 | 
| 450 |       (Id.Assign_Local, '-p'),
 | 
| 451 |       (Id.Assign_Readonly, '-p'),
 | 
| 452 |       # Hm 'export -p' is more like a command.  But we're parsing it
 | 
| 453 |       # dynamically now because of some wrappers.
 | 
| 454 |       # Maybe we could change this.
 | 
| 455 |       #(Id.Assign_Export, '-p'),
 | 
| 456 |   ])
 | 
| 457 |   # Flags to parse like assignments: -a -r -x (and maybe -i)
 | 
| 458 | 
 | 
| 459 |   def ParseSimpleCommand(self):
 | 
| 460 |     """
 | 
| 461 |     Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)
 | 
| 462 | 
 | 
| 463 |     io_file        : '<'       filename
 | 
| 464 |                    | LESSAND   filename
 | 
| 465 |                      ...
 | 
| 466 | 
 | 
| 467 |     io_here        : DLESS     here_end
 | 
| 468 |                    | DLESSDASH here_end
 | 
| 469 | 
 | 
| 470 |     redirect       : IO_NUMBER (io_redirect | io_here)
 | 
| 471 | 
 | 
| 472 |     prefix_part    : ASSIGNMENT_WORD | redirect
 | 
| 473 |     cmd_part       : WORD | redirect
 | 
| 474 | 
 | 
| 475 |     assign_kw      : Declare | Export | Local | Readonly
 | 
| 476 | 
 | 
| 477 |     # Without any words it is parsed as a command, not an assignment
 | 
| 478 |     assign_listing : assign_kw
 | 
| 479 | 
 | 
| 480 |     # Now we have something to do (might be changing assignment flags too)
 | 
| 481 |     # NOTE: any prefixes should be a warning, but they are allowed in shell.
 | 
| 482 |     assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+
 | 
| 483 | 
 | 
| 484 |     # an external command, a function call, or a builtin -- a "word_command"
 | 
| 485 |     word_command   : prefix_part* cmd_part+
 | 
| 486 | 
 | 
| 487 |     simple_command : assign_listing
 | 
| 488 |                    | assignment
 | 
| 489 |                    | proc_command
 | 
| 490 | 
 | 
| 491 |     Simple imperative algorithm:
 | 
| 492 | 
 | 
| 493 |     1) Read a list of words and redirects.  Append them to separate lists.
 | 
| 494 |     2) Look for the first non-assignment word.  If it's declare, etc., then
 | 
| 495 |     keep parsing words AND assign words.  Otherwise, just parse words.
 | 
| 496 |     3) If there are no non-assignment words, then it's a global assignment.
 | 
| 497 | 
 | 
| 498 |     { redirects, global assignments } OR
 | 
| 499 |     { redirects, prefix_bindings, words } OR
 | 
| 500 |     { redirects, ERROR_prefix_bindings, keyword, assignments, words }
 | 
| 501 | 
 | 
| 502 |     THEN CHECK that prefix bindings don't have any array literal parts!
 | 
| 503 |     global assignment and keyword assignments can have the of course.
 | 
| 504 |     well actually EXPORT shouldn't have them either -- WARNING
 | 
| 505 | 
 | 
| 506 |     3 cases we want to warn: prefix_bindings for assignment, and array literal
 | 
| 507 |     in prefix bindings, or export
 | 
| 508 | 
 | 
| 509 |     A command can be an assignment word, word, or redirect on its own.
 | 
| 510 | 
 | 
| 511 |         ls
 | 
| 512 |         >out.txt
 | 
| 513 | 
 | 
| 514 |         >out.txt FOO=bar   # this touches the file
 | 
| 515 | 
 | 
| 516 |     Or any sequence:
 | 
| 517 |         ls foo bar
 | 
| 518 |         <in.txt ls foo bar >out.txt
 | 
| 519 |         <in.txt ls >out.txt foo bar
 | 
| 520 | 
 | 
| 521 |     Or add one or more environment bindings:
 | 
| 522 |         VAR=val env
 | 
| 523 |         >out.txt VAR=val env
 | 
| 524 | 
 | 
| 525 |     here_end vs filename is a matter of whether we test that it's quoted.  e.g.
 | 
| 526 |     <<EOF vs <<'EOF'.
 | 
| 527 |     """
 | 
| 528 |     result = self._ScanSimpleCommand()
 | 
| 529 |     if not result: return None
 | 
| 530 |     redirects, words = result
 | 
| 531 | 
 | 
| 532 |     if not words:  # e.g.  >out.txt  # redirect without words
 | 
| 533 |       node = ast.SimpleCommand()
 | 
| 534 |       node.redirects = redirects
 | 
| 535 |       return node
 | 
| 536 | 
 | 
| 537 |     prefix_bindings, suffix_words = self._SplitSimpleCommandPrefix(words)
 | 
| 538 | 
 | 
| 539 |     if not suffix_words:  # ONE=1 TWO=2  (with no other words)
 | 
| 540 |       if redirects:
 | 
| 541 |         binding1 = prefix_bindings[0]
 | 
| 542 |         _, _, _, spid = binding1
 | 
| 543 |         self.AddErrorContext('Got redirects in global assignment',
 | 
| 544 |                              span_id=spid)
 | 
| 545 |         return None
 | 
| 546 | 
 | 
| 547 |       pairs = []
 | 
| 548 |       for lhs, op, rhs, spid in prefix_bindings:
 | 
| 549 |         p = ast.assign_pair(ast.LhsName(lhs), op, rhs)
 | 
| 550 |         p.spids.append(spid)
 | 
| 551 |         pairs.append(p)
 | 
| 552 | 
 | 
| 553 |       node = ast.Assignment(Id.Assign_None, [], pairs)
 | 
| 554 |       left_spid = word.LeftMostSpanForWord(words[0])
 | 
| 555 |       node.spids.append(left_spid)  # no keyword spid to skip past
 | 
| 556 |       return node
 | 
| 557 | 
 | 
| 558 |     kind, kw_token = word.KeywordToken(suffix_words[0])
 | 
| 559 | 
 | 
| 560 |     if kind == Kind.Assign:
 | 
| 561 |       # Here we StaticEval suffix_words[1] to see if it's a command like
 | 
| 562 |       # 'typeset -p'.  Then it becomes a SimpleCommand node instead of an
 | 
| 563 |       # Assignment.  Note we're not handling duplicate flags like 'typeset
 | 
| 564 |       # -pf'.  I see this in bashdb (bash debugger) but it can just be changed
 | 
| 565 |       # to 'typeset -p -f'.
 | 
| 566 |       is_command = False
 | 
| 567 |       if len(suffix_words) > 1:
 | 
| 568 |         ok, val, _ = word.StaticEval(suffix_words[1])
 | 
| 569 |         if ok and (kw_token.id, val) in self._ASSIGN_COMMANDS:
 | 
| 570 |           is_command = True
 | 
| 571 | 
 | 
| 572 |       if is_command:  # declare -f, declare -p, typeset -p, etc.
 | 
| 573 |         node = self._MakeSimpleCommand(prefix_bindings, suffix_words,
 | 
| 574 |                                        redirects)
 | 
| 575 |         return node
 | 
| 576 | 
 | 
| 577 |       else:  # declare str='', declare -a array=()
 | 
| 578 |         if redirects:
 | 
| 579 |           # Attach the error location to the keyword.  It would be more precise
 | 
| 580 |           # to attach it to the
 | 
| 581 |           self.AddErrorContext('Got redirects in assignment', token=kw_token)
 | 
| 582 |           return None
 | 
| 583 | 
 | 
| 584 |         if prefix_bindings:  # FOO=bar local spam=eggs not allowed
 | 
| 585 |           # Use the location of the first value.  TODO: Use the whole word
 | 
| 586 |           # before splitting.
 | 
| 587 |           _, _, v0, _ = prefix_bindings[0]
 | 
| 588 |           self.AddErrorContext(
 | 
| 589 |               'Invalid prefix bindings in assignment: %s', prefix_bindings,
 | 
| 590 |               word=v0)
 | 
| 591 |           return None
 | 
| 592 | 
 | 
| 593 |         node = self._MakeAssignment(kw_token.id, suffix_words)
 | 
| 594 |         if not node: return None
 | 
| 595 |         node.spids.append(kw_token.span_id)
 | 
| 596 |         return node
 | 
| 597 | 
 | 
| 598 |     elif kind == Kind.ControlFlow:
 | 
| 599 |       if redirects:
 | 
| 600 |         self.AddErrorContext('Got redirects in control flow: %s', redirects)
 | 
| 601 |         return None
 | 
| 602 | 
 | 
| 603 |       if prefix_bindings:  # FOO=bar local spam=eggs not allowed
 | 
| 604 |         # Use the location of the first value.  TODO: Use the whole word before
 | 
| 605 |         # splitting.
 | 
| 606 |         _, _, v0, _ = prefix_bindings[0]
 | 
| 607 |         self.AddErrorContext(
 | 
| 608 |             'Invalid prefix bindings in control flow: %s', prefix_bindings,
 | 
| 609 |             word=v0)
 | 
| 610 |         return None
 | 
| 611 | 
 | 
| 612 |       # Attach the token for errors.  (Assignment may not need it.)
 | 
| 613 |       if len(suffix_words) == 1:
 | 
| 614 |         arg_word = None
 | 
| 615 |       elif len(suffix_words) == 2:
 | 
| 616 |         arg_word = suffix_words[1]
 | 
| 617 |       else:
 | 
| 618 |         # Underline the extra word.
 | 
| 619 |         self.AddErrorContext(
 | 
| 620 |             'Unexpected argument to %r', kw_token.val, word=suffix_words[2])
 | 
| 621 |         return None
 | 
| 622 | 
 | 
| 623 |       return ast.ControlFlow(kw_token, arg_word)
 | 
| 624 | 
 | 
| 625 |     else:
 | 
| 626 |       node = self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects)
 | 
| 627 |       return node
 | 
| 628 | 
 | 
| 629 |   def ParseBraceGroup(self):
 | 
| 630 |     """
 | 
| 631 |     brace_group      : LBrace command_list RBrace ;
 | 
| 632 |     """
 | 
| 633 |     left_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 634 |     if not self._Eat(Id.Lit_LBrace): return None
 | 
| 635 | 
 | 
| 636 |     c_list = self.ParseCommandList()
 | 
| 637 |     if not c_list: return None
 | 
| 638 | 
 | 
| 639 |     # Not needed
 | 
| 640 |     #right_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 641 |     if not self._Eat(Id.Lit_RBrace): return None
 | 
| 642 | 
 | 
| 643 |     node = ast.BraceGroup(c_list.children)
 | 
| 644 |     node.spids.append(left_spid)
 | 
| 645 |     return node
 | 
| 646 | 
 | 
| 647 |   def ParseDoGroup(self):
 | 
| 648 |     """
 | 
| 649 |     Used by ForEach, ForExpr, While, Until.  Should this be a Do node?
 | 
| 650 | 
 | 
| 651 |     do_group         : Do command_list Done ;          /* Apply rule 6 */
 | 
| 652 |     """
 | 
| 653 |     if not self._Eat(Id.KW_Do): return None
 | 
| 654 |     do_spid = word.LeftMostSpanForWord(self.cur_word)  # after _Eat
 | 
| 655 | 
 | 
| 656 |     c_list = self.ParseCommandList()  # could be any thing
 | 
| 657 |     if not c_list: return None
 | 
| 658 | 
 | 
| 659 |     if not self._Eat(Id.KW_Done): return None
 | 
| 660 |     done_spid = word.LeftMostSpanForWord(self.cur_word)  # after _Eat
 | 
| 661 | 
 | 
| 662 |     node = ast.DoGroup(c_list.children)
 | 
| 663 |     node.spids.extend((do_spid, done_spid))
 | 
| 664 |     return node
 | 
| 665 | 
 | 
| 666 |   def ParseForWords(self):
 | 
| 667 |     """
 | 
| 668 |     for_words        : WORD* for_sep
 | 
| 669 |                      ;
 | 
| 670 |     for_sep          : ';' newline_ok
 | 
| 671 |                      | NEWLINES
 | 
| 672 |                      ;
 | 
| 673 |     """
 | 
| 674 |     words = []
 | 
| 675 |     # The span_id of any semi-colon, so we can remove it.
 | 
| 676 |     semi_spid = const.NO_INTEGER  
 | 
| 677 | 
 | 
| 678 |     while True:
 | 
| 679 |       if not self._Peek(): return None
 | 
| 680 |       if self.c_id == Id.Op_Semi:
 | 
| 681 |         semi_spid = self.cur_word.token.span_id  # TokenWord
 | 
| 682 |         self._Next()
 | 
| 683 |         if not self._NewlineOk(): return None
 | 
| 684 |         break
 | 
| 685 |       elif self.c_id == Id.Op_Newline:
 | 
| 686 |         self._Next()
 | 
| 687 |         break
 | 
| 688 |       if self.cur_word.tag != word_e.CompoundWord:
 | 
| 689 |         # TODO: Can we also show a pointer to the 'for' keyword?
 | 
| 690 |         self.AddErrorContext('Invalid word in for loop', word=self.cur_word)
 | 
| 691 |         return None
 | 
| 692 | 
 | 
| 693 |       words.append(self.cur_word)
 | 
| 694 |       self._Next()
 | 
| 695 |     return words, semi_spid
 | 
| 696 | 
 | 
| 697 |   def _ParseForExprLoop(self):
 | 
| 698 |     """
 | 
| 699 |     for (( init; cond; update )) for_sep? do_group
 | 
| 700 |     """
 | 
| 701 |     node = self.w_parser.ReadForExpression()
 | 
| 702 |     if not node:
 | 
| 703 |       error_stack = self.w_parser.Error()
 | 
| 704 |       self.error_stack.extend(error_stack)
 | 
| 705 |       self.AddErrorContext("Parsing for expression failed")
 | 
| 706 |       return None
 | 
| 707 |     self._Next()
 | 
| 708 | 
 | 
| 709 |     if not self._Peek(): return None
 | 
| 710 |     if self.c_id == Id.Op_Semi:
 | 
| 711 |       self._Next()
 | 
| 712 |       if not self._NewlineOk(): return None
 | 
| 713 |     elif self.c_id == Id.Op_Newline:
 | 
| 714 |       self._Next()
 | 
| 715 |     elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
 | 
| 716 |       pass
 | 
| 717 |     else:
 | 
| 718 |       self.AddErrorContext(
 | 
| 719 |           'Unexpected token after for expression: %s', self.cur_word,
 | 
| 720 |           word=self.cur_word)
 | 
| 721 |       return None
 | 
| 722 | 
 | 
| 723 |     body_node = self.ParseDoGroup()
 | 
| 724 |     if not body_node: return None
 | 
| 725 | 
 | 
| 726 |     node.body = body_node
 | 
| 727 |     return node
 | 
| 728 | 
 | 
| 729 |   def _ParseForEachLoop(self):
 | 
| 730 |     node = ast.ForEach()
 | 
| 731 |     node.do_arg_iter = False
 | 
| 732 | 
 | 
| 733 |     ok, iter_name, quoted = word.StaticEval(self.cur_word)
 | 
| 734 |     if not ok or quoted:
 | 
| 735 |       self.AddErrorContext(
 | 
| 736 |           "Invalid for loop variable", word=self.cur_word)
 | 
| 737 |       return None
 | 
| 738 |     if not VAR_NAME_RE.match(iter_name):
 | 
| 739 |       self.AddErrorContext(
 | 
| 740 |           "Invalid for loop variable name", word=self.cur_word)
 | 
| 741 |       return None
 | 
| 742 |     node.iter_name = iter_name
 | 
| 743 |     self._Next()  # skip past name
 | 
| 744 | 
 | 
| 745 |     if not self._NewlineOk(): return None
 | 
| 746 | 
 | 
| 747 |     in_spid = const.NO_INTEGER
 | 
| 748 |     semi_spid = const.NO_INTEGER
 | 
| 749 | 
 | 
| 750 |     if not self._Peek(): return None
 | 
| 751 |     if self.c_id == Id.KW_In:
 | 
| 752 |       self._Next()  # skip in
 | 
| 753 | 
 | 
| 754 |       in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
 | 
| 755 |       x = self.ParseForWords()
 | 
| 756 |       if x is None:
 | 
| 757 |         return None
 | 
| 758 |       iter_words, semi_spid = x
 | 
| 759 |       words2 = braces.BraceDetectAll(iter_words)
 | 
| 760 |       words3 = word.TildeDetectAll(words2)
 | 
| 761 | 
 | 
| 762 |       if iter_words is None:  # empty list of words is OK
 | 
| 763 |         return None
 | 
| 764 |       node.iter_words = words3
 | 
| 765 | 
 | 
| 766 |     elif self.c_id == Id.Op_Semi:
 | 
| 767 |       node.do_arg_iter = True  # implicit for loop
 | 
| 768 |       self._Next()
 | 
| 769 | 
 | 
| 770 |     elif self.c_id == Id.KW_Do:
 | 
| 771 |       node.do_arg_iter = True  # implicit for loop
 | 
| 772 |       # do not advance
 | 
| 773 | 
 | 
| 774 |     else:
 | 
| 775 |       self.AddErrorContext("Unexpected word in for loop: %s", self.cur_word,
 | 
| 776 |           word=self.cur_word)
 | 
| 777 |       return None
 | 
| 778 | 
 | 
| 779 |     node.spids.extend((in_spid, semi_spid))
 | 
| 780 | 
 | 
| 781 |     body_node = self.ParseDoGroup()
 | 
| 782 |     if not body_node: return None
 | 
| 783 | 
 | 
| 784 |     node.body = body_node
 | 
| 785 |     return node
 | 
| 786 | 
 | 
| 787 |   def ParseFor(self):
 | 
| 788 |     """
 | 
| 789 |     for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
 | 
| 790 |                | For '((' ... TODO
 | 
| 791 |     """
 | 
| 792 |     if not self._Eat(Id.KW_For): return None
 | 
| 793 | 
 | 
| 794 |     if not self._Peek(): return None
 | 
| 795 |     if self.c_id == Id.Op_DLeftParen:
 | 
| 796 |       node = self._ParseForExprLoop()
 | 
| 797 |     else:
 | 
| 798 |       node = self._ParseForEachLoop()
 | 
| 799 | 
 | 
| 800 |     return node
 | 
| 801 | 
 | 
| 802 |   def ParseWhile(self):
 | 
| 803 |     """
 | 
| 804 |     while_clause     : While command_list do_group ;
 | 
| 805 |     """
 | 
| 806 |     self._Next()  # skip while
 | 
| 807 | 
 | 
| 808 |     cond_node = self.ParseCommandList()
 | 
| 809 |     if not cond_node: return None
 | 
| 810 | 
 | 
| 811 |     body_node = self.ParseDoGroup()
 | 
| 812 |     if not body_node: return None
 | 
| 813 | 
 | 
| 814 |     return ast.While(cond_node.children, body_node)
 | 
| 815 | 
 | 
| 816 |   def ParseUntil(self):
 | 
| 817 |     """
 | 
| 818 |     until_clause     : Until command_list do_group ;
 | 
| 819 |     """
 | 
| 820 |     self._Next()  # skip until
 | 
| 821 | 
 | 
| 822 |     cond_node = self.ParseCommandList()
 | 
| 823 |     if not cond_node: return None
 | 
| 824 | 
 | 
| 825 |     body_node = self.ParseDoGroup()
 | 
| 826 |     if not body_node: return None
 | 
| 827 | 
 | 
| 828 |     return ast.Until(cond_node.children, body_node)
 | 
| 829 | 
 | 
| 830 |   def ParseCaseItem(self):
 | 
| 831 |     """
 | 
| 832 |     case_item: '('? pattern ('|' pattern)* ')'
 | 
| 833 |                newline_ok command_term? trailer? ;
 | 
| 834 |     """
 | 
| 835 |     self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)
 | 
| 836 | 
 | 
| 837 |     left_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 838 |     if self.c_id == Id.Op_LParen:
 | 
| 839 |       self._Next()
 | 
| 840 | 
 | 
| 841 |     pat_words = []
 | 
| 842 |     while True:
 | 
| 843 |       if not self._Peek(): return None
 | 
| 844 |       pat_words.append(self.cur_word)
 | 
| 845 |       self._Next()
 | 
| 846 | 
 | 
| 847 |       if not self._Peek(): return None
 | 
| 848 |       if self.c_id == Id.Op_Pipe:
 | 
| 849 |         self._Next()
 | 
| 850 |       else:
 | 
| 851 |         break
 | 
| 852 | 
 | 
| 853 |     rparen_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 854 |     if not self._Eat(Id.Right_CasePat): return None
 | 
| 855 |     if not self._NewlineOk(): return None
 | 
| 856 | 
 | 
| 857 |     if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
 | 
| 858 |       c_list = self.ParseCommandTerm()
 | 
| 859 |       if not c_list: return None
 | 
| 860 |       action_children = c_list.children
 | 
| 861 |     else:
 | 
| 862 |       action_children = []
 | 
| 863 | 
 | 
| 864 |     dsemi_spid = const.NO_INTEGER
 | 
| 865 |     last_spid = const.NO_INTEGER
 | 
| 866 |     if not self._Peek(): return None
 | 
| 867 |     if self.c_id == Id.KW_Esac:
 | 
| 868 |       last_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 869 |     elif self.c_id == Id.Op_DSemi:
 | 
| 870 |       dsemi_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 871 |       self._Next()
 | 
| 872 |     else:
 | 
| 873 |       self.AddErrorContext('Expected DSEMI or ESAC, got %s', self.cur_word,
 | 
| 874 |           word=self.cur_word)
 | 
| 875 |       return None
 | 
| 876 | 
 | 
| 877 |     if not self._NewlineOk(): return None
 | 
| 878 | 
 | 
| 879 |     arm = ast.case_arm(pat_words, action_children)
 | 
| 880 |     arm.spids.extend((left_spid, rparen_spid, dsemi_spid, last_spid))
 | 
| 881 |     return arm
 | 
| 882 | 
 | 
| 883 |   def ParseCaseList(self, arms):
 | 
| 884 |     """
 | 
| 885 |     case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
 | 
| 886 |     """
 | 
| 887 |     if not self._Peek(): return None
 | 
| 888 | 
 | 
| 889 |     while True:
 | 
| 890 |       # case item begins with a command word or (
 | 
| 891 |       if self.c_id == Id.KW_Esac:
 | 
| 892 |         break
 | 
| 893 |       if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
 | 
| 894 |         break
 | 
| 895 |       arm = self.ParseCaseItem()
 | 
| 896 |       if not arm: return None
 | 
| 897 | 
 | 
| 898 |       arms.append(arm)
 | 
| 899 |       if not self._Peek(): return None
 | 
| 900 |       # Now look for DSEMI or ESAC
 | 
| 901 | 
 | 
| 902 |     return True
 | 
| 903 | 
 | 
| 904 |   def ParseCase(self):
 | 
| 905 |     """
 | 
| 906 |     case_clause      : Case WORD newline_ok in newline_ok case_list? Esac ;
 | 
| 907 |     """
 | 
| 908 |     case_node = ast.Case()
 | 
| 909 | 
 | 
| 910 |     case_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 911 |     self._Next()  # skip case
 | 
| 912 | 
 | 
| 913 |     if not self._Peek(): return None
 | 
| 914 |     case_node.to_match = self.cur_word
 | 
| 915 |     self._Next()
 | 
| 916 | 
 | 
| 917 |     if not self._NewlineOk(): return None
 | 
| 918 |     in_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 919 |     if not self._Eat(Id.KW_In): return None
 | 
| 920 |     if not self._NewlineOk(): return None
 | 
| 921 | 
 | 
| 922 |     if self.c_id != Id.KW_Esac:  # empty case list
 | 
| 923 |       if not self.ParseCaseList(case_node.arms):
 | 
| 924 |         self.AddErrorContext("ParseCase: error parsing case list")
 | 
| 925 |         return None
 | 
| 926 |       # TODO: should it return a list of nodes, and extend?
 | 
| 927 |       if not self._Peek(): return None
 | 
| 928 | 
 | 
| 929 |     esac_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 930 |     if not self._Eat(Id.KW_Esac): return None
 | 
| 931 |     self._Next()
 | 
| 932 | 
 | 
| 933 |     case_node.spids.extend((case_spid, in_spid, esac_spid))
 | 
| 934 |     return case_node
 | 
| 935 | 
 | 
| 936 |   def _ParseElifElse(self, if_node):
 | 
| 937 |     """
 | 
| 938 |     else_part: (Elif command_list Then command_list)* Else command_list ;
 | 
| 939 |     """
 | 
| 940 |     arms = if_node.arms
 | 
| 941 | 
 | 
| 942 |     self._Peek()
 | 
| 943 |     while self.c_id == Id.KW_Elif:
 | 
| 944 |       elif_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 945 | 
 | 
| 946 |       self._Next()  # skip elif
 | 
| 947 |       cond = self.ParseCommandList()
 | 
| 948 |       if not cond: return None
 | 
| 949 | 
 | 
| 950 |       then_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 951 |       if not self._Eat(Id.KW_Then): return None
 | 
| 952 | 
 | 
| 953 |       body = self.ParseCommandList()
 | 
| 954 |       if not body: return None
 | 
| 955 | 
 | 
| 956 |       arm = ast.if_arm(cond.children, body.children)
 | 
| 957 |       arm.spids.extend((elif_spid, then_spid))
 | 
| 958 |       arms.append(arm)
 | 
| 959 | 
 | 
| 960 |     if self.c_id == Id.KW_Else:
 | 
| 961 |       else_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 962 |       self._Next()
 | 
| 963 |       body = self.ParseCommandList()
 | 
| 964 |       if not body: return None
 | 
| 965 |       if_node.else_action = body.children
 | 
| 966 |     else:
 | 
| 967 |       else_spid = const.NO_INTEGER
 | 
| 968 | 
 | 
| 969 |     if_node.spids.append(else_spid)
 | 
| 970 | 
 | 
| 971 |     return True
 | 
| 972 | 
 | 
| 973 |   def ParseIf(self):
 | 
| 974 |     """
 | 
| 975 |     if_clause        : If command_list Then command_list else_part? Fi ;
 | 
| 976 |     """
 | 
| 977 |     if_node = ast.If()
 | 
| 978 |     self._Next()  # skip if
 | 
| 979 | 
 | 
| 980 |     cond = self.ParseCommandList()
 | 
| 981 |     if not cond: return None
 | 
| 982 | 
 | 
| 983 |     then_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 984 |     if not self._Eat(Id.KW_Then): return None
 | 
| 985 | 
 | 
| 986 |     body = self.ParseCommandList()
 | 
| 987 |     if not body: return None
 | 
| 988 | 
 | 
| 989 |     arm = ast.if_arm(cond.children, body.children)
 | 
| 990 |     arm.spids.extend((const.NO_INTEGER, then_spid))  # no if spid at first?
 | 
| 991 |     if_node.arms.append(arm)
 | 
| 992 | 
 | 
| 993 |     if self.c_id in (Id.KW_Elif, Id.KW_Else):
 | 
| 994 |       if not self._ParseElifElse(if_node):
 | 
| 995 |         return None
 | 
| 996 |     else:
 | 
| 997 |       if_node.spids.append(const.NO_INTEGER)  # no else spid
 | 
| 998 | 
 | 
| 999 |     fi_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 1000 |     if not self._Eat(Id.KW_Fi): return None
 | 
| 1001 | 
 | 
| 1002 |     if_node.spids.append(fi_spid)
 | 
| 1003 |     return if_node
 | 
| 1004 | 
 | 
| 1005 |   def ParseTime(self):
 | 
| 1006 |     """
 | 
| 1007 |     time [-p] pipeline
 | 
| 1008 | 
 | 
| 1009 |     According to bash help.
 | 
| 1010 |     """
 | 
| 1011 |     self._Next()  # skip time
 | 
| 1012 | 
 | 
| 1013 |     pipeline = self.ParsePipeline()
 | 
| 1014 |     if not pipeline: return None
 | 
| 1015 |     return ast.TimeBlock(pipeline)
 | 
| 1016 | 
 | 
| 1017 |   def ParseCompoundCommand(self):
 | 
| 1018 |     """
 | 
| 1019 |     compound_command : brace_group
 | 
| 1020 |                      | subshell
 | 
| 1021 |                      | for_clause
 | 
| 1022 |                      | while_clause
 | 
| 1023 |                      | until_clause
 | 
| 1024 |                      | if_clause
 | 
| 1025 |                      | case_clause
 | 
| 1026 |                      | time_clause
 | 
| 1027 |                      | [[ BoolExpr ]]
 | 
| 1028 |                      | (( ArithExpr ))
 | 
| 1029 |                      ;
 | 
| 1030 |     """
 | 
| 1031 |     if self.c_id == Id.Lit_LBrace:
 | 
| 1032 |       return self.ParseBraceGroup()
 | 
| 1033 |     if self.c_id == Id.Op_LParen:
 | 
| 1034 |       return self.ParseSubshell()
 | 
| 1035 | 
 | 
| 1036 |     if self.c_id == Id.KW_For:
 | 
| 1037 |       return self.ParseFor()
 | 
| 1038 |     if self.c_id == Id.KW_While:
 | 
| 1039 |       return self.ParseWhile()
 | 
| 1040 |     if self.c_id == Id.KW_Until:
 | 
| 1041 |       return self.ParseUntil()
 | 
| 1042 | 
 | 
| 1043 |     if self.c_id == Id.KW_If:
 | 
| 1044 |       return self.ParseIf()
 | 
| 1045 |     if self.c_id == Id.KW_Case:
 | 
| 1046 |       return self.ParseCase()
 | 
| 1047 |     if self.c_id == Id.KW_Time:
 | 
| 1048 |       return self.ParseTime()
 | 
| 1049 | 
 | 
| 1050 |     # Example of redirect that is observable:
 | 
| 1051 |     # $ (( $(echo one 1>&2; echo 2) > 0 )) 2> out.txt
 | 
| 1052 |     if self.c_id == Id.KW_DLeftBracket:
 | 
| 1053 |       return self.ParseDBracket()
 | 
| 1054 | 
 | 
| 1055 |     if self.c_id == Id.Op_DLeftParen:
 | 
| 1056 |       return self.ParseDParen()
 | 
| 1057 | 
 | 
| 1058 |     self.AddErrorContext(
 | 
| 1059 |         "Expected a compound command (e.g. for while if case), got %s",
 | 
| 1060 |         self.cur_word, word=self.cur_word)
 | 
| 1061 |     return None
 | 
| 1062 | 
 | 
| 1063 |   def ParseFunctionBody(self, func):
 | 
| 1064 |     """
 | 
| 1065 |     function_body    : compound_command io_redirect* ; /* Apply rule 9 */
 | 
| 1066 |     """
 | 
| 1067 |     body = self.ParseCompoundCommand()
 | 
| 1068 |     if not body: return None
 | 
| 1069 | 
 | 
| 1070 |     redirects = self._ParseRedirectList()
 | 
| 1071 |     if redirects is None: return None
 | 
| 1072 | 
 | 
| 1073 |     func.body = body
 | 
| 1074 |     func.redirects = redirects
 | 
| 1075 |     return True
 | 
| 1076 | 
 | 
| 1077 |   def ParseFunctionDef(self):
 | 
| 1078 |     """
 | 
| 1079 |     function_header : fname '(' ')'
 | 
| 1080 |     function_def     : function_header newline_ok function_body ;
 | 
| 1081 | 
 | 
| 1082 |     Precondition: Looking at the function name.
 | 
| 1083 |     Post condition:
 | 
| 1084 | 
 | 
| 1085 |     NOTE: There is an ambiguity with:
 | 
| 1086 | 
 | 
| 1087 |     function foo ( echo hi ) and
 | 
| 1088 |     function foo () ( echo hi )
 | 
| 1089 | 
 | 
| 1090 |     Bash only accepts the latter, though it doesn't really follow a grammar.
 | 
| 1091 |     """
 | 
| 1092 |     left_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 1093 | 
 | 
| 1094 |     ok, name = word.AsFuncName(self.cur_word)
 | 
| 1095 |     if not ok:
 | 
| 1096 |       self.AddErrorContext('Invalid function name', word=self.cur_word)
 | 
| 1097 |       return None
 | 
| 1098 |     self._Next()  # skip function name
 | 
| 1099 | 
 | 
| 1100 |     # Must be true because of lookahead
 | 
| 1101 |     if not self._Peek(): return None
 | 
| 1102 |     assert self.c_id == Id.Op_LParen, self.cur_word
 | 
| 1103 | 
 | 
| 1104 |     self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef)
 | 
| 1105 |     self._Next()
 | 
| 1106 | 
 | 
| 1107 |     if not self._Eat(Id.Right_FuncDef): return None
 | 
| 1108 |     after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1
 | 
| 1109 | 
 | 
| 1110 |     if not self._NewlineOk(): return None
 | 
| 1111 | 
 | 
| 1112 |     func = ast.FuncDef()
 | 
| 1113 |     func.name = name
 | 
| 1114 | 
 | 
| 1115 |     if not self.ParseFunctionBody(func):
 | 
| 1116 |       return None
 | 
| 1117 | 
 | 
| 1118 |     func.spids.append(left_spid)
 | 
| 1119 |     func.spids.append(after_name_spid)
 | 
| 1120 |     return func
 | 
| 1121 | 
 | 
| 1122 |   def ParseKshFunctionDef(self):
 | 
| 1123 |     """
 | 
| 1124 |     ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
 | 
| 1125 |     """
 | 
| 1126 |     left_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 1127 | 
 | 
| 1128 |     self._Next()  # skip past 'function'
 | 
| 1129 | 
 | 
| 1130 |     if not self._Peek(): return None
 | 
| 1131 |     ok, name = word.AsFuncName(self.cur_word)
 | 
| 1132 |     if not ok:
 | 
| 1133 |       self.AddErrorContext("Invalid function name: %r", self.cur_word)
 | 
| 1134 |       return None
 | 
| 1135 |     after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1
 | 
| 1136 |     self._Next()  # skip past 'function name
 | 
| 1137 | 
 | 
| 1138 |     if not self._Peek(): return None
 | 
| 1139 |     if self.c_id == Id.Op_LParen:
 | 
| 1140 |       self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef)
 | 
| 1141 |       self._Next()
 | 
| 1142 |       if not self._Eat(Id.Right_FuncDef): return None
 | 
| 1143 |       # Change it: after )
 | 
| 1144 |       after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1
 | 
| 1145 | 
 | 
| 1146 |     if not self._NewlineOk(): return None
 | 
| 1147 | 
 | 
| 1148 |     func = ast.FuncDef()
 | 
| 1149 |     func.name = name
 | 
| 1150 | 
 | 
| 1151 |     if not self.ParseFunctionBody(func):
 | 
| 1152 |       return None
 | 
| 1153 | 
 | 
| 1154 |     func.spids.append(left_spid)
 | 
| 1155 |     func.spids.append(after_name_spid)
 | 
| 1156 |     return func
 | 
| 1157 | 
 | 
| 1158 |   def ParseCoproc(self):
 | 
| 1159 |     """
 | 
| 1160 |     TODO:
 | 
| 1161 |     """
 | 
| 1162 |     raise NotImplementedError
 | 
| 1163 | 
 | 
| 1164 |   def ParseSubshell(self):
 | 
| 1165 |     left_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 1166 |     self._Next()  # skip past (
 | 
| 1167 | 
 | 
| 1168 |     # Ensure that something $( (cd / && pwd) ) works.  If ) is already on the
 | 
| 1169 |     # translation stack, we want to delay it.
 | 
| 1170 | 
 | 
| 1171 |     #print('ParseSubshell lexer.PushHint ) -> )')
 | 
| 1172 |     self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
 | 
| 1173 | 
 | 
| 1174 |     c_list = self.ParseCommandList()
 | 
| 1175 |     if not c_list: return None
 | 
| 1176 | 
 | 
| 1177 |     # Remove singleton CommandList as an optimization.
 | 
| 1178 |     if len(c_list.children) == 1:
 | 
| 1179 |       child = c_list.children[0]
 | 
| 1180 |     else:
 | 
| 1181 |       child = c_list
 | 
| 1182 |     node = ast.Subshell(child)
 | 
| 1183 | 
 | 
| 1184 |     right_spid = word.LeftMostSpanForWord(self.cur_word)
 | 
| 1185 |     if not self._Eat(Id.Right_Subshell): return None
 | 
| 1186 | 
 | 
| 1187 |     node.spids.extend((left_spid, right_spid))
 | 
| 1188 |     return node
 | 
| 1189 | 
 | 
| 1190 |   def ParseDBracket(self):
 | 
| 1191 |     """
 | 
| 1192 |     Pass the underlying word parser off to the boolean expression parser.
 | 
| 1193 |     """
 | 
| 1194 |     maybe_error_word = self.cur_word
 | 
| 1195 |     # TODO: Test interactive.  Without closing ]], you should get > prompt
 | 
| 1196 |     # (PS2)
 | 
| 1197 | 
 | 
| 1198 |     self._Next()  # skip [[
 | 
| 1199 |     b_parser = BoolParser(self.w_parser)
 | 
| 1200 |     bnode = b_parser.Parse()
 | 
| 1201 |     if not bnode:
 | 
| 1202 |       error_stack = b_parser.Error()
 | 
| 1203 |       self.error_stack.extend(error_stack)
 | 
| 1204 |       self.AddErrorContext("Error parsing [[", word=maybe_error_word)
 | 
| 1205 |       return None
 | 
| 1206 |     return ast.DBracket(bnode)
 | 
| 1207 | 
 | 
| 1208 |   def ParseDParen(self):
 | 
| 1209 |     maybe_error_word = self.cur_word
 | 
| 1210 |     self._Next()  # skip ((
 | 
| 1211 |     #print('1 ((', self.cur_word)
 | 
| 1212 |     anode = self.w_parser.ReadDParen()
 | 
| 1213 |     if not anode:
 | 
| 1214 |       error_stack = self.w_parser.Error()
 | 
| 1215 |       self.error_stack.extend(error_stack)
 | 
| 1216 |       self.AddErrorContext("Error parsing ((", word=maybe_error_word)
 | 
| 1217 |       return None
 | 
| 1218 | 
 | 
| 1219 |     #print('2 ((', self.cur_word)
 | 
| 1220 |     return ast.DParen(anode)
 | 
| 1221 | 
 | 
| 1222 |   def ParseCommand(self):
 | 
| 1223 |     """
 | 
| 1224 |     command          : simple_command
 | 
| 1225 |                      | compound_command io_redirect*
 | 
| 1226 |                      | function_def
 | 
| 1227 |                      | ksh_function_def
 | 
| 1228 |                      ;
 | 
| 1229 |     """
 | 
| 1230 |     if not self._Peek(): return None
 | 
| 1231 | 
 | 
| 1232 |     if self.c_id == Id.KW_Function:
 | 
| 1233 |       return self.ParseKshFunctionDef()
 | 
| 1234 | 
 | 
| 1235 |     if self.c_id in (
 | 
| 1236 |         Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen, Id.Lit_LBrace,
 | 
| 1237 |         Id.KW_For, Id.KW_While, Id.KW_Until, Id.KW_If, Id.KW_Case, Id.KW_Time):
 | 
| 1238 |       node = self.ParseCompoundCommand()
 | 
| 1239 |       if not node: return None
 | 
| 1240 |       if node.tag != command_e.TimeBlock:  # The only one without redirects
 | 
| 1241 |         redirects = self._ParseRedirectList()
 | 
| 1242 |         if redirects is None:
 | 
| 1243 |           return None
 | 
| 1244 |         node.redirects = redirects
 | 
| 1245 |       return node
 | 
| 1246 | 
 | 
| 1247 |     # NOTE: I added this to fix cases in parse-errors.test.sh, but it doesn't
 | 
| 1248 |     # work because Lit_RBrace is in END_LIST below.
 | 
| 1249 | 
 | 
| 1250 |     # TODO: KW_Do is also invalid here.
 | 
| 1251 |     if self.c_id == Id.Lit_RBrace:
 | 
| 1252 |       self.AddErrorContext('Unexpected }', word=self.cur_word)
 | 
| 1253 |       return None
 | 
| 1254 | 
 | 
| 1255 |     if self.c_kind == Kind.Redir:  # Leading redirect
 | 
| 1256 |       return self.ParseSimpleCommand()
 | 
| 1257 | 
 | 
| 1258 |     if self.c_kind == Kind.Word:
 | 
| 1259 |       if self.w_parser.LookAhead() == Id.Op_LParen:  # (
 | 
| 1260 |         kov = word.LooksLikeAssignment(self.cur_word)
 | 
| 1261 |         if kov:
 | 
| 1262 |           return self.ParseSimpleCommand()  # f=(a b c)  # array
 | 
| 1263 |         else:
 | 
| 1264 |           return self.ParseFunctionDef()  # f() { echo; }  # function
 | 
| 1265 | 
 | 
| 1266 |       return self.ParseSimpleCommand()  # echo foo
 | 
| 1267 | 
 | 
| 1268 |     self.AddErrorContext(
 | 
| 1269 |         "ParseCommand: Expected to parse a command, got %s", self.cur_word,
 | 
| 1270 |         word=self.cur_word)
 | 
| 1271 |     return None
 | 
| 1272 | 
 | 
| 1273 |   def ParsePipeline(self):
 | 
| 1274 |     """
 | 
| 1275 |     pipeline         : Bang? command ( '|' newline_ok command )* ;
 | 
| 1276 |     """
 | 
| 1277 |     negated = False
 | 
| 1278 | 
 | 
| 1279 |     if not self._Peek(): return None
 | 
| 1280 |     if self.c_id == Id.KW_Bang:
 | 
| 1281 |       negated = True
 | 
| 1282 |       self._Next()
 | 
| 1283 | 
 | 
| 1284 |     child = self.ParseCommand()
 | 
| 1285 |     if not child: return None
 | 
| 1286 | 
 | 
| 1287 |     children = [child]
 | 
| 1288 | 
 | 
| 1289 |     if not self._Peek(): return None
 | 
| 1290 |     if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
 | 
| 1291 |       if negated:
 | 
| 1292 |         node = ast.Pipeline(children, negated)
 | 
| 1293 |         return node
 | 
| 1294 |       else:
 | 
| 1295 |         return child
 | 
| 1296 | 
 | 
| 1297 |     pipe_index = 0
 | 
| 1298 |     stderr_indices = []
 | 
| 1299 | 
 | 
| 1300 |     if self.c_id == Id.Op_PipeAmp:
 | 
| 1301 |       stderr_indices.append(pipe_index)
 | 
| 1302 |     pipe_index += 1
 | 
| 1303 | 
 | 
| 1304 |     while True:
 | 
| 1305 |       self._Next()  # skip past Id.Op_Pipe or Id.Op_PipeAmp
 | 
| 1306 | 
 | 
| 1307 |       if not self._NewlineOk():
 | 
| 1308 |         return None
 | 
| 1309 | 
 | 
| 1310 |       child = self.ParseCommand()
 | 
| 1311 |       if not child:
 | 
| 1312 |         self.AddErrorContext('Error parsing command after pipe')
 | 
| 1313 |         # TODO: Return partial pipeline here?  All signatures should (ok,
 | 
| 1314 |         # node).  Only the completion uses the node when ok is False.
 | 
| 1315 |         return None
 | 
| 1316 |       children.append(child)
 | 
| 1317 | 
 | 
| 1318 |       if not self._Peek(): return None
 | 
| 1319 |       if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
 | 
| 1320 |         break
 | 
| 1321 | 
 | 
| 1322 |       if self.c_id == Id.Op_PipeAmp:
 | 
| 1323 |         stderr_indices.append(pipe_index)
 | 
| 1324 |       pipe_index += 1
 | 
| 1325 | 
 | 
| 1326 |     node = ast.Pipeline(children, negated)
 | 
| 1327 |     node.stderr_indices = stderr_indices
 | 
| 1328 |     return node
 | 
| 1329 | 
 | 
| 1330 |   def ParseAndOr(self):
 | 
| 1331 |     """
 | 
| 1332 |     and_or           : and_or ( AND_IF | OR_IF ) newline_ok pipeline
 | 
| 1333 |                      | pipeline
 | 
| 1334 | 
 | 
| 1335 |     Note that it is left recursive and left associative.  We parse it
 | 
| 1336 |     iteratively with a token of lookahead.
 | 
| 1337 |     """
 | 
| 1338 |     child = self.ParsePipeline()
 | 
| 1339 |     if not child: return None
 | 
| 1340 | 
 | 
| 1341 |     if not self._Peek(): return None
 | 
| 1342 |     if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
 | 
| 1343 |       return child
 | 
| 1344 | 
 | 
| 1345 |     ops = []
 | 
| 1346 |     children = [child]
 | 
| 1347 | 
 | 
| 1348 |     while True:
 | 
| 1349 |       ops.append(self.c_id)
 | 
| 1350 | 
 | 
| 1351 |       self._Next()  # skip past || &&
 | 
| 1352 | 
 | 
| 1353 |       if not self._NewlineOk():
 | 
| 1354 |         return None
 | 
| 1355 | 
 | 
| 1356 |       child = self.ParsePipeline()
 | 
| 1357 |       if not child: return None
 | 
| 1358 | 
 | 
| 1359 |       children.append(child)
 | 
| 1360 | 
 | 
| 1361 |       if not self._Peek(): return None
 | 
| 1362 |       if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
 | 
| 1363 |         break
 | 
| 1364 | 
 | 
| 1365 |     node = ast.AndOr(ops, children)
 | 
| 1366 |     return node
 | 
| 1367 | 
 | 
| 1368 |   def ParseCommandLine(self):
 | 
| 1369 |     """
 | 
| 1370 |     NOTE: This is only called in InteractiveLoop.  Oh crap I need to really
 | 
| 1371 |     read and execute a line at a time then?
 | 
| 1372 | 
 | 
| 1373 |     BUG: sleep 1 & sleep 1 &  doesn't work here, when written in REPL.   But it
 | 
| 1374 |     does work with '-c', because that calls ParseFile and not ParseCommandLine
 | 
| 1375 |     over and over.
 | 
| 1376 | 
 | 
| 1377 |     TODO: Get rid of ParseFile and stuff?  Shouldn't be used for -c and so
 | 
| 1378 |     forth.  Just have an ExecuteLoop for now.  But you still need
 | 
| 1379 |     ParseCommandList, for internal nodes.
 | 
| 1380 | 
 | 
| 1381 |     command_line     : and_or (sync_op and_or)* trailer? ;
 | 
| 1382 |     trailer          : sync_op newline_ok
 | 
| 1383 |                      | NEWLINES;
 | 
| 1384 |     sync_op          : '&' | ';';
 | 
| 1385 | 
 | 
| 1386 |     This rule causes LL(k > 1) behavior.  We would have to peek to see if there
 | 
| 1387 |     is another command word after the sync op.
 | 
| 1388 | 
 | 
| 1389 |     But it's easier to express imperatively.  Do the following in a loop:
 | 
| 1390 |     1. ParseAndOr
 | 
| 1391 |     2. Peek.
 | 
| 1392 |        a. If there's a newline, then return.  (We're only parsing a single
 | 
| 1393 |           line.)
 | 
| 1394 |        b. If there's a sync_op, process it.  Then look for a newline and
 | 
| 1395 |           return.  Otherwise, parse another AndOr.
 | 
| 1396 | 
 | 
| 1397 |     COMPARE
 | 
| 1398 |     command_line     : and_or (sync_op and_or)* trailer? ;   # TOP LEVEL
 | 
| 1399 |     command_term     : and_or (trailer and_or)* ;            # CHILDREN
 | 
| 1400 | 
 | 
| 1401 |     I think you should be able to factor these out.
 | 
| 1402 |     """
 | 
| 1403 |     children = []
 | 
| 1404 |     done = False
 | 
| 1405 |     while not done:
 | 
| 1406 |       child = self.ParseAndOr()
 | 
| 1407 |       if not child: return None
 | 
| 1408 | 
 | 
| 1409 |       if not self._Peek(): return None
 | 
| 1410 |       if self.c_id in (Id.Op_Semi, Id.Op_Amp):  # also Id.Op_Amp.
 | 
| 1411 |         child = ast.Sentence(child, self.cur_word.token)
 | 
| 1412 |         self._Next()
 | 
| 1413 | 
 | 
| 1414 |         if not self._Peek(): return None
 | 
| 1415 |         if self.c_id in (Id.Op_Newline, Id.Eof_Real):
 | 
| 1416 |           done = True
 | 
| 1417 | 
 | 
| 1418 |       elif self.c_id == Id.Op_Newline:
 | 
| 1419 |         done = True
 | 
| 1420 | 
 | 
| 1421 |       elif self.c_id == Id.Eof_Real:
 | 
| 1422 |         done = True
 | 
| 1423 | 
 | 
| 1424 |       else:
 | 
| 1425 |         self.AddErrorContext(
 | 
| 1426 |             'ParseCommandLine: Unexpected token %s', self.cur_word)
 | 
| 1427 |         return None
 | 
| 1428 | 
 | 
| 1429 |       children.append(child)
 | 
| 1430 | 
 | 
| 1431 |     return ast.CommandList(children)
 | 
| 1432 | 
 | 
| 1433 |   def ParseCommandTerm(self):
 | 
| 1434 |     """"
 | 
| 1435 |     command_term     : and_or (trailer and_or)* ;
 | 
| 1436 |     trailer          : sync_op newline_ok
 | 
| 1437 |                      | NEWLINES;
 | 
| 1438 |     sync_op          : '&' | ';';
 | 
| 1439 | 
 | 
| 1440 |     This is handled in imperative style, like ParseCommandLine.
 | 
| 1441 |     Called by ParseCommandList for all blocks, and also for ParseCaseItem,
 | 
| 1442 |     which is slightly different.  (HOW?  Is it the DSEMI?)
 | 
| 1443 | 
 | 
| 1444 |     Returns:
 | 
| 1445 |       ast.command
 | 
| 1446 |     """
 | 
| 1447 |     # Word types that will end the command term.
 | 
| 1448 |     END_LIST = (
 | 
| 1449 |         Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick, Id.Right_Subshell,
 | 
| 1450 |         Id.Lit_RBrace, Id.Op_DSemi)
 | 
| 1451 | 
 | 
| 1452 |     # NOTE: This is similar to ParseCommandLine, except there is a lot of stuff
 | 
| 1453 |     # about here docs.  Here docs are inherently line-oriented.
 | 
| 1454 |     #
 | 
| 1455 |     # - Why aren't we doing END_LIST in ParseCommandLine?
 | 
| 1456 |     #   - Because you will never be inside $() at the top level.
 | 
| 1457 |     #   - We also know it will end in a newline.  It can't end in "fi"!
 | 
| 1458 |     #   - example: if true; then { echo hi; } fi
 | 
| 1459 |     # - Why aren't we doing 'for c in children' too?
 | 
| 1460 | 
 | 
| 1461 |     children = []
 | 
| 1462 |     done = False
 | 
| 1463 |     while not done:
 | 
| 1464 |       if not self._Peek(): return None
 | 
| 1465 |       #print('====> ParseCommandTerm word', self.cur_word)
 | 
| 1466 | 
 | 
| 1467 |       # Most keywords are valid "first words".  But do/done/then do not BEGIN
 | 
| 1468 |       # commands, so they are not valid.
 | 
| 1469 |       if self.c_id in (
 | 
| 1470 |         Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
 | 
| 1471 |         Id.KW_Esac):
 | 
| 1472 |         break
 | 
| 1473 | 
 | 
| 1474 |       child = self.ParseAndOr()
 | 
| 1475 |       if not child:
 | 
| 1476 |         self.AddErrorContext('Error parsing AndOr in ParseCommandTerm')
 | 
| 1477 |         return None
 | 
| 1478 | 
 | 
| 1479 |       if not self._Peek(): return None
 | 
| 1480 |       if self.c_id == Id.Op_Newline:
 | 
| 1481 |         self._Next()
 | 
| 1482 | 
 | 
| 1483 |         if not self._Peek(): return None
 | 
| 1484 |         if self.c_id in END_LIST:
 | 
| 1485 |           done = True
 | 
| 1486 | 
 | 
| 1487 |       elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
 | 
| 1488 |         child = ast.Sentence(child, self.cur_word.token)
 | 
| 1489 |         self._Next()
 | 
| 1490 | 
 | 
| 1491 |         if not self._Peek(): return None
 | 
| 1492 |         if self.c_id == Id.Op_Newline:
 | 
| 1493 |           self._Next()  # skip over newline
 | 
| 1494 | 
 | 
| 1495 |           # Test if we should keep going.  There might be another command after
 | 
| 1496 |           # the semi and newline.
 | 
| 1497 |           if not self._Peek(): return None
 | 
| 1498 |           if self.c_id in END_LIST:
 | 
| 1499 |             done = True
 | 
| 1500 | 
 | 
| 1501 |         elif self.c_id in END_LIST:  # ; EOF
 | 
| 1502 |           done = True
 | 
| 1503 | 
 | 
| 1504 |       elif self.c_id in END_LIST:  # EOF
 | 
| 1505 |         done = True
 | 
| 1506 | 
 | 
| 1507 |       else:
 | 
| 1508 |         pass  # e.g. "} done", "fi fi", ") fi", etc. is OK
 | 
| 1509 | 
 | 
| 1510 |       children.append(child)
 | 
| 1511 | 
 | 
| 1512 |     if not self._Peek(): return None
 | 
| 1513 | 
 | 
| 1514 |     return ast.CommandList(children)
 | 
| 1515 | 
 | 
| 1516 |   def ParseCommandList(self):
 | 
| 1517 |     """
 | 
| 1518 |     command_list     : newline_ok command_term trailer? ;
 | 
| 1519 | 
 | 
| 1520 |     This one is called by all the compound commands.  It's basically a command
 | 
| 1521 |     block.
 | 
| 1522 | 
 | 
| 1523 |     NOTE: Rather than translating the CFG directly, the code follows a style
 | 
| 1524 |     more like this: more like this: (and_or trailer)+.  It makes capture
 | 
| 1525 |     easier.
 | 
| 1526 |     """
 | 
| 1527 |     if not self._NewlineOk(): return None
 | 
| 1528 | 
 | 
| 1529 |     node = self.ParseCommandTerm()
 | 
| 1530 |     if node is None: return None
 | 
| 1531 |     assert node is not False
 | 
| 1532 |     return node
 | 
| 1533 | 
 | 
| 1534 |   def ParseWholeFile(self):
 | 
| 1535 |     """Entry point for main() in non-interactive shell.
 | 
| 1536 | 
 | 
| 1537 |     Very similar to ParseCommandList, but we allow empty files.
 | 
| 1538 | 
 | 
| 1539 |     TODO: This should be turned into a Parse and Execute loop, freeing arenas
 | 
| 1540 |     if they don't contain functions.
 | 
| 1541 |     """
 | 
| 1542 |     if not self._NewlineOk(): return None
 | 
| 1543 | 
 | 
| 1544 |     #print('ParseFile', self.c_kind, self.cur_word)
 | 
| 1545 |     # An empty node to execute
 | 
| 1546 |     if self.c_kind == Kind.Eof:
 | 
| 1547 |       return ast.NoOp()
 | 
| 1548 | 
 | 
| 1549 |     # This calls ParseAndOr(), but I think it should be a loop that calls
 | 
| 1550 |     # ParseCommandLine(), like oil.InteractiveLoop.
 | 
| 1551 |     node = self.ParseCommandTerm()
 | 
| 1552 |     if node is None: return None
 | 
| 1553 |     assert node is not False
 | 
| 1554 | 
 | 
| 1555 |     return node
 |