# OILS / osh / cmd_parse.py  (View on Github | oilshell.org)
#
# 2824 lines, 1427 significant
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
"""
cmd_parse.py - Parse high level shell commands.
"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
_ = Kind_str  # for debug prints

# Byte values compared against lexer.ByteLookBack() when checking for
# whitespace before a typed-arg '('.
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Read the raw lines of a here doc, up to and including its terminator.

    Args:
      line_reader: source of lines; GetLine() yields a None line at EOF
      h: the here doc redirect; its operator decides whether leading tabs
         are stripped (<<-), and it is blamed for an unterminated here doc
      delimiter: the terminator string to look for

    Returns:
      (here_lines, last_line).  Each entry is (SourceLine, start_offset),
      where start_offset is the number of leading tabs skipped (always 0
      for <<).  last_line is the entry for the terminator line itself.

    Calls p_die() if EOF is reached before the terminator is found.
    """
    # NOTE: We read all lines at once, instead of parsing line-by-line,
    # because of cases like this:
    # cat <<EOF
    # 1 $(echo 2
    # echo 3) 4
    # EOF
    here_lines = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is just a warning in bash.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # If op is <<-, strip off ALL leading tabs -- not spaces, and not just
        # the first tab.
        start_offset = 0
        if strip_leading_tabs:
            n = len(line)
            i = 0  # used after loop exit
            while i < n:
                if line[i] != '\t':
                    break
                i += 1
            start_offset = i

        # Trailing whitespace (including the newline) is ignored when
        # comparing against the delimiter.
        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        here_lines.append((src_line, start_offset))

    return here_lines, last_line
137
138
139def _MakeLiteralHereLines(
140 here_lines, # type: List[Tuple[SourceLine, int]]
141 arena, # type: Arena
142 do_lossless, # type: bool
143):
144 # type: (...) -> List[word_part_t]
145 """Create a Token for each line.
146
147 For <<'EOF' and <<-'EOF' - single quoted rule
148
149 <<- has non-zero start_offset
150 """
151 # less precise type, because List[T] is an invariant type
152 tokens = [] # type: List[word_part_t]
153 for src_line, start_offset in here_lines:
154
155 # Maintain lossless invariant for STRIPPED tabs: add a Token to the
156 # arena invariant, but don't refer to it.
157 #
158 # Note: We could use Lit_CharsWithoutPrefix for 'single quoted' EOF
159 # here docs, but it's more complex with double quoted EOF docs.
160
161 if do_lossless: # avoid garbage, doesn't affect correctness
162 arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
163 src_line)
164
165 t = arena.NewToken(Id.Lit_Chars, start_offset, len(src_line.content),
166 src_line)
167 tokens.append(t)
168 return tokens
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Args:
      parse_ctx: supplies do_lossless and the word parser for here docs
      r: the Redir whose arg is a redir_param.HereDoc; its stdin_parts and
         here_end_tok are set here
      line_reader: where the here doc lines are read from
      arena: used to allocate tokens

    Calls p_die() if the delimiter word can't be statically evaluated.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (see DetectShAssignment).

    Three kinds of left-hand side are handled:

      s=1        Id.Lit_VarLike                      -> sh_lhs.Name
      a[x++]=1   Id.Lit_ArrayLhsOpen, do_lossless    -> sh_lhs.UnparsedIndex
      a[x++]=1   Id.Lit_ArrayLhsOpen, otherwise      -> sh_lhs.IndexedName

    In the last case the text between the brackets is re-parsed as an
    arithmetic expression, which may raise error.Parse.

    Args:
      parse_ctx: supplies do_lossless and MakeArithParser()
      preparsed: the detected assignment word
      arena: used as the source-code context while re-parsing the index
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Keep the index as the raw text between the brackets
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # Nothing after the '=', e.g. s=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
284def _AppendMoreEnv(preparsed_list, more_env):
285 # type: (List[ParsedAssignment], List[EnvPair]) -> None
286 """Helper to modify a SimpleCommand node.
287
288 Args:
289 preparsed: a list of 4-tuples from DetectShAssignment
290 more_env: a list to append env_pairs to
291 """
292 for preparsed in preparsed_list:
293 left_token = preparsed.left
294
295 if left_token.id != Id.Lit_VarLike: # can't be a[x]=1
296 p_die(
297 "Environment binding shouldn't look like an array assignment",
298 left_token)
299
300 if lexer.IsPlusEquals(left_token):
301 p_die('Expected = in environment binding, got +=', left_token)
302
303 var_name = lexer.TokenSliceRight(left_token, -1)
304
305 parts = preparsed.w.parts
306 n = len(parts)
307 offset = preparsed.part_offset
308 if offset == n:
309 rhs = rhs_word.Empty # type: rhs_word_t
310 else:
311 w = CompoundWord(parts[offset:])
312 word_.TildeDetectAssign(w)
313 rhs = w
314
315 more_env.append(EnvPair(left_token, var_name, rhs))
316
317
318def _SplitSimpleCommandPrefix(words):
319 # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
320 """Second pass of SimpleCommand parsing: look for assignment words."""
321 preparsed_list = [] # type: List[ParsedAssignment]
322 suffix_words = [] # type: List[CompoundWord]
323
324 done_prefix = False
325 for w in words:
326 if done_prefix:
327 suffix_words.append(w)
328 continue
329
330 left_token, close_token, part_offset = word_.DetectShAssignment(w)
331 if left_token:
332 preparsed_list.append(
333 ParsedAssignment(left_token, close_token, part_offset, w))
334 else:
335 done_prefix = True
336 suffix_words.append(w)
337
338 return preparsed_list, suffix_words
339
340
341def _MakeSimpleCommand(
342 preparsed_list, # type: List[ParsedAssignment]
343 suffix_words, # type: List[CompoundWord]
344 typed_args, # type: Optional[ArgList]
345 block, # type: Optional[LiteralBlock]
346):
347 # type: (...) -> command.Simple
348 """Create a command.Simple"""
349
350 # FOO=(1 2 3) ls is not allowed.
351 for preparsed in preparsed_list:
352 if word_.HasArrayPart(preparsed.w):
353 p_die("Environment bindings can't contain array literals",
354 loc.Word(preparsed.w))
355
356 # NOTE: It would be possible to add this check back. But it already happens
357 # at runtime in EvalWordSequence2.
358 # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
359 if 0:
360 for w in suffix_words:
361 if word_.HasArrayPart(w):
362 p_die("Commands can't contain array literals", loc.Word(w))
363
364 assert len(suffix_words) != 0
365 # {a,b,c} # Use { before brace detection
366 # ~/bin/ls # Use ~ before tilde detection
367 part0 = suffix_words[0].parts[0]
368 blame_tok = location.LeftTokenForWordPart(part0)
369
370 # NOTE: We only do brace DETECTION here, not brace EXPANSION. Therefore we
371 # can't implement bash's behavior of having say {~bob,~jane}/src work,
372 # because we only have a BracedTree.
373 # This is documented in spec/brace-expansion.
374 # NOTE: Technically we could do expansion outside of 'oshc translate', but it
375 # doesn't seem worth it.
376 words2 = braces.BraceDetectAll(suffix_words)
377 words3 = word_.TildeDetectAll(words2)
378
379 more_env = [] # type: List[EnvPair]
380 _AppendMoreEnv(preparsed_list, more_env)
381
382 # do_fork by default
383 return command.Simple(blame_tok, more_env, words3, typed_args, block, True)
384
385
class VarChecker(object):
    """Statically check for proc and variable usage errors.

    Keeps two parallel stacks, one entry per enclosing shell function, proc,
    or func: the token that opened it, and the names declared inside it.
    """

    def __init__(self):
        # type: () -> None
        # self.tokens for location info: 'proc' or another token
        self.tokens = []  # type: List[Token]
        # Names declared so far in each enclosing definition
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows this, but it's confusing because it's the same as two
        functions at the top level.

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.

        Calls p_die() when we're already inside a definition and either the
        new one is a proc or func, or the outermost one is a proc or func.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        entry = {}  # type: Dict[str, Id_t]
        self.names.append(entry)

    def Pop(self):
        # type: () -> None
        """Called when we leave the definition entered by the last Push()."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x
          x already declared
        setvar x:
          x is not declared
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic checks
        -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same.
        if len(self.names) == 0:
            return

        top = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in top:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                top[var_name] = keyword_id

        elif keyword_id == Id.KW_SetVar:
            if var_name not in top:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
474
475
class ctx_VarChecker(object):
    """Context manager that brackets a definition with Push() / Pop().

    Push(blame_tok) happens in the constructor, so errors it reports occur
    before the 'with' body is entered.  Pop() happens on exit.
    """

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
490
491
class ctx_CmdMode(object):
    """Context manager that temporarily sets cmd_parse.cmd_mode.

    The new mode is installed in the constructor; the previous mode is
    restored on exit.
    """

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
507
508
# Keyword Ids tested by CommandParser._AtSecondaryKeyword()
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
513
514
515class CommandParser(object):
516 """Recursive descent parser derived from POSIX shell grammar.
517
518 This is a BNF grammar:
519 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
520
521 - Augmented with both bash/OSH and YSH constructs.
522
523 - We use regex-like iteration rather than recursive references
524 ? means optional (0 or 1)
525 * means 0 or more
526 + means 1 or more
527
528 - Keywords are spelled in Caps:
529 If Elif Case
530
531 - Operator tokens are quoted:
532 '(' '|'
533
534 or can be spelled directly if it matters:
535
536 Op_LParen Op_Pipe
537
538 - Non-terminals are snake_case:
539 brace_group subshell
540
541 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
542 the production should be in the method docstrings, e.g.
543
544 def ParseSubshell():
545 "
546 subshell : '(' compound_list ')'
547
548 Looking at Op_LParen # Comment to say how this method is called
549 "
550
551 The grammar may be factored to make parsing easier.
552 """
553
554 def __init__(self,
555 parse_ctx,
556 parse_opts,
557 w_parser,
558 lexer,
559 line_reader,
560 eof_id=Id.Eof_Real):
561 # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
562 self.parse_ctx = parse_ctx
563 self.aliases = parse_ctx.aliases # aliases to expand at parse time
564
565 self.parse_opts = parse_opts
566 self.w_parser = w_parser # type: WordParser # for normal parsing
567 self.lexer = lexer # for pushing hints, lookahead to (
568 self.line_reader = line_reader # for here docs
569 self.eof_id = eof_id
570
571 self.arena = line_reader.arena # for adding here doc and alias spans
572 self.aliases_in_flight = [] # type: AliasesInFlight
573
574 # A hacky boolean to remove 'if cd / {' ambiguity.
575 self.allow_block = True
576
577 # Stack of booleans for nested Attr and SHELL nodes.
578 # Attr nodes allow bare assignment x = 42, but not shell x=42.
579 # SHELL nodes are the inverse. 'var x = 42' is preferred in shell
580 # nodes, but x42 is still allowed.
581 #
582 # Note: this stack could be optimized by turning it into an integer and
583 # binary encoding.
584 self.hay_attrs_stack = [] # type: List[bool]
585
586 # Note: VarChecker is instantiated with each CommandParser, which means
587 # that two 'proc foo' -- inside a command sub and outside -- don't
588 # conflict, because they use different CommandParser instances. I think
589 # this OK but you can imagine different behaviors.
590 self.var_checker = VarChecker()
591
592 self.cmd_mode = cmd_mode_e.Shell # type: cmd_mode_t
593
594 self.Reset()
595
596 # Init_() function for "keyword arg"
597 def Init_AliasesInFlight(self, aliases_in_flight):
598 # type: (AliasesInFlight) -> None
599 self.aliases_in_flight = aliases_in_flight
600
601 def Reset(self):
602 # type: () -> None
603 """Reset our own internal state.
604
605 Called by the interactive loop.
606 """
607 # Cursor state set by _GetWord()
608 self.next_lex_mode = lex_mode_e.ShCommand
609 self.cur_word = None # type: word_t # current word
610 self.c_kind = Kind.Undefined
611 self.c_id = Id.Undefined_Tok
612
613 self.pending_here_docs = [] # type: List[Redir]
614
615 def ResetInputObjects(self):
616 # type: () -> None
617 """Reset the internal state of our inputs.
618
619 Called by the interactive loop.
620 """
621 self.w_parser.Reset()
622 self.lexer.ResetInputObjects()
623 self.line_reader.Reset()
624
625 def _SetNext(self):
626 # type: () -> None
627 """Call this when you no longer need the current token.
628
629 This method is lazy. A subsequent call to _GetWord() will
630 actually read the next Token.
631 """
632 self.next_lex_mode = lex_mode_e.ShCommand
633
634 def _SetNextBrack(self):
635 # type: () -> None
636 self.next_lex_mode = lex_mode_e.ShCommandFakeBrack
637
638 def _GetWord(self):
639 # type: () -> None
640 """Call this when you need to make a decision based on Id or Kind.
641
642 If there was an "unfulfilled" call to _SetNext(), it reads a word and sets
643 self.c_id and self.c_kind.
644
645 Otherwise it does nothing.
646 """
647 if self.next_lex_mode != lex_mode_e.Undefined:
648 w = self.w_parser.ReadWord(self.next_lex_mode)
649 #log("w %s", w)
650
651 # Here docs only happen in command mode, so other kinds of newlines don't
652 # count.
653 if w.tag() == word_e.Operator:
654 tok = cast(Token, w)
655 if tok.id == Id.Op_Newline:
656 for h in self.pending_here_docs:
657 _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
658 self.arena)
659 del self.pending_here_docs[:] # No .clear() until Python 3.3.
660
661 self.cur_word = w
662
663 self.c_kind = word_.CommandKind(self.cur_word)
664 # Has special case for Id.Lit_{LBrace,RBrace,Equals}
665 self.c_id = word_.CommandId(self.cur_word)
666 self.next_lex_mode = lex_mode_e.Undefined
667
668 def _Eat(self, c_id, msg=None):
669 # type: (Id_t, Optional[str]) -> word_t
670 """Consume a word of a type, maybe showing a custom error message.
671
672 Args:
673 c_id: the Id we expected
674 msg: improved error message
675 """
676 self._GetWord()
677 if self.c_id != c_id:
678 if msg is None:
679 msg = 'Expected word type %s, got %s' % (
680 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
681 p_die(msg, loc.Word(self.cur_word))
682
683 skipped = self.cur_word
684 self._SetNext()
685 return skipped
686
687 def _NewlineOk(self):
688 # type: () -> None
689 """Check for optional newline and consume it."""
690 self._GetWord()
691 if self.c_id == Id.Op_Newline:
692 self._SetNext()
693
694 def _AtSecondaryKeyword(self):
695 # type: () -> bool
696 self._GetWord()
697 if self.c_id in SECONDARY_KEYWORDS:
698 return True
699 return False
700
701 def ParseRedirect(self):
702 # type: () -> Redir
703 self._GetWord()
704 assert self.c_kind == Kind.Redir, self.cur_word
705 op_tok = cast(Token, self.cur_word) # for MyPy
706
707 # Note: the lexer could take distinguish between
708 # >out
709 # 3>out
710 # {fd}>out
711 #
712 # which would make the code below faster. But small string optimization
713 # would also speed it up, since redirects are small.
714
715 # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
716 # possibly "unify" the IDs by subtracting a constant like 8 or 16?
717
718 op_val = lexer.TokenVal(op_tok)
719 if op_val[0] == '{':
720 pos = op_val.find('}')
721 assert pos != -1 # lexer ensures this
722 where = redir_loc.VarName(op_val[1:pos]) # type: redir_loc_t
723
724 elif op_val[0].isdigit():
725 pos = 1
726 if op_val[1].isdigit():
727 pos = 2
728 where = redir_loc.Fd(int(op_val[:pos]))
729
730 else:
731 where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))
732
733 self._SetNext()
734
735 self._GetWord()
736 # Other redirect
737 if self.c_kind != Kind.Word:
738 p_die('Invalid token after redirect operator',
739 loc.Word(self.cur_word))
740
741 # Here doc
742 if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
743 arg = redir_param.HereDoc.CreateNull()
744 arg.here_begin = self.cur_word
745 arg.stdin_parts = []
746
747 r = Redir(op_tok, where, arg)
748
749 self.pending_here_docs.append(r) # will be filled on next newline.
750
751 self._SetNext()
752 return r
753
754 arg_word = self.cur_word
755 tilde = word_.TildeDetect(arg_word)
756 if tilde:
757 arg_word = tilde
758 self._SetNext()
759
760 # We should never get Empty, Token, etc.
761 assert arg_word.tag() == word_e.Compound, arg_word
762 return Redir(op_tok, where, cast(CompoundWord, arg_word))
763
764 def _ParseRedirectList(self):
765 # type: () -> List[Redir]
766 """Try parsing any redirects at the cursor.
767
768 This is used for blocks only, not commands.
769 """
770 redirects = [] # type: List[Redir]
771 while True:
772 # This prediction needs to ONLY accept redirect operators. Should we
773 # make them a separate Kind?
774 self._GetWord()
775 if self.c_kind != Kind.Redir:
776 break
777
778 node = self.ParseRedirect()
779 redirects.append(node)
780 self._SetNext()
781
782 return redirects
783
784 def _MaybeParseRedirectList(self, node):
785 # type: (command_t) -> command_t
786 """Try parsing redirects at the current position.
787
788 If there are any, then wrap the command_t argument with a
789 command.Redirect node. Otherwise, return argument unchanged.
790 """
791 self._GetWord()
792 if self.c_kind != Kind.Redir:
793 return node
794
795 redirects = [self.ParseRedirect()]
796
797 while True:
798 # This prediction needs to ONLY accept redirect operators. Should we
799 # make them a separate Kind?
800 self._GetWord()
801 if self.c_kind != Kind.Redir:
802 break
803
804 redirects.append(self.ParseRedirect())
805 self._SetNext()
806
807 return command.Redirect(node, redirects)
808
809 def _ScanSimpleCommand(self):
810 # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
811 """YSH extends simple commands with typed args and blocks.
812
813 Shell has a recursive grammar, which awkwardly expresses
814 non-grammatical rules:
815
816 simple_command : cmd_prefix cmd_word cmd_suffix
817 | cmd_prefix cmd_word
818 | cmd_prefix
819 | cmd_name cmd_suffix
820 | cmd_name
821 ;
822 cmd_name : WORD /* Apply rule 7a */
823 ;
824 cmd_word : WORD /* Apply rule 7b */
825 ;
826 cmd_prefix : io_redirect
827 | cmd_prefix io_redirect
828 | ASSIGNMENT_WORD
829 | cmd_prefix ASSIGNMENT_WORD
830 ;
831 cmd_suffix : io_redirect
832 | cmd_suffix io_redirect
833 | WORD
834 | cmd_suffix WORD
835
836 YSH grammar:
837
838 redirect = redir_op WORD
839 item = WORD | redirect
840
841 typed_args =
842 '(' arglist ')'
843 | '[' arglist ']'
844
845 simple_command =
846 cmd_prefix* item+ typed_args? BraceGroup? cmd_suffix*
847
848 Notably, redirects shouldn't appear after typed args, or after
849 BraceGroup.
850
851 Examples:
852
853 This is an assignment:
854 foo=1 >out
855
856 This is a command.Simple
857 >out
858
859 What about
860 >out (42)
861 """
862 redirects = [] # type: List[Redir]
863 words = [] # type: List[CompoundWord]
864 typed_args = None # type: Optional[ArgList]
865 block = None # type: Optional[LiteralBlock]
866
867 first_word_caps = False # does first word look like Caps, but not CAPS
868
869 i = 0
870 while True:
871 self._GetWord()
872
873 # If we got { }, change it to something that's not Kind.Word
874 kind2 = self.c_kind
875 if (kind2 == Kind.Word and self.parse_opts.parse_brace() and
876 self.c_id in (Id.Lit_LBrace, Id.Lit_RBrace)):
877 kind2 = Kind.Op
878
879 if kind2 == Kind.Redir:
880 node = self.ParseRedirect()
881 redirects.append(node)
882
883 elif kind2 == Kind.Word:
884 w = cast(CompoundWord, self.cur_word) # Kind.Word ensures this
885
886 if i == 0:
887 # Disallow leading =a because it's confusing
888 part0 = w.parts[0]
889 if part0.tag() == word_part_e.Literal:
890 tok = cast(Token, part0)
891 if tok.id == Id.Lit_Equals:
892 p_die(
893 "=word isn't allowed. Hint: add a space after =, or quote it",
894 tok)
895
896 # Is the first word a Hay Attr word?
897 #
898 # Can we remove this StaticEval() call, and just look
899 # inside Token? I think once we get rid of SHELL nodes,
900 # this will be simpler.
901
902 ok, word_str, quoted = word_.StaticEval(w)
903 # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
904 if (ok and len(word_str) and word_str[0].isupper() and
905 not word_str.isupper()):
906 first_word_caps = True
907 #log('W %s', word_str)
908
909 words.append(w)
910
911 else:
912 break
913
914 self._SetNextBrack() # Allow bracket for SECOND word on
915 i += 1
916
917 # my-cmd (x) or my-cmd [x]
918 self._GetWord()
919 if self.c_id == Id.Op_LParen:
920 # 1. Check that there's a preceding space
921 prev_byte = self.lexer.ByteLookBack()
922 if prev_byte not in (SPACE_CH, TAB_CH):
923 if self.parse_opts.parse_at():
924 p_die('Space required before (', loc.Word(self.cur_word))
925 else:
926 # inline func call like @sorted(x) is invalid in OSH, but the
927 # solution isn't a space
928 p_die(
929 'Unexpected left paren (might need a space before it)',
930 loc.Word(self.cur_word))
931
932 # 2. Check that it's not (). We disallow this because it's a no-op and
933 # there could be confusion with shell func defs.
934 # For some reason we need to call lexer.LookPastSpace, not
935 # w_parser.LookPastSpace. I think this is because we're at (, which is
936 # an operator token. All the other cases are like 'x=', which is PART
937 # of a word, and we don't know if it will end.
938 next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
939 if next_id == Id.Op_RParen:
940 p_die('Empty arg list not allowed', loc.Word(self.cur_word))
941
942 typed_args = self.w_parser.ParseProcCallArgs(
943 grammar_nt.ysh_eager_arglist)
944
945 self._SetNext()
946
947 elif self.c_id == Id.Op_LBracket: # only when parse_bracket set
948 typed_args = self.w_parser.ParseProcCallArgs(
949 grammar_nt.ysh_lazy_arglist)
950
951 self._SetNext()
952
953 self._GetWord()
954
955 # Allow redirects after typed args, e.g.
956 # json write (x) > out.txt
957 if self.c_kind == Kind.Redir:
958 redirects.extend(self._ParseRedirectList())
959
960 # my-cmd { echo hi } my-cmd (x) { echo hi } ...
961 if (self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace and
962 # Disabled for if/while condition, etc.
963 self.allow_block):
964
965 # allow x = 42
966 self.hay_attrs_stack.append(first_word_caps)
967 brace_group = self.ParseBraceGroup()
968
969 # So we can get the source code back later
970 lines = self.arena.SaveLinesAndDiscard(brace_group.left,
971 brace_group.right)
972 block = LiteralBlock(brace_group, lines)
973
974 self.hay_attrs_stack.pop()
975
976 self._GetWord()
977
978 # Allow redirects after block, e.g.
979 # cd /tmp { echo $PWD } > out.txt
980 if self.c_kind == Kind.Redir:
981 redirects.extend(self._ParseRedirectList())
982
983 return redirects, words, typed_args, block
984
985 def _MaybeExpandAliases(self, words):
986 # type: (List[CompoundWord]) -> Optional[command_t]
987 """Try to expand aliases.
988
989 Args:
990 words: A list of Compound
991
992 Returns:
993 A new LST node, or None.
994
995 Our implementation of alias has two design choices:
996 - Where to insert it in parsing. We do it at the end of ParseSimpleCommand.
997 - What grammar rule to parse the expanded alias buffer with. In our case
998 it's ParseCommand().
999
1000 This doesn't quite match what other shells do, but I can't figure out a
1001 better places.
1002
1003 Most test cases pass, except for ones like:
1004
1005 alias LBRACE='{'
1006 LBRACE echo one; echo two; }
1007
1008 alias MULTILINE='echo 1
1009 echo 2
1010 echo 3'
1011 MULTILINE
1012
1013 NOTE: dash handles aliases in a totally different way. It has a global
1014 variable checkkwd in parser.c. It assigns it all over the grammar, like
1015 this:
1016
1017 checkkwd = CHKNL | CHKKWD | CHKALIAS;
1018
1019 The readtoken() function checks (checkkwd & CHKALIAS) and then calls
1020 lookupalias(). This seems to provide a consistent behavior among shells,
1021 but it's less modular and testable.
1022
1023 Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
1024
1025 Returns:
1026 A command node if any aliases were expanded, or None otherwise.
1027 """
1028 # Start a new list if there aren't any. This will be passed recursively
1029 # through CommandParser instances.
1030 aliases_in_flight = (self.aliases_in_flight
1031 if len(self.aliases_in_flight) else [])
1032
1033 # for error message
1034 first_word_str = None # type: Optional[str]
1035 argv0_loc = loc.Word(words[0])
1036
1037 expanded = [] # type: List[str]
1038 i = 0
1039 n = len(words)
1040
1041 while i < n:
1042 w = words[i]
1043
1044 ok, word_str, quoted = word_.StaticEval(w)
1045 if not ok or quoted:
1046 break
1047
1048 alias_exp = self.aliases.get(word_str)
1049 if alias_exp is None:
1050 break
1051
1052 # Prevent infinite loops. This is subtle: we want to prevent infinite
1053 # expansion of alias echo='echo x'. But we don't want to prevent
1054 # expansion of the second word in 'echo echo', so we add 'i' to
1055 # "aliases_in_flight".
1056 if (word_str, i) in aliases_in_flight:
1057 break
1058
1059 if i == 0:
1060 first_word_str = word_str # for error message
1061
1062 #log('%r -> %r', word_str, alias_exp)
1063 aliases_in_flight.append((word_str, i))
1064 expanded.append(alias_exp)
1065 i += 1
1066
1067 if not alias_exp.endswith(' '):
1068 # alias e='echo [ ' is the same expansion as
1069 # alias e='echo ['
1070 # The trailing space indicates whether we should continue to expand
1071 # aliases; it's not part of it.
1072 expanded.append(' ')
1073 break # No more expansions
1074
1075 if len(expanded) == 0: # No expansions; caller does parsing.
1076 return None
1077
1078 # We are expanding an alias, so copy the rest of the words and re-parse.
1079 if i < n:
1080 left_tok = location.LeftTokenForWord(words[i])
1081 right_tok = location.RightTokenForWord(words[-1])
1082
1083 # OLD CONSTRAINT
1084 #assert left_tok.line_id == right_tok.line_id
1085
1086 words_str = self.arena.SnipCodeString(left_tok, right_tok)
1087 expanded.append(words_str)
1088
1089 code_str = ''.join(expanded)
1090
1091 # TODO:
1092 # Aliases break static parsing (like backticks), so use our own Arena.
1093 # This matters for Hay, which calls SaveLinesAndDiscard().
1094 # arena = alloc.Arena()
1095 arena = self.arena
1096
1097 line_reader = reader.StringLineReader(code_str, arena)
1098 cp = self.parse_ctx.MakeOshParser(line_reader)
1099 cp.Init_AliasesInFlight(aliases_in_flight)
1100
1101 # break circular dep
1102 from frontend import parse_lib
1103
1104 # The interaction between COMPLETION and ALIASES requires special care.
1105 # See docstring of BeginAliasExpansion() in parse_lib.py.
1106 src = source.Alias(first_word_str, argv0_loc)
1107 with alloc.ctx_SourceCode(arena, src):
1108 with parse_lib.ctx_Alias(self.parse_ctx.trail):
1109 try:
1110 # _ParseCommandTerm() handles multiline commands, compound
1111 # commands, etc. as opposed to ParseLogicalLine()
1112 node = cp._ParseCommandTerm()
1113 except error.Parse as e:
1114 # Failure to parse alias expansion is a fatal error
1115 # We don't need more handling here/
1116 raise
1117
1118 if 0:
1119 log('AFTER expansion:')
1120 node.PrettyPrint()
1121
1122 return node
1123
def ParseSimpleCommand(self):
    # type: () -> command_t
    """Parse a simple command: assignments, control flow, or a word command.

    Fixed transcription of the POSIX grammar

    io_file        : '<'       filename
                   | LESSAND   filename
                     ...

    io_here        : DLESS     here_end
                   | DLESSDASH here_end

    redirect       : IO_NUMBER (io_redirect | io_here)

    prefix_part    : ASSIGNMENT_WORD | redirect
    cmd_part       : WORD | redirect

    assign_kw      : Declare | Export | Local | Readonly

    # Without any words it is parsed as a command, not an assignment
    assign_listing : assign_kw

    # Now we have something to do (might be changing assignment flags too)
    # NOTE: any prefixes should be a warning, but they are allowed in shell.
    assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

    # an external command, a function call, or a builtin -- a "word_command"
    word_command   : prefix_part* cmd_part+

    simple_command : assign_listing
                   | assignment
                   | word_command

    Simple imperative algorithm:

    1) Read a list of words and redirects.  Append them to separate lists.
    2) Look for the first non-assignment word.  If it's declare, etc., then
       keep parsing words AND assign words.  Otherwise, just parse words.
    3) If there are no non-assignment words, then it's a global assignment.

    { redirects, global assignments } OR
    { redirects, prefix_bindings, words } OR
    { redirects, ERROR_prefix_bindings, keyword, assignments, words }

    THEN CHECK that prefix bindings don't have any array literal parts!
    Global assignments and keyword assignments can have them, of course.
    Well, actually EXPORT shouldn't have them either -- WARNING

    3 cases we want to warn: prefix_bindings for assignment, and array literal
    in prefix bindings, or export

    A command can be an assignment word, word, or redirect on its own.

        ls
        >out.txt

        >out.txt FOO=bar   # this touches the file

    Or any sequence:
        ls foo bar
        <in.txt ls foo bar >out.txt
        <in.txt ls >out.txt foo bar

    Or add one or more environment bindings:
        VAR=val env
        >out.txt VAR=val env

    here_end vs filename is a matter of whether we test that it's quoted.  e.g.
    <<EOF vs <<'EOF'.

    Returns:
      One of the following, depending on what _ScanSimpleCommand() found:
      - command.Redirect around command.NoOp, when there are no words
      - command.ShAssignment, when there are only assignment words
      - command.Retval or command.ControlFlow, when the first non-assignment
        word is a control flow keyword
      - command.ExpandedAlias, when an alias was expanded
      - the node built by _MakeSimpleCommand() otherwise
      Except for the control flow nodes, the result is wrapped in
      command.Redirect when redirects are present.

    Invalid combinations are reported with p_die().
    """
    redirects, words, typed_args, block = self._ScanSimpleCommand()

    # Location to blame when typed args or a block appear where they aren't
    # allowed.  It stays None when the command has neither.
    typed_loc = None  # type: Optional[Token]
    if block:
        typed_loc = block.brace_group.left
    if typed_args:
        typed_loc = typed_args.left  # preferred over block location

    if len(words) == 0:  # e.g.  >out.txt  # redirect without words
        assert len(redirects) != 0
        if typed_loc is not None:
            p_die("Unexpected typed args", typed_loc)
        return command.Redirect(command.NoOp, redirects)

    # preparsed_list: the leading assignment words
    # suffix_words: everything from the first non-assignment word on
    preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
    if len(preparsed_list):
        # Disallow X=Y inside proc and func
        #   and inside Hay Attr blocks
        # But allow X=Y at the top level
        #   for interactive use foo=bar
        #   for global constants GLOBAL=~/src
        #     because YSH assignment doesn't have tilde sub
        if len(suffix_words) == 0:
            if (self.cmd_mode != cmd_mode_e.Shell or
                (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                p_die('Use var/setvar to assign in YSH',
                      preparsed_list[0].left)

    # Set a reference to words and redirects for completion.  We want to
    # inspect this state after a failed parse.
    self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

    if len(suffix_words) == 0:
        if typed_loc is not None:
            p_die("Unexpected typed args", typed_loc)

        # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
        pairs = []  # type: List[AssignPair]
        for preparsed in preparsed_list:
            pairs.append(
                _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

        left_tok = location.LeftTokenForCompoundWord(words[0])
        assign_node = command.ShAssignment(left_tok, pairs)
        if len(redirects):
            return command.Redirect(assign_node, redirects)
        else:
            return assign_node

    kind, kw_token = word_.IsControlFlow(suffix_words[0])

    if kind == Kind.ControlFlow:
        if not self.parse_opts.parse_ignored() and len(redirects):
            p_die("Control flow shouldn't have redirects", kw_token)
        if len(preparsed_list):  # e.g. FOO=bar return is not allowed
            p_die("Control flow shouldn't have environment bindings",
                  preparsed_list[0].left)

        if kw_token.id == Id.ControlFlow_Return:
            # return x - inside procs and shell functions
            # return (x) - inside funcs
            if typed_args is None:
                if self.cmd_mode not in (cmd_mode_e.Shell,
                                         cmd_mode_e.Proc):
                    p_die('Shell-style returns not allowed here', kw_token)
            else:
                if self.cmd_mode != cmd_mode_e.Func:
                    p_die('Typed return is only allowed inside func',
                          typed_loc)
                if len(typed_args.pos_args) != 1:
                    p_die("Typed return expects one argument", typed_loc)
                if len(typed_args.named_args) != 0:
                    p_die("Typed return doesn't take named arguments",
                          typed_loc)
                return command.Retval(kw_token, typed_args.pos_args[0])

        # Except for return (x), we shouldn't have typed args
        if typed_loc is not None:
            p_die("Unexpected typed args", typed_loc)

        # Attach the token for errors.  (ShAssignment may not need it.)
        # At most one argument word may follow the keyword.
        if len(suffix_words) == 1:
            arg_word = None  # type: Optional[word_t]
        elif len(suffix_words) == 2:
            arg_word = suffix_words[1]
        else:
            p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                  loc.Word(suffix_words[2]))

        return command.ControlFlow(kw_token, arg_word)

    # Alias expansion only understands words, not typed args ( ) or block { }
    if not typed_args and not block and self.parse_opts.expand_aliases():
        # If any expansions were detected, then parse again.
        expanded_node = self._MaybeExpandAliases(suffix_words)
        if expanded_node:
            # Attach env bindings and redirects to the expanded node.
            more_env = []  # type: List[EnvPair]
            _AppendMoreEnv(preparsed_list, more_env)
            exp = command.ExpandedAlias(expanded_node, more_env)
            if len(redirects):
                return command.Redirect(exp, redirects)
            else:
                return exp

    # TODO: check that we don't have env1=x x[1]=y env2=z here.

    # FOO=bar printenv.py FOO
    node = _MakeSimpleCommand(preparsed_list, suffix_words, typed_args,
                              block)
    if len(redirects):
        return command.Redirect(node, redirects)
    else:
        return node
1307
def ParseBraceGroup(self):
    # type: () -> BraceGroup
    """Parse '{' command_list '}', with an optional leading doc comment.

    Original:
      brace_group : LBrace command_list RBrace ;

    YSH:
      brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

    A doc comment is only looked for right after a newline that follows the
    opening brace.
    """
    lbrace_word = self._Eat(Id.Lit_LBrace)
    left_tok = word_.BraceToken(lbrace_word)

    self._GetWord()
    if self.c_id == Id.Op_Newline:
        self._SetNext()
        # Inside this context, the word parser doesn't skip over ###
        with word_.ctx_EmitDocToken(self.w_parser):
            self._GetWord()

    # Remains None when there is no doc comment.  A word with
    # Id.Ignored_Comment is a Token, hence the cast.
    doc_tok = None  # type: Token
    if self.c_id == Id.Ignored_Comment:
        doc_tok = cast(Token, self.cur_word)
        self._SetNext()

    body = self._ParseCommandList()

    rbrace_word = self._Eat(Id.Lit_RBrace)
    right_tok = word_.BraceToken(rbrace_word)

    # Note(andychu): Related ASDL bug #1216.  Choosing the Python [] behavior
    # would allow us to revert this back to None, which was changed in
    # https://github.com/oilshell/oil/pull/1211.  Choosing the C++ nullptr
    # behavior saves allocations, but is less type safe.
    return BraceGroup(left_tok, doc_tok, body.children, right_tok)
1347
def ParseDoGroup(self):
    # type: () -> command.DoGroup
    """Parse the 'do ... done' body shared by ForEach, ForExpr, While, Until.

    do_group         : Do command_list Done ;          /* Apply rule 6 */

    Open question from the original author: should this be a Do node?
    """
    do_kw = word_.AsKeywordToken(self._Eat(Id.KW_Do))

    # The body could be anything
    body = self._ParseCommandList()

    done_kw = word_.AsKeywordToken(self._Eat(Id.KW_Done))
    return command.DoGroup(do_kw, body.children, done_kw)
1363
def ParseForWords(self):
    # type: () -> Tuple[List[CompoundWord], Optional[Token]]
    """Collect the words after 'in', up to and including the separator.

    for_words        : WORD* for_sep
                     ;
    for_sep          : ';' newline_ok
                     | NEWLINES
                     ;

    With parse_brace on, an opening '{' also ends the list; it is not
    consumed.

    Returns:
      (words, semi_tok), where semi_tok is the ';' token if one ended the
      list, and None otherwise.  Callers keep it so the ';' can be removed.
    """
    iter_words = []  # type: List[CompoundWord]
    semi_tok = None  # type: Optional[Token]

    while True:
        self._GetWord()

        if self.c_id == Id.Op_Semi:
            semi_tok = cast(Token, self.cur_word)
            self._SetNext()
            self._NewlineOk()
            break

        if self.c_id == Id.Op_Newline:
            self._SetNext()
            break

        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            break

        cur = self.cur_word
        if cur.tag() != word_e.Compound:
            # TODO: Can we also show a pointer to the 'for' keyword?
            p_die('Invalid word in for loop', loc.Word(cur))

        iter_words.append(cast(CompoundWord, cur))
        self._SetNext()

    return iter_words, semi_tok
1399
def _ParseForExprLoop(self, for_kw):
    # type: (Token) -> command.ForExpr
    """Parse a C-style 'for ((init; cond; update))' loop and its body.

    Shell:
      for '((' init ';' cond ';' update '))' for_sep? do_group

    YSH:
      for '((' init ';' cond ';' update '))' for_sep? brace_group

    Args:
      for_kw: The 'for' keyword token; stored on the returned node.

    Returns:
      The node produced by ReadForExpression(), with keyword and body set.
    """
    node = self.w_parser.ReadForExpression()
    node.keyword = for_kw

    self._SetNext()

    self._GetWord()
    if self.c_id == Id.Op_Semi:
        self._SetNext()
        self._NewlineOk()
    elif self.c_id == Id.Op_Newline:
        self._SetNext()
    elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
        pass
    elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
        pass
    else:
        p_die('Invalid word after for expression', loc.Word(self.cur_word))

    # When a newline was just consumed above, self.c_id still describes that
    # newline, so a '{' body following it was never detected and we fell
    # through to ParseDoGroup().  Read the word that actually starts the body
    # before choosing, as the for_sep? brace_group grammar above requires.
    # In the other cases nothing is pending, so this call changes nothing.
    self._GetWord()
    if self.c_id == Id.Lit_LBrace:
        node.body = self.ParseBraceGroup()
    else:
        node.body = self.ParseDoGroup()
    return node
1432
def _ParseForEachLoop(self, for_kw):
    # type: (Token) -> command.ForEach
    """Parse a 'for' loop over names, starting at the first loop variable.

    Forms handled below:

      for x in a b; do ... done      # words, via ParseForWords()
      for x in (expr) { ... }        # YSH expression; body must be { }
      for x; do ... done             # implicit loop over "$@"
      for x do ... done              # implicit loop over "$@"

    'for x in <> {' and 'for x in < ... > {' are recognized only to be
    rejected as reserved syntax.

    Args:
      for_kw: The 'for' keyword token; stored on the node, and blamed when
        there are too many loop variables for a word list.

    Returns:
      A command.ForEach with iter_names, iterable and body filled in.
    """
    node = command.ForEach.CreateNull(alloc_lists=True)
    node.keyword = for_kw

    # Read up to 3 loop variable names
    num_iter_names = 0
    while True:
        w = self.cur_word

        # Hack that makes the language more familiar:
        # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
        # - 'x y' is also accepted but not idiomatic.
        UP_w = w
        if w.tag() == word_e.Compound:
            w = cast(CompoundWord, UP_w)
            if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                # Drops the trailing comma from the word itself (in place)
                w.parts.pop()

        ok, iter_name, quoted = word_.StaticEval(w)
        if not ok or quoted:  # error: for $x
            p_die('Expected loop variable (a constant word)', loc.Word(w))

        if not match.IsValidVarName(iter_name):  # error: for -
            # TODO: consider commas?
            if ',' in iter_name:
                p_die('Loop variables look like x, y (fix spaces)',
                      loc.Word(w))
            p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

        node.iter_names.append(iter_name)
        num_iter_names += 1
        self._SetNext()

        self._GetWord()
        # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
        # Subtlety: 'var' is KW_Var and is a valid loop name
        if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
            break

        if num_iter_names == 3:
            p_die('Unexpected word after 3 loop variables',
                  loc.Word(self.cur_word))

    self._NewlineOk()

    self._GetWord()
    if self.c_id == Id.KW_In:
        # The 'in' keyword is blamed for errors in the iterable expression
        expr_blame = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip in

        next_id = self.w_parser.LookPastSpace()
        #log('%s', Id_str(next_id))

        if next_id == Id.Op_LParen:  # for x in (expr) {
            enode = self.w_parser.ParseYshExprForCommand()
            node.iterable = for_iter.YshExpr(enode, expr_blame)

            # We don't accept for x in (obj); do ...
            self._GetWord()
            if self.c_id != Id.Lit_LBrace:
                p_die('Expected { after iterable expression',
                      loc.Word(self.cur_word))

        elif next_id == Id.Redir_LessGreat:  # for x in <> {
            # '<>' arrives as a single Id.Redir_LessGreat word.  The result
            # 'w' is only needed by the commented-out code below.
            w = self._Eat(Id.Redir_LessGreat)
            p_die('Reserved syntax', loc.Word(self.cur_word))

            #left = word_.AsOperatorToken(w)

            #node.iterable = for_iter.Files(left, [])

            ## Must be { not 'do'
            #self._GetWord()
            #if self.c_id != Id.Lit_LBrace:
            #    p_die('Expected { after files', loc.Word(self.cur_word))

        elif next_id == Id.Redir_Less:  # for x in < > {
            # As above, 'w' is only needed by the commented-out code below.
            w = self._Eat(Id.Redir_Less)
            p_die('Reserved syntax', loc.Word(self.cur_word))

            #left = word_.AsOperatorToken(w)

            # TODO: we could accept
            #
            # for x in < README.md *.py > {
            # for x in < @myfiles > {
            #
            # And set _filename _line_num, similar to awk

            #self._Eat(Id.Redir_Great)

            #node.iterable = for_iter.Files(left, [])

            ## Must be { not 'do'
            #self._GetWord()
            #if self.c_id != Id.Lit_LBrace:
            #    p_die('Expected { after files', loc.Word(self.cur_word))

        else:
            semi_tok = None  # type: Optional[Token]
            iter_words, semi_tok = self.ParseForWords()
            node.semi_tok = semi_tok

            # Without parse_bare_word, a single unquoted word that looks
            # like a variable name is rejected.
            if not self.parse_opts.parse_bare_word() and len(
                    iter_words) == 1:
                ok, s, quoted = word_.StaticEval(iter_words[0])
                if ok and match.IsValidVarName(s) and not quoted:
                    p_die(
                        'Surround this word with either parens or quotes (parse_bare_word)',
                        loc.Word(iter_words[0]))

            words2 = braces.BraceDetectAll(iter_words)
            words3 = word_.TildeDetectAll(words2)
            node.iterable = for_iter.Words(words3)

            # Now that we know there are words, do an extra check
            if num_iter_names > 2:
                p_die('Expected at most 2 loop variables', for_kw)

    elif self.c_id == Id.KW_Do:
        node.iterable = for_iter.Args  # implicitly loop over "$@"
        # do not advance

    elif self.c_id == Id.Op_Semi:  # for x; do
        node.iterable = for_iter.Args  # implicitly loop over "$@"
        self._SetNext()

    else:  # for foo BAD
        p_die('Unexpected word after for loop variable',
              loc.Word(self.cur_word))

    self._GetWord()
    if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
        node.body = self.ParseBraceGroup()
    else:
        node.body = self.ParseDoGroup()

    return node
1573
def ParseFor(self):
    # type: () -> command_t
    """Parse either kind of 'for' loop, plus any trailing redirects.

    TODO: Update the grammar

    for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
               | For '((' ... TODO
    """
    for_kw = word_.AsKeywordToken(self._Eat(Id.KW_For))

    self._GetWord()
    if self.c_id != Id.Op_DLeftParen:
        # for x in a b; do echo hi; done
        each_node = self._ParseForEachLoop(for_kw)
        return self._MaybeParseRedirectList(each_node)

    if not self.parse_opts.parse_dparen():
        p_die("Bash for loops aren't allowed (parse_dparen)",
              loc.Word(self.cur_word))

    # for (( i = 0; i < 10; i++ ))
    expr_node = self._ParseForExprLoop(for_kw)
    return self._MaybeParseRedirectList(expr_node)
1598
def _ParseConditionList(self):
    # type: () -> condition_t
    """Parse the command list used as a condition; it must not be empty.

    condition_list: command_list

    Helper for 'if' commands and while/until loops.  self.allow_block is
    turned off while the list is parsed and turned back on afterward.

    NOTE(review): allow_block is not restored if _ParseCommandList() raises.

    Returns:
      condition.Shell wrapping the parsed commands.  A parse error is
      reported when the list has no commands.
    """
    self.allow_block = False
    c_list = self._ParseCommandList()
    self.allow_block = True

    children = c_list.children
    if len(children) == 0:
        p_die("Expected a condition", loc.Word(self.cur_word))

    return condition.Shell(children)
1615
def ParseWhileUntil(self, keyword):
    # type: (Token) -> command.WhileUntil
    """Parse a while or until loop; the current word is the keyword.

    while_clause     : While command_list do_group ;
    until_clause     : Until command_list do_group ;

    With parse_paren, the condition may be a parenthesized YSH expression.
    With parse_brace, the body may be a brace group instead of do/done.

    Args:
      keyword: The 'while' or 'until' token, stored on the node.
    """
    self._SetNext()  # consume the keyword

    cond = None  # type: condition_t
    if (self.parse_opts.parse_paren() and
            self.w_parser.LookPastSpace() == Id.Op_LParen):
        cond = condition.YshExpr(self.w_parser.ParseYshExprForCommand())
    else:
        cond = self._ParseConditionList()

    # NOTE: The LSTs will be different for OSH and YSH, but the execution
    # should be unchanged.  To be sure we should desugar.
    self._GetWord()
    body = None  # type: command_t
    if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
        # while test -f foo {
        body = self.ParseBraceGroup()
    else:
        body = self.ParseDoGroup()

    return command.WhileUntil(keyword, cond, body)
1641
def ParseCaseArm(self):
    # type: () -> CaseArm
    """Parse one arm of a shell-style case statement.

    case_item: '('? pattern ('|' pattern)* ')'
               newline_ok command_term? trailer? ;

    Looking at '(' or pattern

    Returns:
      A CaseArm whose patterns are pat.Words.  Its last field is the ;; ;& or
      ;;& token, or None when the arm is ended by 'esac' instead.
    """
    # Lexer hint (Op_RParen -> Right_CasePat); the pattern list below is
    # closed by eating Id.Right_CasePat.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

    left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

    if self.c_id == Id.Op_LParen:  # the opening paren is optional
        self._SetNext()

    patterns = []  # type: List[word_t]
    while True:
        self._GetWord()
        if self.c_kind != Kind.Word:
            p_die('Expected case pattern', loc.Word(self.cur_word))
        patterns.append(self.cur_word)
        self._SetNext()

        self._GetWord()
        if self.c_id != Id.Op_Pipe:
            break
        self._SetNext()  # past '|', on to the next alternative

    middle_tok = word_.AsOperatorToken(self._Eat(Id.Right_CasePat))

    self._NewlineOk()

    # An arm may have no commands at all
    action_children = []  # type: List[command_t]
    self._GetWord()
    if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                         Id.KW_Esac):
        action_children = self._ParseCommandTerm().children

    dsemi_tok = None  # type: Token
    self._GetWord()
    if self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
        dsemi_tok = word_.AsOperatorToken(self.cur_word)
        self._SetNext()
    elif self.c_id != Id.KW_Esac:  # 'esac' means the last ;; was omitted
        # Happens on EOF
        p_die('Expected ;; or esac', loc.Word(self.cur_word))

    self._NewlineOk()

    return CaseArm(left_tok, pat.Words(patterns), middle_tok,
                   action_children, dsemi_tok)
1699
def ParseYshCaseArm(self, discriminant):
    # type: (Id_t) -> CaseArm
    """Parse one arm of a YSH case statement.

    case_item   : pattern newline_ok brace_group newline_ok
    pattern     : pat_words
                | pat_exprs
                | pat_eggex
                | pat_else
    pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
    pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
    pat_word    : WORD
    pat_eggex   : '/' oil_eggex '/'
    pat_expr    : '(' oil_expr ')'
    pat_else    : '(' Id.KW_Else ')'

    Looking at: 'pattern'

    The trailing `newline_ok` of `case_item` is consumed by `ParseYshCase`,
    not here: reading it yields the "discriminant" of the next arm, so it
    belongs with the loop over arms.

    Args:
      discriminant: Id that selects the pattern kind.  Op_LParen and
        Arith_Slash go to the word parser; anything else is parsed as words.
    """
    left_tok = None  # type: Token
    pattern = None  # type: pat_t

    if discriminant in (Id.Op_LParen, Id.Arith_Slash):
        # pat_exprs, pat_else or pat_eggex
        pattern, left_tok = self.w_parser.ParseYshCasePattern()
    else:
        # pat_words
        word_list = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            word_list.append(self.cur_word)
            self._SetNext()

            # Only the first pattern word supplies the arm's left token
            if not left_tok:
                left_tok = location.LeftTokenForWord(self.cur_word)

            self._NewlineOk()

            self._GetWord()
            if self.c_id != Id.Op_Pipe:
                break
            self._SetNext()
            self._NewlineOk()

        pattern = pat.Words(word_list)

    self._NewlineOk()
    body = self.ParseBraceGroup()

    # The left brace of the body serves as the arm's "middle" token
    return CaseArm(left_tok, pattern, body.left, body.children, body.right)
1757
def ParseYshCase(self, case_kw):
    # type: (Token) -> command.Case
    """Parse a YSH case statement: case (expr) { arms }.

    ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

    Looking at: token after 'case'

    Args:
      case_kw: The 'case' keyword token, stored on the node.
    """
    subject = self.w_parser.ParseYshExprForCommand()
    to_match = case_arg.YshExpr(subject)

    arms_start = word_.BraceToken(self._Eat(Id.Lit_LBrace))

    # Note: for now, zero arms are accepted, just like POSIX case $x in esac
    arms = []  # type: List[CaseArm]
    while True:
        discriminant = self.w_parser.NewlineOkForYshCase()
        if discriminant == Id.Op_RBrace:
            break
        arms.append(self.ParseYshCaseArm(discriminant))

    # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr.  So the '}'
    # token is read as an Id.Op_RBrace, but we need to store this as a
    # Id.Lit_RBrace.
    arms_end = word_.AsOperatorToken(self._Eat(Id.Op_RBrace))
    arms_end.id = Id.Lit_RBrace

    return command.Case(case_kw, to_match, arms_start, arms, arms_end)
1789
def ParseOldCase(self, case_kw):
    # type: (Token) -> command.Case
    """Parse a shell-style case statement: case WORD in ... esac.

    case_clause      : Case WORD newline_ok In newline_ok case_arm* Esac ;

    -> Looking at WORD

    FYI original POSIX case list, which takes pains for DSEMI

    case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;

    Args:
      case_kw: The 'case' keyword token, stored on the node.
    """
    self._GetWord()
    subject = self.cur_word

    # Without parse_bare_word, an unquoted constant word is rejected
    if not self.parse_opts.parse_bare_word():
        ok, s, quoted = word_.StaticEval(subject)
        if ok and not quoted:
            p_die(
                "This is a constant string. You may want a variable like $x (parse_bare_word)",
                loc.Word(subject))

    if subject.tag() != word_e.Compound:
        p_die("Expected a word to match against", loc.Word(subject))

    to_match = case_arg.Word(subject)
    self._SetNext()  # past WORD

    self._NewlineOk()

    arms_start = word_.AsKeywordToken(self._Eat(Id.KW_In))

    self._NewlineOk()

    arms = []  # type: List[CaseArm]
    while True:
        self._GetWord()
        if self.c_id == Id.KW_Esac:
            break

        # A case arm should begin with a pattern word or (
        starts_arm = (self.c_kind == Kind.Word or
                      self.c_id == Id.Op_LParen)
        if not starts_arm:
            break

        arms.append(self.ParseCaseArm())

    arms_end = word_.AsKeywordToken(self._Eat(Id.KW_Esac))

    # no redirects yet
    return command.Case(case_kw, to_match, arms_start, arms, arms_end)
1840
def ParseCase(self):
    # type: () -> command.Case
    """Parse a case statement, choosing between the shell and YSH forms.

    case_clause : old_case  # from POSIX
                | ysh_case
                ;

    Looking at 'Case'

    A '(' after the keyword (past any space) selects the YSH form.
    """
    case_kw = word_.AsKeywordToken(self.cur_word)
    self._SetNext()  # past 'case'

    next_id = self.w_parser.LookPastSpace()
    if next_id == Id.Op_LParen:
        return self.ParseYshCase(case_kw)
    return self.ParseOldCase(case_kw)
1857
def _ParseYshElifElse(self, if_node):
    # type: (command.If) -> None
    """Parse YSH-style 'elif ... { }' arms and an optional 'else { }'.

    if test -f foo {
      echo foo
    } elif test -f bar; test -f spam {
      echo bar
    } else {
      echo none
    }

    The first brace group has already been parsed; _ParseYshIf() calls this
    when the current word is 'elif' or 'else'.

    Each elif arm is appended to if_node.arms.  The body of 'else', if there
    is one, is stored in if_node.else_action.
    """
    while self.c_id == Id.KW_Elif:
        elif_kw = word_.AsKeywordToken(self.cur_word)
        self._SetNext()  # past 'elif'

        cond = None  # type: condition_t
        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            # elif (x + 1) {
            cond = condition.YshExpr(self.w_parser.ParseYshExprForCommand())
        else:
            # elif test -f bar; test -f spam {
            # allow_block is off while the condition is parsed
            self.allow_block = False
            cond_list = self._ParseCommandList()
            self.allow_block = True
            cond = condition.Shell(cond_list.children)

        arm_body = self.ParseBraceGroup()
        self._GetWord()

        if_node.arms.append(
            IfArm(elif_kw, cond, None, arm_body.children, None))

    self._GetWord()
    if self.c_id == Id.KW_Else:
        self._SetNext()  # past 'else'
        else_body = self.ParseBraceGroup()
        if_node.else_action = else_body.children
1891
def _ParseYshIf(self, if_kw, cond):
    # type: (Token, condition_t) -> command.If
    """Parse the brace-delimited remainder of an 'if', after its condition.

    if test -f foo {
                 # ^ we parsed up to here
      echo foo
    } elif test -f bar; test -f spam {
      echo bar
    } else {
      echo none
    }

    NOTE: If you do something like if test -n foo{, the parser keeps going,
    and the error is confusing because it doesn't point to the right place.

    I think we might need strict_brace so that foo{ is disallowed.  It has to
    be foo\{ or foo{a,b}.  Or just turn that on with parse_brace?  After you
    form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
    Lit_RBrace?  Maybe this is pre-parsing step in the WordParser?

    Args:
      if_kw: The 'if' keyword token.
      cond: The condition, already parsed by the caller.
    """
    result = command.If.CreateNull(alloc_lists=True)
    result.if_kw = if_kw

    first_body = self.ParseBraceGroup()
    # Every arm has 1 spid, unlike shell-style
    # TODO: We could get the spids from the brace group.
    result.arms.append(IfArm(if_kw, cond, None, first_body.children, None))

    self._GetWord()
    if self.c_id == Id.KW_Elif or self.c_id == Id.KW_Else:
        self._ParseYshElifElse(result)

    # the whole if node has the 'else' spid, unlike shell-style there's no
    # 'fi' spid because that's in the BraceGroup.
    return result
1927
def _ParseElifElse(self, if_node):
    # type: (command.If) -> None
    """Parse shell-style 'elif ... then ...' arms and an optional 'else'.

    else_part: (Elif command_list Then command_list)* Else command_list ;

    Each elif arm is appended to if_node.arms.  if_node.else_kw is always
    assigned: the 'else' token, or None when there is no else clause.  The
    else body goes in if_node.else_action.
    """
    self._GetWord()
    while self.c_id == Id.KW_Elif:
        elif_kw = word_.AsKeywordToken(self.cur_word)
        self._SetNext()  # past 'elif'

        elif_cond = self._ParseConditionList()

        then_kw = word_.AsKeywordToken(self._Eat(Id.KW_Then))

        elif_body = self._ParseCommandList()
        if_node.arms.append(
            IfArm(elif_kw, elif_cond, then_kw, elif_body.children, then_kw))

    else_kw = None  # type: Optional[Token]
    self._GetWord()
    if self.c_id == Id.KW_Else:
        else_kw = word_.AsKeywordToken(self.cur_word)
        self._SetNext()  # past 'else'
        else_body = self._ParseCommandList()
        if_node.else_action = else_body.children

    if_node.else_kw = else_kw
1960
def ParseIf(self):
    # type: () -> command.If
    """Parse an 'if' statement, in shell style or YSH brace style.

    if_clause        : If command_list Then command_list else_part? Fi ;

    open             : '{' | Then
    close            : '}' | Fi

    ysh_if           : If ( command_list | '(' expr ')' )
                       open command_list else_part? close;

    There are 2 conditionals here: parse_paren, then parse_brace

    Looking at 'if'.

    Returns:
      A command.If.  For the brace form it comes from _ParseYshIf(); for the
      shell form it is built here and ends with the 'fi' token in fi_kw.
    """
    if_kw = word_.AsKeywordToken(self.cur_word)
    self._SetNext()  # past 'if'

    if (self.parse_opts.parse_paren() and
            self.w_parser.LookPastSpace() == Id.Op_LParen):
        # if (x + 1)
        enode = self.w_parser.ParseYshExprForCommand()
        cond = condition.YshExpr(enode)  # type: condition_t
    else:
        # if echo 1; echo 2; then
        # Remove ambiguity with if cd / {
        cond = self._ParseConditionList()

    self._GetWord()
    if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
        # _ParseYshIf() creates its own If node, so nothing is allocated
        # here for the brace form.
        return self._ParseYshIf(if_kw, cond)

    ate = self._Eat(Id.KW_Then)
    then_kw = word_.AsKeywordToken(ate)

    body = self._ParseCommandList()

    # Shell-style node; only created once we know we're on this path
    if_node = command.If.CreateNull(alloc_lists=True)
    if_node.if_kw = if_kw

    # First arm
    arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
    if_node.arms.append(arm)

    # 2nd to Nth arm
    if self.c_id in (Id.KW_Elif, Id.KW_Else):
        self._ParseElifElse(if_node)

    ate = self._Eat(Id.KW_Fi)
    if_node.fi_kw = word_.AsKeywordToken(ate)

    return if_node
2010
def ParseTime(self):
    # type: () -> command_t
    """Parse 'time' followed by a pipeline; looking at the 'time' keyword.

    Time [-p] pipeline, according to bash help.
    """
    keyword = word_.AsKeywordToken(self.cur_word)
    self._SetNext()  # past 'time'
    return command.TimeBlock(keyword, self.ParsePipeline())
2021
def ParseCompoundCommand(self):
    # type: () -> command_t
    """Dispatch on the current word to the matching compound command parser.

    Refactoring: we put io_redirect* here instead of in function_body and
    command.

    compound_command : brace_group io_redirect*
                     | subshell io_redirect*
                     | for_clause io_redirect*
                     | while_clause io_redirect*
                     | until_clause io_redirect*
                     | if_clause io_redirect*
                     | case_clause io_redirect*

                     # bash extensions
                     | time_clause
                     | [[ BoolExpr ]]
                     | (( ArithExpr ))

    A word that starts none of these is a parse error.
    """
    self._GetWord()
    cur_id = self.c_id

    if cur_id == Id.Lit_LBrace:
        brace_node = self.ParseBraceGroup()
        return self._MaybeParseRedirectList(brace_node)

    if cur_id == Id.Op_LParen:
        subshell_node = self.ParseSubshell()
        return self._MaybeParseRedirectList(subshell_node)

    if cur_id == Id.KW_For:
        # Note: Redirects parsed in this call.  POSIX for and bash for ((
        # have different nodetypes.
        return self.ParseFor()

    if cur_id in (Id.KW_While, Id.KW_Until):
        loop_kw = word_.AsKeywordToken(self.cur_word)
        loop_node = self.ParseWhileUntil(loop_kw)
        return self._MaybeParseRedirectList(loop_node)

    if cur_id == Id.KW_If:
        if_node = self.ParseIf()
        return self._MaybeParseRedirectList(if_node)

    if cur_id == Id.KW_Case:
        case_node = self.ParseCase()
        return self._MaybeParseRedirectList(case_node)

    if cur_id == Id.KW_DLeftBracket:
        if not self.parse_opts.parse_dbracket():
            p_die('Bash [[ not allowed in YSH (parse_dbracket)',
                  loc.Word(self.cur_word))
        dbracket_node = self.ParseDBracket()
        return self._MaybeParseRedirectList(dbracket_node)

    if cur_id == Id.Op_DLeftParen:
        if not self.parse_opts.parse_dparen():
            p_die(
                'Bash (( not allowed in YSH (parse_dparen, see OILS-ERR-14 for wart)',
                loc.Word(self.cur_word))
        dparen_node = self.ParseDParen()
        return self._MaybeParseRedirectList(dparen_node)

    # bash extensions: no redirects
    if cur_id == Id.KW_Time:
        return self.ParseTime()

    # Happens in function body, e.g. myfunc() oops
    p_die(
        'Unexpected word while parsing compound command (%s)' %
        Id_str(self.c_id), loc.Word(self.cur_word))
    assert False  # for MyPy
2089
def ParseFunctionDef(self):
    # type: () -> command.ShFunction
    """Parse a POSIX-style shell function definition, e.g. f() { echo hi; }

    function_header : fname '(' ')'
    function_def    : function_header newline_ok function_body ;

    Precondition: Looking at the function name.

    NOTE: There is an ambiguity with:

        function foo ( echo hi )      and
        function foo () ( echo hi )

    Bash only accepts the latter, though it doesn't really follow a grammar.

    Raises a parse error (via p_die) when the name is invalid or when the
    closing paren is missing.
    """
    name_word = cast(CompoundWord,
                     self.cur_word)  # caller ensures validity
    func_name = word_.ShFunctionName(name_word)
    if len(func_name) == 0:  # example: foo$x is invalid
        p_die('Invalid function name', loc.Word(name_word))

    first_part = name_word.parts[0]
    # A non-empty string from ShFunctionName should imply a literal part.
    assert first_part.tag() == word_part_e.Literal
    blame_tok = cast(Token, first_part)  # for ctx_VarChecker

    self._SetNext()  # move past function name

    # The caller's lookahead guarantees the left paren
    self._GetWord()
    assert self.c_id == Id.Op_LParen, self.cur_word

    # Ask the lexer to hand back the next ')' as Right_ShFunction
    self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
    self._SetNext()

    self._GetWord()
    if self.c_id != Id.Right_ShFunction:
        p_die('Expected ) in function definition', loc.Word(self.cur_word))
        return None

    # 'f ()' implies a function definition, since invoking it with no args
    # would just be 'f'
    self._SetNext()
    self._NewlineOk()

    node = command.ShFunction.CreateNull()
    node.name = func_name
    with ctx_VarChecker(self.var_checker, blame_tok):
        node.body = self.ParseCompoundCommand()

    node.name_tok = location.LeftTokenForCompoundWord(name_word)
    return node
2142
def ParseKshFunctionDef(self):
    # type: () -> command.ShFunction
    """Parse a ksh-style function definition, e.g. function f { echo hi; }

    ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

    Looking at the 'function' keyword.
    """
    function_kw = word_.AsKeywordToken(self.cur_word)

    self._SetNext()  # skip past 'function'
    self._GetWord()

    name_word = self.cur_word
    compound = cast(CompoundWord, name_word)  # caller ensures validity
    func_name = word_.ShFunctionName(compound)
    if len(func_name) == 0:  # example: foo$x is invalid
        p_die('Invalid KSH-style function name', loc.Word(compound))

    self._SetNext()  # skip past the function name

    # The '()' pair is optional in this form
    self._GetWord()
    if self.c_id == Id.Op_LParen:
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()
        self._Eat(Id.Right_ShFunction)

    self._NewlineOk()

    node = command.ShFunction.CreateNull()
    node.name = func_name
    with ctx_VarChecker(self.var_checker, function_kw):
        node.body = self.ParseCompoundCommand()

    node.keyword = function_kw
    node.name_tok = location.LeftTokenForWord(name_word)
    return node
2177
def ParseYshProc(self):
    # type: () -> Proc
    """Parse a YSH proc: its signature, then a brace group as the body.

    Looking at the 'proc' keyword.  When the signature is closed, every
    declared parameter (word, positional, named, block) is registered with
    the var checker under Id.KW_Var.
    """
    proc_node = Proc.CreateNull(alloc_lists=True)

    proc_kw = word_.AsKeywordToken(self.cur_word)
    proc_node.keyword = proc_kw

    with ctx_VarChecker(self.var_checker, proc_kw):
        with ctx_CmdMode(self, cmd_mode_e.Proc):
            self.w_parser.ParseProc(proc_node)
            if proc_node.sig.tag() == proc_sig_e.Closed:  # Register params
                closed = cast(proc_sig.Closed, proc_node.sig)

                # Treat 3 kinds of params as variables.
                word_group = closed.word
                if word_group:
                    for p in word_group.params:
                        self.var_checker.Check(Id.KW_Var, p.name,
                                               p.blame_tok)
                    if word_group.rest_of:
                        rest = word_group.rest_of
                        self.var_checker.Check(Id.KW_Var, rest.name,
                                               rest.blame_tok)
                        # We COULD register __out here but it would require
                        # a different API.

                pos_group = closed.positional
                if pos_group:
                    for p in pos_group.params:
                        self.var_checker.Check(Id.KW_Var, p.name,
                                               p.blame_tok)
                    if pos_group.rest_of:
                        rest = pos_group.rest_of
                        self.var_checker.Check(Id.KW_Var, rest.name,
                                               rest.blame_tok)

                named_group = closed.named
                if named_group:
                    for p in named_group.params:
                        self.var_checker.Check(Id.KW_Var, p.name,
                                               p.blame_tok)
                    if named_group.rest_of:
                        rest = named_group.rest_of
                        self.var_checker.Check(Id.KW_Var, rest.name,
                                               rest.blame_tok)

                if closed.block_param:
                    block = closed.block_param
                    self.var_checker.Check(Id.KW_Var, block.name,
                                           block.blame_tok)

            self._SetNext()
            # No redirects for YSH procs (only at call site)
            proc_node.body = self.ParseBraceGroup()

    return proc_node
2234
def ParseYshFunc(self):
    # type: () -> Func
    """Parse a YSH func: its signature, then a brace group as the body.

    ysh_func: (
      Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
    )

    Looking at KW_Func.  Positional and named parameters are registered with
    the var checker under Id.KW_Var before the body is parsed.
    """
    func_node = Func.CreateNull(alloc_lists=True)

    func_kw = word_.AsKeywordToken(self.cur_word)
    func_node.keyword = func_kw

    with ctx_VarChecker(self.var_checker, func_kw):
        self.w_parser.ParseFunc(func_node)

        pos_group = func_node.positional
        if pos_group:
            for p in pos_group.params:
                self.var_checker.Check(Id.KW_Var, p.name, p.blame_tok)
            if pos_group.rest_of:
                rest = pos_group.rest_of
                self.var_checker.Check(Id.KW_Var, rest.name, rest.blame_tok)

        named_group = func_node.named
        if named_group:
            for p in named_group.params:
                self.var_checker.Check(Id.KW_Var, p.name, p.blame_tok)
            if named_group.rest_of:
                rest = named_group.rest_of
                self.var_checker.Check(Id.KW_Var, rest.name, rest.blame_tok)

        self._SetNext()
        # Only the body is parsed in Func command mode
        with ctx_CmdMode(self, cmd_mode_e.Func):
            func_node.body = self.ParseBraceGroup()

    return func_node
2274
def ParseCoproc(self):
    # type: () -> command_t
    """Placeholder for parsing coprocesses; not implemented yet.

    TODO: command.Coproc?

    Raises:
      NotImplementedError: always
    """
    raise NotImplementedError()
2281
def ParseSubshell(self):
    # type: () -> command.Subshell
    """Parse a subshell, e.g. ( cd / && pwd )

    subshell : '(' compound_list ')'

    Looking at Op_LParen.  A command list with exactly one child is
    collapsed to that child in the returned node.
    """
    lparen_tok = word_.AsOperatorToken(self.cur_word)
    self._SetNext()  # skip past (

    # Ensure that something $( (cd / && pwd) ) works.  If ) is already on the
    # translation stack, we want to delay it.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)

    cmd_list = self._ParseCommandList()
    if len(cmd_list.children) == 1:
        inner = cmd_list.children[0]
    else:
        inner = cmd_list

    rparen_word = self._Eat(Id.Right_Subshell)
    rparen_tok = word_.AsOperatorToken(rparen_word)

    return command.Subshell(lparen_tok, inner, rparen_tok)
2307
def ParseDBracket(self):
    # type: () -> command.DBracket
    """Parse [[ ... ]] by handing the word parser to a BoolParser.

    The current word is the [[ keyword.  Errors from the boolean expression
    parser propagate to the caller.
    """
    open_tok = word_.AsKeywordToken(self.cur_word)
    # TODO: Test interactive.  Without closing ]], you should get > prompt
    # (PS2)

    self._SetNext()  # skip [[
    bool_parser = bool_parse.BoolParser(self.w_parser)
    bool_node, close_tok = bool_parser.Parse()  # May raise
    return command.DBracket(open_tok, bool_node, close_tok)
2320
def ParseDParen(self):
    # type: () -> command.DParen
    """Parse (( ... )) by delegating the arithmetic to the word parser.

    The current word is the (( operator.
    """
    open_tok = word_.AsOperatorToken(self.cur_word)

    self._SetNext()  # skip ((
    arith_node, close_tok = self.w_parser.ReadDParen()
    assert arith_node is not None

    return command.DParen(open_tok, arith_node, close_tok)
2330
def ParseCommand(self):
    # type: () -> command_t
    """Parse one command by dispatching on the current word.

    command          : simple_command
                     | compound_command   # OSH edit: io_redirect* folded in
                     | function_def
                     | ksh_function_def

                     # YSH extensions
                     | proc NAME ...
                     | typed proc NAME ...
                     | func NAME ...
                     | const ...
                     | var ...
                     | setglobal ...
                     | setref ...
                     | setvar ...
                     | call EXPR
                     | = EXPR
                     ;

    Note: the reason const / var are not part of compound_command is because
    they can't be alone in a shell function body.

    Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1

    NOTE(review): 'setref' is listed in the grammar above, but no branch in
    this method tests for it -- confirm whether it is handled elsewhere.

    Every path either returns a command node or reports a parse error with
    p_die().
    """
    # A secondary keyword cannot begin a command
    if self._AtSecondaryKeyword():
        p_die('Unexpected word when parsing command',
              loc.Word(self.cur_word))

    # YSH Extensions

    if self.c_id == Id.KW_Proc:  # proc p { ... }
        # proc is hidden because of the 'local reasoning' principle.  Code
        # inside procs should be YSH, full stop.  That means ysh:upgrade is
        # on.
        if self.parse_opts.parse_proc():
            return self.ParseYshProc()
        else:
            # 2024-02: This avoids bad syntax errors if you type YSH code
            # into OSH
            # proc p (x) { echo hi } would actually be parsed as a
            # command.Simple!  Shell compatibility: quote 'proc'
            p_die("proc is a YSH keyword, but this is OSH.",
                  loc.Word(self.cur_word))

    if self.c_id == Id.KW_Typed:  # typed proc p () { ... }
        # Advance to the word after 'typed'; it must be 'proc'
        self._SetNext()
        self._GetWord()
        if self.c_id != Id.KW_Proc:
            p_die("Expected 'proc' after 'typed'", loc.Word(self.cur_word))

        if self.parse_opts.parse_proc():
            return self.ParseYshProc()
        else:
            # Note: the current word is now 'proc', so that is what gets
            # blamed here
            p_die("typed is a YSH keyword, but this is OSH.",
                  loc.Word(self.cur_word))

    if self.c_id == Id.KW_Func:  # func f(x) { ... }
        if self.parse_opts.parse_func():
            return self.ParseYshFunc()
        else:
            # Same reasoning as above, for 'proc'
            p_die("func is a YSH keyword, but this is OSH.",
                  loc.Word(self.cur_word))

    # const is rejected in any command mode other than Shell
    if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
        p_die("const can't be inside proc or func.  Use var instead.",
              loc.Word(self.cur_word))

    if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
        keyword_id = self.c_id
        kw_token = word_.LiteralToken(self.cur_word)
        self._SetNext()
        n8 = self.w_parser.ParseVarDecl(kw_token)
        # Register each declared name with the static var checker
        for lhs in n8.lhs:
            self.var_checker.Check(keyword_id, lhs.name, lhs.left)
        return n8

    if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
        kw_token = word_.LiteralToken(self.cur_word)
        self._SetNext()
        n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
        return n9

    if self.c_id in (Id.KW_Call, Id.Lit_Equals):
        # = 42 + a[i]
        # call mylist->append('x')

        keyword = word_.LiteralToken(self.cur_word)
        assert keyword is not None
        self._SetNext()
        enode = self.w_parser.ParseCommandExpr()
        return command.Expr(keyword, enode)

    if self.c_id == Id.KW_Function:
        return self.ParseKshFunctionDef()

    # Every word that can begin a compound command
    if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                     Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                     Id.KW_If, Id.KW_Case, Id.KW_Time):
        return self.ParseCompoundCommand()

    # Syntax error for '}' starting a line, which all shells disallow.
    if self.c_id == Id.Lit_RBrace:
        p_die('Unexpected right brace', loc.Word(self.cur_word))

    if self.c_kind == Kind.Redir:  # Leading redirect
        return self.ParseSimpleCommand()

    if self.c_kind == Kind.Word:
        # ensured by Kind.Word
        cur_word = cast(CompoundWord, self.cur_word)

        # NOTE: At the top level, only Token and Compound are possible.
        # Can this be modelled better in the type system, removing asserts?
        #
        # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
        # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
        # That requires 2 tokens of lookahead, which we don't have
        #
        # Or maybe we don't just have ParseSimpleCommand -- we will have
        # ParseYshCommand or something

        if (self.w_parser.LookAheadFuncParens() and
                not word_.IsVarLike(cur_word)):
            return self.ParseFunctionDef()  # f() { echo; }  # function

        # Parse  x = 1+2*3  when inside HayNode { } blocks
        parts = cur_word.parts
        if self.parse_opts.parse_equals() and len(parts) == 1:
            part0 = parts[0]
            if part0.tag() == word_part_e.Literal:
                tok = cast(Token, part0)
                # A single literal that is a valid var name, followed by '='
                if (tok.id == Id.Lit_Chars and
                        self.w_parser.LookPastSpace() == Id.Lit_Equals and
                        match.IsValidVarName(lexer.LazyStr(tok))):

                    # Bare assignment is accepted only when the top of the
                    # hay attrs stack is true
                    if (len(self.hay_attrs_stack) and
                            self.hay_attrs_stack[-1]):
                        # Note: no static var_checker.Check() for bare assignment
                        enode = self.w_parser.ParseBareDecl()
                        self._SetNext()  # Somehow this is necessary
                        # TODO: Use BareDecl here.  Well, do that when we
                        # treat it as const or lazy.
                        return command.VarDecl(
                            None,
                            [NameType(tok, lexer.TokenVal(tok), None)],
                            enode)
                    else:
                        # Move to the '=' word so the error points at it
                        self._SetNext()
                        self._GetWord()
                        p_die(
                            'Unexpected = (Hint: use var/setvar, or quote it)',
                            loc.Word(self.cur_word))

        # echo foo
        # f=(a b c)  # array
        # array[1+2]+=1
        return self.ParseSimpleCommand()

    if self.c_kind == Kind.Eof:
        p_die("Unexpected EOF while parsing command",
              loc.Word(self.cur_word))

    # NOTE: This only happens in batch mode in the second turn of the loop!
    # e.g. )
    p_die("Invalid word while parsing command", loc.Word(self.cur_word))

    assert False  # for MyPy
2503
def ParsePipeline(self):
    # type: () -> command_t
    """Parse a pipeline, optionally negated with a leading '!'.

    pipeline         : Bang? command ( '|' newline_ok command )* ;

    A single command that is neither negated nor piped is returned as is,
    without a command.Pipeline wrapper.
    """
    bang_tok = None  # type: Optional[Token]

    self._GetWord()
    if self.c_id == Id.KW_Bang:
        bang_tok = word_.AsKeywordToken(self.cur_word)
        self._SetNext()

    first = self.ParseCommand()
    assert first is not None

    commands = [first]

    self._GetWord()
    if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
        if bang_tok is None:
            return first  # no pipeline
        return command.Pipeline(bang_tok, commands, [])

    # | or |&
    pipe_toks = []  # type: List[Token]
    while self.c_id in (Id.Op_Pipe, Id.Op_PipeAmp):
        pipe_toks.append(word_.AsOperatorToken(self.cur_word))

        self._SetNext()  # skip past Id.Op_Pipe or Id.Op_PipeAmp
        self._NewlineOk()

        commands.append(self.ParseCommand())

        # Refresh the current word before the loop condition is re-tested
        self._GetWord()

    return command.Pipeline(bang_tok, commands, pipe_toks)
2546
def ParseAndOr(self):
    # type: () -> command_t
    """Parse an and_or list, in multiline mode if it begins with '...'."""
    self._GetWord()
    if self.c_id == Id.Lit_TDot:
        # We got '...', so parse in multiline mode
        self._SetNext()
        with word_.ctx_Multiline(self.w_parser):
            multiline_node = self._ParseAndOr()
        return multiline_node

    # Parse in normal mode, not multiline
    return self._ParseAndOr()
2558
def _ParseAndOr(self):
    # type: () -> command_t
    """Parse pipelines joined by && and ||.

    and_or           : and_or ( AND_IF | OR_IF ) newline_ok pipeline
                     | pipeline

    Note that it is left recursive and left associative.  We parse it
    iteratively with a token of lookahead.

    A lone pipeline is returned as is, without a command.AndOr wrapper.
    """
    first = self.ParsePipeline()
    assert first is not None

    self._GetWord()
    if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
        return first

    pipelines = [first]
    op_toks = []  # type: List[Token]

    while self.c_id in (Id.Op_DPipe, Id.Op_DAmp):
        op_toks.append(word_.AsOperatorToken(self.cur_word))

        self._SetNext()  # skip past || &&
        self._NewlineOk()

        pipelines.append(self.ParsePipeline())

        # Refresh the current word before the loop condition is re-tested
        self._GetWord()

    return command.AndOr(pipelines, op_toks)
2592
2593 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2594
2595 # At the top level, we execute after every line, e.g. to
2596 # - process alias (a form of dynamic parsing)
2597 # - process 'exit', because invalid syntax might appear after it
2598
2599 # On the other hand, for a while loop body, we parse the whole thing at once,
2600 # and then execute it. We don't want to parse it over and over again!
2601
2602 # COMPARE
2603 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2604 # command_term : and_or (trailer and_or)* ; # CHILDREN
2605
def _ParseCommandLine(self):
    # type: () -> command_t
    """Parse one top-level line of commands.

    command_line     : and_or (sync_op and_or)* trailer? ;
    trailer          : sync_op newline_ok
                     | NEWLINES;
    sync_op          : '&' | ';';

    NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see
    if there is another command word after the sync op.

    But it's easier to express imperatively.  Do the following in a loop:
    1. ParseAndOr
    2. Peek.
       a. If there's a newline, then return.  (We're only parsing a single
          line.)
       b. If there's a sync_op, process it.  Then look for a newline and
          return.  Otherwise, parse another AndOr.

    Returns a command.CommandList when the line holds several commands,
    otherwise the single command itself.
    """
    # This list is slightly different than END_LIST in _ParseCommandTerm.
    # I don't think we should add anything else here; otherwise it will be
    # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
    line_enders = [Id.Op_Newline, Id.Eof_Real]

    nodes = []  # type: List[command_t]
    finished = False
    while not finished:
        node = self.ParseAndOr()

        self._GetWord()
        if self.c_id in line_enders:
            finished = True

        elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
            sync_tok = cast(Token, self.cur_word)  # for MyPy
            node = command.Sentence(node, sync_tok)
            self._SetNext()

            # A line ender right after the sync op finishes the line
            self._GetWord()
            if self.c_id in line_enders:
                finished = True

        else:
            # e.g. echo a(b)
            p_die(
                'Invalid word while parsing command line (%s)' %
                Id_str(self.c_id), loc.Word(self.cur_word))

        nodes.append(node)

    # Simplify the AST.
    if len(nodes) > 1:
        return command.CommandList(nodes)
    return nodes[0]
2661
def _ParseCommandTerm(self):
    # type: () -> command.CommandList
    """Parse a sequence of and_or commands that forms a block body.

    command_term     : and_or (trailer and_or)* ;
    trailer          : sync_op newline_ok
                     | NEWLINES;
    sync_op          : '&' | ';';

    This is handled in imperative style, like _ParseCommandLine.
    Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
    which is slightly different.  (HOW?  Is it the DSEMI?)

    Returns:
      A command.CommandList.  Its children may be empty when the first word
      is already a secondary keyword.
    """
    # Token types that will end the command term.
    END_LIST = [
        self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
        Id.Op_SemiAmp, Id.Op_DSemiAmp
    ]

    # NOTE: This is similar to _ParseCommandLine.
    #
    # - Why aren't we doing END_LIST in _ParseCommandLine?
    #   - Because you will never be inside $() at the top level.
    #   - We also know it will end in a newline.  It can't end in "fi"!
    #   - example: if true; then { echo hi; } fi

    children = []  # type: List[command_t]
    done = False
    while not done:
        # Most keywords are valid "first words".  But do/done/then do not BEGIN
        # commands, so they are not valid.
        if self._AtSecondaryKeyword():
            break

        child = self.ParseAndOr()

        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()

            # A terminator right after the newline ends the term
            self._GetWord()
            if self.c_id in END_LIST:
                done = True

        elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
            tok = cast(Token, self.cur_word)  # for MyPy
            # Keep the ; or & attached to the command it follows
            child = command.Sentence(child, tok)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()  # skip over newline

                # Test if we should keep going.  There might be another command after
                # the semi and newline.
                self._GetWord()
                if self.c_id in END_LIST:  # \n EOF
                    done = True

            elif self.c_id in END_LIST:  # ; EOF
                done = True

        elif self.c_id in END_LIST:  # EOF
            done = True

        # For if test -f foo; test -f bar {
        elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            done = True

        elif self.c_kind != Kind.Word:
            # e.g. f() { echo (( x )) ; }
            # but can't fail on 'fi fi', see osh/cmd_parse_test.py

            #log("Invalid %s", self.cur_word)
            p_die("Invalid word while parsing command list",
                  loc.Word(self.cur_word))

        children.append(child)

    return command.CommandList(children)
2744
def _ParseCommandList(self):
    # type: () -> command.CommandList
    """Parse a command block: optional newlines, then a command term.

    command_list     : newline_ok command_term trailer? ;

    This one is called by all the compound commands.  It's basically a
    command block.

    NOTE: Rather than translating the CFG directly, the code follows a style
    more like this: (and_or trailer)+.  It makes capture easier.
    """
    self._NewlineOk()
    term = self._ParseCommandTerm()
    return term
2759
def ParseLogicalLine(self):
    # type: () -> command_t
    """Parse a single line for main_loop.

    A wrapper around _ParseCommandLine().  Similar but not identical to
    _ParseCommandList() and ParseCommandSub().

    Returns:
      The parsed command, or None when the input is at Eof_Real after any
      leading newlines.

    Raises:
      ParseError
    """
    self._NewlineOk()
    self._GetWord()
    if self.c_id != Id.Eof_Real:
        return self._ParseCommandLine()

    return None  # main loop checks for here docs
2776
def ParseInteractiveLine(self):
    # type: () -> parse_result_t
    """Parse a single line for Interactive main_loop.

    Different from ParseLogicalLine because newlines are handled
    differently: a leading newline yields parse_result.EmptyLine rather than
    being skipped.

    Returns:
      parse_result.EmptyLine, parse_result.Eof, or a parse_result.Node
      wrapping the parsed command.

    Raises:
      ParseError
    """
    self._GetWord()
    first_id = self.c_id
    if first_id == Id.Op_Newline:
        return parse_result.EmptyLine
    if first_id == Id.Eof_Real:
        return parse_result.Eof

    line_node = self._ParseCommandLine()
    return parse_result.Node(line_node)
2794
def ParseCommandSub(self):
    # type: () -> command_t
    """Parse $(echo hi) and `echo hi` for word_parse.py.

    They can have multiple lines, like this:

        echo $(
          echo one
          echo two
        )

    An empty substitution such as $() yields command.NoOp, and a term with
    exactly one child is collapsed to that child.
    """
    self._NewlineOk()

    self._GetWord()
    if self.c_kind == Kind.Eof:  # e.g. $()
        return command.NoOp

    term = self._ParseCommandTerm()
    if len(term.children) != 1:
        return term
    return term.children[0]
2813
def CheckForPendingHereDocs(self):
    # type: () -> None
    """Report a parse error if any here doc is still waiting for its body.

    NOTE: This happens when there is no newline at the end of a file, like
    osh -c 'cat <<EOF'
    """
    if len(self.pending_here_docs) == 0:
        return

    first_redir = self.pending_here_docs[0]  # Just show the first one?
    here_doc = cast(redir_param.HereDoc, first_redir.arg)
    p_die('Unterminated here doc began here', loc.Word(here_doc.here_begin))
2822
2823
2824# vim: sw=4