OILS / osh / word_eval.py View on Github | oilshell.org

2460 lines, 1476 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 AssignArg,
37 a_index,
38 a_index_e,
39 VTestPlace,
40 VarSubState,
41 Piece,
42)
43from _devbuild.gen.option_asdl import option_i, builtin_i
44from _devbuild.gen.value_asdl import (
45 value,
46 value_e,
47 value_t,
48 sh_lvalue,
49 sh_lvalue_t,
50)
51from core import error
52from core import pyos
53from core import pyutil
54from core import state
55from core import ui
56from core import util
57from data_lang import j8
58from data_lang import j8_lite
59from core.error import e_die
60from frontend import consts
61from frontend import lexer
62from frontend import location
63from mycpp import mops
64from mycpp.mylib import log, tagswitch, NewDict
65from osh import braces
66from osh import glob_
67from osh import string_ops
68from osh import word_
69from ysh import expr_eval
70from ysh import val_ops
71
72from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
73
74if TYPE_CHECKING:
75 from _devbuild.gen.syntax_asdl import word_part_t
76 from _devbuild.gen.option_asdl import builtin_t
77 from core import optview
78 from core.state import Mem
79 from core.ui import ErrorFormatter
80 from core.vm import _Executor
81 from osh.split import SplitContext
82 from osh import prompt
83 from osh import sh_expr_eval
84
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag is its own bit, so several of them can be OR-ed together into one
# eval_flags integer.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
100
101
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may stand for ${a[0]}.

    With strict_array off, decay is always allowed.  With it on, decay is
    only allowed for a plain variable substitution of one of the names listed
    in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    if not is_plain_var_sub:
        return False
    return var_name in _STRING_AND_ARRAY
107
108
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray the first entry is used, for a BashAssoc the entry under
    the key '0'.  When that entry is missing or None, value.Undef is returned.
    Any other kind of value is a programming error (AssertionError).
    """
    first = None  # type: Optional[str]

    tag = val.tag()
    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        if len(array_val.strs):
            first = array_val.strs[0]
    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        if '0' in assoc_val.d:
            first = assoc_val.d['0']
    else:
        raise AssertionError(val.tag())

    if first is None:
        return value.Undef
    return value.Str(first)
125
126
def GetArrayItem(strs, index):
    # type: (List[str], int) -> Optional[str]
    """Return strs[index], or None when index is out of range.

    A negative index counts from the end of the list.  The returned entry can
    itself be None, because the array representation is sparse.
    """
    size = len(strs)
    pos = index + size if index < 0 else index

    if pos < 0 or pos >= size:
        return None

    # TODO: strs->index() has a redundant check for (i < 0)
    return strs[pos]
141
142
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or the 'command' builtin.

    All of these cases have to be detected:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_osh.Builtin() or meta_osh.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
159
160
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an unquoted String part naming 'builtin' or
    'command'.

    Anything that is not a String part, or that is quoted, yields False.
    """
    UP_val0 = val0
    if val0.tag() != part_value_e.String:
        return False

    val0 = cast(Piece, UP_val0)
    if val0.quoted:
        return False
    return _DetectMetaBuiltinStr(val0.s)
169
170
# Use libc to parse NAME, NAME=value, and NAME+=value.  We want submatch
# extraction, but I haven't used that in re2c, and we would need a new kind of
# binding.
#
# The backslash before + is written as \\ so the regex engine still receives
# \+ without relying on Python passing an unknown string escape through
# (which is a warning on Python 3).
ASSIGN_ARG_RE = '^([a-zA-Z_][a-zA-Z0-9_]*)((=|\\+=)(.*))?$'

# Eggex equivalent:
#
#   VarName = /
#     [a-z A-Z _ ]
#     [a-z A-Z 0-9 _ ]*
#   /
#
#   SplitArg = /
#     %begin
#     < VarName >
#     < < '=' | '+=' > < dot* > > ?
#     %end
#   /
# Note: must use < > for grouping because there is no non-capturing group.
191
192
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse one argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    Args:
      arg: already evaluated argument string: NAME, NAME=value or NAME+=value
      blame_word: word to blame in errors; it is also stored in the result

    Dies with e_die() when arg doesn't match ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API
    # for that, and it probably isn't a bottleneck now
    m = util.RegexSearch(ASSIGN_ARG_RE, arg)
    if m is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Groups: m[1] is the name, m[3] the operator and m[4] the value.  m[2]
    # only exists for grouping, since ERE has no non-capturing groups.
    name = m[1]
    op = m[3]
    assert op is not None, op

    # Defaults are for 'declare NAME', where there is no operator
    rhs = None  # type: Optional[value_t]
    is_append = False
    if len(op):  # declare NAME=
        rhs = value.Str(m[4])
        is_append = (op[0] == '+')

    return AssignArg(name, rhs, is_append, blame_word)
219
220
221# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
222def _BackslashEscape(s):
223 # type: (str) -> str
224 """Double up backslashes.
225
226 Useful for strings about to be globbed and strings about to be IFS
227 escaped.
228 """
229 return s.replace('\\', '\\\\')
230
231
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Turn the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to convert
      quoted: stored in each resulting Piece; the third Piece field is set to
        its negation
      part_loc: word part to blame when val can't be substituted

    Raises:
      error.TypeErr: for value types that can't appear in a word
    """
    UP_val = val
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _EmptyStrOrError, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            return part_value.Array(array_val.strs)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(assoc_val.d.values())

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            text = val_ops.Stringify(val, loc.Missing)
            return Piece(text, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
271
272
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part values of a word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    The first non-None entry of an Array part joins the current frame.  Every
    later entry starts a new frame, which following String parts are then
    appended to.  None entries are skipped.

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for part_val in part_vals:
        UP_part_val = part_val

        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                str_piece = cast(Piece, UP_part_val)
                frame.append(str_piece)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part_val)

                seen_item = False
                for s in array_part.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    item = Piece(s, True, False)
                    if seen_item:
                        frame = [item]
                        frames.append(frame)  # singleton frame
                    else:
                        frame.append(item)
                        seen_item = True

            else:
                raise AssertionError()

    return frames
340
341
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. decay ${a=x"$@"x}.

    String parts contribute their text as is.  The non-None entries of an
    Array part are joined with join_char.  The resulting chunks are
    concatenated without any separator.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part_val)
                chunks.append(piece.s)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part_val)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in array_part.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
361
362
def _PerformSlice(
        val,  # type: value_t
        begin,  # type: int
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply the slice of ${x:begin:length} to a string or an array.

    Args:
      val: value to slice; Str and BashArray are supported
      begin: start offset; a negative value counts from the end
      length: only meaningful when has_length is true.  For a Str, a negative
        length is an end POSITION counted from the end of the string.  For a
        BashArray, a negative length is a fatal error.
      has_length: whether a length was given at all
      part: the substitution, used as the location of errors
      arg0_val: when not None, its string is put in front of the array before
        slicing, because "begin" for $@ and $* counts $0

    Returns:
      A new value.Str or value.BashArray.  Strings are sliced in units of
      UTF-8 characters.  For arrays, unset (None) entries are dropped and
      don't count towards the length.  An array begin that reaches before the
      first entry gives an empty array.

    Dies with e_die() on a negative array length or a BashAssoc, and raises
    error.TypeErr for any other type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            if begin < 0:  # Compute offset with unicode
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray):  # Slice array entries.
            val = cast(value.BashArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            # Quirk: "begin" for positional arguments ($@ and $*) counts $0.
            if arg0_val is not None:
                orig = [arg0_val.s]
                orig.extend(val.strs)
            else:
                orig = val.strs

            n = len(orig)
            if begin < 0:
                i = n + begin  # ${@:-3} starts counts from the end
            else:
                i = begin

            strs = []  # type: List[str]
            count = 0
            # When a negative begin reaches before the first entry, i is still
            # negative here.  Indexing with it would wrap around to the end of
            # the list, so select nothing instead, as bash does.
            if i >= 0:
                while i < n:
                    if has_length and count == length:  # length could be 0
                        break
                    s = orig[i]
                    # Unset elements don't count towards the length
                    if s is not None:
                        strs.append(s)
                        count += 1
                    i += 1

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
444
445
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    Only declares EvalWordToString(); subclasses provide the implementation.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate the word w to a single string value.

        Not implemented here; always raises NotImplementedError.
        """
        raise NotImplementedError()
457
458
459def _GetDollarHyphen(exec_opts):
460 # type: (optview.Exec) -> str
461 chars = [] # type: List[str]
462 if exec_opts.interactive():
463 chars.append('i')
464
465 if exec_opts.errexit():
466 chars.append('e')
467 if exec_opts.noglob():
468 chars.append('f')
469 if exec_opts.noexec():
470 chars.append('n')
471 if exec_opts.nounset():
472 chars.append('u')
473 # NO letter for pipefail?
474 if exec_opts.xtrace():
475 chars.append('x')
476 if exec_opts.noclobber():
477 chars.append('C')
478
479 # bash has:
480 # - c for sh -c, i for sh -i (mksh also has this)
481 # - h for hashing (mksh also has this)
482 # - B for brace expansion
483 return ''.join(chars)
484
485
class TildeEvaluator(object):
    """Expands ~ and ~user to a home directory."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.exec_opts = exec_opts  # for strict_tilde
        self.mem = mem  # for $HOME

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, then ask the OS.  This is what bash
        # does.
        home = self.mem.GetValue('HOME')
        if home.tag() != value_e.Str:
            return pyos.GetMyHomeDir()
        return cast(value.Str, home).s

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the directory can't be determined, this dies under strict_tilde
        and otherwise returns the ~ or ~user text literally.
        """
        if part.user_name is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(part.user_name)

        if home_dir is not None:
            return home_dir

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if part.user_name is None:
            return '~'
        return '~' + part.user_name  # mycpp doesn't have +=
527
528
529class AbstractWordEvaluator(StringWordEvaluator):
530 """Abstract base class for word evaluators.
531
532 Public entry points:
533 EvalWordToString EvalForPlugin EvalRhsWord
534 EvalWordSequence EvalWordSequence2
535 """
536
537 def __init__(
538 self,
539 mem, # type: state.Mem
540 exec_opts, # type: optview.Exec
541 mutable_opts, # type: state.MutableOpts
542 tilde_ev, # type: TildeEvaluator
543 splitter, # type: SplitContext
544 errfmt, # type: ui.ErrorFormatter
545 ):
546 # type: (...) -> None
547 self.arith_ev = None # type: sh_expr_eval.ArithEvaluator
548 self.expr_ev = None # type: expr_eval.ExprEvaluator
549 self.prompt_ev = None # type: prompt.Evaluator
550
551 self.unsafe_arith = None # type: sh_expr_eval.UnsafeArith
552
553 self.tilde_ev = tilde_ev
554
555 self.mem = mem # for $HOME, $1, etc.
556 self.exec_opts = exec_opts # for nounset
557 self.mutable_opts = mutable_opts # for _allow_command_sub
558 self.splitter = splitter
559 self.errfmt = errfmt
560
561 self.globber = glob_.Globber(exec_opts)
562
563 def CheckCircularDeps(self):
564 # type: () -> None
565 raise NotImplementedError()
566
567 def _EvalCommandSub(self, cs_part, quoted):
568 # type: (CommandSub, bool) -> part_value_t
569 """Abstract since it has a side effect."""
570 raise NotImplementedError()
571
572 def _EvalProcessSub(self, cs_part):
573 # type: (CommandSub) -> part_value_t
574 """Abstract since it has a side effect."""
575 raise NotImplementedError()
576
577 def _EvalVarNum(self, var_num):
578 # type: (int) -> value_t
579 assert var_num >= 0
580 return self.mem.GetArgNum(var_num)
581
582 def _EvalSpecialVar(self, op_id, quoted, vsub_state):
583 # type: (int, bool, VarSubState) -> value_t
584 """Evaluate $?
585
586 and so forth
587 """
588 # $@ is special -- it need to know whether it is in a double quoted
589 # context.
590 #
591 # - If it's $@ in a double quoted context, return an ARRAY.
592 # - If it's $@ in a normal context, return a STRING, which then will be
593 # subject to splitting.
594
595 if op_id in (Id.VSub_At, Id.VSub_Star):
596 argv = self.mem.GetArgv()
597 val = value.BashArray(argv) # type: value_t
598 if op_id == Id.VSub_At:
599 # "$@" evaluates to an array, $@ should be decayed
600 vsub_state.join_array = not quoted
601 else: # $* "$*" are both decayed
602 vsub_state.join_array = True
603
604 elif op_id == Id.VSub_Hyphen:
605 val = value.Str(_GetDollarHyphen(self.exec_opts))
606
607 else:
608 val = self.mem.GetSpecialVar(op_id)
609
610 return val
611
612 def _ApplyTestOp(
613 self,
614 val, # type: value_t
615 op, # type: suffix_op.Unary
616 quoted, # type: bool
617 part_vals, # type: Optional[List[part_value_t]]
618 vtest_place, # type: VTestPlace
619 blame_token, # type: Token
620 ):
621 # type: (...) -> bool
622 """
623 Returns:
624 Whether part_vals was mutated
625
626 ${a:-} returns part_value[]
627 ${a:+} returns part_value[]
628 ${a:?error} returns error word?
629 ${a:=} returns part_value[] but also needs self.mem for side effects.
630
631 So I guess it should return part_value[], and then a flag for raising an
632 error, and then a flag for assigning it?
633 The original BracedVarSub will have the name.
634
635 Example of needing multiple part_value[]
636
637 echo X-${a:-'def'"ault"}-X
638
639 We return two part values from the BracedVarSub. Also consider:
640
641 echo ${a:-x"$@"x}
642 """
643 eval_flags = IS_SUBST
644 if quoted:
645 eval_flags |= QUOTED
646
647 tok = op.op
648 # NOTE: Splicing part_values is necessary because of code like
649 # ${undef:-'a b' c 'd # e'}. Each part_value can have a different
650 # do_glob/do_elide setting.
651 UP_val = val
652 with tagswitch(val) as case:
653 if case(value_e.Undef):
654 is_falsey = True
655
656 elif case(value_e.Str):
657 val = cast(value.Str, UP_val)
658 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
659 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
660 is_falsey = len(val.s) == 0
661 else:
662 is_falsey = False
663
664 elif case(value_e.BashArray):
665 val = cast(value.BashArray, UP_val)
666 # TODO: allow undefined
667 is_falsey = len(val.strs) == 0
668
669 elif case(value_e.BashAssoc):
670 val = cast(value.BashAssoc, UP_val)
671 is_falsey = len(val.d) == 0
672
673 else:
674 # value.Eggex, etc. are all false
675 is_falsey = False
676
677 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
678 if is_falsey:
679 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
680 return True
681 else:
682 return False
683
684 # Inverse of the above.
685 elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
686 if is_falsey:
687 return False
688 else:
689 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
690 return True
691
692 # Splice and assign
693 elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
694 if is_falsey:
695 # Collect new part vals.
696 assign_part_vals = [] # type: List[part_value_t]
697 self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
698 eval_flags)
699 # Append them to out param AND return them.
700 part_vals.extend(assign_part_vals)
701
702 if vtest_place.name is None:
703 # TODO: error context
704 e_die("Can't assign to special variable")
705 else:
706 # NOTE: This decays arrays too! 'shopt -s strict_array' could
707 # avoid it.
708 rhs_str = _DecayPartValuesToString(
709 assign_part_vals, self.splitter.GetJoinChar())
710 if vtest_place.index is None: # using None when no index
711 lval = location.LName(
712 vtest_place.name) # type: sh_lvalue_t
713 else:
714 var_name = vtest_place.name
715 var_index = vtest_place.index
716 UP_var_index = var_index
717
718 with tagswitch(var_index) as case:
719 if case(a_index_e.Int):
720 var_index = cast(a_index.Int, UP_var_index)
721 lval = sh_lvalue.Indexed(
722 var_name, var_index.i, loc.Missing)
723 elif case(a_index_e.Str):
724 var_index = cast(a_index.Str, UP_var_index)
725 lval = sh_lvalue.Keyed(var_name, var_index.s,
726 loc.Missing)
727 else:
728 raise AssertionError()
729
730 state.OshLanguageSetValue(self.mem, lval,
731 value.Str(rhs_str))
732 return True
733
734 else:
735 return False
736
737 elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
738 if is_falsey:
739 # The arg is the error message
740 error_part_vals = [] # type: List[part_value_t]
741 self._EvalRhsWordToParts(op.arg_word, error_part_vals,
742 eval_flags)
743 error_str = _DecayPartValuesToString(
744 error_part_vals, self.splitter.GetJoinChar())
745
746 #
747 # Display fancy/helpful error
748 #
749 if vtest_place.name is None:
750 var_name = '???'
751 else:
752 var_name = vtest_place.name
753
754 if 0:
755 # This hint is nice, but looks too noisy for now
756 op_str = lexer.LazyStr(tok)
757 if tok.id == Id.VTest_ColonQMark:
758 why = 'empty or unset'
759 else:
760 why = 'unset'
761
762 self.errfmt.Print_(
763 "Hint: operator %s means a variable can't be %s" %
764 (op_str, why), tok)
765
766 if val.tag() == value_e.Undef:
767 actual = 'unset'
768 else:
769 actual = 'empty'
770
771 if len(error_str):
772 suffix = ': %r' % error_str
773 else:
774 suffix = ''
775 e_die("Var %s is %s%s" % (var_name, actual, suffix),
776 blame_token)
777
778 else:
779 return False
780
781 else:
782 raise AssertionError(tok.id)
783
784 def _Length(self, val, token):
785 # type: (value_t, Token) -> int
786 """Returns the length of the value, for ${#var}"""
787 UP_val = val
788 with tagswitch(val) as case:
789 if case(value_e.Str):
790 val = cast(value.Str, UP_val)
791 # NOTE: Whether bash counts bytes or chars is affected by LANG
792 # environment variables.
793 # Should we respect that, or another way to select? set -o
794 # count-bytes?
795
796 # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
797 try:
798 length = string_ops.CountUtf8Chars(val.s)
799 except error.Strict as e:
800 # Add this here so we don't have to add it so far down the stack.
801 # TODO: It's better to show BOTH this CODE an the actual DATA
802 # somehow.
803 e.location = token
804
805 if self.exec_opts.strict_word_eval():
806 raise
807 else:
808 # NOTE: Doesn't make the command exit with 1; it just returns a
809 # length of -1.
810 self.errfmt.PrettyPrintError(e, prefix='warning: ')
811 return -1
812
813 elif case(value_e.BashArray):
814 val = cast(value.BashArray, UP_val)
815 # There can be empty placeholder values in the array.
816 length = 0
817 for s in val.strs:
818 if s is not None:
819 length += 1
820
821 elif case(value_e.BashAssoc):
822 val = cast(value.BashAssoc, UP_val)
823 length = len(val.d)
824
825 else:
826 raise error.TypeErr(
827 val, "Length op expected Str, BashArray, BashAssoc", token)
828
829 return length
830
831 def _Keys(self, val, token):
832 # type: (value_t, Token) -> value_t
833 """Return keys of a container, for ${!array[@]}"""
834
835 UP_val = val
836 with tagswitch(val) as case:
837 if case(value_e.BashArray):
838 val = cast(value.BashArray, UP_val)
839 # translation issue: tuple indices not supported in list comprehensions
840 #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
841 indices = [] # type: List[str]
842 for i, s in enumerate(val.strs):
843 if s is not None:
844 indices.append(str(i))
845 return value.BashArray(indices)
846
847 elif case(value_e.BashAssoc):
848 val = cast(value.BashAssoc, UP_val)
849 assert val.d is not None # for MyPy, so it's not Optional[]
850
851 # BUG: Keys aren't ordered according to insertion!
852 return value.BashArray(val.d.keys())
853
854 else:
855 raise error.TypeErr(val, 'Keys op expected Str', token)
856
857 def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
858 # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
859 """Handles indirect expansion like ${!var} and ${!a[0]}.
860
861 Args:
862 blame_tok: 'foo' for ${!foo}
863 """
864 UP_val = val
865 with tagswitch(val) as case:
866 if case(value_e.Undef):
867 return value.Undef # ${!undef} is just weird bash behavior
868
869 elif case(value_e.Str):
870 val = cast(value.Str, UP_val)
871 bvs_part = self.unsafe_arith.ParseVarRef(val.s, blame_tok)
872 return self._VarRefValue(bvs_part, quoted, vsub_state,
873 vtest_place)
874
875 elif case(value_e.BashArray): # caught earlier but OK
876 e_die('Indirect expansion of array')
877
878 elif case(value_e.BashAssoc): # caught earlier but OK
879 e_die('Indirect expansion of assoc array')
880
881 else:
882 raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
883
884 def _ApplyUnarySuffixOp(self, val, op):
885 # type: (value_t, suffix_op.Unary) -> value_t
886 assert val.tag() != value_e.Undef
887
888 op_kind = consts.GetKind(op.op.id)
889
890 if op_kind == Kind.VOp1:
891 # NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
892 # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
893 # shortcut for constant strings.
894 arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
895 assert arg_val.tag() == value_e.Str
896
897 UP_val = val
898 with tagswitch(val) as case:
899 if case(value_e.Str):
900 val = cast(value.Str, UP_val)
901 s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
902 has_extglob)
903 #log('%r %r -> %r', val.s, arg_val.s, s)
904 new_val = value.Str(s) # type: value_t
905
906 elif case(value_e.BashArray):
907 val = cast(value.BashArray, UP_val)
908 # ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
909 strs = [] # type: List[str]
910 for s in val.strs:
911 if s is not None:
912 strs.append(
913 string_ops.DoUnarySuffixOp(
914 s, op.op, arg_val.s, has_extglob))
915 new_val = value.BashArray(strs)
916
917 elif case(value_e.BashAssoc):
918 val = cast(value.BashAssoc, UP_val)
919 strs = []
920 for s in val.d.values():
921 strs.append(
922 string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
923 has_extglob))
924 new_val = value.BashArray(strs)
925
926 else:
927 raise error.TypeErr(
928 val, 'Unary op expected Str, BashArray, BashAssoc',
929 op.op)
930
931 else:
932 raise AssertionError(Kind_str(op_kind))
933
934 return new_val
935
936 def _PatSub(self, val, op):
937 # type: (value_t, suffix_op.PatSub) -> value_t
938
939 pat_val, has_extglob = self.EvalWordToPattern(op.pat)
940 # Extended globs aren't supported because we only translate * ? etc. to
941 # ERE. I don't think there's a straightforward translation from !(*.py) to
942 # ERE! You would need an engine that supports negation? (Derivatives?)
943 if has_extglob:
944 e_die('extended globs not supported in ${x//GLOB/}', op.pat)
945
946 if op.replace:
947 replace_val = self.EvalRhsWord(op.replace)
948 # Can't have an array, so must be a string
949 assert replace_val.tag() == value_e.Str, replace_val
950 replace_str = cast(value.Str, replace_val).s
951 else:
952 replace_str = ''
953
954 # note: doesn't support self.exec_opts.extglob()!
955 regex, warnings = glob_.GlobToERE(pat_val.s)
956 if len(warnings):
957 # TODO:
958 # - Add 'shopt -s strict_glob' mode and expose warnings.
959 # "Glob is not in CANONICAL FORM".
960 # - Propagate location info back to the 'op.pat' word.
961 pass
962 #log('regex %r', regex)
963 replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
964
965 with tagswitch(val) as case2:
966 if case2(value_e.Str):
967 str_val = cast(value.Str, val)
968 s = replacer.Replace(str_val.s, op)
969 val = value.Str(s)
970
971 elif case2(value_e.BashArray):
972 array_val = cast(value.BashArray, val)
973 strs = [] # type: List[str]
974 for s in array_val.strs:
975 if s is not None:
976 strs.append(replacer.Replace(s, op))
977 val = value.BashArray(strs)
978
979 elif case2(value_e.BashAssoc):
980 assoc_val = cast(value.BashAssoc, val)
981 strs = []
982 for s in assoc_val.d.values():
983 strs.append(replacer.Replace(s, op))
984 val = value.BashArray(strs)
985
986 else:
987 raise error.TypeErr(
988 val, 'Pat Sub op expected Str, BashArray, BashAssoc',
989 op.slash_tok)
990
991 return val
992
993 def _Slice(self, val, op, var_name, part):
994 # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
995
996 begin = self.arith_ev.EvalToInt(op.begin)
997
998 # Note: bash allows lengths to be negative (with odd semantics), but
999 # we don't allow that right now.
1000 has_length = False
1001 length = -1
1002 if op.length:
1003 has_length = True
1004 length = self.arith_ev.EvalToInt(op.length)
1005
1006 try:
1007 arg0_val = None # type: value.Str
1008 if var_name is None: # $* or $@
1009 arg0_val = self.mem.GetArg0()
1010 val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
1011 except error.Strict as e:
1012 if self.exec_opts.strict_word_eval():
1013 raise
1014 else:
1015 self.errfmt.PrettyPrintError(e, prefix='warning: ')
1016 with tagswitch(val) as case2:
1017 if case2(value_e.Str):
1018 val = value.Str('')
1019 elif case2(value_e.BashArray):
1020 val = value.BashArray([])
1021 else:
1022 raise NotImplementedError()
1023 return val
1024
1025 def _Nullary(self, val, op, var_name):
1026 # type: (value_t, Token, Optional[str]) -> Tuple[value.Str, bool]
1027
1028 UP_val = val
1029 quoted2 = False
1030 op_id = op.id
1031 if op_id == Id.VOp0_P:
1032 with tagswitch(val) as case:
1033 if case(value_e.Str):
1034 str_val = cast(value.Str, UP_val)
1035 prompt = self.prompt_ev.EvalPrompt(str_val)
1036 # readline gets rid of these, so we should too.
1037 p = prompt.replace('\x01', '').replace('\x02', '')
1038 result = value.Str(p)
1039 else:
1040 e_die("Can't use @P on %s" % ui.ValType(val), op)
1041
1042 elif op_id == Id.VOp0_Q:
1043 with tagswitch(val) as case:
1044 if case(value_e.Str):
1045 str_val = cast(value.Str, UP_val)
1046
1047 # TODO: use fastfunc.ShellEncode or
1048 # fastfunc.PosixShellEncode()
1049 result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
1050 # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in bash
1051 quoted2 = True
1052 elif case(value_e.BashArray):
1053 array_val = cast(value.BashArray, UP_val)
1054
1055 # TODO: should use fastfunc.ShellEncode
1056 tmp = [j8_lite.MaybeShellEncode(s) for s in array_val.strs]
1057 result = value.Str(' '.join(tmp))
1058 else:
1059 e_die("Can't use @Q on %s" % ui.ValType(val), op)
1060
1061 elif op_id == Id.VOp0_a:
1062 # We're ONLY simluating -a and -A, not -r -x -n for now. See
1063 # spec/ble-idioms.test.sh.
1064 chars = [] # type: List[str]
1065 with tagswitch(val) as case:
1066 if case(value_e.BashArray):
1067 chars.append('a')
1068 elif case(value_e.BashAssoc):
1069 chars.append('A')
1070
1071 if var_name is not None: # e.g. ${?@a} is allowed
1072 cell = self.mem.GetCell(var_name)
1073 if cell:
1074 if cell.readonly:
1075 chars.append('r')
1076 if cell.exported:
1077 chars.append('x')
1078 if cell.nameref:
1079 chars.append('n')
1080
1081 result = value.Str(''.join(chars))
1082
1083 else:
1084 e_die('Var op %r not implemented' % lexer.TokenVal(op), op)
1085
1086 return result, quoted2
1087
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Apply a whole-array bracket op: the [@] or [*] in ${a[@]} / ${a[*]}.

    Side effect: sets vsub_state.join_array, which tells the caller whether
    the array should later be joined into a single string.

    Args:
      val: value of the variable before the bracket op
      part: the BracedVarSub; its bracket_op must be a WholeArray
      quoted: whether the substitution appears in double quotes
      vsub_state: in/out state for this substitution

    Returns:
      val unchanged, except that Undef becomes the result of
      _EmptyBashArrayOrError() when no test op is present.
    """
    op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id

    # The two operators differ only in the join rule and the error text.
    if op_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        str_err = "Can't index string with @"
    elif op_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        str_err = "Can't index string with *"
    else:
        raise AssertionError(op_id)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # With a test op like ${a[@]:-x}, Undef is left for the test op
        if not vsub_state.has_test_op:
            val = self._EmptyBashArrayOrError(part.token)
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die(str_err, loc.WordPart(part))
    # BashArray (and any other type) passes through untouched

    return val
1122
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Look up one element for a subscript like ${a[i+1]} or ${A[key]}.

    Side effect: vtest_place.index is set to the evaluated subscript, an
    a_index.Int for a BashArray or an a_index.Str for a BashAssoc.

    Args:
      val: value of the variable being indexed
      part: the BracedVarSub; its bracket_op must be an ArrayIndex
      vtest_place: out param that receives the index

    Returns:
      value.Str for an element that exists, value.Undef for one that
      doesn't.  An Undef input is returned as is.

    Raises:
      Dies (e_die) when val is a Str; error.TypeErr for any other type that
      isn't a BashArray or BashAssoc.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    tag = val.tag()

    if tag == value_e.Undef:
        pass  # it will be checked later

    elif tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer" % part.var_name,
              part.token)

    elif tag == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        int_index = self.arith_ev.EvalToInt(index_expr)
        vtest_place.index = a_index.Int(int_index)  # out param

        item = GetArrayItem(array_val.strs, int_index)
        if item is not None:
            val = value.Str(item)
        else:
            val = value.Undef

    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, UP_val)
        # Location could also be attached to bracket_op?  But
        # arith_expr.VarSub works OK too
        str_key = self.arith_ev.EvalWordToString(
            index_expr, blame_loc=location.TokenForArith(index_expr))
        vtest_place.index = a_index.Str(str_key)  # out param

        item = assoc_val.d.get(str_key)
        if item is not None:
            val = value.Str(item)
        else:
            val = value.Undef

    else:
        raise error.TypeErr(val, 'Index op expected BashArray, BashAssoc',
                            loc.WordPart(part))

    return val
1172
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Each part is evaluated with the QUOTED flag.  The result can contain
    arrays:

      $ a=(1 2); b=(3); $ c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    and parts can nest:

      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']

    Args:
      parts: the word parts between the double quotes
      part_vals: output param to append to.
    """
    if len(parts):
        for inner in parts:
            self._EvalWordPart(inner, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []), instead
    # of (DoubleQuoted [Literal '']).  This is better but it means we
    # have to produce the quoted empty string ourselves.
    part_vals.append(Piece('', True, False))
1199
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a DoubleQuoted part and flatten the result to one string.

    This is for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors raised while concatenating are blamed on dq_part.left.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    flat = self._ConcatPartVals(evaluated, dq_part.left)
    return flat
1209
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Join a BashArray into a single value.Str, the way $* decays.

    The separator comes from self.splitter.GetJoinChar().  Entries that are
    None are left out.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in val.strs:
        if item is not None:
            present.append(item)

    return value.Str(join_char.join(present))
1217
def _EmptyStrOrError(self, val, token):
    # type: (value_t, Token) -> value_t
    """Replace an Undef value with '', or die if 'nounset' is on.

    Args:
      val: the value to check; anything other than Undef is returned as is
      token: names the variable in the error message, and is the location
             blamed for it

    Returns:
      val itself, or value.Str('') when val is Undef and nounset is off.
    """
    if val.tag() == value_e.Undef:
        if self.exec_opts.nounset():
            tok_str = lexer.TokenVal(token)
            # Report $foo as 'foo'
            if tok_str.startswith('$'):
                name = tok_str[1:]
            else:
                name = tok_str
            e_die('Undefined variable %r' % name, token)

        return value.Str('')

    return val
1229
def _EmptyBashArrayOrError(self, token):
    # type: (Token) -> value_t
    """Return an empty BashArray, or die if 'nounset' is on.

    Args:
      token: names the array in the error message, and is the location
             blamed for it.  Must not be None.
    """
    assert token is not None
    if not self.exec_opts.nounset():
        return value.BashArray([])

    e_die('Undefined array %r' % lexer.TokenVal(token), token)
1237
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a ${...} substitution, if there is one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().

    Without one, a named array or assoc array is referred to as a scalar,
    e.g. ${a}.  That either decays (when ShouldArrayDecay() allows it) or is
    a fatal error.  The check is skipped when vsub_state.is_type_query is
    set.

    Returns:
      The value after the bracket op.
    """
    bracket = part.bracket_op
    if bracket:
        tag = bracket.tag()
        if tag == bracket_op_e.WholeArray:
            return self._WholeArray(val, part, quoted, vsub_state)
        if tag == bracket_op_e.ArrayIndex:
            return self._ArrayIndex(val, part, vtest_place)
        raise AssertionError(tag)

    # no bracket op
    var_name = vtest_place.name
    if var_name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val
    if vsub_state.is_type_query:
        return val

    has_no_other_op = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(var_name, self.exec_opts, has_no_other_op):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)" %
            var_name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1267
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Evaluate the variable and bracket op of a BracedVarSub to a value_t.

    This duplicates some logic from _EvalBracedVarSub, but stops after the
    bracket op and returns the value instead of appending part values.

    Unless the eval_unsafe_arith option is on, the bracket op is evaluated
    with _allow_command_sub turned off.

    Side effect: vtest_place.name is set for a named variable.
    """
    tok_id = part.token.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # 2. Bracket op
    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        return self._EvalBracketOp(val, part, quoted, vsub_state,
                                   vtest_place)

    with state.ctx_Option(self.mutable_opts,
                          [option_i._allow_command_sub], False):
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)
    return val
1297
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub node, with optional prefix_op, bracket_op and
            suffix_op
      part_vals: output param to append to.
      quoted: whether the substitution appears inside double quotes

    There are three early returns that append to part_vals themselves: the
    ${!prefix@} name query, the ${#x} length, and a test op for which
    _ApplyTestOp() returns true.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only a quoted @ keeps the names as separate array items;
                # every other combination is joined into one Piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.token.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.token.id, quoted, vsub_state)

    # Look at the suffix op BEFORE the bracket op, only to set flags on
    # vsub_state that change how the bracket op and prefix op treat Undef
    # and arrays.  The suffix op itself is applied further down.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        # UP_op is also read later in this function, but only on paths
        # where suffix_op_ is set.
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

            elif case(suffix_op_e.Unary):
                suffix_op_ = cast(suffix_op.Unary, UP_op)

                # Do the _EmptyStrOrError/_EmptyBashArrayOrError up front, EXCEPT in
                # the case of Kind.VTest
                if consts.GetKind(suffix_op_.op.id) == Kind.VTest:
                    vsub_state.has_test_op = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            if not vsub_state.has_test_op:  # undef -> '' BEFORE length
                val = self._EmptyStrOrError(val, part.token)

            n = self._Length(val, part.token)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray):
                if vsub_state.has_test_op:
                    # ${!a[@]-'default'} is a non-fatal runtime error in bash.  Here
                    # it's fatal.
                    # has_test_op is only set in the Unary case above, so
                    # UP_op is bound and is a suffix_op.Unary here.
                    op_tok = cast(suffix_op.Unary, UP_op).op
                    e_die('Test operation not allowed with ${!array[@]}',
                          op_tok)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.token)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.token, quoted, vsub_state,
                                       vtest_place)

                if not vsub_state.has_test_op:  # undef -> '' AFTER indirection
                    val = self._EmptyStrOrError(val, part.token)

        else:
            raise AssertionError(part.prefix_op)

    else:
        if not vsub_state.has_test_op:  # undef -> '' if no prefix op
            val = self._EmptyStrOrError(val, part.token)

    # 3. Suffix op
    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.token):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    # quoted2 is set by _Nullary(), so the result also counts as quoted
    # when that op asks for it.
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1493
1494 def _ConcatPartVals(self, part_vals, location):
1495 # type: (List[part_value_t], loc_t) -> str
1496
1497 strs = [] # type: List[str]
1498 for part_val in part_vals:
1499 UP_part_val = part_val
1500 with tagswitch(part_val) as case:
1501 if case(part_value_e.String):
1502 part_val = cast(Piece, UP_part_val)
1503 s = part_val.s
1504
1505 elif case(part_value_e.Array):
1506 part_val = cast(part_value.Array, UP_part_val)
1507 if self.exec_opts.strict_array():
1508 # Examples: echo f > "$@"; local foo="$@"
1509 e_die("Illegal array word part (strict_array)",
1510 location)
1511 else:
1512 # It appears to not respect IFS
1513 # TODO: eliminate double join()?
1514 tmp = [s for s in part_val.strs if s is not None]
1515 s = ' '.join(tmp)
1516
1517 else:
1518 raise AssertionError()
1519
1520 strs.append(s)
1521
1522 return ''.join(strs)
1523
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate one ${...} substitution, unquoted, and return it as a string.

    Noted in the original as being for strings in YSH expressions, e.g.

        var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)

    # blame ${ location
    flat = self._ConcatPartVals(evaluated, part.left)
    return flat
1534
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate $name, $1, or a special variable and append the result.

    A named array or assoc array either decays (when ShouldArrayDecay()
    allows it) or is a fatal error.  Undef becomes '' or an error, via
    _EmptyStrOrError().

    Args:
      part: the SimpleVarSub node
      part_vals: output param to append to
      quoted: whether the substitution appears inside double quotes
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    tok_id = tok.id
    if tok_id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._EmptyStrOrError(val, tok)

    # An array is joined only if the special var evaluation asked for it
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1575
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate one $name substitution, unquoted, and return it as a string.

    Noted in the original as being for strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors raised while concatenating are blamed on node.tok.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    flat = self._ConcatPartVals(evaluated, node.tok)
    return flat
1585
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends, in order: the opening operator, each evaluated arm with a '|'
    between arms, and the closing ')'.  The punctuation pieces are unquoted
    and are not split.
    """
    op_tok = part.op
    # Id.ExtGlob_Comma is spelled '@(' in the output
    opener = '@(' if op_tok.id == Id.ExtGlob_Comma else lexer.LazyStr(op_tok)

    # Do NOT split these.
    part_vals.append(Piece(opener, False, False))

    is_first = True
    for arm in part.arms:
        if not is_first:
            part_vals.append(Piece('|', False, False))  # separator
        is_first = False

        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1603
1604 def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
1605 # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
1606 """Translate a flattened WORD with an ExtGlob part to string patterns.
1607
1608 We need both glob and fnmatch patterns. _EvalExtGlob does the
1609 flattening.
1610 """
1611 for i, part_val in enumerate(part_vals):
1612 UP_part_val = part_val
1613 with tagswitch(part_val) as case:
1614 if case(part_value_e.String):
1615 part_val = cast(Piece, UP_part_val)
1616 if part_val.quoted and not self.exec_opts.noglob():
1617 s = glob_.GlobEscape(part_val.s)
1618 else:
1619 # e.g. the @( and | in @(foo|bar) aren't quoted
1620 s = part_val.s
1621 glob_parts.append(s)
1622 fnmatch_parts.append(s) # from _EvalExtGlob()
1623
1624 elif case(part_value_e.Array):
1625 # Disallow array
1626 e_die(
1627 "Extended globs and arrays can't appear in the same word",
1628 w)
1629
1630 elif case(part_value_e.ExtGlob):
1631 part_val = cast(part_value.ExtGlob, UP_part_val)
1632 # keep appending fnmatch_parts, but repplace glob_parts with '*'
1633 self._TranslateExtGlob(part_val.part_vals, w, [],
1634 fnmatch_parts)
1635 glob_parts.append('*')
1636
1637 else:
1638 raise AssertionError()
1639
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to
      flags: bit flags; only QUOTED and IS_SUBST are read here

    Array and assoc array literals, and ZshVarSub, are fatal errors here.
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always quoted and never split, regardless of flags
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            # Always quoted and never split, regardless of flags
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token tells command subs from process subs
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The third Piece argument is the negation of 'quoted' here
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens themselves are emitted as unquoted pieces around
            # the evaluated child, if any
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1756
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word appends one empty Piece, whose 'quoted' bit follows the
    QUOTED flag and whose third field is the opposite.  A Compound word is
    handed to _EvalWordToParts() with the same flags.
    """
    is_quoted = bool(eval_flags & QUOTED)

    UP_w = w
    tag = w.tag()

    if tag == rhs_word_e.Empty:
        part_vals.append(Piece('', is_quoted, not is_quoted))
        return

    if tag == rhs_word_e.Compound:
        w = cast(CompoundWord, UP_w)
        self._EvalWordToParts(w, part_vals, eval_flags=eval_flags)
        return

    raise AssertionError()
1772
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Appends to part_vals.  Note that this is a TREE.

    Words with an extended glob part are a special case, because of the way
    we use glob() and then fnmatch(..., FNM_EXTMATCH) to implement extended
    globs.  It's hard to carry that extra information all the way past the
    word splitting stage.

    OSH semantic limitations: If a word has an extended glob part, then
    1. It can't have an array
    2. Word splitting of unquoted words isn't respected

    Raises:
      error.FailGlob when ExpandExtended() returns a negative count; dies
      (e_die) when an extended glob appears and neither EXTGLOB_FILES nor
      EXTGLOB_NESTED was requested.
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # We parsed a word_part.ExtGlob(); what happens next depends on what the
    # caller REQUESTED.
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        #log('evaluated %s', evaluated)
        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1829
1830 def _PartValsToString(self, part_vals, w, eval_flags, strs):
1831 # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
1832 """Helper for EvalWordToString, similar to _ConcatPartVals() above.
1833
1834 Note: arg 'w' could just be a span ID
1835 """
1836 for part_val in part_vals:
1837 UP_part_val = part_val
1838 with tagswitch(part_val) as case:
1839 if case(part_value_e.String):
1840 part_val = cast(Piece, UP_part_val)
1841 s = part_val.s
1842 if part_val.quoted:
1843 if eval_flags & QUOTE_FNMATCH:
1844 # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
1845 s = glob_.GlobEscape(s)
1846 elif eval_flags & QUOTE_ERE:
1847 s = glob_.ExtendedRegexEscape(s)
1848 strs.append(s)
1849
1850 elif case(part_value_e.Array):
1851 part_val = cast(part_value.Array, UP_part_val)
1852 if self.exec_opts.strict_array():
1853 # Examples: echo f > "$@"; local foo="$@"
1854
1855 # TODO: This attributes too coarsely, to the word rather than the
1856 # parts. Problem: the word is a TREE of parts, but we only have a
1857 # flat list of part_vals. The only case where we really get arrays
1858 # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
1859 e_die(
1860 "This word should yield a string, but it contains an array",
1861 w)
1862
1863 # TODO: Maybe add detail like this.
1864 #e_die('RHS of assignment should only have strings. '
1865 # 'To assign arrays, use b=( "${a[@]}" )')
1866 else:
1867 # It appears to not respect IFS
1868 tmp = [s for s in part_val.strs if s is not None]
1869 s = ' '.join(tmp) # TODO: eliminate double join()?
1870 strs.append(s)
1871
1872 elif case(part_value_e.ExtGlob):
1873 part_val = cast(part_value.ExtGlob, UP_part_val)
1874
1875 # Extended globs are only allowed where we expect them!
1876 if not bool(eval_flags & QUOTE_FNMATCH):
1877 e_die('extended glob not allowed in this word', w)
1878
1879 # recursive call
1880 self._PartValsToString(part_val.part_vals, w, eval_flags,
1881 strs)
1882
1883 else:
1884 raise AssertionError()
1885
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when
                  the evaluated parts are stringified

    Returns:
      value.Str with the concatenated result.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path.  QUOTE_FNMATCH etc. breaks this optimization, so it's only
    # tried when no flags are set.
    if eval_flags == 0:
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            return value.Str(fast_str)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for word_part_ in w.parts:
        # The parts are evaluated with flags 0; eval_flags is only used
        # below, which is slightly confusing
        self._EvalWordPart(word_part_, evaluated, 0)

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, fragments)
    return value.Str(''.join(fragments))
1913
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    The parts are stringified with QUOTE_FNMATCH.

    Returns:
      (pattern, has_extglob).  An Empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for word_part_ in w.parts:
        # Parts are evaluated with flags 0; quoting is applied below
        self._EvalWordPart(word_part_, evaluated, 0)
        if word_part_.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, fragments)
    pattern = value.Str(''.join(fragments))
    return pattern, saw_extglob
1934
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
    are handled here.

    Similar to ExprEvaluator.PluginCall().

    Returns:
      The evaluated string.  On a fatal runtime error, an I/O error, or
      Ctrl-C, a placeholder string describing the failure is returned
      instead of raising.
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as err:
            result = value.Str('<Runtime error: %s>' %
                               err.UserErrorString())

        except (IOError, OSError) as err:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(err))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
1957
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.Str('') for an Empty word
      value.BashArray for a lone array literal like a=(1 2); its words are
        brace-expanded and then evaluated with EvalWordSequence()
      value.BashAssoc for a lone assoc array literal; each key and value is
        evaluated with EvalWordToString()
      value.Str for everything else
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so test it against rhs_word_e, like the Empty
    # check above and like EvalWordToPattern(), rather than against word_e.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
1994
1995 def _EvalWordFrame(self, frame, argv):
1996 # type: (List[Piece], List[str]) -> None
1997 all_empty = True
1998 all_quoted = True
1999 any_quoted = False
2000
2001 #log('--- frame %s', frame)
2002
2003 for piece in frame:
2004 if len(piece.s):
2005 all_empty = False
2006
2007 if piece.quoted:
2008 any_quoted = True
2009 else:
2010 all_quoted = False
2011
2012 # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
2013 if all_empty and not any_quoted:
2014 return
2015
2016 # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
2017 # don't do word splitting or globbing.
2018 if all_quoted:
2019 tmp = [piece.s for piece in frame]
2020 a = ''.join(tmp)
2021 argv.append(a)
2022 return
2023
2024 will_glob = not self.exec_opts.noglob()
2025
2026 # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
2027 frags = [] # type: List[str]
2028 for piece in frame:
2029 if will_glob and piece.quoted:
2030 frag = glob_.GlobEscape(piece.s)
2031 else:
2032 # If we have a literal \, then we turn it into \\\\.
2033 # Splitting takes \\\\ -> \\
2034 # Globbing takes \\ to \ if it doesn't match
2035 frag = _BackslashEscape(piece.s)
2036
2037 if piece.do_split:
2038 frag = _BackslashEscape(frag)
2039 else:
2040 frag = self.splitter.Escape(frag)
2041
2042 frags.append(frag)
2043
2044 flat = ''.join(frags)
2045 #log('flat: %r', flat)
2046
2047 args = self.splitter.SplitForWordEval(flat)
2048
2049 # space=' '; argv $space"". We have a quoted part, but we CANNOT elide.
2050 # Add it back and don't bother globbing.
2051 if len(args) == 0 and any_quoted:
2052 argv.append('')
2053 return
2054
2055 #log('split args: %r', args)
2056 for a in args:
2057 if glob_.LooksLikeGlob(a):
2058 n = self.globber.Expand(a, argv)
2059 if n < 0:
2060 # TODO: location info, with span IDs carried through the frame
2061 raise error.FailGlob('Pattern %r matched no files' % a,
2062 loc.Missing)
2063 else:
2064 argv.append(glob_.GlobUnescape(a))
2065
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Helper for _EvalAssignBuiltin.

    Splitting and globbing are disabled for assignment builtins, so each
    non-empty frame is simply joined into one arg.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty frame.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        texts = [piece.s for piece in frame]
        result.append(''.join(texts))  # no split or glob

    #log('argv: %s', result)
    return result
2085
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag*  pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin; passed through to the result.
      arg0: Name of the builtin as a string; becomes flags[0].
      words: All words of the command, including any leading meta builtins.
      meta_offset: Index of the assignment keyword within `words`; evaluation
        starts at the word after it.

    Returns:
      A cmd_value.Assign holding the flags, a list of flag locations kept
      parallel to the flags, and the evaluated assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel lists: one location per flag string.
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name.
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the '=', e.g. x=
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning "function" for
                    # all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # Names after -f / -F are passed through as plain
                        # args.  Record their location too, so that
                        # flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2172
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate `words` as an assignment builtin if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin when consts.LookupAssignBuiltin
      recognizes arg0, otherwise None.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None  # not an assignment builtin

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2180
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from an evaluated part.

    Only an unquoted string part can name an assignment builtin; anything
    else yields None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2189
def SimpleEvalWordSequence2(self, words, allow_assign):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    No word splitting is done.  Globbing only happens for words that
    glob_.LooksLikeStaticGlob() accepts.

    Args:
      words: The words to evaluate.
      allow_assign: If True, the first word is checked for an assignment
        builtin, and is never globbed.

    Returns:
      A cmd_value.Assign if an assignment builtin was detected, otherwise a
      cmd_value.Argv whose argv and locations are parallel lists.

    Raises:
      error.FailGlob: if the globber reports a negative count for a pattern.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if allow_assign and i == meta_offset:
            first = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            # One location per string produced by this word
            for _ in xrange(len(first)):
                arg_locs.append(w)
            argv.extend(first)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            n_matched = self.globber.Expand(pat.s, argv)
            if n_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            for _ in xrange(n_matched):
                arg_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for pv in part_vals:
                log('  %s', pv)

        # Still need to process
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for fr in frames:
                log('  %s', fr)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a, which
        # disallows such expressions at parse time.
        for frame in frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue

            pieces = []  # type: List[str]
            for piece in frame:
                pieces.append(piece.s)
            argv.append(''.join(pieces))  # no split or glob
            arg_locs.append(w)

    return cmd_value.Argv(argv, arg_locs, None, None, None, None)
2251
def EvalWordSequence2(self, words, allow_assign=False):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: The words to evaluate.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      A cmd_value.Assign if allow_assign is set and an assignment builtin is
      detected (statically or dynamically); otherwise a cmd_value.Argv whose
      argv and locations are parallel lists.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    # first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    # "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    # off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        # Fast path: the word evaluates directly to exactly one string.
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Measure right before this word's frames are evaluated.  A counter
        # carried across iterations would go stale, because the fast path
        # above also appends to strs, and locs would then get extra entries.
        n_before = len(strs)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs: one per arg this word produced.
        for _ in xrange(len(strs) - n_before):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    return cmd_value.Argv(strs, locs, None, None, None, None)
2354
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings, for arrays and for loops.

    Assignment builtins are not allowed here, so EvalWordSequence2 is called
    with its default allow_assign=False and must return a cmd_value.Argv.
    """
    result = self.EvalWordSequence2(words)
    assert result.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2366
2367
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs via self.shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Not set here; CheckCircularDeps() asserts it was assigned later.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators and executor this object uses are set."""
        assert self.arith_ev is not None
        # The expr_ev check is disabled for pure OSH:
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout as a part value.

        @(...) yields a part_value.Array of J8 lines; any other command sub
        yields a Piece that is split only when it is not quoted.

        Raises:
          error.Structured: with status 4 if the @(...) output fails to decode.
        """
        output = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(output, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(output)
        except error.Decode as decode_err:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, decode_err.Message(),
                                   cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path from RunProcessSub."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2414
2415
# Placeholder string that CompletionWordEvaluator._EvalCommandSub returns in
# place of real command sub output, since it has no executor to run commands.
_DUMMY = '__NO_COMMAND_SUB__'
2417
2418
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    instead of being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees the
    inner command as the last one, and knows that it is not at the end of the
    line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators this object depends on are set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder value without running the command sub."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # @(...) evaluates to an array, so the placeholder is a 1-item array
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder Piece without running the process sub."""
        # pretend it's quoted; no split or glob
        placeholder = '__NO_PROCESS_SUB__'
        return Piece(placeholder, True, False)
2458
2459
2460# vim: sw=4