osh/word_eval.py

OILS / osh / word_eval.py View on Github | oilshell.org

2436 lines, 1475 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	AssignArg,
37	a_index,
38	a_index_e,
39	VTestPlace,
40	VarSubState,
41	Piece,
42	)
43	from _devbuild.gen.option_asdl import option_i, builtin_i
44	from _devbuild.gen.value_asdl import (
45	value,
46	value_e,
47	value_t,
48	sh_lvalue,
49	sh_lvalue_t,
50	)
51	from core import error
52	from core import pyos
53	from core import pyutil
54	from core import state
55	from core import ui
56	from core import util
57	from data_lang import j8
58	from data_lang import j8_lite
59	from core.error import e_die
60	from frontend import consts
61	from frontend import lexer
62	from frontend import location
63	from mycpp import mops
64	from mycpp.mylib import log, tagswitch, NewDict
65	from osh import braces
66	from osh import glob_
67	from osh import string_ops
68	from osh import word_
69	from ysh import expr_eval
70	from ysh import val_ops
71
72	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
73
74	if TYPE_CHECKING:
75	from _devbuild.gen.syntax_asdl import word_part_t
76	from _devbuild.gen.option_asdl import builtin_t
77	from core import optview
78	from core.state import Mem
79	from core.ui import ErrorFormatter
80	from core.vm import _Executor
81	from osh.split import SplitContext
82	from osh import prompt
83	from osh import sh_expr_eval
84
# Bit flags understood by _EvalWordToParts and _EvalWordPart.  Each of the two
# functions only looks at a subset of them.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# Bit flags for EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# These names are accepted both as strings and as arrays for compatibility:
# ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid, and the same goes for
# ${FUNCNAME} and ${BASH_LINENO}.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
100
101
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may be treated as ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it's only allowed for a plain var sub whose name is listed in
    _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
107
108
def DecayArray(val):
    # type: (value_t) -> value_t
    """Turn an array value into the value of its element 0 / key '0'.

    Returns value.Undef when the BashArray is empty, its first slot is unset,
    or the BashAssoc has no '0' key.  Any other value type is a programming
    error.
    """
    tag = val.tag()
    first = None  # type: Optional[str]

    if tag == value_e.BashArray:
        items = cast(value.BashArray, val).strs
        if len(items):
            first = items[0]
    elif tag == value_e.BashAssoc:
        mapping = cast(value.BashAssoc, val).d
        if '0' in mapping:
            first = mapping['0']
    else:
        raise AssertionError(tag)

    if first is None:
        return value.Undef
    return value.Str(first)
125
126
def GetArrayItem(strs, index):
    # type: (List[str], int) -> Optional[str]
    """Look up strs[index], where a negative index counts from the end.

    Returns None when the index is out of range.  The returned entry itself
    may also be None, because the list is a sparse representation.
    """
    size = len(strs)
    pos = index + size if index < 0 else index

    if pos < 0 or pos >= size:
        return None
    # TODO: strs->index() has a redundant check for (i < 0)
    return strs[pos]
141
142
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    All of these cases need to be detected:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no
    word splitting), so it seems hard to do this in meta_osh.Builtin() or
    meta_osh.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return builtin_id in (builtin_i.builtin, builtin_i.command)
159
160
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an unquoted string naming 'builtin' or 'command'."""
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
169
170
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse an argument to declare, export, etc. at runtime.

    This is the dynamic fallback for the static parsing done below.  It dies
    with blame_word as the location if arg doesn't match
    consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    m = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if m is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    var_name = m[1]
    # m[2] is used for grouping; ERE doesn't have non-capturing groups
    op = m[3]
    assert op is not None, op

    if len(op) == 0:  # declare NAME  (no operator, so no value)
        return AssignArg(var_name, None, False, blame_word)

    # declare NAME=value; a leading '+' in the operator means append
    rhs = value.Str(m[4])  # type: Optional[value_t]
    return AssignArg(var_name, rhs, op[0] == '+', blame_word)
197
198
199	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
200	def _BackslashEscape(s):
201	# type: (str) -> str
202	"""Double up backslashes.
203
204	Useful for strings about to be globbed and strings about to be IFS
205	escaped.
206	"""
207	return s.replace('\\', '\\\\')
208
209
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a var sub into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Strings (and stringified YSH values) become a Piece whose third field is
    the negation of 'quoted'; arrays become part_value.Array.  part_loc is
    only used as the location of the type error.
    """
    tag = val.tag()
    unquoted = not quoted

    if tag == value_e.Undef:
        # This happens in the case of ${undef+foo}.  We skipped
        # _EmptyStrOrError, but we have to append to the empty string.
        return Piece('', quoted, unquoted)

    if tag == value_e.Str:
        return Piece(cast(value.Str, val).s, quoted, unquoted)

    if tag == value_e.BashArray:
        return part_value.Array(cast(value.BashArray, val).strs)

    if tag == value_e.BashAssoc:
        # bash behavior: splice values!
        return part_value.Array(cast(value.BashAssoc, val).d.values())

    # Cases added for YSH
    # value_e.List is also here - we use val_ops.Stringify()'s error message
    if tag in (value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
               value_e.Eggex, value_e.List):
        s = val_ops.Stringify(val, loc.Missing)
        return Piece(s, quoted, unquoted)

    raise error.TypeErr(val, "Can't substitute into word",
                        loc.WordPart(part_loc))
249
250
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group a flat list of part_value (String or Array) into frames.

    A frame is a portion of a word that results in zero or more args.  Frames
    are never joined with each other.  The idea exists because of arrays like
    "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    first_frame = []  # type: List[Piece]
    frames = [first_frame]

    for part_val in part_vals:
        tag = part_val.tag()

        if tag == part_value_e.String:
            # Strings always extend the most recent frame
            frames[-1].append(cast(Piece, part_val))

        elif tag == part_value_e.Array:
            array_part = cast(part_value.Array, part_val)

            seen_defined = False
            for s in array_part.strs:
                if s is None:
                    continue  # ignore undefined array entries

                # Array parts are always quoted; otherwise they would have
                # decayed to a string.
                piece = Piece(s, True, False)
                if seen_defined:
                    # Every entry after the first defined one starts a new
                    # frame.
                    frames.append([piece])
                else:
                    # The first defined entry joins the current frame
                    frames[-1].append(piece)
                    seen_defined = True

        else:
            raise AssertionError()

    return frames
318
319
320	# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values to one string, e.g. to decay ${a=x"$@"x}.

    The defined entries of each Array part are joined with join_char; the
    resulting chunks are then concatenated with no separator.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        tag = part_val.tag()

        if tag == part_value_e.String:
            chunks.append(cast(Piece, part_val).s)

        elif tag == part_value_e.Array:
            # TODO: Eliminate double join for speed?
            defined = []  # type: List[str]
            for s in cast(part_value.Array, part_val).strs:
                if s is not None:
                    defined.append(s)
            chunks.append(join_char.join(defined))

        else:
            raise AssertionError()

    return ''.join(chunks)
339
340
def _PerformSlice(
        val,  # type: value_t
        begin,  # type: int
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Slice a string by UTF-8 characters, or an array by entries.

    Args:
      val: the value to slice; Str and BashArray are supported
      begin: start offset.  A negative value counts from the end.
      length: only consulted when has_length is true.  For strings, a
        negative length is an end POSITION counted from the end of the
        string.  For arrays, a negative length is a fatal error.
      has_length: whether a length was given at all
      part: the var sub, used as the location of errors
      arg0_val: if not None, it is prepended to the array before slicing,
        because "begin" for positional arguments ($@ and $*) counts $0

    Returns:
      value.Str for a Str input, value.BashArray for a BashArray input.
      Unset array entries are dropped and don't count towards the length.

    Dies (via e_die) on a negative array length and on associative arrays;
    raises error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            if begin < 0:  # Compute offset with unicode
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray):  # Slice array entries.
            val = cast(value.BashArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            # Quirk: "begin" for positional arguments ($@ and $*) counts $0.
            if arg0_val is not None:
                orig = [arg0_val.s]
                orig.extend(val.strs)
            else:
                orig = val.strs

            n = len(orig)
            if begin < 0:
                i = n + begin  # ${@:-3} starts counting from the end
                if i < 0:
                    # The start lies before the first entry.  Without this
                    # check, the negative index would wrap around in Python
                    # and return entries from the END of the array.  Produce
                    # an empty slice instead.
                    i = n
            else:
                i = begin

            strs = []  # type: List[str]
            count = 0
            while i < n:
                if has_length and count == length:  # length could be 0
                    break
                s = orig[i]
                if s is not None:  # Unset elements don't count towards the length
                    strs.append(s)
                    count += 1
                i += 1

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
422
423
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator"""

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a string.  Subclasses must override this."""
        raise NotImplementedError()
435
436
437	def _GetDollarHyphen(exec_opts):
438	# type: (optview.Exec) -> str
439	chars = [] # type: List[str]
440	if exec_opts.interactive():
441	chars.append('i')
442
443	if exec_opts.errexit():
444	chars.append('e')
445	if exec_opts.noglob():
446	chars.append('f')
447	if exec_opts.noexec():
448	chars.append('n')
449	if exec_opts.nounset():
450	chars.append('u')
451	# NO letter for pipefail?
452	if exec_opts.xtrace():
453	chars.append('x')
454	if exec_opts.noclobber():
455	chars.append('C')
456
457	# bash has:
458	# - c for sh -c, i for sh -i (mksh also has this)
459	# - h for hashing (mksh also has this)
460	# - B for brace expansion
461	return ''.join(chars)
462
463
class TildeEvaluator(object):
    """Expands ~ and ~user to a home directory."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME first, then a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, then ask the OS.  This is what bash
        # does.
        home = self.mem.GetValue('HOME')
        if home.tag() != value_e.Str:
            return pyos.GetMyHomeDir()
        return cast(value.Str, home).s

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        If the lookup fails, this dies under strict_tilde, and otherwise
        returns the ~ or ~user text literally.
        """
        user_name = part.user_name

        if user_name is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(user_name)

        if home_dir is not None:
            return home_dir

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user_name is None:
            return '~'
        return '~' + user_name  # mycpp doesn't have +=
505
506
507	class AbstractWordEvaluator(StringWordEvaluator):
508	"""Abstract base class for word evaluators.
509
510	Public entry points:
511	EvalWordToString EvalForPlugin EvalRhsWord
512	EvalWordSequence EvalWordSequence2
513	"""
514
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators passed in and create the Globber.

    The arith / expr / prompt evaluators and unsafe_arith start out as None;
    nothing in this constructor sets them.
    """
    # Dependencies received from the caller
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    # Not available yet at construction time
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    self.globber = glob_.Globber(exec_opts)
540
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract; subclasses must override this."""
    raise NotImplementedError()
544
545	def _EvalCommandSub(self, cs_part, quoted):
546	# type: (CommandSub, bool) -> part_value_t
547	"""Abstract since it has a side effect."""
548	raise NotImplementedError()
549
550	def _EvalProcessSub(self, cs_part):
551	# type: (CommandSub) -> part_value_t
552	"""Abstract since it has a side effect."""
553	raise NotImplementedError()
554
555	def _EvalVarNum(self, var_num):
556	# type: (int) -> value_t
557	assert var_num >= 0
558	return self.mem.GetArgNum(var_num)
559
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $? and so forth.

    For $@ and $* the result is a BashArray of the argv, and
    vsub_state.join_array (an out param) records whether it should be
    decayed to a string.
    """
    # $@ is special -- it needs to know whether it is in a double quoted
    # context.
    #
    # - If it's $@ in a double quoted context, return an ARRAY.
    # - If it's $@ in a normal context, return a STRING, which then will be
    #   subject to splitting.
    if op_id == Id.VSub_At or op_id == Id.VSub_Star:
        argv = self.mem.GetArgv()
        # "$@" evaluates to an array and $@ should be decayed, while $* and
        # "$*" are both decayed.
        vsub_state.join_array = (op_id == Id.VSub_Star) or (not quoted)
        return value.BashArray(argv)

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
589
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator: ${a:-x} ${a:+x} ${a:=x} ${a:?x} and the
    variants without a colon.

    Args:
      val: the value being tested
      op: the operator token and its argument word
      quoted: whether the var sub is in a quoted context
      part_vals: out param that the evaluated argument word is appended to
      vtest_place: name and optional index to assign to, for = and :=
      blame_token: location for errors

    Returns:
      Whether part_vals was mutated

    Dies (via e_die) when ? or :? finds the value unset/empty, or when = or
    := would have to assign to a place without a name.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat an empty string like unset
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Point the error at the var sub, like the ? and :? errors
                # below do.
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
761
def _Length(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Str: number of UTF-8 characters, or -1 after printing a warning if the
      count fails and strict_word_eval is off
    BashArray: number of entries that are set
    BashAssoc: number of keys
    """
    tag = val.tag()

    if tag == value_e.Str:
        s = cast(value.Str, val).s
        # NOTE: Whether bash counts bytes or chars is affected by LANG
        # environment variables.
        # Should we respect that, or another way to select?  set -o
        # count-bytes?

        # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
        try:
            num_chars = string_ops.CountUtf8Chars(s)
        except error.Strict as e:
            # Add this here so we don't have to add it so far down the stack.
            # TODO: It's better to show BOTH this CODE and the actual DATA
            # somehow.
            e.location = token

            if self.exec_opts.strict_word_eval():
                raise

            # NOTE: Doesn't make the command exit with 1; it just returns a
            # length of -1.
            self.errfmt.PrettyPrintError(e, prefix='warning: ')
            return -1
        return num_chars

    if tag == value_e.BashArray:
        # There can be empty placeholder values in the array.
        num_set = 0
        for s in cast(value.BashArray, val).strs:
            if s is not None:
                num_set += 1
        return num_set

    if tag == value_e.BashAssoc:
        return len(cast(value.BashAssoc, val).d)

    raise error.TypeErr(
        val, "Length op expected Str, BashArray, BashAssoc", token)
808
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    BashArray: the indices of the entries that are set, as strings
    BashAssoc: the keys of the dict

    Raises error.TypeErr, located at token, for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # translation issue: tuple indices not supported in list comprehensions
            #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
            indices = []  # type: List[str]
            for i, s in enumerate(val.strs):
                if s is not None:
                    indices.append(str(i))
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            return value.BashArray(val.d.keys())

        else:
            # The message names the types this op actually accepts
            raise error.TypeErr(val, 'Keys op expected BashArray, BashAssoc',
                                token)
834
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    Args:
      val: the value of the variable holding the reference
      blame_tok: 'foo' for ${!foo}; used as the location of all errors
      quoted, vsub_state, vtest_place: passed through to _VarRefValue

    Returns:
      value.Undef if val is Undef; otherwise the value of the var ref that
      the string parses to.

    Dies (via e_die) on arrays and assoc arrays; raises error.TypeErr for
    other non-string values.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            return value.Undef  # ${!undef} is just weird bash behavior

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            bvs_part = self.unsafe_arith.ParseVarRef(val.s, blame_tok)
            return self._VarRefValue(bvs_part, quoted, vsub_state,
                                     vtest_place)

        elif case(value_e.BashArray):  # caught earlier but OK
            e_die('Indirect expansion of array', blame_tok)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            e_die('Indirect expansion of assoc array', blame_tok)

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
861
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a Kind.VOp1 operator to a string, or to each array entry.

    Returns value.Str for a Str input.  For BashArray and BashAssoc, the
    result is a BashArray holding the transformed set entries / dict values.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
    # shortcut for constant strings.
    arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert arg_val.tag() == value_e.Str
    pat = arg_val.s
    op_tok = op.op

    tag = val.tag()

    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        return value.Str(
            string_ops.DoUnarySuffixOp(str_val.s, op_tok, pat, has_extglob))

    if tag == value_e.BashArray:
        # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this too.
        out = []  # type: List[str]
        for s in cast(value.BashArray, val).strs:
            if s is not None:
                out.append(
                    string_ops.DoUnarySuffixOp(s, op_tok, pat, has_extglob))
        return value.BashArray(out)

    if tag == value_e.BashAssoc:
        out = []
        for s in cast(value.BashAssoc, val).d.values():
            out.append(
                string_ops.DoUnarySuffixOp(s, op_tok, pat, has_extglob))
        return value.BashArray(out)

    raise error.TypeErr(val, 'Unary op expected Str, BashArray, BashAssoc',
                        op_tok)
913
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply a pattern substitution to a string, or to each array entry.

    The glob pattern is translated to an ERE.  Returns value.Str for a Str
    input; for BashArray and BashAssoc the result is a BashArray of the
    replaced set entries / dict values.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py)
    # to ERE!  You would need an engine that supports negation?
    # (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    #
    # The warnings are ignored for now.  TODO:
    # - Add 'shopt -s strict_glob' mode and expose warnings.
    #   "Glob is not in CANONICAL FORM".
    # - Propagate location info back to the 'op.pat' word.
    regex, warnings = glob_.GlobToERE(pat_val.s)
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    tag = val.tag()

    if tag == value_e.Str:
        return value.Str(replacer.Replace(cast(value.Str, val).s, op))

    if tag == value_e.BashArray:
        replaced = []  # type: List[str]
        for s in cast(value.BashArray, val).strs:
            if s is not None:
                replaced.append(replacer.Replace(s, op))
        return value.BashArray(replaced)

    if tag == value_e.BashAssoc:
        replaced = []
        for s in cast(value.BashAssoc, val).d.values():
            replaced.append(replacer.Replace(s, op))
        return value.BashArray(replaced)

    raise error.TypeErr(val,
                        'Pat Sub op expected Str, BashArray, BashAssoc',
                        op.slash_tok)
970
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the offsets of a slice op and apply it with _PerformSlice.

    var_name is None for $* and $@; in that case the value of $0 is passed
    along as well.

    If slicing raises error.Strict and strict_word_eval is off, a warning is
    printed and an empty string or empty array is returned.
    """
    begin = self.arith_ev.EvalToInt(op.begin)

    # The length is optional.  What a negative length means is decided by
    # _PerformSlice.
    has_length = False
    length = -1
    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        return _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise
        self.errfmt.PrettyPrintError(e, prefix='warning: ')

    # Non-strict fallback: an empty value of the same type
    tag = val.tag()
    if tag == value_e.Str:
        return value.Str('')
    if tag == value_e.BashArray:
        return value.BashArray([])
    raise NotImplementedError()
1002
def _Nullary(self, val, op, var_name):
    # type: (value_t, Token, Optional[str]) -> Tuple[value.Str, bool]
    """Apply a nullary var op: @P, @Q or @a.

    Args:
      val: the value the op is applied to
      op: the operator token; also the location of errors
      var_name: name to look up attributes for with @a, or None

    Returns:
      (result, quoted2), where quoted2 is only set to True for @Q on a
      string.

    Dies (via e_die) if @P or @Q is applied to an unsupported type, or if
    the op isn't implemented.
    """
    UP_val = val
    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt_str = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt_str.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray):
                array_val = cast(value.BashArray, UP_val)

                # The array representation is sparse: unset entries are
                # None, and must be skipped rather than encoded.
                # TODO: should use fastfunc.ShellEncode
                tmp = [
                    j8_lite.MaybeShellEncode(s) for s in array_val.strs
                    if s is not None
                ]
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # We're ONLY simulating -a and -A, not -r -x -n for now.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1063
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the [@] and [*] bracket ops.

    Sets vsub_state.join_array (an out param), and then checks the value:

    - Undef is replaced by the result of _EmptyBashArrayOrError(), unless
      there's a test op
    - Str is an error under strict_array
    - everything else is returned unchanged
    """
    op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if op_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        str_err = "Can't index string with @"
    elif op_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        str_err = "Can't index string with *"
    else:
        raise AssertionError(op_id)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        if not vsub_state.has_test_op:
            val = self._EmptyBashArrayOrError(part.token)
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die(str_err, loc.WordPart(part))

    return val
1098
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process an array index like ${a[i+1]} or an assoc array key.

    vtest_place.index is an out param: it's set to the evaluated index or
    key.  The result is value.Undef if there's no entry at the index / key.
    An Undef input is returned unchanged.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    tag = val.tag()

    if tag == value_e.Undef:
        return val  # it will be checked later

    if tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer" % part.var_name,
              part.token)

    item = None  # type: Optional[str]

    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        index = self.arith_ev.EvalToInt(anode)
        vtest_place.index = a_index.Int(index)  # out param

        item = GetArrayItem(array_val.strs, index)

    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        # Location could also be attached to bracket_op?  But
        # arith_expr.VarSub works OK too
        key = self.arith_ev.EvalWordToString(
            anode, blame_loc=location.TokenForArith(anode))

        vtest_place.index = a_index.Str(key)  # out param
        item = assoc_val.d.get(key)

    else:
        raise error.TypeErr(val, 'Index op expected BashArray, BashAssoc',
                            loc.WordPart(part))

    if item is None:
        return value.Undef
    return value.Str(item)
1148
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
      parts: the word parts between the double quotes.
      part_vals: output param to append to.

    A double quoted part can still produce arrays, e.g.

      $ a=(1 2); b=(3); c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    and with multiple nested parts:

      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']
    """
    if len(parts) != 0:
        for child in parts:
            self._EvalWordPart(child, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []), instead
    # of (DoubleQuoted [Literal '']), so the empty quoted piece has to be
    # produced here.
    part_vals.append(Piece('', True, False))
1175
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions, e.g.

      var x = "$foo-${foo}"

    The parts are evaluated with _EvalDoubleQuoted() and joined with
    _ConcatPartVals(), blaming the opening quote (dq_part.left).
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    joined = self._ConcatPartVals(evaluated, dq_part.left)
    return joined
1185
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array to a string, as $* does.

    The non-None elements of val.strs are joined with the character
    returned by self.splitter.GetJoinChar().
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in val.strs:
        if item is not None:  # skip unset slots
            present.append(item)

    return value.Str(join_char.join(present))
1193
def _EmptyStrOrError(self, val, token):
    # type: (value_t, Token) -> value_t
    """Turn an Undef value into '' or a fatal error, depending on nounset.

    Args:
      val: the value to check; anything other than Undef is returned as is.
      token: the variable token, used for the error message and location.
    """
    if val.tag() == value_e.Undef:
        if self.exec_opts.nounset():
            tok_str = lexer.TokenVal(token)
            # Report the name without a leading $
            if tok_str.startswith('$'):
                name = tok_str[1:]
            else:
                name = tok_str
            e_die('Undefined variable %r' % name, token)

        return value.Str('')

    return val
1205
def _EmptyBashArrayOrError(self, token):
    # type: (Token) -> value_t
    """Return an empty BashArray, or die if nounset is on.

    Args:
      token: the variable token, used for the error message and location.
    """
    assert token is not None

    if not self.exec_opts.nounset():
        return value.BashArray([])

    e_die('Undefined array %r' % lexer.TokenVal(token), token)
1213
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a BracedVarSub, if any, to val.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().

    Without one, a named BashArray / BashAssoc that isn't the subject of a
    type query either decays (when ShouldArrayDecay() allows it) or is a
    fatal error.  Every other value is returned unchanged.
    """
    bracket = part.bracket_op
    if bracket:
        with tagswitch(bracket) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())

        return val

    # No bracket op from here on.
    name = vtest_place.name
    if name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val
    if vsub_state.is_type_query:
        return val

    can_decay = ShouldArrayDecay(name, self.exec_opts,
                                 not (part.prefix_op or part.suffix_op))
    if not can_decay:
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1243
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a BracedVarSub refers to and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    tok_id = part.token.id
    if tok_id == Id.VSub_Name:
        name = part.var_name
        vtest_place.name = name
        val = self.mem.GetValue(name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # Evaluate the bracket op with _allow_command_sub turned off
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1273
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution, appending the result to part_vals.

    Args:
      part: the BracedVarSub to evaluate.
      part_vals: output param to append to.
      quoted: whether the substitution appears inside double quotes.

    There are three early returns, each of which appends its own result:
    the ${!prefix@} name query, the ${#var} length, and a test op for
    which _ApplyTestOp() returns true.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form
                # is joined into one piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.token.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.token.id, quoted, vsub_state)

    # Pre-scan the suffix op: it only sets flags on vsub_state here.  The
    # op itself is applied further down, after the bracket and prefix ops.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

            elif case(suffix_op_e.Unary):
                suffix_op_ = cast(suffix_op.Unary, UP_op)

                # Do the _EmptyStrOrError/_EmptyBashArrayOrError up front, EXCEPT in
                # the case of Kind.VTest
                if consts.GetKind(suffix_op_.op.id) == Kind.VTest:
                    vsub_state.has_test_op = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            if not vsub_state.has_test_op:  # undef -> '' BEFORE length
                val = self._EmptyStrOrError(val, part.token)

            n = self._Length(val, part.token)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray):
                if vsub_state.has_test_op:
                    # ${!a[@]-'default'} is a non-fatal runtime error in bash.  Here
                    # it's fatal.
                    # has_test_op is only set in the Unary branch above, so
                    # UP_op is bound and is a suffix_op.Unary here.
                    op_tok = cast(suffix_op.Unary, UP_op).op
                    e_die('Test operation not allowed with ${!array[@]}',
                          op_tok)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.token)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.token, quoted, vsub_state,
                                       vtest_place)

                if not vsub_state.has_test_op:  # undef -> '' AFTER indirection
                    val = self._EmptyStrOrError(val, part.token)

        else:
            raise AssertionError(part.prefix_op)

    else:
        if not vsub_state.has_test_op:  # undef -> '' if no prefix op
            val = self._EmptyStrOrError(val, part.token)

    # 3. Suffix op
    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.token):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1469
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate evaluated part values into one string.

    Args:
      part_vals: String and Array part values; anything else is a bug.
      location: blamed when an array is rejected under strict_array.

    Returns:
      The concatenation.  Arrays contribute their non-None elements joined
      by a single space.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                chunks.append(piece.s)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = []  # type: List[str]
                for item in arr.strs:
                    if item is not None:
                        present.append(item)
                chunks.append(' '.join(present))

            else:
                raise AssertionError()

    return ''.join(chunks)
1499
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution to a single str.

    Used for double quoted strings in YSH expressions, e.g.

      var x = "$foo-${foo}"

    The substitution is evaluated as unquoted, and errors from the
    concatenation blame the ${ location (part.left).
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    joined = self._ConcatPartVals(evaluated, part.left)
    return joined
1510
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a substitution without braces, like $foo, $1 or $@.

    Args:
      part: the SimpleVarSub to evaluate.
      part_vals: output param to append to.
      quoted: whether the substitution appears inside double quotes.
    """
    token = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if token.id == Id.VSub_DollarName:
        name = lexer.LazyStr(token)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, token)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif token.id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(token)))

    else:
        val = self._EvalSpecialVar(token.id, quoted, vsub_state)

    val = self._EmptyStrOrError(val, token)

    # An array only stays an array when join_array wasn't requested.
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1551
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a substitution like $foo to a single str.

    Used for double quoted strings in YSH expressions, e.g.

      var x = "$foo-${foo}"

    The substitution is evaluated as unquoted, and errors from the
    concatenation blame the token (node.tok).
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    joined = self._ConcatPartVals(evaluated, node.tok)
    return joined
1561
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Args:
      part: the extended glob word part; its arms are evaluated in order.
      part_vals: output param.  Receives the opening operator, the
        evaluated arms separated by '|', and the closing ')'.

    The operator, separator and closing paren pieces are appended as
    unquoted and non-splitting.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare '|'.  A backslash in front of it
            # would make the pipe a literal character in the resulting
            # pattern instead of separating alternatives.
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1579
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened word with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word.
      w: the word, used as the location for errors.
      glob_parts: output param; extglob parts are replaced by '*'.
      fnmatch_parts: output param; extglob parts are kept.
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                # Unquoted pieces, like the operator and separators added
                # by _EvalExtGlob(), are passed through as is.
                text = piece.s
                if piece.quoted and not self.exec_opts.noglob():
                    text = glob_.GlobEscape(text)
                glob_parts.append(text)
                fnmatch_parts.append(text)

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                nested = cast(part_value.ExtGlob, UP_pv)
                # Keep appending to fnmatch_parts, but replace the nested
                # glob parts with '*' by discarding them into a fresh list.
                self._TranslateExtGlob(nested.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1615
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate; the work is dispatched on its tag.
      part_vals: output param to append to.
      flags: bit flags, of which QUOTED and IS_SUBST are read here.
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are handled by EvalRhsWord(); they're errors here.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always quoted, never split
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            # Always quoted, never split
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token distinguishes command subs from process subs
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is split only when unquoted
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are unquoted pieces around the optional child word
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1732
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word yields a single empty Piece, which is quoted if QUOTED
    is in eval_flags and split otherwise.  A Compound word is handed to
    _EvalWordToParts() with the same flags.
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            is_quoted = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', is_quoted, not is_quoted))

        elif case(rhs_word_e.Compound):
            compound = cast(CompoundWord, UP_w)
            self._EvalWordToParts(compound,
                                  part_vals,
                                  eval_flags=eval_flags)

        else:
            raise AssertionError()
1748
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Appends to part_vals.  Note that this is a TREE.

    Words with an extended glob part are a special case because of the way
    we use glob() and then fnmatch(..., FNM_EXTMATCH) to implement
    extended globs.  It's hard to carry that extra information all the way
    past the word splitting stage.

    OSH semantic limitations: if a word has an extended glob part, then
    1. It can't have an array
    2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    if bool(eval_flags & EXTGLOB_FILES):
        # Caller REQUESTED extglob evaluation, AND we parsed
        # word_part.ExtGlob().  Treat the WHOLE word as a pattern.  We need
        # TWO VARIANTS of the word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        for_glob = []  # type: List[str]
        for_fnmatch = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, for_glob, for_fnmatch)

        glob_pat = ''.join(for_glob)
        fnmatch_pat = ''.join(for_fnmatch)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1805
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the evaluated part values; ExtGlob values are walked
        recursively.
      w: the word, used as the location for errors.  (It could just be a
        span ID.)
      eval_flags: may contain QUOTE_FNMATCH or QUOTE_ERE to select how
        quoted pieces are escaped.
      strs: output param to append to.
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                text = piece.s
                # Quoted text is escaped for the kind of pattern the caller
                # is building, if any.
                if piece.quoted and (eval_flags & QUOTE_FNMATCH):
                    text = glob_.GlobEscape(text)
                elif piece.quoted and (eval_flags & QUOTE_ERE):
                    text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = []  # type: List[str]
                for item in arr.strs:
                    if item is not None:
                        present.append(item)
                strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                ext = cast(part_value.ExtGlob, UP_pv)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(ext.part_vals, w, eval_flags, strs)

            else:
                raise AssertionError()
1861
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord.
      eval_flags: can contain a quoting algorithm, which is applied when
        the part values are joined.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # The fast path is only valid without flags: QUOTE_FNMATCH etc. breaks
    # the optimization.
    if eval_flags == 0:
        quick = word_.FastStrEval(w)
        if quick is not None:
            return value.Str(quick)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, out)
    return value.Str(''.join(out))
1889
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    Returns:
      (pattern, has_extglob).  The pattern is built with QUOTE_FNMATCH,
      and an Empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, evaluated, 0)
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, out)
    pattern = value.Str(''.join(out))
    return pattern, saw_extglob
1910
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
    are handled here: failures are turned into a placeholder string
    instead of propagating.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as runtime_err:
            result = value.Str('<Runtime error: %s>' %
                               runtime_err.UserErrorString())

        except (IOError, OSError) as io_err:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(io_err))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
1933
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Args:
      UP_w: the right-hand side; either Empty or a CompoundWord.

    Returns:
      value.Str('') for an Empty word, value.BashArray for a word that is
      a single array literal like a=(1 2), value.BashAssoc for a word that
      is a single associative array literal, and otherwise the value.Str
      from EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so check its tag against rhs_word_e, like
    # EvalWordToPattern() does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Keys and values are each evaluated to a single string
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
1970
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args, appending to argv.

    Args:
      frame: the pieces that make up the frame.
      argv: output param to append to.

    A frame whose pieces are all empty and unquoted is elided.  A frame
    whose pieces are all quoted becomes exactly one arg, with no splitting
    or globbing.  Otherwise the pieces are escaped, joined, split with
    self.splitter, and each resulting arg that looks like a glob is
    expanded with self.globber.

    Raises:
      error.FailGlob: if self.globber.Expand() returns a negative count.
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    # First pass: classify the frame
    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        tmp = [piece.s for piece in frame]
        a = ''.join(tmp)
        argv.append(a)
        return

    will_glob = not self.exec_opts.noglob()

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # Layer 1: protect the piece from globbing
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Layer 2: protect the piece from splitting
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for a in args:
        if glob_.LooksLikeGlob(a):
            # Expand() appends the matches to argv itself
            n = self.globber.Expand(a, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the frame
                raise error.FailGlob('Pattern %r matched no files' % a,
                                     loc.Missing)
        else:
            argv.append(glob_.GlobUnescape(a))
2041
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Helper for _EvalAssignBuiltin.

    Splitting and globbing are disabled for assignment builtins: the
    pieces of each frame are simply concatenated.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue

        texts = []  # type: List[str]
        for piece in frame:
            texts.append(piece.s)
        result.append(''.join(texts))  # no split or glob

    return result
2061
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Evaluate the words of an assignment builtin into flags and pairs.

    Handles both static and dynamic assignment, e.g.

        x='foo=bar'
        local a=(1 2) $x

    Grammar:

        ('builtin' | 'command')* keyword flag* pair*
        flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin; passed through to the
        result.
      arg0: The builtin name; it becomes flags[0].
      words: All words of the command, including any leading meta
        builtins.
      meta_offset: Index in 'words' of the assignment keyword.  Only the
        words after it are evaluated here.

    Returns:
      cmd_value.Assign holding the flags, their locations (flag_locs is
      kept parallel to flags), and the evaluated assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    # NOTE(review): when meta_offset > 0, words[0] is the meta builtin
    # rather than the keyword itself -- confirm this is the intended
    # location for flags[0].
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name.
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the '=', e.g. 'local x='
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # After -f / -F, the remaining args are names, not
                        # pairs.  Record the location too, so that
                        # flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2148
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate 'words' as an assignment builtin if arg0 names one.

    Returns None when consts.LookupAssignBuiltin() doesn't recognize arg0;
    otherwise returns the result of _EvalAssignBuiltin().
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2156
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr(), but for an evaluated part value.

    Only an unquoted String part can name an assignment builtin; any other
    part value gives None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2165
def SimpleEvalWordSequence2(self, words, allow_assign):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are never split.  Only words that look like static globs are
    expanded by the globber.

    Args:
      words: The words to evaluate.
      allow_assign: If True, the first word may select an assignment
        builtin, in which case a cmd_value.Assign is returned.

    Returns:
      cmd_value.Argv with parallel lists of strings and word locations,
      or the value from _DetectAssignBuiltinStr() for assignment builtins.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if allow_assign and i == meta_offset:
            first = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first) == 1:
                assign_val = self._DetectAssignBuiltinStr(
                    first[0], words, meta_offset)
                if assign_val:
                    return assign_val

            for s in first:
                argv.append(s)
                arg_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            n_matched = self.globber.Expand(pat.s, argv)
            if n_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            # One location per string that the globber appended.
            for _ in xrange(n_matched):
                arg_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        # We will still allow x"${a[@]}"x, though it's deprecated by @a,
        # which disallows such expressions at parse time.
        for frame in _MakeWordFrames(part_vals):
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            tmp = [piece.s for piece in frame]
            argv.append(''.join(tmp))  # no split or glob
            arg_locs.append(w)

    return cmd_value.Argv(argv, arg_locs, None, None, None, None)
2227
def EvalWordSequence2(self, words, allow_assign=False):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: The words to evaluate.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign if an assignment builtin was detected (statically
      or dynamically) in the command position; otherwise cmd_value.Argv,
      whose strings and locations are parallel lists.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            # Fast path: exactly one string and one location.
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Measure right before this word's frames are evaluated.  A running
        # counter that is only advanced on this slow path would miss the
        # strings added by the fast path above, and then too many locations
        # would be appended for this word.
        n_before = len(strs)

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        for _ in xrange(len(strs) - n_before):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    return cmd_value.Argv(strs, locs, None, None, None, None)
2330
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings.

    For arrays and for loops, which don't allow assignment builtins, so
    the result is always a cmd_value.Argv.
    """
    UP_result = self.EvalWordSequence2(words, False)

    # With allow_assign=False, only the Argv variant can come back.
    assert UP_result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, UP_result)
    return argv_val.argv
2342
2343
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs via an executor.

    The shell_ex attribute starts out as None and must be assigned before
    use; CheckCircularDeps() asserts that it was.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Assigned after construction; checked in CheckCircularDeps().
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been assigned."""
        assert self.arith_ev is not None
        # self.expr_ev is deliberately not checked: disabled for pure OSH
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value.

        A sub whose left token is Id.Left_AtParen yields an Array of the J8
        lines in stdout.  Any other command sub yields a single Piece that
        is subject to splitting and globbing only when not quoted.

        Raises:
          error.Structured: with status 4 if the J8 lines can't be decoded.
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS (so this does not
        # go through self.splitter)
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it produced as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2390
2391
# Placeholder string that CompletionWordEvaluator substitutes for the output
# of a command sub, since it can't run commands.
_DUMMY = '__NO_COMMAND_SUB__'
2393
2394
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs are never run; they evaluate to fixed
    placeholder strings instead.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been assigned."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder value without running the command sub.

        The shape of the result depends on the left token: an Array for
        Id.Left_AtParen, otherwise a Piece.
        """
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder Piece without running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2434
2435
2436	# vim: sw=4