osh/word_eval.py

OILS / osh / word_eval.py View on Github | oilshell.org

2438 lines, 1475 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	AssignArg,
37	a_index,
38	a_index_e,
39	VTestPlace,
40	VarSubState,
41	Piece,
42	)
43	from _devbuild.gen.option_asdl import option_i, builtin_i
44	from _devbuild.gen.value_asdl import (
45	value,
46	value_e,
47	value_t,
48	sh_lvalue,
49	sh_lvalue_t,
50	)
51	from core import error
52	from core import pyos
53	from core import pyutil
54	from core import state
55	from core import ui
56	from core import util
57	from data_lang import j8
58	from data_lang import j8_lite
59	from core.error import e_die
60	from frontend import consts
61	from frontend import lexer
62	from frontend import location
63	from mycpp import mops
64	from mycpp.mylib import log, tagswitch, NewDict
65	from osh import braces
66	from osh import glob_
67	from osh import string_ops
68	from osh import word_
69	from ysh import expr_eval
70	from ysh import val_ops
71
72	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
73
74	if TYPE_CHECKING:
75	from _devbuild.gen.syntax_asdl import word_part_t
76	from _devbuild.gen.option_asdl import builtin_t
77	from core import optview
78	from core.state import Mem
79	from core.ui import ErrorFormatter
80	from core.vm import _Executor
81	from osh.split import SplitContext
82	from osh import prompt
83	from osh import sh_expr_eval
84
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag occupies its own bit, so several can be OR-ed into one int.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# Consulted by ShouldArrayDecay() to let these names decay to a string.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
100
101
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} is allowed to mean ${a[0]}.

    Decay is always allowed unless strict_array is on.  With strict_array on,
    it is still allowed for a plain variable substitution whose name is
    listed in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    if not is_plain_var_sub:
        return False
    return var_name in _STRING_AND_ARRAY
107
108
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray this is the first entry; for a BashAssoc it is the entry
    stored under the key '0'.  A missing (or None) entry yields value.Undef.
    Any other kind of value is a programming error.
    """
    tag = val.tag()
    first = None  # type: Optional[str]

    if tag == value_e.BashArray:
        items = cast(value.BashArray, val).strs
        if len(items):
            first = items[0]
    elif tag == value_e.BashAssoc:
        mapping = cast(value.BashAssoc, val).d
        if '0' in mapping:
            first = mapping['0']
    else:
        raise AssertionError(tag)

    if first is not None:
        return value.Str(first)
    return value.Undef
125
126
def GetArrayItem(strs, index):
    # type: (List[str], int) -> Optional[str]
    """Look up one array entry, accepting negative indices.

    A negative index counts from the end of the list.  Returns None when the
    resulting position is out of range.
    """
    size = len(strs)
    pos = index + size if index < 0 else index

    if pos < 0 or pos >= size:
        return None

    # TODO: strs->index() has a redundant check for (i < 0)
    # note: the entry could itself be None because representation is sparse
    return strs[pos]
141
142
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether the word s names the 'builtin' or 'command' builtin.

    All of these cases have to be detected:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_osh.Builtin() or meta_osh.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
159
160
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether an evaluated first word is an unquoted meta builtin.

    Only an unquoted String part can qualify; everything else is False.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
169
170
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is the fallback for arguments that were not parsed statically.

    Args:
      arg: the evaluated argument string, e.g. NAME, NAME=value, NAME+=value
      blame_word: word to blame on error; also stored in the result

    Dies (via e_die) when arg does not match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API
    # for that, and it probably isn't a bottleneck now
    groups = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if groups is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    name = groups[1]
    # groups[2] is used for grouping; ERE doesn't have non-capturing groups
    operator = groups[3]
    assert operator is not None, operator

    # Defaults describe 'declare NAME': no operator, hence no value
    rhs = None  # type: Optional[value_t]
    is_append = False
    if len(operator):  # declare NAME=
        rhs = value.Str(groups[4])
        is_append = operator[0] == '+'

    return AssignArg(name, rhs, is_append, blame_word)
197
198
199	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
200	def _BackslashEscape(s):
201	# type: (str) -> str
202	"""Double up backslashes.
203
204	Useful for strings about to be globbed and strings about to be IFS
205	escaped.
206	"""
207	return s.replace('\\', '\\\\')
208
209
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: value to convert
      quoted: whether the substitution appeared in a quoted context
      part_loc: word part to blame when the value can't be substituted

    Raises:
      error.TypeErr: for value types that can't be substituted into a word
    """
    UP_val = val
    # Third Piece field is always the inverse of 'quoted' in this function
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _EmptyStrOrError, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            return part_value.Array(array_val.strs)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(assoc_val.d.values())

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            text = val_ops.Stringify(val, loc.Missing)
            return Piece(text, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
249
250
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  Frames can never be
    joined with each other.  The idea exists because of arrays like "$@" and
    "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    all_frames = [frame]

    for part in part_vals:
        with tagswitch(part) as case:
            if case(part_value_e.String):
                # Strings always extend the frame that is currently open
                frame.append(cast(Piece, part))

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, part)

                joined_first = False
                for item in array_part.strs:
                    if item is not None:  # ignore undefined array entries
                        # Arrays parts are always quoted; otherwise they
                        # would have decayed to a string.
                        quoted_piece = Piece(item, True, False)
                        if joined_first:
                            # Later entries each open a new frame
                            frame = [quoted_piece]
                            all_frames.append(frame)  # singleton frame
                        else:
                            # First defined entry joins the open frame
                            frame.append(quoted_piece)
                            joined_first = True

            else:
                raise AssertionError()

    return all_frames
318
319
320	# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. to decay ${a=x"$@"x}.

    String parts are taken as-is.  The defined entries of an Array part are
    joined with join_char.  The per-part results are then concatenated
    without any separator.
    """
    chunks = []  # type: List[str]
    for part in part_vals:
        with tagswitch(part) as case:
            if case(part_value_e.String):
                chunks.append(cast(Piece, part).s)
            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, part)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for item in array_part.strs:
                    if item is not None:
                        defined.append(item)
                chunks.append(join_char.join(defined))
            else:
                raise AssertionError()
    return ''.join(chunks)
339
340
def _PerformSlice(
        val,  # type: value_t
        begin,  # type: int
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Compute the slice ${x:begin} or ${x:begin:length} of a value.

    Args:
      val: a Str (sliced by UTF-8 characters) or a BashArray (sliced by
        entries)
      begin: start offset; a negative number counts from the end
      length: only meaningful when has_length is True.  For strings, a
        negative length is an end POSITION counted from the end.
      has_length: whether a length was given at all
      part: the substitution, used as the location of fatal errors
      arg0_val: $0 when slicing positional arguments ($@ and $*), else None

    Returns:
      value.Str for a Str input, value.BashArray for a BashArray input.

    Raises:
      error.TypeErr: if val is neither Str, BashArray nor BashAssoc

    Dies (via e_die) on associative arrays, and on a negative length for
    arrays.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            if begin < 0:  # Compute offset with unicode
                # Step back one character at a time from the end
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray):  # Slice array entries.
            val = cast(value.BashArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            # Quirk: "begin" for positional arguments ($@ and $*) counts $0.
            if arg0_val is not None:
                orig = [arg0_val.s]
                orig.extend(val.strs)
            else:
                orig = val.strs

            n = len(orig)
            if begin < 0:
                i = n + begin  # ${@:-3} starts counts from the end
                if i < 0:
                    # The offset reaches before the first entry.  Bash
                    # expands to nothing here.  Without this guard, orig[i]
                    # would be indexed with a negative number.
                    i = n
            else:
                i = begin
            strs = []  # type: List[str]
            count = 0
            while i < n:
                if has_length and count == length:  # length could be 0
                    break
                s = orig[i]
                if s is not None:  # Unset elements don't count towards the length
                    strs.append(s)
                    count += 1
                i += 1

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
422
423
class StringWordEvaluator(object):
    """Word evaluation interface that ArithEvaluator / BoolEvaluator rely on.

    This base class holds no state and implements nothing itself.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a single string; must be overridden."""
        raise NotImplementedError()
435
436
437	def _GetDollarHyphen(exec_opts):
438	# type: (optview.Exec) -> str
439	chars = [] # type: List[str]
440	if exec_opts.interactive():
441	chars.append('i')
442
443	if exec_opts.errexit():
444	chars.append('e')
445	if exec_opts.noglob():
446	chars.append('f')
447	if exec_opts.noexec():
448	chars.append('n')
449	if exec_opts.nounset():
450	chars.append('u')
451	# NO letter for pipefail?
452	if exec_opts.xtrace():
453	chars.append('x')
454	if exec_opts.noclobber():
455	chars.append('C')
456
457	# bash has:
458	# - c for sh -c, i for sh -i (mksh also has this)
459	# - h for hashing (mksh also has this)
460	# - B for brace expansion
461	return ''.join(chars)
462
463
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.exec_opts = exec_opts  # consulted for strict_tilde
        self.mem = mem  # consulted for $HOME

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, then ask the OS.  This is what bash
        # does.
        home = self.mem.GetValue('HOME')
        if home.tag() != value_e.Str:
            return pyos.GetMyHomeDir()
        return cast(value.Str, home).s

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails, this is fatal under strict_tilde.  Otherwise
        the ~ or ~user text is returned literally.
        """
        user = part.user_name

        if user is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(user)

        if home_dir is not None:
            return home_dir

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
505
506
507	class AbstractWordEvaluator(StringWordEvaluator):
508	"""Abstract base class for word evaluators.
509
510	Public entry points:
511	EvalWordToString EvalForPlugin EvalRhsWord
512	EvalWordSequence EvalWordSequence2
513	"""
514
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators and create the globber."""
    # Collaborators handed in by the caller
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    self.globber = glob_.Globber(exec_opts)

    # These start out as None; this constructor never sets them.
    # NOTE(review): presumably assigned by the caller after construction and
    # verified by CheckCircularDeps() - confirm.
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith
540
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract: not implemented by this base class."""
    raise NotImplementedError()
544
545	def _EvalCommandSub(self, cs_part, quoted):
546	# type: (CommandSub, bool) -> part_value_t
547	"""Abstract since it has a side effect."""
548	raise NotImplementedError()
549
550	def _EvalProcessSub(self, cs_part):
551	# type: (CommandSub) -> part_value_t
552	"""Abstract since it has a side effect."""
553	raise NotImplementedError()
554
555	def _EvalVarNum(self, var_num):
556	# type: (int) -> value_t
557	assert var_num >= 0
558	return self.mem.GetArgNum(var_num)
559
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $?, $@, $*, $- and so forth.

    $@ is special -- it needs to know whether it is in a double quoted
    context.  Both $@ and $* return the arguments as a BashArray, and
    vsub_state.join_array (an out param) records whether it should decay:

    - "$@" is not joined
    - $@ in a normal context is joined
    - $* and "$*" are both joined
    """
    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    if op_id != Id.VSub_At and op_id != Id.VSub_Star:
        return self.mem.GetSpecialVar(op_id)

    argv = self.mem.GetArgv()
    # Star always joins; At joins only when unquoted
    vsub_state.join_array = (op_id == Id.VSub_Star) or not quoted
    return value.BashArray(argv)
589
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator: ${a:-x} ${a:+x} ${a:=x} ${a:?x} and the
    variants without the colon.

    Args:
      val: the value being tested
      op: operator token (op.op) and its argument word (op.arg_word)
      quoted: whether the substitution is in a quoted context
      part_vals: out param; evaluated parts of the argument word are
        appended to it
      vtest_place: variable name and optional index, the target of = and :=
      blame_token: location for fatal errors

    Returns:
      Whether part_vals was mutated

    Dies (via e_die) for ? and :? when the value is "falsey", and for = and
    := when there is no variable name to assign to.

    ${a:-} returns part_value[]
    ${a:+} returns part_value[]
    ${a:?error} returns error word?
    ${a:=} returns part_value[] but also needs self.mem for side effects.

    So I guess it should return part_value[], and then a flag for raising an
    error, and then a flag for assigning it?
    The original BracedVarSub will have the name.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat an empty string like unset
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Point the error at the substitution being evaluated
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
761
def _Length(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    - Str: number of UTF-8 characters
    - BashArray: number of entries that are not None
    - BashAssoc: number of keys

    A string that can't be counted is fatal under strict_word_eval.
    Otherwise a warning is printed and -1 is returned.

    Raises:
      error.TypeErr: for any other type of value
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                count = string_ops.CountUtf8Chars(str_val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE and the actual
                # DATA somehow.
                e.location = token

                if not self.exec_opts.strict_word_eval():
                    # NOTE: Doesn't make the command exit with 1; it just
                    # returns a length of -1.
                    self.errfmt.PrettyPrintError(e, prefix='warning: ')
                    return -1
                raise

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            # There can be empty placeholder values in the array.
            count = 0
            for item in array_val.strs:
                if item is not None:
                    count += 1

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            count = len(assoc_val.d)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    return count
808
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    Args:
      val: a BashArray or BashAssoc
      token: location to blame on a type error

    Returns:
      A BashArray of strings: for a BashArray, the indices of the entries
      that are not None; for a BashAssoc, its keys.

    Raises:
      error.TypeErr: if val is neither a BashArray nor a BashAssoc
    """

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # translation issue: tuple indices not supported in list comprehensions
            #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
            indices = []  # type: List[str]
            for i, s in enumerate(val.strs):
                if s is not None:
                    indices.append(str(i))
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            return value.BashArray(val.d.keys())

        else:
            # The message names the types this op actually accepts
            raise error.TypeErr(val,
                                'Keys op expected BashArray, BashAssoc',
                                token)
834
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    Args:
      val: value of the variable that holds the reference
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue

    Returns:
      value.Undef when val is Undef; otherwise the value of the variable
      reference that the string val parses to.

    Raises:
      error.TypeErr: if val is not a Str, Undef, BashArray or BashAssoc

    Dies (via e_die) when val is a BashArray or BashAssoc.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            return value.Undef  # ${!undef} is just weird bash behavior

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            bvs_part = self.unsafe_arith.ParseVarRef(val.s, blame_tok)
            return self._VarRefValue(bvs_part, quoted, vsub_state,
                                     vtest_place)

        elif case(value_e.BashArray):  # caught earlier but OK
            # Blame the variable name so the error has a location
            e_die('Indirect expansion of array', blame_tok)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            e_die('Indirect expansion of assoc array', blame_tok)

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
861
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix operator with a pattern argument.

    A Str gives a Str.  The operator is VECTORIZED over containers: a
    BashArray (entries that are None are skipped) or a BashAssoc (its
    values) gives a BashArray of results.

    val must not be Undef, and the operator must be of Kind.VOp1.

    Raises:
      error.TypeErr: for any other type of value
    """
    assert val.tag() != value_e.Undef

    op_tok = op.op
    op_kind = consts.GetKind(op_tok.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
    # shortcut for constant strings.
    arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert arg_val.tag() == value_e.Str
    pat = arg_val.s

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            changed = string_ops.DoUnarySuffixOp(str_val.s, op_tok, pat,
                                                 has_extglob)
            #log('%r %r -> %r', str_val.s, pat, changed)
            new_val = value.Str(changed)  # type: value_t

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this
            # too.
            out = []  # type: List[str]
            for item in array_val.strs:
                if item is not None:
                    out.append(
                        string_ops.DoUnarySuffixOp(item, op_tok, pat,
                                                   has_extglob))
            new_val = value.BashArray(out)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            out = []
            for item in assoc_val.d.values():
                out.append(
                    string_ops.DoUnarySuffixOp(item, op_tok, pat,
                                               has_extglob))
            new_val = value.BashArray(out)

        else:
            raise error.TypeErr(
                val, 'Unary op expected Str, BashArray, BashAssoc', op_tok)

    return new_val
913
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply pattern substitution, e.g. ${x//GLOB/replacement}.

    A Str gives a Str.  A BashArray (entries that are None are skipped) or
    a BashAssoc (its values) gives a BashArray of replaced strings.

    Raises:
      error.TypeErr: for any other type of value

    Dies (via e_die) when the pattern contains an extended glob.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from
    # !(*.py) to ERE!  You would need an engine that supports negation?
    # (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            result = value.Str(replacer.Replace(str_val.s,
                                                op))  # type: value_t

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, val)
            replaced = []  # type: List[str]
            for item in array_val.strs:
                if item is not None:
                    replaced.append(replacer.Replace(item, op))
            result = value.BashArray(replaced)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, val)
            replaced = []
            for item in assoc_val.d.values():
                replaced.append(replacer.Replace(item, op))
            result = value.BashArray(replaced)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return result
970
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the slice bounds and apply them with _PerformSlice().

    var_name is None for $* and $@; in that case $0 is passed along so
    that it counts in the offset.

    An error.Strict is re-raised under strict_word_eval.  Otherwise a
    warning is printed and the result is an empty Str or BashArray,
    matching the type of the input.
    """
    start = self.arith_ev.EvalToInt(op.begin)

    # -1 is only a placeholder; has_length says whether a length was given
    has_length = False
    num = -1
    if op.length:
        has_length = True
        num = self.arith_ev.EvalToInt(op.length)

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, start, num, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # val is still the input here, because the assignment above
        # didn't complete
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                val = value.Str('')
            elif case2(value_e.BashArray):
                val = value.BashArray([])
            else:
                raise NotImplementedError()
    return val
1002
def _Nullary(self, val, op, var_name):
    # type: (value_t, Token, Optional[str]) -> Tuple[value.Str, bool]
    """Apply a nullary operator: ${x@P}, ${x@Q} or ${x@a}.

    Args:
      val: the value to transform
      op: the operator token, which is also blamed on errors
      var_name: name of the variable, or None, e.g. for ${?@a}

    Returns:
      (result, quoted2), where quoted2 is True only for @Q applied to a
      Str.

    Dies (via e_die) when @P or @Q is applied to an unsupported type, and
    for operators that are not implemented.
    """

    UP_val = val
    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt_str = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt_str.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)

                # TODO: use fastfunc.ShellEncode or
                # fastfunc.PosixShellEncode()
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in bash
                quoted2 = True
            elif case(value_e.BashArray):
                array_val = cast(value.BashArray, UP_val)

                # TODO: should use fastfunc.ShellEncode
                # The representation is sparse: unset entries are None and
                # must be skipped rather than encoded.
                tmp = [
                    j8_lite.MaybeShellEncode(s) for s in array_val.strs
                    if s is not None
                ]
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # We're ONLY simulating -a and -A, not -r -x -n for now.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1065
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the whole-array subscripts ${a[@]} and ${a[*]}.

    Sets vsub_state.join_array (an out param) and returns the value, which
    is only replaced when it is Undef and there's no test operator.

    Dies (via e_die) when a string is subscripted under strict_array.
    """
    op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if op_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        str_msg = "Can't index string with @"
    elif op_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        str_msg = "Can't index string with *"
    else:
        raise AssertionError(op_id)  # unknown

    # The handling of the value itself is the same for @ and *
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            if not vsub_state.has_test_op:
                val = self._EmptyBashArrayOrError(part.token)
        elif case2(value_e.Str):
            if self.exec_opts.strict_array():
                e_die(str_msg, loc.WordPart(part))
        elif case2(value_e.BashArray):
            pass  # no-op

    return val
1100
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process a numeric array index like ${a[i+1]}

    For a BashArray the subscript is evaluated to an int, and for a
    BashAssoc to a string key.  Either way it's also stored in
    vtest_place.index (an out param).

    Returns:
      value.Str for an entry that exists, otherwise value.Undef.  An Undef
      input is returned unchanged.

    Raises:
      error.TypeErr: if val isn't Undef, Str, BashArray or BashAssoc

    Dies (via e_die) when val is a Str.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    item = None  # type: Optional[str]
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            return val  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.token)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            int_index = self.arith_ev.EvalToInt(anode)
            vtest_place.index = a_index.Int(int_index)  # out param

            item = GetArrayItem(array_val.strs, int_index)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            str_key = self.arith_ev.EvalWordToString(
                anode, blame_loc=location.TokenForArith(anode))

            vtest_place.index = a_index.Str(str_key)  # out param
            item = assoc_val.d.get(str_key)

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    if item is None:
        return value.Undef
    return value.Str(item)
1150
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.

    A double quoted part can still yield arrays:

      $ a=(1 2); b=(3); c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    and so can nested parts:

      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']
    """
    if len(parts) != 0:
        for p in parts:
            self._EvalWordPart(p, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []) instead
        # of (DoubleQuoted [Literal '']), so the empty quoted piece has to be
        # produced here.
        part_vals.append(Piece('', True, False))
1177
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are blamed on the left quote, dq_part.left.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    return self._ConcatPartVals(evaluated, dq_part.left)
1187
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Join an array into a single string, e.g. for $*.

    None elements are skipped.  The separator comes from
    self.splitter.GetJoinChar().
    """
    assert val.tag() == value_e.BashArray, val
    present = [item for item in val.strs if item is not None]
    join_char = self.splitter.GetJoinChar()
    return value.Str(join_char.join(present))
1195
def _EmptyStrOrError(self, val, token):
    # type: (value_t, Token) -> value_t
    """Turn an Undef value into '', or die if nounset is on.

    Any value other than Undef is returned unchanged.
    """
    if val.tag() == value_e.Undef:
        if self.exec_opts.nounset():
            tok_str = lexer.TokenVal(token)
            if tok_str.startswith('$'):
                name = tok_str[1:]  # report the name without the sigil
            else:
                name = tok_str
            e_die('Undefined variable %r' % name, token)

        return value.Str('')

    return val
1207
def _EmptyBashArrayOrError(self, token):
    # type: (Token) -> value_t
    """Return an empty BashArray for an undefined array, or die under nounset.

    The token is used for the error message and its location.
    """
    assert token is not None
    if not self.exec_opts.nounset():
        return value.BashArray([])

    e_die('Undefined array %r' % lexer.TokenVal(token), token)
1215
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a ${...} substitution, if there is one.

    With [@] / [*] or [index], this delegates to _WholeArray or _ArrayIndex.
    Without a bracket op, a named array either decays to a string or is a
    fatal error, unless a type query like ${array@a} is being evaluated.
    """
    if part.bracket_op:
        with tagswitch(part.bracket_op) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())

        return val

    # No bracket op: only named arrays need special handling
    name = vtest_place.name
    if name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val
    if vsub_state.is_type_query:
        return val

    no_ops = not (part.prefix_op or part.suffix_op)
    if ShouldArrayDecay(name, self.exec_opts, no_ops):
        # for ${BASH_SOURCE}, etc.
        val = DecayArray(val)
    else:
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % name, loc.WordPart(part))

    return val
1245
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a BracedVarSub refers to and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t
    instead of appending part values.
    """
    tok_id = part.token.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # The bracket op is evaluated with _allow_command_sub set to False
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1275
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub node, with optional prefix, bracket and suffix
            ops
      part_vals: output param to append to.
      quoted: whether the substitution appears inside double quotes
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; otherwise the
                # names are joined into one piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.token.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.token.id, quoted, vsub_state)

    # Pre-scan the suffix op: it only sets flags on vsub_state here.  The op
    # itself is applied further down, after the bracket and prefix ops.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

            elif case(suffix_op_e.Unary):
                suffix_op_ = cast(suffix_op.Unary, UP_op)

                # Do the _EmptyStrOrError/_EmptyBashArrayOrError up front, EXCEPT in
                # the case of Kind.VTest
                if consts.GetKind(suffix_op_.op.id) == Kind.VTest:
                    vsub_state.has_test_op = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            if not vsub_state.has_test_op:  # undef -> '' BEFORE length
                val = self._EmptyStrOrError(val, part.token)

            n = self._Length(val, part.token)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray):
                if vsub_state.has_test_op:
                    # ${!a[@]-'default'} is a non-fatal runtime error in bash.  Here
                    # it's fatal.
                    # has_test_op is only set for a Unary suffix op, so UP_op
                    # is bound when we get here.
                    op_tok = cast(suffix_op.Unary, UP_op).op
                    e_die('Test operation not allowed with ${!array[@]}',
                          op_tok)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.token)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.token, quoted, vsub_state,
                                       vtest_place)

                if not vsub_state.has_test_op:  # undef -> '' AFTER indirection
                    val = self._EmptyStrOrError(val, part.token)

        else:
            raise AssertionError(part.prefix_op)

    else:
        if not vsub_state.has_test_op:  # undef -> '' if no prefix op
            val = self._EmptyStrOrError(val, part.token)

    # 3. Suffix op
    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.token):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1471
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate evaluated part values into one string.

    String pieces are used as is.  An array part is a fatal error under
    strict_array (blamed on 'location'); otherwise its non-None elements are
    joined with a single space.
    """
    out = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                out.append(piece.s)

            elif case(part_value_e.Array):
                array_pv = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                non_null = [x for x in array_pv.strs if x is not None]
                out.append(' '.join(non_null))

            else:
                raise AssertionError()

    return ''.join(out)
1501
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution, unquoted, to a single str.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are blamed on the ${ location, part.left.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    return self._ConcatPartVals(evaluated, part.left)
1512
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate an unbraced substitution like $name, $1 or $@.

    Args:
      part_vals: output param to append to.
    """
    tok = part.tok
    tok_id = tok.id

    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._EmptyStrOrError(val, tok)

    # An array is only joined when the special var evaluation asked for it
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1553
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a $name style substitution, unquoted, to a single str.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are blamed on the substitution's token, node.tok.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    return self._ConcatPartVals(evaluated, node.tok)
1563
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Args:
      part: the extended glob node; its arms are evaluated in order
      part_vals: output param.  Receives the opening operator, the evaluated
                 arms separated by '|' pieces, and the closing ')'.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare, unquoted '|'.  A backslash-escaped
            # pipe would be matched literally instead of separating the
            # alternatives.
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1581
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    Two patterns are built: a glob pattern, where each extglob is replaced
    by '*', and an fnmatch pattern with the full extglob text.  Both lists
    are output params.  _EvalExtGlob does the flattening.
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                # Unquoted pieces are used verbatim, e.g. the @( and | in
                # @(foo|bar) that come from _EvalExtGlob()
                text = piece.s
                if piece.quoted and not self.exec_opts.noglob():
                    text = glob_.GlobEscape(text)
                glob_parts.append(text)
                fnmatch_parts.append(text)

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                nested = cast(part_value.ExtGlob, UP_pv)
                # keep appending fnmatch_parts, but replace glob_parts with '*'
                self._TranslateExtGlob(nested.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1617
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate; the work is dispatched on its tag
      part_vals: output param to append to.
      flags: bit flags; only QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are never evaluated as word parts; reaching one here
        # is a fatal error.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always a quoted piece that is not split
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            # Always a quoted piece that is not split
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            id_ = part.left_token.id
            # The left token tells command subs apart from process subs
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is split only when unquoted
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the child word
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1734
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, which may be empty, appending to part_vals.

    An empty word yields one empty piece, which is quoted if eval_flags
    contains QUOTED.  A compound word is passed on to _EvalWordToParts.
    """
    UP_w = w
    tag = w.tag()
    if tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    elif tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, UP_w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)

    else:
        raise AssertionError()
1750
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Returns:
      Appends to part_vals.  Note that this is a TREE.

    Words with an extended glob part are a special case, because extended
    globs are implemented with glob() followed by fnmatch(..., FNM_EXTMATCH),
    and it's hard to carry that information past the word splitting stage.

    OSH semantic limitations: if a word has an extended glob part, then
      1. It can't have an array
      2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # We parsed a word_part.ExtGlob(); what happens next depends on what the
    # caller REQUESTED.
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1807
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Appends one string per part value to the output param 'strs'.  Quoted
    pieces are escaped according to QUOTE_FNMATCH or QUOTE_ERE in eval_flags.

    Note: arg 'w' is only used for error locations.
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                text = piece.s
                if piece.quoted:
                    if eval_flags & QUOTE_FNMATCH:
                        # Quoted text inside a pattern must match literally,
                        # e.g. the "*" in [[ foo == */"*".py ]]
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                array_pv = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                # It appears to not respect IFS
                non_null = [x for x in array_pv.strs if x is not None]
                strs.append(' '.join(non_null))  # TODO: eliminate double join()?

            elif case(part_value_e.ExtGlob):
                extglob_pv = cast(part_value.ExtGlob, UP_pv)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(extglob_pv.part_vals, w, eval_flags,
                                       strs)

            else:
                raise AssertionError()
1863
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word to a single string, with no splitting.

    eval_flags can select a quoting algorithm (e.g. QUOTE_FNMATCH), which is
    applied when the evaluated parts are joined.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path.  QUOTE_FNMATCH etc. breaks this optimization, so it's only
    # tried when no flags are passed.
    if eval_flags == 0:
        literal = word_.FastStrEval(w)
        if literal is not None:
            return value.Str(literal)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, evaluated, 0)

    pieces = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, pieces)
    return value.Str(''.join(pieces))
1891
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    The word is always joined with QUOTE_FNMATCH.  An empty word gives
    ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        # Parts are evaluated without any flags
        self._EvalWordPart(p, evaluated, 0)
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    pieces = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, pieces)
    return value.Str(''.join(pieces)), saw_extglob
1912
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42) are
    handled here: failures are turned into a placeholder string instead of
    propagating.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as runtime_err:
            result = value.Str('<Runtime error: %s>' %
                               runtime_err.UserErrorString())

        except (IOError, OSError) as io_err:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(io_err))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
1935
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.Str('') for an empty word, value.BashArray for a=(1 2),
      value.BashAssoc for an associative array literal, and otherwise the
      value.Str from EvalWordToString.
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # The argument is an rhs_word_t, so check against rhs_word_e, like
    # EvalWordToPattern and _EvalRhsWordToParts do.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
1972
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args, appended to argv.

    Args:
      frame: the pieces to combine
      argv: output param to append to.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    # First pass: classify the frame
    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        tmp = [piece.s for piece in frame]
        a = ''.join(tmp)
        argv.append(a)
        return

    will_glob = not self.exec_opts.noglob()

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # First layer of escaping, for globbing
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Second layer of escaping, for splitting
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for a in args:
        if glob_.LooksLikeGlob(a):
            # Expand() is passed argv as its output list
            n = self.globber.Expand(a, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the frame
                raise error.FailGlob('Pattern %r matched no files' % a,
                                     loc.Missing)
        else:
            argv.append(glob_.GlobUnescape(a))
2043
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Helper for _EvalAssignBuiltin.

    Splitting and globbing are disabled for assignment builtins, so each
    non-empty frame becomes exactly one arg.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    argv = []  # type: List[str]
    for frame in _MakeWordFrames(part_vals):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        strs = [piece.s for piece in frame]
        argv.append(''.join(strs))  # no split or glob

    #log('argv: %s', argv)
    return argv
2063
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Evaluate the words of an assignment builtin into a cmd_value.Assign.

    Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin; passed through to the result.
      arg0: The already-evaluated keyword; becomes the first entry of the
        flag list.
      words: All words of the command, including leading 'builtin' /
        'command' words.
      meta_offset: Index of the keyword within words.  Only the words after
        it are evaluated here.

    Returns:
      A cmd_value.Assign holding the flag strings, one location per flag
      string, and the evaluated assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel lists: every append to one must be
    # matched by an append to the other.
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    # NOTE(review): this is the first word of the command, which is not the
    # keyword's own word when meta_offset > 0 -- confirm that is intended.
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # The name is the token text minus its trailing operator:
                # 2 chars are sliced off for the += form, 1 char otherwise.
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing follows the LHS token, so the RHS is empty.
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning "function" for
                    # all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name following -f / -F.  Record its location too,
                        # so that flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2150
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the command as an assignment builtin if arg0 names one.

    Returns None when consts.LookupAssignBuiltin() doesn't recognize arg0;
    otherwise returns the result of _EvalAssignBuiltin().
    """
    assign_id = consts.LookupAssignBuiltin(arg0)
    if assign_id == consts.NO_INDEX:
        return None
    return self._EvalAssignBuiltin(assign_id, arg0, words, meta_offset)
2158
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from an evaluated part.

    Only an unquoted string part is considered; anything else gives None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2167
def SimpleEvalWordSequence2(self, words, allow_assign):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Simple word evaluation for YSH: no word splitting, static globs only.

    Returns a cmd_value.Assign if allow_assign is set and the first word
    evaluates to exactly one string naming an assignment builtin.  Otherwise
    returns a cmd_value.Argv whose argument and location lists are parallel.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        if allow_assign and i == meta_offset:
            # No globbing in the first arg for command.Simple.
            first = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first) == 1:
                assign_val = self._DetectAssignBuiltinStr(
                    first[0], words, meta_offset)
                if assign_val:
                    return assign_val

            argv.extend(first)
            for _ in xrange(len(first)):
                arg_locs.append(w)

        elif glob_.LooksLikeStaticGlob(w):
            pattern = self.EvalWordToString(w)  # respects strict-array
            num_matches = self.globber.Expand(pattern.s, argv)
            if num_matches < 0:
                raise error.FailGlob(
                    'Pattern %r matched no files' % pattern.s, w)
            for _ in xrange(num_matches):
                arg_locs.append(w)

        else:
            evaluated = []  # type: List[part_value_t]
            self._EvalWordToParts(w, evaluated, 0)  # not quoted

            if 0:
                log('')
                log('Static: part_vals after _EvalWordToParts:')
                for item in evaluated:
                    log(' %s', item)

            # Still need to process
            word_frames = _MakeWordFrames(evaluated)

            if 0:
                log('')
                log('Static: frames after _MakeWordFrames:')
                for item in word_frames:
                    log(' %s', item)

            # We will still allow x"${a[@]}"x, though it's deprecated by @a,
            # which disallows such expressions at parse time.
            for pieces in word_frames:
                if len(pieces) == 0:
                    continue  # empty array gives empty frame!
                fragments = [p.s for p in pieces]
                argv.append(''.join(fragments))  # no split or glob
                arg_locs.append(w)

    return cmd_value.Argv(argv, arg_locs, None, None, None, None)
2229
def EvalWordSequence2(self, words, allow_assign=False):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: The words to evaluate, in order.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      A cmd_value.Assign if allow_assign is set and an assignment builtin is
      detected at position meta_offset.  Otherwise a cmd_value.Argv whose
      string and location lists are parallel (one location per string).
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            # Fast path: the word yields exactly one string and one location.
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log(' %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log(' %s', entry)

        # Snapshot the length right before this word's frames are evaluated.
        # A counter carried across iterations would go stale whenever a
        # previous word took the fast path above, and this word would then
        # be given too many locations.
        num_before = len(strs)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs: one per arg this word added.
        for _ in xrange(len(strs) - num_before):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    return cmd_value.Argv(strs, locs, None, None, None, None)
2332
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a plain list of strings, for arrays and for loops.

    Assignment builtins aren't allowed in these contexts, so the result of
    EvalWordSequence2() is asserted to be a cmd_value.Argv.
    """
    result = self.EvalWordSequence2(words)
    assert result.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2344
2345
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command subs and process subs via shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts out unset; CheckCircularDeps() asserts it has been assigned.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been assigned."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run the command sub and wrap its stdout as a part value.

        An Id.Left_AtParen sub yields an Array of J8 lines.  Any other sub
        yields a single Piece that is subject to splitting iff it's unquoted.
        """
        output = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(output, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(output)
        except error.Decode as err:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, err.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run the process sub and return the resulting path as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2392
2393
# Placeholder string that CompletionWordEvaluator._EvalCommandSub() returns in
# place of real command sub output, since that evaluator has no executor.
_DUMMY = '__NO_COMMAND_SUB__'
2395
2396
class CompletionWordEvaluator(AbstractWordEvaluator):
    """Word evaluator for completion, with no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    rather than being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end of
    the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been assigned."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder shaped like the real command sub result."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder path instead of starting a process."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2436
2437
2438	# vim: sw=4