builtin/bracket

OILS / builtin / bracket_osh.py View on Github | oilshell.org

285 lines, 166 significant

1	"""bracket_osh.py: the test / [ builtin."""
2	from __future__ import print_function
3
4	from _devbuild.gen.id_kind_asdl import Id
5	from _devbuild.gen.syntax_asdl import loc, word, word_e, word_t, bool_expr
6	from _devbuild.gen.types_asdl import lex_mode_e
7	from _devbuild.gen.value_asdl import value
8
9	from core import error
10	from core.error import e_usage, p_die
11	from core import vm
12	from frontend import match
13	from mycpp.mylib import log
14	from osh import bool_parse
15	from osh import sh_expr_eval
16	from osh import word_parse
17	from osh import word_eval
18
19	_ = log
20
21	from typing import cast, TYPE_CHECKING
22
23	if TYPE_CHECKING:
24	from _devbuild.gen.runtime_asdl import cmd_value
25	from _devbuild.gen.syntax_asdl import bool_expr_t
26	from _devbuild.gen.types_asdl import lex_mode_t
27	from core.ui import ErrorFormatter
28	from core import optview
29	from core import state
30
31
class _StringWordEmitter(word_parse.WordEmitter):
    """Word source for test/[ that hands out word.String values.

    The words come straight from an already-evaluated argv rather than from
    shell source text.  Each one is tagged with an Id so that the BoolParser
    (which calls word_.BoolId(w) and switches on Kind.BoolUnary,
    Kind.BoolBinary, etc.) can treat it like the Compound/Token words it
    sees in the [[ case.
    """

    def __init__(self, cmd_val):
        # type: (cmd_value.Argv) -> None
        self.cmd_val = cmd_val
        self.i = 0  # index of the next argv entry to hand out
        self.n = len(cmd_val.argv)  # argv length, captured at construction

    def ReadWord(self, unused_lex_mode):
        # type: (lex_mode_t) -> word.String
        """Return the next argv entry as a word.String (bool_parse.py API).

        The lex mode is ignored.  Once argv is exhausted, every call returns
        an empty Id.Eof_Real word with no location.

        TODO: This should probably be word_t
        """
        pos = self.i
        if pos == self.n:
            # Does it make sense to define Eof_Argv or something?
            # TODO: Add a way to show this location.
            return word.String(Id.Eof_Real, '', None)

        arg = self.cmd_val.argv[pos]
        where = self.cmd_val.arg_locs[pos]
        self.i = pos + 1

        # Try each matcher in turn; the first one that recognizes the string
        # wins.  Anything unrecognized is a plain operand word.
        tok_id = match.BracketUnary(arg)
        if tok_id == Id.Undefined_Tok:
            tok_id = match.BracketBinary(arg)
            if tok_id == Id.Undefined_Tok:
                tok_id = match.BracketOther(arg)
                if tok_id == Id.Undefined_Tok:
                    tok_id = Id.Word_Compound

        return word.String(tok_id, arg, where)

    def Read(self):
        # type: () -> word.String
        """Read the next word; used by the fixed-arity special cases."""
        return self.ReadWord(lex_mode_e.ShCommand)

    def Peek(self, offset):
        # type: (int) -> str
        """Return the raw argv string `offset` entries past the cursor.

        Nothing is consumed.  No bounds check is done here, so the caller
        must know that the index is valid.
        """
        return self.cmd_val.argv[self.i + offset]

    def Rewind(self, offset):
        # type: (int) -> None
        """Move the cursor back by `offset` words so they can be re-read."""
        self.i -= offset
89
90
class _WordEvaluator(word_eval.StringWordEvaluator):
    """Word evaluator for test/[: the words are already plain strings."""

    def __init__(self):
        # type: () -> None
        word_eval.StringWordEvaluator.__init__(self)

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Unwrap a word.String into a value.Str; eval_flags is unused.

        Open design question carried over from the original notes: the
        do_fnmatch behavior of [[ == ]] is not available here.  It may need
        a different node type (BuiltinEqual / BuiltinDEqual?), i.e. parsing
        into a different tree.
        """
        # Only word.String is expected here; anything else is a bug.
        assert w.tag() == word_e.String
        str_word = cast(word.String, w)
        return value.Str(str_word.s)
106
107
def _TwoArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Parse a two-word expression and return its tree.

    Reads exactly two words from w_parser.  The result is one of:

    - `! WORD`   -> negated word test
    - `OP WORD`  -> unary test, where OP is a YSH long flag (--dir,
                    --exists, --file, --symlink) or anything recognized by
                    match.BracketUnary

    If the first word is none of those, p_die() is called with its location.
    """
    op_word = w_parser.Read()
    operand = w_parser.Read()
    op_str = op_word.s

    if op_str == '!':
        return bool_expr.LogicalNot(bool_expr.WordTest(operand))

    op_id = Id.Undefined_Tok

    # YSH prefers long flags; everything else goes through the matcher.
    if op_str == '--dir':
        op_id = Id.BoolUnary_d
    elif op_str == '--exists':
        op_id = Id.BoolUnary_e
    elif op_str == '--file':
        op_id = Id.BoolUnary_f
    elif op_str == '--symlink':
        op_id = Id.BoolUnary_L
    else:
        op_id = match.BracketUnary(op_str)

    if op_id == Id.Undefined_Tok:
        p_die('Expected unary operator, got %r (2 args)' % op_str,
              loc.Word(op_word))

    return bool_expr.Unary(op_id, operand)
138
139
def _ThreeArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Parse a three-word expression and return its tree.

    Reads exactly three words from w_parser and tries these interpretations,
    in this order (the order matters):

    1. `A OP B` where OP is recognized by match.BracketBinary
    2. `A -a B` -> logical AND of two word tests
    3. `A -o B` -> logical OR of two word tests
    4. `! X Y`  -> negation of the two-word expression `X Y`
    5. `( A )`  -> word test on A

    If none applies, p_die() is called with the middle word's location.
    """
    first = w_parser.Read()
    middle = w_parser.Read()
    last = w_parser.Read()
    middle_str = middle.s

    # NOTE: Order is important here.

    op_id = match.BracketBinary(middle_str)
    if op_id != Id.Undefined_Tok:
        return bool_expr.Binary(op_id, first, last)

    if middle_str == '-a':
        left_test = bool_expr.WordTest(first)
        right_test = bool_expr.WordTest(last)
        return bool_expr.LogicalAnd(left_test, right_test)

    if middle_str == '-o':
        left_test = bool_expr.WordTest(first)
        right_test = bool_expr.WordTest(last)
        return bool_expr.LogicalOr(left_test, right_test)

    if first.s == '!':
        # Put the last two words back so _TwoArgs can read them itself.
        w_parser.Rewind(2)
        return bool_expr.LogicalNot(_TwoArgs(w_parser))

    if first.s == '(' and last.s == ')':
        return bool_expr.WordTest(middle)

    p_die('Expected binary operator, got %r (3 args)' % middle_str,
          loc.Word(middle))
170
171
class Test(vm._Builtin):
    """The test and [ builtins.

    The only difference between the two is that [ requires a matching ]
    as its last argument.
    """

    def __init__(self, need_right_bracket, exec_opts, mem, errfmt):
        # type: (bool, optview.Exec, state.Mem, ErrorFormatter) -> None
        # True when running as '[', which must be closed by ']'
        self.need_right_bracket = need_right_bracket
        self.exec_opts = exec_opts  # consulted for simple_test_builtin()
        self.mem = mem  # handed to the BoolEvaluator
        self.errfmt = errfmt  # where parse/eval errors are printed

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse and evaluate the expression in cmd_val.argv[1:].

        Returns 0 when the expression is true, 1 when it is false (an empty
        expression counts as false), and 2 when the closing ] is missing or
        when parsing or evaluation reports an error.

        When invoked as [, the trailing ] is popped off cmd_val.argv and
        cmd_val.arg_locs in place.
        """
        if self.need_right_bracket:
            if self.exec_opts.simple_test_builtin():
                e_usage("should be invoked as 'test' (simple_test_builtin)",
                        loc.Missing)

            argv = cmd_val.argv
            if len(argv) == 0 or argv[-1] != ']':
                self.errfmt.Print_('missing closing ]',
                                   blame_loc=cmd_val.arg_locs[0])
                return 2

            # Drop the ] and its location so the two lists stay parallel.
            argv.pop()
            cmd_val.arg_locs.pop()

        w_parser = _StringWordEmitter(cmd_val)
        w_parser.Read()  # dummy: advance past argv[0]
        b_parser = bool_parse.BoolParser(w_parser)

        # The test language is fundamentally ambiguous, e.g.
        #   [ -z ]
        #   [ -z -a ]
        #   [ -z -a ] ]
        # and -a is both a unary prefix operator and an infix operator.
        #
        # Like posixtest() in bash's test.c ("an implementation of a Posix.2
        # proposal by David Korn"), we dispatch on the number of arguments:
        # 0, 1, 2, 3 and 4 get special handling, and everything else goes to
        # the general parser.

        num_args = len(cmd_val.argv) - 1

        if self.exec_opts.simple_test_builtin() and num_args > 3:
            e_usage(
                "should only have 3 arguments or fewer (simple_test_builtin)",
                loc.Missing)

        if num_args == 0:
            return 1  # [ ] is False

        bool_node = None  # type: bool_expr_t
        try:
            if num_args == 1:
                bool_node = bool_expr.WordTest(w_parser.Read())
            elif num_args == 2:
                bool_node = _TwoArgs(w_parser)
            elif num_args == 3:
                bool_node = _ThreeArgs(w_parser)
            elif num_args == 4:
                first = w_parser.Peek(0)
                if first == '!':
                    w_parser.Read()  # skip '!'
                    bool_node = bool_expr.LogicalNot(_ThreeArgs(w_parser))
                elif first == '(' and w_parser.Peek(3) == ')':
                    # Skip '('.  The trailing ')' is simply never read.
                    w_parser.Read()
                    bool_node = _TwoArgs(w_parser)
                # Any other 4-arg form falls through to the general parser.

            if bool_node is None:
                bool_node = b_parser.ParseForBuiltin()

        except error.Parse as parse_err:
            self.errfmt.PrettyPrintError(parse_err, prefix='(test) ')
            return 2

        str_word_ev = _WordEvaluator()

        # mem isn't strictly needed, since BASH_REMATCH isn't supported here.
        # always_strict: [ a -eq a ] should always be an error, unlike
        # [[ a -eq a ]] -- an odd case where [[ is the less strict one.
        bool_ev = sh_expr_eval.BoolEvaluator(self.mem,
                                             self.exec_opts,
                                             None,
                                             None,
                                             self.errfmt,
                                             always_strict=True)
        bool_ev.word_ev = str_word_ev
        bool_ev.CheckCircularDeps()

        try:
            is_true = bool_ev.EvalB(bool_node)
        except error._ErrorWithLocation as eval_err:
            # Meant to catch e_die() and e_strict(), which are both
            # FatalRuntime errors for now.  Errors arising out of [ appear to
            # always carry a location, so none is re-raised.
            self.errfmt.PrettyPrintError(eval_err, prefix='(test) ')
            # 1 means 'false'; this usage error is treated like a parse error.
            return 2

        return 0 if is_true else 1