OILS / builtin / bracket_osh.py View on Github | oilshell.org

285 lines, 166 significant
"""bracket_osh.py: the test / [ builtin."""
2from __future__ import print_function
3
4from _devbuild.gen.id_kind_asdl import Id
5from _devbuild.gen.syntax_asdl import loc, word, word_e, word_t, bool_expr
6from _devbuild.gen.types_asdl import lex_mode_e
7from _devbuild.gen.value_asdl import value
8
9from core import error
10from core.error import e_usage, p_die
11from core import vm
12from frontend import match
13from mycpp.mylib import log
14from osh import bool_parse
15from osh import sh_expr_eval
16from osh import word_parse
17from osh import word_eval
18
19_ = log
20
21from typing import cast, TYPE_CHECKING
22
23if TYPE_CHECKING:
24 from _devbuild.gen.runtime_asdl import cmd_value
25 from _devbuild.gen.syntax_asdl import bool_expr_t
26 from _devbuild.gen.types_asdl import lex_mode_t
27 from core.ui import ErrorFormatter
28 from core import optview
29 from core import state
30
31
class _StringWordEmitter(word_parse.WordEmitter):
    """Word source for test/[ that hands out argv entries as word.String.

    The BoolParser calls word_.BoolId(w), and deals with Kind.BoolUnary,
    Kind.BoolBinary, etc.  Feeding it word.String values takes the place of
    the Compound/Token words used in the [[ case.
    """

    def __init__(self, cmd_val):
        # type: (cmd_value.Argv) -> None
        """Begin reading at the first entry of cmd_val.argv."""
        self.cmd_val = cmd_val
        self.i = 0  # index of the next argv entry to hand out
        self.n = len(cmd_val.argv)  # captured once, at construction

    def ReadWord(self, unused_lex_mode):
        # type: (lex_mode_t) -> word.String
        """Interface for bool_parse.py: return the next argument as a word.

        The lex mode argument is ignored.  Once every argv entry has been
        consumed, a word with Id.Eof_Real, an empty string and no location is
        returned.

        TODO: This should probably be word_t
        """
        pos = self.i
        if pos == self.n:
            # Does it make sense to define Eof_Argv or something?
            # TODO: Add a way to show this location.
            return word.String(Id.Eof_Real, '', None)

        arg = self.cmd_val.argv[pos]
        where = self.cmd_val.arg_locs[pos]
        self.i = pos + 1

        # Try the unary, binary and "other" operator tables in that order.
        # Anything none of them recognizes is an ordinary operand word.
        tok_id = match.BracketUnary(arg)
        if tok_id == Id.Undefined_Tok:
            tok_id = match.BracketBinary(arg)
            if tok_id == Id.Undefined_Tok:
                tok_id = match.BracketOther(arg)
                if tok_id == Id.Undefined_Tok:
                    tok_id = Id.Word_Compound

        return word.String(tok_id, arg, where)

    def Read(self):
        # type: () -> word.String
        """Interface used for the special cases: ReadWord in ShCommand mode."""
        return self.ReadWord(lex_mode_e.ShCommand)

    def Peek(self, offset):
        # type: (int) -> str
        """For special cases: the raw argv string `offset` entries ahead.

        Nothing is consumed.  There is no bounds check here, so callers must
        only ask for entries that exist.
        """
        index = self.i + offset
        return self.cmd_val.argv[index]

    def Rewind(self, offset):
        # type: (int) -> None
        """For special cases: move the read position back by `offset` words."""
        self.i = self.i - offset
89
90
class _WordEvaluator(word_eval.StringWordEvaluator):
    """Word evaluator for test/[ whose words are already plain strings."""

    def __init__(self):
        # type: () -> None
        """Initialize the StringWordEvaluator base; no extra state is kept."""
        word_eval.StringWordEvaluator.__init__(self)

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Wrap the string carried by a word.String in a value.Str.

        eval_flags is accepted but not consulted.  In particular there is no
        fnmatch-style handling as for [[ == ]]; supporting that would probably
        need a different kind of node (e.g. BuiltinEqual / BuiltinDEqual
        parsed into a different tree).
        """
        # Only word.String is expected here; any other variant is a bug.
        assert w.tag() == word_e.String
        str_word = cast(word.String, w)
        return value.Str(str_word.s)
106
107
def _TwoArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Build the expression tree for a two-argument test: `OP ARG`.

    Reads exactly two words from w_parser.  `! ARG` negates a plain word
    test.  Otherwise the first word must be a unary operator, either one of
    the long flags handled below or something match.BracketUnary recognizes.
    If it is not, the failure is reported through p_die.
    """
    op_word = w_parser.Read()
    operand = w_parser.Read()

    op_str = op_word.s
    if op_str == '!':
        return bool_expr.LogicalNot(bool_expr.WordTest(operand))

    # YSH prefers long flags; everything else goes through the regular
    # unary operator table.
    if op_str == '--dir':
        op_id = Id.BoolUnary_d
    elif op_str == '--exists':
        op_id = Id.BoolUnary_e
    elif op_str == '--file':
        op_id = Id.BoolUnary_f
    elif op_str == '--symlink':
        op_id = Id.BoolUnary_L
    else:
        op_id = match.BracketUnary(op_str)

    if op_id == Id.Undefined_Tok:
        p_die('Expected unary operator, got %r (2 args)' % op_word.s,
              loc.Word(op_word))

    return bool_expr.Unary(op_id, operand)
138
139
def _ThreeArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Build the expression tree for a three-argument test.

    Reads exactly three words from w_parser and tries these forms, in order:

      ARG BINARY_OP ARG
      ARG -a ARG
      ARG -o ARG
      ! <two-argument test>
      ( ARG )

    If none applies, the failure is reported through p_die, blaming the
    middle word.
    """
    first = w_parser.Read()
    middle = w_parser.Read()
    last = w_parser.Read()

    middle_s = middle.s
    first_s = first.s

    # NOTE: Order is important here.  A binary operator in the middle wins
    # over treating a leading '!' or '(' specially.
    op_id = match.BracketBinary(middle_s)
    if op_id != Id.Undefined_Tok:
        return bool_expr.Binary(op_id, first, last)

    if middle_s == '-a':
        lhs = bool_expr.WordTest(first)
        rhs = bool_expr.WordTest(last)
        return bool_expr.LogicalAnd(lhs, rhs)

    if middle_s == '-o':
        lhs = bool_expr.WordTest(first)
        rhs = bool_expr.WordTest(last)
        return bool_expr.LogicalOr(lhs, rhs)

    if first_s == '!':
        # Put back the last two words so they can be parsed as a
        # two-argument test, then negate that.
        w_parser.Rewind(2)
        negated = _TwoArgs(w_parser)
        return bool_expr.LogicalNot(negated)

    if first_s == '(' and last.s == ')':
        return bool_expr.WordTest(middle)

    p_die('Expected binary operator, got %r (3 args)' % middle.s,
          loc.Word(middle))
170
171
class Test(vm._Builtin):
    """The test and [ builtins.

    need_right_bracket selects the [ form, which requires a closing ] as its
    last argument.
    """

    def __init__(self, need_right_bracket, exec_opts, mem, errfmt):
        # type: (bool, optview.Exec, state.Mem, ErrorFormatter) -> None
        """Store the collaborators; nothing else happens at construction."""
        self.need_right_bracket = need_right_bracket
        self.exec_opts = exec_opts
        self.mem = mem
        self.errfmt = errfmt

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """The test/[ builtin.

        The only difference between test and [ is that [ needs a
        matching ].

        Returns 0 when the expression is true and 1 when it is false (an
        empty expression is false).  Returns 2 when the closing ] is missing,
        when the expression fails to parse, or when evaluating it raises an
        error with a location.

        Note that for [ the trailing ] is popped off cmd_val.argv and
        cmd_val.arg_locs in place.
        """
        if self.need_right_bracket:  # Preprocess right bracket
            if self.exec_opts.simple_test_builtin():
                e_usage("should be invoked as 'test' (simple_test_builtin)",
                        loc.Missing)

            argv = cmd_val.argv
            closed = len(argv) != 0 and argv[-1] == ']'
            if not closed:
                self.errfmt.Print_('missing closing ]',
                                   blame_loc=cmd_val.arg_locs[0])
                return 2

            # Drop the right bracket, keeping argv and arg_locs in step.
            cmd_val.argv.pop()
            cmd_val.arg_locs.pop()

        w_parser = _StringWordEmitter(cmd_val)
        w_parser.Read()  # dummy: advance past argv[0]
        b_parser = bool_parse.BoolParser(w_parser)

        # There is a fundamental ambiguity due to poor language design, in
        # cases like:
        # [ -z ]
        # [ -z -a ]
        # [ -z -a ] ]
        #
        # See posixtest() in bash's test.c:
        # "This is an implementation of a Posix.2 proposal by David Korn."
        # It dispatches on expressions of length 0, 1, 2, 3, 4, and N args.
        # We do the same here.
        #
        # Another ambiguity:
        # -a is both a unary prefix operator and an infix operator.  How to
        # fix this ambiguity?

        bool_node = None  # type: bool_expr_t
        num_args = len(cmd_val.argv) - 1  # argv[0] is the builtin's own name

        if self.exec_opts.simple_test_builtin() and num_args > 3:
            e_usage(
                "should only have 3 arguments or fewer (simple_test_builtin)",
                loc.Missing)

        try:
            if num_args == 0:
                return 1  # [ ] is False
            elif num_args == 1:
                only = w_parser.Read()
                bool_node = bool_expr.WordTest(only)
            elif num_args == 2:
                bool_node = _TwoArgs(w_parser)
            elif num_args == 3:
                bool_node = _ThreeArgs(w_parser)
            elif num_args == 4:
                head = w_parser.Peek(0)
                if head == '!':
                    w_parser.Read()  # skip !
                    inner = _ThreeArgs(w_parser)
                    bool_node = bool_expr.LogicalNot(inner)
                elif head == '(' and w_parser.Peek(3) == ')':
                    # Skip the '(' and parse the two words inside it; the
                    # trailing ')' is never read.
                    w_parser.Read()
                    bool_node = _TwoArgs(w_parser)
                # Any other 4-argument form falls through to the general
                # parser below.

            if bool_node is None:
                # No special case applied (including every form with more
                # than 4 arguments).
                bool_node = b_parser.ParseForBuiltin()

        except error.Parse as e:
            self.errfmt.PrettyPrintError(e, prefix='(test) ')
            return 2

        word_ev = _WordEvaluator()

        # We technically don't need mem because we don't support BASH_REMATCH
        # here.  We want [ a -eq a ] to always be an error, unlike
        # [[ a -eq a ]].  This is a weird case of [[ being less strict.
        bool_ev = sh_expr_eval.BoolEvaluator(self.mem,
                                             self.exec_opts,
                                             None,
                                             None,
                                             self.errfmt,
                                             always_strict=True)
        bool_ev.word_ev = word_ev
        bool_ev.CheckCircularDeps()

        try:
            result = bool_ev.EvalB(bool_node)
        except error._ErrorWithLocation as e:
            # We want to catch e_die() and e_strict().  Those are both
            # FatalRuntime errors now, but it might not make sense later.

            # NOTE: This doesn't seem to happen.  We have location info for
            # all errors that arise out of [.
            #if not e.HasLocation():
            #  raise

            self.errfmt.PrettyPrintError(e, prefix='(test) ')
            # 1 means 'false', and this usage error is like a parse error.
            return 2

        if result:
            return 0
        return 1