| 1 | """Builtin_bracket.py."""
 | 
| 2 | from __future__ import print_function
 | 
| 3 | 
 | 
| 4 | from _devbuild.gen.id_kind_asdl import Id
 | 
| 5 | from _devbuild.gen.syntax_asdl import loc, word, word_e, word_t, bool_expr
 | 
| 6 | from _devbuild.gen.types_asdl import lex_mode_e
 | 
| 7 | from _devbuild.gen.value_asdl import value
 | 
| 8 | 
 | 
| 9 | from core import error
 | 
| 10 | from core.error import e_usage, p_die
 | 
| 11 | from core import vm
 | 
| 12 | from frontend import match
 | 
| 13 | from mycpp.mylib import log
 | 
| 14 | from osh import bool_parse
 | 
| 15 | from osh import sh_expr_eval
 | 
| 16 | from osh import word_parse
 | 
| 17 | from osh import word_eval
 | 
| 18 | 
 | 
| 19 | _ = log
 | 
| 20 | 
 | 
| 21 | from typing import cast, TYPE_CHECKING
 | 
| 22 | 
 | 
| 23 | if TYPE_CHECKING:
 | 
| 24 |     from _devbuild.gen.runtime_asdl import cmd_value
 | 
| 25 |     from _devbuild.gen.syntax_asdl import bool_expr_t
 | 
| 26 |     from _devbuild.gen.types_asdl import lex_mode_t
 | 
| 27 |     from core.ui import ErrorFormatter
 | 
| 28 |     from core import optview
 | 
| 29 |     from core import state
 | 
| 30 | 
 | 
| 31 | 
 | 
class _StringWordEmitter(word_parse.WordEmitter):
    """Word emitter for test/[ that hands out word.String values from argv.

    The BoolParser calls word_.BoolId(w), and deals with Kind.BoolUnary,
    Kind.BoolBinary, etc.  Here every word is a word.String, rather than the
    Compound/Token words of the [[ case.
    """

    def __init__(self, cmd_val):
        # type: (cmd_value.Argv) -> None
        self.cmd_val = cmd_val
        self.i = 0  # index of the next argv entry to hand out
        self.n = len(cmd_val.argv)  # captured once, at construction

    def ReadWord(self, unused_lex_mode):
        # type: (lex_mode_t) -> word.String
        """Return the next argv entry as a word.String (bool_parse.py interface).

        When self.i has reached self.n, an empty Id.Eof_Real word without a
        location is returned instead.  The lex mode argument is ignored.

        TODO: This should probably be word_t
        """
        if self.i == self.n:
            # Does it make sense to define Eof_Argv or something?
            # TODO: Add a way to show this location.
            return word.String(Id.Eof_Real, '', None)

        # Fetch both the string and its location before advancing the index.
        arg = self.cmd_val.argv[self.i]
        blame = self.cmd_val.arg_locs[self.i]
        self.i += 1

        # Try the unary, binary, and "other" tables in that order.  A string
        # that none of them recognizes is an ordinary operand word.
        tok_id = match.BracketUnary(arg)
        if tok_id == Id.Undefined_Tok:
            tok_id = match.BracketBinary(arg)
            if tok_id == Id.Undefined_Tok:
                tok_id = match.BracketOther(arg)
                if tok_id == Id.Undefined_Tok:
                    tok_id = Id.Word_Compound

        return word.String(tok_id, arg, blame)

    def Read(self):
        # type: () -> word.String
        """Shorthand for ReadWord(); used by the fixed-arity special cases."""
        return self.ReadWord(lex_mode_e.ShCommand)

    def Peek(self, offset):
        # type: (int) -> str
        """Return the argv string `offset` entries ahead, without consuming it.

        No bounds check is done here; the index must be valid for argv.
        """
        idx = self.i + offset
        return self.cmd_val.argv[idx]

    def Rewind(self, offset):
        # type: (int) -> None
        """Move the read position back by `offset` entries."""
        self.i = self.i - offset
 | 
| 89 | 
 | 
| 90 | 
 | 
class _WordEvaluator(word_eval.StringWordEvaluator):
    """Word evaluator for test/[: a word.String evaluates to its own string."""

    def __init__(self):
        # type: () -> None
        word_eval.StringWordEvaluator.__init__(self)

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Wrap the string carried by a word.String in a value.Str.

        eval_flags is accepted but not used here.  The word must have the
        word_e.String tag; this is asserted.
        """
        # do_fnmatch: for the [[ == ]] semantics which we don't have!
        # I think I need another type of node
        # Maybe it should be BuiltinEqual and BuiltinDEqual?  Parse it into a
        # different tree.
        assert w.tag() == word_e.String
        str_word = cast(word.String, w)
        result = value.Str(str_word.s)
        return result
 | 
| 106 | 
 | 
| 107 | 
 | 
def _TwoArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Build the expression tree for a two-word test expression.

    Reads two words from w_parser.  If the first is '!', the result is the
    negated word test of the second.  Otherwise the first word must be a
    unary operator: one of the long flags --dir, --exists, --file, --symlink,
    or anything match.BracketUnary() recognizes.  If it is not, p_die() is
    called with the location of the first word.
    """
    op_word = w_parser.Read()
    operand = w_parser.Read()
    op_str = op_word.s

    if op_str == '!':
        return bool_expr.LogicalNot(bool_expr.WordTest(operand))

    op_id = Id.Undefined_Tok

    # YSH prefers long flags, so they are checked before the short operators.
    if op_str == '--dir':
        op_id = Id.BoolUnary_d
    elif op_str == '--exists':
        op_id = Id.BoolUnary_e
    elif op_str == '--file':
        op_id = Id.BoolUnary_f
    elif op_str == '--symlink':
        op_id = Id.BoolUnary_L
    else:
        op_id = match.BracketUnary(op_str)

    if op_id == Id.Undefined_Tok:
        p_die('Expected unary operator, got %r (2 args)' % op_str,
              loc.Word(op_word))

    return bool_expr.Unary(op_id, operand)
 | 
| 138 | 
 | 
| 139 | 
 | 
def _ThreeArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Build the expression tree for a three-word test expression.

    Reads three words from w_parser and tries these interpretations, in this
    order:

      1. the middle word is a binary operator per match.BracketBinary()
      2. the middle word is -a (logical and of two word tests)
      3. the middle word is -o (logical or of two word tests)
      4. the first word is '!' (negation of the remaining two words, which
         are re-read through _TwoArgs)
      5. the words are '(' WORD ')' (word test of the middle word)

    If none applies, p_die() is called with the location of the middle word.
    """
    left = w_parser.Read()
    middle = w_parser.Read()
    right = w_parser.Read()
    mid_str = middle.s

    # NOTE: Order is important here.

    op_id = match.BracketBinary(mid_str)
    if op_id != Id.Undefined_Tok:
        return bool_expr.Binary(op_id, left, right)

    if mid_str == '-a':
        return bool_expr.LogicalAnd(bool_expr.WordTest(left),
                                    bool_expr.WordTest(right))

    if mid_str == '-o':
        return bool_expr.LogicalOr(bool_expr.WordTest(left),
                                   bool_expr.WordTest(right))

    if left.s == '!':
        # Un-read the last two words so _TwoArgs can consume them.
        w_parser.Rewind(2)
        negated = _TwoArgs(w_parser)
        return bool_expr.LogicalNot(negated)

    if left.s == '(' and right.s == ')':
        return bool_expr.WordTest(middle)

    p_die('Expected binary operator, got %r (3 args)' % mid_str,
          loc.Word(middle))
 | 
| 170 | 
 | 
| 171 | 
 | 
class Test(vm._Builtin):
    """The test/[ builtin."""

    def __init__(self, need_right_bracket, exec_opts, mem, errfmt):
        # type: (bool, optview.Exec, state.Mem, ErrorFormatter) -> None
        # True when the builtin was invoked as [ and so requires a closing ]
        self.need_right_bracket = need_right_bracket
        self.exec_opts = exec_opts
        self.mem = mem
        self.errfmt = errfmt

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse and evaluate a test/[ expression taken from cmd_val.argv.

        The only difference between test and [ is that [ needs a matching ].
        When need_right_bracket is set, the trailing ] is popped off
        cmd_val.argv and cmd_val.arg_locs in place.

        Returns:
          0 if the expression is true
          1 if it is false, or if there are no arguments
          2 if the closing ] is missing, or if parsing or evaluation fails
            (the error is printed through self.errfmt)

        e_usage() is called when the simple_test_builtin option is on and the
        builtin is invoked as [, or is given more than 3 arguments.
        """
        if self.need_right_bracket:  # Preprocess right bracket
            if self.exec_opts.simple_test_builtin():
                e_usage("should be invoked as 'test' (simple_test_builtin)",
                        loc.Missing)

            argv = cmd_val.argv
            has_closing = len(argv) > 0 and argv[-1] == ']'
            if not has_closing:
                self.errfmt.Print_('missing closing ]',
                                   blame_loc=cmd_val.arg_locs[0])
                return 2

            # Drop the right bracket, along with its location.
            cmd_val.argv.pop()
            cmd_val.arg_locs.pop()

        emitter = _StringWordEmitter(cmd_val)
        emitter.Read()  # dummy: advance past argv[0]
        b_parser = bool_parse.BoolParser(emitter)

        # There is a fundamental ambiguity due to poor language design, in
        # cases like:
        # [ -z ]
        # [ -z -a ]
        # [ -z -a ] ]
        #
        # See posixtest() in bash's test.c:
        # "This is an implementation of a Posix.2 proposal by David Korn."
        # It dispatches on expressions of length 0, 1, 2, 3, 4, and N args.
        # We do the same here.
        #
        # Another ambiguity:
        # -a is both a unary prefix operator and an infix operator.  How to
        # fix this ambiguity?

        bool_node = None  # type: bool_expr_t
        num_args = len(cmd_val.argv) - 1  # argv[0] is not part of the expression

        if self.exec_opts.simple_test_builtin() and num_args > 3:
            e_usage(
                "should only have 3 arguments or fewer (simple_test_builtin)",
                loc.Missing)

        if num_args == 0:
            return 1  # [ ] is False

        try:
            if num_args == 1:
                bool_node = bool_expr.WordTest(emitter.Read())
            elif num_args == 2:
                bool_node = _TwoArgs(emitter)
            elif num_args == 3:
                bool_node = _ThreeArgs(emitter)
            elif num_args == 4:
                first = emitter.Peek(0)
                if first == '!':
                    emitter.Read()  # skip !
                    bool_node = bool_expr.LogicalNot(_ThreeArgs(emitter))
                elif first == '(' and emitter.Peek(3) == ')':
                    emitter.Read()  # skip '('
                    bool_node = _TwoArgs(emitter)
                # Any other 4-word form is left for the general parser below.

            if bool_node is None:
                bool_node = b_parser.ParseForBuiltin()

        except error.Parse as e:
            self.errfmt.PrettyPrintError(e, prefix='(test) ')
            return 2

        # We technically don't need mem because we don't support BASH_REMATCH
        # here.  We want [ a -eq a ] to always be an error, unlike
        # [[ a -eq a ]].  This is a weird case of [[ being less strict.
        bool_ev = sh_expr_eval.BoolEvaluator(self.mem,
                                             self.exec_opts,
                                             None,
                                             None,
                                             self.errfmt,
                                             always_strict=True)
        bool_ev.word_ev = _WordEvaluator()
        bool_ev.CheckCircularDeps()

        try:
            b = bool_ev.EvalB(bool_node)
        except error._ErrorWithLocation as e:
            # We want to catch e_die() and e_strict().  Those are both
            # FatalRuntime errors now, but it might not make sense later.
            #
            # NOTE: Errors without a location don't seem to happen.  We have
            # location info for all errors that arise out of [.
            self.errfmt.PrettyPrintError(e, prefix='(test) ')
            return 2  # 1 means 'false', and this usage error is like a parse error.

        return 0 if b else 1
 |