| 1 | #!/usr/bin/env python2
 | 
| 2 | # Copyright 2016 Andy Chu. All rights reserved.
 | 
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 | 
| 4 | # you may not use this file except in compliance with the License.
 | 
| 5 | # You may obtain a copy of the License at
 | 
| 6 | #
 | 
| 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 | 
| 8 | """
 | 
| 9 | bool_parse.py - Parse boolean expressions.
 | 
| 10 | 
 | 
| 11 | In contrast to test / [, the parsing of [[ expressions is done BEFORE
 | 
| 12 | evaluation.  So we are parsing a list of Word instances to an AST, rather than
 | 
| 13 | a list of strings.
 | 
| 14 | 
 | 
| 15 | Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
 | 
| 16 | syntax.
 | 
| 17 | 
 | 
| 18 |   Expr    : Term (OR Term)*
 | 
| 19 |   Term    : Negated (AND Negated)*
 | 
| 20 |   Negated : '!'? Factor
 | 
| 21 |   Factor  : WORD
 | 
| 22 |           | UNARY_OP WORD
 | 
| 23 |           | WORD BINARY_OP WORD
 | 
| 24 |           | '(' Expr ')'
 | 
| 25 | 
 | 
| 26 | OR = ||  -o
 | 
| 27 | AND = &&  -a
 | 
| 28 | WORD = any word
 | 
| 29 | UNARY_OP: -z -n, etc.
 | 
| 30 | BINARY_OP: -gt, -ot, ==, etc.
 | 
| 31 | """
 | 
| 32 | 
 | 
| 33 | from _devbuild.gen.id_kind_asdl import Id, Kind
 | 
| 34 | from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
 | 
| 35 | from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
 | 
| 36 |                                        bool_expr_t, Token)
 | 
| 37 | from core import ui
 | 
| 38 | from core.error import p_die
 | 
| 39 | from frontend import consts
 | 
| 40 | from mycpp.mylib import log
 | 
| 41 | from osh import word_
 | 
| 42 | 
 | 
| 43 | from typing import List, Optional, Tuple, TYPE_CHECKING
 | 
| 44 | if TYPE_CHECKING:
 | 
| 45 |     from osh.word_parse import WordEmitter
 | 
| 46 | 
 | 
| 47 | # import libc  # for regex_parse
 | 
| 48 | 
 | 
| 49 | _ = log
 | 
| 50 | 
 | 
| 51 | 
 | 
| 52 | class BoolParser(object):
 | 
| 53 |     """Parses [[ at compile time and [ at runtime."""
 | 
| 54 | 
 | 
| 55 |     def __init__(self, w_parser):
 | 
| 56 |         # type: (WordEmitter) -> None
 | 
| 57 |         self.w_parser = w_parser
 | 
| 58 |         # Either one word or two words for lookahead
 | 
| 59 |         self.words = []  # type: List[word_t]
 | 
| 60 | 
 | 
| 61 |         self.cur_word = None  # type: Optional[word_t]
 | 
| 62 |         self.bool_id = Id.Undefined_Tok
 | 
| 63 |         self.bool_kind = Kind.Undefined
 | 
| 64 | 
 | 
| 65 |     def _NextOne(self, lex_mode=lex_mode_e.DBracket):
 | 
| 66 |         # type: (lex_mode_t) -> None
 | 
| 67 |         n = len(self.words)
 | 
| 68 |         if n == 2:
 | 
| 69 |             assert lex_mode == lex_mode_e.DBracket
 | 
| 70 |             self.words[0] = self.words[1]
 | 
| 71 |             self.cur_word = self.words[0]
 | 
| 72 |             self.words.pop()
 | 
| 73 |         elif n in (0, 1):
 | 
| 74 |             w = self.w_parser.ReadWord(lex_mode)  # may raise
 | 
| 75 |             if n == 0:
 | 
| 76 |                 self.words.append(w)
 | 
| 77 |             else:
 | 
| 78 |                 self.words[0] = w
 | 
| 79 |             self.cur_word = w
 | 
| 80 | 
 | 
| 81 |         assert self.cur_word is not None
 | 
| 82 |         self.bool_id = word_.BoolId(self.cur_word)
 | 
| 83 |         self.bool_kind = consts.GetKind(self.bool_id)
 | 
| 84 |         #log('--- word %s', self.cur_word)
 | 
| 85 |         #log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)
 | 
| 86 | 
 | 
| 87 |     def _Next(self, lex_mode=lex_mode_e.DBracket):
 | 
| 88 |         # type: (lex_mode_t) -> None
 | 
| 89 |         """Advance to the next token, skipping newlines.
 | 
| 90 | 
 | 
| 91 |         We don't handle newlines in the lexer because we want the
 | 
| 92 |         newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
 | 
| 93 |         It's more complicated if it's Id.WS_Newline -- we might have to
 | 
| 94 |         unread tokens, etc.
 | 
| 95 |         """
 | 
| 96 |         while True:
 | 
| 97 |             self._NextOne(lex_mode=lex_mode)
 | 
| 98 |             if self.bool_id != Id.Op_Newline:
 | 
| 99 |                 break
 | 
| 100 | 
 | 
| 101 |     def _LookAhead(self):
 | 
| 102 |         # type: () -> word_t
 | 
| 103 |         n = len(self.words)
 | 
| 104 |         if n != 1:
 | 
| 105 |             raise AssertionError(n)
 | 
| 106 | 
 | 
| 107 |         w = self.w_parser.ReadWord(lex_mode_e.DBracket)  # may raise
 | 
| 108 |         self.words.append(w)  # Save it for _Next()
 | 
| 109 |         return w
 | 
| 110 | 
 | 
| 111 |     def Parse(self):
 | 
| 112 |         # type: () -> Tuple[bool_expr_t, Token]
 | 
| 113 |         self._Next()
 | 
| 114 | 
 | 
| 115 |         node = self.ParseExpr()
 | 
| 116 |         if self.bool_id != Id.Lit_DRightBracket:
 | 
| 117 |             #p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
 | 
| 118 |             # NOTE: This might be better as unexpected token, since ]] doesn't always
 | 
| 119 |             # make sense.
 | 
| 120 |             p_die('Expected ]]', loc.Word(self.cur_word))
 | 
| 121 | 
 | 
| 122 |         # Extract the ']]' keyword and return it's token for location tracking
 | 
| 123 |         right = word_.LiteralToken(self.cur_word)
 | 
| 124 |         assert right is not None
 | 
| 125 | 
 | 
| 126 |         return node, right
 | 
| 127 | 
 | 
| 128 |     def _TestAtEnd(self):
 | 
| 129 |         # type: () -> bool
 | 
| 130 |         """For unit tests only."""
 | 
| 131 |         return self.bool_id == Id.Lit_DRightBracket
 | 
| 132 | 
 | 
| 133 |     def ParseForBuiltin(self):
 | 
| 134 |         # type: () -> bool_expr_t
 | 
| 135 |         """For test builtin."""
 | 
| 136 |         self._Next()
 | 
| 137 | 
 | 
| 138 |         node = self.ParseExpr()
 | 
| 139 |         if self.bool_id != Id.Eof_Real:
 | 
| 140 |             p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
 | 
| 141 |                   loc.Word(self.cur_word))
 | 
| 142 | 
 | 
| 143 |         return node
 | 
| 144 | 
 | 
| 145 |     def ParseExpr(self):
 | 
| 146 |         # type: () -> bool_expr_t
 | 
| 147 |         """
 | 
| 148 |         Iterative:
 | 
| 149 |         Expr    : Term (OR Term)*
 | 
| 150 | 
 | 
| 151 |         Right recursion:
 | 
| 152 |         Expr    : Term (OR Expr)?
 | 
| 153 |         """
 | 
| 154 |         left = self.ParseTerm()
 | 
| 155 |         # [[ uses || but [ uses -o
 | 
| 156 |         if self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o):
 | 
| 157 |             self._Next()
 | 
| 158 |             right = self.ParseExpr()
 | 
| 159 |             return bool_expr.LogicalOr(left, right)
 | 
| 160 |         else:
 | 
| 161 |             return left
 | 
| 162 | 
 | 
| 163 |     def ParseTerm(self):
 | 
| 164 |         # type: () -> bool_expr_t
 | 
| 165 |         """
 | 
| 166 |         Term    : Negated (AND Negated)*
 | 
| 167 | 
 | 
| 168 |         Right recursion:
 | 
| 169 |         Term    : Negated (AND Term)?
 | 
| 170 |         """
 | 
| 171 |         left = self.ParseNegatedFactor()
 | 
| 172 |         # [[ uses && but [ uses -a
 | 
| 173 |         if self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a):
 | 
| 174 |             self._Next()
 | 
| 175 |             right = self.ParseTerm()
 | 
| 176 |             return bool_expr.LogicalAnd(left, right)
 | 
| 177 |         else:
 | 
| 178 |             return left
 | 
| 179 | 
 | 
| 180 |     def ParseNegatedFactor(self):
 | 
| 181 |         # type: () -> bool_expr_t
 | 
| 182 |         """
 | 
| 183 |         Negated : '!'? Factor
 | 
| 184 |         """
 | 
| 185 |         if self.bool_id == Id.KW_Bang:
 | 
| 186 |             self._Next()
 | 
| 187 |             child = self.ParseFactor()
 | 
| 188 |             return bool_expr.LogicalNot(child)
 | 
| 189 |         else:
 | 
| 190 |             return self.ParseFactor()
 | 
| 191 | 
 | 
| 192 |     def ParseFactor(self):
 | 
| 193 |         # type: () -> bool_expr_t
 | 
| 194 |         """
 | 
| 195 |         Factor  : WORD
 | 
| 196 |                 | UNARY_OP WORD
 | 
| 197 |                 | WORD =~ Regex
 | 
| 198 |                 | WORD BINARY_OP WORD
 | 
| 199 |                 | '(' Expr ')'
 | 
| 200 |         """
 | 
| 201 |         if self.bool_kind == Kind.BoolUnary:
 | 
| 202 |             # Just save the type and not the token itself?
 | 
| 203 |             op = self.bool_id
 | 
| 204 |             self._Next()
 | 
| 205 |             w = self.cur_word
 | 
| 206 |             # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK
 | 
| 207 | 
 | 
| 208 |             tag = w.tag()
 | 
| 209 |             if tag != word_e.Compound and tag != word_e.String:
 | 
| 210 |                 p_die('Invalid argument to unary operator', loc.Word(w))
 | 
| 211 |             self._Next()
 | 
| 212 | 
 | 
| 213 |             tilde = word_.TildeDetect(w)
 | 
| 214 |             if tilde:
 | 
| 215 |                 w = tilde
 | 
| 216 | 
 | 
| 217 |             node = bool_expr.Unary(op, w)  # type: bool_expr_t
 | 
| 218 |             return node
 | 
| 219 | 
 | 
| 220 |         if self.bool_kind == Kind.Word:
 | 
| 221 |             # Peek ahead another token.
 | 
| 222 |             t2 = self._LookAhead()
 | 
| 223 |             t2_bool_id = word_.BoolId(t2)
 | 
| 224 |             t2_bool_kind = consts.GetKind(t2_bool_id)
 | 
| 225 | 
 | 
| 226 |             #log('t2 %s / t2_bool_id %s / t2_bool_kind %s', t2, t2_bool_id, t2_bool_kind)
 | 
| 227 |             # Op for < and >, -a and -o pun
 | 
| 228 |             if t2_bool_kind == Kind.BoolBinary or t2_bool_id in (Id.Op_Less,
 | 
| 229 |                                                                  Id.Op_Great):
 | 
| 230 |                 left = self.cur_word
 | 
| 231 | 
 | 
| 232 |                 self._Next()
 | 
| 233 |                 op = self.bool_id
 | 
| 234 | 
 | 
| 235 |                 if t2_bool_id == Id.BoolBinary_EqualTilde:
 | 
| 236 |                     self._Next(lex_mode=lex_mode_e.BashRegex)
 | 
| 237 |                 else:
 | 
| 238 |                     self._Next()
 | 
| 239 | 
 | 
| 240 |                 right = self.cur_word
 | 
| 241 |                 self._Next()
 | 
| 242 | 
 | 
| 243 |                 tilde = word_.TildeDetect(left)
 | 
| 244 |                 if tilde:
 | 
| 245 |                     left = tilde
 | 
| 246 |                 tilde = word_.TildeDetect(right)
 | 
| 247 |                 if tilde:
 | 
| 248 |                     right = tilde
 | 
| 249 | 
 | 
| 250 |                 return bool_expr.Binary(op, left, right)
 | 
| 251 | 
 | 
| 252 |             else:  # [[ foo ]]
 | 
| 253 |                 w = self.cur_word
 | 
| 254 |                 tilde = word_.TildeDetect(w)
 | 
| 255 |                 if tilde:
 | 
| 256 |                     w = tilde
 | 
| 257 |                 self._Next()
 | 
| 258 |                 return bool_expr.WordTest(w)
 | 
| 259 | 
 | 
| 260 |         if self.bool_id == Id.Op_LParen:
 | 
| 261 |             self._Next()
 | 
| 262 |             node = self.ParseExpr()
 | 
| 263 |             if self.bool_id != Id.Op_RParen:
 | 
| 264 |                 p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
 | 
| 265 |                       loc.Word(self.cur_word))
 | 
| 266 |             self._Next()
 | 
| 267 |             return node
 | 
| 268 | 
 | 
| 269 |         # It's not WORD, UNARY_OP, or '('
 | 
| 270 |         p_die(
 | 
| 271 |             'Unexpected token in boolean expression (%s)' %
 | 
| 272 |             ui.PrettyId(self.bool_id), loc.Word(self.cur_word))
 |