| 1 | #!/usr/bin/env python
 | 
| 2 | # Copyright 2016 Andy Chu. All rights reserved.
 | 
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 | 
| 4 | # you may not use this file except in compliance with the License.
 | 
| 5 | # You may obtain a copy of the License at
 | 
| 6 | #
 | 
| 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 | 
| 8 | """
 | 
| 9 | bool_parse.py - Parse boolean expressions.
 | 
| 10 | 
 | 
| 11 | In contrast to test / [, the parsing of [[ expressions is done BEFORE
 | 
| 12 | evaluation.  So we are parsing a list of Word instances to an AST, rather than
 | 
| 13 | a list of strings.
 | 
| 14 | 
 | 
| 15 | TODO: If we implement "test / [", we should share the parsing.
 | 
| 16 | 
 | 
| 17 | Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
 | 
| 18 | syntax.
 | 
| 19 | 
 | 
| 20 |   Expr    : Term (OR Term)*
 | 
| 21 |   Term    : Negated (AND Negated)*
 | 
| 22 |   Negated : '!'? Factor
 | 
| 23 |   Factor  : WORD
 | 
| 24 |           | UNARY_OP WORD
 | 
| 25 |           | WORD BINARY_OP WORD
 | 
| 26 |           | '(' Expr ')'
 | 
| 27 | 
 | 
| 28 | OR = ||  -o
 | 
| 29 | AND = &&  -a
 | 
| 30 | WORD = any word
 | 
| 31 | UNARY_OP: -z -n, etc.
 | 
| 32 | BINARY_OP: -gt, -ot, ==, etc.
 | 
| 33 | """
 | 
| 34 | 
 | 
| 35 | from core import word
 | 
| 36 | from core import util
 | 
| 37 | from osh.meta import ast, Id, Kind, LookupKind, types
 | 
| 38 | 
 | 
| 39 | try:
 | 
| 40 |   import libc  # for regex_parse
 | 
| 41 | except ImportError:
 | 
| 42 |   from benchmarks import fake_libc as libc
 | 
| 43 | 
 | 
| 44 | lex_mode_e = types.lex_mode_e
 | 
| 45 | log = util.log
 | 
| 46 | 
 | 
| 47 | 
 | 
| 48 | class BoolParser(object):
 | 
| 49 |   """Parses [[ at compile time and [ at runtime."""
 | 
| 50 | 
 | 
| 51 |   def __init__(self, w_parser):
 | 
| 52 |     """
 | 
| 53 |     Args:
 | 
| 54 |       w_parser: WordParser
 | 
| 55 |     """
 | 
| 56 |     self.w_parser = w_parser
 | 
| 57 |     # Either one word or two words for lookahead
 | 
| 58 |     self.words = []
 | 
| 59 | 
 | 
| 60 |     self.cur_word = None
 | 
| 61 |     self.op_id = Id.Undefined_Tok
 | 
| 62 |     self.b_kind = Kind.Undefined
 | 
| 63 | 
 | 
| 64 |     self.error_stack = []
 | 
| 65 | 
 | 
| 66 |   def Error(self):
 | 
| 67 |     return self.error_stack
 | 
| 68 | 
 | 
| 69 |   def AddErrorContext(self, msg, *args, **kwargs):
 | 
| 70 |     err = util.ParseError(msg, *args, **kwargs)
 | 
| 71 |     self.error_stack.append(err)
 | 
| 72 | 
 | 
| 73 |   def _NextOne(self, lex_mode=lex_mode_e.DBRACKET):
 | 
| 74 |     #print('_Next', self.cur_word)
 | 
| 75 |     n = len(self.words)
 | 
| 76 |     if n == 2:
 | 
| 77 |       assert lex_mode == lex_mode_e.DBRACKET
 | 
| 78 |       self.words[0] = self.words[1]
 | 
| 79 |       self.cur_word = self.words[0]
 | 
| 80 |       del self.words[1]
 | 
| 81 |     elif n in (0, 1):
 | 
| 82 |       w = self.w_parser.ReadWord(lex_mode)
 | 
| 83 |       if not w:
 | 
| 84 |         err = self.w_parser.Error()
 | 
| 85 |         self.error_stack.extend(err)
 | 
| 86 |         return False
 | 
| 87 |       if n == 0:
 | 
| 88 |         self.words.append(w)
 | 
| 89 |       else:
 | 
| 90 |         self.words[0] = w
 | 
| 91 |       self.cur_word = w
 | 
| 92 | 
 | 
| 93 |     self.op_id = word.BoolId(self.cur_word)
 | 
| 94 |     self.b_kind = LookupKind(self.op_id)
 | 
| 95 |     #log('--- word %s', self.cur_word)
 | 
| 96 |     #log('op_id %s %s %s', self.op_id, self.b_kind, lex_mode)
 | 
| 97 |     return True
 | 
| 98 | 
 | 
| 99 |   def _Next(self, lex_mode=lex_mode_e.DBRACKET):
 | 
| 100 |     """Advance to the next token, skipping newlines.
 | 
| 101 | 
 | 
| 102 |     We don't handle newlines in the lexer because we want the newline after ]]
 | 
| 103 |     to be Id.Op_Newline rather than Id.WS_Newline.  It's more complicated if
 | 
| 104 |     it's Id.WS_Newline -- we might have to unread tokens, etc.
 | 
| 105 |     """
 | 
| 106 |     while True:
 | 
| 107 |       w = self._NextOne(lex_mode=lex_mode)
 | 
| 108 |       if not w:
 | 
| 109 |         return False
 | 
| 110 |       if self.op_id != Id.Op_Newline:
 | 
| 111 |         break
 | 
| 112 |     return True
 | 
| 113 | 
 | 
| 114 |   def _LookAhead(self):
 | 
| 115 |     n = len(self.words)
 | 
| 116 |     if n != 1:
 | 
| 117 |       raise AssertionError(self.words)
 | 
| 118 | 
 | 
| 119 |     w = self.w_parser.ReadWord(lex_mode_e.DBRACKET)
 | 
| 120 |     self.words.append(w)  # Save it for _Next()
 | 
| 121 |     return w
 | 
| 122 | 
 | 
| 123 |   def Parse(self):
 | 
| 124 |     if not self._Next(): return None
 | 
| 125 | 
 | 
| 126 |     node = self.ParseExpr()
 | 
| 127 |     if self.op_id != Id.Lit_DRightBracket:
 | 
| 128 |       self.AddErrorContext("Unexpected extra word %r", self.cur_word,
 | 
| 129 |           word=self.cur_word)
 | 
| 130 |       return None
 | 
| 131 |     return node
 | 
| 132 | 
 | 
| 133 |   def _TestAtEnd(self):
 | 
| 134 |     """For unit tests only."""
 | 
| 135 |     return self.op_id == Id.Lit_DRightBracket
 | 
| 136 | 
 | 
| 137 |   def ParseForBuiltin(self):
 | 
| 138 |     """For test builtin."""
 | 
| 139 |     if not self._Next(): return None
 | 
| 140 | 
 | 
| 141 |     node = self.ParseExpr()
 | 
| 142 |     if self.op_id != Id.Eof_Real:
 | 
| 143 |       self.AddErrorContext(
 | 
| 144 |           'Unexpected trailing word in test expression: %s',
 | 
| 145 |           self.cur_word)
 | 
| 146 |       return None
 | 
| 147 | 
 | 
| 148 |     return node
 | 
| 149 | 
 | 
| 150 |   def ParseExpr(self):
 | 
| 151 |     """
 | 
| 152 |     Iterative:
 | 
| 153 |     Expr    : Term (OR Term)*
 | 
| 154 | 
 | 
| 155 |     Right recursion:
 | 
| 156 |     Expr    : Term (OR Expr)?
 | 
| 157 |     """
 | 
| 158 |     left = self.ParseTerm()
 | 
| 159 |     # [[ uses || while [ uses -o
 | 
| 160 |     if self.op_id in (Id.Op_DPipe, Id.BoolUnary_o):
 | 
| 161 |       if not self._Next(): return None
 | 
| 162 |       right = self.ParseExpr()
 | 
| 163 |       if not right: return None
 | 
| 164 |       return ast.LogicalOr(left, right)
 | 
| 165 |     else:
 | 
| 166 |       return left
 | 
| 167 | 
 | 
| 168 |   def ParseTerm(self):
 | 
| 169 |     """
 | 
| 170 |     Term    : Negated (AND Negated)*
 | 
| 171 | 
 | 
| 172 |     Right recursion:
 | 
| 173 |     Term    : Negated (AND Term)?
 | 
| 174 |     """
 | 
| 175 |     left = self.ParseNegatedFactor()
 | 
| 176 |     if not left:
 | 
| 177 |       return None  # TODO: An exception should handle this case.
 | 
| 178 |     # [[ uses && while [ uses -a
 | 
| 179 |     if self.op_id in (Id.Op_DAmp, Id.BoolUnary_a):
 | 
| 180 |       if not self._Next(): return None
 | 
| 181 |       right = self.ParseTerm()
 | 
| 182 |       if not right: return None
 | 
| 183 |       return ast.LogicalAnd(left, right)
 | 
| 184 |     else:
 | 
| 185 |       return left
 | 
| 186 | 
 | 
| 187 |   def ParseNegatedFactor(self):
 | 
| 188 |     """
 | 
| 189 |     Negated : '!'? Factor
 | 
| 190 |     """
 | 
| 191 |     if self.op_id == Id.KW_Bang:
 | 
| 192 |       if not self._Next(): return None
 | 
| 193 |       child = self.ParseFactor()
 | 
| 194 |       if not child: return None
 | 
| 195 |       return ast.LogicalNot(child)
 | 
| 196 |     else:
 | 
| 197 |       return self.ParseFactor()
 | 
| 198 | 
 | 
| 199 |   def ParseFactor(self):
 | 
| 200 |     """
 | 
| 201 |     Factor  : WORD
 | 
| 202 |             | UNARY_OP WORD
 | 
| 203 |             | WORD BINARY_OP WORD
 | 
| 204 |             | '(' Expr ')'
 | 
| 205 |     """
 | 
| 206 |     if self.b_kind == Kind.BoolUnary:
 | 
| 207 |       # Just save the type and not the token itself?
 | 
| 208 |       op = self.op_id
 | 
| 209 |       if not self._Next(): return None
 | 
| 210 |       w = self.cur_word
 | 
| 211 |       if not self._Next(): return None
 | 
| 212 |       node = ast.BoolUnary(op, w)
 | 
| 213 |       return node
 | 
| 214 | 
 | 
| 215 |     if self.b_kind == Kind.Word:
 | 
| 216 |       # Peek ahead another token.
 | 
| 217 |       t2 = self._LookAhead()
 | 
| 218 |       t2_op_id = word.BoolId(t2)
 | 
| 219 |       t2_b_kind = LookupKind(t2_op_id)
 | 
| 220 | 
 | 
| 221 |       #log('t2 %s / t2_op_id %s / t2_b_kind %s', t2, t2_op_id, t2_b_kind)
 | 
| 222 |       # Redir pun for < and >, -a and -o pun
 | 
| 223 |       if t2_b_kind in (Kind.BoolBinary, Kind.Redir):
 | 
| 224 |         left = self.cur_word
 | 
| 225 | 
 | 
| 226 |         if not self._Next(): return None
 | 
| 227 |         op = self.op_id
 | 
| 228 | 
 | 
| 229 |         # TODO: Need to change to lex_mode_e.BASH_REGEX.
 | 
| 230 |         # _Next(lex_mode) then?
 | 
| 231 |         is_regex = t2_op_id == Id.BoolBinary_EqualTilde
 | 
| 232 |         if is_regex:
 | 
| 233 |           if not self._Next(lex_mode=lex_mode_e.BASH_REGEX): return None
 | 
| 234 |         else:
 | 
| 235 |           if not self._Next(): return None
 | 
| 236 | 
 | 
| 237 |         right = self.cur_word
 | 
| 238 |         if is_regex:
 | 
| 239 |           # TODO: Quoted parts need to be regex-escaped, e.g. [[ $a =~ "{" ]].
 | 
| 240 |           # I don't think libc has a function to do this.  Escape these
 | 
| 241 |           # characters:
 | 
| 242 |           # https://www.gnu.org/software/sed/manual/html_node/ERE-syntax.html0
 | 
| 243 | 
 | 
| 244 |           ok, regex_str, unused_quoted = word.StaticEval(right)
 | 
| 245 | 
 | 
| 246 |           # doesn't contain $foo, etc.
 | 
| 247 |           if ok and not libc.regex_parse(regex_str):
 | 
| 248 |             self.AddErrorContext("Invalid regex: %r" % regex_str, word=right)
 | 
| 249 |             return None
 | 
| 250 | 
 | 
| 251 |         if not self._Next(): return None
 | 
| 252 |         return ast.BoolBinary(op, left, right)
 | 
| 253 |       else:
 | 
| 254 |         # [[ foo ]] 
 | 
| 255 |         w = self.cur_word
 | 
| 256 |         if not self._Next(): return None
 | 
| 257 |         return ast.WordTest(w)
 | 
| 258 | 
 | 
| 259 |     if self.op_id == Id.Op_LParen:
 | 
| 260 |       if not self._Next(): return None
 | 
| 261 |       node = self.ParseExpr()
 | 
| 262 |       if self.op_id != Id.Op_RParen:
 | 
| 263 |         self.AddErrorContext(
 | 
| 264 |             'Expected ), got %s', self.cur_word, word=self.cur_word)
 | 
| 265 |         return None
 | 
| 266 |       if not self._Next(): return None
 | 
| 267 |       return node
 | 
| 268 | 
 | 
| 269 |     # TODO: A proper error, e.g. for [[ && ]] or [[ ]]
 | 
| 270 |     self.AddErrorContext(
 | 
| 271 |         'Unexpected token: %s' % self.cur_word, word=self.cur_word)
 | 
| 272 |     return None
 |