| 1 | """
 | 
| 2 | parse_lib.py - Consolidate various parser instantiations here.
 | 
| 3 | """
 | 
| 4 | 
 | 
| 5 | from _devbuild.gen.id_kind_asdl import Id_t
 | 
| 6 | from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
 | 
| 7 |                                        ArgList, Proc, Func, command, pat_t)
 | 
| 8 | from _devbuild.gen.types_asdl import lex_mode_e
 | 
| 9 | from _devbuild.gen import grammar_nt
 | 
| 10 | 
 | 
| 11 | from asdl import format as fmt
 | 
| 12 | from core import state
 | 
| 13 | from frontend import lexer
 | 
| 14 | from frontend import reader
 | 
| 15 | from osh import tdop
 | 
| 16 | from osh import arith_parse
 | 
| 17 | from osh import cmd_parse
 | 
| 18 | from osh import word_parse
 | 
| 19 | from mycpp import mylib
 | 
| 20 | from mycpp.mylib import log
 | 
| 21 | from ysh import expr_parse
 | 
| 22 | from ysh import expr_to_ast
 | 
| 23 | from ysh.expr_parse import ctx_PNodeAllocator
 | 
| 24 | 
 | 
| 25 | _ = log
 | 
| 26 | 
 | 
| 27 | from typing import Any, List, Tuple, Dict, TYPE_CHECKING
 | 
| 28 | if TYPE_CHECKING:
 | 
| 29 |     from core.alloc import Arena
 | 
| 30 |     from core.util import _DebugFile
 | 
| 31 |     from core import optview
 | 
| 32 |     from frontend.lexer import Lexer
 | 
| 33 |     from frontend.reader import _Reader
 | 
| 34 |     from osh.tdop import TdopParser
 | 
| 35 |     from osh.word_parse import WordParser
 | 
| 36 |     from osh.cmd_parse import CommandParser
 | 
| 37 |     from pgen2.grammar import Grammar
 | 
| 38 | 
 | 
| 39 | 
 | 
class _BaseTrail(object):
    """Base class has members, but no-ops for methods.

    Installed as ParseContext's default trail so the parsers can call
    SetLatestWords/AppendToken unconditionally; the Trail subclass actually
    records state for interactive completion and history expansion.
    """

    def __init__(self):
        # type: () -> None
        # Words from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list, but
        # then I ran into the "case classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable name
        # completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than one
        # line!
        self.tokens = []  # type: List[Token]

        self.alias_words = [
        ]  # type: List[CompoundWord]  # words INSIDE an alias expansion
        # Toggled by ctx_Alias while an alias expansion is being parsed.
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        # No-op here; Trail resets its recorded lists.
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        # No-op here; Trail saves the words/redirects of the latest command.
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        # No-op here; Trail records non-ignored tokens for completion.
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None
            """Dump all recorded state to the debug file (Python-only)."""

            # note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln('  words:')
            for w in self.words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  redirects:')
            for r in self.redirects:
                fmt.PrettyPrint(r, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  tokens:')
            for p in self.tokens:
                fmt.PrettyPrint(p, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  alias_words:')
            for w in self.alias_words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)
 | 
| 117 | 
 | 
| 118 | 
 | 
class ctx_Alias(object):
    """Context manager: mark a trail as being inside an alias expansion.

    CommandParser enters this while parsing the expanded text of an alias.
    For example, for

    alias ll='ls -l'

    we still want to capture 'ls' as the FIRST word, but SetLatestWords and
    AppendToken must stay inactive for the rest of 'ls -l' — completion
    doesn't need those tokens, and history expansion (e.g. echo !1) should
    refer to what the user actually typed, not the post-expansion words.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        return None

    def __exit__(self, exc_type, exc_value, tb):
        # type: (Any, Any, Any) -> None
        # Leaving the alias: resume normal trail recording.
        self.trail._expanding_alias = False
 | 
| 147 | 
 | 
| 148 | 
 | 
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Truncate in place rather than rebinding, since the parsers may hold
        # references to these lists.
        del self.words[:]
        del self.redirects[:]
        # The other ones don't need to be reset?
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if not self._expanding_alias:
            self.words = words
            self.redirects = redirects
        else:
            # Words from inside an alias expansion are kept separate.
            self.alias_words = words

    def AppendToken(self, token):
        # type: (Token) -> None
        # We don't want tokens inside aliases.
        if not self._expanding_alias:
            self.tokens.append(token)
 | 
| 181 | 
 | 
| 182 | 
 | 
if TYPE_CHECKING:
    # (name, offset) pairs for aliases currently being expanded — presumably
    # used to detect and stop recursive alias expansion; confirm against the
    # usage in osh/cmd_parse.py.
    AliasesInFlight = List[Tuple[str, int]]
 | 
| 185 | 
 | 
| 186 | 
 | 
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.

    This class is mainly a factory: it consolidates the instantiation of
    lexers, word/command/arith parsers, and the YSH expression parser.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            # Parse-tree printer for debugging; only available when the
            # transformer exists (i.e. a grammar was given).
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        # Install a real Trail so parsers record completion/history state.
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't perform
        better.
        """
        # Take Arena from LineReader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        """Make the main OSH command parser (with its paired word parser)."""
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy()  # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        # Like MakeOshParser, but forces YSH parse options regardless of the
        # context's own parse_opts.
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeOilOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        # Caller supplies the lexer, unlike the other factories here.
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str):
        # type: (str) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith)  # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        # Fresh expression parser per parse; PNodes are freed by the
        # ctx_PNodeAllocator context manager in each Parse* method below.
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:  # flip to 1 to dump the parse tree while debugging
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token  # VarDecl didn't fill this in

        return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)
            if 0:  # flip to 1 to dump the parse tree while debugging
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakeMutation(pnode)
            ast_node.keyword = kw_token  # Mutation didn't fill this in

        return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0:  # flip to 1 to dump the parse tree while debugging
                self.p_printer.Print(pnode)

            # Results are written into 'out' rather than returned.
            self.tr.ProcCallArgs(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0:  # flip to 1 to dump the parse tree while debugging
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

        return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

        return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:  # flip to 1 to dump the parse tree while debugging
                self.p_printer.Print(pnode)

            # The signature is written into 'out'; the body is parsed later.
            out.sig = self.tr.Proc(pnode)

        return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:  # flip to 1 to dump the parse tree while debugging
                self.p_printer.Print(pnode)

            self.tr.YshFunc(pnode, out)
        return last_token
 | 
| 401 | 
 | 
| 402 | 
 | 
| 403 | # Another parser instantiation:
 | 
| 404 | # - For Array Literal in word_parse.py WordParser:
 | 
| 405 | #   w_parser = WordParser(self.lexer, self.line_reader)
 |