"""
parse_lib.py - Consolidate various parser instantiations here.
"""

from core import lexer
from core import reader

from osh import lex
from osh import word_parse
from osh import cmd_parse
from osh.meta import Id, IdInstance

# bin/osh should work without compiling fastlex?  But we want all the unit
# tests to run with a known version of it.
try:
  import fastlex
except ImportError:
  fastlex = None


class MatchToken_Slow(object):
  """An abstract matcher that doesn't depend on OSH."""
  def __init__(self, lexer_def):
    self.lexer_def = {}
    for state, pat_list in lexer_def.items():
      self.lexer_def[state] = lexer.CompileAll(pat_list)

  def __call__(self, lex_mode, line, start_pos):
    """Returns (id, end_pos)."""
    # Simulate the EOL handling in re2c.
    if start_pos >= len(line):
      return Id.Eol_Tok, start_pos

    re_list = self.lexer_def[lex_mode]
    matches = []
    for regex, tok_type in re_list:
      m = regex.match(line, start_pos)  # left-anchored
      if m:
        matches.append((m.end(0), tok_type, m.group(0)))
    if not matches:
      raise AssertionError('no match at position %d: %r' % (start_pos, line))
    end_pos, tok_type, tok_val = max(matches, key=lambda m: m[0])
    return tok_type, end_pos
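
# Usage sketch (comments only, not executed): the slow matcher is built once
# from the lexer definition and then called like a function.  'some_lex_mode'
# is a placeholder for any key of lex.LEXER_DEF, not a real name.
#
#   matcher = MatchToken_Slow(lex.LEXER_DEF)
#   tok_type, end_pos = matcher(some_lex_mode, 'echo hi\n', 0)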


def MatchToken_Fast(lex_mode, line, start_pos):
  """Returns (id, end_pos)."""
  tok_type, end_pos = fastlex.MatchToken(lex_mode.enum_id, line, start_pos)
  # IMPORTANT: We're reusing Id instances here.  Ids are very common, so this
  # saves memory.
  return IdInstance(tok_type), end_pos


def _MakeMatcher():
  # NOTE: Could have an environment variable to control this for speed?
  #return MatchToken_Slow(lex.LEXER_DEF)

  if fastlex:
    return MatchToken_Fast
  else:
    return MatchToken_Slow(lex.LEXER_DEF)
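
# Sketch of the environment variable idea from the NOTE inside _MakeMatcher
# above.  The variable name OSH_SLOW_LEX is hypothetical, not something this
# codebase defines:
#
#   import os
#   def _MakeMatcher():
#     if fastlex and not os.environ.get('OSH_SLOW_LEX'):
#       return MatchToken_Fast
#     return MatchToken_Slow(lex.LEXER_DEF)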


def InitLexer(s, arena):
  """For tests only."""
  match_func = _MakeMatcher()
  line_lexer = lexer.LineLexer(match_func, '', arena)
  line_reader = reader.StringLineReader(s, arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return line_reader, lx
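
# Test usage sketch: the arena is assumed to come from the test harness (its
# construction isn't part of this module), and the returned lexer can be
# handed to a word parser, mirroring MakeParser below.
#
#   line_reader, lx = InitLexer('echo hi\n', arena)
#   w_parser = word_parse.WordParser(lx, line_reader)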


# New API:
# - MakeParser(reader, arena) - for top level, 'source'
#   - eval: MakeParser(StringLineReader(), arena)
#   - source: MakeParser(FileLineReader(), arena)
# - MakeParserForCommandSub(reader, lexer) -- arena is inside lexer/reader
# - MakeParserForCompletion(code_str)  # no arena?  no errors?
# - MakeWordParserForHereDoc(lines, arena)  # arena is lost
#   - although you want to AddLine
#   - line_id = arena.AddLine()


# NOTE:
# - Does it make sense to create ParseState objects?  They have no dependencies
#   -- just pure data.  Or just recreate them every time?  One issue is that
#   you need somewhere to store the side effects -- errors for parsers, and the
#   actual values for the evaluators/executors.

def MakeParser(line_reader, arena):
  """Top level parser."""
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  w_parser = word_parse.WordParser(lx, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lx, line_reader, arena)
  return w_parser, c_parser
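
# Usage sketch, following the 'New API' comment above.  code_str and arena are
# assumed to be provided by the caller; this mirrors the eval-style case:
#
#   line_reader = reader.StringLineReader(code_str, arena)
#   w_parser, c_parser = MakeParser(line_reader, arena)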


# TODO: We could reuse w_parser with Reset() each time.  That's what the REPL
# does.
# But LineLexer and Lexer are also stateful!  So that might not be worth it.
# Hm, the REPL only does line_reader.Reset()?
#
# NOTE: It probably needs to take a VirtualLineReader for $PS1, $PS2, ...
# values.
def MakeParserForCompletion(code_str, arena):
  """Parser for partial lines."""
  # NOTE: We don't need to use an arena here?  Or we need a "scratch arena"
  # that doesn't interfere with the rest of the program.
  line_reader = reader.StringLineReader(code_str, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)  # AtEnd() is true
  lx = lexer.Lexer(line_lexer, line_reader)
  w_parser = word_parse.WordParser(lx, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lx, line_reader, arena)
  return w_parser, c_parser


def MakeWordParserForHereDoc(lines, arena):
  """Word parser for here doc bodies, given their lines up front."""
  line_reader = reader.VirtualLineReader(lines, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return word_parse.WordParser(lx, line_reader)


def MakeWordParserForPlugin(code_str, arena):
  """Word parser over a plain code string."""
  line_reader = reader.StringLineReader(code_str, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return word_parse.WordParser(lx, line_reader)


def MakeParserForCommandSub(line_reader, lexer):
  """To parse command sub, we want a fresh word parser state.

  It's a new instance based on the same lexer and arena.
  """
  arena = line_reader.arena
  w_parser = word_parse.WordParser(lexer, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lexer, line_reader, arena)
  return c_parser
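
# Hypothetical call-site sketch: the real caller lives in word_parse.py (see
# the comment at the bottom of this file for the attribute names it uses);
# only the construction is shown here.
#
#   c_parser = parse_lib.MakeParserForCommandSub(self.line_reader, self.lexer)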


# Another parser instantiation:
# - For Array Literal in word_parse.py WordParser:
#   w_parser = WordParser(self.lexer, self.line_reader)
 |