OILS / frontend / lexer_test.py View on Github | oilshell.org

216 lines, 142 significant
1#!/usr/bin/env python2
2"""
3lexer_test.py: Tests for lexer.py
4"""
5
6import unittest
7
8from _devbuild.gen.id_kind_asdl import Id, Id_str
9from _devbuild.gen.types_asdl import lex_mode_e
10from core import test_lib
11from mycpp.mylib import log
12from frontend.lexer import DummyToken as Tok
13from frontend.lexer_def import LEXER_DEF
14from frontend import lexer
15from frontend import reader
16
17
def _PrintfOuterTokens(fmt):
    """Lex the string fmt in lex_mode_e.PrintfOuter and print each token.

    Reading stops after the first token whose id is Id.Eof_Real or
    Id.Eol_Tok; that final token is printed as well.  A log line is written
    before the dump and an empty one after it.
    """
    log('PrintfOuter lexing %r', fmt)

    ctx = test_lib.InitParseContext()
    src_arena = test_lib.MakeArena('<lexer_test.py>')
    str_reader = reader.StringLineReader(fmt, src_arena)
    printf_lexer = ctx.MakeLexer(str_reader)

    # Read-then-test: at least one token is always read and printed.
    tok = printf_lexer.Read(lex_mode_e.PrintfOuter)
    print(tok)
    while tok.id not in (Id.Eof_Real, Id.Eol_Tok):
        tok = printf_lexer.Read(lex_mode_e.PrintfOuter)
        print(tok)

    log('')
33
34
def _PrintToken(t):
    """Print one token as its Id name (right-aligned to 20 columns) and the
    repr of its tval."""
    id_name = Id_str(t.id)
    print('%20s %r' % (id_name, t.tval))
38
39
def _PrintAllTokens(lx, lex_mode):
    """Read tokens from lx in lex_mode and print each with _PrintToken().

    Stops after printing the first token whose id is Id.Eof_Real or
    Id.Eol_Tok.
    """
    finished = False
    while not finished:
        tok = lx.Read(lex_mode)
        _PrintToken(tok)
        finished = tok.id in (Id.Eof_Real, Id.Eol_Tok)
46
47
class TokenTest(unittest.TestCase):
    """Tests for dummy token construction and the lexer's Read(),
    LookAheadOne(), MoveToNextLine(), MaybeUnreadOne() and PushHint()."""

    def testToken(self):
        # Smoke test only: build a few dummy tokens and print them.  There
        # are no assertions, so this just checks that nothing raises.
        t = Tok(Id.Lit_Chars, 'abc')
        print(t)

        # This redundancy is OK I guess.
        t = Tok(Id.Lit_LBrace, '{')
        print(t)

        t = Tok(Id.Op_Semi, ';')
        print(t)

    def testPrintStats(self):
        # Print every lex state with its number of entries, largest first,
        # followed by the totals.  Informational; no assertions.
        states = sorted(LEXER_DEF.items(),
                        key=lambda pair: len(pair[1]),
                        reverse=True)
        total = 0
        for state, re_list in states:
            n = len(re_list)
            # Format explicitly: under Python 2 (no print_function import),
            # print(n, state) is the print statement applied to a tuple and
            # would show "(n, state)" instead of "n state".
            print('%d %s' % (n, state))
            total += n

        print("Number of lex states: %d" % len(LEXER_DEF))
        print("Number of token dispatches: %d" % total)

    def testMoveToNextLine(self):
        """Test that it doesn't mess up invariants."""
        arena = test_lib.MakeArena('<lexer_test.py>')
        code_str = '''cd {
}'''

        print('=== Printing all tokens')
        _, lx = test_lib.InitLexer(code_str, arena)
        _PrintAllTokens(lx, lex_mode_e.ShCommand)

        # print('') rather than print(): under Python 2 the latter prints
        # the empty tuple "()" instead of a blank line.
        print('')
        print('=== MoveToNextLine() and LookAheadOne()')
        _, lx = test_lib.InitLexer(code_str, arena)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_Chars, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.WS_Space, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_LBrace, t.id)

        # The newline token of the first line has not been read yet, so
        # MoveToNextLine() is expected to trip an assertion here.
        with self.assertRaises(AssertionError):
            lx.MoveToNextLine()

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Op_Newline, t.id)

        # Before moving to the next line, lookahead is expected to report
        # Id.Unknown_Tok.
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Unknown_Tok, look_ahead_id)

        # Method being tested
        lx.MoveToNextLine()

        # Lookahead
        print('Lookahead')
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Lit_RBrace, look_ahead_id)

        # Lookahead again: the result must be the same, i.e. looking ahead
        # did not consume the token.
        print('Lookahead 2')
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Lit_RBrace, look_ahead_id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_RBrace, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Eof_Real, t.id)

    def testMaybeUnreadOne(self):
        # Read '(' and ')', un-read the ')', then check that a pushed hint
        # makes the re-read ')' come back with a different Id.
        arena = test_lib.MakeArena('<lexer_test.py>')
        _, lx = test_lib.InitLexer('()', arena)

        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Op_LParen, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Op_RParen, t.id)

        # Go back
        lx.MaybeUnreadOne()

        # Push Hint
        lx.PushHint(Id.Op_RParen, Id.Right_CasePat)

        # Now we see it again, as another Id
        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Right_CasePat, t.id)

    def testPrintf(self):
        # Demonstrate input handling quirk

        # Get Id.Eof_Real because len('') == 0
        _PrintfOuterTokens('')

        # Get Id.Eol_Tok because len('\0') == 1
        _PrintfOuterTokens('\0')

        # Get x, then Id.Eof_Real because there are no more lines
        _PrintfOuterTokens('x\0')
170
171
class TokenFunctionsTest(unittest.TestCase):
    """Tests for the token predicate functions in the lexer module."""

    def testContainsEquals(self):
        # Exercises TokenContains, TokenEquals, TokenStartsWith and
        # TokenEndsWith against the first token of the line below.
        arena = test_lib.MakeArena('<lexer_test.py>')
        _, lx = test_lib.InitLexer('echo "hi $name"', arena)

        first_tok = lx.Read(lex_mode_e.ShCommand)
        print(first_tok)

        # (predicate, string argument, expected result), checked in order.
        cases = [
            (lexer.TokenContains, 'echo', True),
            (lexer.TokenContains, 'ech', True),
            (lexer.TokenContains, 'cho', True),
            (lexer.TokenContains, 'c', True),
            (lexer.TokenContains, '', True),

            (lexer.TokenEquals, 'echo', True),
            (lexer.TokenEquals, 'ech', False),

            (lexer.TokenStartsWith, '', True),
            (lexer.TokenStartsWith, 'e', True),
            (lexer.TokenStartsWith, 'ech', True),
            (lexer.TokenStartsWith, 'cho', False),

            (lexer.TokenEndsWith, '', True),
            (lexer.TokenEndsWith, 'ech', False),
            (lexer.TokenEndsWith, 'cho', True),
            (lexer.TokenEndsWith, 'o', True),
        ]
        for predicate, arg, expected in cases:
            self.assertEqual(expected, predicate(first_tok, arg))

    def testIsPlusEquals(self):
        # IsPlusEquals() is checked on the first token of each snippet.
        arena = test_lib.MakeArena('<lexer_test.py>')

        snippets = [
            ('foo+=b"', True),
            ('foo=b"', False),
        ]
        for code_str, expected in snippets:
            _, lx = test_lib.InitLexer(code_str, arena)

            first_tok = lx.Read(lex_mode_e.ShCommand)
            print(first_tok)
            self.assertEqual(expected, lexer.IsPlusEquals(first_tok))
213
214
# Script entry point: run every TestCase defined in this module.
if __name__ == '__main__':
    unittest.main()