1 | #!/usr/bin/env python2
|
2 | """
|
3 | lexer_test.py: Tests for lexer.py
|
4 | """
|
5 |
|
6 | import unittest
|
7 |
|
8 | from _devbuild.gen.id_kind_asdl import Id, Id_str
|
9 | from _devbuild.gen.types_asdl import lex_mode_e
|
10 | from core import test_lib
|
11 | from mycpp.mylib import log
|
12 | from frontend.lexer import DummyToken as Tok
|
13 | from frontend.lexer_def import LEXER_DEF
|
14 | from frontend import lexer
|
15 | from frontend import reader
|
16 |
|
17 |
|
def _PrintfOuterTokens(fmt):
    """Lex the string `fmt` in PrintfOuter mode and print every token.

    A fresh parse context, arena, and line reader are created for each call.
    Reading stops after the first token whose id is Id.Eof_Real or Id.Eol_Tok
    (that token is printed too).  A log line is emitted before and after.
    """
    log('PrintfOuter lexing %r', fmt)

    ctx = test_lib.InitParseContext()
    src_arena = test_lib.MakeArena('<lexer_test.py>')
    printf_lexer = ctx.MakeLexer(reader.StringLineReader(fmt, src_arena))

    stop_ids = (Id.Eof_Real, Id.Eol_Tok)
    finished = False
    while not finished:
        tok = printf_lexer.Read(lex_mode_e.PrintfOuter)
        print(tok)
        finished = tok.id in stop_ids

    log('')
|
33 |
|
34 |
|
def _PrintToken(t):
    """Print token `t` as its Id name (right-aligned, width 20) and repr of t.tval."""
    id_name = Id_str(t.id)
    print('%20s %r' % (id_name, t.tval))
|
38 |
|
39 |
|
def _PrintAllTokens(lx, lex_mode):
    """Read tokens from `lx` in `lex_mode`, printing each via _PrintToken.

    The terminating token (id Id.Eof_Real or Id.Eol_Tok) is printed as well,
    after which the function returns.
    """
    stop_ids = (Id.Eof_Real, Id.Eol_Tok)

    tok = lx.Read(lex_mode)
    _PrintToken(tok)
    while tok.id not in stop_ids:
        tok = lx.Read(lex_mode)
        _PrintToken(tok)
|
46 |
|
47 |
|
class TokenTest(unittest.TestCase):
    """Tests for dummy token construction and the lexer's read, look-ahead,
    unread, and hint operations."""

    def testToken(self):
        """Smoke test: dummy tokens can be constructed and printed."""
        t = Tok(Id.Lit_Chars, 'abc')
        print(t)

        # This redundancy is OK I guess.
        t = Tok(Id.Lit_LBrace, '{')
        print(t)

        t = Tok(Id.Op_Semi, ';')
        print(t)

    def testPrintStats(self):
        """Print the size of each lexer state, largest first, plus totals."""
        states = sorted(LEXER_DEF.items(),
                        key=lambda pair: len(pair[1]),
                        reverse=True)
        total = 0
        for state, re_list in states:
            n = len(re_list)
            # Format explicitly: without print_function, python2 would print
            # print(n, state) as a tuple repr rather than two fields.
            print('%d %s' % (n, state))
            total += n

        print("Number of lex states: %d" % len(LEXER_DEF))
        print("Number of token dispatches: %d" % total)

    def testMoveToNextLine(self):
        """Test that it doesn't mess up invariants."""
        arena = test_lib.MakeArena('<lexer_test.py>')
        # Two lines: 'cd {' followed by '}'.
        code_str = 'cd {\n}'

        print('=== Printing all tokens')
        _, lx = test_lib.InitLexer(code_str, arena)
        _PrintAllTokens(lx, lex_mode_e.ShCommand)

        # print('') rather than print(): the latter prints "()" on python2.
        print('')
        print('=== MoveToNextLine() and LookAheadOne()')
        _, lx = test_lib.InitLexer(code_str, arena)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_Chars, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.WS_Space, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_LBrace, t.id)

        # The newline token has not been read yet, so moving to the next
        # line at this point is expected to trip an assertion.
        with self.assertRaises(AssertionError):
            lx.MoveToNextLine()

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Op_Newline, t.id)

        # Nothing is left on the first line, so look-ahead can't see a token.
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Unknown_Tok, look_ahead_id)

        # Method being tested
        lx.MoveToNextLine()

        # Lookahead
        print('Lookahead')
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Lit_RBrace, look_ahead_id)

        # Lookahead again: the first look-ahead must not have consumed input.
        print('Lookahead 2')
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Lit_RBrace, look_ahead_id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_RBrace, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Eof_Real, t.id)

    def testMaybeUnreadOne(self):
        """After MaybeUnreadOne() + PushHint(), ')' is re-read under the hinted Id."""
        arena = test_lib.MakeArena('<lexer_test.py>')
        _, lx = test_lib.InitLexer('()', arena)

        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Op_LParen, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Op_RParen, t.id)

        # Go back
        lx.MaybeUnreadOne()

        # Push Hint
        lx.PushHint(Id.Op_RParen, Id.Right_CasePat)

        # Now we see it again, as another Id
        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Right_CasePat, t.id)

    def testPrintf(self):
        """Print PrintfOuter tokens for inputs with and without a NUL byte."""
        # Demonstrate input handling quirk

        # Get Id.Eof_Real because len('') == 0
        _PrintfOuterTokens('')

        # Get Id.Eol_Tok because len('\0') == 1
        _PrintfOuterTokens('\0')

        # Get x, then Id.Eof_Real because there are no more lines
        _PrintfOuterTokens('x\0')
|
171 |
|
class TokenFunctionsTest(unittest.TestCase):
    """Tests for the token predicate helpers exposed by the lexer module."""

    def testContainsEquals(self):
        """Check TokenContains/Equals/StartsWith/EndsWith on the first token."""
        src_arena = test_lib.MakeArena('<lexer_test.py>')
        _, lex = test_lib.InitLexer('echo "hi $name"', src_arena)

        first = lex.Read(lex_mode_e.ShCommand)
        print(first)

        # (predicate, argument, expected result), evaluated in this order.
        cases = [
            (lexer.TokenContains, 'echo', True),
            (lexer.TokenContains, 'ech', True),
            (lexer.TokenContains, 'cho', True),
            (lexer.TokenContains, 'c', True),
            (lexer.TokenContains, '', True),

            (lexer.TokenEquals, 'echo', True),
            (lexer.TokenEquals, 'ech', False),

            (lexer.TokenStartsWith, '', True),
            (lexer.TokenStartsWith, 'e', True),
            (lexer.TokenStartsWith, 'ech', True),
            (lexer.TokenStartsWith, 'cho', False),

            (lexer.TokenEndsWith, '', True),
            (lexer.TokenEndsWith, 'ech', False),
            (lexer.TokenEndsWith, 'cho', True),
            (lexer.TokenEndsWith, 'o', True),
        ]
        for predicate, text, expected in cases:
            self.assertEqual(expected, predicate(first, text))

    def testIsPlusEquals(self):
        """IsPlusEquals is True for a 'foo+=' token and False for 'foo='."""
        src_arena = test_lib.MakeArena('<lexer_test.py>')

        for code, expected in (('foo+=b"', True), ('foo=b"', False)):
            _, lex = test_lib.InitLexer(code, src_arena)

            first = lex.Read(lex_mode_e.ShCommand)
            print(first)
            self.assertEqual(expected, lexer.IsPlusEquals(first))
|
213 |
|
214 |
|
# Run every TestCase in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|