1 | #!/usr/bin/env python2
|
2 | """
|
3 | lexer_def_test.py: Tests for lexer_def.py
|
4 | """
|
5 | from __future__ import print_function
|
6 |
|
7 | import re
|
8 | import unittest
|
9 |
|
10 | from _devbuild.gen.id_kind_asdl import Id, Id_str, Kind
|
11 | from _devbuild.gen.types_asdl import lex_mode_e
|
12 | from core import test_lib
|
13 | from core.test_lib import FakeTok
|
14 | from mycpp.mylib import log
|
15 | from frontend import lexer
|
16 | from frontend import lexer_def
|
17 | from frontend import consts
|
18 | from frontend import match
|
19 |
|
20 | _ = log
|
21 |
|
22 |
|
def _InitLexer(s):
    """Create a Lexer reading string s, backed by a fresh arena.

    The local is named lx, NOT lexer, so it doesn't shadow the
    frontend.lexer module imported at the top of this file.
    """
    arena = test_lib.MakeArena('<lex_test.py>')
    _, lx = test_lib.InitLexer(s, arena)
    return lx
|
27 |
|
28 |
|
class AsdlTest(unittest.TestCase):
    """Sanity check that the generated ASDL enums are usable."""

    def testLexMode(self):
        # Printing a lex_mode_e variant should not raise.
        mode = lex_mode_e.DQ
        print(mode)
|
33 |
|
34 |
|
# Two simple shell commands on two lines; shared fixture for
# LexerTest.testRead below.
CMD = """\
ls /
ls /home/
"""
|
39 |
|
40 |
|
class LexerTest(unittest.TestCase):
    """Tests for Lexer.Read() in the various lexer modes.

    Locals are consistently named lx (not lexer/lex), so they don't shadow
    the frontend.lexer module imported at the top of this file.
    """

    def assertTokensEqual(self, left, right):
        """Assert token equality, showing both tokens on failure."""
        self.assertTrue(test_lib.TokensEqual(left, right),
                        'Expected %r, got %r' % (left, right))

    def testRead(self):
        lx = _InitLexer(CMD)

        # Line one: 'ls /'
        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        # Line two: 'ls /home/'
        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'home'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

        # Reading past EOF keeps returning EOF.
        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testMode_VSub_ArgUnquoted(self):
        # A single quote opens a quoted string in this mode.
        lx = _InitLexer("'hi'")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Left_SingleQuote, "'"), t)

        # Tilde is lexed as its own token.
        lx = _InitLexer("~root")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Lit_Tilde, '~'), t)

    def testMode_ExtGlob(self):
        lx = _InitLexer('@(foo|bar)')

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_Pipe, '|'), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'bar'), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_RParen, ')'), t)

        # Individual cases: each extended-glob opener is one token.

        lx = _InitLexer('@(')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        lx = _InitLexer('*(')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_Star, '*('), t)

        lx = _InitLexer('?(')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_QMark, '?('), t)

        lx = _InitLexer('$')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '$'), t)

    def testMode_BashRegex(self):
        lx = _InitLexer('(foo|bar)')

        t = lx.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.BashRegex_LParen, '('), t)

        t = lx.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        # Note: '|' is Lit_Other here, not an operator token.
        t = lx.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '|'), t)

    def testMode_DBracket(self):
        lx = _InitLexer('-z foo')
        t = lx.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(FakeTok(Id.BoolUnary_z, '-z'), t)
        self.assertEqual(Kind.BoolUnary, consts.GetKind(t.id))

    def testMode_DollarSq(self):
        lx = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        t = lx.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo bar'), t)

        t = lx.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Char_OneChar, r'\n'), t)

    def testMode_Backtick(self):
        CASES = [
            r'echo \" \\ hi`',
            r'`',
            r'',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            # Smoke test: print tokens until EOF, no assertions.
            while True:
                t = lx.Read(lex_mode_e.Backtick)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Printf(self):
        CASES = [
            r'hello %s\n',
            r'%% percent %%\377',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            while True:
                t = lx.Read(lex_mode_e.PrintfOuter)
                print(t)
                if t.id == Id.Eof_Real:
                    break

        # Now test the Printf_Percent mode
        CASES = [r'-3.3f', r'03d']

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            while True:
                t = lx.Read(lex_mode_e.PrintfPercent)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Expr(self):
        CASES = [
            r'@[ ]',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            while True:
                t = lx.Read(lex_mode_e.Expr)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testLookPastSpace(self):
        # I think this is the usage pattern we care about. Peek and Next()
        # past the function; then Peek() the next token. Then Lookahead in
        # that state.
        lx = _InitLexer('fun()')

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_LParen, '('), t)

        self.assertEqual(Id.Op_RParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

        lx = _InitLexer('fun ()')

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        self.assertEqual(Id.Op_LParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

    def testPushHint(self):
        # Extglob use case: a hint retokenizes Op_RParen as Right_ExtGlob.
        lx = _InitLexer('@()')
        lx.PushHint(Id.Op_RParen, Id.Right_ExtGlob)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Right_ExtGlob, ')'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testEmitCompDummy(self):
        lx = _InitLexer('echo ')
        lx.EmitCompDummy()

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'echo'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        # The dummy token is emitted right before EOF.
        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_CompDummy, ''), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)
|
295 |
|
296 |
|
class LineLexerTest(unittest.TestCase):
    """Tests for the lower-level LineLexer, which operates on one line."""

    def setUp(self):
        self.arena = test_lib.MakeArena('<lex_test.py>')

    def assertTokensEqual(self, left, right):
        # Compare ids first so a failure names the mismatched Id, then tval.
        id_msg = '%s != %s' % (Id_str(left.id), Id_str(right.id))
        self.assertEqual(left.id, right.id, id_msg)
        self.assertEqual(left.tval, right.tval)

    def testReadOuter(self):
        line_lex = test_lib.InitLineLexer('\n', self.arena)
        tok = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(lexer.DummyToken(Id.Op_Newline, None), tok)

    def testRead_VSub_ArgUnquoted(self):
        line_lex = test_lib.InitLineLexer("'hi'", self.arena)
        tok = line_lex.Read(lex_mode_e.VSub_ArgUnquoted)
        self.assertEqual(Id.Left_SingleQuote, tok.id)

    def testLookPastSpace(self):
        # Lines always end with '\n'; an empty line looks ahead to
        # Unknown_Tok.
        line_lex = test_lib.InitLineLexer('', self.arena)
        self.assertEqual(Id.Unknown_Tok,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        # Nothing after 'foo': lookahead is Unknown_Tok.
        line_lex = test_lib.InitLineLexer('foo', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'),
                               line_lex.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Unknown_Tok,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        # 'bar' follows the space.
        line_lex = test_lib.InitLineLexer('foo bar', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'),
                               line_lex.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Lit_Chars,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        # No lookahead; using the cursor!
        line_lex = test_lib.InitLineLexer('fun(', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'),
                               line_lex.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Op_LParen,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        line_lex = test_lib.InitLineLexer('fun (', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'),
                               line_lex.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Op_LParen,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))
|
345 |
|
346 |
|
class RegexTest(unittest.TestCase):
    """Spot checks for regex patterns defined in lexer_def."""

    def testNul(self):
        # [\0] matches exactly the NUL byte.
        nul_pat = re.compile(r'[\0]')
        self.assertEqual(False, bool(nul_pat.match('x')))
        self.assertEqual(True, bool(nul_pat.match('\0')))

        # The last (catch-all) pattern in ECHO_E_DEF must match ordinary
        # characters but NOT NUL.
        _, pat, _ = lexer_def.ECHO_E_DEF[-1]
        print('P %r' % pat)
        catch_all = re.compile(pat)
        self.assertEqual(True, bool(catch_all.match('x')))
        self.assertEqual(False, bool(catch_all.match('\0')))
|
359 |
|
360 |
|
class OtherLexerTest(unittest.TestCase):
    """Tests for the special-purpose lexers in frontend/match.py.

    Uses assertTrue rather than assert_, which has been a deprecated
    unittest alias since Python 2.7.
    """

    def testEchoLexer(self):
        CASES = [
            r'newline \n NUL \0 octal \0377 hex \x00',
            r'unicode \u0065 \U00000065',
            r'\d \e \f \g',
        ]
        # Smoke test: just print the tokens for each case.
        for s in CASES:
            lx = match.EchoLexer(s)
            print(lx.Tokens())

    def testPS1Lexer(self):
        print(list(match.Ps1Tokens(r'foo')))
        print(list(match.Ps1Tokens(r'\h \w \$')))

    def testHistoryLexer(self):
        print(list(match.HistoryTokens(r'echo hi')))

        print(list(match.HistoryTokens(r'echo !! !* !^ !$')))

        # No history operator with \ escape
        tokens = list(match.HistoryTokens(r'echo \!!'))
        print(tokens)
        self.assertTrue(
            Id.History_Op not in [tok_type for tok_type, _ in tokens])

        print(list(match.HistoryTokens(r'echo !3...')))
        print(list(match.HistoryTokens(r'echo !-5...')))
        print(list(match.HistoryTokens(r'echo !x/foo.py bar')))

        print('---')

        # No history operator in single quotes
        tokens = list(match.HistoryTokens(r"echo '!!' $'!!' "))
        print(tokens)
        self.assertTrue(
            Id.History_Op not in [tok_type for tok_type, _ in tokens])

        # No history operator in incomplete single quotes
        tokens = list(match.HistoryTokens(r"echo '!! "))
        print(tokens)
        self.assertTrue(
            Id.History_Op not in [tok_type for tok_type, _ in tokens])

        # Quoted single quote, and then a History operator
        tokens = list(match.HistoryTokens(r"echo \' !! "))
        print(tokens)
        # YES operator
        self.assertTrue(
            Id.History_Op in [tok_type for tok_type, _ in tokens])

    def testHistoryDoesNotConflict(self):
        # https://github.com/oilshell/oil/issues/264
        #
        # Bash has a bunch of hacks to suppress the conflict between ! for
        # history and:
        #
        # 1. [!abc] globbing
        # 2. ${!foo} indirect expansion
        # 3. $!x -- the PID
        # 4. !(foo|bar) -- extended glob
        #
        # I guess [[ a != b ]] doesn't match the pattern in bash.

        three_other = [Id.History_Other, Id.History_Other, Id.History_Other]
        two_other = [Id.History_Other, Id.History_Other]
        CASES = [
            (r'[!abc]', three_other),
            (r'${!indirect}', three_other),
            (r'$!x', three_other),  # didn't need a special case
            (r'!(foo|bar)', two_other),  # didn't need a special case
        ]

        for s, expected_types in CASES:
            tokens = list(match.HistoryTokens(s))
            print(tokens)
            actual_types = [id_ for id_, val in tokens]

            self.assertTrue(Id.History_Search not in actual_types, tokens)

            self.assertEqual(expected_types, actual_types)

    def testBraceRangeLexer(self):
        CASES = [
            'a..z',
            '100..300',
            '-300..-100..1',
            '1.3',  # invalid
            'aa',
        ]
        for s in CASES:
            lx = match.BraceRangeLexer(s)
            print(lx.Tokens())
|
451 |
|
452 |
|
# Run all TestCase classes in this module when executed as a script.
if __name__ == '__main__':
    unittest.main()
|