OILS / frontend / lexer_def_test.py View on Github | oilshell.org

454 lines, 300 significant
1#!/usr/bin/env python2
2"""
3lexer_def_test.py: Tests for lexer_def.py
4"""
5from __future__ import print_function
6
7import re
8import unittest
9
10from _devbuild.gen.id_kind_asdl import Id, Id_str, Kind
11from _devbuild.gen.types_asdl import lex_mode_e
12from core import test_lib
13from core.test_lib import FakeTok
14from mycpp.mylib import log
15from frontend import lexer
16from frontend import lexer_def
17from frontend import consts
18from frontend import match
19
20_ = log
21
22
def _InitLexer(s):
    """Return a Lexer over the string s, backed by a fresh test arena."""
    arena = test_lib.MakeArena('<lex_test.py>')
    # Local renamed from 'lexer' to 'lx': the old name shadowed the
    # 'frontend.lexer' module imported at the top of this file.
    _, lx = test_lib.InitLexer(s, arena)
    return lx
27
28
class AsdlTest(unittest.TestCase):
    """Smoke test for the generated ASDL types."""

    def testLexMode(self):
        # The lex_mode_e enum value should be importable and printable.
        mode = lex_mode_e.DQ
        print(mode)
33
34
# Two-line shell snippet that LexerTest.testRead consumes token by token.
CMD = """\
ls /
ls /home/
"""
39
40
class LexerTest(unittest.TestCase):
    """Exercises Lexer.Read() across the various lexer modes.

    NOTE: locals were renamed from 'lexer' to 'lx' because the old name
    shadowed the 'frontend.lexer' module imported at the top of this file.
    """

    def assertTokensEqual(self, left, right):
        # Token equality as defined by test_lib (id and value).
        self.assertTrue(test_lib.TokensEqual(left, right),
                        'Expected %r, got %r' % (left, right))

    def testRead(self):
        """Tokenize the two-line CMD snippet, including newlines and EOF."""
        lx = _InitLexer(CMD)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)
        t = lx.Read(lex_mode_e.ShCommand)

        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        # Line two
        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'home'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

        # Reading past EOF keeps returning EOF.
        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testMode_VSub_ArgUnquoted(self):
        """Quotes and tildes get their own token ids inside ${x:-...}."""
        lx = _InitLexer("'hi'")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)

        self.assertTokensEqual(FakeTok(Id.Left_SingleQuote, "'"), t)

        lx = _InitLexer("~root")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)

        self.assertTokensEqual(FakeTok(Id.Lit_Tilde, '~'), t)

    def testMode_ExtGlob(self):
        """@( *( ?( openers and the tokens inside an extended glob."""
        lx = _InitLexer('@(foo|bar)')

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_Pipe, '|'), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'bar'), t)

        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_RParen, ')'), t)

        # Individual cases

        lx = _InitLexer('@(')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        lx = _InitLexer('*(')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_Star, '*('), t)

        lx = _InitLexer('?(')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_QMark, '?('), t)

        lx = _InitLexer('$')
        t = lx.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '$'), t)

    def testMode_BashRegex(self):
        """On the RHS of [[ x =~ ... ]], '|' is a literal, not an operator."""
        lx = _InitLexer('(foo|bar)')

        t = lx.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.BashRegex_LParen, '('), t)

        t = lx.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        t = lx.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '|'), t)

    def testMode_DBracket(self):
        """-z inside [[ ]] is a unary boolean operator of Kind.BoolUnary."""
        lx = _InitLexer('-z foo')
        t = lx.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(FakeTok(Id.BoolUnary_z, '-z'), t)
        self.assertEqual(Kind.BoolUnary, consts.GetKind(t.id))

    def testMode_DollarSq(self):
        """$'...' splits literal runs from backslash escapes."""
        lx = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        t = lx.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo bar'), t)

        t = lx.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Char_OneChar, r'\n'), t)

    def testMode_Backtick(self):
        """Backtick mode tokenizes until Eof_Real without hanging."""
        CASES = [
            r'echo \" \\ hi`',
            r'`',
            r'',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            while True:
                t = lx.Read(lex_mode_e.Backtick)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Printf(self):
        """PrintfOuter and PrintfPercent modes terminate on all inputs."""
        CASES = [
            r'hello %s\n',
            r'%% percent %%\377',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            while True:
                t = lx.Read(lex_mode_e.PrintfOuter)
                print(t)
                if t.id == Id.Eof_Real:
                    break

        # Now test the Printf_Percent mode
        CASES = [r'-3.3f', r'03d']

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            while True:
                t = lx.Read(lex_mode_e.PrintfPercent)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Expr(self):
        """Expr (YSH expression) mode terminates on all inputs."""
        CASES = [
            r'@[ ]',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            while True:
                t = lx.Read(lex_mode_e.Expr)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testLookPastSpace(self):
        # I think this is the usage pattern we care about.  Peek and Next()
        # past the function; then Peek() the next token.  Then Lookahead in
        # that state.
        lx = _InitLexer('fun()')

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_LParen, '('), t)

        self.assertEqual(Id.Op_RParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

        lx = _InitLexer('fun ()')

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        self.assertEqual(Id.Op_LParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

    def testPushHint(self):
        # Extglob use case: a pushed hint retranslates Op_RParen.
        lx = _InitLexer('@()')
        lx.PushHint(Id.Op_RParen, Id.Right_ExtGlob)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Right_ExtGlob, ')'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testEmitCompDummy(self):
        """EmitCompDummy injects a Lit_CompDummy token right before EOF."""
        lx = _InitLexer('echo ')
        lx.EmitCompDummy()

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'echo'), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        # Right before EOF
        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_CompDummy, ''), t)

        t = lx.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)
296
class LineLexerTest(unittest.TestCase):
    """Exercises the lower-level LineLexer (one line at a time)."""

    def setUp(self):
        self.arena = test_lib.MakeArena('<lex_test.py>')

    def assertTokensEqual(self, left, right):
        # Compare id and tval directly for a clearer failure message than
        # test_lib.TokensEqual would give.
        self.assertEqual(left.id, right.id,
                         '%s != %s' % (Id_str(left.id), Id_str(right.id)))
        self.assertEqual(left.tval, right.tval)

    def testReadOuter(self):
        # Locals renamed from 'l' to 'll': 'l' is an ambiguous name (PEP 8
        # E741) and is easily confused with '1'.
        ll = test_lib.InitLineLexer('\n', self.arena)
        self.assertTokensEqual(lexer.DummyToken(Id.Op_Newline, None),
                               ll.Read(lex_mode_e.ShCommand))

    def testRead_VSub_ArgUnquoted(self):
        ll = test_lib.InitLineLexer("'hi'", self.arena)
        t = ll.Read(lex_mode_e.VSub_ArgUnquoted)
        self.assertEqual(Id.Left_SingleQuote, t.id)

    def testLookPastSpace(self):
        # Lines always end with '\n'
        ll = test_lib.InitLineLexer('', self.arena)
        self.assertEqual(Id.Unknown_Tok, ll.LookPastSpace(lex_mode_e.ShCommand))

        ll = test_lib.InitLineLexer('foo', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'),
                               ll.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Unknown_Tok, ll.LookPastSpace(lex_mode_e.ShCommand))

        ll = test_lib.InitLineLexer('foo bar', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'),
                               ll.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Lit_Chars, ll.LookPastSpace(lex_mode_e.ShCommand))

        # No lookahead; using the cursor!
        ll = test_lib.InitLineLexer('fun(', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'),
                               ll.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Op_LParen, ll.LookPastSpace(lex_mode_e.ShCommand))

        ll = test_lib.InitLineLexer('fun (', self.arena)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'),
                               ll.Read(lex_mode_e.ShCommand))
        self.assertEqual(Id.Op_LParen, ll.LookPastSpace(lex_mode_e.ShCommand))
346
class RegexTest(unittest.TestCase):
    """Checks regex properties that the lexer definitions rely on."""

    def testNul(self):
        # A NUL byte is matched by an explicit [\0] character class ...
        nul_re = re.compile(r'[\0]')
        self.assertFalse(nul_re.match('x'))
        self.assertTrue(nul_re.match('\0'))

        # ... but the final (catch-all) ECHO_E_DEF pattern must match an
        # ordinary character and NOT match NUL.
        _, pat, _ = lexer_def.ECHO_E_DEF[-1]
        print('P %r' % pat)
        catch_all = re.compile(pat)
        self.assertTrue(catch_all.match('x'))
        self.assertFalse(catch_all.match('\0'))
360
class OtherLexerTest(unittest.TestCase):
    """Exercises the small special-purpose lexers in frontend/match.

    NOTE: the deprecated unittest alias assert_() was replaced with
    assertTrue() throughout.
    """

    def testEchoLexer(self):
        # Smoke test: each escape-sequence case tokenizes without error.
        CASES = [
            r'newline \n NUL \0 octal \0377 hex \x00',
            r'unicode \u0065 \U00000065',
            r'\d \e \f \g',
        ]
        for s in CASES:
            lex = match.EchoLexer(s)
            print(lex.Tokens())

    def testPS1Lexer(self):
        print(list(match.Ps1Tokens(r'foo')))
        print(list(match.Ps1Tokens(r'\h \w \$')))

    def testHistoryLexer(self):
        print(list(match.HistoryTokens(r'echo hi')))

        print(list(match.HistoryTokens(r'echo !! !* !^ !$')))

        # No history operator with \ escape
        tokens = list(match.HistoryTokens(r'echo \!!'))
        print(tokens)
        self.assertTrue(
            Id.History_Op not in [tok_type for tok_type, _ in tokens])

        print(list(match.HistoryTokens(r'echo !3...')))
        print(list(match.HistoryTokens(r'echo !-5...')))
        print(list(match.HistoryTokens(r'echo !x/foo.py bar')))

        print('---')

        # No history operator in single quotes
        tokens = list(match.HistoryTokens(r"echo '!!' $'!!' "))
        print(tokens)
        self.assertTrue(
            Id.History_Op not in [tok_type for tok_type, _ in tokens])

        # No history operator in incomplete single quotes
        tokens = list(match.HistoryTokens(r"echo '!! "))
        print(tokens)
        self.assertTrue(
            Id.History_Op not in [tok_type for tok_type, _ in tokens])

        # Quoted single quote, and then a History operator
        tokens = list(match.HistoryTokens(r"echo \' !! "))
        print(tokens)
        # YES operator
        self.assertTrue(Id.History_Op in [tok_type for tok_type, _ in tokens])

    def testHistoryDoesNotConflict(self):
        # https://github.com/oilshell/oil/issues/264
        #
        # Bash has a bunch of hacks to suppress the conflict between ! for
        # history and:
        #
        # 1. [!abc] globbing
        # 2. ${!foo} indirect expansion
        # 3. $!x -- the PID
        # 4. !(foo|bar) -- extended glob
        #
        # I guess [[ a != b ]] doesn't match the pattern in bash.

        three_other = [Id.History_Other, Id.History_Other, Id.History_Other]
        two_other = [Id.History_Other, Id.History_Other]
        CASES = [
            (r'[!abc]', three_other),
            (r'${!indirect}', three_other),
            (r'$!x', three_other),  # didn't need a special case
            (r'!(foo|bar)', two_other),  # didn't need a special case
        ]

        for s, expected_types in CASES:
            tokens = list(match.HistoryTokens(s))
            print(tokens)
            actual_types = [id_ for id_, val in tokens]

            self.assertTrue(Id.History_Search not in actual_types, tokens)

            self.assertEqual(expected_types, actual_types)

    def testBraceRangeLexer(self):
        # Smoke test: valid and invalid ranges both tokenize without error.
        CASES = [
            'a..z',
            '100..300',
            '-300..-100..1',
            '1.3',  # invalid
            'aa',
        ]
        for s in CASES:
            lex = match.BraceRangeLexer(s)
            print(lex.Tokens())
452
# Run every TestCase in this module when executed as a script.
if __name__ == '__main__':
    unittest.main()