1 | #!/usr/bin/env python2
2 | """
3 | lexer_def_test.py: Tests for lexer_def.py
4 | """
5 | from __future__ import print_function
6 |
7 | import re
8 | import unittest
9 |
10 | from _devbuild.gen.id_kind_asdl import Id, Id_str, Kind
11 | from _devbuild.gen.types_asdl import lex_mode_e
12 | from core import test_lib
13 | from core.test_lib import FakeTok
14 | from mycpp.mylib import log
15 | from frontend import lexer
16 | from frontend import lexer_def
17 | from frontend import consts
18 | from frontend import match
19 |
# NOTE(review): 'log' is only referenced from commented-out debugging lines
# in the visible code; presumably this assignment keeps the import from being
# reported as unused -- TODO confirm.
_ = log
21 |
22 |
def _InitLexer(s):
    """Return a lexer over the string `s`, using a newly made test arena.

    test_lib.InitLexer() returns a pair; only its second item is returned
    here and the first is discarded.
    """
    test_arena = test_lib.MakeArena('<lex_test.py>')
    # Named 'lx' rather than 'lexer' so the local cannot be mistaken for the
    # imported frontend.lexer module.
    _, lx = test_lib.InitLexer(s, test_arena)
    return lx
27 |
28 |
class AsdlTest(unittest.TestCase):
    """Smoke test for lex_mode_e from _devbuild.gen.types_asdl."""

    def testLexMode(self):
        # Nothing is asserted: the test passes as long as looking up and
        # printing the DQ member does not raise.
        dq_mode = lex_mode_e.DQ
        print(dq_mode)
33 |
34 |
# Two-line shell snippet that LexerTest.testRead lexes token by token.
# Both lines are newline-terminated and carry no leading whitespace.
CMD = ('ls /\n'
       'ls /home/\n')
39 |
40 |
class LexerTest(unittest.TestCase):
    """Token-level tests for the lexer returned by _InitLexer()."""

    def assertTokensEqual(self, left, right):
        """Assert that test_lib.TokensEqual() accepts the two tokens."""
        tokens_match = test_lib.TokensEqual(left, right)
        self.assertTrue(tokens_match, 'Expected %r, got %r' % (left, right))

    def _assertTokenSeq(self, lx, lex_mode, expected):
        """Read one token per entry of `expected` and check each in turn.

        Args:
          lx: lexer to read from; it is advanced by len(expected) tokens.
          lex_mode: lex mode passed to every Read() call.
          expected: sequence of (Id, string) pairs in reading order.
        """
        for tok_id, tok_str in expected:
            tok = lx.Read(lex_mode)
            self.assertTokensEqual(FakeTok(tok_id, tok_str), tok)

    def _printTokens(self, cases, lex_mode):
        """Print a banner and then every token, through EOF, for each case.

        Nothing is asserted; the loop ends once a token with id Id.Eof_Real
        has been read and printed.
        """
        for case in cases:
            print()
            print('--- %s ---' % case)
            print()

            lx = _InitLexer(case)

            tok = lx.Read(lex_mode)
            print(tok)
            while tok.id != Id.Eof_Real:
                tok = lx.Read(lex_mode)
                print(tok)

    def testRead(self):
        lx = _InitLexer(CMD)

        self._assertTokenSeq(lx, lex_mode_e.ShCommand, [
            (Id.Lit_Chars, 'ls'),
            (Id.WS_Space, ' '),
            (Id.Lit_Slash, '/'),
            (Id.Op_Newline, '\n'),
            # Line two
            (Id.Lit_Chars, 'ls'),
            (Id.WS_Space, ' '),
            (Id.Lit_Slash, '/'),
            (Id.Lit_Chars, 'home'),
            (Id.Lit_Slash, '/'),
            (Id.Op_Newline, '\n'),
            (Id.Eof_Real, ''),
            # Reading again after EOF gives EOF again
            (Id.Eof_Real, ''),
        ])

    def testMode_VSub_ArgUnquoted(self):
        # Only the first token of each input is read and checked.
        first_tokens = [
            ("'hi'", Id.Left_SingleQuote, "'"),
            ("~root", Id.Lit_Tilde, '~'),
        ]
        for src, tok_id, tok_str in first_tokens:
            lx = _InitLexer(src)
            tok = lx.Read(lex_mode_e.VSub_ArgUnquoted)
            print(tok)

            self.assertTokensEqual(FakeTok(tok_id, tok_str), tok)

    def testMode_ExtGlob(self):
        lx = _InitLexer('@(foo|bar)')

        # The opening '@(' is read in ShCommand mode, the rest in ExtGlob.
        self._assertTokenSeq(lx, lex_mode_e.ShCommand, [
            (Id.ExtGlob_At, '@('),
        ])
        self._assertTokenSeq(lx, lex_mode_e.ExtGlob, [
            (Id.Lit_Chars, 'foo'),
            (Id.Op_Pipe, '|'),
            (Id.Lit_Chars, 'bar'),
            (Id.Op_RParen, ')'),
        ])

        # Individual cases: each input is expected back as a single token.
        single_token_cases = [
            ('@(', Id.ExtGlob_At),
            ('*(', Id.ExtGlob_Star),
            ('?(', Id.ExtGlob_QMark),
            ('$', Id.Lit_Other),
        ]
        for src, tok_id in single_token_cases:
            lx = _InitLexer(src)
            self._assertTokenSeq(lx, lex_mode_e.ExtGlob, [(tok_id, src)])

    def testMode_BashRegex(self):
        lx = _InitLexer('(foo|bar)')

        # Only the first three tokens are checked.
        self._assertTokenSeq(lx, lex_mode_e.BashRegex, [
            (Id.BashRegex_LParen, '('),
            (Id.Lit_Chars, 'foo'),
            (Id.Lit_Other, '|'),
        ])

    def testMode_DBracket(self):
        lx = _InitLexer('-z foo')
        tok = lx.Read(lex_mode_e.DBracket)

        self.assertTokensEqual(FakeTok(Id.BoolUnary_z, '-z'), tok)
        self.assertEqual(Kind.BoolUnary, consts.GetKind(tok.id))

    def testMode_DollarSq(self):
        lx = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        # Only the first two tokens are checked; the rest is never read.
        leading_tokens = [
            (Id.Lit_Chars, 'foo bar'),
            (Id.Char_OneChar, r'\n'),
        ]
        for tok_id, tok_str in leading_tokens:
            tok = lx.Read(lex_mode_e.SQ_C)
            print(tok)
            self.assertTokensEqual(FakeTok(tok_id, tok_str), tok)

    def testMode_Backtick(self):
        CASES = [
            r'echo \" \\ hi`',
            r'`',
            r'',
        ]
        self._printTokens(CASES, lex_mode_e.Backtick)

    def testMode_Printf(self):
        CASES = [
            r'hello %s\n',
            r'%% percent %%\377',
        ]
        self._printTokens(CASES, lex_mode_e.PrintfOuter)

        # Now test the Printf_Percent mode
        CASES = [r'-3.3f', r'03d']
        self._printTokens(CASES, lex_mode_e.PrintfPercent)

    def testMode_Expr(self):
        CASES = [
            r'@[ ]',
        ]
        self._printTokens(CASES, lex_mode_e.Expr)

    def testLookPastSpace(self):
        # Usage pattern under test: read past the function name (and the
        # token after it), then look ahead from that lexer state.
        lx = _InitLexer('fun()')
        self._assertTokenSeq(lx, lex_mode_e.ShCommand, [
            (Id.Lit_Chars, 'fun'),
            (Id.Op_LParen, '('),
        ])
        self.assertEqual(Id.Op_RParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

        lx = _InitLexer('fun ()')
        self._assertTokenSeq(lx, lex_mode_e.ShCommand, [
            (Id.Lit_Chars, 'fun'),
            (Id.WS_Space, ' '),
        ])
        self.assertEqual(Id.Op_LParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

    def testPushHint(self):
        # Extglob use case: after the hint is pushed, the ')' is expected
        # back as Id.Right_ExtGlob.
        lx = _InitLexer('@()')
        lx.PushHint(Id.Op_RParen, Id.Right_ExtGlob)

        self._assertTokenSeq(lx, lex_mode_e.ShCommand, [
            (Id.ExtGlob_At, '@('),
            (Id.Right_ExtGlob, ')'),
            (Id.Eof_Real, ''),
        ])

    def testEmitCompDummy(self):
        lx = _InitLexer('echo ')
        lx.EmitCompDummy()

        self._assertTokenSeq(lx, lex_mode_e.ShCommand, [
            (Id.Lit_Chars, 'echo'),
            (Id.WS_Space, ' '),
            # Right before EOF
            (Id.Lit_CompDummy, ''),
            (Id.Eof_Real, ''),
        ])
295 |
296 |
class LineLexerTest(unittest.TestCase):
    """Tests for the line lexer built by test_lib.InitLineLexer()."""

    def setUp(self):
        # Each test method gets its own arena.
        self.arena = test_lib.MakeArena('<lex_test.py>')

    def assertTokensEqual(self, left, right):
        """Assert that two tokens have the same id and the same tval.

        Only those two attributes are compared.  The failure message for an
        id mismatch shows both ids rendered with Id_str().
        """
        id_msg = '%s != %s' % (Id_str(left.id), Id_str(right.id))
        self.assertEqual(left.id, right.id, id_msg)
        self.assertEqual(left.tval, right.tval)

    def testReadOuter(self):
        line_lexer = test_lib.InitLineLexer('\n', self.arena)

        expected = lexer.DummyToken(Id.Op_Newline, None)
        actual = line_lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(expected, actual)

    def testRead_VSub_ArgUnquoted(self):
        line_lexer = test_lib.InitLineLexer("'hi'", self.arena)
        tok = line_lexer.Read(lex_mode_e.VSub_ArgUnquoted)
        self.assertEqual(Id.Left_SingleQuote, tok.id)

    def testLookPastSpace(self):
        mode = lex_mode_e.ShCommand

        # Empty line: the lookahead is expected to report Id.Unknown_Tok.
        line_lexer = test_lib.InitLineLexer('', self.arena)
        self.assertEqual(Id.Unknown_Tok, line_lexer.LookPastSpace(mode))

        # (line, first word read from it, id expected from the lookahead)
        cases = [
            ('foo', 'foo', Id.Unknown_Tok),
            ('foo bar', 'foo', Id.Lit_Chars),
            # No lookahead; using the cursor!
            ('fun(', 'fun', Id.Op_LParen),
            ('fun (', 'fun', Id.Op_LParen),
        ]
        for line, first_word, ahead_id in cases:
            line_lexer = test_lib.InitLineLexer(line, self.arena)
            self.assertTokensEqual(FakeTok(Id.Lit_Chars, first_word),
                                   line_lexer.Read(mode))
            self.assertEqual(ahead_id, line_lexer.LookPastSpace(mode))
345 |
346 |
class RegexTest(unittest.TestCase):
    """Checks how two regexes treat the NUL character."""

    def testNul(self):
        # A character class holding only NUL: matches '\0' but not 'x'.
        nul_re = re.compile(r'[\0]')
        for text, should_match in [('x', False), ('\0', True)]:
            self.assertEqual(should_match, bool(nul_re.match(text)))

        # The last ECHO_E_DEF entry is unpacked as a 3-tuple whose middle
        # item is a regex source string; it must match 'x' but not '\0'.
        _, pat_src, _ = lexer_def.ECHO_E_DEF[-1]
        print('P %r' % pat_src)
        last_echo_e_re = re.compile(pat_src)
        for text, should_match in [('x', True), ('\0', False)]:
            self.assertEqual(should_match, bool(last_echo_e_re.match(text)))
359 |
360 |
class OtherLexerTest(unittest.TestCase):
    """Tests for the lexers exposed by frontend.match.

    Covers match.EchoLexer, match.Ps1Tokens, match.HistoryTokens and
    match.BraceRangeLexer.  Several tests only print the tokens and pass as
    long as lexing does not raise.
    """

    def _historyIds(self, s):
        """Lex `s` with match.HistoryTokens(), print the tokens, return ids.

        Each token is unpacked as a pair whose first item is the token id;
        the returned list holds those ids in order.
        """
        tokens = list(match.HistoryTokens(s))
        print(tokens)
        return [tok_id for tok_id, _ in tokens]

    def testEchoLexer(self):
        CASES = [
            r'newline \n NUL \0 octal \0377 hex \x00',
            r'unicode \u0065 \U00000065',
            r'\d \e \f \g',
        ]
        for s in CASES:
            lex = match.EchoLexer(s)
            print(lex.Tokens())

    def testPS1Lexer(self):
        print(list(match.Ps1Tokens(r'foo')))
        print(list(match.Ps1Tokens(r'\h \w \$')))

    def testHistoryLexer(self):
        print(list(match.HistoryTokens(r'echo hi')))

        print(list(match.HistoryTokens(r'echo !! !* !^ !$')))

        # No history operator with \ escape
        # assertIn/assertNotIn replace the deprecated assert_ alias and show
        # the id list when they fail.
        self.assertNotIn(Id.History_Op, self._historyIds(r'echo \!!'))

        print(list(match.HistoryTokens(r'echo !3...')))
        print(list(match.HistoryTokens(r'echo !-5...')))
        print(list(match.HistoryTokens(r'echo !x/foo.py bar')))

        print('---')

        # No history operator in single quotes
        self.assertNotIn(Id.History_Op,
                         self._historyIds(r"echo '!!' $'!!' "))

        # No history operator in incomplete single quotes
        self.assertNotIn(Id.History_Op, self._historyIds(r"echo '!! "))

        # Quoted single quote, and then a History operator
        # YES operator
        self.assertIn(Id.History_Op, self._historyIds(r"echo \' !! "))

    def testHistoryDoesNotConflict(self):
        # https://github.com/oilshell/oil/issues/264
        #
        # Bash has a bunch of hacks to suppress the conflict between ! for
        # history and:
        #
        # 1. [!abc] globbing
        # 2. ${!foo} indirect expansion
        # 3. $!x -- the PID
        # 4. !(foo|bar) -- extended glob
        #
        # I guess [[ a != b ]] doesn't match the pattern in bash.

        three_other = [Id.History_Other, Id.History_Other, Id.History_Other]
        two_other = [Id.History_Other, Id.History_Other]
        CASES = [
            (r'[!abc]', three_other),
            (r'${!indirect}', three_other),
            (r'$!x', three_other),  # didn't need a special case
            (r'!(foo|bar)', two_other),  # didn't need a special case
        ]

        for s, expected_types in CASES:
            tokens = list(match.HistoryTokens(s))
            print(tokens)
            actual_types = [id_ for id_, _ in tokens]

            # Pass a string message (not the raw list) so the failure text is
            # well-formed whatever unittest does with it.
            self.assertNotIn(Id.History_Search, actual_types,
                             'tokens: %r' % (tokens,))

            self.assertEqual(expected_types, actual_types)

    def testBraceRangeLexer(self):
        CASES = [
            'a..z',
            '100..300',
            '-300..-100..1',
            '1.3',  # invalid
            'aa',
        ]
        for s in CASES:
            lex = match.BraceRangeLexer(s)
            print(lex.Tokens())
451 |
452 |
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()