OILS / osh / word_parse_test.py

#!/usr/bin/env python2
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
"""
word_parse_test.py: Tests for word_parse.py
"""

import unittest

from _devbuild.gen.id_kind_asdl import Id, Id_str
from _devbuild.gen.syntax_asdl import arith_expr_e, word_e, rhs_word_e
from _devbuild.gen.types_asdl import lex_mode_e

from asdl import format as fmt
from core import error
from core import test_lib
from core.test_lib import FakeTok
from frontend import lexer
from frontend import location
from osh import word_


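# Test helpers.  The _assertReadWord* helpers below parse word_str in
# lex_mode_e.ShCommand with a parser from test_lib; the success helpers also
# assert that the next word read is Eof_Real (i.e. the input was exactly one
# word), while _assertReadWordFailure expects error.Parse to be raised.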
def _assertReadWordWithArena(test, w_parser):
    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    assert w is not None
    fmt.PrettyPrint(w)
    print('')

    # Next word must be Eof_Real
    w2 = w_parser.ReadWord(lex_mode_e.ShCommand)
    test.assertTrue(test_lib.TokensEqual(FakeTok(Id.Eof_Real, ''), w2), w2)
    return w


def _assertReadWord(test, word_str, oil_at=False):
    print('')
    print('--- %s' % word_str)
    print('')

    arena = test_lib.MakeArena('word_parse_test.py')
    w_parser = test_lib.InitWordParser(word_str, arena=arena, oil_at=oil_at)
    w = _assertReadWordWithArena(test, w_parser)
    return w


def _assertReadWordFailure(test, word_str, oil_at=False):
    print('\n---', word_str)
    w_parser = test_lib.InitWordParser(word_str, oil_at=oil_at)
    try:
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
    except error.Parse as e:
        print('Got expected ParseError: %s' % e)
    else:
        fmt.PrettyPrint(w)
        test.fail('Expected a parser error, got %r' % w)


def _assertSpanForWord(test, word_str):
    arena = test_lib.MakeArena('word_parse_test.py')
    w_parser = test_lib.InitWordParser(word_str, arena=arena)
    w = _assertReadWordWithArena(test, w_parser)
    tok = location.LeftTokenForWord(w)

    print(word_str)
    print(tok)


def _GetSuffixOp(test, w):
    """Get a single transform op."""
    test.assertEqual(1, len(w.parts))
    return w.parts[0].suffix_op


def _GetPrefixOp(test, w):
    """Get a single transform op."""
    test.assertEqual(1, len(w.parts))
    return w.parts[0].prefix_op.id


def _GetVarSub(test, w):
    test.assertEqual(1, len(w.parts))
    part = w.parts[0]
    return lexer.LazyStr(part.token)


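# ArenaTest exercises arena.SnipCodeString(left_tok, right_tok), which is
# expected to return the original source text spanning from the left token
# through the right token; the asserts in testSnipCodeString check that
# round trip.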
class ArenaTest(unittest.TestCase):
    """It's more convenient to test the arena here, because we have a proper
    lexer and so forth."""

    def testSnipCodeString(self):
        expr = """\
hi'
single quoted'"double
quoted
"there
        """

        arena = test_lib.MakeArena('hi')
        w_parser = test_lib.InitWordParser(expr, arena=arena)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        print(w)

        left = w.parts[1].left  # left single quote
        right = w.parts[2].right  # right double quote

        s = arena.SnipCodeString(left, right)

        print('s = %r' % s)
        self.assertEqual("""\
'
single quoted'"double
quoted
"\
""", s)

        s = arena.SnipCodeString(w.parts[1].left, w.parts[1].right)

        print('s = %r' % s)
        self.assertEqual("""\
'
single quoted'\
""", s)

        # Just snip one token
        s = arena.SnipCodeString(w.parts[0], w.parts[0])

        print('s = %r' % s)
        self.assertEqual('hi', s)

    def testSaveLinesAndDiscard(self):
        # Also takes a left, right, token
        pass


class LexerTest(unittest.TestCase):
    """It's more convenient to test these lexer helpers here, because we have
    a proper word parser and arena set up."""

    def testAssignFunctions(self):
        arena = test_lib.MakeArena('')

        expr = 'ls; foo=42'
        w_parser = test_lib.InitWordParser(expr, arena=arena)

        # Skip first two words
        w_parser.ReadWord(lex_mode_e.ShCommand)
        w_parser.ReadWord(lex_mode_e.ShCommand)
        w3 = w_parser.ReadWord(lex_mode_e.ShCommand)
        print(w3)

        self.assertEqual(False, lexer.IsPlusEquals(w3.parts[0]))
        self.assertEqual('foo', lexer.TokenSliceRight(w3.parts[0], -1))

        expr = 'ls; foo+=X'
        w_parser = test_lib.InitWordParser(expr, arena=arena)

        # Skip first two words
        w_parser.ReadWord(lex_mode_e.ShCommand)
        w_parser.ReadWord(lex_mode_e.ShCommand)
        w3 = w_parser.ReadWord(lex_mode_e.ShCommand)
        print(w3)

        self.assertEqual(True, lexer.IsPlusEquals(w3.parts[0]))
        self.assertEqual('foo', lexer.TokenSliceRight(w3.parts[0], -2))


class WordParserTest(unittest.TestCase):

    def testStaticEvalWord(self):
        expr = r'\EOF'  # Quoted here doc delimiter
        w_parser = test_lib.InitWordParser(expr)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        ok, s, quoted = word_.StaticEval(w)
        self.assertEqual(True, ok)
        self.assertEqual('EOF', s)
        self.assertEqual(True, quoted)

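    # ${#} is $# and ${!} is $!, but ${#name} is the length of $name and
    # ${!ref} is bash-style indirection through $ref.  The parser has to look
    # at what follows the # or ! to disambiguate; the cases below exercise
    # that logic.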
    def testDisambiguatePrefix(self):
        w = _assertReadWord(self, '${#}')
        self.assertEqual('#', _GetVarSub(self, w))
        w = _assertReadWord(self, '${!}')
        self.assertEqual('!', _GetVarSub(self, w))
        w = _assertReadWord(self, '${?}')
        self.assertEqual('?', _GetVarSub(self, w))

        w = _assertReadWord(self, '${var}')

        w = _assertReadWord(self, '${15}')

        w = _assertReadWord(self, '${#var}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        w = _assertReadWord(self, '${!ref}')
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        # Length of $# (the "length of length" case)
        w = _assertReadWord(self, '${##}')
        self.assertEqual('#', _GetVarSub(self, w))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${array[0]}')
        self.assertEqual(1, len(w.parts))
        w = _assertReadWord(self, '${array[@]}')
        self.assertEqual(1, len(w.parts))

        # Length of element
        w = _assertReadWord(self, '${#array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        # Ref for element
        w = _assertReadWord(self, '${!array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${!var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        _assertReadWordFailure(self, '${#var#prefix}')

        # Allowed by bash, but we don't parse it.  Use len=$#; echo ${len#2}
        # instead.
        _assertReadWordFailure(self, '${##2}')

    def testIncompleteWords(self):
        # Bugs found in completion
        w = _assertReadWordFailure(self, '${undef:-')
        w = _assertReadWordFailure(self, '${undef:-$')
        w = _assertReadWordFailure(self, '${undef:-$F')

        w = _assertReadWordFailure(self, '${x@')
        w = _assertReadWordFailure(self, '${x@Q')

        w = _assertReadWordFailure(self, '${x%')

        w = _assertReadWordFailure(self, '${x/')
        w = _assertReadWordFailure(self, '${x/a/')
        w = _assertReadWordFailure(self, '${x/a/b')
        w = _assertReadWordFailure(self, '${x:')

    def testVarOf(self):
        w = _assertReadWord(self, '${name}')
        w = _assertReadWord(self, '${name[0]}')

        w = _assertReadWord(self, '${array[@]}')

        # Should be DISALLOWED!
        #w = _assertReadWord(self, '${11[@]}')

    def assertUnquoted(self, expected, w):
        ok, s, quoted = word_.StaticEval(w)
        self.assertTrue(ok)
        self.assertEqual(expected, s)
        self.assertFalse(quoted)

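    # bash pattern substitution: ${var/pat/replace} replaces the first match,
    # ${var//pat/replace} replaces every match, ${var/#pat/replace} anchors
    # the match at the start of $var, and ${var/%pat/replace} anchors it at
    # the end.  These cases check which replace_mode the parser records.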
    def testPatSub(self):
        w = _assertReadWord(self, '${var/pat/replace}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Undefined_Tok, op.replace_mode)

        w = _assertReadWord(self, '${var//pat/replace}')  # sub all
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Slash, op.replace_mode,
                         Id_str(op.replace_mode))

        w = _assertReadWord(self, '${var/%pat/replace}')  # anchored at end
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Percent, op.replace_mode)

        w = _assertReadWord(self, '${var/#pat/replace}')  # anchored at start
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Pound, op.replace_mode)

        w = _assertReadWord(self, '${var/pat}')  # no replacement
        w = _assertReadWord(self, '${var//pat}')  # no replacement
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertEqual(rhs_word_e.Empty, op.replace.tag())
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        # replace with slash
        w = _assertReadWord(self, '${var/pat//}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('/', op.replace)

        # replace with two slashes unquoted
        w = _assertReadWord(self, '${var/pat///}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('//', op.replace)

        # replace with two slashes quoted
        w = _assertReadWord(self, '${var/pat/"//"}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual('//', s)
        self.assertTrue(quoted)

        # Real example found in the wild!
        # http://www.oilshell.org/blog/2016/11/07.html

        # 2023-05: copied into spec/var-op-patsub.test.sh
        # It replaces every / with \/ : the pattern is a single /, and the
        # replacement statically evaluates to \/.
        w = _assertReadWord(self, r'${var////\\/}')
        op = _GetSuffixOp(self, w)
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        self.assertUnquoted('/', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual(r'\/', s)

    def testSlice(self):
        w = _assertReadWord(self, '${foo:0}')
        # No length
        self.assertEqual(None, _GetSuffixOp(self, w).length)

        w = _assertReadWord(self, '${foo:0:1}')
        w = _assertReadWord(self, '${foo:1+2:2+3}')

        # This is allowed
        w = _assertReadWord(self, '${foo::1}')
        # No beginning
        self.assertEqual(arith_expr_e.EmptyZero,
                         _GetSuffixOp(self, w).begin.tag())

    def testLength(self):
        # Synonym for $#, had a bug here
        w = _assertReadWord(self, '${#@}')
        self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w))

        # Length of arg 11
        w = _assertReadWord(self, '${#11}')
        self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${#str}')
        self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${#array[0]}')
        # BUG!
        #self.assertTrue(VS_POUND, _GetSuffixOp(self, w).id)

        w = _assertReadWord(self, '${#array["key"]}')
        # BUG!
        #self.assertTrue(Id.VSub_POUND, _GetSuffixOp(self, w).id)

    def testUnary(self):
        w = _assertReadWord(self, '${var#}')
        self.assertTrue(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var#prefix}')
        self.assertTrue(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${var##}')
        self.assertTrue(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var##prefix}')
        self.assertTrue(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${var%suffix}')
        w = _assertReadWord(self, '${var%%suffix}')

    def testArrayOp(self):
        w = _assertReadWord(self, '${array[0]}')
        w = _assertReadWord(self, '${array[5+5]}')

        w = _assertReadWord(self, '${array[@]}')
        w = _assertReadWord(self, '${array[*]}')

    def testTestOp(self):
        w = _assertReadWord(self, '${var:-default]}')

    def testTildeLike(self):
        w = _assertReadWord(self, '~/git/oilshell/oil')
        w = _assertReadWord(self, '~andy/git/oilshell/oil')
        w = _assertReadWord(self, '~andy_c/git/oilshell/oil')
        w = _assertReadWord(self, '~andy.c/git/oilshell/oil')
        w = _assertReadWord(self, '~andy-c/git/oilshell/oil')
        w = _assertReadWord(self, '~andy-c:git/oilshell/oil')

    def testRead(self):
        CASES = [
            'ls "foo"',
            '$(( 1 + 2 ))',
            '$(echo $(( 1 )) )',  # OLD BUG: arith sub within command sub
            'echo ${#array[@]} b',  # Had a bug here
            'echo $(( ${#array[@]} ))',  # Bug here

            # Had a bug: unary minus
            #'${mounted_disk_regex:0:-1}',
            'echo ${@%suffix}',  # had a bug here
            '${@}',
            'echo ${var,,}',
            'echo ${var,,?}',

            # Line continuation tests
            '${\\\nfoo}',  # VSub_1
            '${foo\\\n}',  # VSub_2
            '${foo#\\\nyo}',  # VS_ARG_UNQ
            '"${foo#\\\nyo}"',  # VS_ARG_DQ
        ]
        for expr in CASES:
            print('---')
            print(expr)
            print()

            w_parser = test_lib.InitWordParser(expr)

            while True:
                w = w_parser.ReadWord(lex_mode_e.ShCommand)
                assert w is not None

                fmt.PrettyPrint(w)

                if word_.CommandId(w) == Id.Eof_Real:
                    break

    def testOilSplice(self):
        w = _assertReadWord(self, '@words', oil_at=True)

        # These are normal words
        w = _assertReadWord(self, '.@words', oil_at=True)
        w = _assertReadWord(self, '.@words.', oil_at=True)

        # Errors
        _assertReadWordFailure(self, '@words[', oil_at=True)
        _assertReadWordFailure(self, '@words.', oil_at=True)

    def testReadComment(self):
        # Test that we get Id.Op_Newline
        code = 'foo # comment\nbar #comment\n'
        w_parser = test_lib.InitWordParser(code)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual('foo', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual('bar', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Eof_Real, w.id)

    def testReadArithWord(self):
        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

    def testReadArith(self):
        CASES = [
            '1 + 2',
            'a + b',
            '$a * $b',
            '${a} * ${b}',
            '$(echo 1) * $(echo 2)',
            '`echo 1` + 2',
            '$((1 + 2)) * $((3 + 4))',
            "'single quoted'",  # Allowed by oil but not bash
            '"${a}" + "${b}"',  # Ditto
            '$# + $$',
            # This doesn't work but does in bash -- should be 15
            #'$(( $(echo 1)$(echo 2) + 3 ))',
            '$(( x[0] < 5 ))',
            '$(( ++i ))',
            '$(( i++ ))',
            '$(( x -= 1))',
            '$(( x |= 1))',
            '$(( x[0] = 1 ))',
            '$(( 1 | 0 ))',
            '$((0x$size))',
        ]

        for expr in CASES:
            print('---')
            print(expr)
            print()

            w_parser = test_lib.InitWordParser(expr)
            # Can we remove this initialization?
            w_parser._SetNext(lex_mode_e.Arith)

            while True:
                w = w_parser.ReadArithWord()
                assert w is not None
                fmt.PrettyPrint(w)
                if word_.CommandId(w) in (Id.Eof_Real, Id.Unknown_Tok):
                    break

    def testHereDoc(self):
        w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")

        def assertWord(w, id_, val):
            self.assertEqual(1, len(w.parts))
            part = w.parts[0]
            self.assertEqual(id_, part.id)
            self.assertEqual(val, lexer.LazyStr(part))

        print('--MULTI')
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'foo')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'bar')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Eof_Real, w.id)
        self.assertEqual('', lexer.LazyStr(w))

    def testUnicode(self):
        words = 'z \xce\xbb \xe4\xb8\x89 \xf0\x9f\x98\x98'

        def _Part(w, i):
            return lexer.LazyStr(w.parts[i])

        w_parser = test_lib.InitWordParser(words)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('z', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xce\xbb', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xe4\xb8\x89', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xf0\x9f\x98\x98', _Part(w, 0))

    def testParseErrorLocation(self):
        w = _assertSpanForWord(self, 'a=(1 2 3)')

        w = _assertSpanForWord(self, 'foo')

        w = _assertSpanForWord(self, '\\$')

        w = _assertSpanForWord(self, "''")

        w = _assertSpanForWord(self, "'sq'")

        w = _assertSpanForWord(self, '""')

        w = _assertSpanForWord(self, '"dq"')

        w = _assertSpanForWord(self, '$(echo command sub)')

        w = _assertSpanForWord(self, '$(( 1 + 2 ))')

        w = _assertSpanForWord(self, '~user')

        w = _assertSpanForWord(self, '${var#}')


if __name__ == '__main__':
    unittest.main()
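
# Note: this file is meant to be run directly (python2 shebang, and it calls
# unittest.main() above); the _devbuild.gen imports assume the Oils dev build
# has already generated those modules and made them importable.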