OILS / osh / word_parse_test.py View on Github | oilshell.org

606 lines, 397 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9word_parse_test.py: Tests for word_parse.py
10"""
11
12import unittest
13
14from _devbuild.gen.id_kind_asdl import Id, Id_str
15from _devbuild.gen.syntax_asdl import arith_expr_e, word_e, rhs_word_e
16from _devbuild.gen.types_asdl import lex_mode_e
17
18from asdl import format as fmt
19from core import error
20from core import test_lib
21from core.test_lib import FakeTok
22from frontend import lexer
23from frontend import location
24from osh import word_
25
26
def _assertReadWordWithArena(test, w_parser):
    """Read one word from `w_parser`, pretty-print it, and return it.

    The word after it must compare equal to an Id.Eof_Real token, i.e. the
    parser's input is expected to hold exactly one word.  That check is
    reported through `test.assertTrue`.
    """
    mode = lex_mode_e.ShCommand

    first_word = w_parser.ReadWord(mode)
    assert first_word is not None
    fmt.PrettyPrint(first_word)
    print('')

    # Nothing but end-of-file may follow the word we just read
    trailing = w_parser.ReadWord(mode)
    expected_eof = FakeTok(Id.Eof_Real, '')
    at_eof = test_lib.TokensEqual(expected_eof, trailing)
    test.assertTrue(at_eof, trailing)

    return first_word
37
38
def _assertReadWord(test, word_str, oil_at=False):
    """Parse `word_str` as a single shell word and return the word.

    Prints a banner naming the input, builds a word parser over a fresh arena,
    and delegates the actual read (and end-of-input check) to
    _assertReadWordWithArena.
    """
    # Banner: blank line, the input, blank line
    for banner_line in ('', '--- %s' % word_str, ''):
        print(banner_line)

    fresh_arena = test_lib.MakeArena('word_parse_test.py')
    parser = test_lib.InitWordParser(word_str,
                                     arena=fresh_arena,
                                     oil_at=oil_at)
    return _assertReadWordWithArena(test, parser)
48
49
def _assertReadWordFailure(test, word_str, oil_at=False):
    """Assert that reading `word_str` as a shell word raises error.Parse.

    Args:
      test: object whose fail() is called when no parse error is raised
      word_str: source text handed to test_lib.InitWordParser
      oil_at: forwarded to test_lib.InitWordParser

    Returns nothing, so callers have no parsed word to inspect.
    """
    # Format the banner explicitly.  The imports visible in this module don't
    # include print_function, so under the python2 shebang
    # print('\n---', word_str) would print a tuple repr rather than a banner.
    print('\n--- %s' % word_str)

    w_parser = test_lib.InitWordParser(word_str, oil_at=oil_at)
    try:
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
    except error.Parse as e:
        print('Got expected ParseError: %s' % e)
    else:
        # The read succeeded, which is the failure case for this helper.  Show
        # what was parsed before failing the test.
        fmt.PrettyPrint(w)
        test.fail('Expected a parser error, got %r' % w)
60
61
def _assertSpanForWord(test, word_str):
    """Parse `word_str` as one word and print its leftmost token.

    The word is read through _assertReadWordWithArena; the token comes from
    location.LeftTokenForWord.  Nothing is returned.
    """
    parser = test_lib.InitWordParser(
        word_str, arena=test_lib.MakeArena('word_parse_test.py'))
    parsed = _assertReadWordWithArena(test, parser)
    left_tok = location.LeftTokenForWord(parsed)

    for item in (word_str, left_tok):
        print(item)
70
71
def _GetSuffixOp(test, w):
    """Return the suffix_op of the only part of word `w`.

    `test.assertEqual` is used to check that `w` has exactly one part.
    """
    parts = w.parts
    test.assertEqual(1, len(parts))
    only_part = parts[0]
    return only_part.suffix_op
76
77
def _GetPrefixOp(test, w):
    """Return the `id` of the prefix_op on the only part of word `w`.

    `test.assertEqual` is used to check that `w` has exactly one part.
    """
    parts = w.parts
    test.assertEqual(1, len(parts))
    prefix_op = parts[0].prefix_op
    return prefix_op.id
82
83
def _GetVarSub(test, w):
    """Return lexer.LazyStr() of the `token` on the only part of word `w`.

    `test.assertEqual` is used to check that `w` has exactly one part.
    """
    parts = w.parts
    test.assertEqual(1, len(parts))
    name_tok = parts[0].token
    return lexer.LazyStr(name_tok)
88
89
class ArenaTest(unittest.TestCase):
    """Arena tests that live in this module because a real lexer and word
    parser are readily available here."""

    def testSnipCodeString(self):
        # A single word spanning several lines: a bare 'hi', a single-quoted
        # part, a double-quoted part, and a trailing 'there'.
        code = ("hi'\n"
                "single quoted'\"double\n"
                "quoted\n"
                "\"there\n"
                " ")

        arena = test_lib.MakeArena('hi')
        parser = test_lib.InitWordParser(code, arena=arena)
        word = parser.ReadWord(lex_mode_e.ShCommand)
        print(word)

        sq_part = word.parts[1]  # the single-quoted part
        dq_part = word.parts[2]  # the double-quoted part

        # From the left single quote through the right double quote
        snippet = arena.SnipCodeString(sq_part.left, dq_part.right)
        print('s = %r' % snippet)
        self.assertEqual('\'\nsingle quoted\'"double\nquoted\n"', snippet)

        # Only the single-quoted part
        snippet = arena.SnipCodeString(word.parts[1].left, word.parts[1].right)
        print('s = %r' % snippet)
        self.assertEqual("'\nsingle quoted'", snippet)

        # Just snip one token
        first = word.parts[0]
        snippet = arena.SnipCodeString(first, first)
        print('s = %r' % snippet)
        self.assertEqual('hi', snippet)

    def testSaveLinesAndDiscard(self):
        # Placeholder: also takes a left, right, token
        pass
137
138
class LexerTest(unittest.TestCase):
    """Tests for lexer helper functions, placed here because a word parser is
    available to produce the tokens they operate on."""

    def _ReadThirdWord(self, arena, code_str):
        """Parse `code_str`, skip its first two words, print and return the
        third."""
        parser = test_lib.InitWordParser(code_str, arena=arena)

        # Skip first two words
        for _ in range(2):
            parser.ReadWord(lex_mode_e.ShCommand)

        third = parser.ReadWord(lex_mode_e.ShCommand)
        print(third)
        return third

    def testAssignFunctions(self):
        arena = test_lib.MakeArena('')

        # Plain assignment: not '+=', and the name is everything but the
        # last character
        lhs = self._ReadThirdWord(arena, 'ls; foo=42').parts[0]
        self.assertEqual(False, lexer.IsPlusEquals(lhs))
        self.assertEqual('foo', lexer.TokenSliceRight(lhs, -1))

        # Append assignment: '+=', and the name is everything but the last
        # two characters
        lhs = self._ReadThirdWord(arena, 'ls; foo+=X').parts[0]
        self.assertEqual(True, lexer.IsPlusEquals(lhs))
        self.assertEqual('foo', lexer.TokenSliceRight(lhs, -2))
169
170
class WordParserTest(unittest.TestCase):
    """Exercises the word parser on shell words, mostly ${...} substitutions.

    Test methods carry comments rather than docstrings so the test runner's
    per-test descriptions stay unchanged.
    """

    def testStaticEvalWord(self):
        expr = r'\EOF'  # Quoted here doc delimiter
        w_parser = test_lib.InitWordParser(expr)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        ok, s, quoted = word_.StaticEval(w)
        self.assertEqual(True, ok)
        self.assertEqual('EOF', s)
        self.assertEqual(True, quoted)

    def testDisambiguatePrefix(self):
        # A lone '#', '!' or '?' is the variable being substituted
        w = _assertReadWord(self, '${#}')
        self.assertEqual('#', _GetVarSub(self, w))
        w = _assertReadWord(self, '${!}')
        self.assertEqual('!', _GetVarSub(self, w))
        w = _assertReadWord(self, '${?}')
        self.assertEqual('?', _GetVarSub(self, w))

        # No assertions on the resulting word for these two
        _assertReadWord(self, '${var}')
        _assertReadWord(self, '${15}')

        w = _assertReadWord(self, '${#var}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        w = _assertReadWord(self, '${!ref}')
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        # Length of length
        w = _assertReadWord(self, '${##}')
        self.assertEqual('#', _GetVarSub(self, w))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${array[0]}')
        self.assertEqual(1, len(w.parts))
        w = _assertReadWord(self, '${array[@]}')
        self.assertEqual(1, len(w.parts))

        # Length of element
        w = _assertReadWord(self, '${#array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        # Ref for element
        w = _assertReadWord(self, '${!array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${!var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        _assertReadWordFailure(self, '${#var#prefix}')

        # Allowed by bash, but we don't parse it.  Use len=$#; echo ${len#2}
        # instead.
        _assertReadWordFailure(self, '${##2}')

    def testIncompleteWords(self):
        # Bugs found in completion.  _assertReadWordFailure returns nothing,
        # so there is no result to keep.
        CASES = [
            '${undef:-',
            '${undef:-$',
            '${undef:-$F',
            '${x@',
            '${x@Q',
            '${x%',
            '${x/',
            '${x/a/',
            '${x/a/b',
            '${x:',
        ]
        for incomplete in CASES:
            _assertReadWordFailure(self, incomplete)

    def testVarOf(self):
        _assertReadWord(self, '${name}')
        _assertReadWord(self, '${name[0]}')

        _assertReadWord(self, '${array[@]}')

        # Should be DISALLOWED!
        #w = _assertReadWord(self, '${11[@]}')

    def assertUnquoted(self, expected, w):
        # word_.StaticEval(w) must succeed, yield `expected`, and report that
        # the word was not quoted.
        ok, s, quoted = word_.StaticEval(w)
        self.assertTrue(ok)
        self.assertEqual(expected, s)
        self.assertFalse(quoted)

    def testPatSub(self):
        w = _assertReadWord(self, '${var/pat/replace}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Undefined_Tok, op.replace_mode)

        w = _assertReadWord(self, '${var//pat/replace}')  # sub all
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Slash, op.replace_mode,
                         Id_str(op.replace_mode))

        # '/%' anchors the pattern at the end of the value
        w = _assertReadWord(self, '${var/%pat/replace}')  # suffix
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Percent, op.replace_mode)

        # '/#' anchors the pattern at the start of the value
        w = _assertReadWord(self, '${var/#pat/replace}')  # prefix
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Pound, op.replace_mode)

        _assertReadWord(self, '${var/pat}')  # no replacement
        w = _assertReadWord(self, '${var//pat}')  # no replacement
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertEqual(rhs_word_e.Empty, op.replace.tag())
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        # replace with slash
        w = _assertReadWord(self, '${var/pat//}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('/', op.replace)

        # replace with two slashes unquoted
        w = _assertReadWord(self, '${var/pat///}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('//', op.replace)

        # replace with two slashes quoted
        w = _assertReadWord(self, '${var/pat/"//"}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual('//', s)
        self.assertTrue(quoted)

        # Real example found in the wild!
        # http://www.oilshell.org/blog/2016/11/07.html

        # 2023-05: copied into spec/var-op-patsub.test.sh
        w = _assertReadWord(self, r'${var////\\/}')
        op = _GetSuffixOp(self, w)
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        self.assertUnquoted('/', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual(r'\/', s)

    def testSlice(self):
        w = _assertReadWord(self, '${foo:0}')
        # No length
        self.assertEqual(None, _GetSuffixOp(self, w).length)

        _assertReadWord(self, '${foo:0:1}')
        _assertReadWord(self, '${foo:1+2:2+3}')

        # This is allowed
        w = _assertReadWord(self, '${foo::1}')
        # No beginning
        self.assertEqual(None, _GetSuffixOp(self, w).begin)

    def testLength(self):
        # These use assertEqual.  assertTrue(expected, actual) treats `actual`
        # as the failure message and passes for any truthy `expected`.

        # Synonym for $#, had a bug here
        w = _assertReadWord(self, '${#@}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        # Length of arg 11
        w = _assertReadWord(self, '${#11}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${#str}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        # Same check that testDisambiguatePrefix makes for this input
        w = _assertReadWord(self, '${#array[0]}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        # TODO: this case was marked "BUG!" with its assertion disabled;
        # confirm the expected prefix op before asserting on it.
        _assertReadWord(self, '${#array["key"]}')

    def testUnary(self):
        # assertEqual, not assertTrue: see the note in testLength
        w = _assertReadWord(self, '${var#}')
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var#prefix}')
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${var##}')
        self.assertEqual(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var##prefix}')
        self.assertEqual(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)

        _assertReadWord(self, '${var%suffix}')
        _assertReadWord(self, '${var%%suffix}')

    def testArrayOp(self):
        _assertReadWord(self, '${array[0]}')
        _assertReadWord(self, '${array[5+5]}')

        _assertReadWord(self, '${array[@]}')
        _assertReadWord(self, '${array[*]}')

    def testTestOp(self):
        # NOTE(review): the ']' before '}' looks unintentional; kept as-is
        # because the test only requires that the word parses.
        _assertReadWord(self, '${var:-default]}')

    def testTildeLike(self):
        CASES = [
            '~/git/oilshell/oil',
            '~andy/git/oilshell/oil',
            '~andy_c/git/oilshell/oil',
            '~andy.c/git/oilshell/oil',
            '~andy-c/git/oilshell/oil',
            '~andy-c:git/oilshell/oil',
        ]
        for tilde_word in CASES:
            _assertReadWord(self, tilde_word)

    def testRead(self):
        CASES = [
            'ls "foo"',
            '$(( 1 + 2 ))',
            '$(echo $(( 1 )) )',  # OLD BUG: arith sub within command sub
            'echo ${#array[@]} b',  # Had a bug here
            'echo $(( ${#array[@]} ))',  # Bug here

            # Had a bug: unary minus
            #'${mounted_disk_regex:0:-1}',
            'echo ${@%suffix}',  # had a bug here
            '${@}',
            'echo ${var,,}',
            'echo ${var,,?}',

            # Line continuation tests
            '${\\\nfoo}',  # VSub_1
            '${foo\\\n}',  # VSub_2
            '${foo#\\\nyo}',  # VS_ARG_UNQ
            '"${foo#\\\nyo}"',  # VS_ARG_DQ
        ]
        for expr in CASES:
            print('---')
            print(expr)
            # print('') rather than print(): under Python 2 without
            # print_function, print() prints '()'.
            print('')

            w_parser = test_lib.InitWordParser(expr)

            # Read every word up to and including end-of-file
            while True:
                w = w_parser.ReadWord(lex_mode_e.ShCommand)
                self.assertIsNotNone(w)

                fmt.PrettyPrint(w)

                if word_.CommandId(w) == Id.Eof_Real:
                    break

    def testOilSplice(self):
        _assertReadWord(self, '@words', oil_at=True)

        # These are normal words
        _assertReadWord(self, '.@words', oil_at=True)
        _assertReadWord(self, '.@words.', oil_at=True)

        # Errors
        _assertReadWordFailure(self, '@words[', oil_at=True)
        _assertReadWordFailure(self, '@words.', oil_at=True)

    def testReadComment(self):
        # Test that we get Id.Op_Newline
        code = 'foo # comment\nbar #comment\n'
        w_parser = test_lib.InitWordParser(code)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertTrue(w)
        self.assertEqual('foo', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertTrue(w)
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertTrue(w)
        self.assertEqual('bar', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertTrue(w)
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertTrue(w)
        self.assertEqual(Id.Eof_Real, w.id)

    def testReadArithWord(self):
        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

        # NOTE(review): this repeats the case above verbatim; possibly a
        # different expression was intended.
        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

    def testReadArith(self):
        CASES = [
            '1 + 2',
            'a + b',
            '$a * $b',
            '${a} * ${b}',
            '$(echo 1) * $(echo 2)',
            '`echo 1` + 2',
            '$((1 + 2)) * $((3 + 4))',
            "'single quoted'",  # Allowed by oil but not bash
            '"${a}" + "${b}"',  # Ditto
            '$# + $$',
            # This doesn't work but does in bash -- should be 15
            #'$(( $(echo 1)$(echo 2) + 3 ))',
            '$(( x[0] < 5 ))',
            '$(( ++i ))',
            '$(( i++ ))',
            '$(( x -= 1))',
            '$(( x |= 1))',
            '$(( x[0] = 1 ))',
            '$(( 1 | 0 ))',
            '$((0x$size))',
        ]

        for expr in CASES:
            print('---')
            print(expr)
            # print('') rather than print(): under Python 2 without
            # print_function, print() prints '()'.
            print('')

            w_parser = test_lib.InitWordParser(expr)
            # Can we remove this initialization?
            w_parser._SetNext(lex_mode_e.Arith)

            # Stop at end-of-file or at the first unknown token
            while True:
                w = w_parser.ReadArithWord()
                self.assertIsNotNone(w)
                fmt.PrettyPrint(w)
                if word_.CommandId(w) in (Id.Eof_Real, Id.Unknown_Tok):
                    break

    def testHereDoc(self):
        # Despite the name, the input holds no here doc: it checks how words
        # and newlines are read around blank lines and a comment.
        w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")

        def assertWord(w, id_, val):
            # `w` must have a single part with the given Id and string value
            self.assertEqual(1, len(w.parts))
            part = w.parts[0]
            self.assertEqual(id_, part.id)
            self.assertEqual(val, lexer.LazyStr(part))

        print('--MULTI')
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'foo')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'bar')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Eof_Real, w.id)
        self.assertEqual('', lexer.LazyStr(w))

    def testUnicode(self):
        # Four space-separated words, written as byte escapes
        words = 'z \xce\xbb \xe4\xb8\x89 \xf0\x9f\x98\x98'

        def _Part(w, i):
            return lexer.LazyStr(w.parts[i])

        w_parser = test_lib.InitWordParser(words)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('z', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xce\xbb', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xe4\xb8\x89', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xf0\x9f\x98\x98', _Part(w, 0))

    def testParseErrorLocation(self):
        # _assertSpanForWord prints the leftmost token and returns nothing,
        # so there is no result to keep.
        CASES = [
            'a=(1 2 3)',
            'foo',
            '\\$',
            "''",
            "'sq'",
            '""',
            '"dq"',
            '$(echo command sub)',
            '$(( 1 + 2 ))',
            '~user',
            '${var#}',
        ]
        for word_str in CASES:
            _assertSpanForWord(self, word_str)
603
604
# Run every TestCase in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()