osh/word_parse

OILS / osh / word_parse_test.py View on Github | oilshell.org

607 lines, 398 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	word_parse_test.py: Tests for word_parse.py
10	"""
11
12	import unittest
13
14	from _devbuild.gen.id_kind_asdl import Id, Id_str
15	from _devbuild.gen.syntax_asdl import arith_expr_e, word_e, rhs_word_e
16	from _devbuild.gen.types_asdl import lex_mode_e
17
18	from asdl import format as fmt
19	from core import error
20	from core import test_lib
21	from core.test_lib import FakeTok
22	from frontend import lexer
23	from frontend import location
24	from osh import word_
25
26
def _assertReadWordWithArena(test, w_parser):
    """Read exactly one word from `w_parser` and return it.

    The word is pretty-printed, followed by an empty line.  A second read is
    then performed, and `test` asserts that its result compares equal (via
    test_lib.TokensEqual) to a fake Id.Eof_Real token with an empty string.
    """
    first = w_parser.ReadWord(lex_mode_e.ShCommand)
    assert first is not None
    fmt.PrettyPrint(first)
    print('')

    # Nothing but Eof_Real may follow the word we just read.
    second = w_parser.ReadWord(lex_mode_e.ShCommand)
    at_eof = test_lib.TokensEqual(FakeTok(Id.Eof_Real, ''), second)
    test.assertTrue(at_eof, second)
    return first
37
38
def _assertReadWord(test, word_str, oil_at=False):
    """Parse `word_str` as a single word and return the parsed word.

    Prints a banner with the input, builds a word parser over a fresh arena
    (passing `oil_at` through to test_lib.InitWordParser), and delegates to
    _assertReadWordWithArena for the read and the end-of-input check.
    """
    for banner_line in ('', '--- %s' % word_str, ''):
        print(banner_line)

    parser = test_lib.InitWordParser(
        word_str,
        arena=test_lib.MakeArena('word_parse_test.py'),
        oil_at=oil_at)
    return _assertReadWordWithArena(test, parser)
48
49
def _assertReadWordFailure(test, word_str, oil_at=False):
    """Assert that reading `word_str` as a word raises error.Parse.

    Args:
      test: the unittest.TestCase used to report a failure.
      word_str: source text handed to test_lib.InitWordParser.
      oil_at: passed through to test_lib.InitWordParser.

    Returns:
      None.  If the read succeeds, the word is pretty-printed and the test
      is failed via test.fail().
    """
    # Format the banner explicitly.  This file targets Python 2 (see the
    # shebang) without print_function, where print('a', b) prints the repr
    # of a tuple rather than two space-separated values.
    print('\n--- %s' % word_str)
    w_parser = test_lib.InitWordParser(word_str, oil_at=oil_at)
    try:
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
    except error.Parse as e:
        print('Got expected ParseError: %s' % e)
    else:
        fmt.PrettyPrint(w)
        # The 1-tuple keeps a tuple-like `w` from being unpacked as several
        # format arguments.
        test.fail('Expected a parser error, got %r' % (w,))
60
61
def _assertSpanForWord(test, word_str):
    """Parse `word_str` as one word, then print the input and its left token.

    The left token is whatever location.LeftTokenForWord() returns for the
    parsed word.  Nothing is returned.
    """
    parser = test_lib.InitWordParser(
        word_str, arena=test_lib.MakeArena('word_parse_test.py'))
    parsed = _assertReadWordWithArena(test, parser)
    left_tok = location.LeftTokenForWord(parsed)

    print(word_str)
    print(left_tok)
70
71
72	def _GetSuffixOp(test, w):
73	"""Get a single transform op."""
74	test.assertEqual(1, len(w.parts))
75	return w.parts[0].suffix_op
76
77
78	def _GetPrefixOp(test, w):
79	"""Get a single transform op."""
80	test.assertEqual(1, len(w.parts))
81	return w.parts[0].prefix_op.id
82
83
def _GetVarSub(test, w):
    """Return lexer.LazyStr() of the `token` on the only part of word `w`.

    `test` asserts that `w` has exactly one part.
    """
    parts = w.parts
    test.assertEqual(1, len(parts))
    return lexer.LazyStr(parts[0].token)
88
89
class ArenaTest(unittest.TestCase):
    """Arena tests, kept in this module because a real word parser is
    available to produce the tokens."""

    def testSnipCodeString(self):
        # A single word spanning several lines; per the checks below,
        # parts[1] is the single-quoted piece and parts[2] the double-quoted
        # piece.
        code = ("hi'\n"
                "single quoted'\"double\n"
                "quoted\n"
                "\"there\n")

        arena = test_lib.MakeArena('hi')
        w = test_lib.InitWordParser(code, arena=arena).ReadWord(
            lex_mode_e.ShCommand)
        print(w)

        sq_part = w.parts[1]
        dq_part = w.parts[2]

        # From the left single quote through the right double quote.
        snipped = arena.SnipCodeString(sq_part.left, dq_part.right)
        print('s = %r' % snipped)
        self.assertEqual("'\nsingle quoted'\"double\nquoted\n\"", snipped)

        # Only the single-quoted part.
        snipped = arena.SnipCodeString(sq_part.left, sq_part.right)
        print('s = %r' % snipped)
        self.assertEqual("'\nsingle quoted'", snipped)

        # Just snip one token
        first_part = w.parts[0]
        snipped = arena.SnipCodeString(first_part, first_part)
        print('s = %r' % snipped)
        self.assertEqual('hi', snipped)

    def testSaveLinesAndDiscard(self):
        # Placeholder with no assertions yet.
        # Also takes a left, right, token
        pass
137
138
class LexerTest(unittest.TestCase):
    """Tests for lexer helper functions, kept in this module because a real
    word parser is available to produce the tokens."""

    def testAssignFunctions(self):
        arena = test_lib.MakeArena('')

        # (source, expected IsPlusEquals(), `right` index for TokenSliceRight)
        cases = [
            ('ls; foo=42', False, -1),
            ('ls; foo+=X', True, -2),
        ]
        for code, plus_equals, right_index in cases:
            w_parser = test_lib.InitWordParser(code, arena=arena)

            # Skip first two words
            for _ in range(2):
                w_parser.ReadWord(lex_mode_e.ShCommand)
            third = w_parser.ReadWord(lex_mode_e.ShCommand)
            print(third)

            self.assertEqual(plus_equals, lexer.IsPlusEquals(third.parts[0]))
            self.assertEqual(
                'foo', lexer.TokenSliceRight(third.parts[0], right_index))
169
170
class WordParserTest(unittest.TestCase):
    """Tests that read words through a word parser built by test_lib.

    Most cases go through the module-level helpers _assertReadWord() (parse
    must succeed and consume the whole input) and _assertReadWordFailure()
    (parse must raise error.Parse).
    """

    def testStaticEvalWord(self):
        expr = r'\EOF'  # Quoted here doc delimiter
        w_parser = test_lib.InitWordParser(expr)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        ok, s, quoted = word_.StaticEval(w)
        self.assertEqual(True, ok)
        self.assertEqual('EOF', s)
        self.assertEqual(True, quoted)

    def testDisambiguatePrefix(self):
        # '#', '!' and '?' alone are variable names, not operators.
        w = _assertReadWord(self, '${#}')
        self.assertEqual('#', _GetVarSub(self, w))
        w = _assertReadWord(self, '${!}')
        self.assertEqual('!', _GetVarSub(self, w))
        w = _assertReadWord(self, '${?}')
        self.assertEqual('?', _GetVarSub(self, w))

        _assertReadWord(self, '${var}')

        _assertReadWord(self, '${15}')

        w = _assertReadWord(self, '${#var}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        w = _assertReadWord(self, '${!ref}')
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        # Length of length
        w = _assertReadWord(self, '${##}')
        self.assertEqual('#', _GetVarSub(self, w))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${array[0]}')
        self.assertEqual(1, len(w.parts))
        w = _assertReadWord(self, '${array[@]}')
        self.assertEqual(1, len(w.parts))

        # Length of element
        w = _assertReadWord(self, '${#array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        # Ref for element
        w = _assertReadWord(self, '${!array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${!var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        _assertReadWordFailure(self, '${#var#prefix}')

        # Allowed by bash, but we don't parse it.  Use len=$#; echo ${len#2}
        # instead.
        _assertReadWordFailure(self, '${##2}')

    def testIncompleteWords(self):
        # Bugs found in completion.  _assertReadWordFailure() returns
        # nothing, so its result is not bound to a name.
        _assertReadWordFailure(self, '${undef:-')
        _assertReadWordFailure(self, '${undef:-$')
        _assertReadWordFailure(self, '${undef:-$F')

        _assertReadWordFailure(self, '${x@')
        _assertReadWordFailure(self, '${x@Q')

        _assertReadWordFailure(self, '${x%')

        _assertReadWordFailure(self, '${x/')
        _assertReadWordFailure(self, '${x/a/')
        _assertReadWordFailure(self, '${x/a/b')
        _assertReadWordFailure(self, '${x:')

    def testVarOf(self):
        _assertReadWord(self, '${name}')
        _assertReadWord(self, '${name[0]}')

        _assertReadWord(self, '${array[@]}')

        # Should be DISALLOWED!
        #w = _assertReadWord(self, '${11[@]}')

    def assertUnquoted(self, expected, w):
        # Assert that word_.StaticEval(w) succeeds, yields `expected`, and
        # reports the word as not quoted.
        ok, s, quoted = word_.StaticEval(w)
        self.assertTrue(ok)
        self.assertEqual(expected, s)
        self.assertFalse(quoted)

    def testPatSub(self):
        w = _assertReadWord(self, '${var/pat/replace}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Undefined_Tok, op.replace_mode)

        w = _assertReadWord(self, '${var//pat/replace}')  # sub all
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Slash, op.replace_mode,
                         Id_str(op.replace_mode))

        w = _assertReadWord(self, '${var/%pat/replace}')  # suffix
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Percent, op.replace_mode)

        w = _assertReadWord(self, '${var/#pat/replace}')  # prefix
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Pound, op.replace_mode)

        _assertReadWord(self, '${var/pat}')  # no replacement
        w = _assertReadWord(self, '${var//pat}')  # no replacement
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertEqual(rhs_word_e.Empty, op.replace.tag())
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        # replace with slash
        w = _assertReadWord(self, '${var/pat//}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('/', op.replace)

        # replace with two slashes unquoted
        w = _assertReadWord(self, '${var/pat///}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('//', op.replace)

        # replace with two slashes quoted
        w = _assertReadWord(self, '${var/pat/"//"}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual('//', s)
        self.assertTrue(quoted)

        # Real example found in the wild!
        # http://www.oilshell.org/blog/2016/11/07.html

        # 2023-05: copied into spec/var-op-patsub.test.sh
        w = _assertReadWord(self, r'${var////\\/}')
        op = _GetSuffixOp(self, w)
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        self.assertUnquoted('/', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual(r'\/', s)

    def testSlice(self):
        w = _assertReadWord(self, '${foo:0}')
        # No length
        self.assertEqual(None, _GetSuffixOp(self, w).length)

        _assertReadWord(self, '${foo:0:1}')
        _assertReadWord(self, '${foo:1+2:2+3}')

        # This is allowed
        w = _assertReadWord(self, '${foo::1}')
        # No beginning
        self.assertEqual(arith_expr_e.EmptyZero,
                         _GetSuffixOp(self, w).begin.tag())

    def testLength(self):
        # NOTE: these checks use assertEqual.  assertTrue(expected, actual)
        # treats `actual` as the failure message and passes for any truthy
        # `expected`, so it verifies nothing.

        # Synonym for $#, had a bug here
        w = _assertReadWord(self, '${#@}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        # Length of arg 11
        w = _assertReadWord(self, '${#11}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${#str}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        # Same input and check as "Length of element" in
        # testDisambiguatePrefix.
        w = _assertReadWord(self, '${#array[0]}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        # TODO: only checked for a successful parse; an assertion on the
        # operator was previously disabled here and marked "BUG!".
        _assertReadWord(self, '${#array["key"]}')

    def testUnary(self):
        # assertEqual, not assertTrue: see the note in testLength.
        w = _assertReadWord(self, '${var#}')
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var#prefix}')
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${var##}')
        self.assertEqual(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var##prefix}')
        self.assertEqual(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)

        _assertReadWord(self, '${var%suffix}')
        _assertReadWord(self, '${var%%suffix}')

    def testArrayOp(self):
        _assertReadWord(self, '${array[0]}')
        _assertReadWord(self, '${array[5+5]}')

        _assertReadWord(self, '${array[@]}')
        _assertReadWord(self, '${array[*]}')

    def testTestOp(self):
        _assertReadWord(self, '${var:-default]}')

    def testTildeLike(self):
        _assertReadWord(self, '~/git/oilshell/oil')
        _assertReadWord(self, '~andy/git/oilshell/oil')
        _assertReadWord(self, '~andy_c/git/oilshell/oil')
        _assertReadWord(self, '~andy.c/git/oilshell/oil')
        _assertReadWord(self, '~andy-c/git/oilshell/oil')
        _assertReadWord(self, '~andy-c:git/oilshell/oil')

    def testRead(self):
        CASES = [
            'ls "foo"',
            '$(( 1 + 2 ))',
            '$(echo $(( 1 )) )',  # OLD BUG: arith sub within command sub
            'echo ${#array[@]} b',  # Had a bug here
            'echo $(( ${#array[@]} ))',  # Bug here

            # Had a bug: unary minus
            #'${mounted_disk_regex:0:-1}',
            'echo ${@%suffix}',  # had a bug here
            '${@}',
            'echo ${var,,}',
            'echo ${var,,?}',

            # Line continuation tests
            '${\\\nfoo}',  # VSub_1
            '${foo\\\n}',  # VSub_2
            '${foo#\\\nyo}',  # VS_ARG_UNQ
            '"${foo#\\\nyo}"',  # VS_ARG_DQ
        ]
        for expr in CASES:
            print('---')
            print(expr)
            # print('') rather than print(): under Python 2 without
            # print_function, print() writes "()" instead of a blank line.
            print('')

            w_parser = test_lib.InitWordParser(expr)

            # Read every word of the case until end of input.
            while True:
                w = w_parser.ReadWord(lex_mode_e.ShCommand)
                assert w is not None

                fmt.PrettyPrint(w)

                if word_.CommandId(w) == Id.Eof_Real:
                    break

    def testOilSplice(self):
        _assertReadWord(self, '@words', oil_at=True)

        # These are normal words
        _assertReadWord(self, '.@words', oil_at=True)
        _assertReadWord(self, '.@words.', oil_at=True)

        # Errors
        _assertReadWordFailure(self, '@words[', oil_at=True)
        _assertReadWordFailure(self, '@words.', oil_at=True)

    def testReadComment(self):
        # Test that we get Id.Op_Newline
        code = 'foo # comment\nbar #comment\n'
        w_parser = test_lib.InitWordParser(code)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual('foo', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual('bar', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Eof_Real, w.id)

    def testReadArithWord(self):
        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

        # NOTE(review): identical to the case above; possibly meant to
        # cover a different expression -- confirm.
        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

    def testReadArith(self):
        CASES = [
            '1 + 2',
            'a + b',
            '$a * $b',
            '${a} * ${b}',
            '$(echo 1) * $(echo 2)',
            '`echo 1` + 2',
            '$((1 + 2)) * $((3 + 4))',
            "'single quoted'",  # Allowed by oil but not bash
            '"${a}" + "${b}"',  # Ditto
            '$# + $$',
            # This doesn't work but does in bash -- should be 15
            #'$(( $(echo 1)$(echo 2) + 3 ))',
            '$(( x[0] < 5 ))',
            '$(( ++i ))',
            '$(( i++ ))',
            '$(( x -= 1))',
            '$(( x \|= 1))',
            '$(( x[0] = 1 ))',
            '$(( 1 \| 0 ))',
            '$((0x$size))',
        ]

        for expr in CASES:
            print('---')
            print(expr)
            # print('') rather than print(): under Python 2 without
            # print_function, print() writes "()" instead of a blank line.
            print('')

            w_parser = test_lib.InitWordParser(expr)
            # Can we remove this initialization?
            w_parser._SetNext(lex_mode_e.Arith)

            # Read arith words until end of input or an unknown token.
            while True:
                w = w_parser.ReadArithWord()
                assert w is not None
                fmt.PrettyPrint(w)
                if word_.CommandId(w) in (Id.Eof_Real, Id.Unknown_Tok):
                    break

    def testHereDoc(self):
        # Despite the name, the input below has no here doc: it checks that
        # a run of blank lines and a comment between two commands produces a
        # single Op_Newline word.
        w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")

        def assertWord(w, id_, val):
            # `w` must have one part with the given id and string value.
            self.assertEqual(1, len(w.parts))
            part = w.parts[0]
            self.assertEqual(id_, part.id)
            self.assertEqual(val, lexer.LazyStr(part))

        print('--MULTI')
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'foo')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'bar')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Eof_Real, w.id)
        self.assertEqual('', lexer.LazyStr(w))

    def testUnicode(self):
        # Four space-separated words written as escaped byte sequences of
        # 1, 2, 3 and 4 bytes respectively.
        words = 'z \xce\xbb \xe4\xb8\x89 \xf0\x9f\x98\x98'

        def _Part(w, i):
            return lexer.LazyStr(w.parts[i])

        w_parser = test_lib.InitWordParser(words)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('z', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xce\xbb', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xe4\xb8\x89', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xf0\x9f\x98\x98', _Part(w, 0))

    def testParseErrorLocation(self):
        # _assertSpanForWord() returns nothing, so its result is not bound
        # to a name.
        _assertSpanForWord(self, 'a=(1 2 3)')

        _assertSpanForWord(self, 'foo')

        _assertSpanForWord(self, '\\$')

        _assertSpanForWord(self, "''")

        _assertSpanForWord(self, "'sq'")

        _assertSpanForWord(self, '""')

        _assertSpanForWord(self, '"dq"')

        _assertSpanForWord(self, '$(echo command sub)')

        _assertSpanForWord(self, '$(( 1 + 2 ))')

        _assertSpanForWord(self, '~user')

        _assertSpanForWord(self, '${var#}')
604
605
if __name__ == '__main__':
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()