osh/word_parse

OILS / osh / word_parse_test.py View on Github | oilshell.org

606 lines, 397 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	word_parse_test.py: Tests for word_parse.py
10	"""
11
12	import unittest
13
14	from _devbuild.gen.id_kind_asdl import Id, Id_str
15	from _devbuild.gen.syntax_asdl import arith_expr_e, word_e, rhs_word_e
16	from _devbuild.gen.types_asdl import lex_mode_e
17
18	from asdl import format as fmt
19	from core import error
20	from core import test_lib
21	from core.test_lib import FakeTok
22	from frontend import lexer
23	from frontend import location
24	from osh import word_
25
26
def _assertReadWordWithArena(test, w_parser):
    """Read one word from w_parser and check that end-of-input follows it.

    Args:
      test: the unittest.TestCase used to report a failed check.
      w_parser: a word parser; two words are read from it in ShCommand mode.

    Returns:
      The first word read.  It is also pretty-printed to stdout, followed by
      a blank line.
    """
    first_word = w_parser.ReadWord(lex_mode_e.ShCommand)
    assert first_word is not None
    fmt.PrettyPrint(first_word)
    print('')

    # Nothing may follow the word: the second read has to compare equal to
    # an empty Eof_Real token.
    eof_word = w_parser.ReadWord(lex_mode_e.ShCommand)
    expected_eof = FakeTok(Id.Eof_Real, '')
    test.assertTrue(test_lib.TokensEqual(expected_eof, eof_word), eof_word)
    return first_word
37
38
def _assertReadWord(test, word_str, oil_at=False):
    """Parse word_str as exactly one word and return it.

    Args:
      test: the unittest.TestCase used to report a failed check.
      word_str: source text to hand to the word parser.
      oil_at: forwarded unchanged to test_lib.InitWordParser.

    Returns:
      The word produced by _assertReadWordWithArena.
    """
    # Banner: a blank line, the input, and another blank line.
    print('\n--- %s\n' % word_str)

    w_parser = test_lib.InitWordParser(
        word_str,
        arena=test_lib.MakeArena('word_parse_test.py'),
        oil_at=oil_at)
    return _assertReadWordWithArena(test, w_parser)
48
49
def _assertReadWordFailure(test, word_str, oil_at=False):
    """Check that reading a word from word_str raises error.Parse.

    Args:
      test: the unittest.TestCase; test.fail() is called if parsing succeeds.
      word_str: source text to hand to the word parser.
      oil_at: forwarded unchanged to test_lib.InitWordParser.

    Returns:
      None.
    """
    # This file runs under python2 without print_function, so
    # print('\n---', word_str) would print the repr of a 2-tuple.  Format the
    # banner explicitly, as _assertReadWord does.
    print('')
    print('--- %s' % word_str)

    w_parser = test_lib.InitWordParser(word_str, oil_at=oil_at)
    try:
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
    except error.Parse as e:
        print('Got expected ParseError: %s' % e)
    else:
        # Show what was parsed before failing, to make debugging easier.
        fmt.PrettyPrint(w)
        test.fail('Expected a parser error, got %r' % w)
60
61
def _assertSpanForWord(test, word_str):
    """Parse word_str as one word and print it with its left token.

    Only the checks inside _assertReadWordWithArena are made; the token
    returned by location.LeftTokenForWord is printed, not asserted on.

    Args:
      test: the unittest.TestCase used to report a failed check.
      word_str: source text to hand to the word parser.

    Returns:
      None.
    """
    w_parser = test_lib.InitWordParser(
        word_str, arena=test_lib.MakeArena('word_parse_test.py'))
    word = _assertReadWordWithArena(test, w_parser)
    left_tok = location.LeftTokenForWord(word)

    print(word_str)
    print(left_tok)
70
71
def _GetSuffixOp(test, w):
    """Return the suffix_op of the only part of word w.

    Args:
      test: the unittest.TestCase used to check that w has exactly one part.
      w: a word with a 'parts' sequence.
    """
    parts = w.parts
    test.assertEqual(1, len(parts))
    only_part = parts[0]
    return only_part.suffix_op
76
77
def _GetPrefixOp(test, w):
    """Return the id of the prefix_op on the only part of word w.

    Args:
      test: the unittest.TestCase used to check that w has exactly one part.
      w: a word with a 'parts' sequence.
    """
    parts = w.parts
    test.assertEqual(1, len(parts))
    prefix_op = parts[0].prefix_op
    return prefix_op.id
82
83
def _GetVarSub(test, w):
    """Return lexer.LazyStr() of the token on the only part of word w.

    Args:
      test: the unittest.TestCase used to check that w has exactly one part.
      w: a word with a 'parts' sequence whose part has a 'token' attribute.
    """
    parts = w.parts
    test.assertEqual(1, len(parts))
    return lexer.LazyStr(parts[0].token)
88
89
class ArenaTest(unittest.TestCase):
    """Arena tests, kept in this module because a real lexer and word parser
    are readily available here."""

    def testSnipCodeString(self):
        # arena.SnipCodeString(left, right) is checked against the expected
        # source text for three different pairs of end points.
        code = """\
hi'
single quoted'"double
quoted
"there
"""

        arena = test_lib.MakeArena('hi')
        w_parser = test_lib.InitWordParser(code, arena=arena)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        print(w)

        sq_part = w.parts[1]  # its .left is the left single quote
        dq_part = w.parts[2]  # its .right is the right double quote

        # From the left single quote through the right double quote
        snippet = arena.SnipCodeString(sq_part.left, dq_part.right)
        print('s = %r' % snippet)
        self.assertEqual("""\
'
single quoted'"double
quoted
"\
""", snippet)

        # Both end points taken from the same part
        snippet = arena.SnipCodeString(w.parts[1].left, w.parts[1].right)
        print('s = %r' % snippet)
        self.assertEqual("""\
'
single quoted'\
""", snippet)

        # Just snip one token
        first_part = w.parts[0]
        snippet = arena.SnipCodeString(first_part, first_part)
        print('s = %r' % snippet)
        self.assertEqual('hi', snippet)

    def testSaveLinesAndDiscard(self):
        # Placeholder: nothing is tested yet.
        # Also takes a left, right, token
        pass
137
138
class LexerTest(unittest.TestCase):
    """Tests for lexer helper functions, kept in this module because a word
    parser is a convenient way to obtain the values they operate on."""

    def testAssignFunctions(self):
        # lexer.IsPlusEquals and lexer.TokenSliceRight are applied to the
        # first part of the third word of each input.
        arena = test_lib.MakeArena('')

        # (source, expected IsPlusEquals, right index that leaves 'foo')
        cases = [
            ('ls; foo=42', False, -1),
            ('ls; foo+=X', True, -2),
        ]
        for expr, plus_equals, right in cases:
            w_parser = test_lib.InitWordParser(expr, arena=arena)

            # Skip first two words
            for _ in range(2):
                w_parser.ReadWord(lex_mode_e.ShCommand)
            w3 = w_parser.ReadWord(lex_mode_e.ShCommand)
            print(w3)

            self.assertEqual(plus_equals, lexer.IsPlusEquals(w3.parts[0]))
            self.assertEqual('foo', lexer.TokenSliceRight(w3.parts[0], right))
169
170
class WordParserTest(unittest.TestCase):
    """Tests that drive the word parser through ReadWord() in ShCommand mode
    and through ReadArithWord()."""

    def testStaticEvalWord(self):
        # word_.StaticEval on a backslash-escaped word
        expr = r'\EOF'  # Quoted here doc delimiter
        w_parser = test_lib.InitWordParser(expr)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        ok, s, quoted = word_.StaticEval(w)
        self.assertEqual(True, ok)
        self.assertEqual('EOF', s)
        self.assertEqual(True, quoted)

    def testDisambiguatePrefix(self):
        # '#' and '!' directly after '${' are either the token of the
        # substitution itself or a prefix operator, depending on what follows.
        w = _assertReadWord(self, '${#}')
        self.assertEqual('#', _GetVarSub(self, w))
        w = _assertReadWord(self, '${!}')
        self.assertEqual('!', _GetVarSub(self, w))
        w = _assertReadWord(self, '${?}')
        self.assertEqual('?', _GetVarSub(self, w))

        _assertReadWord(self, '${var}')

        _assertReadWord(self, '${15}')

        w = _assertReadWord(self, '${#var}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        w = _assertReadWord(self, '${!ref}')
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        # Length of length
        w = _assertReadWord(self, '${##}')
        self.assertEqual('#', _GetVarSub(self, w))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${array[0]}')
        self.assertEqual(1, len(w.parts))
        w = _assertReadWord(self, '${array[@]}')
        self.assertEqual(1, len(w.parts))

        # Length of element
        w = _assertReadWord(self, '${#array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
        # Ref for element
        w = _assertReadWord(self, '${!array[0]}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${!var#prefix}')
        self.assertEqual(1, len(w.parts))
        self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        _assertReadWordFailure(self, '${#var#prefix}')

        # Allowed by bash, but we don't parse it.  Use len=$#; echo ${len#2}
        # instead.
        _assertReadWordFailure(self, '${##2}')

    def testIncompleteWords(self):
        # Bugs found in completion: each truncated word must be a parse
        # error.
        incomplete = [
            '${undef:-',
            '${undef:-$',
            '${undef:-$F',
            '${x@',
            '${x@Q',
            '${x%',
            '${x/',
            '${x/a/',
            '${x/a/b',
            '${x:',
        ]
        for word_str in incomplete:
            _assertReadWordFailure(self, word_str)

    def testVarOf(self):
        # These only need to parse as a single word.
        _assertReadWord(self, '${name}')
        _assertReadWord(self, '${name[0]}')

        _assertReadWord(self, '${array[@]}')

        # Should be DISALLOWED!
        #w = _assertReadWord(self, '${11[@]}')

    def assertUnquoted(self, expected, w):
        # Helper, not a test: w must statically evaluate to the string
        # 'expected' and must not be reported as quoted.
        ok, s, quoted = word_.StaticEval(w)
        self.assertTrue(ok)
        self.assertEqual(expected, s)
        self.assertFalse(quoted)

    def testPatSub(self):
        # ${var/pat/replace} and its variants: checks the pat, replace and
        # replace_mode fields of the suffix op.
        w = _assertReadWord(self, '${var/pat/replace}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Undefined_Tok, op.replace_mode)

        w = _assertReadWord(self, '${var//pat/replace}')  # sub all
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Slash, op.replace_mode,
                         Id_str(op.replace_mode))

        # In bash, '%' anchors the pattern at the end of the value (suffix)
        w = _assertReadWord(self, '${var/%pat/replace}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Percent, op.replace_mode)

        # In bash, '#' anchors the pattern at the start of the value (prefix)
        w = _assertReadWord(self, '${var/#pat/replace}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(Id.Lit_Pound, op.replace_mode)

        _assertReadWord(self, '${var/pat}')  # no replacement
        w = _assertReadWord(self, '${var//pat}')  # no replacement
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertEqual(rhs_word_e.Empty, op.replace.tag())
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        # replace with slash
        w = _assertReadWord(self, '${var/pat//}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('/', op.replace)

        # replace with two slashes unquoted
        w = _assertReadWord(self, '${var/pat///}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('//', op.replace)

        # replace with two slashes quoted
        w = _assertReadWord(self, '${var/pat/"//"}')
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual('//', s)
        self.assertTrue(quoted)

        # Real example found in the wild!
        # http://www.oilshell.org/blog/2016/11/07.html

        # 2023-05: copied into spec/var-op-patsub.test.sh
        w = _assertReadWord(self, r'${var////\\/}')
        op = _GetSuffixOp(self, w)
        self.assertEqual(Id.Lit_Slash, op.replace_mode)

        self.assertUnquoted('/', op.pat)

        ok, s, quoted = word_.StaticEval(op.replace)
        self.assertTrue(ok)
        self.assertEqual(r'\/', s)

    def testSlice(self):
        # ${foo:begin:length}, where either side may be omitted
        w = _assertReadWord(self, '${foo:0}')
        # No length
        self.assertEqual(None, _GetSuffixOp(self, w).length)

        _assertReadWord(self, '${foo:0:1}')
        _assertReadWord(self, '${foo:1+2:2+3}')

        # This is allowed
        w = _assertReadWord(self, '${foo::1}')
        # No beginning
        self.assertEqual(None, _GetSuffixOp(self, w).begin)

    def testLength(self):
        # These checks used assertTrue(expected, actual), which always passes
        # because the second argument is only the failure message.
        # assertEqual actually compares the prefix op id.

        # Synonym for $#, had a bug here
        w = _assertReadWord(self, '${#@}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        # Length of arg 11
        w = _assertReadWord(self, '${#11}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        w = _assertReadWord(self, '${#str}')
        self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))

        _assertReadWord(self, '${#array[0]}')
        # BUG!
        #self.assertTrue(VS_POUND, _GetSuffixOp(self, w).id)

        _assertReadWord(self, '${#array["key"]}')
        # BUG!
        #self.assertTrue(Id.VSub_POUND, _GetSuffixOp(self, w).id)

    def testUnary(self):
        # As in testLength, assertTrue(expected, actual) could never fail;
        # assertEqual compares the suffix op's token id.
        w = _assertReadWord(self, '${var#}')
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var#prefix}')
        self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op.id)

        w = _assertReadWord(self, '${var##}')
        self.assertEqual(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)
        w = _assertReadWord(self, '${var##prefix}')
        self.assertEqual(Id.VOp1_DPound, _GetSuffixOp(self, w).op.id)

        _assertReadWord(self, '${var%suffix}')
        _assertReadWord(self, '${var%%suffix}')

    def testArrayOp(self):
        # Subscripted variables only need to parse as a single word.
        _assertReadWord(self, '${array[0]}')
        _assertReadWord(self, '${array[5+5]}')

        _assertReadWord(self, '${array[@]}')
        _assertReadWord(self, '${array[*]}')

    def testTestOp(self):
        # NOTE(review): the ']' before '}' may be a typo; it is kept because
        # the test only requires that the word parses.
        _assertReadWord(self, '${var:-default]}')

    def testTildeLike(self):
        # Words that start with '~' only need to parse as a single word.
        tilde_words = [
            '~/git/oilshell/oil',
            '~andy/git/oilshell/oil',
            '~andy_c/git/oilshell/oil',
            '~andy.c/git/oilshell/oil',
            '~andy-c/git/oilshell/oil',
            '~andy-c:git/oilshell/oil',
        ]
        for word_str in tilde_words:
            _assertReadWord(self, word_str)

    def testRead(self):
        # Smoke test: read every word of each case up to Eof_Real.  Nothing
        # is asserted beyond ReadWord() not returning None.
        CASES = [
            'ls "foo"',
            '$(( 1 + 2 ))',
            '$(echo $(( 1 )) )',  # OLD BUG: arith sub within command sub
            'echo ${#array[@]} b',  # Had a bug here
            'echo $(( ${#array[@]} ))',  # Bug here

            # Had a bug: unary minus
            #'${mounted_disk_regex:0:-1}',
            'echo ${@%suffix}',  # had a bug here
            '${@}',
            'echo ${var,,}',
            'echo ${var,,?}',

            # Line continuation tests
            '${\\\nfoo}',  # VSub_1
            '${foo\\\n}',  # VSub_2
            '${foo#\\\nyo}',  # VS_ARG_UNQ
            '"${foo#\\\nyo}"',  # VS_ARG_DQ
        ]
        for expr in CASES:
            print('---')
            print(expr)
            # print() would print '()' under python2; print('') gives the
            # intended blank line.
            print('')

            w_parser = test_lib.InitWordParser(expr)

            while True:
                w = w_parser.ReadWord(lex_mode_e.ShCommand)
                assert w is not None

                fmt.PrettyPrint(w)

                if word_.CommandId(w) == Id.Eof_Real:
                    break

    def testOilSplice(self):
        # All cases run with oil_at=True.
        _assertReadWord(self, '@words', oil_at=True)

        # These are normal words
        _assertReadWord(self, '.@words', oil_at=True)
        _assertReadWord(self, '.@words.', oil_at=True)

        # Errors
        _assertReadWordFailure(self, '@words[', oil_at=True)
        _assertReadWordFailure(self, '@words.', oil_at=True)

    def testReadComment(self):
        # Test that we get Id.Op_Newline
        code = 'foo # comment\nbar #comment\n'
        w_parser = test_lib.InitWordParser(code)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual('foo', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual('bar', lexer.LazyStr(w.parts[0]))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Op_Newline, w.id)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assert w
        self.assertEqual(Id.Eof_Real, w.id)

    def testReadArithWord(self):
        # The anode of a parenthesized sum must be tagged as a Binary
        # expression.
        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

        # NOTE(review): this repeats the case above exactly as it appears
        # here; kept in case the two inputs were meant to differ.
        w = _assertReadWord(self, '$(( (1+2) ))')
        child = w.parts[0].anode
        self.assertEqual(arith_expr_e.Binary, child.tag())

    def testReadArith(self):
        # Smoke test: read arith words from each case until Eof_Real or
        # Unknown_Tok.  Nothing is asserted beyond ReadArithWord() not
        # returning None.
        CASES = [
            '1 + 2',
            'a + b',
            '$a * $b',
            '${a} * ${b}',
            '$(echo 1) * $(echo 2)',
            '`echo 1` + 2',
            '$((1 + 2)) * $((3 + 4))',
            "'single quoted'",  # Allowed by oil but not bash
            '"${a}" + "${b}"',  # Ditto
            '$# + $$',
            # This doesn't work but does in bash -- should be 15
            #'$(( $(echo 1)$(echo 2) + 3 ))',
            '$(( x[0] < 5 ))',
            '$(( ++i ))',
            '$(( i++ ))',
            '$(( x -= 1))',
            # '\\|' is the same two characters as the former '\|', written
            # without relying on an invalid escape sequence.
            '$(( x \\|= 1))',
            '$(( x[0] = 1 ))',
            '$(( 1 \\| 0 ))',
            '$((0x$size))',
        ]

        for expr in CASES:
            print('---')
            print(expr)
            # print() would print '()' under python2; print('') gives the
            # intended blank line.
            print('')

            w_parser = test_lib.InitWordParser(expr)
            # Can we remove this initialization?
            w_parser._SetNext(lex_mode_e.Arith)

            while True:
                w = w_parser.ReadArithWord()
                assert w is not None
                fmt.PrettyPrint(w)
                if word_.CommandId(w) in (Id.Eof_Real, Id.Unknown_Tok):
                    break

    def testHereDoc(self):
        # Despite the name, the input contains no here doc: it checks the
        # words read from two commands separated by blank lines and a
        # comment.
        w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")

        def assertWord(w, id_, val):
            # w must have a single part with the given id and text.
            self.assertEqual(1, len(w.parts))
            part = w.parts[0]
            self.assertEqual(id_, part.id)
            self.assertEqual(val, lexer.LazyStr(part))

        print('--MULTI')
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'foo')

        # A single newline operator is read between the two commands.
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'ls')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        assertWord(w, Id.Lit_Chars, 'bar')

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Op_Newline, w.id)
        self.assertEqual(None, w.tval)

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual(word_e.Operator, w.tag())
        self.assertEqual(Id.Eof_Real, w.id)
        self.assertEqual('', lexer.LazyStr(w))

    def testUnicode(self):
        # Four space-separated words given as byte escapes of 1, 2, 3 and 4
        # bytes; each must come back unchanged as the first part of its word.
        words = 'z \xce\xbb \xe4\xb8\x89 \xf0\x9f\x98\x98'

        def _Part(w, i):
            return lexer.LazyStr(w.parts[i])

        w_parser = test_lib.InitWordParser(words)
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('z', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xce\xbb', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xe4\xb8\x89', _Part(w, 0))

        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        self.assertEqual('\xf0\x9f\x98\x98', _Part(w, 0))

    def testParseErrorLocation(self):
        # _assertSpanForWord returns None and only prints the left token, so
        # this checks that LeftTokenForWord handles each kind of word without
        # raising.
        span_cases = [
            'a=(1 2 3)',
            'foo',
            '\\$',
            "''",
            "'sq'",
            '""',
            '"dq"',
            '$(echo command sub)',
            '$(( 1 + 2 ))',
            '~user',
            '${var#}',
        ]
        for word_str in span_cases:
            _assertSpanForWord(self, word_str)
604
# When the file is executed as a script, hand control to unittest's
# command-line runner.
if __name__ == '__main__':
    unittest.main()