OILS / test / sh_spec.py
1#!/usr/bin/env python2
2from __future__ import print_function
3"""
4sh_spec.py -- Test framework to compare shells.
5
6Assertion help:
7 stdout: A single line of expected stdout. Newline is implicit.
8 stdout-json: JSON-encoded string. Use for the empty string (no newline),
9 for unicode chars, etc.
10
11 stderr: Ditto for stderr stream.
12 status: Expected shell return code. If not specified, the case must exit 0.
13
14Results:
15 PASS - we got the ideal, expected value
16 OK - we got a value that was not ideal, but expected
 17       For OSH this is usually behavior that was deliberately defined to be different.
18 N-I - Not implemented (e.g. $''). Assertions still checked (in case it
19 starts working)
20 BUG - we verified the value of a known bug
21 FAIL - we got an unexpected value. If the implementation can't be changed,
22 it should be converted to BUG or OK. Otherwise it should be made to
23 PASS.
24
25NOTE: The difference between OK and BUG is a matter of judgement. If the ideal
26behavior is a compile time error (code 2), a runtime error is generally OK.
27
28If ALL shells agree on a broken behavior, they are all marked OK (but our
29implementation will be PASS). But if the behavior is NOT POSIX compliant, then
30it will be a BUG.
31
32If one shell disagrees with others, that is generally a BUG.
33
34Example test case:
35
36#### hello and fail
37echo hello
38echo world
39exit 1
40## status: 1
41#
42# ignored comment
43#
44## STDOUT:
45hello
46world
47## END
48
49"""
50
51import collections
52import cgi
53import cStringIO
54import errno
55import json
56import optparse
57import os
58import pprint
59import re
60import shutil
61import subprocess
62import sys
63
64from test import spec_lib
65from doctools import html_head
66
67log = spec_lib.log
68
69
70# Magic strings for other variants of OSH.
71
72# NOTE: osh_ALT is usually _bin/osh -- the release binary.
73# It would be better to rename these osh-cpython and osh-ovm. Have the concept
74# of a suffix?
75
76OSH_CPYTHON = ('osh', 'osh-dbg')
77OTHER_OSH = ('osh_ALT',)
78
79YSH_CPYTHON = ('ysh', 'ysh-dbg')
80OTHER_YSH = ('oil_ALT',)
81
82# For now, only count the Oils CPython failures. TODO: the spec-cpp job should
83# assert the osh-cpp and ysh-cpp deltas.
84OTHER_OILS = OTHER_OSH + OTHER_YSH + ('osh-cpp', 'ysh-cpp')
85
86
87class ParseError(Exception):
88 pass
89
90
91# EXAMPLES:
92## stdout: foo
93## stdout-json: ""
94#
95# In other words, it could be (name, value) or (qualifier, name, value)
96
97KEY_VALUE_RE = re.compile(r'''
98 [#][#] \s+
99 # optional prefix with qualifier and shells
100 (?: (OK|BUG|N-I) \s+ ([\w+/]+) \s+ )?
101 ([\w\-]+) # key
102 :
103 \s* (.*) # value
104''', re.VERBOSE)
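
# Sketch of how KEY_VALUE_RE splits metadata lines (values are illustrative):
#
#   >>> KEY_VALUE_RE.match('## stdout: foo').groups()
#   (None, None, 'stdout', 'foo')
#   >>> KEY_VALUE_RE.match('## OK bash/mksh status: 2').groups()
#   ('OK', 'bash/mksh', 'status', '2')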
105
106END_MULTILINE_RE = re.compile(r'''
107 [#][#] \s+ END
108''', re.VERBOSE)
109
110# Line types
111TEST_CASE_BEGIN = 0 # Starts with ####
112KEY_VALUE = 1 # Metadata
113KEY_VALUE_MULTILINE = 2 # STDOUT STDERR
114END_MULTILINE = 3 # STDOUT STDERR
115PLAIN_LINE = 4 # Uncommented
116EOF = 5
117
118LEX_OUTER = 0 # Ignore blank lines, e.g. for separating cases
119LEX_RAW = 1 # Blank lines are significant
120
121
122class Tokenizer(object):
123 """Modal lexer!"""
124
125 def __init__(self, f):
126 self.f = f
127
128 self.cursor = None
129 self.line_num = 0
130
131 self.next()
132
133 def _ClassifyLine(self, line, lex_mode):
134 if not line: # empty
135 return self.line_num, EOF, ''
136
137 if lex_mode == LEX_OUTER and not line.strip():
138 return None
139
140 if line.startswith('####'):
141 desc = line[4:].strip()
142 return self.line_num, TEST_CASE_BEGIN, desc
143
144 m = KEY_VALUE_RE.match(line)
145 if m:
146 qualifier, shells, name, value = m.groups()
147 # HACK: Expected data should have the newline.
148 if name in ('stdout', 'stderr'):
149 value += '\n'
150
151 if name in ('STDOUT', 'STDERR'):
152 token_type = KEY_VALUE_MULTILINE
153 else:
154 token_type = KEY_VALUE
155 return self.line_num, token_type, (qualifier, shells, name, value)
156
157 m = END_MULTILINE_RE.match(line)
158 if m:
159 return self.line_num, END_MULTILINE, None
160
161 # If it starts with ##, it should be metadata. This finds some typos.
162 if line.lstrip().startswith('##'):
163 raise RuntimeError('Invalid ## line %r' % line)
164
165 if line.lstrip().startswith('#'): # Ignore comments
166 return None # try again
167
168 # Non-empty line that doesn't start with '#'
169 # NOTE: We need the original line to test the whitespace sensitive <<-.
170 # And we need rstrip because we add newlines back below.
171 return self.line_num, PLAIN_LINE, line
172
173 def next(self, lex_mode=LEX_OUTER):
174 """Raises StopIteration when exhausted."""
175 while True:
176 line = self.f.readline()
177 self.line_num += 1
178
179 tok = self._ClassifyLine(line, lex_mode)
180 if tok is not None:
181 break
182
183 self.cursor = tok
184 return self.cursor
185
186 def peek(self):
187 return self.cursor
188
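# Sketch of typical Tokenizer usage (a hypothetical driver loop; the real one
# is in ParseTestFile below):
#
#   with open('spec/smoke.test.sh') as f:
#     tokens = Tokenizer(f)
#     while True:
#       line_num, kind, item = tokens.peek()
#       if kind == EOF:
#         break
#       tokens.next()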
189
190def AddMetadataToCase(case, qualifier, shells, name, value):
191 shells = shells.split('/') # bash/dash/mksh
192 for shell in shells:
193 if shell not in case:
194 case[shell] = {}
195 case[shell][name] = value
196 case[shell]['qualifier'] = qualifier
197
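# Sketch of the resulting shape, assuming a hypothetical call:
#
#   case = {}
#   AddMetadataToCase(case, 'BUG', 'dash/mksh', 'stdout', 'x\n')
#
# which leaves one sub-dict per shell in the case:
#
#   {'dash': {'stdout': 'x\n', 'qualifier': 'BUG'},
#    'mksh': {'stdout': 'x\n', 'qualifier': 'BUG'}}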
198
199# Format of a test script.
200#
201# -- Code is either literal lines, or a commented out code: value.
202# code = PLAIN_LINE*
203# | '## code:' VALUE
204#
205# -- Key value pairs can be single- or multi-line
206# key_value = '##' KEY ':' VALUE
207# | KEY_VALUE_MULTILINE PLAIN_LINE* END_MULTILINE
208#
209# -- Description, then key-value pairs surrounding code.
210# test_case = '####' DESC
211# key_value*
212# code
213# key_value*
214#
215# -- There should be a blank line after each test case. Leading comments and
216# -- code are OK.
217#
218# test_file =
219# key_value* -- file level metadata
220# (test_case '\n')*
221
222
223def ParseKeyValue(tokens, case):
224 """Parse commented-out metadata in a test case.
225
226 The metadata must be contiguous.
227
228 Args:
229 tokens: Tokenizer
230 case: dictionary to add to
231 """
232 while True:
233 line_num, kind, item = tokens.peek()
234
235 if kind == KEY_VALUE_MULTILINE:
236 qualifier, shells, name, empty_value = item
237 if empty_value:
238 raise ParseError(
239 'Line %d: got value %r for %r, but the value should be on the '
240 'following lines' % (line_num, empty_value, name))
241
242 value_lines = []
243 while True:
244 tokens.next(lex_mode=LEX_RAW) # empty lines aren't skipped
245 _, kind2, item2 = tokens.peek()
246 if kind2 != PLAIN_LINE:
247 break
248 value_lines.append(item2)
249
250 value = ''.join(value_lines)
251
252 name = name.lower() # STDOUT -> stdout
253 if qualifier:
254 AddMetadataToCase(case, qualifier, shells, name, value)
255 else:
256 case[name] = value
257
258 # END token is optional.
259 if kind2 == END_MULTILINE:
260 tokens.next()
261
262 elif kind == KEY_VALUE:
263 qualifier, shells, name, value = item
264
265 if qualifier:
266 AddMetadataToCase(case, qualifier, shells, name, value)
267 else:
268 case[name] = value
269
270 tokens.next()
271
272 else: # Unknown token type
273 break
274
275
276def ParseCodeLines(tokens, case):
277 """Parse uncommented code in a test case."""
278 _, kind, item = tokens.peek()
279 if kind != PLAIN_LINE:
280 raise ParseError('Expected a line of code (got %r, %r)' % (kind, item))
281 code_lines = []
282 while True:
283 _, kind, item = tokens.peek()
284 if kind != PLAIN_LINE:
285 case['code'] = ''.join(code_lines)
286 return
287 code_lines.append(item)
288 tokens.next(lex_mode=LEX_RAW)
289
290
291def ParseTestCase(tokens):
292 """Parse a single test case and return it.
293
294 If at EOF, return None.
295 """
296 line_num, kind, item = tokens.peek()
297 if kind == EOF:
298 return None
299
300 if kind != TEST_CASE_BEGIN:
301 raise RuntimeError(
302 "line %d: Expected TEST_CASE_BEGIN, got %r" % (line_num, [kind, item]))
303
304 tokens.next()
305
306 case = {'desc': item, 'line_num': line_num}
307
308 ParseKeyValue(tokens, case)
309
310 # For broken code
311 if 'code' in case: # Got it through a key value pair
312 return case
313
314 ParseCodeLines(tokens, case)
315 ParseKeyValue(tokens, case)
316
317 return case
318
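# Sketch: for the 'hello and fail' example in the module docstring, the parsed
# case dict looks roughly like this (line_num is wherever the '####' header
# appears; values stay as strings until assertions are created):
#
#   {'desc': 'hello and fail', 'line_num': 36,
#    'code': 'echo hello\necho world\nexit 1\n',
#    'status': '1',
#    'stdout': 'hello\nworld\n'}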
319
320_META_FIELDS = [
321 'our_shell',
322 'compare_shells',
323 'suite',
324 'tags',
325 'oils_failures_allowed',
326 ]
327
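# Example of the file-level metadata header that ParseTestFile below consumes
# (values are hypothetical; keys must come from _META_FIELDS above):
#
#   ## our_shell: ysh
#   ## compare_shells: bash dash mksh
#   ## oils_failures_allowed: 1
#   ## tags: dev-minimal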
328
329def ParseTestFile(test_file, tokens):
330 """
331 test_file: Only for error message
332 """
333 file_metadata = {}
334 test_cases = []
335
336 try:
337    # Skip over the header. Setup code could go here, but would we then have to
338    # execute it on every case?
339 while True:
340 line_num, kind, item = tokens.peek()
341 if kind != KEY_VALUE:
342 break
343
344 qualifier, shells, name, value = item
345 if qualifier is not None:
346 raise RuntimeError('Invalid qualifier in spec file metadata')
347 if shells is not None:
348 raise RuntimeError('Invalid shells in spec file metadata')
349
350 file_metadata[name] = value
351
352 tokens.next()
353
354 while True: # Loop over cases
355 test_case = ParseTestCase(tokens)
356 if test_case is None:
357 break
358 test_cases.append(test_case)
359
360 except StopIteration:
361 raise RuntimeError('Unexpected EOF parsing test cases')
362
363 for name in file_metadata:
364 if name not in _META_FIELDS:
365 raise RuntimeError('Invalid file metadata %r in %r' % (name, test_file))
366
367 return file_metadata, test_cases
368
369
370def CreateStringAssertion(d, key, assertions, qualifier=False):
371 found = False
372
373 exp = d.get(key)
374 if exp is not None:
375 a = EqualAssertion(key, exp, qualifier=qualifier)
376 assertions.append(a)
377 found = True
378
379 exp_json = d.get(key + '-json')
380 if exp_json is not None:
381 exp = json.loads(exp_json, encoding='utf-8')
382 a = EqualAssertion(key, exp, qualifier=qualifier)
383 assertions.append(a)
384 found = True
385
386 # For testing invalid unicode
387 exp_repr = d.get(key + '-repr')
388 if exp_repr is not None:
389 exp = eval(exp_repr)
390 a = EqualAssertion(key, exp, qualifier=qualifier)
391 assertions.append(a)
392 found = True
393
394 return found
395
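# Sketch: the three spellings of a stdout assertion that CreateStringAssertion
# handles (values are illustrative):
#
#   ## stdout: hi                  -> expects 'hi\n' (the newline is implicit)
#   ## stdout-json: "hi\nthere\n"  -> json.loads()'d, exact string
#   ## stdout-repr: b'\xff\n'      -> eval()'d Python literal, for invalid unicode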
396
397def CreateIntAssertion(d, key, assertions, qualifier=False):
398 exp = d.get(key) # expected
399 if exp is not None:
400 # For now, turn it into int
401 a = EqualAssertion(key, int(exp), qualifier=qualifier)
402 assertions.append(a)
403 return True
404 return False
405
406
407def CreateAssertions(case, sh_label):
408 """
409 Given a raw test case and a shell label, create EqualAssertion instances to
410 run.
411 """
412 assertions = []
413
414 # Whether we found assertions
415 stdout = False
416 stderr = False
417 status = False
418
419  # So the assertions are exactly the same for osh and osh_ALT
420
421 if sh_label.startswith('osh'):
422 case_sh = 'osh'
423 elif sh_label.startswith('bash'):
424 case_sh = 'bash'
425 else:
426 case_sh = sh_label
427
428 if case_sh in case:
429 q = case[case_sh]['qualifier']
430 if CreateStringAssertion(case[case_sh], 'stdout', assertions, qualifier=q):
431 stdout = True
432 if CreateStringAssertion(case[case_sh], 'stderr', assertions, qualifier=q):
433 stderr = True
434 if CreateIntAssertion(case[case_sh], 'status', assertions, qualifier=q):
435 status = True
436
437 if not stdout:
438 CreateStringAssertion(case, 'stdout', assertions)
439 if not stderr:
440 CreateStringAssertion(case, 'stderr', assertions)
441 if not status:
442 if 'status' in case:
443 CreateIntAssertion(case, 'status', assertions)
444 else:
445 # If the user didn't specify a 'status' assertion, assert that the exit
446 # code is 0.
447 a = EqualAssertion('status', 0)
448 assertions.append(a)
449
450 no_traceback = SubstringAssertion('stderr', 'Traceback (most recent')
451 assertions.append(no_traceback)
452
453 #print 'SHELL', shell
454 #pprint.pprint(case)
455 #print(assertions)
456 return assertions
457
458
459class Result(object):
460  """Result of a stdout/stderr/status assertion, or of a (case, shell) cell.
461
462 Order is important: the result of a cell is the minimum of the results of
463 each assertion.
464 """
465 TIMEOUT = 0 # ONLY a cell result, not an assertion result
466 FAIL = 1
467 BUG = 2
468 NI = 3
469 OK = 4
470 PASS = 5
471
472 length = 6 # for loops
473
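# Sketch of the "minimum wins" rule used in RunCases: because FAIL=1 < OK=4 <
# PASS=5, a cell whose assertions return [PASS, OK, FAIL] collapses to FAIL:
#
#   min(Result.PASS, Result.OK, Result.FAIL) == Result.FAIL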
474
475class EqualAssertion(object):
476 """Check that two values are equal."""
477
478 def __init__(self, key, expected, qualifier=None):
479 self.key = key
480 self.expected = expected # expected value
481    self.qualifier = qualifier  # OK/BUG/N-I if this was a special case, else None
482
483 def __repr__(self):
484 return '<EqualAssertion %s == %r>' % (self.key, self.expected)
485
486 def Check(self, shell, record):
487 actual = record[self.key]
488 if actual != self.expected:
489 if len(str(self.expected)) < 40:
490 msg = '[%s %s] Expected %r, got %r' % (shell, self.key, self.expected,
491 actual)
492 else:
493 msg = '''
494[%s %s]
495Expected %r
496Got %r
497''' % (shell, self.key, self.expected, actual)
498
499 # TODO: Make this better and add a flag for it.
500 if 0:
501 import difflib
502 for line in difflib.unified_diff(
503 self.expected, actual, fromfile='expected', tofile='actual'):
504 print(repr(line))
505
506 return Result.FAIL, msg
507 if self.qualifier == 'BUG': # equal, but known bad
508 return Result.BUG, ''
509 if self.qualifier == 'N-I': # equal, and known UNIMPLEMENTED
510 return Result.NI, ''
511 if self.qualifier == 'OK': # equal, but ok (not ideal)
512 return Result.OK, ''
513 return Result.PASS, '' # ideal behavior
514
515
516class SubstringAssertion(object):
517 """Check that a string like stderr doesn't have a substring."""
518
519 def __init__(self, key, substring):
520 self.key = key
521 self.substring = substring
522
523 def __repr__(self):
524 return '<SubstringAssertion %s == %r>' % (self.key, self.substring)
525
526 def Check(self, shell, record):
527 actual = record[self.key]
528 if self.substring in actual:
529 msg = '[%s %s] Found %r' % (shell, self.key, self.substring)
530 return Result.FAIL, msg
531 return Result.PASS, ''
532
533
534class Stats(object):
535 def __init__(self, num_cases, sh_labels):
536 self.counters = collections.defaultdict(int)
537 c = self.counters
538 c['num_cases'] = num_cases
539 c['oils_num_passed'] = 0
540 c['oils_num_failed'] = 0
541 # Number of osh_ALT results that differed from osh.
542 c['oils_ALT_delta'] = 0
543
544 self.by_shell = {}
545 for sh in sh_labels:
546 self.by_shell[sh] = collections.defaultdict(int)
547 self.nonzero_results = collections.defaultdict(int)
548
549 self.tsv_rows = []
550
551 def Inc(self, counter_name):
552 self.counters[counter_name] += 1
553
554 def Get(self, counter_name):
555 return self.counters[counter_name]
556
557 def Set(self, counter_name, val):
558 self.counters[counter_name] = val
559
560 def ReportCell(self, case_num, cell_result, sh_label):
561 self.tsv_rows.append((str(case_num), sh_label, TEXT_CELLS[cell_result]))
562
563 self.by_shell[sh_label][cell_result] += 1
564 self.nonzero_results[cell_result] += 1
565
566 c = self.counters
567 if cell_result == Result.TIMEOUT:
568 c['num_timeout'] += 1
569 elif cell_result == Result.FAIL:
570 # Special logic: don't count osh_ALT because its failures will be
571 # counted in the delta.
572 if sh_label not in OTHER_OILS:
573 c['num_failed'] += 1
574
575 if sh_label in OSH_CPYTHON + YSH_CPYTHON:
576 c['oils_num_failed'] += 1
577 elif cell_result == Result.BUG:
578 c['num_bug'] += 1
579 elif cell_result == Result.NI:
580 c['num_ni'] += 1
581 elif cell_result == Result.OK:
582 c['num_ok'] += 1
583 elif cell_result == Result.PASS:
584 c['num_passed'] += 1
585 if sh_label in OSH_CPYTHON + YSH_CPYTHON:
586 c['oils_num_passed'] += 1
587 else:
588 raise AssertionError()
589
590 def WriteTsv(self, f):
591 f.write('case\tshell\tresult\n')
592 for row in self.tsv_rows:
593 f.write('\t'.join(row))
594 f.write('\n')
595
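# Sketch of the long-format TSV that WriteTsv produces, one row per
# (case, shell) cell (case numbers and shells are illustrative):
#
#   case    shell   result
#   0       bash    pass
#   0       osh     FAIL
#   1       bash    pass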
596
597PIPE = subprocess.PIPE
598
599def RunCases(cases, case_predicate, shells, env, out, opts):
600 """
601 Run a list of test 'cases' for all 'shells' and write output to 'out'.
602 """
603 if opts.trace:
604 for _, sh in shells:
605 log('\tshell: %s', sh)
606 print('\twhich $SH: ', end='', file=sys.stderr)
607 subprocess.call(['which', sh])
608
609 #pprint.pprint(cases)
610
611 sh_labels = [sh_label for sh_label, _ in shells]
612
613 out.WriteHeader(sh_labels)
614 stats = Stats(len(cases), sh_labels)
615
616 # Make an environment for each shell. $SH is the path to the shell, so we
617 # can test flags, etc.
618 sh_env = []
619 for _, sh_path in shells:
620 e = dict(env)
621 e[opts.sh_env_var_name] = sh_path
622 sh_env.append(e)
623
624 # Determine which one (if any) is osh-cpython, for comparison against other
625 # shells.
626 osh_cpython_index = -1
627 for i, (sh_label, _) in enumerate(shells):
628 if sh_label in OSH_CPYTHON:
629 osh_cpython_index = i
630 break
631
632 timeout_dir = os.path.abspath('_tmp/spec/timeouts')
633 try:
634 shutil.rmtree(timeout_dir)
635 os.mkdir(timeout_dir)
636 except OSError:
637 pass
638
639 # Now run each case, and print a table.
640 for i, case in enumerate(cases):
641 line_num = case['line_num']
642 desc = case['desc']
643 code = case['code']
644
645 if opts.trace:
646 log('case %d: %s', i, desc)
647
648 if not case_predicate(i, case):
649 stats.Inc('num_skipped')
650 continue
651
652 if opts.do_print:
653 print('#### %s' % case['desc'])
654 print(case['code'])
655 print()
656 continue
657
658 stats.Inc('num_cases_run')
659
660 result_row = []
661
662 for shell_index, (sh_label, sh_path) in enumerate(shells):
663 timeout_file = os.path.join(timeout_dir, '%02d-%s' % (i, sh_label))
664 if opts.timeout:
665 if opts.timeout_bin:
666 # This is what smoosh itself uses. See smoosh/tests/shell_tests.sh
667 # QUIRK: interval can only be a whole number
668 argv = [
669 opts.timeout_bin,
670 '-t', opts.timeout,
671 # Somehow I'm not able to get this timeout file working? I think
672 # it has a bug when using stdin. It waits for the background
673 # process too.
674
675 #'-i', '1',
676 #'-l', timeout_file
677 ]
678 else:
679 # This kills hanging tests properly, but somehow they fail with code
680 # -9?
681 #argv = ['timeout', '-s', 'KILL', opts.timeout]
682
683 # s suffix for seconds
684 argv = ['timeout', opts.timeout + 's']
685 else:
686 argv = []
687 argv.append(sh_path)
688
689 # dash doesn't support -o posix
690 if opts.posix and sh_label != 'dash':
691 argv.extend(['-o', 'posix'])
692
693 if opts.trace:
694 log('\targv: %s', ' '.join(argv))
695
696 case_env = sh_env[shell_index]
697
698 # Unique dir for every test case and shell
699 tmp_base = os.path.normpath(opts.tmp_env) # no . or ..
700 case_tmp_dir = os.path.join(tmp_base, '%02d-%s' % (i, sh_label))
701
702 try:
703 os.makedirs(case_tmp_dir)
704 except OSError as e:
705 if e.errno != errno.EEXIST:
706 raise
707
708 # Some tests assume _tmp exists
709 try:
710 os.mkdir(os.path.join(case_tmp_dir, '_tmp'))
711 except OSError as e:
712 if e.errno != errno.EEXIST:
713 raise
714
715 case_env['TMP'] = case_tmp_dir
716
717 if opts.pyann_out_dir:
718 case_env = dict(case_env)
719 case_env['PYANN_OUT'] = os.path.join(opts.pyann_out_dir, '%d.json' % i)
720
721 try:
722 p = subprocess.Popen(argv, env=case_env, cwd=case_tmp_dir,
723 stdin=PIPE, stdout=PIPE, stderr=PIPE)
724 except OSError as e:
725 print('Error running %r: %s' % (sh_path, e), file=sys.stderr)
726 sys.exit(1)
727
728 p.stdin.write(code)
729 p.stdin.close()
730
731 actual = {}
732 actual['stdout'] = p.stdout.read()
733 actual['stderr'] = p.stderr.read()
734 p.stdout.close()
735 p.stderr.close()
736
737 actual['status'] = p.wait()
738
739 if opts.timeout_bin and os.path.exists(timeout_file):
740 cell_result = Result.TIMEOUT
741 elif not opts.timeout_bin and actual['status'] == 124:
742 cell_result = Result.TIMEOUT
743 else:
744 messages = []
745 cell_result = Result.PASS
746
747 # TODO: Warn about no assertions? Well it will always test the error
748 # code.
749 assertions = CreateAssertions(case, sh_label)
750 for a in assertions:
751 result, msg = a.Check(sh_label, actual)
752 # The minimum one wins.
753 # If any failed, then the result is FAIL.
754 # If any are OK, but none are FAIL, the result is OK.
755 cell_result = min(cell_result, result)
756 if msg:
757 messages.append(msg)
758
759 if cell_result != Result.PASS or opts.details:
760 d = (i, sh_label, actual['stdout'], actual['stderr'], messages)
761 out.AddDetails(d)
762
763 result_row.append(cell_result)
764
765 stats.ReportCell(i, cell_result, sh_label)
766
767 if sh_label in OTHER_OSH:
768 # This is only an error if we tried to run ANY OSH.
769 if osh_cpython_index == -1:
770 raise RuntimeError("Couldn't determine index of osh-cpython")
771
772 other_result = result_row[shell_index]
773 cpython_result = result_row[osh_cpython_index]
774 if other_result != cpython_result:
775 stats.Inc('oils_ALT_delta')
776
777 out.WriteRow(i, line_num, result_row, desc)
778
779 return stats
780
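# Sketch of the argv built inside RunCases for one cell, assuming --timeout 5,
# no --timeout-bin, and a hypothetical /bin/bash path:
#
#   argv = ['timeout', '5s', '/bin/bash']    # plus ['-o', 'posix'] with
#                                            # --posix, except for dash
#
# The case's code is then written to the shell's stdin rather than passed as
# an argument.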
781
782# ANSI color constants
783_RESET = '\033[0;0m'
784_BOLD = '\033[1m'
785
786_RED = '\033[31m'
787_GREEN = '\033[32m'
788_YELLOW = '\033[33m'
789_PURPLE = '\033[35m'
790
791
792TEXT_CELLS = {
793 Result.TIMEOUT: 'TIME',
794 Result.FAIL: 'FAIL',
795 Result.BUG: 'BUG',
796 Result.NI: 'N-I',
797 Result.OK: 'ok',
798 Result.PASS: 'pass',
799}
800
801ANSI_COLORS = {
802 Result.TIMEOUT: _PURPLE,
803 Result.FAIL: _RED,
804 Result.BUG: _YELLOW,
805 Result.NI: _YELLOW,
806 Result.OK: _YELLOW,
807 Result.PASS: _GREEN,
808}
809
810def _AnsiCells():
811 lookup = {}
812 for i in xrange(Result.length):
813 lookup[i] = ''.join([ANSI_COLORS[i], _BOLD, TEXT_CELLS[i], _RESET])
814 return lookup
815
816ANSI_CELLS = _AnsiCells()
817
818
819HTML_CELLS = {
820 Result.TIMEOUT: '<td class="timeout">TIME',
821 Result.FAIL: '<td class="fail">FAIL',
822 Result.BUG: '<td class="bug">BUG',
823 Result.NI: '<td class="n-i">N-I',
824 Result.OK: '<td class="ok">ok',
825 Result.PASS: '<td class="pass">pass',
826}
827
828
829def _ValidUtf8String(s):
830 """Return an arbitrary string as a readable utf-8 string.
831
832 We output utf-8 to either HTML or the console. If we get invalid utf-8 as
833 stdout/stderr (which is very possible), then show the ASCII repr().
834 """
835 try:
836 s.decode('utf-8')
837 return s # it decoded OK
838 except UnicodeDecodeError:
839 return repr(s) # ASCII representation
840
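# Sketch (Python 2 str semantics):
#
#   _ValidUtf8String('hi\n')      -> 'hi\n'           # valid utf-8, unchanged
#   _ValidUtf8String('\xff\xfe')  -> "'\\xff\\xfe'"   # invalid, so repr() is shown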
841
842class Output(object):
843
844 def __init__(self, f, verbose):
845 self.f = f
846 self.verbose = verbose
847 self.details = []
848
849 def BeginCases(self, test_file):
850 pass
851
852 def WriteHeader(self, sh_labels):
853 pass
854
855 def WriteRow(self, i, line_num, row, desc):
856 pass
857
858 def EndCases(self, sh_labels, stats):
859 pass
860
861 def AddDetails(self, entry):
862 self.details.append(entry)
863
864 # Helper function
865 def _WriteDetailsAsText(self, details):
866 for case_index, shell, stdout, stderr, messages in details:
867 print('case: %d' % case_index, file=self.f)
868 for m in messages:
869 print(m, file=self.f)
870
871 # Assume the terminal can show utf-8, but we don't want random binary.
872 print('%s stdout:' % shell, file=self.f)
873 print(_ValidUtf8String(stdout), file=self.f)
874
875 print('%s stderr:' % shell, file=self.f)
876 print(_ValidUtf8String(stderr), file=self.f)
877
878 print('', file=self.f)
879
880
881class TeeOutput(object):
882 """For multiple outputs in one run, e.g. HTML and TSV.
883
884 UNUSED
885 """
886
887 def __init__(self, outs):
888 self.outs = outs
889
890 def BeginCases(self, test_file):
891 for out in self.outs:
892 out.BeginCases(test_file)
893
894 def WriteHeader(self, sh_labels):
895 for out in self.outs:
896 out.WriteHeader(sh_labels)
897
898 def WriteRow(self, i, line_num, row, desc):
899 for out in self.outs:
900 out.WriteRow(i, line_num, row, desc)
901
902 def EndCases(self, sh_labels, stats):
903 for out in self.outs:
904 out.EndCases(sh_labels, stats)
905
906 def AddDetails(self, entry):
907 for out in self.outs:
908 out.AddDetails(entry)
909
910
911class TsvOutput(Output):
912 """Write a plain-text TSV file.
913
914 UNUSED since we are outputting LONG format with --tsv-output.
915 """
916
917 def WriteHeader(self, sh_labels):
918 self.f.write('case\tline\t') # case number and line number
919 for sh_label in sh_labels:
920 self.f.write(sh_label)
921 self.f.write('\t')
922 self.f.write('\n')
923
924 def WriteRow(self, i, line_num, row, desc):
925 self.f.write('%3d\t%3d\t' % (i, line_num))
926
927 for result in row:
928 c = TEXT_CELLS[result]
929 self.f.write(c)
930 self.f.write('\t')
931
932 # note: 'desc' could use TSV8, but just ignore it for now
933 #self.f.write(desc)
934 self.f.write('\n')
935
936
937class AnsiOutput(Output):
938
939 def BeginCases(self, test_file):
940 self.f.write('%s\n' % test_file)
941
942 def WriteHeader(self, sh_labels):
943 self.f.write(_BOLD)
944 self.f.write('case\tline\t') # case number and line number
945 for sh_label in sh_labels:
946 self.f.write(sh_label)
947 self.f.write('\t')
948 self.f.write(_RESET)
949 self.f.write('\n')
950
951 def WriteRow(self, i, line_num, row, desc):
952 self.f.write('%3d\t%3d\t' % (i, line_num))
953
954 for result in row:
955 c = ANSI_CELLS[result]
956 self.f.write(c)
957 self.f.write('\t')
958
959 self.f.write(desc)
960 self.f.write('\n')
961
962 if self.verbose:
963 self._WriteDetailsAsText(self.details)
964 self.details = []
965
966 def _WriteShellSummary(self, sh_labels, stats):
967 if len(stats.nonzero_results) <= 1: # Skip trivial summaries
968 return
969
970 # Reiterate header
971 self.f.write(_BOLD)
972 self.f.write('\t\t')
973 for sh_label in sh_labels:
974 self.f.write(sh_label)
975 self.f.write('\t')
976 self.f.write(_RESET)
977 self.f.write('\n')
978
979 # Write totals by cell.
980 for result in sorted(stats.nonzero_results, reverse=True):
981 self.f.write('\t%s' % ANSI_CELLS[result])
982 for sh_label in sh_labels:
983 self.f.write('\t%d' % stats.by_shell[sh_label][result])
984 self.f.write('\n')
985
986 # The bottom row is all the same, but it helps readability.
987 self.f.write('\ttotal')
988 for sh_label in sh_labels:
989 self.f.write('\t%d' % stats.counters['num_cases_run'])
990 self.f.write('\n')
991
992 def EndCases(self, sh_labels, stats):
993 print()
994 self._WriteShellSummary(sh_labels, stats)
995
996
997class HtmlOutput(Output):
998
999 def __init__(self, f, verbose, spec_name, sh_labels, cases):
1000 Output.__init__(self, f, verbose)
1001 self.spec_name = spec_name
1002 self.sh_labels = sh_labels # saved from header
1003 self.cases = cases # for linking to code
1004 self.row_html = [] # buffered
1005
1006 def _SourceLink(self, line_num, desc):
1007 return '<a href="%s.test.html#L%d">%s</a>' % (
1008 self.spec_name, line_num, cgi.escape(desc))
1009
1010 def BeginCases(self, test_file):
1011 css_urls = [ '../../../web/base.css', '../../../web/spec-tests.css' ]
1012 title = '%s: spec test case results' % self.spec_name
1013 html_head.Write(self.f, title, css_urls=css_urls)
1014
1015 self.f.write('''\
1016 <body class="width60">
1017 <p id="home-link">
1018 <a href=".">spec test index</a>
1019 /
1020 <a href="/">oilshell.org</a>
1021 </p>
1022 <h1>Results for %s</h1>
1023 <table>
1024 ''' % test_file)
1025
1026 def _WriteShellSummary(self, sh_labels, stats):
1027 # NOTE: This table has multiple <thead>, which seems OK.
1028 self.f.write('''
1029<thead>
1030 <tr class="table-header">
1031 ''')
1032
1033 columns = ['status'] + sh_labels + ['']
1034 for c in columns:
1035 self.f.write('<td>%s</td>' % c)
1036
1037 self.f.write('''
1038 </tr>
1039</thead>
1040''')
1041
1042 # Write totals by cell.
1043 for result in sorted(stats.nonzero_results, reverse=True):
1044 self.f.write('<tr>')
1045
1046 self.f.write(HTML_CELLS[result])
1047 self.f.write('</td> ')
1048
1049 for sh_label in sh_labels:
1050 self.f.write('<td>%d</td>' % stats.by_shell[sh_label][result])
1051
1052 self.f.write('<td></td>')
1053 self.f.write('</tr>\n')
1054
1055 # The bottom row is all the same, but it helps readability.
1056 self.f.write('<tr>')
1057 self.f.write('<td>total</td>')
1058 for sh_label in sh_labels:
1059 self.f.write('<td>%d</td>' % stats.counters['num_cases_run'])
1060 self.f.write('<td></td>')
1061 self.f.write('</tr>\n')
1062
1063 # Blank row for space.
1064 self.f.write('<tr>')
1065 for i in xrange(len(sh_labels) + 2):
1066 self.f.write('<td style="height: 2em"></td>')
1067 self.f.write('</tr>\n')
1068
1069 def WriteHeader(self, sh_labels):
1070 f = cStringIO.StringIO()
1071
1072 f.write('''
1073<thead>
1074 <tr class="table-header">
1075 ''')
1076
1077 columns = ['case'] + sh_labels
1078 for c in columns:
1079 f.write('<td>%s</td>' % c)
1080 f.write('<td class="case-desc">description</td>')
1081
1082 f.write('''
1083 </tr>
1084</thead>
1085''')
1086
1087 self.row_html.append(f.getvalue())
1088
1089 def WriteRow(self, i, line_num, row, desc):
1090 f = cStringIO.StringIO()
1091 f.write('<tr>')
1092 f.write('<td>%3d</td>' % i)
1093
1094 show_details = False
1095
1096 for result in row:
1097 c = HTML_CELLS[result]
1098 if result not in (Result.PASS, Result.TIMEOUT): # nothing to show
1099 show_details = True
1100
1101 f.write(c)
1102 f.write('</td>')
1103 f.write('\t')
1104
1105 f.write('<td class="case-desc">')
1106 f.write(self._SourceLink(line_num, desc))
1107 f.write('</td>')
1108 f.write('</tr>\n')
1109
1110 # Show row with details link.
1111 if show_details:
1112 f.write('<tr>')
1113 f.write('<td class="details-row"></td>') # for the number
1114
1115 for col_index, result in enumerate(row):
1116 f.write('<td class="details-row">')
1117 if result != Result.PASS:
1118 sh_label = self.sh_labels[col_index]
1119 f.write('<a href="#details-%s-%s">details</a>' % (i, sh_label))
1120 f.write('</td>')
1121
1122 f.write('<td class="details-row"></td>') # for the description
1123 f.write('</tr>\n')
1124
1125 self.row_html.append(f.getvalue()) # buffer it
1126
1127 def _WriteStats(self, stats):
1128 self.f.write(
1129 '%(num_passed)d passed, %(num_ok)d OK, '
1130 '%(num_ni)d not implemented, %(num_bug)d BUG, '
1131 '%(num_failed)d failed, %(num_timeout)d timeouts, '
1132 '%(num_skipped)d cases skipped\n' % stats.counters)
1133
1134 def EndCases(self, sh_labels, stats):
1135 self._WriteShellSummary(sh_labels, stats)
1136
1137 # Write all the buffered rows
1138 for h in self.row_html:
1139 self.f.write(h)
1140
1141 self.f.write('</table>\n')
1142 self.f.write('<pre>')
1143 self._WriteStats(stats)
1144 if stats.Get('oils_num_failed'):
1145 self.f.write('%(oils_num_failed)d failed under osh\n' % stats.counters)
1146 self.f.write('</pre>')
1147
1148 if self.details:
1149 self._WriteDetails()
1150
1151 self.f.write('</body></html>')
1152
1153 def _WriteDetails(self):
1154 self.f.write("<h2>Details on runs that didn't PASS</h2>")
1155 self.f.write('<table id="details">')
1156
1157 for case_index, sh_label, stdout, stderr, messages in self.details:
1158 self.f.write('<tr>')
1159 self.f.write('<td><a name="details-%s-%s"></a><b>%s</b></td>' % (
1160 case_index, sh_label, sh_label))
1161
1162 self.f.write('<td>')
1163
1164 # Write description and link to the code
1165 case = self.cases[case_index]
1166 line_num = case['line_num']
1167 desc = case['desc']
1168 self.f.write('%d ' % case_index)
1169 self.f.write(self._SourceLink(line_num, desc))
1170 self.f.write('<br/><br/>\n')
1171
1172 for m in messages:
1173 self.f.write('<span class="assertion">%s</span><br/>\n' % cgi.escape(m))
1174 if messages:
1175 self.f.write('<br/>\n')
1176
1177 def _WriteRaw(s):
1178 self.f.write('<pre>')
1179
1180      # stdout might contain invalid utf-8; make it valid before escaping
1181 valid_utf8 = _ValidUtf8String(s)
1182
1183 self.f.write(cgi.escape(valid_utf8))
1184 self.f.write('</pre>')
1185
1186 self.f.write('<i>stdout:</i> <br/>\n')
1187 _WriteRaw(stdout)
1188
1189 self.f.write('<i>stderr:</i> <br/>\n')
1190 _WriteRaw(stderr)
1191
1192 self.f.write('</td>')
1193 self.f.write('</tr>')
1194
1195 self.f.write('</table>')
1196
1197
1198def MakeTestEnv(opts):
1199 if not opts.tmp_env:
1200 raise RuntimeError('--tmp-env required')
1201 if not opts.path_env:
1202 raise RuntimeError('--path-env required')
1203 env = {
1204 'PATH': opts.path_env,
1205 #'LANG': opts.lang_env,
1206 }
1207 for p in opts.env_pair:
1208 name, value = p.split('=', 1)
1209 env[name] = value
1210
1211 return env
1212
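# Sketch, assuming hypothetical option values: with opts.path_env = '/usr/bin'
# and opts.env_pair = ['LANG=C'], MakeTestEnv() returns
#
#   {'PATH': '/usr/bin', 'LANG': 'C'}
#
# RunCases() then copies this dict per shell and adds the $SH variable named
# by opts.sh_env_var_name.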
1213
1214def _DefaultSuite(spec_name):
1215 if spec_name.startswith('ysh-'):
1216 suite = 'ysh'
1217 elif spec_name.startswith('hay'): # hay.test.sh is ysh
1218 suite = 'ysh'
1219
1220 elif spec_name.startswith('tea-'):
1221 suite = 'tea'
1222 else:
1223 suite = 'osh'
1224
1225 return suite
1226
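# Sketch of the prefix-based default (spec names are hypothetical):
#
#   _DefaultSuite('ysh-regex')  -> 'ysh'
#   _DefaultSuite('hay-names')  -> 'ysh'    # hay*.test.sh is YSH
#   _DefaultSuite('smoke')      -> 'osh'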
1227
1228def ParseTestList(test_files):
1229 for test_file in test_files:
1230 with open(test_file) as f:
1231 tokens = Tokenizer(f)
1232 try:
1233 file_metadata, cases = ParseTestFile(test_file, tokens)
1234 except RuntimeError as e:
1235 log('ERROR in %r', test_file)
1236 raise
1237
1238 tmp = os.path.basename(test_file)
1239 spec_name = tmp.split('.')[0] # foo.test.sh -> foo
1240
1241 suite = file_metadata.get('suite') or _DefaultSuite(spec_name)
1242
1243 tmp = file_metadata.get('tags')
1244 tags = tmp.split() if tmp else []
1245
1246 # Don't need compare_shells, etc. to decide what to run
1247
1248 row = {'spec_name': spec_name, 'suite': suite, 'tags': tags}
1249 #print(row)
1250 yield row
1251
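# Sketch of a yielded row (spec name and tags are illustrative):
#
#   {'spec_name': 'smoke', 'suite': 'osh', 'tags': []}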
1252
1253def main(argv):
1254 # First check if bash is polluting the environment. Tests rely on the
1255 # environment.
1256 v = os.getenv('RANDOM')
1257 if v is not None:
1258 raise AssertionError('got $RANDOM = %s' % v)
1259 v = os.getenv('PPID')
1260 if v is not None:
1261 raise AssertionError('got $PPID = %s' % v)
1262
1263 p = optparse.OptionParser('%s [options] TEST_FILE shell...' % sys.argv[0])
1264 spec_lib.DefineCommon(p)
1265 spec_lib.DefineShSpec(p)
1266 opts, argv = p.parse_args(argv)
1267
1268 # --print-tagged to figure out what to run
1269 if opts.print_tagged:
1270 to_find = opts.print_tagged
1271 for row in ParseTestList(argv[1:]):
1272 if to_find in row['tags']:
1273 print(row['spec_name'])
1274 return 0
1275
1276 # --print-table to figure out what to run
1277 if opts.print_table:
1278 for row in ParseTestList(argv[1:]):
1279 print('%(suite)s\t%(spec_name)s' % row)
1280 #print(row)
1281 return 0
1282
1283 #
1284 # Now deal with a single file
1285 #
1286
1287 try:
1288 test_file = argv[1]
1289 except IndexError:
1290 p.print_usage()
1291 return 1
1292
1293 with open(test_file) as f:
1294 tokens = Tokenizer(f)
1295 file_metadata, cases = ParseTestFile(test_file, tokens)
1296
1297 # List test cases and return
1298 if opts.do_list:
1299 for i, case in enumerate(cases):
1300 if opts.verbose: # print the raw dictionary for debugging
1301 print(pprint.pformat(case))
1302 else:
1303 print('%d\t%s' % (i, case['desc']))
1304 return 0
1305
1306 # for test/spec-cpp.sh
1307 if opts.print_spec_suite:
1308 tmp = os.path.basename(test_file)
1309 spec_name = tmp.split('.')[0] # foo.test.sh -> foo
1310
1311 suite = file_metadata.get('suite') or _DefaultSuite(spec_name)
1312 print(suite)
1313 return 0
1314
1315 if opts.verbose:
1316 for k, v in file_metadata.items():
1317 print('\t%-20s: %s' % (k, v), file=sys.stderr)
1318 print('', file=sys.stderr)
1319
1320 if opts.oils_bin_dir:
1321
1322 shells = []
1323
1324 if opts.compare_shells:
1325 comp = file_metadata.get('compare_shells')
1326      # Compare against the shells listed in 'compare_shells'
1327 shells.extend(comp.split() if comp else [])
1328
1329 # Always run with the Python version
1330 our_shell = file_metadata.get('our_shell', 'osh') # default is OSH
1331 shells.append(os.path.join(opts.oils_bin_dir, our_shell))
1332
1333 # Legacy OVM/CPython build
1334 if opts.ovm_bin_dir:
1335 shells.append(os.path.join(opts.ovm_bin_dir, our_shell))
1336
1337 # New C++ build
1338 if opts.oils_cpp_bin_dir:
1339 shells.append(os.path.join(opts.oils_cpp_bin_dir, our_shell))
1340
1341 # Overwrite it when --oils-bin-dir is set
1342 # It's no longer a flag
1343 opts.oils_failures_allowed = \
1344 int(file_metadata.get('oils_failures_allowed', 0))
1345
1346 else:
1347 # TODO: remove this mode?
1348 shells = argv[2:]
1349
1350 shell_pairs = spec_lib.MakeShellPairs(shells)
1351
1352 if opts.range:
1353 begin, end = spec_lib.ParseRange(opts.range)
1354 case_predicate = spec_lib.RangePredicate(begin, end)
1355 elif opts.regex:
1356 desc_re = re.compile(opts.regex, re.IGNORECASE)
1357 case_predicate = spec_lib.RegexPredicate(desc_re)
1358 else:
1359 case_predicate = lambda i, case: True
1360
1361 out_f = sys.stderr if opts.do_print else sys.stdout
1362
1363 # Set up output style. Also see asdl/format.py
1364 if opts.format == 'ansi':
1365 out = AnsiOutput(out_f, opts.verbose)
1366
1367 elif opts.format == 'html':
1368 spec_name = os.path.basename(test_file)
1369 spec_name = spec_name.split('.')[0]
1370
1371 sh_labels = [label for label, _ in shell_pairs]
1372
1373 out = HtmlOutput(out_f, opts.verbose, spec_name, sh_labels, cases)
1374
1375 else:
1376 raise AssertionError()
1377
1378 out.BeginCases(os.path.basename(test_file))
1379
1380 env = MakeTestEnv(opts)
1381 stats = RunCases(cases, case_predicate, shell_pairs, env, out, opts)
1382
1383 out.EndCases([sh_label for sh_label, _ in shell_pairs], stats)
1384
1385 if opts.tsv_output:
1386 with open(opts.tsv_output, 'w') as f:
1387 stats.WriteTsv(f)
1388
1389 # TODO: Could --stats-{file,template} be a separate awk step on .tsv files?
1390 stats.Set('oils_failures_allowed', opts.oils_failures_allowed)
1391 if opts.stats_file:
1392 with open(opts.stats_file, 'w') as f:
1393 f.write(opts.stats_template % stats.counters)
1394 f.write('\n') # bash 'read' requires a newline
1395
1396 if stats.Get('num_failed') == 0:
1397 return 0
1398
1399 # spec/smoke.test.sh -> smoke
1400 test_name = os.path.basename(test_file).split('.')[0]
1401
1402 allowed = opts.oils_failures_allowed
1403 all_count = stats.Get('num_failed')
1404 oils_count = stats.Get('oils_num_failed')
1405 if allowed == 0:
1406 log('')
1407 log('%s: FATAL: %d tests failed (%d oils failures)', test_name, all_count,
1408 oils_count)
1409 log('')
1410 else:
1411 # If we got EXACTLY the allowed number of failures, exit 0.
1412 if allowed == all_count and all_count == oils_count:
1413 log('%s: note: Got %d allowed oils failures (exit with code 0)',
1414 test_name, allowed)
1415 return 0
1416 else:
1417 log('')
1418 log('%s: FATAL: Got %d failures (%d oils failures), but %d are allowed',
1419 test_name, all_count, oils_count, allowed)
1420 log('')
1421
1422 return 1
1423
1424
1425if __name__ == '__main__':
1426 try:
1427 sys.exit(main(sys.argv))
1428 except KeyboardInterrupt as e:
1429 print('%s: interrupted with Ctrl-C' % sys.argv[0], file=sys.stderr)
1430 sys.exit(1)
1431 except RuntimeError as e:
1432 print('FATAL: %s' % e, file=sys.stderr)
1433 sys.exit(1)