spec/stateful/harness.py

OILS / spec / stateful / harness.py View on Github | oilshell.org

393 lines, 243 significant

1	#!/usr/bin/env python3
2	"""
3	State Machine style tests with pexpect, e.g. for interactive mode.
4
5	To invoke this file, run the shell wrapper:
6
7	test/stateful.sh all
8	"""
9	from __future__ import print_function
10
11	import optparse
12	import os
13	import pexpect
14	import signal
15	import sys
16
17	from core import ansi
18	from test import spec_lib # Using this for a common interface
19
20	log = spec_lib.log
21
22
def expect_prompt(sh):
    """Wait until the output of `sh` matches the '$' prompt pattern.

    Args:
      sh: an object with a pexpect-style expect(pattern) method
    """
    # Anything, followed by a literal dollar sign.
    prompt_pattern = r'.*\$'
    sh.expect(prompt_pattern)
25
26
def get_pid_by_name(name):
    """Look up a process by exact name with pgrep and return its pid as an int.

    Runs `pgrep --exact --newest NAME` and converts the last
    whitespace-separated token of its output to an integer.
    """
    # XXX: make sure this is restricted to subprocesses under us.
    # This could be problematic on the continuous build if many tests are
    # running in parallel.
    pgrep_cmd = 'pgrep --exact --newest %s' % name
    output = pexpect.run(pgrep_cmd)
    #log('pgrep output %r' % output)
    tokens = output.split()
    return int(tokens[-1])
35
36
def stop_process__hack(name, sig_num=signal.SIGSTOP):
    """Signal the most recent process matching `name` (SIGSTOP by default).

    Hack in place of sh.sendcontrol('z'), which sends SIGTSTP.  Why doesn't
    OSH respond to this, or why don't the child processes respond?

    TODO: Fix OSH and get rid of this hack.

    Args:
      name: process name, resolved to a pid by get_pid_by_name()
      sig_num: signal number handed to os.kill()
    """
    target_pid = get_pid_by_name(name)
    os.kill(target_pid, sig_num)
46
47
# Registry of test cases, mutated by each test file through @register().
# Each entry is a tuple: (description, function, skip_shells, not_impl_shells)
CASES = []


def register(skip_shells=None, not_impl_shells=None):
    """Build a decorator that appends a test function to CASES.

    The function's docstring is stored as the case description.  The
    decorated function itself is returned unchanged.

    Args:
      skip_shells: shell labels to skip for this case (default: none)
      not_impl_shells: shell labels to mark not-implemented (default: none)
    """
    skipped = skip_shells if skip_shells else []
    unimplemented = not_impl_shells if not_impl_shells else []

    def decorator(func):
        entry = (func.__doc__, func, skipped, unimplemented)
        CASES.append(entry)
        return func

    return decorator
61
62
class Result(object):
    """Integer codes for the outcome of one test case on one shell.

    SKIP = 1, NI = 2 (displayed as N-I), OK = 3, FAIL = 4.
    """
    SKIP, NI, OK, FAIL = range(1, 5)
68
69
class TestRunner(object):
    """Runs test case functions against interactive shells spawned by pexpect.

    A case is a function that takes the spawned child `sh`; it fails when the
    function raises an Exception.
    """

    def __init__(self, num_retries, pexpect_timeout, verbose):
        """
        Args:
          num_retries: number of extra attempts after a first failure
          pexpect_timeout: timeout passed to pexpect.spawn() for every shell
          verbose: if true, the child's log file is set to sys.stdout
        """
        self.num_retries = num_retries
        self.pexpect_timeout = pexpect_timeout
        self.verbose = verbose

    def RunOnce(self, shell_path, shell_label, func):
        """Spawn one shell, run func(sh) against it, and close the shell.

        Returns:
          Result.OK if func returned normally, Result.FAIL if it raised an
          Exception (the traceback is printed to stderr).
        """
        sh_argv = []
        if shell_label in ('bash', 'osh'):
            sh_argv.extend(['--rcfile', '/dev/null'])
        # Why the heck is --norc different from --rcfile /dev/null in bash???
        # This makes it so the prompt of the parent shell doesn't leak.  Very
        # annoying.
        if shell_label == 'bash':
            sh_argv.append('--norc')
        #print(sh_argv)

        # Python 3: encoding required
        sh = pexpect.spawn(shell_path,
                           sh_argv,
                           encoding='utf-8',
                           timeout=self.pexpect_timeout)

        # The outer try/finally guarantees the child is closed even if the
        # setup below raises; only exceptions from func() count as a FAIL.
        try:
            sh.shell_label = shell_label  # for tests to use

            # Generally don't want local echo, it gets confusing fast.
            sh.setecho(False)

            if self.verbose:
                sh.logfile = sys.stdout

            try:
                func(sh)
            except Exception:
                import traceback
                traceback.print_exc(file=sys.stderr)
                return Result.FAIL

            return Result.OK
        finally:
            sh.close()

    def RunCase(self, shell_path, shell_label, func):
        """Run a case once, and retry it if the first attempt fails.

        Returns:
          A pair (result, num_success).  When the first attempt passes this is
          (Result.OK, -1).  Otherwise num_success is the number of retries
          that passed, and the result is Result.OK only if at least 2 of them
          did.
        """
        result = self.RunOnce(shell_path, shell_label, func)

        if result == Result.OK:
            return result, -1  # short circuit for speed

        if result != Result.FAIL:
            raise AssertionError(result)

        num_success = 0
        if self.num_retries:
            log('\tFAILED first time: Retrying %d times', self.num_retries)
            for i in range(self.num_retries):
                log('\tRetry %d of %d', i + 1, self.num_retries)
                result = self.RunOnce(shell_path, shell_label, func)
                if result == Result.OK:
                    num_success += 1
        else:
            log('\tFAILED')

        if num_success >= 2:
            return Result.OK, num_success
        return Result.FAIL, num_success

    def RunCases(self, cases, case_predicate, shell_pairs, result_table,
                 flaky):
        """Run every selected case on every shell and record the outcomes.

        Args:
          cases: list of (desc, func, skip_shells, not_impl_shells) tuples
          case_predicate: called as case_predicate(case_num, desc); the case
            is run only when it returns a true value
          shell_pairs: list of (shell_label, shell_path)
          result_table: output list; gets one row per selected case, of the
            form [case_num, <one Result per shell>..., desc]
          flaky: output dict; (case_num, shell_label) is set to the second
            element returned by RunCase(), or -1 for skipped / N-I shells
        """
        for case_num, (desc, func, skip_shells,
                       not_impl_shells) in enumerate(cases):
            if not case_predicate(case_num, desc):
                continue

            result_row = [case_num]

            for shell_label, shell_path in shell_pairs:
                skip_str = ''
                if shell_label in skip_shells:
                    skip_str = 'SKIP'
                if shell_label in not_impl_shells:
                    skip_str = 'N-I'

                print()
                print('%s\t%d\t%s\t%s' %
                      (skip_str, case_num, shell_label, desc))
                print()
                sys.stdout.flush()  # prevent interleaving

                if shell_label in skip_shells:
                    result_row.append(Result.SKIP)
                    flaky[case_num, shell_label] = -1
                    continue

                # N-I is just like SKIP, but it's displayed differently
                if shell_label in not_impl_shells:
                    result_row.append(Result.NI)
                    flaky[case_num, shell_label] = -1
                    continue

                result, retries = self.RunCase(shell_path, shell_label, func)
                flaky[case_num, shell_label] = retries

                result_row.append(result)

            result_row.append(desc)
            result_table.append(result_row)
182
183
def PrintResults(shell_pairs, result_table, flaky, num_retries, f):
    """Write the tab-separated "wide" results table to f.

    Args:
      shell_pairs: list of (shell_label, shell_path); the labels become the
        column headers
      result_table: list of rows of the form
        [case_num, <one Result per shell>..., desc]
      flaky: dict keyed by (case_num, shell_label); a value other than -1 is
        the number of successful retries, shown on an extra line
      num_retries: the configured retry count, used as the denominator of the
        "N/M ok" annotation
      f: file object to write to; colors are used only when f.isatty()

    Returns:
      The number of Result.FAIL cells, not counting the 'osh-cpp' shell.
    """
    # Note: In retrospect, it would be better if every process writes a "long"
    # TSV file of results.
    # And then we concatenate them and write the "wide" summary here.

    if f.isatty():
        fail_color = ansi.BOLD + ansi.RED
        ok_color = ansi.BOLD + ansi.GREEN
        bold = ansi.BOLD
        reset = ansi.RESET
    else:
        fail_color = ''
        ok_color = ''
        bold = ''
        reset = ''

    f.write('\n')

    # TODO: Might want an HTML version too
    sh_labels = [shell_label for shell_label, _ in shell_pairs]

    f.write(bold)
    f.write('case\t')  # case number
    for sh_label in sh_labels:
        f.write(sh_label)
        f.write('\t')
    f.write(reset)
    f.write('\n')

    num_failures = 0

    for row in result_table:
        case_num = row[0]
        desc = row[-1]

        f.write('%d\t' % case_num)

        num_shells = len(row) - 2
        extra_row = [''] * num_shells

        for j, cell in enumerate(row[1:-1]):
            shell_label = sh_labels[j]

            num_success = flaky[case_num, shell_label]
            if num_success != -1:
                # The first attempt failed, so the denominator is that
                # attempt plus the retries, e.g. "the first of 5 failed".
                extra_row[j] = '%d/%d ok' % (num_success, num_retries + 1)

            if cell == Result.SKIP:
                f.write('SKIP\t')

            elif cell == Result.NI:
                f.write('N-I\t')

            elif cell == Result.FAIL:
                # Don't count C++ failures right now.  The "ignoring" message
                # belongs to the failures that are NOT counted.
                if shell_label == 'osh-cpp':
                    log('Ignoring osh-cpp failure: %d %s', case_num, desc)
                else:
                    num_failures += 1
                f.write('%sFAIL%s\t' % (fail_color, reset))

            elif cell == Result.OK:
                f.write('%sok%s\t' % (ok_color, reset))

            else:
                raise AssertionError(cell)

        f.write(desc)
        f.write('\n')

        # Second line for this case: retry statistics under each shell column
        if any(extra_row):
            for cell in extra_row:
                f.write('\t%s' % cell)
            f.write('\n')

    return num_failures
262
263
def TestStop(exe):
    """Manual experiment: how do signals reach a child of a pexpect-spawned program?

    Spawns `exe`, sends it the line 'sleep 2', waits for the text 'in child',
    sends Ctrl-C, and then expects the text 'wait =>'.  The `if 0:` branches
    are disabled alternatives kept as a record of what was tried.

    Args:
      exe: path of the program to spawn
    """
    # Disabled reference experiment against /bin/dash.
    if 0:
        p = pexpect.spawn('/bin/dash', encoding='utf-8', timeout=2.0)

        # Show output
        p.logfile = sys.stdout
        #p.setecho(True)

        p.expect(r'.*\$')
        p.sendline('sleep 2')

        import time
        time.sleep(0.1)

        # Ctrl-C works for the child here
        p.sendcontrol('c')
        p.sendline('echo status=$?')
        p.expect('status=130')

        p.close()

        return

    # Note: pty.fork() calls os.setsid()
    # How does that affect signaling and the process group?

    p = pexpect.spawn(exe, encoding='utf-8', timeout=2.0)

    # Show output
    p.logfile = sys.stdout
    #p.setecho(True)

    p.sendline('sleep 2')
    p.expect('in child')

    import time
    time.sleep(0.1)

    log('Harness PID %d', os.getpid())

    #input()

    # Stop it

    # Exactly one of the following ways of signaling is enabled at a time.
    if 1:
        # Main process gets KeyboardInterrupt
        # hm but child process doesn't get interrupted?  why not?
        p.sendcontrol('c')
    if 0:  # does NOT work -- why?
        p.sendcontrol('z')
    if 0:  # does NOT work
        stop_process__hack('sleep', sig_num=signal.SIGTSTP)
    if 0:
        # WORKS
        stop_process__hack('sleep', sig_num=signal.SIGSTOP)

    # These will kill the parent, not the sleep child
    #p.kill(signal.SIGTSTP)
    #p.kill(signal.SIGSTOP)

    p.expect('wait =>')
    p.close()
326
327
def main(argv):
    """Parse flags, run the registered CASES on the given shells, print results.

    Args:
      argv: full argument vector; after option parsing, argv[1:] are the
        shells to test (or 'test-stop' EXE for the TestStop() hack)

    Returns:
      0 if the number of counted failures equals opts.oils_failures_allowed,
      1 otherwise.  Returns None after the 'test-stop' hack or after listing
      cases.

    Raises:
      RuntimeError: if no shells were given.
    """
    # Function-scope import: the module's import block does not bring in `re`,
    # which the --regex filter below needs.
    import re

    p = optparse.OptionParser('%s [options] TEST_FILE shell...' % sys.argv[0])
    spec_lib.DefineCommon(p)
    spec_lib.DefineStateful(p)
    opts, argv = p.parse_args(argv)

    if len(argv) >= 2 and argv[1] == 'test-stop':  # Hack for testing
        TestStop(argv[2])
        return

    # List test cases and return
    if opts.do_list:
        for i, (desc, _, _, _) in enumerate(CASES):
            print('%d\t%s' % (i, desc))
        return

    shells = argv[1:]
    if not shells:
        raise RuntimeError('Expected shells to run')

    shell_pairs = spec_lib.MakeShellPairs(shells)

    if opts.range:
        begin, end = spec_lib.ParseRange(opts.range)
        case_predicate = spec_lib.RangePredicate(begin, end)
    elif opts.regex:
        desc_re = re.compile(opts.regex, re.IGNORECASE)
        case_predicate = spec_lib.RegexPredicate(desc_re)
    else:
        case_predicate = lambda i, case: True

    if 0:
        print(shell_pairs)
        print(CASES)

    result_table = []  # each row is a list
    # (case_num, shell_label) -> number of successful retries, or -1 if the
    # case was skipped, not implemented, or passed on the first attempt
    flaky = {}

    r = TestRunner(opts.num_retries, opts.pexpect_timeout, opts.verbose)
    r.RunCases(CASES, case_predicate, shell_pairs, result_table, flaky)

    if opts.results_file:
        # The context manager closes the file even if PrintResults() raises.
        with open(opts.results_file, 'w') as results_f:
            num_failures = PrintResults(shell_pairs, result_table, flaky,
                                        opts.num_retries, results_f)
    else:
        # Flush rather than close: sys.stdout is not ours to close.
        num_failures = PrintResults(shell_pairs, result_table, flaky,
                                    opts.num_retries, sys.stdout)
        sys.stdout.flush()

    if opts.oils_failures_allowed != num_failures:
        log('%s: Expected %d failures, got %d', sys.argv[0],
            opts.oils_failures_allowed, num_failures)
        return 1

    return 0
384
385
if __name__ == '__main__':
    # A RuntimeError from main() is reported as a one-line fatal error with
    # exit status 1; otherwise main()'s return value is the exit status.
    try:
        exit_status = main(sys.argv)
    except RuntimeError as err:
        print('FATAL: %s' % err, file=sys.stderr)
        sys.exit(1)
    sys.exit(exit_status)
392
393	# vim: sw=2