test/syscall.sh

OILS / test / syscall.sh View on Github | oilshell.org

411 lines, 170 significant

#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

source build/dev-shell.sh

# Shells that run-case and friends iterate over.
readonly -a SHELLS=(dash bash mksh zsh ash yash osh)

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH
#
# "$0" is quoted so a checkout path containing whitespace survives, and '&&'
# makes a failed cd abort the assignment (and, under errexit, the script)
# instead of silently recording the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
21
# Run one shell invocation under 'strace -ff', writing traces to a prefix.
#
# Arguments:
#   $1 - output prefix handed to 'strace -o'
#   $2 - shell name (e.g. dash, bash, osh); passed as a single word
#   $@ - remaining arguments, passed to the shell unchanged
# Globals:
#   REPO_ROOT (read) - locates the dev build of OSH
count-procs() {
  local out_prefix=$1
  local sh=$2
  shift 2

  # The command is an array rather than a string that gets word-split, so a
  # REPO_ROOT containing whitespace or glob characters stays intact.
  local -a cmd=("$sh")

  case $sh in
    # avoid the extra processes that bin/osh starts!
    #(X)  # to compare against osh 0.8.pre3 installed
    (osh)
      cmd=(
        env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
        "$REPO_ROOT/bin/oil.py" osh
      )
      ;;
  esac

  strace -ff -o "$out_prefix" -- "${cmd[@]}" "$@"
}
38
run-case() {
  ### Run a test case with many shells
  #
  # Arguments:
  #   $1 - case number; becomes part of the trace file prefix
  #   $2 - code string handed to every shell in SHELLS via -c
  # Globals:
  #   SHELLS (read), RAW_DIR (read)

  local num=$1
  local code_str=$2
  local sh out_prefix  # keep the loop variables out of the caller's scope

  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/$num-$sh
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" -c "$code_str"
  done
}
51
run-case-file() {
  ### Like run-case, but the shell reads the code from a file
  #
  # Arguments:
  #   $1 - case number; names both the script (_tmp/<num>.sh) and the traces
  #   $2 - code string written verbatim to the script
  # Globals:
  #   SHELLS (read), RAW_DIR (read)

  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh
  local sh out_prefix  # keep the loop variables out of the caller's scope

  # printf '%s' writes the bytes exactly as given, with no trailing newline,
  # even when the code looks like an echo option (e.g. '-n').
  printf '%s' "$code_str" > "$script"

  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/$num-$sh
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" "$script"
  done
}
66
run-case-stdin() {
  ### Like run-case, but the shell reads the code from a pipe on stdin
  #
  # Arguments:
  #   $1 - case number; becomes part of the trace file prefix
  #   $2 - code string piped verbatim to every shell in SHELLS
  # Globals:
  #   SHELLS (read), RAW_DIR (read)

  local num=$1
  local code_str=$2
  local sh out_prefix  # keep the loop variables out of the caller's scope

  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/$num-$sh
    echo "--- $sh"
    # printf '%s' sends the bytes exactly as given, with no trailing newline,
    # even when the code looks like an echo option (e.g. '-n').
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
79
80
# Print the code snippets for the by-code suite, one per line.
#
# The here-doc is the case table; grep removes blank lines and lines whose
# first non-blank character is '#'.  The here-doc delimiter is unquoted, so a
# literal '$(' has to be written as '\$(' to reach the output unexpanded.
print-cases() {
  # format: number, whitespace, then an arbitrary code string
  # 'grep -E' replaces the obsolescent 'egrep' wrapper.
  grep -E -v '^[[:space:]]*(#|$)' <<EOF

# builtin
echo hi

# external command
date

# Oil sentence
date ;

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo \$(date)

# command sub with builtin
echo \$(echo hi)

# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )

# command sub with other subshell
echo \$( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

# osh does 5 when others do 3.
( echo a; echo b ) | ( wc -l )
EOF

  # Discarded because they're identical
  # pipeline with redirect last
  #date | wc -l > /tmp/out.txt

  # pipeline with redirect first
  #date 2>&1 | wc -l

}
189
number-cases() {
  # Prefix every case with a two-column, zero-padded, right-justified number
  # ('rz' format), i.e. what printf would call %02d.
  print-cases | nl -n rz -w 2
}
195
by-input() {
  ### Run cases that vary by input reader
  #
  # Runs the same six snippets three ways (-c string, script file, stdin),
  # writes the case table to $BASE_DIR/by-input-cases.txt, then calls
  # count-lines and summarize.  If 'strace true' fails it prints a message and
  # returns without running anything.
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v "${RAW_DIR:?}"
  # BASE_DIR is created here too, so this suite works without a prior by-code
  # run having made it.
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases! 2 processes instead of 3.
  run-case 30 "$zero"
  run-case 31 "$one"
  run-case 32 "$two"
  run-case 33 "$comment"
  run-case 34 "$newline"
  run-case 35 "$newline2"

  # Cases 44 and 54 use $newline, matching their labels in the table below
  # and the -c variant (case 34).
  run-case-file 40 "$zero"
  run-case-file 41 "$one"
  run-case-file 42 "$two"
  run-case-file 43 "$comment"
  run-case-file 44 "$newline"
  run-case-file 45 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  run-case-stdin 50 "$zero"
  run-case-stdin 51 "$one"
  run-case-stdin 52 "$two"
  run-case-stdin 53 "$comment"
  run-case-stdin 54 "$newline"
  run-case-stdin 55 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat > "$BASE_DIR/${suite}-cases.txt" <<EOF
30 -c: zero lines
31 -c: one line
32 -c: one line and comment
33 -c: comment first
34 -c: newline
35 -c: newline2
40 file: zero lines
41 file: one line
42 file: one line and comment
43 file: comment first
44 file: newline
45 file: newline2
50 stdin: zero lines
51 stdin: one line
52 stdin: one line and comment
53 stdin: comment first
54 stdin: newline
55 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0

}
273
274	# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
weird-command-sub() {
  ### Trace shells evaluating $(< file) command substitutions
  #
  # Writes FOO to _tmp/cs, runs cases 60 and 61 through run-case, writes the
  # case table to $BASE_DIR/weird-command-sub-cases.txt, then calls
  # count-lines and summarize.
  shopt -s nullglob

  # Create the directories this function writes into, so it can run on its
  # own.
  mkdir -p _tmp "$RAW_DIR" "$BASE_DIR"
  rm -r -f -v "${RAW_DIR:?}"/*

  local tmp=_tmp/cs
  echo FOO > "$tmp"

  # The '$' is escaped so that each traced shell performs the command
  # substitution itself.  Unescaped, the shell running this script would
  # expand it first and the traced shells would only see 'echo FOO'.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat > "$BASE_DIR/${suite}-cases.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines "$suite"
  summarize "$suite" 0 0
}
294
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Arguments:
  #   $1 - optional cap on the number of cases to run (default: MAX_CASES)
  #
  # Writes the numbered case list to $BASE_DIR/by-code-cases.txt, runs the
  # first $1 of them through run-case, then calls count-lines and summarize.
  # If 'strace true' fails it prints a message and returns without running
  # anything.

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local max_cases=${1:-$MAX_CASES}

  # ':?' aborts rather than letting an empty RAW_DIR reach 'rm -r'.
  rm -r -f -v "${RAW_DIR:?}"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite='by-code'
  local cases=$BASE_DIR/${suite}-cases.txt

  number-cases > "$cases"
  # Each line is "<number><whitespace><code>"; read puts the rest of the line,
  # inner spaces included, into code_str.
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num $code_str"
    echo

    run-case "$num" "$code_str"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
330
syscall-py() {
  # Run test/syscall.py with PYTHONPATH=. set for that command only,
  # forwarding every argument unchanged.
  env PYTHONPATH=. test/syscall.py "$@"
}
334
write-sourced() {
  # Create the script that the '. _tmp/sourced.sh' case sources: two date
  # commands with no trailing newline.
  printf '%s' 'date; date' > _tmp/sourced.sh
}
338
# Write per-file line counts of everything in RAW_DIR to
# $BASE_DIR/<suite>-counts.txt, without wc's trailing "total" line.
#
# Arguments:
#   $1 - suite name (default: by-code)
# Returns:
#   non-zero if RAW_DIR cannot be entered or holds no files
count-lines() {
  local suite=${1:-by-code}
  local -a files

  (
    cd "$RAW_DIR" || exit 1
    files=(*)

    # An unmatched glob is either the literal '*' or, with nullglob, empty.
    # Fail loudly instead of letting wc read from stdin.
    if [[ ! -e ${files[0]-} ]]; then
      echo "count-lines: no trace files in $RAW_DIR" >&2
      exit 1
    fi

    # wc prints a "total" line only when given more than one file, so only
    # then is there a last line to drop.
    if (( ${#files[@]} > 1 )); then
      wc -l -- "${files[@]}" | head -n -1
    else
      wc -l -- "${files[@]}"
    fi
  ) > "$BASE_DIR/${suite}-counts.txt"
}
343
# Feed a suite's counts through syscall-py and report the outcome.
#
# Arguments:
#   $1 - suite name (default: by-code)
#   $2 - value for syscall-py --not-minimum (default: 0)
#   $3 - value for syscall-py --more-than-bash (default: 0)
# Outputs:
#   writes the report to $BASE_DIR/<suite>.txt; prints "Wrote <path>" and
#   then OK or FAIL depending on the pipeline's exit status
summarize() {
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  local out=$BASE_DIR/${suite}.txt
  local status=0

  # '|| status=$?' captures a failing pipeline without tripping errexit and
  # without changing the caller's errexit setting.
  cat "$BASE_DIR/${suite}-counts.txt" \
    | syscall-py --not-minimum "$not_minimum" --more-than-bash "$more_than_bash" \
      "$BASE_DIR/${suite}-cases.txt" \
    > "$out" \
    || status=$?

  echo "Wrote $out"
  if (( status == 0 )); then
    echo 'OK'
  else
    echo 'FAIL'
  fi
}
365
run-for-release() {
  ### Run the two syscall suites

  # Invoked as one of the "other" tests. Soil runs by-code and by-input
  # separately.

  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz
  local suite_fn
  for suite_fn in by-code by-input; do
    "$suite_fn"
  done

  echo 'OK'
}
378
379	#
380	# Real World
381	#
# $ ls|grep dash|wc -l
# 6098
# $ ls|grep bash|wc -l
# 6102
# $ ls|grep osh|wc -l
# 6098
388	#
389	# So Oil is already at dash level for CPython's configure, and bash isn't
390	# far off. So autoconf-generated scripts probably already use constructs
391	# that are already "optimal" in most shells.
392
393	readonly PY27_DIR=$PWD/Python-2.7.13
394
# Trace ./configure run by bash, dash and osh from inside PY27_DIR.
#
# Globals:
#   RAW_DIR (read)  - traces go under $PWD/$RAW_DIR/real
#   PY27_DIR (read) - directory entered with pushd before running ./configure
cpython-configure() {
  # Absolute path, computed before pushd changes the working directory.
  local raw_dir=$PWD/$RAW_DIR/real
  local sh out_prefix  # keep the loop variables out of the caller's scope
  mkdir -p "$raw_dir"

  pushd "$PY27_DIR"
  #for sh in "${SHELLS[@]}"; do
  for sh in bash dash osh; do
    out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done
  popd
}
410
411	"$@"