#!/usr/bin/env bash
#
# Compare operations on data structures, with little I/O: strings, arrays,
# associative arrays, integers.
#
# Usage:
#   benchmarks/compute.sh <function name>
#
# List of benchmarks:
#
# - fib: integers, loops, assignment (shells don't have real integers)
# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!);
#   also an integer counter
# - bubble_sort: indexed array (bash uses a linked list?)
# - palindrome: strings, slicing, Unicode
# - parse_help: realistic shell-only string processing, which I didn't write.
#
# TODO:
# - vary problem size, which is different from iters
#   - bubble_sort: array length, to test complexity of array indexing
#   - palindrome: longer lines, to test complexity of unicode/byte slicing
#   - word_freq: more unique words, to test complexity of assoc array
# - write awk versions of each benchmark (could be contributed)
# - assert that stdout is identical
# - create data frames and publish results
# - leave holes for Python, other shells, etc.

set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh, locales!)
export LC_ALL=C

TIMEFORMAT='%U'
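# With this format, the 'time' keyword prints just the user CPU seconds of a
# command, e.g. a bare "0.52" (illustrative value); control-flow below relies
# on this.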

# task_name,iter,args
hello-tasks() {
  local provenance=$1

  # Append 3 task fields (task name, arg1, arg2) to the 5 provenance fields.
  cat $provenance | filter-provenance python2 bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'hello _ _' | xargs -n 3 -- echo "$fields"
  done
}
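
# Illustrative sketch (not used by the benchmarks themselves): how the *-tasks
# functions build each row.  xargs appends the three task fields (task name,
# arg1, arg2) to whatever provenance fields are on the line; the values below
# are made up.
demo-task-row() {
  local fields='JOB_ID no-host HOST_HASH bash SHELL_HASH'  # hypothetical
  echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  # prints: JOB_ID no-host HOST_HASH bash SHELL_HASH fib 200 44
}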

# task_name,iter,args
fib-tasks() {
  local provenance=$1

  # Append 3 task fields (task name, arg1, arg2) to the 5 provenance fields.
  cat $provenance | filter-provenance python2 bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  done
}

word_freq-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    # BUG: oils-for-unix differs on these two. Looks like it's related to
    # backslashes!
    #echo 'word_freq 10 benchmarks/testdata/abuild' | xargs -n 3 -- echo "$fields"
    #echo 'word_freq 2 benchmarks/testdata/ltmain.sh' | xargs -n 3 -- echo "$fields"
    echo 'word_freq 10 configure' | xargs -n 3 -- echo "$fields"
  done
}

assoc_array-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    for n in 1000 2000 3000; do
      echo "word_freq 10 $n" | xargs -n 3 -- echo "$fields"
    done
  done
}

bubble_sort-tasks() {
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'bubble_sort int 200' | xargs -n 3 -- echo "$fields"
    echo 'bubble_sort bytes 200' | xargs -n 3 -- echo "$fields"
  done
}

# Arrays are doubly linked lists in bash! With a LASTREF hack to avoid being
# quadratic.
#
# See array_reference() in array.c in bash. It searches both back and
# forward. Every cell has its index, a value, a forward pointer, and a back
# pointer.
#
# You need a pretty high N to see the quadratic behavior though!

# NOTE: osh is also slower with linear access, but not superlinear!

array_ref-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

  #array_ref $OSH_CC seq 5000
  #array_ref $OSH_CC seq 10000
  #array_ref $OSH_CC random 5000
  #array_ref $OSH_CC random 10000
  #EOF
}
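
# Rough local check (illustrative, not part of the measured benchmarks): time
# sequential vs. random indexing in the system bash to see the effect described
# above.  The default N is modest; the quadratic behavior needs larger N.
demo-array-ref-locally() {
  local n=${1:-20000}

  for mode in seq random; do
    echo "--- bash array_ref $mode $n"
    time seq $n | shuf | bash benchmarks/compute/array_ref.sh $mode
  done
}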

palindrome-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'palindrome unicode _' | xargs -n 3 -- echo "$fields"
    echo 'palindrome bytes _' | xargs -n 3 -- echo "$fields"
  done
}

parse_help-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'parse_help ls-short _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help ls _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help mypy _' | xargs -n 3 -- echo "$fields"
  done
}

ext() {
  local runtime=$1
  case $runtime in
    (python2)
      echo 'py'
      ;;
    (*sh | *osh*)
      echo 'sh'
      ;;
  esac
}
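
# For example, 'ext python2' prints 'py', while 'ext bash' or
# 'ext _bin/cxx-opt/osh' prints 'sh', matching the benchmarks/compute/*.py
# and *.sh implementations.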

word_freq-one() {
  ### Run one word_freq task (hash tables)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}
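
# Example of running one task by hand (illustrative; assumes python2 is
# installed and you're at the repo root, which has the 'configure' file):
#
#   benchmarks/compute.sh word_freq-one word_freq python2 10 configure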

assoc_array-one() {
  ### Run word_freq with seq

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local n=${4:-10}

  # shuf so we don't get the bash optimization
  seq $n | shuf |
    $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}

bubble_sort-one() {
  ### Run one bubble_sort task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-int}
  local n=${4:-100}

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}

# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  local name=${1:-array_ref}
  local runtime=$2
  local mode=${3:-seq}
  local n=${4:-100}

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}

palindrome-one() {
  ### Run one palindrome task (strings)

  local name=${1:-palindrome}
  local runtime=$2
  local mode=${3:-unicode}

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}

parse_help-one() {
  ### Run one parse_help task (strings, real code)

  local name=${1:-parse_help}
  local runtime=$2
  local workload=${3:-}

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}
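
# The *-one functions can also be invoked by hand, e.g. (illustrative;
# bubble_sort-one needs the testdata written by bubble_sort-testdata below):
#
#   benchmarks/compute.sh bubble_sort-one bubble_sort bash int 200
#   benchmarks/compute.sh parse_help-one parse_help bash ls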

#
# Helpers
#

hello-all() { task-all hello "$@"; }
fib-all() { task-all fib "$@"; }
word_freq-all() { task-all word_freq "$@"; }
assoc_array-all() { task-all assoc_array "$@"; }

# TODO: Fix the OSH comparison operator! It gives the wrong answer and
# completes quickly.
bubble_sort-all() { task-all bubble_sort "$@"; }

# Array access that is not quadratic
array_ref-all() { task-all array_ref "$@"; }

# Hm, osh is a little slower here
palindrome-all() { task-all palindrome "$@"; }

parse_help-all() { task-all parse_help "$@"; }

task-all() {
  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in the benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do
    local file
    case $runtime in
      (python2)
        file='py'
        ;;
      (*sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    #log "runtime=$runtime args=$args"

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    # Measurement BUG! Going through $0 (bash) makes dash appear to have the
    # memory usage of bash. It's better to get argv into the measured shell
    # directly.

    local -a cmd
    case $task_name in
      (hello|fib)
        # Run it DIRECTLY, do not run $0. Because we do NOT want to fork bash
        # then dash, because bash uses more memory.
        cmd=($runtime benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2")
        ;;
      (*)
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}
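
# Optional helper (illustrative, not used elsewhere): print the newest times
# TSV for one task, following the $out_dir/$task_name/$host_job_id.times.tsv
# layout that task-all writes.
show-latest-times() {
  local task_name=${1:-fib}
  local out_dir=${2:-$BASE_DIR/raw}

  # Globs are in lexicographical order, so the last match is the newest job ID.
  local -a tsvs=($out_dir/$task_name/*.times.tsv)
  cat ${tsvs[-1]}
}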

#
# Testdata
#

bubble_sort-testdata() {
  local out=$BASE_DIR/tmp/bubble_sort
  mkdir -p $out

  # TODO: Make these deterministic for more stable benchmarks?
  for n in 100 200 300 400; do
    seq $n | shuf > $out/testdata-$n.txt
  done

  wc -l $out/testdata-*.txt
}
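
# One way to address the TODO above (a sketch, not enabled): feed shuf a fixed
# byte stream so every machine gets the same permutation.  Assumes GNU shuf,
# which supports --random-source.
#
#   seq $n | shuf --random-source=<(yes deadbeef) > $out/testdata-$n.txt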

palindrome-testdata() {
  local out=$BASE_DIR/tmp/palindrome
  mkdir -p $out

  # TODO: Use iters?

  for i in $(seq 500); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > $out/testdata.txt

  wc -l $out/testdata.txt
}
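
# Quick sanity check (optional, illustrative): the 'unicode' mode only matters
# if the test data has multi-byte lines, so count them.
palindrome-testdata-check() {
  local f=$BASE_DIR/tmp/palindrome/testdata.txt

  echo "multi-byte lines: $(grep -c μ $f)"
  echo "total lines:      $(wc -l < $f)"
}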

measure() {
  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine. I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}

soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the one that's IN TREE, NOT in ../benchmark-data
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD _bin/cxx-opt+bumpleak/osh )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the Docker image:
  # - The Soil 'benchmarks' job uses the 'cpp' Docker image, which doesn't have
  #   layer-cpython, ../oil_DEPS/cpython-full
  # - It also doesn't have mksh or zsh

  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 "${osh_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}


test-report() {
  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}

stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p $out_dir

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=(${a[-1]})
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=(${a[-1]} ${b[-1]})
    fi

  done
  csv-concat ${raw[@]} > $times_tsv
  wc -l $times_tsv
}

print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
<body class="width60">
  <p id="home-link">
    <a href="/">oilshell.org</a>
  </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oil/benchmarks/compute](https://github.com/oilshell/oil/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies the size of the hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (arrays of integers, arrays of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Commented out until the checksum is fixed

  if false; then
    cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

    tsv2html $in_dir/palindrome.tsv

  fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
</body>
</html>
EOF
}


control-flow() {
  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  # do_neither: dash 296 ms, bash 922 ms, osh 993 ms. Not bad
  #

  for func in do_neither do_continue do_break; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/control_flow.sh $func 500
      echo
    done
  done
}

"$@"