OILS / benchmarks / autoconf.sh View on Github | oilshell.org

599 lines, 242 significant
1#!/usr/bin/env bash
2#
3# Why is CPython configure slower under OSH?
4#
5# Usage:
6# benchmarks/autoconf.sh <function name>
7#
8# Examples:
9# $0 patch-pyconf # times builtin
10# $0 measure-times # time-tsv with gc stats
11# $0 report-times # times builtin
12#
13# $0 measure-syscalls
14# $0 report-syscalls
15# $0 report-processes
16# $0 report-external
17#
18# Simpler:
19# $0 measure-rusage # time-tsv
20
21set -o nounset
22set -o pipefail
23set -o errexit
24
25REPO_ROOT=$(cd "$(dirname $0)/.."; pwd)
26
27source benchmarks/cachegrind.sh # with-cachegrind
28source benchmarks/callgrind.sh # with-cachegrind
29source test/tsv-lib.sh # $TAB
30
31readonly BASE_DIR_RELATIVE=_tmp/autoconf
32readonly BASE_DIR=$REPO_ROOT/$BASE_DIR_RELATIVE
33readonly PY_CONF=$REPO_ROOT/Python-2.7.13/configure
34
35#
36# Trying to measure allocation/GC overhead
37#
38# This DOES NOT HELP because bumpleak/bumproot are **slower** on bigger heaps.
39# There's less cache locality!
40#
41
42cpython-configure-tasks() {
43 local -a variants=( opt+bumpleak opt+bumproot opt )
44 for v in ${variants[@]}; do
45 echo "${v}${TAB}_bin/cxx-$v/osh"
46 done
47}
48
49cpython-setup() {
50 cpython-configure-tasks | while read -r _ osh; do
51 ninja $osh
52 done
53}
54
55measure-alloc-overhead() {
56 local base_dir=$BASE_DIR/alloc-overhead
57 rm -r -f -v $base_dir
58
59 cpython-configure-tasks | while read -r variant osh; do
60 osh=$REPO_ROOT/$osh
61
62 local task_dir=$base_dir/$variant
63
64 mkdir -p $task_dir
65 pushd $task_dir > /dev/null
66
67 local -a flags=(
68 --output "$base_dir/$variant.tsv"
69 --rusage
70 )
71
72 local -a time_argv
73
74 time_argv=(
75 time-tsv --print-header
76 "${flags[@]}"
77 --field variant
78 )
79 "${time_argv[@]}"
80
81 time_argv=(
82 time-tsv --append
83 "${flags[@]}"
84 --field "$variant"
85 -- $osh $PY_CONF
86 )
87
88 #echo "${time_argv[@]}"
89 "${time_argv[@]}"
90
91 popd > /dev/null
92
93 done
94}
95
96#
97# Compare bash/dash/osh locally
98#
99
100measure-rusage() {
101 local base_dir=$BASE_DIR/rusage
102 rm -r -f -v $base_dir
103
104 shell-tasks | while read -r sh_label sh_path; do
105
106 local task_dir=$base_dir/$sh_label
107
108 mkdir -p $task_dir
109 pushd $task_dir > /dev/null
110
111 local -a flags=(
112 --output "$base_dir/$sh_label.tsv"
113 --rusage
114 )
115
116 local -a time_argv
117
118 time_argv=(
119 time-tsv --print-header
120 "${flags[@]}"
121 --field sh_label
122 )
123 "${time_argv[@]}"
124
125 time_argv=(
126 time-tsv --append
127 "${flags[@]}"
128 --field "$sh_label"
129 -- $sh_path $PY_CONF
130 )
131
132 #echo "${time_argv[@]}"
133 "${time_argv[@]}"
134
135 popd > /dev/null
136
137 done
138}
139
140#
141# Now try strace
142#
143
144shell-tasks() {
145 echo "bash${TAB}bash"
146 echo "dash${TAB}dash"
147 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt/osh"
148}
149
150measure-syscalls() {
151 local osh=_bin/cxx-opt/osh
152 #local osh=_bin/cxx-dbg/osh
153
154 ninja $osh
155
156 local base_dir=$BASE_DIR/syscalls
157
158 rm -r -f -v $base_dir
159
160 shell-tasks | while read -r sh_label sh_path; do
161 local dir=$base_dir/$sh_label
162 mkdir -p $dir
163
164 local counts_dir=$base_dir/$sh_label
165 mkdir -p $counts_dir
166
167 pushd $dir
168 #strace -o $counts -c $sh_path $PY_CONF
169 # See how many external processes are started?
170 #strace -o $counts -ff -e execve $sh_path $PY_CONF
171 strace -o $counts_dir/syscalls -ff $sh_path $PY_CONF
172 popd
173 done
174}
175
176# --- _tmp/autoconf/bash
177# 6047
178# 4621
179# --- _tmp/autoconf/dash
180# 6088
181# 4627
182# --- _tmp/autoconf/osh
183# 5691
184# 4631
185#
186# Woah we start fewer processes! But are not faster?
187
188grep-exec() {
189 # --max-count 1 - only report one execve per process
190 egrep --no-filename --max-count 1 -o 'execve\("[^"]+' "$@"
191}
192
193# andy@hoover:~/git/oilshell/oil$ benchmarks/autoconf.sh report-syscalls
194# --- _tmp/autoconf/syscalls/bash
195# 2592 _tmp/autoconf/syscalls/bash/syscalls.903220
196# 2608 _tmp/autoconf/syscalls/bash/syscalls.898727
197# 2632 _tmp/autoconf/syscalls/bash/syscalls.898387
198# 2679 _tmp/autoconf/syscalls/bash/syscalls.898292
199# 2853 _tmp/autoconf/syscalls/bash/syscalls.898927
200# 2873 _tmp/autoconf/syscalls/bash/syscalls.898334
201# 2920 _tmp/autoconf/syscalls/bash/syscalls.898895
202# 3204 _tmp/autoconf/syscalls/bash/syscalls.898664
203# 112549 _tmp/autoconf/syscalls/bash/syscalls.897471
204# 1360223 total
205#
206# --- _tmp/autoconf/syscalls/dash
207# 2592 _tmp/autoconf/syscalls/dash/syscalls.909344
208# 2607 _tmp/autoconf/syscalls/dash/syscalls.904921
209# 2630 _tmp/autoconf/syscalls/dash/syscalls.904581
210# 2683 _tmp/autoconf/syscalls/dash/syscalls.904486
211# 2851 _tmp/autoconf/syscalls/dash/syscalls.905109
212# 2873 _tmp/autoconf/syscalls/dash/syscalls.904528
213# 2920 _tmp/autoconf/syscalls/dash/syscalls.905088
214# 3204 _tmp/autoconf/syscalls/dash/syscalls.904858
215# 112922 _tmp/autoconf/syscalls/dash/syscalls.903626
216# 1372118 total
217#
218# --- _tmp/autoconf/syscalls/osh
219# 2592 _tmp/autoconf/syscalls/osh/syscalls.915226
220# 2607 _tmp/autoconf/syscalls/osh/syscalls.910993
221# 2630 _tmp/autoconf/syscalls/osh/syscalls.910647
222# 2679 _tmp/autoconf/syscalls/osh/syscalls.910561
223# 2851 _tmp/autoconf/syscalls/osh/syscalls.911162
224# 2873 _tmp/autoconf/syscalls/osh/syscalls.910599
225# 2920 _tmp/autoconf/syscalls/osh/syscalls.911143
226# 3204 _tmp/autoconf/syscalls/osh/syscalls.910936
227# 72921 _tmp/autoconf/syscalls/osh/syscalls.909769
228# 1211074 total
229
230report-processes() {
231 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
232 echo "--- $sh_dir"
233 ls $sh_dir/* | wc -l
234 grep-exec $sh_dir/syscalls.* | wc -l
235 echo
236
237 done
238}
239
240report-external() {
241 local n=${1:-5}
242
243 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
244 echo "--- $sh_dir"
245
246 grep-exec $sh_dir/syscalls.* | sort | uniq -c | sort -n | tail -n $n
247 echo
248 done
249}
250
251report-syscalls() {
252 # Hm this is instructive, the shell itself makes the most syscalls
253 # And fewer than other shells?
254
255 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
256 echo "--- $sh_dir"
257 wc -l $sh_dir/syscalls.* | sort -n | tail
258 echo
259 done
260}
261
262#
263# Cachegrind
264#
265
266measure-valgrind() {
267 local tool=$1
268
269 # opt seems to give OK results, but I thought dbg was more accurate
270 #local osh=_bin/cxx-opt/osh
271 local osh=_bin/cxx-dbg/osh
272
273 ninja $osh
274
275 local osh=$REPO_ROOT/$osh
276
277 local base_dir=$REPO_ROOT/_tmp/$tool
278
279 local dir=$base_dir/cpython-configure
280 rm -r -f -v $dir
281
282 local out_file=$base_dir/cpython-configure.txt
283
284 mkdir -v -p $dir
285
286 pushd $dir
287 $tool $out_file $osh $PY_CONF
288 popd
289}
290
291measure-cachegrind() {
292 measure-valgrind with-cachegrind
293}
294
295measure-callgrind() {
296 # This takes ~5 minutes with opt binary, ~6:43 with dbg
297 # vs ~15 seconds uninstrumented
298 time measure-valgrind with-callgrind
299}
300
301# Note:
302# benchmarks/osh-runtime.sh compares our release, which does not have #ifdef
303# GC_TIMING, so we don't know total GC time.
304
305# TODO:
306#
307# - Run locally, reproduce GC_TIMING - this is not in the release build
308# - it seems to say only 143 ms total GC time, but we're seeing 1.5+ seconds
309# slowdown on Cpython configure vs. bash
310# - I want a local run that automates it, and returns PERCENTAGES for elapsed
311# time, sys time, user time
312# - We also might not want to amortize free() inside Allocate()
313# - #ifdef LAZY_FREE I think! That might show a big slowdown with free
314
315patch-pyconf() {
316 #sed -i $'s/ac_compile=\'$CC/ac_compile=\'times; $CC/g' $PY_CONF
317
318 # temporary
319 echo 'times > $SH_BENCHMARK_TIMES' >> $PY_CONF
320}
321
322measure-times() {
323 local osh=_bin/cxx-opt/osh
324 ninja $osh
325
326 local base_dir=$BASE_DIR/times
327 rm -r -f -v $base_dir
328
329 local trace_dir=$base_dir/oils-trace
330 mkdir -p $trace_dir
331
332 shell-tasks | while read -r sh_label sh_path; do
333 #case $sh_label in bash|dash) continue ;; esac
334
335 local dir=$base_dir/$sh_label
336 mkdir -p $dir
337
338 pushd $dir
339
340 local -a flags=(
341 --output "$base_dir/$sh_label.tsv"
342 --rusage
343 )
344
345 local -a time_argv
346
347 time_argv=(
348 time-tsv --print-header
349 "${flags[@]}"
350 --field sh_label
351 )
352 "${time_argv[@]}"
353
354 time_argv=(
355 time-tsv --append
356 "${flags[@]}"
357 --field "$sh_label"
358 -- $sh_path $PY_CONF
359 )
360
361 #echo "${time_argv[@]}"
362
363 # 1269 argv0.json files created
364 # we can miss some via NOLASTFORK optimization
365 #OILS_TRACE_DIR=$trace_dir \
366
367 _OILS_GC_VERBOSE=1 OILS_GC_STATS_FD=99 \
368 SH_BENCHMARK_TIMES=$base_dir/$sh_label.times.txt \
369 "${time_argv[@]}" \
370 99>$base_dir/$sh_label.gc-stats.txt
371
372 popd
373 done
374}
375
376inner-long-tsv() {
377 python2 -c '
378import os, re, sys
379
380def PrintRow(row):
381 print("\t".join(row))
382
383PrintRow(["shell", "who", "what", "seconds"])
384
385for path in sys.argv[1:]:
386 filename = os.path.basename(path)
387 shell = filename.split(".")[0]
388
389 f = open(path)
390 s = f.read()
391
392 secs = re.findall("0m([0-9.]+)s", s)
393 assert len(secs) == 4, secs
394
395 PrintRow([shell, "self", "user", secs[0]])
396 PrintRow([shell, "self", "sys", secs[1]])
397 PrintRow([shell, "child", "user", secs[2]])
398 PrintRow([shell, "child", "sys", secs[3]])
399
400 # Non-normalized, but OK
401 total_secs = sum(float(s) for s in secs)
402 PrintRow([shell, "both", "both", str(total_secs)])
403
404 ' $BASE_DIR/times/*.times.txt
405}
406
407compare-dim() {
408 # 8% more child system time
409 local who=${1:-child}
410 local what=${2:-user}
411
412 echo "=== $who $what ==="
413
414 # Annoying
415 # https://www.math.utah.edu/docs/info/gawk_8.html
416 # "If, for some reason, you need to force a number to be converted to a
417 # string, concatenate the empty string, "", with that number. To force a
418 # string to be converted to a number, add zero to that string."
419
420 cat $BASE_DIR/times-long.tsv | awk -v "who=$who" -v "what=$what" '
421 BEGIN {
422 TAB = "\t"
423
424 i = 0
425
426 printf "%s\t%s\t%s\t%s\n", "shell", "secs", "ratio", "diff secs"
427 }
428 $2 == who && $3 == what {
429 if (i == 0) {
430 first_secs = $4 + 0
431 }
432 i++
433
434 secs = $4 + 0
435 ratio = secs / first_secs
436 diff = secs - first_secs
437
438 # Need commas for OFMT to work correctly?
439 printf "%s\t%5.3f\t%5.3f\t%5.3f\n", $1, secs, ratio, diff
440 }
441 '
442
443 echo
444}
445
446compare-times() {
447 log "INNER"
448 log ''
449
450 compare-dim self user
451
452 compare-dim self sys
453
454 compare-dim child user
455
456 compare-dim child sys
457
458 compare-dim both both
459
460 # outer
461 log "OUTER"
462 log ''
463
464 compare-dim both elapsed
465
466 # These kinda match
467 return
468 compare-dim both user
469 compare-dim both sys
470}
471
472outer-long-tsv() {
473 log "=== outer times ==="
474 awk '
475 BEGIN {
476 i = 0
477
478 printf "%s\t%s\t%s\t%s\n", "shell", "who", "what", "seconds"
479 }
480 i == 0 {
481 #print "Skipping header"
482 i++
483 next
484 }
485 i >= 1 {
486 elapsed = $2 + 0
487 user = $3 + 0
488 sys = $4 + 0
489 sh_label = $6
490
491 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "elapsed", elapsed
492 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "user", user
493 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "sys", sys
494
495 i++
496 }
497 ' $BASE_DIR/outer-wide.tsv
498}
499
500report-times() {
501 head $BASE_DIR/times/*.tsv
502 echo
503 head $BASE_DIR/times/*.times.txt
504 echo
505
506 inner-long-tsv | tee $BASE_DIR/inner-long.tsv
507 echo
508
509 tsv-concat $BASE_DIR/times/*.tsv | tee $BASE_DIR/outer-wide.tsv
510 outer-long-tsv | tee $BASE_DIR/outer-long.tsv
511 echo
512
513 tsv-concat $BASE_DIR/{inner,outer}-long.tsv | tee $BASE_DIR/times-long.tsv
514
515 compare-times
516}
517
518### Why is clone() taking longer according to strace?
519
520fork-tasks() {
521 echo "bash${TAB}bash"
522 echo "dash${TAB}dash"
523
524 # Hm this is noisy, but cxx-opt-sh does seem slower
525 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt/osh"
526 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt-sh/osh"
527}
528
529measure-fork() {
530 fork-tasks | while read -r sh_label sh_path; do
531 #case $sh_label in bash|dash) continue ;; esac
532
533 echo "=== $sh_path ==="
534
535 # Builtin is very fast
536 #time $sh_path -c 'for i in $(seq 100); do true; done'
537
538 # Hm this is very noisy
539 # TODO use hyperfine?
540 time $sh_path -c 'for i in $(seq 100); do /bin/true; done'
541
542 case $sh_label in
543 osh)
544 # Oops, we are not symlinking to the .stripped binary!
545 # This is explicitly done for symbols and benchmarking.
546 # Hm does that make it slower then?
547
548 ls -l -L $sh_path
549 ldd $sh_path
550 ;;
551 esac
552 done
553}
554
555# $ head _tmp/elapsed/*.times.txt
556# ==> _tmp/elapsed/bash.times.txt <==
557# 0m0.213s 0m0.477s
558# 0m8.233s 0m2.931s
559#
560# ==> _tmp/elapsed/dash.times.txt <==
561# 0m0.217s 0m0.463s
562# 0m8.281s 0m2.922s
563#
564# ==> _tmp/elapsed/osh.times.txt <==
565# 0m0.360s 0m0.720s
566# 0m8.790s 0m2.960s
567
568# shell user time - GC and allocs
569# shell system time - ???
570# child user time - ???
571# TODO: count how many processes this is.
572# It's more than 500 ms
573# Is that 500 processes, and 1 ms per process?
574
575fork-time() {
576 local osh=_bin/cxx-opt/osh
577
578 $osh -c 'time for i in {1..1000}; do true; done'
579 echo
580
581 $osh -c 'time for i in {1..1000}; do ( true ); done'
582 echo
583
584 # Does this increase fork time or no?
585 # Hm I can only get the forks up to 306ms for 1000, or 300 us
586 # But the HereDocWriter does dup() and so forth
587 $osh -c '
588echo ysh-parse
589time for i in {1..40}; do
590 . test/ysh-parse-errors.sh
591done
592times
593time for i in {1..1000}; do
594 ( true )
595done'
596 echo
597}
598
599"$@"