OILS / benchmarks / ovm-build.sh View on Github | oilshell.org

479 lines, 240 significant
1#!/usr/bin/env bash
2#
3# Measure the time it takes to build a binary with different compilers on
4# different machines, and measure the binary size.
5#
6# Usage:
7# ./ovm-build.sh <function name>
8#
9# Run on its own:
10# 1. Follow common instructions in benchmarks/osh-parser.sh
11# 2. benchmarks/auto.sh measure-builds
12# 3. benchmarks/report.sh ovm-build
13
14# Directories used:
15#
16# oilshell.org/blob/
17# ovm-build/
18#
19# ~/git/oilshell/
20# oil/
21# _deps/
22# ovm-build # tarballs and extracted source
23# _tmp/
24# ovm-build/
25# raw/ # output CSV
26# stage1
27# benchmark-data/
28# ovm-build/
29# raw/
30# compiler-id/
31# host-id/
32
# Strict mode: unset variables, failing pipeline stages and failing commands
# all abort the script.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory above the one containing this script.
# Both command substitutions are quoted so a checkout path containing spaces
# still resolves correctly.
REPO_ROOT=$(cd "$(dirname "$0")/.."; pwd)
readonly REPO_ROOT

# These are relative paths, so they are resolved against the current working
# directory.
source test/tsv-lib.sh  # uses REPO_ROOT
source benchmarks/common.sh  # for log, etc.
source build/common.sh  # for $CLANG

readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build  # Make it absolute

#
# Dependencies
#

# Leave out mksh for now, because it doesn't follow ./configure make.  It just
# has Build.sh.
readonly -a TAR_SUBDIRS=( bash-4.4 dash-0.5.9.1 )  # mksh )
54
# NOTE: Same list in oilshell.org/blob/run.sh.
#
# Print the file name of each source tarball, one per line.
tarballs() {
  printf '%s\n' \
    bash-4.4.tar.gz \
    dash-0.5.9.1.tar.gz \
    mksh-R56c.tgz
}
63
# Fetch every tarball named by 'tarballs' into $TAR_DIR.  Files that already
# exist there are not downloaded again (--no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already runs one wget per input line.  The former '-n 1' was
  # redundant, and GNU xargs treats -n and -I as mutually exclusive.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/ovm-build/{}'
}
69
# Done MANUALLY.
#
# Unpack every file matching $TAR_DIR/*gz into $TAR_DIR and report the total
# elapsed time of the loop.
extract-other() {
  local tarball  # keep the loop variable out of the global scope

  # Quoted so that a $TAR_DIR containing whitespace is not word-split.
  time for tarball in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$tarball"
  done
}
76
# Done automatically by 'measure' function.
#
# NOTE: We assume that _release/oil.tar exists.  It should be made by
# scripts/release.sh build-and-test or benchmark-build.
#
# Reads the globals TAR_DIR and OIL_VERSION.  Paths are relative to the current
# directory.
extract-oil() {
  # This is different than the other tarballs.
  # ${TAR_DIR:?} aborts instead of expanding to '/oil-*' if TAR_DIR is empty.
  rm -r -f -v "${TAR_DIR:?}"/oil-*
  tar -x --directory "$TAR_DIR" --file _release/oil.tar

  # To run on multiple machines, use the one in the benchmarks-data repo.
  # --no-target-directory copies the contents onto the destination directory
  # rather than nesting the source inside it.
  cp --recursive --no-target-directory \
    "../benchmark-data/src/oils-for-unix-$OIL_VERSION/" \
    "$TAR_DIR/oils-for-unix-$OIL_VERSION/"
}
91
92#
93# Measure Size of Binaries.
94#
95
96# Other tools:
97# - bloaty to look inside elf file
98# - nm? Just a flat list of symbols? Counting them would be nice.
99# - zipfile.py to look inside bytecode.zip
100
# Print a TSV table on stdout: a header row, then one row per path argument
# holding the host name, the size in bytes and the path.
#
# Args:
#   $@: paths to measure
sizes-tsv() {
  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  # Declared and assigned separately so a failing 'hostname' is not masked by
  # the exit status of 'local'.
  local host
  host=$(hostname)

  # -maxdepth 0 limits find to the arguments themselves; %s is the size in
  # bytes and %p the path.
  find "$@" -maxdepth 0 -printf "$host\t%s\t%p\n"
}
107
# NOTE: This should be the same on all x64 machines.  But I want to run it on
# x64 machines.
#
# Write four size tables, each produced by sizes-tsv:
#   <prefix>.native-sizes.tsv       oils-for-unix binaries per compiler
#   <prefix>.bytecode-size.tsv      bytecode-opy.zip from the oil tarball
#   <prefix>.bin-sizes.tsv          oil.* binaries per compiler
#   <prefix>.other-shell-sizes.tsv  files ending in 'sh' per compiler
#
# Args:
#   $1: output path prefix (default: $BASE_DIR/raw/demo)
measure-sizes() {
  local prefix=${1:-$BASE_DIR/raw/demo}

  # PROBLEM: Do I need provenance for gcc/clang here?  I can just join it later
  # in R.

  # clang/oils-for-unix
  # clang/oils-for-unix.stripped
  # gcc/oils-for-unix
  # gcc/oils-for-unix.stripped
  #
  # Only the variable parts are quoted, so the glob and the brace expansion
  # still apply.
  sizes-tsv "$BASE_DIR"/bin/*/{oils-for-unix,oils-for-unix.stripped} \
    > "${prefix}.native-sizes.tsv"

  sizes-tsv "$TAR_DIR/oil-$OIL_VERSION/_build/oil/bytecode-opy.zip" \
    > "${prefix}.bytecode-size.tsv"

  sizes-tsv "$BASE_DIR"/bin/*/oil.* \
    > "${prefix}.bin-sizes.tsv"

  sizes-tsv "$BASE_DIR"/bin/*/*sh \
    > "${prefix}.other-shell-sizes.tsv"

  log "Wrote ${prefix}.*.tsv"
}
134
135#
136# Unused Demos
137#
138
# Unused demo: show the size of _build/oil/bytecode.zip in two ways.
bytecode-size() {
  local archive=_build/oil/bytecode.zip

  # Summary line of the archive listing.
  # 242 files, 1.85 MB
  unzip -l "$archive" | tail -n 1

  # Size of the archive file itself.
  # 1.88 MB, so there's 30K of header overhead.
  ls -l "$archive"
}
148
# Unused demo: rebuild the debug OVM target from scratch, with $CLANG as CC.
# 6.8 seconds for debug build, instead of 8 seconds.
clang-oil-dbg() {
  local target=_build/oil/ovm-dbg

  make clean
  CC="$CLANG" make "$target"
}
154
155#
156# Measure Elapsed Time
157#
158
159# Add --target-size? Add that functionality to benchmarks/time.py?
160#
161# Should we add explicit targets?
162# - ovm-clang, ovm-clang-dbg
163# - ovm-gcc, ovm-gcc-dbg
164#
165# It would be possible, but it complicates the makefile.
166
# Run one timed build step inside a source tree.  For build actions, copy the
# resulting binary to $BASE_DIR/bin/<basename of compiler_path>/.
#
# 'measure' invokes this once per task line, through xargs.
#
# Args:
#   $1 out_dir        directory (relative to $PWD) that holds the times TSV
#   $2 job_id         together with host, names the times TSV file
#   $3 host
#   $4 host_hash
#   $5 compiler_path  must end in 'gcc' or 'clang' for the oils-for-unix actions
#   $6 compiler_hash
#   $7 src_dir        source tree to build in
#   $8 action         'configure', 'make', 'oils-for-unix',
#                     'oils-for-unix.stripped', or a make target
build-task() {
  local out_dir=$1
  local job_id=$2
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  local times_out="$PWD/$out_dir/$host.$job_id.times.tsv"

  # Definitions that depend on $PWD.  They are computed before the pushd below
  # so they stay valid after the directory change.
  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )
  local bin_base_dir=$PWD/$BASE_DIR/bin

  local bin_dir
  bin_dir="$bin_base_dir/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  pushd "$src_dir" >/dev/null

  # NOTE: We're not saving the output anywhere.  We save the status, which
  # protects against basic errors.

  case $action in
    (configure)
      "${TIME_PREFIX[@]}" -- ./configure

      # Cleaning here relies on the ORDER of tasks.txt.  configure happens
      # before build.  The Clang build shouldn't reuse GCC objects!
      # It has to be done after configure, because the Makefile must exist!
      make clean
      ;;

    (make)
      "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

      # Path of the binary that 'make' produced, relative to src_dir.
      local target
      case $src_dir in
        (*/bash*)
          target=bash
          ;;
        (*/dash*)
          target=src/dash
          ;;
        (*)
          # Previously 'target' stayed unset here and 'strip' was run on it.
          die "Don't know which binary 'make' produces in $src_dir"
          ;;
      esac

      strip "$target"
      cp -v "$target" "$bin_dir"
      ;;

    (oils-for-unix*)
      local variant
      case $action in
        (oils-for-unix)
          variant='dbg'
          ;;
        (oils-for-unix.stripped)
          variant='opt'
          ;;
        (*)
          die "Invalid target"
          ;;
      esac

      # Change the C compiler into the corresponding C++ compiler
      local compiler
      case $compiler_path in
        (*gcc)
          # note: we take provenance of /usr/bin/gcc, but the shell script runs 'c++'
          compiler='cxx'
          ;;
        (*clang)
          # Note on slight mess: benchmarks/id.sh takes the provenance of
          # $CLANG.  We translate that to 'clang' here, and
          # _build/oils.sh uses $CLANGXX.
          compiler='clang'
          ;;
        (*)
          die "Invalid compiler"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- _build/oils.sh "$compiler" "$variant"

      # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
      local filename=$action
      cp -v "_bin/$compiler-$variant-sh/$filename" "$bin_dir"
      ;;

    (*)
      local target=$action  # Assume it's a target like _bin/oil.ovm

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$target"

      cp -v "$target" "$bin_dir"
      ;;
  esac

  popd >/dev/null

  # NOTE: $? below is the exit status of the popd just above, not of the build
  # commands.  With errexit on, a failing step has already aborted the script.
  log "DONE BUILD TASK $action $src_dir __ status=$?"
}
276
# Print the build tasks for the Oil tarballs, one task per line.
#
# Each non-empty line of the provenance file is emitted five times, each time
# with two more fields appended: the source directory and the action.
#
# Args:
#   $1: path of the compiler provenance file
oil-tasks() {
  local provenance=$1

  # NOTE: it MUST be a tarball and not the git repo, because we don't build
  # bytecode-*.zip!  We care about the "packager's experience".
  local oil_dir="$TAR_DIR/oil-$OIL_VERSION"
  local ofu_dir="$TAR_DIR/oils-for-unix-$OIL_VERSION"

  local line
  # -r keeps backslashes literal.  The '|| [[ -n ... ]]' clause still handles a
  # final line that lacks a trailing newline.
  while read -r line || [[ -n $line ]]; do
    # A blank line would yield a short row and misalign the fixed-width
    # grouping done by xargs in 'measure'.
    [[ -n $line ]] || continue

    # NOTE: configure is independent of compiler.
    printf '%s %s %s\n' "$line" "$oil_dir" configure
    printf '%s %s %s\n' "$line" "$oil_dir" _bin/oil.ovm
    printf '%s %s %s\n' "$line" "$oil_dir" _bin/oil.ovm-dbg

    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix.stripped
  done < "$provenance"
}
296
# Print the build tasks for the comparison shells in TAR_SUBDIRS, one task per
# line.
#
# Each non-empty provenance line that does not mention clang is emitted twice
# per shell, with the source directory and the action ('configure', then
# 'make') appended.
#
# Args:
#   $1: path of the compiler provenance file
other-shell-tasks() {
  local provenance=$1

  local line dir
  # -r keeps backslashes literal.  The '|| [[ -n ... ]]' clause still handles a
  # final line that lacks a trailing newline.
  while read -r line || [[ -n $line ]]; do
    # A blank line would yield a short row and misalign the fixed-width
    # grouping done by xargs in 'measure'.
    [[ -n $line ]] || continue

    case $line in
      # Skip clang for now.
      (*clang*)
        continue
        ;;
    esac

    for dir in "${TAR_SUBDIRS[@]}"; do
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" configure
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" make
    done
  done < "$provenance"
}
319
# 5 releases: 0.0.0 to 0.4.0.  For now, just do the 0.5.alpha1 release, and
# show the drop.
#
# Placeholder: currently emits a single empty line.
oil-historical-tasks() {
  printf '\n'
}
325
# Number of fields in one task line: 5 from provenance, then tarball/target
# and the action.  The action is 'configure', a target name, etc.
NUM_COLUMNS=7
readonly NUM_COLUMNS
328
# Run every build task for one provenance file and record times and sizes.
#
# Steps: extract the Oil tarball, write the TSV header, generate the task
# lists, run each task through '$0 build-task' via xargs, then measure binary
# sizes and copy the provenance file next to the results.
#
# Args:
#   $1: provenance file, from benchmarks/id.sh compiler-provenance
#   $2: output directory (default: $BASE_DIR/raw)
measure() {
  local provenance=$1  # from benchmarks/id.sh compiler-provenance
  local out_dir=${2:-$BASE_DIR/raw}

  extract-oil

  # Job ID is everything up to the first dot in the filename.
  # Declared and assigned separately so a failing 'basename' is not masked.
  local name
  name=$(basename "$provenance")
  local prefix=${name%.compiler-provenance.txt}  # strip suffix

  local times_out="$out_dir/$prefix.times.tsv"
  # NOTE: Do we need two raw dirs?
  mkdir -p "$BASE_DIR"/{raw,stage1,bin} "$out_dir"

  # TODO: the $times_out calculation is duplicated in build-task()

  # Write header of the TSV file that is appended to.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > "$times_out"

  local t1=$BASE_DIR/oil-tasks.txt
  local t2=$BASE_DIR/other-shell-tasks.txt

  oil-tasks "$provenance" > "$t1"
  other-shell-tasks "$provenance" > "$t2"

  #grep dash $t2 |
  #time cat $t1 |

  # errexit is turned off so the xargs status can be reported below instead of
  # aborting immediately.  -n counts only the fields read from stdin; $out_dir
  # is passed as a fixed leading argument to every build-task call.
  set +o errexit
  time cat "$t1" "$t2" |
    xargs --verbose -n "$NUM_COLUMNS" -- "$0" build-task "$out_dir"
  local status=$?
  set -o errexit

  if test "$status" -ne 0; then
    die "*** Some tasks failed. (xargs status=$status) ***"
  fi

  measure-sizes "$out_dir/$prefix"

  cp -v "$provenance" "$out_dir"
}
372
373#
374# Data Preparation and Analysis
375#
376
# Combine the latest raw results of $MACHINE1 and $MACHINE2 into
# $BASE_DIR/stage1/<kind>.tsv, for kind in times, bytecode-size, bin-sizes and
# native-sizes.  Then print a preview and line counts of the output files.
#
# Args:
#   $1: directory holding the raw TSV files (default: $BASE_DIR/raw)
stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  local out=$BASE_DIR/stage1
  mkdir -p "$out"

  local kind
  local -a a b

  # Globs are in lexicographical order, which works for our dates.  So the last
  # element of each array is the most recent file for that machine.
  for kind in times bytecode-size bin-sizes native-sizes; do
    a=("$raw_dir/$MACHINE1".*."$kind".tsv)
    b=("$raw_dir/$MACHINE2".*."$kind".tsv)
    tsv-concat "${a[-1]}" "${b[-1]}" > "$out/$kind.tsv"
  done

  # NOTE: unused
  # Construct a one-column TSV file.  At this point a and b still hold the
  # native-sizes matches from the last loop iteration.
  local raw_data_tsv=$out/raw-data.tsv
  {
    echo 'path'
    printf '%s\n' "${a[-1]}" "${b[-1]}"
  } > "$raw_data_tsv"

  head "$out"/*
  wc -l "$out"/*
}
419
# Print the HTML report for the build benchmark on stdout.
#
# Args:
#   $1: directory containing times.tsv, native-sizes.tsv, sizes.tsv, hosts.tsv
#       and compilers.tsv
print-report() {
  local in_dir=$1
  # Not referenced in this function.  Kept because bash locals are visible to
  # the helpers called below.  NOTE(review): confirm whether any of them read it.
  local base_url='../../web'

  benchmark-html-head 'OVM Build Performance'

  cat <<'EOF'
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  # The link used to point at osh-parser.sh; it now names this script.
  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting clang makes this table easier to read.
  # NOTE(review): the pattern below is '^gcc', while the comment above mentions
  # clang -- confirm which is intended.
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'
### OVM Binary Size

The oil binary has two portions:

- Architecture-independent `bytecode.zip`
- Architecture- and compiler- dependent native code (`_build/oil/ovm*`)

EOF
  # Highlight the "default" production build
  tsv2html --css-class-pattern 'special /gcc/oil.ovm$' "$in_dir/sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<'EOF'
  </body>
</html>
EOF
}
478
# Dispatch: run the command line as a command, so that
# './ovm-build.sh <function name> [args...]' calls that function.
"$@"