| 1 | #!/usr/bin/env bash
 | 
| 2 | #
 | 
| 3 | # Measure the time it takes to build a binary with different compilers on
 | 
| 4 | # different machines, and measure the binary size.
 | 
| 5 | #
 | 
| 6 | # Usage:
 | 
| 7 | #   ./ovm-build.sh <function name>
 | 
| 8 | #
 | 
| 9 | # Run on its own:
 | 
| 10 | #   1. Follow common instructions in benchmarks/osh-parser.sh
 | 
| 11 | #   2. benchmarks/auto.sh measure-builds
 | 
| 12 | #   3. benchmarks/report.sh ovm-build
 | 
| 13 | 
 | 
| 14 | # Directories used:
 | 
| 15 | #
 | 
| 16 | # oilshell.org/blob/
 | 
| 17 | #  ovm-build/
 | 
| 18 | #
 | 
| 19 | # ~/git/oilshell/
 | 
| 20 | #   oil/
 | 
| 21 | #     _deps/
 | 
| 22 | #       ovm-build  # tarballs and extracted source
 | 
| 23 | #     _tmp/
 | 
| 24 | #       ovm-build/  
 | 
| 25 | #         raw/     # output CSV
 | 
| 26 | #         stage1
 | 
| 27 | #   benchmark-data/
 | 
| 28 | #     ovm-build/
 | 
| 29 | #       raw/
 | 
| 30 | #     compiler-id/
 | 
| 31 | #     host-id/
 | 
| 32 | 
 | 
# Strict mode: unset variables, failed pipeline stages and failed commands all
# abort the script.
set -o nounset
set -o pipefail
set -o errexit

# Parent of the directory containing this script.  Quoted so that paths with
# whitespace work, and chained with && so that a failed 'cd' aborts instead of
# silently reporting the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT

# NOTE: these paths are relative, so the script must be run from the repo root.
source test/tsv-lib.sh  # uses REPO_ROOT
source benchmarks/common.sh  # for log, etc.
source build/common.sh  # for $CLANG

readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build  # Make it absolute
 | 
| 46 | 
 | 
| 47 | #
 | 
| 48 | # Dependencies
 | 
| 49 | #
 | 
| 50 | 
 | 
# Extracted source directories (under $TAR_DIR) of the other shells we build.
#
# Leave out mksh for now, because it doesn't follow the ./configure && make
# convention.  It just has Build.sh.
readonly -a TAR_SUBDIRS=( bash-4.4 dash-0.5.9.1 )  # mksh )
 | 
| 54 | 
 | 
| 55 | # NOTE: Same list in oilshell.org/blob/run.sh.
 | 
tarballs() {
  # Print the file name of each source tarball, one per line.
  printf '%s\n' \
    bash-4.4.tar.gz \
    dash-0.5.9.1.tar.gz \
    mksh-R56c.tgz
}
 | 
| 63 | 
 | 
download() {
  # Fetch every file listed by 'tarballs' into $TAR_DIR.  Files that already
  # exist locally are not fetched again (--no-clobber).
  mkdir -p "$TAR_DIR"

  # -I {} already runs one command per input line.  Combining it with -n is
  # rejected by GNU xargs as mutually exclusive, so -n 1 is omitted.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/ovm-build/{}'
}
 | 
| 69 | 
 | 
| 70 | # Done MANUALLY.
 | 
extract-other() {
  # Unpack every *gz archive found in $TAR_DIR into $TAR_DIR itself, and report
  # the total time taken.  Expansions are quoted because $TAR_DIR is derived
  # from $PWD and may contain whitespace.
  local f
  time for f in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
}
 | 
| 76 | 
 | 
| 77 | # Done automatically by 'measure' function.
 | 
| 78 | #
 | 
| 79 | # NOTE: We assume that _release/oil.tar exists.  It should be made by
 | 
| 80 | # scripts/release.sh build-and-test or benchmark-build.
 | 
extract-oil() {
  # Refresh the Oil sources under $TAR_DIR.  Must be run from the repo root,
  # because _release/ and ../benchmark-data/ are relative paths.

  # This is different from the other tarballs: remove any previously extracted
  # oil-* trees, then unpack the release tarball.
  rm -r -f -v "$TAR_DIR"/oil-*
  tar -x --directory "$TAR_DIR" --file _release/oil.tar

  # To run on multiple machines, use the copy in the benchmark-data repo.
  local ofu="oils-for-unix-$OIL_VERSION"
  cp --recursive --no-target-directory \
    "../benchmark-data/src/$ofu/" \
    "$TAR_DIR/$ofu/"
}
 | 
| 91 | 
 | 
| 92 | #
 | 
| 93 | # Measure Size of Binaries.
 | 
| 94 | #
 | 
| 95 | 
 | 
| 96 | # Other tools:
 | 
| 97 | # - bloaty to look inside elf file
 | 
| 98 | # - nm?  Just a flat list of symbols?  Counting them would be nice.
 | 
| 99 | # - zipfile.py to look inside bytecode.zip
 | 
| 100 | 
 | 
sizes-tsv() {
  # Print a TSV table (header row, then one row per path argument) giving the
  # host name, the size in bytes and the path itself.

  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  local label=$(hostname)
  local row_format="$label\t%s\t%p\n"

  # -maxdepth 0 restricts find to the arguments themselves.
  find "$@" -maxdepth 0 -printf "$row_format"
}
 | 
| 107 | 
 | 
| 108 | # NOTE: This should be the same on all x64 machines.  But I want to run it on
 | 
| 109 | # x64 machines.
 | 
measure-sizes() {
  # Write four size tables, each named "<prefix>.<kind>.tsv".
  #
  # Args:
  #   $1: output path prefix (default: $BASE_DIR/raw/demo)
  local prefix=${1:-$BASE_DIR/raw/demo}
  local bin=$BASE_DIR/bin

  # PROBLEM: Do I need provenance for gcc/clang here?  I can just join it later
  # in R.

  # The redirect targets are quoted so that a prefix containing whitespace
  # doesn't produce an "ambiguous redirect" error.  The globs stay unquoted.

  # clang/oils-for-unix
  # clang/oils-for-unix.stripped
  # gcc/oils-for-unix
  # gcc/oils-for-unix.stripped
  sizes-tsv "$bin"/*/oils-for-unix{,.stripped} \
    > "${prefix}.native-sizes.tsv"

  sizes-tsv "$TAR_DIR/oil-$OIL_VERSION/_build/oil/bytecode-opy.zip" \
    > "${prefix}.bytecode-size.tsv"

  sizes-tsv "$bin"/*/oil.* \
    > "${prefix}.bin-sizes.tsv"

  sizes-tsv "$bin"/*/*sh \
    > "${prefix}.other-shell-sizes.tsv"

  log "Wrote ${prefix}.*.tsv"
}
 | 
| 134 | 
 | 
| 135 | #
 | 
| 136 | # Unused Demos
 | 
| 137 | #
 | 
| 138 | 
 | 
bytecode-size() {
  # Show two views of the bytecode archive's size: the summary line printed by
  # 'unzip -l', then the size of the file on disk.
  local archive=_build/oil/bytecode.zip

  # Summary line only.  Once measured at 242 files, 1.85 MB.
  unzip -l $archive | tail -n 1

  # On disk it was 1.88 MB, so there's 30K of header overhead.
  ls -l $archive
}
 | 
| 148 | 
 | 
| 149 | # 6.8 seconds for debug build, instead of 8 seconds.
 | 
clang-oil-dbg() {
  # Rebuild the debug OVM binary from scratch, with $CLANG as the C compiler.
  make clean

  # CC is set only for this one make invocation.
  CC="$CLANG" make _build/oil/ovm-dbg
}
 | 
| 154 | 
 | 
| 155 | #
 | 
| 156 | # Measure Elapsed Time
 | 
| 157 | #
 | 
| 158 | 
 | 
| 159 | # Add --target-size?  Add that functionality to benchmarks/time.py?
 | 
| 160 | #
 | 
| 161 | # Should we add explicit targets?
 | 
| 162 | #   - ovm-clang, ovm-clang-dbg
 | 
| 163 | #   - ovm-gcc, ovm-gcc-dbg
 | 
| 164 | #
 | 
| 165 | # It would be possible, but it complicates the makefile.
 | 
| 166 | 
 | 
build-task() {
  # Run one timed build step and copy the resulting binary to
  # $BASE_DIR/bin/<compiler basename>/.
  #
  # Args:
  #   $1 out_dir:        directory of the times TSV, relative to $PWD
  #   $2 job_id:         used in the times TSV file name
  #   $3 host, $4 host_hash, $5 compiler_path, $6 compiler_hash:
  #                      recorded as fields of the timing row
  #   $7 src_dir:        source tree to build in
  #   $8 action:         'configure', 'make', 'oils-for-unix',
  #                      'oils-for-unix.stripped', or a make target
  local out_dir=$1
  local job_id=$2
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  local times_out="$PWD/$out_dir/$host.$job_id.times.tsv"

  # Definitions that depend on $PWD, so compute them before pushd.
  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )

  local bin_dir
  bin_dir="$PWD/$BASE_DIR/bin/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  pushd "$src_dir" >/dev/null

  # NOTE: We're not saving the output anywhere.  We save the status, which
  # protects against basic errors.

  local target variant compiler

  case $action in
    (configure)
      "${TIME_PREFIX[@]}" -- ./configure

      # Cleaning here relies on the ORDER of tasks.txt.  configure happens
      # before build.  The Clang build shouldn't reuse GCC objects!
      # It has to be done after configure, because the Makefile must exist!
      make clean
      ;;

    (make)
      # Decide which binary to collect BEFORE building, so an unrecognized
      # source tree fails immediately with a clear message rather than with an
      # unbound variable after a full build.
      case $src_dir in
        (*/bash*)
          target=bash
          ;;
        (*/dash*)
          target=src/dash
          ;;
        (*)
          die "Don't know which binary 'make' produces in $src_dir"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

      strip "$target"
      cp -v "$target" "$bin_dir"
      ;;

    (oils-for-unix*)
      case $action in
        (oils-for-unix)
          variant='dbg'
          ;;
        (oils-for-unix.stripped)
          variant='opt'
          ;;
        (*)
          die "Invalid target"
          ;;
      esac

      # Change the C compiler into the corresponding C++ compiler
      case $compiler_path in
        (*gcc)
          # note: we take provenance of /usr/bin/gcc, but the shell script runs 'c++'
          compiler='cxx'
          ;;
        (*clang)
          # Note on slight mess: benchmarks/id.sh takes the provenance of
          # $CLANG.  We translate that to 'clang' here, and
          # _build/oils.sh uses $CLANGXX.
          compiler='clang'
          ;;
        (*)
          die "Invalid compiler"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- _build/oils.sh "$compiler" "$variant"

      # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
      cp -v "_bin/$compiler-$variant-sh/$action" "$bin_dir"
      ;;

    (*)
      target=$action  # Assume it's a target like _bin/oil.ovm

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$target"

      cp -v "$target" "$bin_dir"
      ;;
  esac

  popd >/dev/null

  # NOTE(review): $? here is the status of popd, not of the build.  With
  # errexit on, a failing step aborts before this line is reached.
  log "DONE BUILD TASK $action $src_dir __ status=$?"
}
 | 
| 276 | 
 | 
oil-tasks() {
  # Print the Oil build tasks: for every line of the provenance file, five task
  # lines, each consisting of the provenance line followed by a source
  # directory and an action.
  #
  # Args:
  #   $1: path of the compiler provenance file
  local provenance=$1

  # NOTE: it MUST be a tarball and not the git repo, because we don't build
  # bytecode-*.zip!  We care about the "packager's experience".
  local oil_dir="$TAR_DIR/oil-$OIL_VERSION"
  local ofu_dir="$TAR_DIR/oils-for-unix-$OIL_VERSION"

  # Append 2 fields (src_dir, action) to each provenance line.
  # 'read -r' keeps backslashes intact, and the '|| [[ -n ]]' clause handles a
  # final line without a trailing newline.
  local line
  while read -r line || [[ -n $line ]]; do
    # NOTE: configure is independent of compiler.
    printf '%s %s %s\n' "$line" "$oil_dir" configure
    printf '%s %s %s\n' "$line" "$oil_dir" _bin/oil.ovm
    printf '%s %s %s\n' "$line" "$oil_dir" _bin/oil.ovm-dbg

    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix.stripped
  done < "$provenance"
}
 | 
| 296 | 
 | 
other-shell-tasks() {
  # Print the build tasks for the comparison shells: for every provenance line
  # that doesn't mention clang, a 'configure' and a 'make' task per entry of
  # TAR_SUBDIRS.
  #
  # Args:
  #   $1: path of the compiler provenance file
  local provenance=$1

  # Append 2 fields (src_dir, action) to each provenance line.
  # 'read -r' keeps backslashes intact, and the '|| [[ -n ]]' clause handles a
  # final line without a trailing newline.
  local line dir
  while read -r line || [[ -n $line ]]; do
    case $line in
      # Skip clang for now.
      (*clang*)
        continue
        ;;
    esac

    for dir in "${TAR_SUBDIRS[@]}"; do
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" configure
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" make
    done
  done < "$provenance"
}
 | 
| 319 | 
 | 
| 320 | # 5 releases: 0.0.0 to 0.4.0.  For now, just do the 0.5.alpha1 release, and
 | 
| 321 | # show the drop.
 | 
oil-historical-tasks() {
  # Placeholder: no historical tasks yet, so this emits a single empty line.
  printf '\n'
}
 | 
| 325 | 
 | 
# Number of whitespace-separated fields per task line; 'measure' hands this
# many arguments to each build-task invocation via xargs -n.
# action is 'configure', a target name, etc.
readonly NUM_COLUMNS=7  # 5 from provenance, then src_dir and action
 | 
| 328 | 
 | 
measure() {
  # Run every build task for the compilers listed in a provenance file,
  # recording times and binary sizes under out_dir.
  #
  # Args:
  #   $1: provenance file, from benchmarks/id.sh compiler-provenance
  #   $2: output directory (default: $BASE_DIR/raw)
  local provenance=$1
  local out_dir=${2:-$BASE_DIR/raw}

  extract-oil

  # The output prefix is the provenance file name without its
  # '.compiler-provenance.txt' suffix.
  # NOTE(review): build-task writes to "$host.$job_id.times.tsv", so the prefix
  # is presumably "<host>.<job id>" -- confirm against benchmarks/id.sh.
  local name
  name=$(basename "$provenance")
  local prefix=${name%.compiler-provenance.txt}  # strip suffix

  local times_out="$out_dir/$prefix.times.tsv"
  # NOTE: Do we need two raw dirs?
  mkdir -p "$BASE_DIR"/{raw,stage1,bin} "$out_dir"

  # TODO: the $times_out calculation is duplicated in build-task()

  # Write header of the TSV file that is appended to.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > "$times_out"

  local t1=$BASE_DIR/oil-tasks.txt
  local t2=$BASE_DIR/other-shell-tasks.txt

  oil-tasks "$provenance" > "$t1"
  other-shell-tasks "$provenance" > "$t2"

  # Each group of NUM_COLUMNS fields becomes one build-task invocation of this
  # same script.  errexit is suspended so that a failure can be reported via
  # die below.
  #grep dash $t2 |
  #time cat $t1 |
  set +o errexit
  time cat "$t1" "$t2" \
    | xargs --verbose -n "$NUM_COLUMNS" -- "$0" build-task "$out_dir"
  local status=$?
  set -o errexit

  if test "$status" -ne 0; then
    die "*** Some tasks failed. (xargs status=$status) ***"
  fi

  measure-sizes "$out_dir/$prefix"

  cp -v "$provenance" "$out_dir"
}
 | 
| 372 | 
 | 
| 373 | #
 | 
| 374 | # Data Preparation and Analysis
 | 
| 375 | #
 | 
| 376 | 
 | 
stage1() {
  # For each kind of table, combine the newest raw file of $MACHINE1 and of
  # $MACHINE2 into $BASE_DIR/stage1/<kind>.tsv, then preview the results.
  #
  # Args:
  #   $1: directory holding the raw TSV files (default: $BASE_DIR/raw)
  local raw_dir=${1:-$BASE_DIR/raw}

  local out=$BASE_DIR/stage1
  mkdir -p $out

  local kind
  local -a a b

  # Globs are in lexicographical order, which works for our dates, so the last
  # match of each glob is the one that gets used.
  for kind in times bytecode-size bin-sizes native-sizes; do
    a=($raw_dir/$MACHINE1.*.$kind.tsv)
    b=($raw_dir/$MACHINE2.*.$kind.tsv)
    tsv-concat ${a[-1]} ${b[-1]} > $out/$kind.tsv
  done

  # NOTE: unused
  # Construct a one-column TSV file.  a and b still hold the matches of the
  # last kind processed above (native-sizes).
  {
    echo 'path'
    echo ${a[-1]}
    echo ${b[-1]}
  } > $out/raw-data.tsv

  head $out/*
  wc -l $out/*
}
 | 
| 419 | 
 | 
print-report() {
  # Print the HTML report to stdout, built from the TSV tables in in_dir.
  #
  # Args:
  #   $1: directory containing times.tsv, native-sizes.tsv, sizes.tsv,
  #       hosts.tsv and compilers.tsv
  local in_dir=$1

  benchmark-html-head 'OVM Build Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  # The source link points at this benchmark's own script.
  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting one compiler's rows makes this table easier to read (the
  # pattern below matches ^gcc).
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'
### OVM Binary Size

The oil binary has two portions:

- Architecture-independent `bytecode.zip`
- Architecture- and compiler- dependent native code (`_build/oil/ovm*`)

EOF
  # Highlight the "default" production build
  tsv2html --css-class-pattern 'special /gcc/oil.ovm$' "$in_dir/sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<EOF
  </body>
</html>
EOF
}
 | 
| 478 | 
 | 
# Dispatch: run the function named by the first argument, passing it the
# remaining arguments (see "Usage" at the top of the file).
"$@"
 |