OILS / benchmarks / id.sh View on Github | oilshell.org

448 lines, 228 significant
1#!/usr/bin/env bash
2#
3# Keep track of benchmark data provenance.
4#
5# Usage:
6# benchmarks/id.sh <function name>
7
# Strict mode: unset variables, failed pipeline stages and failed commands
# abort the script.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory above the one containing this script.
# "$0" is quoted so that a checkout path with spaces works, and '&&' keeps a
# failed 'cd' from silently yielding the current directory instead.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
13
14source build/common.sh # for $CLANG
15source benchmarks/common.sh
16source test/tsv-lib.sh # tsv-row
17
print-job-id() {
  ### Print the current local time as YEAR-MONTH-DAY__HOUR-MINUTE-SECOND.
  local stamp_format='+%Y-%m-%d__%H-%M-%S'
  date "$stamp_format"
}
21
22# TODO: add benchmark labels/hashes for osh and all other shells
23#
24# Need to archive labels too.
25#
26# TODO: How do I make sure the zsh label is current? Across different
27# machines?
28#
29# What happens when zsh is silently upgraded?
30# I guess before every benchmark, you have to run the ID collection. Man
31# that is a lot of code.
32#
33# Should I make symlinks to the published location?
34#
35# Maybe bash/dash/mksh/zsh should be invoked through a symlink?
36# Every symlink is a shell runtime version, and it has an associated
37# toolchain?
38
39# Platform is ambient?
40# _tmp/
41# shell-id/
42# bash/
43# HASH.txt
44# version.txt
45# dash/
46# HASH.txt
47# version.txt
48# host-id/
49# lisa/
50# HASH.txt
51# cpuinfo.txt
52
53# ../benchmark-data/
54# shell-id/
55# bash-$HASH/
56# osh-$HASH/ # osh-cpython, osh-ovm? osh-opy-ovm? Too many dimensions.
57# # the other shells don't have this?
58# zsh-$HASH/
59# host-id/
60# lisa-$HASH/
61
_dump-if-exists() {
  ### Copy the contents of a regular file to another path, if it exists.
  #
  # Args:
  #   $1 path: file to read
  #   $2 out: file to write
  #
  # When $path is not a regular file, nothing is written and the function
  # returns 0.
  local path=$1
  local out=$2

  if ! test -f "$path"; then
    return
  fi

  # $out is quoted so that a destination containing spaces is one filename.
  cat "$path" > "$out"
}
70
71#
72# Shell ID
73#
74
dump-shell-id() {
  ### Record identifying facts about one shell (or python2) in a directory.
  #
  # Args:
  #   $1 sh_path: name or path of the interpreter; must resolve with
  #      'command -v'
  #   $2 out_dir: directory to create and fill
  #
  # Always writes sh-path.txt.  Depending on the interpreter it also writes
  # version.txt, osh-version.txt, dpkg-version.txt, git-branch.txt and
  # git-commit-hash.txt.  Calls 'die' when the interpreter can't be found or
  # its basename isn't one of the names handled below.
  local sh_path=$1
  local out_dir=$2

  if ! command -v "$sh_path" >/dev/null; then
    die "dump-shell-id: Couldn't find $sh_path"
  fi

  mkdir -p "$out_dir"

  printf '%s\n' "$sh_path" > "$out_dir/sh-path.txt"

  # Add extra repository info for osh.
  case $sh_path in
    */osh*)
      local commit_hash=$out_dir/git-commit-hash.txt

      if test -n "${XSHAR_GIT_COMMIT:-}"; then
        # An explicitly supplied commit takes precedence over asking git.
        printf '%s\n' "$XSHAR_GIT_COMMIT" > "$commit_hash"
      else
        local branch
        branch=$(git rev-parse --abbrev-ref HEAD)
        printf '%s\n' "$branch" > "$out_dir/git-branch.txt"
        git rev-parse "$branch" > "$commit_hash"
      fi
      ;;
  esac

  local sh_name
  sh_name=$(basename "$sh_path")

  case $sh_name in
    bash|zsh|yash)
      "$sh_path" --version > "$out_dir/version.txt"
      ;;
    osh)
      case $sh_path in
        *_bin/*/osh)
          # Doesn't support --version yet
          ;;
        *)
          "$sh_path" --version > "$out_dir/osh-version.txt"
          ;;
      esac
      ;;
    # oils-for-unix|oils-for-unix.stripped)
    #  ;;
    dash|mksh)
      # These don't have version strings!
      dpkg -s "$sh_name" > "$out_dir/dpkg-version.txt"
      ;;

    # not a shell, but useful for benchmarks/compute
    python2)
      # The version text is captured from stderr.
      "$sh_path" -V 2> "$out_dir/version.txt"
      ;;
    *)
      die "Invalid shell '$sh_name'"
      ;;
  esac
}
136
_shell-id-hash() {
  ### Print to stdout the fields of a dumped shell ID that feed its hash.
  #
  # Args:
  #   $1 src: directory holding the files written by dump-shell-id
  #
  # Files that don't exist are skipped.  The explicit 'return 0' at the end
  # keeps a skipped file from becoming the function's exit status.
  local src=$1

  local file

  # for shells and Python
  file=$src/version.txt
  test -f "$file" && cat "$file"

  # Only hash the dimensions we want to keep
  file=$src/dpkg-version.txt
  test -f "$file" && grep -E '^Version' "$file"

  # Interpreter as CPython vs. OVM is what we care about, so
  # select 'Interpreter:' but not 'Interpreter version:'.
  # For example, the version is different on Ubuntu Bionic vs. Trusty, but we
  # ignore that.
  file=$src/osh-version.txt
  test -f "$file" && grep -E '^Oil version|^Interpreter:' "$file"

  # For OSH
  file=$src/git-commit-hash.txt
  test -f "$file" && cat "$file"

  return 0
}
163
publish-shell-id() {
  ### Copy temp directory to hashed location
  #
  # Args:
  #   $1 src: directory holding a dumped shell ID, e.g. _tmp/prov-tmp/osh
  #   $2 dest_base: parent of the published directory
  #      (default ../benchmark-data/shell-id; _tmp/shell-id is another choice)
  #
  # Creates $dest_base/$sh_name-$id, where $id is the first 8 characters of
  # the md5sum of what _shell-id-hash prints, copies $src into it and adds
  # HASH.txt.  Prints $id to stdout.
  local src=$1
  local dest_base=${2:-../benchmark-data/shell-id}

  local sh_path sh_name
  # -r keeps any backslash in the recorded path literal.
  read -r sh_path < "$src/sh-path.txt"
  sh_name=$(basename "$sh_path")

  local hash
  hash=$(_shell-id-hash "$src" | md5sum)  # not secure, an identifier

  local id="${hash:0:8}"
  local dest="$dest_base/$sh_name-$id"

  mkdir -p "$dest"
  cp --no-target-directory --recursive "$src/" "$dest/"

  # Deliberately unquoted: $hash is md5sum's whole output line, and word
  # splitting collapses its separator so HASH.txt keeps the 'digest -' form.
  # shellcheck disable=SC2086
  echo $hash > "$dest/HASH.txt"

  log "Published shell ID to $dest"

  echo "$id"
}
189
190#
191# Platform ID
192#
193
194# Events that will change the env for a given machine:
195# - kernel upgrade
196# - distro upgrade
197
198# How about ~/git/oilshell/benchmark-data/host-id/lisa-$HASH
199# How to calculate the hash though?
200
dump-host-id() {
  ### Record facts about the current machine as text files in a directory.
  #
  # Args:
  #   $1 out_dir: directory to create and fill
  #      (default _tmp/host-id/$(hostname))
  #
  # Writes hostname.txt, machine.txt and kernel.txt.  Writes lsb-release.txt,
  # cpuinfo.txt and meminfo.txt only when their source files exist.
  local out_dir=${1:-_tmp/host-id/$(hostname)}

  mkdir -p "$out_dir"

  hostname > "$out_dir/hostname.txt"

  # does it make sense to do individual fields like -m?
  # avoid parsing?
  # We care about the kernel and the CPU architecture.
  # There is a lot of redundant information there.
  uname -m > "$out_dir/machine.txt"

  {
    # Short flags work on OS X too
    uname -s  # --kernel-name
    uname -r  # --kernel-release
    uname -v  # --kernel-version
  } > "$out_dir/kernel.txt"

  _dump-if-exists /etc/lsb-release "$out_dir/lsb-release.txt"

  # remove the cpu MHz field, which changes a lot
  if test -e /proc/cpuinfo; then
    grep -i -v 'cpu mhz' /proc/cpuinfo > "$out_dir/cpuinfo.txt"
  fi

  # mem info doesn't make a difference?  I guess it's just nice to check that
  # it's not swapping.  But shouldn't be part of the hash.

  if test -e /proc/meminfo; then
    grep '^MemTotal' /proc/meminfo > "$out_dir/meminfo.txt"
  fi

  #head $out_dir/* 1>&2  # don't write to stdout
}
237
238# There is already the concept of a target triplet?
239# http://wiki.osdev.org/Target_Triplet
240# It's not exactly the same as what we need here, but close.
241
_host-id-hash() {
  ### Print to stdout the parts of a dumped host ID that feed its hash.
  #
  # Args:
  #   $1 src: directory holding machine.txt, kernel.txt and, optionally,
  #      lsb-release.txt
  local src=$1

  # Don't hash CPU or memory
  #cat $src/cpuinfo.txt
  #cat $src/hostname.txt  # e.g. lisa

  cat "$src/machine.txt"  # e.g. x86_64
  cat "$src/kernel.txt"

  # OS
  local file=$src/lsb-release.txt
  if test -f "$file"; then
    cat "$file"
  fi

  return 0
}
260
261# Writes a short ID to stdout.
publish-host-id() {
  ### Copy a dumped host ID directory to a location named after its hash.
  #
  # Args:
  #   $1 src: directory holding a dumped host ID, e.g. _tmp/host-id/lisa
  #   $2 dest_base: parent of the published directory
  #      (default ../benchmark-data/host-id)
  #
  # Creates $dest_base/$name-$id, where $name is the basename of $src and $id
  # is the first 8 characters of the md5sum of what _host-id-hash prints,
  # copies $src into it and adds HASH.txt.  Prints $id to stdout.
  local src=$1
  local dest_base=${2:-../benchmark-data/host-id}

  local name
  name=$(basename "$src")

  local hash
  hash=$(_host-id-hash "$src" | md5sum)  # not secure, an identifier

  local id="${hash:0:8}"
  local dest="$dest_base/$name-$id"

  mkdir -p "$dest"
  cp --no-target-directory --recursive "$src/" "$dest/"

  # Deliberately unquoted: $hash is md5sum's whole output line, and word
  # splitting collapses its separator so HASH.txt keeps the 'digest -' form.
  # shellcheck disable=SC2086
  echo $hash > "$dest/HASH.txt"

  log "Published host ID to $dest"

  echo "$id"
}
284
285#
286# Compilers
287#
288
dump-compiler-id() {
  ### Write a compiler's '--version' output to $out_dir/version.txt.
  #
  # Args:
  #   $1 cc: path to the compiler.  Only paths ending in /gcc or /clang are
  #      queried; for anything else version.txt is left empty.
  #   $2 out_dir: directory to create
  #      (default _tmp/compiler-id/<basename of $cc>)
  local cc=$1
  local out_dir=${2:-_tmp/compiler-id/$(basename "$cc")}

  mkdir -p "$out_dir"

  case $cc in
    */gcc|*/clang)
      # --version is enough: for gcc, -v has more details but they might be
      # overkill; for clang, -v has stuff we don't want.
      "$cc" --version
      ;;
  esac > "$out_dir/version.txt"
}
306
_compiler-id-hash() {
  ### Print the compiler version text that feeds the compiler ID hash.
  #
  # Args:
  #   $1 src: directory holding version.txt
  local src=$1

  # Remove some extraneous information from clang.
  grep -v InstalledDir < "$src/version.txt"
}
313
314# Writes a short ID to stdout.
publish-compiler-id() {
  ### Copy a dumped compiler ID directory to a location named after its hash.
  #
  # Args:
  #   $1 src: directory holding a dumped compiler ID,
  #      e.g. _tmp/compiler-id/clang
  #   $2 dest_base: parent of the published directory
  #      (default ../benchmark-data/compiler-id)
  #
  # Creates $dest_base/$name-$id, where $name is the basename of $src and $id
  # is the first 8 characters of the md5sum of what _compiler-id-hash prints,
  # copies $src into it and adds HASH.txt.  Prints $id to stdout.
  local src=$1
  local dest_base=${2:-../benchmark-data/compiler-id}

  # Declared separately from the assignment so a failing basename isn't
  # masked by the exit status of 'local'.
  local name
  name=$(basename "$src")

  local hash
  hash=$(_compiler-id-hash "$src" | md5sum)  # not secure, an identifier

  local id="${hash:0:8}"
  local dest="$dest_base/$name-$id"

  mkdir -p "$dest"
  cp --no-target-directory --recursive "$src/" "$dest/"

  # Deliberately unquoted: $hash is md5sum's whole output line, and word
  # splitting collapses its separator so HASH.txt keeps the 'digest -' form.
  # shellcheck disable=SC2086
  echo $hash > "$dest/HASH.txt"

  log "Published compiler ID to $dest"

  echo "$id"
}
335
336#
337# Table Output
338#
339
340# Writes a table of host and shells to stdout. Writes text files and
341# calculates IDs for them as a side effect.
342#
343# The table can be passed to other benchmarks to ensure that their provenance
344# is recorded.
345
shell-provenance-2() {
  ### Write to _tmp/provenance.{txt,tsv} and $out_dir/{shell,host-id}
  #
  # Args:
  #   $1 maybe_host: if non-empty, used as the host name instead of
  #      $(hostname)
  #   $2 job_id: label copied into every output row
  #   $3 out_dir: base directory handed to publish-host-id (as
  #      $out_dir/host-id) and publish-shell-id (as $out_dir/shell-id)
  #   $4...: shells to record; each one produces one row in both files
  local maybe_host=$1  # if it exists, it overrides the host
  local job_id=$2
  local out_dir=$3
  shift 3

  # log "*** shell-provenance"

  mkdir -p _tmp/provenance

  local host_name
  if test -n "$maybe_host"; then  # label is often 'no-host'
    host_name=$maybe_host
  else
    host_name=$(hostname)
  fi

  log "*** $maybe_host $host_name $job_id $out_dir"

  local tmp_dir=_tmp/prov-tmp/$host_name
  dump-host-id "$tmp_dir"

  local host_hash
  host_hash=$(publish-host-id "$tmp_dir" "$out_dir/host-id")
  local shell_hash

  local out_txt=_tmp/provenance.txt  # Legacy text file
  : > "$out_txt"  # truncate, no header

  local out_tsv=_tmp/provenance.tsv
  tsv-row job_id host_name host_hash sh_path shell_hash > "$out_tsv"

  local i=0
  local sh_path  # keeps the loop variable out of the caller's scope

  for sh_path in "$@"; do
    # There can be two different OSH, so temp dirs are numbered, not named.

    tmp_dir=_tmp/prov-tmp/shell-$i
    i=$((i + 1))

    dump-shell-id "$sh_path" "$tmp_dir"

    # writes to ../benchmark-data or _tmp/provenance
    shell_hash=$(publish-shell-id "$tmp_dir" "$out_dir/shell-id")

    # note: filter-provenance depends on $4 being $sh_path
    # APPEND to txt
    echo "$job_id $host_name $host_hash $sh_path $shell_hash" >> "$out_txt"

    tsv-row "$job_id" "$host_name" "$host_hash" "$sh_path" "$shell_hash" >> "$out_tsv"
  done

  log "Wrote $out_txt and $out_tsv"
}
402
compiler-provenance() {
  ### Record host and compiler IDs for gcc and $CLANG; print the report path.
  #
  # Takes no arguments; reads the global $CLANG.
  #
  # Writes one line per compiler,
  #   "$job_id $host $host_hash $compiler_path $compiler_hash"
  # to _tmp/provenance/$host.$job_id.compiler-provenance.txt, then prints
  # that path to stdout so callers can capture it with $(...).
  local job_id
  job_id=$(print-job-id)

  local host
  host=$(hostname)

  # Filename
  local out=_tmp/provenance/${host}.${job_id}.compiler-provenance.txt

  # Nothing else in this function creates the report's directory.
  mkdir -p _tmp/provenance

  local tmp_dir=_tmp/host-id/$host
  dump-host-id "$tmp_dir"

  local host_hash
  host_hash=$(publish-host-id "$tmp_dir")

  # gcc is assumed to be in the $PATH.  A gcc that can't be found, or an
  # empty $CLANG, is skipped rather than treated as an error.  Each path is
  # kept as a single array element so that it is never word-split.
  local -a compilers=()
  local gcc_path
  gcc_path=$(command -v gcc) || gcc_path=''
  if test -n "$gcc_path"; then
    compilers+=("$gcc_path")
  fi
  if test -n "$CLANG"; then
    compilers+=("$CLANG")
  fi

  local compiler_path name compiler_hash

  # The ${arr[@]+...} form expands to nothing for an empty array, which
  # avoids a nounset error on older bash.
  for compiler_path in ${compilers[@]+"${compilers[@]}"}; do
    name=$(basename "$compiler_path")

    tmp_dir=_tmp/compiler-id/$name
    dump-compiler-id "$compiler_path" "$tmp_dir"

    compiler_hash=$(publish-compiler-id "$tmp_dir")

    echo "$job_id $host $host_hash $compiler_path $compiler_hash"
  done > "$out"

  log "Wrote $out"

  # Return value used in command sub
  echo "$out"
}
438
out-param() {
  ### Assign the string 'returned' to the caller's variable named by $1.
  #
  # The nameref has a deliberately unusual name: a nameref called 'out' would
  # collide with a caller variable that is itself called 'out' (a circular
  # name reference).
  declare -n __out_param_ref=$1

  __out_param_ref=returned
}
444
# Run "$@" (a function name plus its arguments) only when $0 names id.sh.
# ${0##*/} strips the directory part without spawning 'basename', and stays a
# single word for paths containing spaces or starting with '-'.
if test "${0##*/}" = 'id.sh'; then
  "$@"
fi
448