| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Metrics for Oil bytecode produced by the OPy compiler.
|
| 4 | #
|
| 5 | # This is more like a metric than a benchmark. In particular, we do NOT need
|
| 6 | # to run it on multiple machines! It doesn't need the provenance of binaries
|
| 7 | # and so forth.
|
| 8 | #
|
| 9 | # But it IS like a benchmark in that we use R to analyze data and want HTML
|
| 10 | # reports.
|
| 11 | #
|
| 12 | # NOTE: We will eventually have benchmarks for OPy compile time.
|
| 13 | #
|
| 14 | # Usage:
|
| 15 | # ./bytecode.sh <function name>
|
| 16 |
|
| 17 | set -o nounset
|
| 18 | set -o pipefail
|
| 19 | set -o errexit
|
| 20 |
|
| 21 | source build/dev-shell.sh # R_LIBS_USER
|
| 22 | source test/common.sh # log
|
| 23 |
|
| 24 | readonly BASE_DIR=_tmp/metrics/bytecode
|
| 25 |
|
# Dump the names of all opcodes defined by the OPy copy of the 'opcode'
# module, one per line, to _tmp/opcodes-defined.txt (~119 of them).
write-opcodes() {
  local out=_tmp/opcodes-defined.txt
  PYTHONPATH=. python2 -c '
from opy.lib import opcode
names = sorted(opcode.opmap)
for n in names:
  print(n)
' > "$out"
  wc -l "$out"  # 119 defined
}
|
| 36 |
|
# NOTE: We analyze ~76 bytecode files. This produces 5 TSV2 files that
# are ~131K rows in ~8.5 MB altogether. The biggest table is the 'ops' table.
|
| 39 |
|
# Disassemble the OPy-compiled bytecode: pass the .pyc files listed in the
# OPy app manifest to 'opyc dis-tables', which writes 5 TSV2 tables
# (consts, flags, frames, names, ops) into $BASE_DIR/opy-dis-tables.
opy-dis-tables() {
  local out_dir=$BASE_DIR/opy-dis-tables
  mkdir -p "$out_dir"

  # The .pyc files look like _build/oil/bytecode-opy/os.pyc.
  # (awk reads the manifest directly; no need for 'cat file | awk'.)
  time awk '$1 ~ /\.pyc$/ { print $1 }' _build/oil/opy-app-deps.txt \
    | xargs -- bin/opyc dis-tables "$out_dir"

  wc -l "$out_dir"/*.tsv2
}
|
| 53 |
|
# Disassemble the CPython-compiled bytecode for the same modules, for
# comparison against the OPy output.
# Hm it seems like build/prepare.sh build-python is necessary for this?
cpython-dis-tables() {
  local out_dir=$BASE_DIR/cpython-dis-tables
  mkdir -p "$out_dir"

  # The .py files look like /home/andy/git/oilshell/oil/Python-2.7.13/Lib/os.py;
  # appending "c" turns each source path into its compiled .pyc path.
  # (awk reads the manifest directly; no need for 'cat file | awk'.)
  time awk '$1 ~ /\.py$/ { print $1 "c" }' _build/oil/opy-app-deps.txt \
    | xargs -- bin/opyc dis-tables "$out_dir"

  wc -l "$out_dir"/*.tsv2
}
|
| 65 |
|
| 66 | # CPython:
|
| 67 | #
|
| 68 | # 9143 _tmp/metrics/bytecode/cpython/consts.tsv2
|
| 69 | # 3956 _tmp/metrics/bytecode/cpython/flags.tsv2
|
| 70 | # 1858 _tmp/metrics/bytecode/cpython/frames.tsv2
|
| 71 | # 19808 _tmp/metrics/bytecode/cpython/names.tsv2
|
| 72 | # 76504 _tmp/metrics/bytecode/cpython/ops.tsv2
|
| 73 | # 111269 total
|
| 74 | #
|
| 75 | # OPy:
|
| 76 | # 8338 _tmp/metrics/bytecode/consts.tsv2 # fewer docstrings?
|
| 77 | # 3909 _tmp/metrics/bytecode/flags.tsv2
|
| 78 | # 1857 _tmp/metrics/bytecode/frames.tsv2
|
| 79 | # 35609 _tmp/metrics/bytecode/names.tsv2
|
| 80 | # 80396 _tmp/metrics/bytecode/ops.tsv2
|
| 81 | # 130109 total
|
| 82 | #
|
# Yes, I see there is a bug in the names.
|
| 84 | # Frames are almost exactly the same, which I expected.
|
| 85 |
|
| 86 |
|
# Run the R analysis script, forwarding the subcommand and its arguments.
report() {
  metrics/bytecode.R "$@"
}
|
| 90 |
|
# Reads the 5 TSV2 tables produced by opy-dis-tables and prints metrics.
metrics-opy() {
  report metrics "$BASE_DIR/opy-dis-tables"
}
|
| 95 |
|
# Compare the CPython-produced tables against the OPy-produced ones.
compare() {
  report compare "$BASE_DIR/cpython-dis-tables" "$BASE_DIR/opy-dis-tables"
}
|
| 99 |
|
# Reads a .py / .pyc manifest and calculates the ratio of input/output file
# sizes.
src-bin-ratio() {
  # Pass the manifest and the base directory of .pyc files.
  report src-bin-ratio _build/oil/all-deps-py.txt _build/oil/bytecode-opy
}
|
| 106 |
|
# Produce all bytecode-metrics artifacts for a release: the opcode list,
# both disassembly table sets, and four text reports under $BASE_DIR.
run-for-release() {
  write-opcodes  # _tmp/opcodes-defined.txt, for analysis

  opy-dis-tables
  cpython-dis-tables

  local out

  out=$BASE_DIR/oil-with-opy.txt
  report metrics "$BASE_DIR/opy-dis-tables" > "$out"
  log "Wrote $out"

  out=$BASE_DIR/oil-with-cpython.txt
  report metrics "$BASE_DIR/cpython-dis-tables" > "$out"
  log "Wrote $out"

  out=$BASE_DIR/src-bin-ratio-with-opy.txt
  src-bin-ratio > "$out"
  log "Wrote $out"

  out=$BASE_DIR/overview.txt
  compare > "$out"
  log "Wrote $out"
}
|
| 131 |
|
| 132 | # TODO:
|
| 133 | # - opy/callgraph.py should output a table too
|
| 134 | # - then take the difference to find which ones are unused
|
| 135 | # - problem: it doesn't have unique names? Should we add (name, firstlineno)
|
| 136 | # to the key? That is only stable for the exact same version.
|
| 137 | # - compare bytecode vs CPython
|
| 138 | # - I think there is a bug with 'names' ?
|
| 139 |
|
| 140 | # maybe:
|
| 141 | # - analyze native code for OVM from GCC/Clang output?
|
| 142 |
|
| 143 | "$@"
|