#!/usr/bin/env bash
#
# Metrics for Oil bytecode produced by the OPy compiler.
#
# This is more like a metric than a benchmark.  In particular, we do NOT need
# to run it on multiple machines!  It doesn't need the provenance of binaries
# and so forth.
#
# But it IS like a benchmark in that we use R to analyze data and want HTML
# reports.
#
# NOTE: We will eventually have benchmarks for OPy compile time.
#
# Usage:
#   ./bytecode.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

source build/dev-shell.sh  # R_LIBS_USER
source test/common.sh  # log

readonly BASE_DIR=_tmp/metrics/bytecode

write-opcodes() {
  # 119 ops?
  PYTHONPATH=. python2 > _tmp/opcodes-defined.txt -c '
from opy.lib import opcode
names = sorted(opcode.opmap)
for n in names:
  print(n)
'
  wc -l _tmp/opcodes-defined.txt  # 119 defined
}
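
# Not in the original script: a hedged sketch that diffs the defined opcodes
# against the ones that actually appear in the generated ops table.  It
# ASSUMES the opcode name is the last whitespace-separated field of each row
# in ops.tsv2, which may not match the real TSV2 schema.
opcodes-unused() {
  local ops_table=$BASE_DIR/opy-dis-tables/ops.tsv2  # written by opy-dis-tables
  awk 'NR > 1 { print $NF }' $ops_table | sort -u > _tmp/opcodes-used.txt
  # Opcodes that are defined but never show up in the table.
  comm -2 -3 _tmp/opcodes-defined.txt _tmp/opcodes-used.txt
}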

# NOTE: We analyze ~76 bytecode files.  This produces 5 TSV2 files totaling
# ~131K rows and ~8.5 MB.  The biggest table is the 'ops' table.

opy-dis-tables() {
  local out_dir=$BASE_DIR/opy-dis-tables
  mkdir -p $out_dir

  # Pass the .pyc files in the bytecode-opy.zip file to 'opyc dis-tables'.

  # The .pyc files look like _build/oil/bytecode-opy/os.pyc
  time cat _build/oil/opy-app-deps.txt \
    | awk ' $1 ~ /\.pyc$/ { print $1 }' \
    | xargs -- bin/opyc dis-tables $out_dir

  wc -l $out_dir/*.tsv2
}

# Hm it seems like build/prepare.sh build-python is necessary for this?
cpython-dis-tables() {
  local out_dir=$BASE_DIR/cpython-dis-tables
  mkdir -p $out_dir
  # The .py files look like /home/andy/git/oilshell/oil/Python-2.7.13/Lib/os.py
  time cat _build/oil/opy-app-deps.txt \
    | awk ' $1 ~ /\.py$/ { print $1 "c" }' \
    | xargs -- bin/opyc dis-tables $out_dir

  wc -l $out_dir/*.tsv2
}

# CPython:
#
#   9143 _tmp/metrics/bytecode/cpython/consts.tsv2
#   3956 _tmp/metrics/bytecode/cpython/flags.tsv2
#   1858 _tmp/metrics/bytecode/cpython/frames.tsv2
#  19808 _tmp/metrics/bytecode/cpython/names.tsv2
#  76504 _tmp/metrics/bytecode/cpython/ops.tsv2
# 111269 total
#
# OPy:
#   8338 _tmp/metrics/bytecode/consts.tsv2  # fewer docstrings?
#   3909 _tmp/metrics/bytecode/flags.tsv2
#   1857 _tmp/metrics/bytecode/frames.tsv2
#  35609 _tmp/metrics/bytecode/names.tsv2
#  80396 _tmp/metrics/bytecode/ops.tsv2
# 130109 total
#
# Yes, I see there is a bug in the names.
# Frames are almost exactly the same, which I expected.
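
# Not in the original script: a small helper to eyeball the row counts and
# on-disk sizes that the NOTE and the numbers above refer to.  It only uses
# the output directories defined in this file.
table-stats() {
  local dir
  for dir in $BASE_DIR/opy-dis-tables $BASE_DIR/cpython-dis-tables; do
    echo "--- $dir"
    wc -l $dir/*.tsv2   # rows per table
    du -sh $dir         # total size on disk
  done
}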


report() {
  metrics/bytecode.R "$@"
}

# Reads the 5 tables and produces some metrics.
metrics-opy() {
  report metrics $BASE_DIR/opy-dis-tables
}

compare() {
  report compare $BASE_DIR/cpython-dis-tables $BASE_DIR/opy-dis-tables
}

# Reads a .py / .pyc manifest and calculates the ratio of input/output file
# sizes.
src-bin-ratio() {
  # Pass the manifest and the base directory of .pyc files.
  report src-bin-ratio _build/oil/all-deps-py.txt _build/oil/bytecode-opy
}
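
# Not in the original script: a rough shell-only cross-check of the same
# ratio.  It ASSUMES the manifest lists a .py path in its first column, which
# is a guess about the format of all-deps-py.txt.
src-bin-ratio-quick() {
  local py_bytes pyc_bytes
  py_bytes=$(awk '{ print $1 }' _build/oil/all-deps-py.txt | xargs cat | wc -c)
  pyc_bytes=$(find _build/oil/bytecode-opy -name '*.pyc' | xargs cat | wc -c)
  echo "py bytes: $py_bytes"
  echo "pyc bytes: $pyc_bytes"
  # Scale by 100 since shell arithmetic is integer-only.
  echo "pyc/py ratio * 100: $(( pyc_bytes * 100 / py_bytes ))"
}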

run-for-release() {
  write-opcodes  # _tmp/opcodes-defined.txt, for analysis

  opy-dis-tables
  cpython-dis-tables

  local out

  out=$BASE_DIR/oil-with-opy.txt
  report metrics $BASE_DIR/opy-dis-tables > $out
  log "Wrote $out"

  out=$BASE_DIR/oil-with-cpython.txt
  report metrics $BASE_DIR/cpython-dis-tables > $out
  log "Wrote $out"

  out=$BASE_DIR/src-bin-ratio-with-opy.txt
  src-bin-ratio > $out
  log "Wrote $out"

  out=$BASE_DIR/overview.txt
  compare > $out
  log "Wrote $out"
}

# TODO:
# - opy/callgraph.py should output a table too
#   - then take the difference to find which ones are unused
#   - problem: it doesn't have unique names?  Should we add (name, firstlineno)
#     to the key?  That is only stable for the exact same version.  (See the
#     sketch below.)
# - compare bytecode vs CPython
#   - I think there is a bug with 'names'?
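
# Not in the original script: a hedged sketch of how a (name, firstlineno)
# key could be extracted for every code object in a .pyc, per the TODO above.
# It assumes the CPython 2.7 .pyc layout: a 4-byte magic number and a 4-byte
# mtime, followed by the marshalled top-level code object.
pyc-code-keys() {
  local pyc=$1  # e.g. _build/oil/bytecode-opy/os.pyc
  python2 -c '
import marshal, sys

with open(sys.argv[1], "rb") as f:
  f.read(8)  # skip the magic number and mtime header
  co = marshal.load(f)

def Walk(co):
  print("%s\t%d" % (co.co_name, co.co_firstlineno))
  for c in co.co_consts:
    if hasattr(c, "co_code"):  # nested code objects: functions, classes, etc.
      Walk(c)

Walk(co)
' "$pyc"
}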

# maybe:
# - analyze native code for OVM from GCC/Clang output?

"$@"