1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Metrics for Oil bytecode produced by the OPy compiler.
|
4 | #
|
5 | # This is more like a metric than a benchmark. In particular, we do NOT need
|
6 | # to run it on multiple machines! It doesn't need the provenance of binaries
|
7 | # and so forth.
|
8 | #
|
9 | # But it IS like a benchmark in that we use R to analyze data and want HTML
|
10 | # reports.
|
11 | #
|
12 | # NOTE: We will eventually have benchmarks for OPy compile time.
|
13 | #
|
14 | # Usage:
|
15 | # ./bytecode.sh <function name>
|
16 |
|
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

source build/dev-shell.sh  # R_LIBS_USER
source test/common.sh  # log

# Directory (relative to the repo root) that holds every output of this script.
BASE_DIR=_tmp/metrics/bytecode
readonly BASE_DIR
|
25 |
|
# Write the sorted names of all opcodes defined in opy.lib.opcode.opmap to
# _tmp/opcodes-defined.txt (one per line), then print the line count.
#
# Outputs: the 'wc -l' summary of the written file on stdout.
write-opcodes() {
  local out=_tmp/opcodes-defined.txt

  # run-for-release calls this before anything else has created _tmp, so make
  # sure the directory exists instead of failing on the redirect.
  mkdir -p _tmp

  # 119 ops?
  PYTHONPATH=. python2 -c '
from opy.lib import opcode
for name in sorted(opcode.opmap):
  print(name)
' > "$out"

  wc -l "$out"  # 119 defined
}
|
36 |
|
37 | # NOTE: We analyze ~76 bytecode files. This produces 5 TSV2 files that
|
38 | # are ~131K rows in ~8.5 MB altogether. The biggest table is the 'ops' table.
|
39 |
|
# Disassemble the OPy-compiled .pyc files into TSV2 tables.
#
# Globals: BASE_DIR (read)
# Inputs:  _build/oil/opy-app-deps.txt; rows whose first field ends in .pyc
#          are selected and that field is passed to 'bin/opyc dis-tables'.
# Outputs: tables under $BASE_DIR/opy-dis-tables; prints timing for the
#          pipeline and the line counts of the resulting *.tsv2 files.
opy-dis-tables() {
  local out_dir="$BASE_DIR/opy-dis-tables"
  mkdir -p "$out_dir"

  # Pass the .pyc files in the bytecode-opy.zip file to 'opyc dis'

  # The .pyc files look like _build/oil/bytecode-opy/os.pyc
  time awk '$1 ~ /\.pyc$/ { print $1 }' _build/oil/opy-app-deps.txt \
    | xargs -- bin/opyc dis-tables "$out_dir"

  # Quote the directory but leave the glob unquoted so it still expands.
  wc -l "$out_dir"/*.tsv2
}
|
53 |
|
# Disassemble the .pyc files that sit next to the .py sources into TSV2 tables.
#
# Hm it seems like build/prepare.sh build-python is necessary for this?
#
# Globals: BASE_DIR (read)
# Inputs:  _build/oil/opy-app-deps.txt; rows whose first field ends in .py are
#          selected, and "c" is appended to form the .pyc path handed to
#          'bin/opyc dis-tables' (presumably the CPython-compiled file --
#          TODO confirm).
# Outputs: tables under $BASE_DIR/cpython-dis-tables; prints timing for the
#          pipeline and the line counts of the resulting *.tsv2 files.
cpython-dis-tables() {
  local out_dir="$BASE_DIR/cpython-dis-tables"
  mkdir -p "$out_dir"

  # The .py files look like /home/andy/git/oilshell/oil/Python-2.7.13/Lib/os.py
  time awk '$1 ~ /\.py$/ { print $1 "c" }' _build/oil/opy-app-deps.txt \
    | xargs -- bin/opyc dis-tables "$out_dir"

  # Quote the directory but leave the glob unquoted so it still expands.
  wc -l "$out_dir"/*.tsv2
}
|
65 |
|
66 | # CPython:
|
67 | #
|
68 | # 9143 _tmp/metrics/bytecode/cpython/consts.tsv2
|
69 | # 3956 _tmp/metrics/bytecode/cpython/flags.tsv2
|
70 | # 1858 _tmp/metrics/bytecode/cpython/frames.tsv2
|
71 | # 19808 _tmp/metrics/bytecode/cpython/names.tsv2
|
72 | # 76504 _tmp/metrics/bytecode/cpython/ops.tsv2
|
73 | # 111269 total
|
74 | #
|
75 | # OPy:
|
76 | # 8338 _tmp/metrics/bytecode/consts.tsv2 # fewer docstrings?
|
77 | # 3909 _tmp/metrics/bytecode/flags.tsv2
|
78 | # 1857 _tmp/metrics/bytecode/frames.tsv2
|
79 | # 35609 _tmp/metrics/bytecode/names.tsv2
|
80 | # 80396 _tmp/metrics/bytecode/ops.tsv2
|
81 | # 130109 total
|
82 | #
|
83 | # Yes, I see there is a bug in the names.
|
84 | # Frames are almost exactly the same, which I expected.
|
85 |
|
86 |
|
# Run the R analysis script, forwarding every argument unchanged.
#
# Arguments: passed straight through to metrics/bytecode.R
# Returns:   the exit status of the R script
report() {
  local -r r_script=metrics/bytecode.R
  "$r_script" "$@"
}
|
90 |
|
# Reads the 5 tables and produces some metrics.
#
# Globals: BASE_DIR (read)
# Runs 'report metrics' on the opy-dis-tables directory; the path is quoted so
# it is always passed as a single argument.
metrics-opy() {
  report metrics "$BASE_DIR/opy-dis-tables"
}
|
95 |
|
# Run 'report compare' on the CPython tables and the OPy tables, in that
# order.
#
# Globals: BASE_DIR (read)
# Both directory paths are quoted so each is passed as a single argument.
compare() {
  report compare "$BASE_DIR/cpython-dis-tables" "$BASE_DIR/opy-dis-tables"
}
|
99 |
|
# Computes the input/output file size ratio from a .py / .pyc manifest by
# delegating to 'report src-bin-ratio'.
src-bin-ratio() {
  # The manifest, and the base directory the .pyc files live under.
  local manifest=_build/oil/all-deps-py.txt
  local pyc_base_dir=_build/oil/bytecode-opy

  report src-bin-ratio "$manifest" "$pyc_base_dir"
}
|
106 |
|
# Regenerate every bytecode metric used for a release.
#
# Globals: BASE_DIR (read)
# Steps:   writes the opcode list, builds the OPy and CPython disassembly
#          tables, then writes four text reports under $BASE_DIR and logs each
#          path as it is written.
run-for-release() {
  write-opcodes  # _tmp/opcodes-defined.txt, for analysis

  opy-dis-tables
  cpython-dis-tables

  local out

  # Redirect targets are quoted: an unquoted target that word-splits makes
  # Bash fail with "ambiguous redirect".
  out="$BASE_DIR/oil-with-opy.txt"
  report metrics "$BASE_DIR/opy-dis-tables" > "$out"
  log "Wrote $out"

  out="$BASE_DIR/oil-with-cpython.txt"
  report metrics "$BASE_DIR/cpython-dis-tables" > "$out"
  log "Wrote $out"

  out="$BASE_DIR/src-bin-ratio-with-opy.txt"
  src-bin-ratio > "$out"
  log "Wrote $out"

  out="$BASE_DIR/overview.txt"
  compare > "$out"
  log "Wrote $out"
}
|
131 |
|
132 | # TODO:
|
133 | # - opy/callgraph.py should output a table too
|
134 | # - then take the difference to find which ones are unused
|
135 | # - problem: it doesn't have unique names? Should we add (name, firstlineno)
|
136 | # to the key? That is only stable for the exact same version.
|
137 | # - compare bytecode vs CPython
|
138 | # - I think there is a bug with 'names' ?
|
139 |
|
140 | # maybe:
|
141 | # - analyze native code for OVM from GCC/Clang output?
|
142 |
|
# Dispatch: treat the command line as a function name followed by its
# arguments and run it, e.g. ./bytecode.sh run-for-release. With no arguments
# this expands to nothing and the script exits successfully.
"$@"
|