1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Test the size of file, encoding, and decoding speed.
|
4 | #
|
5 | # Usage:
|
6 | # ./oheap.sh <function name>
|
7 |
|
8 | set -o nounset
|
9 | set -o pipefail
|
10 | set -o errexit
|
11 |
|
12 | source test/common.sh
|
13 | source benchmarks/common.sh
|
14 |
|
15 | readonly BASE_DIR=_tmp/oheap
|
16 |
|
# Parse one shell script and write its AST in the oheap format.
#
# Arguments:
#   $1 - path of the shell script to parse
#   $2 - path of the oheap file to create (overwritten if it exists)
# Globals:
#   OSH_OVM (read) - command used to run OSH.  It is not defined in this
#                    file; presumably it comes from one of the sourced
#                    common.sh files -- TODO confirm.
encode-one() {
  local script=$1
  local oheap_out=$2

  # The redirect target is quoted: an unquoted target that contains
  # whitespace makes bash fail with "ambiguous redirect".
  # NOTE(review): $OSH_OVM is left unquoted as before, in case it ever holds
  # more than a bare command name -- confirm and quote it if it does not.
  $OSH_OVM -n --ast-format oheap "$script" > "$oheap_out"
}
|
22 |
|
# Print one "<script path> <oheap output path>" pair per line, one pair for
# every script listed in benchmarks/osh-parser-files.txt.  encode-all feeds
# this output to `xargs -n 2`, so each pair becomes the two arguments of
# encode-one.
#
# Globals:
#   BASE_DIR (read) - directory that receives the *__oheap files
# Outputs:
#   the pairs, on stdout
task-spec() {
  local path name

  # -r keeps backslashes literal.  IFS is left at its default on purpose, so
  # whitespace around a path is still trimmed (the consumer splits on
  # whitespace anyway).  The `|| [[ -n ... ]]` part keeps a final line that
  # has no trailing newline.
  while read -r path || [[ -n $path ]]; do
    [[ -n $path ]] || continue  # ignore blank lines
    name=$(basename -- "$path")
    printf '%s %s/%s__oheap\n' "$path" "$BASE_DIR" "$name"
  done < benchmarks/osh-parser-files.txt
}
|
28 |
|
# Encode every script listed by task-spec, recording one CSV row per run.
#
# For each "<script> <oheap_out>" pair printed by task-spec, xargs runs:
#
#   benchmarks/time.py --output <csv> -- $0 encode-one <script> <oheap_out>
#
# i.e. this script re-invokes itself with the encode-one function.
# NOTE(review): benchmarks/time.py is not visible here; presumably it appends
# a "status,elapsed_secs" row to the --output file, matching the header
# written below -- confirm.
#
# Globals:
#   BASE_DIR (read) - directory for times.csv (and, via task-spec, the
#                     encoded files)
encode-all() {
  mkdir -p "$BASE_DIR"

  local times_csv=$BASE_DIR/times.csv
  echo 'status,elapsed_secs' > "$times_csv"

  # "$0" and "$times_csv" are quoted so that a script path or base directory
  # containing whitespace is still passed as a single argument.
  task-spec | xargs -n 2 --verbose -- \
    benchmarks/time.py --output "$times_csv" -- \
    "$0" encode-one
}
|
39 |
|
40 | # Out of curiosity, compress oheap and originals.
|
41 |
|
# Compress every encoded *__oheap file with both gzip and xz.
#
# Globals:
#   BASE_DIR (read) - directory holding the *__oheap files; the compressed
#                     copies go to $BASE_DIR/oheap-compressed/<name>.gz|.xz
# Returns:
#   non-zero if there are no *__oheap files, or if a compressor fails
# Uses `log`, which is not defined in this file (presumably from one of the
# sourced common.sh files).
compress-oheap() {
  local c_dir=$BASE_DIR/oheap-compressed
  mkdir -p "$c_dir"

  local bin name
  for bin in "$BASE_DIR"/*__oheap; do
    # An unmatched glob stays as the literal pattern.  Fail clearly instead
    # of handing that pattern to gzip and leaving an empty '*__oheap.gz'.
    if [[ ! -e $bin ]]; then
      log "No *__oheap files found in $BASE_DIR"
      return 1
    fi

    # Assigned separately from `local` so a basename failure is not masked.
    name=$(basename -- "$bin")
    log "Compressing $name"
    gzip --stdout -- "$bin" > "$c_dir/$name.gz"
    xz --stdout -- "$bin" > "$c_dir/$name.xz"
  done
}
|
52 |
|
# Compress the original source of every script listed in
# benchmarks/osh-parser-files.txt with both gzip and xz.
#
# Globals:
#   BASE_DIR (read) - output goes to
#                     $BASE_DIR/src-compressed/<basename>__text.gz|.xz
# Uses `log`, which is not defined in this file (presumably from one of the
# sourced common.sh files).
compress-text() {
  local c_dir=$BASE_DIR/src-compressed
  mkdir -p "$c_dir"

  local src name
  # -r keeps backslashes literal; the `|| [[ -n ... ]]` part keeps a final
  # line that has no trailing newline.
  while read -r src || [[ -n $src ]]; do
    [[ -n $src ]] || continue  # ignore blank lines

    # Assigned separately from `local` so a basename failure is not masked.
    name=$(basename -- "$src")
    log "Compressing $name"
    gzip --stdout -- "$src" > "$c_dir/${name}__text.gz"
    xz --stdout -- "$src" > "$c_dir/${name}__text.xz"
  done < benchmarks/osh-parser-files.txt
}
|
64 |
|
# Print one CSV row "<size in bytes>,<c1>,<c2>,<path>" for each given path.
#
# Arguments:
#   $1    - value for the second CSV column (print-csv passes the format)
#   $2    - value for the third CSV column (print-csv passes the compression)
#   $3... - paths to report on
# Outputs:
#   the rows, on stdout
# Requires a find(1) that supports -printf (e.g. GNU find).
print-size() {
  local c1=$1
  local c2=$2
  shift 2

  # c1 and c2 are spliced into find's -printf format string, so escape the
  # two characters that the format treats specially: '\' and '%'.
  c1=${c1//\\/\\\\}
  c1=${c1//%/%%}
  c2=${c2//\\/\\\\}
  c2=${c2//%/%%}

  # depth 0: just the filename itself.
  find "$@" -maxdepth 0 -printf "%s,$c1,$c2,%p\n"
}
|
73 |
|
# Print a CSV table (with header) of file sizes: the text sources and the
# oheap encodings, each uncompressed, gzip-compressed and xz-compressed.
#
# Globals:
#   BASE_DIR (read) - directory holding the encoded and compressed files
# Outputs:
#   CSV on stdout with columns num_bytes,format,compression,path
print-csv() {
  echo 'num_bytes,format,compression,path'
  # TODO
  # NOTE(review): the original TODO has no text.  The uncompressed text rows
  # come from benchmarks/testdata/*, while compress-text reads its list from
  # benchmarks/osh-parser-files.txt -- possibly that mismatch is what it
  # refers to; confirm.
  print-size text none benchmarks/testdata/*
  # "$BASE_DIR" is quoted so a base directory containing whitespace is not
  # word-split; the glob part stays outside the quotes so it still expands.
  print-size text gz "$BASE_DIR"/src-compressed/*.gz
  print-size text xz "$BASE_DIR"/src-compressed/*.xz

  print-size oheap none "$BASE_DIR"/*__oheap
  print-size oheap gz "$BASE_DIR"/oheap-compressed/*.gz
  print-size oheap xz "$BASE_DIR"/oheap-compressed/*.xz
}
|
85 |
|
# Run all measurement steps in order: encode-all, then compress-oheap, then
# compress-text.
# This can be done on any host.
measure() {
  local step
  for step in encode-all compress-oheap compress-text; do
    "$step"
  done
}
|
92 |
|
# Write the size table produced by print-csv to $BASE_DIR/stage1/sizes.csv.
#
# Globals:
#   BASE_DIR (read) - parent of the stage1 output directory
stage1() {
  local out_dir=$BASE_DIR/stage1

  # Quoted so that a base directory containing whitespace neither splits the
  # mkdir argument nor makes the redirect ambiguous.
  mkdir -p "$out_dir"
  print-csv > "$out_dir/sizes.csv"
}
|
98 |
|
# Print the "OHeap Encoding" HTML report on stdout.
#
# The page is a fixed HTML skeleton with two tables spliced in, rendered by
# csv2html from <in_dir>/encoding_size.csv and <in_dir>/encoding_ratios.csv.
# csv2html is not defined in this file (presumably it comes from one of the
# sourced common.sh files).
#
# Arguments:
#   $1 - directory containing encoding_size.csv and encoding_ratios.csv
print-report() {
  local in_dir=$1
  local base_url='../../web'

  # The heredoc delimiters are unquoted on purpose: $base_url is expanded
  # inside the page header.
  cat <<EOF
<!DOCTYPE html>
<html>
<head>
<title>OHeap Encoding</title>
<script type="text/javascript" src="$base_url/table/table-sort.js"></script>
<link rel="stylesheet" type="text/css" href="$base_url/table/table-sort.css" />
<link rel="stylesheet" type="text/css" href="$base_url/benchmarks.css" />

</head>
<body>
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
<h2>OHeap Encoding</h2>

<h3>Encoding Size (KB)</h3>

<p>Sizes are in KB (powers of 10), not KiB (powers of 2).</p>
EOF
  # Quoted so an input directory containing whitespace stays one argument.
  csv2html "$in_dir/encoding_size.csv"

  cat <<EOF
<h3>Encoding Ratios</h3>
EOF
  csv2html "$in_dir/encoding_ratios.csv"

  cat <<EOF
</body>
</html>
EOF
}
|
135 |
|
136 |
|
137 | # TODO: instead of running osh_demo, we should generate a C++ program that
|
138 | # visits every node and counts it. The output might look like:
|
139 | #
|
140 | # - It can also print out the depth of the tree.
|
141 | # - Summary: number of different types used
|
142 | # - another option: decode/validate utf-8. See Visitor Use Cases.
|
143 | #
|
144 | # # 500 instances
|
145 | # line_span = (...)
|
146 | # # 455 instances
|
147 | # token = (
|
148 | # id id,
|
149 | # string val, # lengths: min 0, max 20, avg 30
|
150 | # int? span_id,
|
151 | # )
|
152 | #
|
153 | # command =
|
154 | # # 20 instances
|
155 | # NoOp
|
156 | # -- TODO: respect order
|
157 | # # 20 instances
|
158 | # | SimpleCommand(
|
159 | # word* words, # min length: 0, max: 10, mean: 3.3 ?
|
160 | # redir* redirects, # min length 0, max: 2, mean: 4.4
|
161 | # env_pair* more_env)
|
162 | # | Sentence(command child, token terminator)
|
163 | #
|
164 | # This might help with encoding things inline?
|
165 | # You will definitely need to append to ASDL arrays. I don't think you'll need
|
166 | # to append to strings. But you might want to store strings inline with
|
167 | # structs.
|
# I guess it wouldn't hurt to print out a table of EVERY node and array, along
# with the type.
|
170 | # parent_type,field_name,type,subtype,length
|
171 | # token,val,Str,-,5
|
172 | # SimpleCommand,redirects,Array,redirect,10
|
173 | #
|
174 | # This lets you figure out what the common types are, as well as the common
|
175 | # lengths.
|
176 |
|
# Time the decoding of every encoded *__oheap file.
#
# For each file, prints its path, then runs _tmp/osh_demo on it under the
# `time` keyword and prints the number of lines osh_demo wrote.
# NOTE(review): _tmp/osh_demo is not built by this file; it must already
# exist relative to the current directory.
#
# Globals:
#   BASE_DIR (read) - directory holding the *__oheap files
# Returns:
#   non-zero if there are no *__oheap files, or if decoding fails
# Uses `log`, which is not defined in this file (presumably from one of the
# sourced common.sh files).
decode-all() {
  local bin
  for bin in "$BASE_DIR"/*__oheap; do
    # An unmatched glob stays as the literal pattern.  Fail clearly instead
    # of handing that pattern to osh_demo.
    if [[ ! -e $bin ]]; then
      log "No *__oheap files found in $BASE_DIR"
      return 1
    fi

    printf '%s\n' "$bin"
    time _tmp/osh_demo "$bin" | wc -l
  done
}
|
183 |
|
184 | "$@"
|