#!/usr/bin/env bash
#
# Test the size of file, encoding, and decoding speed.
#
# Usage:
#   ./oheap.sh <function name>

# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Shared helpers, sourced relative to the current working directory.
source test/common.sh
source benchmarks/common.sh

# Directory under which this script's generated files are written.
readonly BASE_DIR=_tmp/oheap
 | 
| 16 | 
 | 
# Encode one shell script into the oheap format.
#
# Arguments:
#   $1 - path of the script to parse
#   $2 - path of the file that receives the encoded output
# Globals:
#   OSH_OVM (read) - command used to run the parser; not defined in this file
encode-one() {
  local script=$1
  local oheap_out=$2

  # $OSH_OVM stays unquoted, exactly as the original invocation had it.  The
  # redirect target is quoted so that an output path containing spaces does
  # not fail with "ambiguous redirect".
  $OSH_OVM -n --ast-format oheap "$script" > "$oheap_out"
}
 | 
| 22 | 
 | 
# Print one "<source path> <oheap output path>" pair per line for every path
# listed in benchmarks/osh-parser-files.txt.  The output path is
# _tmp/oheap/<basename of source>__oheap.
task-spec() {
  local path

  # -r keeps backslashes in a path literal.  The || clause still processes a
  # final line that has no trailing newline.
  while read -r path || [[ -n "$path" ]]; do
    printf '%s _tmp/oheap/%s__oheap\n' "$path" "$(basename -- "$path")"
  done < benchmarks/osh-parser-files.txt
}
 | 
| 28 | 
 | 
# Encode every file produced by task-spec, timing each encoding.
#
# Writes a CSV header to _tmp/oheap/times.csv, then runs this script's
# encode-one under benchmarks/time.py once per task, passing that CSV file via
# --output.
encode-all() {
  mkdir -p _tmp/oheap

  local times_csv=_tmp/oheap/times.csv
  echo 'status,elapsed_secs' > "$times_csv"

  # xargs -n 2 hands each "<script> <oheap_out>" pair to one invocation.
  # "$0" re-invokes this script; it is quoted so that a script path containing
  # spaces remains a single word.
  task-spec | xargs -n 2 --verbose -- \
    benchmarks/time.py --output "$times_csv" -- \
    "$0" encode-one
}
 | 
| 39 | 
 | 
# Out of curiosity, compress oheap and originals.

# Compress every *__oheap file directly under $BASE_DIR with gzip and xz.
# Results go to $BASE_DIR/oheap-compressed/<name>.gz and <name>.xz.
#
# Globals:
#   BASE_DIR (read)
# Uses `log`, which is not defined in this file.
compress-oheap() {
  local c_dir=$BASE_DIR/oheap-compressed
  mkdir -p "$c_dir"

  local bin name
  for bin in "$BASE_DIR"/*__oheap; do
    # An unmatched glob is left as the literal pattern; skip it rather than
    # trying to compress a file that does not exist.
    [[ -e "$bin" ]] || continue

    name=${bin##*/}
    log "Compressing $name"
    gzip --stdout "$bin" > "$c_dir/$name.gz"
    xz --stdout "$bin" > "$c_dir/$name.xz"
  done
}
 | 
| 52 | 
 | 
# Compress every source file listed in benchmarks/osh-parser-files.txt with
# gzip and xz.  Results go to $BASE_DIR/src-compressed/<name>__text.gz and
# <name>__text.xz, where <name> is the basename of the source path.
#
# Globals:
#   BASE_DIR (read)
# Uses `log`, which is not defined in this file.
compress-text() {
  local c_dir=$BASE_DIR/src-compressed
  mkdir -p "$c_dir"

  local src name
  # -r keeps backslashes literal; the || clause handles a last line that has
  # no trailing newline.
  while read -r src || [[ -n "$src" ]]; do
    # Assigned separately from `local` so a basename failure is not masked.
    name=$(basename -- "$src")
    log "Compressing $name"
    gzip --stdout "$src" > "$c_dir/${name}__text.gz"
    xz --stdout "$src" > "$c_dir/${name}__text.xz"
  done < benchmarks/osh-parser-files.txt
}
 | 
| 64 | 
 | 
# Emit one CSV row per given path: "<size in bytes>,<$1>,<$2>,<path>".
#
# Arguments:
#   $1   - value written in the second column
#   $2   - value written in the third column
#   $3.. - paths to report on
print-size() {
  local format_col=$1 compression_col=$2
  shift 2

  # find fills in %s (size in bytes) and %p (the path as given).
  local row_format="%s,${format_col},${compression_col},%p\n"

  # -maxdepth 0 limits find to the named paths themselves, without descending.
  find "$@" -maxdepth 0 -printf "$row_format"
}
 | 
| 73 | 
 | 
# Print a size table as CSV on stdout: a header row, then print-size rows for
# benchmarks/testdata/*, the *__oheap files under $BASE_DIR, and the gzip/xz
# files under $BASE_DIR/src-compressed and $BASE_DIR/oheap-compressed.
#
# Globals:
#   BASE_DIR (read)
print-csv() {
  echo 'num_bytes,format,compression,path'
  # TODO
  print-size text none benchmarks/testdata/*
  # $BASE_DIR is quoted so it cannot be word-split; the glob part stays
  # outside the quotes so it still expands.
  print-size text gz "$BASE_DIR"/src-compressed/*.gz
  print-size text xz "$BASE_DIR"/src-compressed/*.xz

  print-size oheap none "$BASE_DIR"/*__oheap
  print-size oheap gz "$BASE_DIR"/oheap-compressed/*.gz
  print-size oheap xz "$BASE_DIR"/oheap-compressed/*.xz
}
 | 
| 85 | 
 | 
# This can be done on any host.
#
# Runs the measurement steps in order: encode-all, compress-oheap, then
# compress-text.
measure() {
  local step
  for step in encode-all compress-oheap compress-text; do
    "$step"
  done
}
 | 
| 92 | 
 | 
# Write the output of print-csv to $BASE_DIR/stage1/sizes.csv, creating the
# directory if needed.
#
# Globals:
#   BASE_DIR (read)
stage1() {
  local out_dir=$BASE_DIR/stage1
  # Quoted so that a directory name with spaces is neither split by mkdir nor
  # rejected as an ambiguous redirect.
  mkdir -p "$out_dir"
  print-csv > "$out_dir/sizes.csv"
}
 | 
| 98 | 
 | 
# Print the "OHeap Encoding" HTML report on stdout.
#
# Arguments:
#   $1 - directory containing encoding_size.csv and encoding_ratios.csv
#
# Each CSV is passed to csv2html, which is not defined in this file
# (presumably it renders the CSV as an HTML table -- confirm in the sourced
# helpers).
print-report() {
  local in_dir=$1
  # Prefix for the script and stylesheet URLs in the page head.
  local base_url='../../web'

  # Page head and the heading for the size table.
  cat <<EOF
<!DOCTYPE html>
<html>
  <head>
    <title>OHeap Encoding</title>
    <script type="text/javascript" src="$base_url/table/table-sort.js"></script>
    <link rel="stylesheet" type="text/css" href="$base_url/table/table-sort.css" />
    <link rel="stylesheet" type="text/css" href="$base_url/benchmarks.css" />

  </head>
  <body>
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
    <h2>OHeap Encoding</h2>

    <h3>Encoding Size (KB)</h3>

    <p>Sizes are in KB (powers of 10), not KiB (powers of 2).</p>
EOF
  # Quoted so an input directory containing spaces is passed as one argument.
  csv2html "$in_dir/encoding_size.csv"

  cat <<EOF
    <h3>Encoding Ratios</h3>
EOF
  csv2html "$in_dir/encoding_ratios.csv"

  cat <<EOF
  </body>
</html>
EOF
}
 | 
| 135 | 
 | 
| 136 | 
 | 
# TODO: instead of running osh_demo, we should generate a C++ program that
# visits every node and counts it.  The output might look like:
#
# - It can also print out the depth of the tree.
# - Summary: number of different types used
# - another option: decode/validate utf-8.  See Visitor Use Cases.
#
# # 500 instances
# line_span = (...)
# # 455 instances
# token = (
#  id id,
#  string val,    # lengths: min 0, max 20, avg 30
#  int? span_id,
# )
#
#  command =
#    # 20 instances
#    NoOp
#    -- TODO: respect order
#    # 20 instances
#  | SimpleCommand(
#      word* words,        # min length: 0, max: 10, mean: 3.3 ?
#      redir* redirects,   # min length 0, max: 2, mean: 4.4
#      env_pair* more_env)
#  | Sentence(command child, token terminator)
#
# This might help with encoding things inline?
# You will definitely need to append to ASDL arrays.  I don't think you'll need
# to append to strings.  But you might want to store strings inline with
# structs.
# I guess it wouldn't hurt to print out a table of EVERY node and array, along
# with the type.
# parent_type,field_name,type,subtype,length
# token,val,Str,-,5
# SimpleCommand,redirects,Array,redirect,10
#
# This lets you figure out what the common types are, as well as the common
# lengths.
 | 
| 176 | 
 | 
# Run _tmp/osh_demo on every *__oheap file under _tmp/oheap.  For each file,
# print its path, then the number of lines osh_demo wrote to stdout; `time`
# reports how long the pipeline took.
decode-all() {
  local bin
  for bin in _tmp/oheap/*__oheap; do
    # An unmatched glob is left as the literal pattern; skip it rather than
    # trying to decode a file that does not exist.
    [[ -e "$bin" ]] || continue

    printf '%s\n' "$bin"
    time _tmp/osh_demo "$bin" | wc -l
  done
}
 | 
| 183 | 
 | 
# Dispatch: run the command line given to this script, i.e. the function named
# by the first argument with the remaining arguments passed to it.
"$@"
 |