#!/usr/bin/env bash
#
# Use sys.setprofile() and maybe sys.settrace() to trace Oil execution.
#
# Problem: Python callbacks for sys.setprofile() are too slow I think.
#
# Usage:
#   ./pytrace.sh <function name>
 | 
# Strict mode: abort on unset variables, failed pipeline stages, and errors.
set -o nounset
set -o pipefail
set -o errexit

# Make the repo root and vendored libraries importable by bin/oil.py.
export PYTHONPATH='.:vendor'

# Test corpora: large/representative shell scripts to parse or run.
readonly BIGGEST=benchmarks/testdata/configure-coreutils
readonly GIT_COMPLETION=testdata/completion/git
readonly OSH_COMPLETION=../bash-completion/osh_completion
readonly ABUILD=benchmarks/testdata/abuild

# Command line that runs 'abuild -h' under the Python implementation of osh.
readonly -a RUN_ABUILD=(bin/oil.py osh $ABUILD -h)
# Slightly faster but not significantly.
#readonly -a RUN_ABUILD=(_bin/osh $ABUILD -h)
# Parse-only invocation; append the script to parse.
readonly -a OSH_PARSE=(bin/oil.py osh --ast-format none -n)
 | 
| 25 | 
 | 
#
# Use Python's cProfile, which uses _lsprof.  This is pretty fast.
#

# Baseline: run 'abuild -h' under stock bash for comparison.
time-bash-run-abuild() { time bash "$ABUILD" -h; }
 | 
| 31 | 
 | 
# Run 'abuild -h' under the Python-based osh, timed.
# Old: ~2.7 seconds (no tracing)
# 2017/11/27, After ASDL optimization: 0.72 seconds.
time-run-abuild() {
  time "${RUN_ABUILD[@]}"
}
 | 
| 35 | 
 | 
# ~250 ms
# Parse (but don't execute) the abuild script, timed.
time-parse-abuild() { time "${OSH_PARSE[@]}" "$ABUILD"; }
 | 
| 38 | 
 | 
# ~160 ms
# Parse git's bash completion script, timed.
time-parse-git-completion() { time "${OSH_PARSE[@]}" "$GIT_COMPLETION"; }
 | 
# ~150 ms
# Parse osh's own completion script, timed.
time-parse-osh-completion() { time "${OSH_PARSE[@]}" "$OSH_COMPLETION"; }
 | 
| 43 | 
 | 
# 4.3 seconds on lisa
# Parse the largest corpus script (coreutils configure), timed.
time-parse-biggest() { time "${OSH_PARSE[@]}" "$BIGGEST"; }
 | 
| 46 | 
 | 
# Run a command under cProfile, writing the stats file to $1.
# Usage: _cprofile OUT_FILE CMD [ARGS...]
_cprofile() {
  local out=$1
  shift
  time python -m cProfile -o "$out" "$@"
}
 | 
| 52 | 
 | 
# Takes about 380 ms.
# Profile parsing of a script (default: abuild) and show the stats file size.
# $1: script to parse, $2: cProfile output path.
cprofile-osh-parse() {
  local in=${1:-$ABUILD}
  local out=${2:-abuild.cprofile}
  _cprofile "$out" "${OSH_PARSE[@]}" "$in"
  ls -l "$out"
}
 | 
| 60 | 
 | 
# Profile parsing abuild, writing stats into _tmp/.
cprofile-parse-abuild() {
  cprofile-osh-parse "$ABUILD" _tmp/abuild.cprofile
}
 | 
# Profile parsing the biggest corpus script, writing stats into _tmp/.
cprofile-parse-biggest() {
  cprofile-osh-parse "$BIGGEST" _tmp/biggest.cprofile
}
 | 
# Profile a full run (parse + execute) of 'abuild -h'.
cprofile-run-abuild() {
  _cprofile _tmp/abuild-run.cprofile "${RUN_ABUILD[@]}"
}
 | 
| 70 | 
 | 
# TODO: Why doesn't this run correctly?  The results are different.  Maybe run
# spec tests with bin/osh-cprofile and see where it goes wrong?
readonly pydir=~/src/languages/Python-2.7.15

# Profile osh running CPython's configure from inside the CPython tree.
cprofile-pyconfigure() {
  # Capture the repo root before cd'ing away.  'local' rather than 'readonly':
  # a function-level 'readonly' creates a global readonly, so a second
  # invocation in the same shell would fail.
  local repo_root=$PWD

  cd "$pydir"

  PYTHONPATH="$repo_root:$repo_root/vendor" \
    time python -m cProfile -o pyconfigure.cprofile \
    "$repo_root/bin/oil.py" osh myconfigure
    #_cprofile pyconfigure.cprofile
}
 | 
# Show the stats collected by cprofile-pyconfigure.
print-pyconfigure() { print-cprofile "$pydir/pyconfigure.cprofile"; }
 | 
| 85 | 
 | 
# TODO: Try uftrace?  I guess you can compare wait4() call duration with bash
# vs. osh?
strace-run-abuild() {
  # Other filters worth trying: 'read,wait4'; or 'strace -c' for a summary.
  local filter='execve,wait4'
  time strace -ff -e "$filter" "${RUN_ABUILD[@]}"
}
 | 
| 94 | 
 | 
# Yeah I understand from this why Chrome Tracing / Flame Graphs are better.
# This format doesn't respect the stack!
# cumtime: bin/oil.py is the top, obviously
#
# Print a cProfile stats file, sorted by total (self) time.
# $1: path to the stats file (default: _tmp/abuild.cprofile).
print-cprofile() {
  local profile=${1:-_tmp/abuild.cprofile}
  python -c '
import pstats
import sys
p = pstats.Stats(sys.argv[1])
p.sort_stats("tottime").print_stats()
' "$profile"
}
 | 
| 107 | 
 | 
#
# My Own Tracing with pytrace.py.  Too slow!
#


# Abuild call/return events:
# Parsing: 4,345,706 events
# Execution: 530,924 events

# Total events:
# 14,918,308
# Actually that is still doable as binary.  Not sure it's viewable in Chrome
# though.
# 14 bytes * 14.9M is 209 MB.

# Parse abuild with the pytrace hook enabled.
abuild-trace() {
  # Fix: the original used an undefined PARSE_ABUILD array, which aborts under
  # 'set -o nounset'.  The intended command is the parse-only invocation.
  _PY_TRACE=abuild.pytrace time "${OSH_PARSE[@]}" "$ABUILD"
}
 | 
| 126 | 
 | 
#
# Depends on pytracing, which is also too slow.
#

# Trace a parsing function.  Parses this very script by default.
parse() {
  #local script=$ABUILD
  local script=$0
  time bin/oil.py osh --ast-format none -n "$script" >/dev/null
}
 | 
| 137 | 
 | 
# Trace the execution.  Runs this very script by default.
execute() {
  #local script=$ABUILD
  local script=$0
  #time bin/oil.py osh -c 'echo hi'
  # Fix: use the declared local instead of repeating (unquoted) $0.
  time bin/oil.py osh "$script"

  # Show the trace output files produced by pytracing.
  ls -l -h *.json
}
 | 
| 147 | 
 | 
# Idea: I want a flame graph based on deterministic data!  That way you get the
# full stack trace.

# It wasn't happening in the python-flamegraph stuff for some reason.  Not sure
# why.  I think it is because I/O was exaggerated.
#
# Interpreter hook:
#
# for thread_id, frame in sys._current_frames().items():
#   if thread_id == my_thread:
#     continue

# Note that opening file descriptors can cause bugs!  I have to open it above
# descriptor 10!

# python-flamegraph
# - suffers from measurement error due to threads.
# - is RunCommandSub being inflated?
#    - well actually i'm not sure.  I have to do it myself on a single thread
#    and see.
# pytracing:
# - the format is too bloated.  It can't handle abuild -h.  So I have to
# optimize it.
#
# I want to unify these two approaches: both flame graphs and function traces.
#
# Advantage: sys.setprofile() gets C function call events!
#
# Reservoir Sampling!  Get a list of all unique stacks.
#
# You can figure out the stack from the current/call/return sequence.  So you
# can use the reservoir sampling algorithm to get say 100,000 random stacks out
# of 14 M events.
#
# sys.getframes()
 | 
# Dispatch: run the function named by the first argument, e.g. ./pytrace.sh parse
"$@"
 |