#!/usr/bin/env bash
#
# Use sys.setprofile() and maybe sys.settrace() to trace Oil execution.
#
# Problem: Python callbacks for sys.setprofile() are too slow, I think.
#
# Usage:
#   ./pytrace.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

export PYTHONPATH='.:vendor'

readonly BIGGEST=benchmarks/testdata/configure-coreutils
readonly GIT_COMPLETION=testdata/completion/git
readonly OSH_COMPLETION=../bash-completion/osh_completion
readonly ABUILD=benchmarks/testdata/abuild

readonly -a RUN_ABUILD=(bin/oil.py osh $ABUILD -h)
# Slightly faster but not significantly.
#readonly -a RUN_ABUILD=(_bin/osh $ABUILD -h)
readonly -a OSH_PARSE=(bin/oil.py osh --ast-format none -n)

#
# Use Python's cProfile, which uses _lsprof. This is pretty fast.
#

time-bash-run-abuild() { time bash $ABUILD -h; }

# Old: ~2.7 seconds (no tracing)
# 2017/11/27, after ASDL optimization: 0.72 seconds.
time-run-abuild() { time "${RUN_ABUILD[@]}"; }

# ~250 ms
time-parse-abuild() { time "${OSH_PARSE[@]}" $ABUILD; }

# ~160 ms
time-parse-git-completion() { time "${OSH_PARSE[@]}" $GIT_COMPLETION; }
# ~150 ms
time-parse-osh-completion() { time "${OSH_PARSE[@]}" $OSH_COMPLETION; }

# 4.3 seconds on lisa
time-parse-biggest() { time "${OSH_PARSE[@]}" $BIGGEST; }

_cprofile() {
  local out=$1
  shift
  time python -m cProfile -o $out "$@"
}

# Takes about 380 ms.
cprofile-osh-parse() {
  local in=${1:-$ABUILD}
  local out=${2:-abuild.cprofile}
  _cprofile $out "${OSH_PARSE[@]}" $in
  ls -l $out
}

cprofile-parse-abuild() {
  cprofile-osh-parse $ABUILD _tmp/abuild.cprofile
}
cprofile-parse-biggest() {
  cprofile-osh-parse $BIGGEST _tmp/biggest.cprofile
}
cprofile-run-abuild() {
  _cprofile _tmp/abuild-run.cprofile "${RUN_ABUILD[@]}"
}

# TODO: Why doesn't this run correctly? The results are different. Maybe run
# spec tests with bin/osh-cprofile and see where it goes wrong?
readonly pydir=~/src/languages/Python-2.7.15
cprofile-pyconfigure() {
  readonly REPO_ROOT=$PWD

  cd $pydir

  PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor \
    time python -m cProfile -o pyconfigure.cprofile \
    $REPO_ROOT/bin/oil.py osh myconfigure
  #_cprofile pyconfigure.cprofile \
}
print-pyconfigure() { print-cprofile $pydir/pyconfigure.cprofile; }

# TODO: Try uftrace? I guess you can compare wait4() call duration with bash
# vs. osh?
strace-run-abuild() {
  #local filter='read,wait4'
  local filter='execve,wait4'
  time strace -ff -e "$filter" "${RUN_ABUILD[@]}"
  #time strace -c "${RUN_ABUILD[@]}"
}
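
# To act on the TODO above, one way to compare wait4() time under bash vs. osh
# is a per-syscall summary. This is just a sketch: it uses strace -c
# (count/summarize) with -f to follow child processes; the summary table goes
# to stderr.
strace-summary-abuild() {
  echo '=== bash ==='
  strace -c -f bash $ABUILD -h
  echo '=== osh ==='
  strace -c -f "${RUN_ABUILD[@]}"
}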

# Yeah, I understand from this why Chrome Tracing / Flame Graphs are better.
# This format doesn't respect the stack!
# cumtime: bin/oil.py is at the top, obviously.
print-cprofile() {
  local profile=${1:-_tmp/abuild.cprofile}
  python -c '
import pstats
import sys
p = pstats.Stats(sys.argv[1])
p.sort_stats("tottime").print_stats()
' $profile
}
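
# The flat listing above loses the stack, but pstats can at least show who
# calls each hot function. A sketch; sorting by cumulative time and limiting
# to the top 20 entries is just a guess at what's useful here.
print-callers() {
  local profile=${1:-_tmp/abuild.cprofile}
  python -c '
import pstats
import sys
p = pstats.Stats(sys.argv[1])
p.sort_stats("cumulative").print_callers(20)
' $profile
}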

#
# My Own Tracing with pytrace.py. Too slow!
#

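# A rough way to see the problem stated at the top of this file: time a
# function-call-heavy loop with and without a no-op sys.setprofile() hook.
# This is only a sketch; the loop count is arbitrary and the slowdown you
# observe will vary by machine and Python version.
time-setprofile-overhead() {
  python -c '
import sys
import time

def f(i):
  return i + 1

def work():
  n = 0
  for i in range(300000):
    n += f(i)  # each call/return fires the profile hook
  return n

start = time.time()
work()
print("no hook:   %.3f s" % (time.time() - start))

sys.setprofile(lambda frame, event, arg: None)
start = time.time()
work()
sys.setprofile(None)
print("noop hook: %.3f s" % (time.time() - start))
'
}
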
# Abuild call/return events:
# Parsing: 4,345,706 events
# Execution: 530,924 events

# Total events:
# 14,918,308
# Actually that is still doable as binary. Not sure it's viewable in Chrome
# though.
# 14 bytes * 14.9M is 209 MB.

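# Sanity check of the size estimate above, with a sketch of one possible
# fixed-size record. The layout here (u16 event, i64 timestamp, u32 function
# id = 14 bytes) is made up for illustration; it is not what pytrace.py
# actually writes.
trace-size-estimate() {
  python -c '
import struct

record = struct.pack("<HqI", 1, 1234567890, 42)  # event, timestamp, func id
print("record size: %d bytes" % len(record))
print("14.9M events: about %.0f MB" % (len(record) * 14.9e6 / 1e6))
'
}
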
abuild-trace() {
  _PY_TRACE=abuild.pytrace time "${OSH_PARSE[@]}" $ABUILD
}

#
# Depends on pytracing, which is also too slow.
#

# Trace a parsing function
parse() {
  #local script=$ABUILD
  local script=$0
  time bin/oil.py osh --ast-format none -n $script >/dev/null
}

# Trace the execution
execute() {
  #local script=$ABUILD
  local script=$0
  #time bin/oil.py osh -c 'echo hi'
  time bin/oil.py osh $0

  ls -l -h *.json
}

# Idea: I want a flame graph based on deterministic data! That way you get the
# full stack trace.

# It wasn't happening in the python-flamegraph stuff for some reason. Not sure
# why. I think it is because I/O was exaggerated.
#
# Interpreter hook:
#
# for thread_id, frame in sys._current_frames().items():
#   if thread_id == my_thread:
#     continue

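# A sketch of the interpreter hook above: a background thread that
# periodically walks sys._current_frames() and skips its own thread, which is
# roughly what python-flamegraph does. The interval, duration, and fib()
# workload here are arbitrary choices for the demo.
sampling-hook-demo() {
  python -c '
import collections
import sys
import threading
import time

counts = collections.Counter()

def sampler(my_thread, interval, duration):
  deadline = time.time() + duration
  while time.time() < deadline:
    for thread_id, frame in sys._current_frames().items():
      if thread_id == my_thread:
        continue  # do not sample the sampler thread itself
      stack = []
      f = frame
      while f is not None:  # walk outward to build the stack
        stack.append(f.f_code.co_name)
        f = f.f_back
      counts[";".join(reversed(stack))] += 1
    time.sleep(interval)

def fib(n):
  return n if n < 2 else fib(n - 1) + fib(n - 2)

t = threading.Thread(
    target=sampler, args=(threading.current_thread().ident, 0.001, 0.5))
t.start()
while t.is_alive():
  fib(18)
t.join()

for stack, count in counts.most_common(5):
  print("%5d %s" % (count, stack))
'
}
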
# Note that opening file descriptors can cause bugs! I have to open the trace
# file above descriptor 10!

# python-flamegraph
# - suffers from measurement error due to threads.
# - is RunCommandSub being inflated?
#   - well, actually I'm not sure. I have to do it myself on a single thread
#     and see.
# pytracing:
# - the format is too bloated. It can't handle abuild -h. So I have to
#   optimize it.
#
# I want to unify these two approaches: both flame graphs and function traces.
#
# Advantage: sys.setprofile() gets C function call events!
#
# Reservoir Sampling! Get a list of all unique stacks.
#
# You can figure out the stack from the call/return event sequence. So you can
# use the reservoir sampling algorithm to get, say, 100,000 random stacks out
# of 14M events.
#
# sys._getframe()

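# A sketch of the reservoir-sampling idea above, over a synthetic stream of
# call/return events. The event format (a list of (event, function) pairs)
# and the workload are made up for illustration; real pytrace.py events would
# be plugged in instead.
reservoir-sample-demo() {
  python -c '
import random

def sample_stacks(events, k):
  """Keep k uniformly random stack snapshots from a stream of events."""
  stack = []
  samples = []
  n = 0  # events seen so far
  for event, func in events:
    if event == "call":
      stack.append(func)
    elif event == "return" and stack:
      stack.pop()
    n += 1
    snapshot = tuple(stack)
    if len(samples) < k:
      samples.append(snapshot)
    else:
      j = random.randint(0, n - 1)
      if j < k:  # replace with probability k/n
        samples[j] = snapshot
  return samples

# Synthetic stream: f calls g, which calls h, over and over.
events = []
for i in range(10000):
  events.extend([("call", "f"), ("call", "g"), ("call", "h"),
                 ("return", "h"), ("return", "g"), ("return", "f")])

for s in sample_stacks(events, 10):
  print(" -> ".join(s) or "(empty stack)")
'
}
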
"$@"
|