1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Use sys.setprofile() and maybe sys.settrace() to trace Oil execution.
|
4 | #
|
5 | # Problem: Python callbacks for sys.setprofile() are too slow I think.
|
6 | #
|
7 | # Usage:
|
8 | # ./pytrace.sh <function name>
|
9 |
|
# Abort on unset variables, failed pipeline stages, and command errors.
set -o nounset
set -o pipefail
set -o errexit

# Make bin/oil.py importable from the repo root, plus vendored deps.
export PYTHONPATH='.:vendor'
|
15 |
|
# Test inputs of increasing size/realism.
readonly BIGGEST=benchmarks/testdata/configure-coreutils
readonly GIT_COMPLETION=testdata/completion/git
readonly OSH_COMPLETION=../bash-completion/osh_completion
readonly ABUILD=benchmarks/testdata/abuild

# Command lines used by the timing/profiling functions below.
# RUN_ABUILD executes the script; OSH_PARSE only parses (-n, AST discarded).
readonly -a RUN_ABUILD=(bin/oil.py osh $ABUILD -h)
# Slightly faster but not significantly.
#readonly -a RUN_ABUILD=(_bin/osh $ABUILD -h)
readonly -a OSH_PARSE=(bin/oil.py osh --ast-format none -n)
|
25 |
|
26 | #
|
27 | # Use Python's cProfile, which uses _lsprof. This is pretty fast.
|
28 | #
|
29 |
|
# Baseline: how long does real bash take to run 'abuild -h'?
time-bash-run-abuild() {
  time bash $ABUILD -h
}
|
31 |
|
32 | # Old: ~2.7 seconds (no tracing)
|
33 | # 2017/11/27, After ASDL optimization: 0.72 seconds.
|
# Time osh executing 'abuild -h' (see history notes above).
time-run-abuild() {
  time "${RUN_ABUILD[@]}"
}
|
35 |
|
36 | # ~250 ms
|
# Parse-only timing for the abuild script.
time-parse-abuild() {
  time "${OSH_PARSE[@]}" $ABUILD
}
|
38 |
|
39 | # ~160 ms
|
# Parse-only timing for the git completion script.
time-parse-git-completion() {
  time "${OSH_PARSE[@]}" $GIT_COMPLETION
}
|
41 | # ~150 ms
|
# Parse-only timing for osh_completion.
time-parse-osh-completion() {
  time "${OSH_PARSE[@]}" $OSH_COMPLETION
}
|
43 |
|
44 | # 4.3 seconds on lisa
|
# Parse-only timing for the largest input (configure-coreutils).
time-parse-biggest() {
  time "${OSH_PARSE[@]}" $BIGGEST
}
|
46 |
|
# Run an arbitrary command under cProfile and write the stats file.
# Usage: _cprofile OUT_FILE CMD [ARGS...]
# FIX: quote "$out" so a stats path containing spaces/globs isn't split
# (ShellCheck SC2086).
_cprofile() {
  local out=$1
  shift
  time python -m cProfile -o "$out" "$@"
}
|
52 |
|
53 | # Takes about 380 ms.
|
# Profile the osh parser on a script.  Takes about 380 ms on abuild.
# $1 - input script (default: $ABUILD)
# $2 - output stats file (default: abuild.cprofile in $PWD)
# FIX: quote "$in" and "$out" to survive paths with spaces (SC2086).
cprofile-osh-parse() {
  local in=${1:-$ABUILD}
  local out=${2:-abuild.cprofile}
  _cprofile "$out" "${OSH_PARSE[@]}" "$in"
  ls -l "$out"
}
|
60 |
|
# Convenience wrapper: profile parsing of abuild; stats land in _tmp/.
cprofile-parse-abuild() {
  cprofile-osh-parse $ABUILD _tmp/abuild.cprofile
}
|
# Convenience wrapper: profile parsing of the biggest input.
cprofile-parse-biggest() {
  cprofile-osh-parse $BIGGEST _tmp/biggest.cprofile
}
|
# Profile full execution (not just parsing) of 'abuild -h'.
cprofile-run-abuild() {
  _cprofile _tmp/abuild-run.cprofile "${RUN_ABUILD[@]}"
}
|
70 |
|
# TODO: Why doesn't this run correctly?  The results are different.  Maybe run
# spec tests with bin/osh-cprofile and see where it goes wrong?

# Location of a CPython source tree -- presumably with a prepared
# 'myconfigure' script in it (see cprofile-pyconfigure below); verify.
readonly pydir=~/src/languages/Python-2.7.15
|
# Profile osh running CPython's configure ('myconfigure') from inside the
# Python source tree; stats are written there as pyconfigure.cprofile.
cprofile-pyconfigure() {
  # readonly (not local) on purpose?  It stays visible after the function
  # returns, and a second call in the same shell would fail the assignment.
  readonly REPO_ROOT=$PWD

  cd $pydir

  # NOTE(review): with an assignment prefix, 'time' here is the external
  # /usr/bin/time command, not the bash keyword -- confirm that's intended.
  PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor \
  time python -m cProfile -o pyconfigure.cprofile \
  $REPO_ROOT/bin/oil.py osh myconfigure
  #_cprofile pyconfigure.cprofile \
}
|
# Dump the stats collected by cprofile-pyconfigure.
print-pyconfigure() {
  print-cprofile $pydir/pyconfigure.cprofile
}
|
85 |
|
86 | # TODO: Try uftrace? I guess you can compare wait4() call duration with bash
|
87 | # vs. osh?
|
# Follow forks and trace selected syscalls while osh runs 'abuild -h'.
strace-run-abuild() {
  # Alternative filters tried previously:
  #local filter='read,wait4'
  local filter='execve,wait4'
  time strace -ff -e "$filter" "${RUN_ABUILD[@]}"
  # Summary-only variant:
  #time strace -c "${RUN_ABUILD[@]}"
}
|
94 |
|
95 | # Yeah I understand from this why Chrome Tracing / Flame Graphs are better.
|
96 | # This format doesn't respect the stack!
|
97 | # cumtime: bin/oil.py is the top, obviously
|
# Print a cProfile stats file sorted by self time ('tottime').
# $1 - stats file (default: _tmp/abuild.cprofile)
# NOTE: flat pstats output doesn't respect the call stack -- flame graphs /
# Chrome Tracing are better for that (see comment above).
# FIX: quote "$profile" so a path with spaces reaches sys.argv[1] intact
# (ShellCheck SC2086).
print-cprofile() {
  local profile=${1:-_tmp/abuild.cprofile}
  python -c '
import pstats
import sys
p = pstats.Stats(sys.argv[1])
p.sort_stats("tottime").print_stats()
' "$profile"
}
|
107 |
|
108 | #
|
109 | # My Own Tracing with pytrace.py. Too slow!
|
110 | #
|
111 |
|
112 |
|
113 | # Abuild call/return events:
|
114 | # Parsing: 4,345,706 events
|
115 | # Execution: 530,924 events
|
116 |
|
117 | # Total events:
|
118 | # 14,918,308
|
119 | # Actually that is still doable as binary. Not sure it's viewable in Chrome
|
120 | # though.
|
121 | # 14 bytes * 14.9M is 209 MB.
|
122 |
|
# Trace parsing of abuild with the custom pytrace hook (_PY_TRACE output file).
# BUG FIX: this referenced the undefined array PARSE_ABUILD, which aborts
# immediately under 'set -o nounset'.  The intended command -- per the
# "Parsing: 4,345,706 events" note above -- is OSH_PARSE applied to $ABUILD.
abuild-trace() {
  _PY_TRACE=abuild.pytrace time "${OSH_PARSE[@]}" $ABUILD
}
|
126 |
|
127 | #
|
128 | # Depends on pytracing, which is also too slow.
|
129 | #
|
130 |
|
131 | # Trace a parsing function
|
# Trace a parsing function: parse a script (this file itself by default)
# with the AST discarded.
parse() {
  #local script=$ABUILD
  local script=$0
  time bin/oil.py osh --ast-format none -n $script >/dev/null
}
|
137 |
|
138 | # Trace the execution
|
# Trace the execution of a script (this file itself by default); the
# pytracing hook writes .json trace files into the current directory.
# FIX: the original declared 'local script=$0' and then ignored it, passing
# $0 directly -- use the local so switching to $ABUILD only needs one edit.
execute() {
  #local script=$ABUILD
  local script=$0
  #time bin/oil.py osh -c 'echo hi'
  time bin/oil.py osh $script

  ls -l -h *.json
}
|
147 |
|
# Idea: I want a flame graph based on deterministic data!  That way you get the
|
149 | # full stack trace.
|
150 |
|
151 | # It wasn't happening in the python-flamegraph stuff for some reason. Not sure
|
152 | # why. I think it is because I/O was exaggerated.
|
153 | #
|
154 | # Interpreter hook:
|
155 | #
|
156 | # for thread_id, frame in sys._current_frames().items():
|
157 | # if thread_id == my_thread:
|
158 | # continue
|
159 |
|
160 | # Note that opening file descriptors can cause bugs! I have to open it above
|
161 | # descriptor 10!
|
162 |
|
163 | # python-flamegraph
|
164 | # - suffers from measurement error due to threads.
|
#   - is RunCommandSub being inflated?
|
166 | # - well actually i'm not sure. I have to do it myself on a single thread
|
167 | # and see.
|
168 | # pytracing:
|
169 | # - the format is too bloated. It can't handle abuild -h. So I have to
|
170 | # optimize it.
|
171 | #
|
172 | # I want to unify these two approaches: both flame graphs and function traces.
|
173 | #
|
174 | # Advantage: sys.setprofile() gets C function call events!
|
175 | #
|
176 | # Reservoir Sampling! Get a list of all unique stacks.
|
177 | #
|
178 | # You can figure out the stack from the current/call/return sequence. So you
|
179 | # can use the reservoir sampling algorithm to get say 100,000 random stacks out
|
180 | # of 14 M events.
|
181 | #
|
182 | # sys.getframes()
|
183 |
|
184 | "$@"
|