#!/usr/bin/env bash
#
# Use sys.setprofile() and maybe sys.settrace() to trace Oil execution.
#
# Problem: Python callbacks for sys.setprofile() are too slow, I think
# (demo-setprofile-overhead below is a quick illustration).
#
# Usage:
#   ./pytrace.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

export PYTHONPATH='.:vendor'

readonly BIGGEST=benchmarks/testdata/configure-coreutils
readonly GIT_COMPLETION=testdata/completion/git
readonly OSH_COMPLETION=../bash-completion/osh_completion
readonly ABUILD=benchmarks/testdata/abuild

readonly -a RUN_ABUILD=(bin/oil.py osh $ABUILD -h)
# Slightly faster, but not significantly.
#readonly -a RUN_ABUILD=(_bin/osh $ABUILD -h)
readonly -a OSH_PARSE=(bin/oil.py osh --ast-format none -n)
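
# A minimal sketch of the problem mentioned above (this demo function is
# mine, not part of the original benchmarks): even a no-op sys.setprofile()
# hook fires on every call/return event, so a call-heavy workload slows down
# by a large constant factor.
demo-setprofile-overhead() {
  python -c '
import sys, time

def fib(n):
    return n if n < 2 else fib(n-1) + fib(n-2)

def measure(label):
    start = time.time()
    fib(25)
    print("%-25s %.3f seconds" % (label, time.time() - start))

measure("no hook:")

def hook(frame, event, arg):  # fires on call/return/c_call/c_return
    pass

sys.setprofile(hook)
measure("no-op profile hook:")
sys.setprofile(None)
'
}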
#
# Use Python's cProfile, which uses _lsprof. This is pretty fast.
#

time-bash-run-abuild() { time bash $ABUILD -h; }

# Old: ~2.7 seconds (no tracing)
# 2017/11/27, after ASDL optimization: 0.72 seconds.
time-run-abuild() { time "${RUN_ABUILD[@]}"; }

# ~250 ms
time-parse-abuild() { time "${OSH_PARSE[@]}" $ABUILD; }

# ~160 ms
time-parse-git-completion() { time "${OSH_PARSE[@]}" $GIT_COMPLETION; }
# ~150 ms
time-parse-osh-completion() { time "${OSH_PARSE[@]}" $OSH_COMPLETION; }

# 4.3 seconds on lisa
time-parse-biggest() { time "${OSH_PARSE[@]}" $BIGGEST; }
_cprofile() {
  local out=$1
  shift
  time python -m cProfile -o $out "$@"
}

# Takes about 380 ms.
cprofile-osh-parse() {
  local in=${1:-$ABUILD}
  local out=${2:-abuild.cprofile}
  _cprofile $out "${OSH_PARSE[@]}" $in
  ls -l $out
}

cprofile-parse-abuild() {
  cprofile-osh-parse $ABUILD _tmp/abuild.cprofile
}
cprofile-parse-biggest() {
  cprofile-osh-parse $BIGGEST _tmp/biggest.cprofile
}
cprofile-run-abuild() {
  _cprofile _tmp/abuild-run.cprofile "${RUN_ABUILD[@]}"
}

# TODO: Why doesn't this run correctly? The results are different. Maybe run
# spec tests with bin/osh-cprofile and see where it goes wrong?
readonly pydir=~/src/languages/Python-2.7.15
cprofile-pyconfigure() {
  readonly REPO_ROOT=$PWD

  cd $pydir

  PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor \
    time python -m cProfile -o pyconfigure.cprofile \
    $REPO_ROOT/bin/oil.py osh myconfigure
  #_cprofile pyconfigure.cprofile \
}
print-pyconfigure() { print-cprofile $pydir/pyconfigure.cprofile; }

# TODO: Try uftrace? I guess you can compare wait4() call duration with bash
# vs. osh? (A sketch of that comparison follows this function.)
strace-run-abuild() {
  #local filter='read,wait4'
  local filter='execve,wait4'
  time strace -ff -e "$filter" "${RUN_ABUILD[@]}"
  #time strace -c "${RUN_ABUILD[@]}"
}
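
# Sketch of the comparison suggested in the TODO above (the function name is
# mine): strace -c prints a per-syscall time summary on stderr, so eyeballing
# the wait4 rows for bash vs. osh is a cheap first approximation.
strace-compare-wait4() {
  echo '--- bash ---'
  strace -c -f bash $ABUILD -h >/dev/null
  echo '--- osh ---'
  strace -c -f "${RUN_ABUILD[@]}" >/dev/null
}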

# Yeah, I understand from this why Chrome Tracing / flame graphs are better.
# This format doesn't respect the stack!
# cumtime: bin/oil.py is at the top, obviously.
print-cprofile() {
  local profile=${1:-_tmp/abuild.cprofile}
  python -c '
import pstats
import sys
p = pstats.Stats(sys.argv[1])
p.sort_stats("tottime").print_stats()
' $profile
}
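
# Optional, and not part of the original workflow: render the same profile as
# a call graph that DOES respect the stack. Assumes gprof2dot (pip install
# gprof2dot) and graphviz are installed.
cprofile-to-svg() {
  local profile=${1:-_tmp/abuild.cprofile}
  local out=${2:-_tmp/abuild.svg}
  gprof2dot -f pstats $profile | dot -Tsvg -o $out
  ls -l $out
}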

#
# My own tracing with pytrace.py. Too slow!
#

# Abuild call/return events:
#   Parsing:   4,345,706 events
#   Execution:   530,924 events

# Total events:
#   14,918,308
# Actually that is still doable as binary. Not sure it's viewable in Chrome
# though.
# 14 bytes * 14.9M is 209 MB.
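
# Sanity check on the estimate above, with a hypothetical (not pytrace.py's
# actual) 14-byte record layout: 1-byte event type, 8-byte timestamp,
# 4-byte code location ID, 1-byte flags.
event-record-size() {
  python -c '
import struct
FMT = "=BdIB"  # "=" means standard sizes, no padding
print(struct.calcsize(FMT))               # 14 bytes per event
print("%.0f MB" % (14 * 14918308 / 1e6))  # ~209 MB total
'
}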

abuild-trace() {
  _PY_TRACE=abuild.pytrace time "${OSH_PARSE[@]}" $ABUILD
}

#
# Depends on pytracing, which is also too slow.
#

# Trace a parsing function
parse() {
  #local script=$ABUILD
  local script=$0
  time bin/oil.py osh --ast-format none -n $script >/dev/null
}

# Trace the execution
execute() {
  #local script=$ABUILD
  local script=$0
  #time bin/oil.py osh -c 'echo hi'
  time bin/oil.py osh $script

  ls -l -h *.json
}

# Idea: I want a flame graph based on deterministic data! That way you get
# the full stack trace.

# It wasn't happening in the python-flamegraph stuff for some reason. Not
# sure why. I think it is because I/O was exaggerated.
#
# Interpreter hook:
#
#   for thread_id, frame in sys._current_frames().items():
#     if thread_id == my_thread:
#       continue

# Note that opening file descriptors can cause bugs! I have to open the
# trace file above descriptor 10! (sampling-hook-demo below is a sketch of
# the hook.)
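
# A runnable sketch of that sampling hook (names and the interval are mine,
# not python-flamegraph's): a daemon thread periodically walks
# sys._current_frames() and tallies every stack except its own.
sampling-hook-demo() {
  python -c '
import collections, sys, threading, time, traceback

counts = collections.Counter()

def sample(interval=0.001):
    me = threading.current_thread().ident
    while True:
        for thread_id, frame in sys._current_frames().items():
            if thread_id == me:
                continue  # skip the sampler thread itself
            names = [t[2] for t in traceback.extract_stack(frame)]
            counts[";".join(names)] += 1
        time.sleep(interval)

t = threading.Thread(target=sample)
t.daemon = True
t.start()

def fib(n):
    return n if n < 2 else fib(n-1) + fib(n-2)
fib(27)  # the workload being sampled

for stack, n in counts.most_common(5):
    print("%6d %s" % (n, stack))
'
}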

# python-flamegraph:
#   - suffers from measurement error due to threads.
#   - is RunCommandSub being inflated?
#     - well, actually I'm not sure. I have to do it myself on a single
#       thread and see.
# pytracing:
#   - the format is too bloated. It can't handle abuild -h, so I have to
#     optimize it.
#
# I want to unify these two approaches: both flame graphs and function traces.
#
# Advantage: sys.setprofile() gets C function call events!
#
# Reservoir sampling! Get a list of all unique stacks.
#
# You can figure out the stack from the call/return sequence, so you can use
# the reservoir sampling algorithm to get, say, 100,000 random stacks out of
# 14M events. (reservoir-demo below is a sketch.)
#
# sys._getframe()
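
# Sketch of that reservoir idea (the event format and names are
# hypothetical): rebuild the current stack from call/return events, and keep
# a uniform random sample of k stack snapshots no matter how many events
# stream past.
reservoir-demo() {
  python -c '
import random

def sample_stacks(events, k):
    """events is a stream of ("call", name) / ("return", None) pairs."""
    stack = []
    reservoir = []
    n = 0
    for event, name in events:
        if event == "call":
            stack.append(name)
        else:
            stack.pop()
        n += 1
        snapshot = ";".join(stack)
        if len(reservoir) < k:
            reservoir.append(snapshot)
        else:
            j = random.randrange(n)  # algorithm R: keep with probability k/n
            if j < k:
                reservoir[j] = snapshot
    return reservoir

events = [("call", "main"), ("call", "parse"), ("return", None),
          ("call", "execute"), ("return", None), ("return", None)]
print(sample_stacks(events, 3))
'
}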

"$@"