#!/usr/bin/env bash
#
# Use sys.setprofile() and maybe sys.settrace() to trace Oil execution.
#
# Problem: Python callbacks for sys.setprofile() are too slow, I think.
#
# Usage:
#   ./pytrace.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

export PYTHONPATH='.:vendor'

readonly BIGGEST=benchmarks/testdata/configure-coreutils
readonly GIT_COMPLETION=testdata/completion/git
readonly OSH_COMPLETION=../bash-completion/osh_completion
readonly ABUILD=benchmarks/testdata/abuild

readonly -a RUN_ABUILD=(bin/oil.py osh $ABUILD -h)
# Slightly faster but not significantly.
#readonly -a RUN_ABUILD=(_bin/osh $ABUILD -h)
readonly -a OSH_PARSE=(bin/oil.py osh --ast-format none -n)

#
# Use Python's cProfile, which uses _lsprof. This is pretty fast.
#

time-bash-run-abuild() { time bash $ABUILD -h; }

# Old: ~2.7 seconds (no tracing)
# 2017/11/27, after ASDL optimization: 0.72 seconds.
time-run-abuild() { time "${RUN_ABUILD[@]}"; }

# ~250 ms
time-parse-abuild() { time "${OSH_PARSE[@]}" $ABUILD; }

# ~160 ms
time-parse-git-completion() { time "${OSH_PARSE[@]}" $GIT_COMPLETION; }
# ~150 ms
time-parse-osh-completion() { time "${OSH_PARSE[@]}" $OSH_COMPLETION; }

# 4.3 seconds on lisa
time-parse-biggest() { time "${OSH_PARSE[@]}" $BIGGEST; }

_cprofile() {
  local out=$1
  shift
  time python -m cProfile -o $out "$@"
}

# Takes about 380 ms.
cprofile-osh-parse() {
  local in=${1:-$ABUILD}
  local out=${2:-abuild.cprofile}
  _cprofile $out "${OSH_PARSE[@]}" $in
  ls -l $out
}

cprofile-parse-abuild() {
  cprofile-osh-parse $ABUILD _tmp/abuild.cprofile
}
cprofile-parse-biggest() {
  cprofile-osh-parse $BIGGEST _tmp/biggest.cprofile
}
cprofile-run-abuild() {
  _cprofile _tmp/abuild-run.cprofile "${RUN_ABUILD[@]}"
}

# TODO: Why doesn't this run correctly? The results are different. Maybe run
# spec tests with bin/osh-cprofile and see where it goes wrong?
readonly pydir=~/src/languages/Python-2.7.15
cprofile-pyconfigure() {
  readonly REPO_ROOT=$PWD

  cd $pydir

  PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor \
    time python -m cProfile -o pyconfigure.cprofile \
    $REPO_ROOT/bin/oil.py osh myconfigure
  #_cprofile pyconfigure.cprofile \
}
print-pyconfigure() { print-cprofile $pydir/pyconfigure.cprofile; }

# TODO: Try uftrace? I guess you can compare wait4() call duration with bash
# vs. osh?
strace-run-abuild() {
  #local filter='read,wait4'
  local filter='execve,wait4'
  time strace -ff -e "$filter" "${RUN_ABUILD[@]}"
  #time strace -c "${RUN_ABUILD[@]}"
}
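
# To act on the TODO above, one way to compare wait4() time under bash vs. osh
# is a per-syscall summary. This is just a sketch: it uses strace -c
# (count/summarize) with -f to follow child processes; the summary table goes
# to stderr.
strace-summary-abuild() {
  echo '=== bash ==='
  strace -c -f bash $ABUILD -h
  echo '=== osh ==='
  strace -c -f "${RUN_ABUILD[@]}"
}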

# Yeah, I understand from this why Chrome Tracing / Flame Graphs are better.
# This format doesn't respect the stack!
# cumtime: bin/oil.py is at the top, obviously.
print-cprofile() {
  local profile=${1:-_tmp/abuild.cprofile}
  python -c '
import pstats
import sys
p = pstats.Stats(sys.argv[1])
p.sort_stats("tottime").print_stats()
' $profile
}
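
# The flat listing above loses the stack, but pstats can at least show who
# calls each hot function. A sketch; sorting by cumulative time and limiting
# to the top 20 entries is just a guess at what's useful here.
print-callers() {
  local profile=${1:-_tmp/abuild.cprofile}
  python -c '
import pstats
import sys
p = pstats.Stats(sys.argv[1])
p.sort_stats("cumulative").print_callers(20)
' $profile
}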

#
# My Own Tracing with pytrace.py. Too slow!
#

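# A rough way to see the problem stated at the top of this file: time a
# function-call-heavy loop with and without a no-op sys.setprofile() hook.
# This is only a sketch; the loop count is arbitrary and the slowdown you
# observe will vary by machine and Python version.
time-setprofile-overhead() {
  python -c '
import sys
import time

def f(i):
  return i + 1

def work():
  n = 0
  for i in range(300000):
    n += f(i)  # each call/return fires the profile hook
  return n

start = time.time()
work()
print("no hook:   %.3f s" % (time.time() - start))

sys.setprofile(lambda frame, event, arg: None)
start = time.time()
work()
sys.setprofile(None)
print("noop hook: %.3f s" % (time.time() - start))
'
}
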
# Abuild call/return events:
# Parsing: 4,345,706 events
# Execution: 530,924 events

# Total events:
# 14,918,308
# Actually that is still doable as binary. Not sure it's viewable in Chrome
# though.
# 14 bytes * 14.9M is 209 MB.

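# Sanity check of the size estimate above, with a sketch of one possible
# fixed-size record. The layout here (u16 event, i64 timestamp, u32 function
# id = 14 bytes) is made up for illustration; it is not what pytrace.py
# actually writes.
trace-size-estimate() {
  python -c '
import struct

record = struct.pack("<HqI", 1, 1234567890, 42)  # event, timestamp, func id
print("record size: %d bytes" % len(record))
print("14.9M events: about %.0f MB" % (len(record) * 14.9e6 / 1e6))
'
}
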
abuild-trace() {
  _PY_TRACE=abuild.pytrace time "${OSH_PARSE[@]}" $ABUILD
}

#
# Depends on pytracing, which is also too slow.
#

# Trace a parsing function
parse() {
  #local script=$ABUILD
  local script=$0
  time bin/oil.py osh --ast-format none -n $script >/dev/null
}

# Trace the execution
execute() {
  #local script=$ABUILD
  local script=$0
  #time bin/oil.py osh -c 'echo hi'
  time bin/oil.py osh $0

  ls -l -h *.json
}

# Idea: I want a flame graph based on deterministic data! That way you get the
# full stack trace.

# It wasn't happening in the python-flamegraph stuff for some reason. Not sure
# why. I think it is because I/O was exaggerated.
#
# Interpreter hook:
#
# for thread_id, frame in sys._current_frames().items():
#   if thread_id == my_thread:
#     continue

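# A sketch of the interpreter hook above: a background thread that
# periodically walks sys._current_frames() and skips its own thread, which is
# roughly what python-flamegraph does. The interval, duration, and fib()
# workload here are arbitrary choices for the demo.
sampling-hook-demo() {
  python -c '
import collections
import sys
import threading
import time

counts = collections.Counter()

def sampler(my_thread, interval, duration):
  deadline = time.time() + duration
  while time.time() < deadline:
    for thread_id, frame in sys._current_frames().items():
      if thread_id == my_thread:
        continue  # do not sample the sampler thread itself
      stack = []
      f = frame
      while f is not None:  # walk outward to build the stack
        stack.append(f.f_code.co_name)
        f = f.f_back
      counts[";".join(reversed(stack))] += 1
    time.sleep(interval)

def fib(n):
  return n if n < 2 else fib(n - 1) + fib(n - 2)

t = threading.Thread(
    target=sampler, args=(threading.current_thread().ident, 0.001, 0.5))
t.start()
while t.is_alive():
  fib(18)
t.join()

for stack, count in counts.most_common(5):
  print("%5d %s" % (count, stack))
'
}
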
# Note that opening file descriptors can cause bugs! I have to open the trace
# file above descriptor 10!

# python-flamegraph
# - suffers from measurement error due to threads.
# - is RunCommandSub being inflated?
#   - well, actually I'm not sure. I have to do it myself on a single thread
#     and see.
# pytracing:
# - the format is too bloated. It can't handle abuild -h. So I have to
#   optimize it.
#
# I want to unify these two approaches: both flame graphs and function traces.
#
# Advantage: sys.setprofile() gets C function call events!
#
# Reservoir Sampling! Get a list of all unique stacks.
#
# You can figure out the stack from the call/return event sequence. So you can
# use the reservoir sampling algorithm to get, say, 100,000 random stacks out
# of 14M events.
#
# sys._getframe()

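# A sketch of the reservoir-sampling idea above, over a synthetic stream of
# call/return events. The event format (a list of (event, function) pairs)
# and the workload are made up for illustration; real pytrace.py events would
# be plugged in instead.
reservoir-sample-demo() {
  python -c '
import random

def sample_stacks(events, k):
  """Keep k uniformly random stack snapshots from a stream of events."""
  stack = []
  samples = []
  n = 0  # events seen so far
  for event, func in events:
    if event == "call":
      stack.append(func)
    elif event == "return" and stack:
      stack.pop()
    n += 1
    snapshot = tuple(stack)
    if len(samples) < k:
      samples.append(snapshot)
    else:
      j = random.randint(0, n - 1)
      if j < k:  # replace with probability k/n
        samples[j] = snapshot
  return samples

# Synthetic stream: f calls g, which calls h, over and over.
events = []
for i in range(10000):
  events.extend([("call", "f"), ("call", "g"), ("call", "h"),
                 ("return", "h"), ("return", "g"), ("return", "f")])

for s in sample_stacks(events, 10):
  print(" -> ".join(s) or "(empty stack)")
'
}
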
"$@"
|