benchmarks/report.R

OILS / benchmarks / report.R View on Github | oilshell.org

1351 lines, 936 significant

1	#!/usr/bin/env Rscript
2	#
3	# benchmarks/report.R -- Analyze data collected by shell scripts.
4	#
5	# Usage:
6	# benchmarks/report.R OUT_DIR [TIMES_CSV...]
7
8	# Suppress warnings about functions masked from 'package:stats' and 'package:base'
9	# filter, lag
10	# intersect, setdiff, setequal, union
11	library(dplyr, warn.conflicts = FALSE)
12	library(tidyr) # spread()
13	library(stringr)
14
15	source('benchmarks/common.R')
16
17	options(stringsAsFactors = F)
18
# For pretty printing: format numbers with ',' as the thousands separator.
commas <- function(x) {
  with_commas <- format(x, big.mark = ',')
  with_commas
}
23
# Github URL (master branch of oilshell/oil) for a path in the repo.
sourceUrl <- function(path) {
  url_format <- 'https://github.com/oilshell/oil/blob/master/%s'
  sprintf(url_format, path)
}
27
# Takes a filename, not a path.  The file is linked under
# benchmarks/testdata/ in the oilshell/oil Github repo.
sourceUrl2 <- function(filename) {
  url_format <-
    'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s'
  sprintf(url_format, filename)
}
34
# Github URL of a mycpp example; '.py' is appended to the given name.
mycppUrl <- function(path) {
  url_format <- 'https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py'
  sprintf(url_format, path)
}
38
39
# TODO: Set up cgit because Github links are slow.
#
# Github URL into the oilshell/benchmark-data repo: <subdir>/<name><suffix>.
benchmarkDataLink <- function(subdir, name, suffix) {
  #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
  url_format <- 'https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s'
  sprintf(url_format, subdir, name, suffix)
}
46
# Relative link '../<subdir>/<name><suffix>'; same signature as
# benchmarkDataLink so the two are interchangeable.
provenanceLink <- function(subdir, name, suffix) {
  relative_format <- '../%s/%s%s'
  sprintf(relative_format, subdir, name, suffix)
}
50
51
GetOshLabel <- function(shell_hash, prov_dir) {
  ### Given a string, return another string.
  ###
  ### Reads <prov_dir>/shell-id/osh-<shell_hash>/sh-path.txt and classifies the
  ### osh build by the path found there.  Stops if the file is missing or no
  ### pattern matches.

  sh_path_file <- sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)

  if (!file.exists(sh_path_file)) {
    stop(sprintf("%s doesn't exist", sh_path_file))
  }

  Log('Reading %s', sh_path_file)
  contents <- readLines(sh_path_file)

  # The patterns are tried in this order; 'bin/osh' would also match
  # '_bin/osh', so the '_bin/osh' test has to come first.
  if (length(grep('_bin/osh', contents)) > 0) {
    return('osh-ovm')
  }
  if (length(grep('bin/osh', contents)) > 0) {
    return('osh-cpython')
  }
  if (length(grep('_bin/.*/osh', contents)) > 0) {
    return('osh-native')
  }
  stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
}
74
# Path suffixes that identify the optimized C++ osh binary.
opt_suffix1 <- '_bin/cxx-opt/osh'
opt_suffix2 <- '_bin/cxx-opt-sh/osh'
77
ShellLabels <- function(shell_name, shell_hash, num_hosts) {
  ### Given 2 vectors, return a vector of readable labels.
  ###
  ### shell_name: character vector of shell names or paths
  ### shell_hash: parallel vector of hashes; only consulted for 'osh' entries
  ### num_hosts:  1 means provenance is read from _tmp, otherwise from
  ###             ../benchmark-data/
  ###
  ### Returns a character vector the same length as shell_name (empty input
  ### gives an empty result).

  # TODO: Clean up callers.  Some metrics call this function with a
  # shell/runtime BASENAME, and others a PATH
  # - e.g. ComputeReport calls this with runtime_name which is actually a PATH

  #Log('name %s', shell_name)
  #Log('hash %s', shell_hash)

  if (num_hosts == 1) {
    prov_dir <- '_tmp'
  } else {
    prov_dir <- '../benchmark-data/'
  }

  # Preallocate, and use seq_along() so that an empty shell_name does not
  # iterate over c(1, 0).
  labels <- character(length(shell_name))
  for (i in seq_along(shell_name)) {
    sh <- shell_name[i]
    if (sh == 'osh') {
      # shell_hash is intentionally only touched in this branch, so the
      # argument stays unevaluated when there is no 'osh' entry.
      label <- GetOshLabel(shell_hash[i], prov_dir)

    } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      label <- 'opt/osh'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label <- 'bumpleak/osh'

    } else {
      label <- sh
    }

    Log('[%s] [%s]', shell_name[i], label)
    labels[i] <- label
  }

  return(labels)
}
116
# Simple version of the above, used by benchmarks/gc
#
# Maps each path in sh_path to a label by its suffix; paths that match no
# known suffix are returned unchanged.  Returns a character vector the same
# length as sh_path (empty input gives an empty result).
ShellLabelFromPath <- function(sh_path) {
  # Preallocate, and use seq_along() so that an empty sh_path does not iterate
  # over c(1, 0).
  labels <- character(length(sh_path))
  for (i in seq_along(sh_path)) {
    sh <- sh_path[i]

    if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      # the opt binary is osh-native
      label <- 'osh-native'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label <- 'bumpleak/osh'

    } else if (endsWith(sh, '_bin/osh')) {  # the app bundle
      label <- 'osh-ovm'

    } else if (endsWith(sh, 'bin/osh')) {
      label <- 'osh-cpython'

    } else {
      label <- sh
    }
    labels[i] <- label
  }
  return(labels)
}
143
DistinctHosts <- function(t) {
  ### Distinct (host_name, host_hash) rows of t, with a host_label column.

  hosts <- distinct(t, host_name, host_hash)
  # The label is just the name
  hosts$host_label <- hosts$host_name
  hosts
}
150
DistinctShells <- function(t, num_hosts = -1) {
  ### Distinct (shell_name, shell_hash) rows of t, with a shell_label column.
  ###
  ### num_hosts is forwarded to ShellLabels(); the default of -1 means "count
  ### the distinct hosts in t".

  shells <- distinct(t, shell_name, shell_hash)

  Log('')
  Log('Labeling shells')

  # Calculate it if not passed
  if (num_hosts == -1) {
    num_hosts <- nrow(DistinctHosts(t))
  }

  shell_labels <- ShellLabels(shells$shell_name, shells$shell_hash, num_hosts)
  shells$shell_label <- shell_labels
  shells
}
167
ParserReport <- function(in_dir, out_dir) {
  ### Build the osh-parser report tables.
  ###
  ### Reads times.csv, lines.csv, raw-data.csv and cachegrind.tsv from in_dir,
  ### and writes summary tables, per-file tables and host/shell provenance
  ### under out_dir.  Which per-file tables are written depends on whether a
  ### host labelled 'no-host' is present.

  times <- read.csv(file.path(in_dir, 'times.csv'))
  lines <- read.csv(file.path(in_dir, 'lines.csv'))
  raw_data <- read.csv(file.path(in_dir, 'raw-data.csv'))

  cachegrind <- readTsv(file.path(in_dir, 'cachegrind.tsv'))

  # Remove failures
  times <- times %>% filter(status == 0) %>% select(-c(status))
  cachegrind <- cachegrind %>% filter(status == 0) %>% select(-c(status))

  # Add the number of lines, joining on path, and compute lines/ms
  joined_times <- times %>%
    left_join(lines, by = c('path')) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path),
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           lines_per_ms = num_lines / elapsed_ms) %>%
    select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs))

  #print(head(times))
  #print(head(lines))
  #print(head(vm))
  #print(head(joined_times))

  print(summary(joined_times))

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  distinct_hosts <- DistinctHosts(joined_times)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells <- DistinctShells(joined_times)
  Log('')
  Log('Distinct shells')
  print(distinct_shells)

  # Replace name/hash combinations with labels.
  joined_times <- joined_times %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
    select(-c(host_name, host_hash, shell_name, shell_hash))

  # Like 'times', but do shell_label as one step
  # Hack: we know benchmarks/auto.sh runs this on one machine
  distinct_shells_2 <- DistinctShells(cachegrind,
                                      num_hosts = nrow(distinct_hosts))
  joined_cachegrind <- cachegrind %>%
    left_join(lines, by = c('path')) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
    select(-c(shell_name, shell_hash)) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
    select(-c(path))

  Log('summary(joined_times):')
  print(summary(joined_times))
  Log('head(joined_times):')
  print(head(joined_times))

  # Summarize rates by platform/shell
  times_summary <- joined_times %>%
    mutate(host_label = paste("host", host_label)) %>%
    group_by(host_label, shell_label) %>%
    summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
    mutate(lines_per_ms = total_lines / total_ms) %>%
    select(-c(total_ms)) %>%
    spread(key = host_label, value = lines_per_ms)

  # Sort by parsing rate on the fast machine
  if ("host lenny" %in% colnames(times_summary)) {
    times_summary <- times_summary %>% arrange(desc(`host lenny`))
  } else {
    times_summary <- times_summary %>% arrange(desc(`host no-host`))
  }

  Log('times_summary:')
  print(times_summary)

  # Summarize cachegrind by platform/shell
  # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
  cachegrind_summary <- joined_cachegrind %>%
    group_by(shell_label) %>%
    summarize(total_lines = sum(num_lines),
              total_irefs = sum(as.numeric(irefs))) %>%
    mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
    select(-c(total_irefs))

  if ("no-host" %in% distinct_hosts$host_label) {

    # We don't have all the shells
    elapsed <- NULL
    rate <- NULL
    max_rss <- NULL
    instructions <- NULL

    times_flat <- joined_times %>%
      select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
               num_lines, filename, filename_HREF)) %>%
      arrange(filename, elapsed_ms)

    cachegrind_flat <- joined_cachegrind %>%
      select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
      arrange(filename, irefs)

  } else {

    times_flat <- NULL
    cachegrind_flat <- NULL

    # Elapsed seconds for each shell by platform and file
    elapsed <- joined_times %>%
      select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = elapsed_ms) %>%
      arrange(host_label, num_lines) %>%
      mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               osh_to_bash_ratio, num_lines, filename, filename_HREF))

    Log('\n')
    Log('ELAPSED')
    print(elapsed)

    # Rates by file and shell
    rate <- joined_times %>%
      select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = lines_per_ms) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('RATE')
    print(rate)

    # Memory usage by file
    max_rss <- joined_times %>%
      select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
      spread(key = shell_label, value = max_rss_MB) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('MAX RSS')
    print(max_rss)

    Log('\n')
    Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
    print(joined_cachegrind)
    #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))

    # Cachegrind instructions by file
    instructions <- joined_cachegrind %>%
      mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
      select(-c(irefs)) %>%
      spread(key = shell_label, value = thousand_irefs_per_line) %>%
      arrange(num_lines) %>%
      select(c(bash, dash, mksh, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('instructions has %d rows', nrow(instructions))
    print(instructions)
  }

  WriteProvenance(distinct_hosts, distinct_shells, out_dir)

  raw_data_table <- tibble(
    filename = basename(as.character(raw_data$path)),
    filename_HREF = benchmarkDataLink('osh-parser', filename, '')
  )
  #print(raw_data_table)

  writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))

  precision <- SamePrecision(0)  # lines per ms
  writeCsv(times_summary, file.path(out_dir, 'summary'), precision)

  precision <- ColumnPrecision(list(), default = 1)
  writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'),
           precision)

  if (!is.null(times_flat)) {
    precision <- SamePrecision(0)
    writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
  }

  if (!is.null(cachegrind_flat)) {
    precision <- SamePrecision(0)
    writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
  }

  # elapsed is only set when there is NO 'no-host' host, i.e. in the branch
  # above that builds the per-shell tables.
  if (!is.null(elapsed)) {
    # Round to nearest millisecond, but the ratio has a decimal point.
    precision <- ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
    writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)

    precision <- SamePrecision(0)
    writeCsv(rate, file.path(out_dir, 'rate'), precision)

    writeCsv(max_rss, file.path(out_dir, 'max_rss'))

    precision <- SamePrecision(1)
    writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
  }

  Log('Wrote %s', out_dir)
}
400
WriteProvenance <- function(distinct_hosts, distinct_shells, out_dir, tsv = F) {
  ### Print the host and shell tables and write them as 'hosts' and 'shells'.
  ###
  ### With exactly one host the ID links are relative (provenanceLink);
  ### otherwise they point at the benchmark-data repo (benchmarkDataLink).
  ### tsv selects writeTsv instead of writeCsv.

  linkify <- if (nrow(distinct_hosts) == 1) provenanceLink else benchmarkDataLink

  Log('distinct_hosts')
  print(distinct_hosts)
  Log('')

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  # Should be:
  # host_id_url
  # And then csv_to_html will be smart enough?  It should take --url flag?
  host_ids <- paste(distinct_hosts$host_name, distinct_hosts$host_hash,
                    sep = '-')
  host_table <- tibble(
    host_label = distinct_hosts$host_label,
    host_id = host_ids,
    host_id_HREF = linkify('host-id', host_id, '/')
  )
  Log('host_table')
  print(host_table)
  Log('')

  shell_ids <- paste(distinct_shells$shell_name, distinct_shells$shell_hash,
                     sep = '-')
  shell_table <- tibble(
    shell_label = distinct_shells$shell_label,
    shell_id = shell_ids,
    shell_id_HREF = linkify('shell-id', shell_id, '/')
  )

  Log('shell_table')
  print(shell_table)
  Log('')

  # Same two files either way; only the writer differs.
  write_table <- if (tsv) writeTsv else writeCsv
  write_table(host_table, file.path(out_dir, 'hosts'))
  write_table(shell_table, file.path(out_dir, 'shells'))
}
450
WriteSimpleProvenance <- function(provenance, out_dir) {
  ### Derive distinct host and shell tables from a provenance table, then
  ### write them as TSV via WriteProvenance().

  Log('provenance')
  print(provenance)
  Log('')

  # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
  # benchmarks/id.sh publish-shell-id uses
  with_shell_name <- mutate(provenance, shell_name = basename(sh_path))
  distinct_shells <- distinct(with_shell_name,
                              shell_label, shell_name, shell_hash)

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  distinct_hosts <- distinct(provenance, host_label, host_name, host_hash)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
471
RuntimeReport <- function(in_dir, out_dir) {
  ### Build the osh-runtime report tables.
  ###
  ### Reads times.tsv, gc_stats.tsv and provenance.tsv from in_dir; stops if
  ### any task has a nonzero status.  Writes elapsed, page_faults, max_rss,
  ### gc_stats, details and details_io tables, plus provenance, under out_dir.

  times <- readTsv(file.path(in_dir, 'times.tsv'))

  gc_stats <- readTsv(file.path(in_dir, 'gc_stats.tsv'))
  provenance <- readTsv(file.path(in_dir, 'provenance.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some osh-runtime tasks failed')
  }

  # Joins:
  # times <= sh_path => provenance
  # times <= join_id, host_name => gc_stats

  # TODO: provenance may have rows from 2 machines.  Could validate them and
  # deduplicate.

  # It should have (host_label, host_name, host_hash)
  #                (shell_label, sh_path, shell_hash)
  provenance <- provenance %>%
    mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path))

  label_lookup <- provenance %>% distinct(sh_path, shell_label)

  Log('label_lookup')
  print(label_lookup)

  # Join with provenance for host label and shell label
  details <- times %>%
    select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
             host_name, sh_path, workload)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload)

  details_io <- times %>%
    select(c(task_id, host_name, sh_path, workload,
             minor_faults, major_faults, swaps, in_block, out_block,
             signals, voluntary_ctx, involuntary_ctx)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload)

  Log('details')
  print(details)

  # Shared tail of the three per-shell comparison tables below: add the two
  # ratios against bash, sort, and fix the column order.
  add_bash_ratios <- function(wide) {
    wide %>%
      mutate(py_bash_ratio = `osh-cpython` / bash) %>%
      mutate(native_bash_ratio = `osh-native` / bash) %>%
      arrange(workload, host_name) %>%
      select(c(workload, host_name,
               bash, dash, `osh-cpython`, `osh-native`,
               py_bash_ratio, native_bash_ratio))
  }

  # Elapsed time comparison
  elapsed <- details %>%
    select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    add_bash_ratios()

  Log('elapsed')
  print(elapsed)

  # Minor Page Faults Comparison
  page_faults <- details_io %>%
    select(c(host_name, shell_label, workload, minor_faults)) %>%
    spread(key = shell_label, value = minor_faults) %>%
    add_bash_ratios()

  Log('page_faults')
  print(page_faults)

  # Max RSS comparison
  max_rss <- details %>%
    select(c(host_name, shell_label, workload, max_rss_MB)) %>%
    spread(key = shell_label, value = max_rss_MB) %>%
    add_bash_ratios()

  Log('max rss')
  print(max_rss)

  # Rows keyed by join_id ("gc-<task_id>") for joining onto gc_stats
  gc_details <- details %>%
    select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
    mutate(join_id = sprintf("gc-%d", task_id)) %>%
    select(-c(task_id))

  Log('GC details')
  print(gc_details)
  Log('')

  Log('GC stats')
  print(gc_stats)
  Log('')

  gc_stats <- gc_stats %>%
    left_join(gc_details, by = c('join_id', 'host_name')) %>%
    select(-c(join_id, roots_capacity, objs_capacity)) %>%
    # Do same transformations as GcReport()
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    select(-c(bytes_allocated)) %>%
    rename(num_gc_done = num_collections) %>%
    # Put these columns first
    relocate(workload, host_name,
             elapsed_ms, max_gc_millis, total_gc_millis,
             allocated_MB, max_rss_MB, num_allocated)

  Log('After GC stats')
  print(gc_stats)
  Log('')

  WriteSimpleProvenance(provenance, out_dir)

  # milliseconds don't need decimal digit
  precision <- ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
                                    `osh-native` = 0, py_bash_ratio = 2,
                                    native_bash_ratio = 2))
  writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
  writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)

  precision2 <- ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  precision3 <- ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                                default = 0)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)

  writeTsv(details, file.path(out_dir, 'details'), precision3)
  writeTsv(details_io, file.path(out_dir, 'details_io'))

  Log('Wrote %s', out_dir)
}
626
VmBaselineReport <- function(in_dir, out_dir) {
  ### Read vm-baseline.tsv from in_dir, convert the metric values from KiB to
  ### MB, put VmRSS and VmPeak in separate columns, and write 'vm-baseline'
  ### under out_dir.

  vm <- readTsv(file.path(in_dir, 'vm-baseline.tsv'))
  #print(vm)

  # Not using DistinctHosts() because field host_hash isn't collected
  num_hosts <- nrow(distinct(vm, host))

  vm <- vm %>%
    rename(kib = metric_value) %>%
    mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
           megabytes = kib * 1024 / 1e6) %>%
    select(-c(shell_name, kib)) %>%
    spread(key = c(metric_name), value = megabytes) %>%
    rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
    select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
    arrange(shell_label, shell_hash, host, VmPeak_MB)

  print(vm)

  writeTsv(vm, file.path(out_dir, 'vm-baseline'))
}
649
WriteOvmBuildDetails <- function(distinct_hosts, distinct_compilers, out_dir) {
  ### Print and write the 'hosts' and 'compilers' tables (TSV) for the
  ### ovm-build report.  IDs are '<name>-<hash>' and link into benchmark-data.

  host_ids <- paste(distinct_hosts$host_name, distinct_hosts$host_hash,
                    sep = '-')
  host_table <- tibble(
    host_label = distinct_hosts$host_label,
    host_id = host_ids,
    host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
  )
  print(host_table)

  compiler_ids <- paste(distinct_compilers$compiler_label,
                        distinct_compilers$compiler_hash, sep = '-')
  compiler_table <- tibble(
    compiler_label = distinct_compilers$compiler_label,
    compiler_id = compiler_ids,
    compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
  )
  print(compiler_table)

  writeTsv(host_table, file.path(out_dir, 'hosts'))
  writeTsv(compiler_table, file.path(out_dir, 'compilers'))
}
670
OvmBuildReport <- function(in_dir, out_dir) {
  ### Build the ovm-build report tables.
  ###
  ### Reads times.tsv, bytecode-size.tsv, bin-sizes.tsv, native-sizes.tsv and
  ### raw-data.tsv from in_dir; stops if any task has a nonzero status.  Writes
  ### times, bytecode-size, sizes and native-sizes tables, plus host/compiler
  ### details, under out_dir.

  times <- readTsv(file.path(in_dir, 'times.tsv'))
  bytecode_size <- readTsv(file.path(in_dir, 'bytecode-size.tsv'))
  bin_sizes <- readTsv(file.path(in_dir, 'bin-sizes.tsv'))
  native_sizes <- readTsv(file.path(in_dir, 'native-sizes.tsv'))
  # Read but not used below
  raw_data <- readTsv(file.path(in_dir, 'raw-data.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some ovm-build tasks failed')
  }

  distinct_hosts <- times %>% distinct(host_name, host_hash)
  distinct_hosts$host_label <- distinct_hosts$host_name

  distinct_compilers <- times %>% distinct(compiler_path, compiler_hash)
  distinct_compilers$compiler_label <- basename(distinct_compilers$compiler_path)

  #print(distinct_hosts)
  #print(distinct_compilers)

  WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)

  # One column of elapsed seconds per host.  The is_* flags exist only to
  # order the rows and are dropped afterwards.
  # NOTE(review): paste() adds its own space separator, so "host " yields two
  # spaces in the column name -- confirm whether that is intended.
  times <- times %>%
    select(-c(status)) %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
    select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
    mutate(src_dir = basename(src_dir),
           host_label = paste("host ", host_label),
           is_conf = str_detect(action, 'configure'),
           is_ovm = str_detect(action, 'oil.ovm'),
           is_dbg = str_detect(action, 'dbg'),
           ) %>%
    select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
           elapsed_secs) %>%
    spread(key = c(host_label), value = elapsed_secs) %>%
    arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
    select(-c(is_conf, is_ovm, is_dbg))

  #print(times)

  bytecode_size <- bytecode_size %>%
    rename(bytecode_size = num_bytes) %>%
    select(-c(path))

  sizes <- bin_sizes %>%
    # reorder
    select(c(host_label, path, num_bytes)) %>%
    left_join(bytecode_size, by = c('host_label')) %>%
    mutate(native_code_size = num_bytes - bytecode_size)

  # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
  native_sizes <- native_sizes %>%
    select(c(host_label, path, num_bytes)) %>%
    mutate(host_label = paste("host ", host_label),
           binary = basename(path),
           compiler = basename(dirname(path)),
           ) %>%
    select(-c(path)) %>%
    spread(key = c(host_label), value = num_bytes) %>%
    arrange(compiler, binary)

  # NOTE: These don't have the host and compiler.
  writeTsv(times, file.path(out_dir, 'times'))
  writeTsv(bytecode_size, file.path(out_dir, 'bytecode-size'))
  writeTsv(sizes, file.path(out_dir, 'sizes'))
  writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))

  # TODO: I want a size report too
  #writeCsv(sizes, file.path(out_dir, 'sizes'))
}
748
# Check that t has exactly num_expected distinct stdout_md5sum values.  On a
# mismatch, print the identifying columns of t and stop.
unique_stdout_md5sum <- function(t, num_expected) {
  num_unique <- n_distinct(t$stdout_md5sum)
  if (num_unique != num_expected) {
    print(select(t, c(host_name, task_name, arg1, arg2, runtime_name,
                      stdout_md5sum)))
    stop(sprintf('Expected %d unique md5sums, got %d', num_expected, num_unique))
  }
}
756
ComputeReport <- function(in_dir, out_dir) {
  ### Build the compute benchmark report tables.
  ###
  ### Reads times.tsv from in_dir; stops if any task has a nonzero status or if
  ### a task's stdout md5sums don't have the expected number of distinct
  ### values.  Writes a details table, a stdout_files table, one table per
  ### task, and host/runtime provenance under out_dir.

  # TSV file, not CSV
  times <- read.table(file.path(in_dir, 'times.tsv'), header = TRUE)
  print(times)

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some compute tasks failed')
  }

  #
  # Check correctness
  #

  times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
  # 3 different inputs
  times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)

  times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)

  # TODO:
  # - oils_cpp doesn't implement unicode LANG=C
  # - bash behaves differently on your desktop vs. in the container
  #   - might need layer-locales in the image?

  #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
  # Ditto here
  #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  # Runtimes are called shells, as a hack for code reuse
  tmp <- times %>%
    mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
    select(c(host_name, host_hash, shell_name, shell_hash))

  distinct_hosts <- DistinctHosts(tmp)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells <- DistinctShells(tmp)
  Log('')
  Log('Distinct runtimes')
  print(distinct_shells)

  num_hosts <- nrow(distinct_hosts)

  # runtime_hash must still be a column when ShellLabels() is called: it is
  # read for any runtime named 'osh'.  It is dropped right after labelling, so
  # the resulting columns are unchanged.
  details <- times %>%
    select(-c(status, stdout_md5sum, stdout_filename, host_hash)) %>%
    mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(runtime_name, runtime_hash,
              elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    arrange(host_name, task_name, arg1, arg2, user_ms)

  stdout_files <- times %>%
    mutate(
      runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
      stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
    select(c(host_name, task_name, arg1, arg2, runtime_label,
             stdout_md5sum, stdout_md5sum_HREF))

  hello <- details %>% filter(task_name == 'hello') %>% select(-c(task_name))
  fib <- details %>% filter(task_name == 'fib') %>% select(-c(task_name))
  word_freq <- details %>%
    filter(task_name == 'word_freq') %>%
    select(-c(task_name))
  # There's no arg2
  parse_help <- details %>%
    filter(task_name == 'parse_help') %>%
    select(-c(task_name, arg2))

  bubble_sort <- details %>%
    filter(task_name == 'bubble_sort') %>%
    select(-c(task_name))
  palindrome <- details %>%
    filter(task_name == 'palindrome' & arg1 == 'unicode') %>%
    select(-c(task_name))

  precision <- ColumnPrecision(list(max_rss_MB = 1), default = 0)
  writeTsv(details, file.path(out_dir, 'details'), precision)

  writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)

  writeTsv(hello, file.path(out_dir, 'hello'), precision)
  writeTsv(fib, file.path(out_dir, 'fib'), precision)
  writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
  writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)

  writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
  writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
854
# Write the rows of `times` whose task column equals task_name, without the
# task column, to <out_dir>/<task_name> as TSV.
WriteOneTask <- function(times, out_dir, task_name, precision) {
  task_rows <- filter(times, task == task_name)
  one_task <- select(task_rows, -c(task))

  writeTsv(one_task, file.path(out_dir, task_name), precision)
}
862
# Factor levels used to sort rows by sh_path in the GC reports.
SHELL_ORDER <- c(
  'dash',
  'bash',
  'zsh',
  '_bin/cxx-opt+bumpleak/osh',
  '_bin/cxx-opt+bumproot/osh',
  '_bin/cxx-opt+bumpsmall/osh',
  '_bin/cxx-opt/osh',
  '_bin/cxx-opt+nopool/osh'
)
871
GcReport <- function(in_dir, out_dir) {
  ### Build the GC benchmark report tables.
  ###
  ### Reads raw/times.tsv and stage1/gc_stats.tsv from in_dir; stops if any
  ### task has a nonzero status.  Writes 'times', 'gc_stats' and one table per
  ### task under out_dir.

  times <- read.table(file.path(in_dir, 'raw/times.tsv'), header = TRUE)
  gc_stats <- read.table(file.path(in_dir, 'stage1/gc_stats.tsv'),
                         header = TRUE)

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  # Change units and order columns
  times <- times %>%
    arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           shell_label = ShellLabelFromPath(sh_path)
           ) %>%
    select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB,
             shell_label, shell_runtime_opts))

  # Join and order columns
  gc_stats <- gc_stats %>%
    left_join(times, by = c('join_id')) %>%
    arrange(desc(task)) %>%
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    # try to make the table skinnier
    rename(num_gc_done = num_collections) %>%
    select(task, elapsed_ms, max_gc_millis, total_gc_millis,
           allocated_MB, max_rss_MB, num_allocated,
           num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
           shell_label)

  # join_id was only needed for the join above
  times <- times %>% select(-c(join_id))

  precision <- ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                               default = 0)

  writeTsv(times, file.path(out_dir, 'times'), precision)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)

  tasks <- c('parse.configure-coreutils',
             'parse.configure-cpython',
             'parse.abuild',
             'ex.compute-fib',
             'ex.bashcomp-parse-help',
             'ex.abuild-print-help')
  # Write out separate rows
  for (one_task in tasks) {
    WriteOneTask(times, out_dir, one_task, precision)
  }
}
927
GcCachegrindReport <- function(in_dir, out_dir) {
  ### Join cachegrind instruction counts (stage1/cachegrind.tsv) with
  ### raw/times.tsv on join_id, and write one table per selected task under
  ### out_dir.  Stops if any task has a nonzero status.

  times <- readTsv(file.path(in_dir, 'raw/times.tsv'))
  counts <- readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  print(times)
  print(counts)

  counts <- counts %>%
    left_join(times, by = c('join_id')) %>%
    mutate(million_irefs = irefs / 1e6) %>%
    select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER))

  precision <- NULL
  tasks <- c('parse.abuild', 'ex.compute-fib')
  for (one_task in tasks) {
    WriteOneTask(counts, out_dir, one_task, precision)
  }
}
953
MyCppReport <- function(in_dir, out_dir) {
  ### Build the mycpp example benchmark tables.
  ###
  ### Reads benchmark-table.tsv from in_dir; stops if any task has a nonzero
  ### status.  Writes user_time, sys_time and max_rss tables (one column per
  ### impl plus a C++ : Python ratio) and a details table under out_dir.

  times <- readTsv(file.path(in_dir, 'benchmark-table.tsv'))
  print(times)

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some mycpp tasks failed')
  }

  # Drop status, elapsed time, bin and task_out; convert the rest to ms / MB.
  details <- times %>%
    select(-c(status, elapsed_secs, bin, task_out)) %>%
    mutate(example_name_HREF = mycppUrl(example_name),
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(user_secs, sys_secs, max_rss_KiB))

  # Shared tail of the three tables below: add the C++ / Python ratio and
  # sort by it, ascending.
  with_ratio <- function(wide) {
    wide %>%
      mutate(`C++ : Python` = `C++` / Python) %>%
      arrange(`C++ : Python`)
  }

  user_time <- details %>%
    select(-c(sys_ms, max_rss_MB)) %>%
    spread(key = impl, value = user_ms) %>%
    with_ratio()

  sys_time <- details %>%
    select(-c(user_ms, max_rss_MB)) %>%
    spread(key = impl, value = sys_ms) %>%
    with_ratio()

  max_rss <- details %>%
    select(-c(user_ms, sys_ms)) %>%
    spread(key = impl, value = max_rss_MB) %>%
    with_ratio()

  # Sometimes it speeds up by more than 10x
  precision1 <- ColumnPrecision(list(`C++ : Python` = 3), default = 0)
  writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
  writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)

  precision2 <- ColumnPrecision(list(`C++ : Python` = 2), default = 1)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  writeTsv(details, file.path(out_dir, 'details'))
}
1001
UftraceTaskReport = function(env, task_name, summaries) {
  ### Print an allocation report for one task, and record its summary.
  ###
  ### Args:
  ###   env: maps task name -> environment holding the tables untyped, typed,
  ###     strings, slabs and reserve
  ###   task_name: which entry of env to report on
  ###   summaries: environment, mutated in place.  A 1-row table is stored in
  ###     summaries$stats[[task_name]], and the per-function allocation counts
  ###     in summaries$most_common_types[[task_name]].

  # Need this again after redirect
  MaybeDisableColor(stdout())

  task_env = env[[task_name]]

  untyped = task_env$untyped
  typed = task_env$typed
  strings = task_env$strings
  slabs = task_env$slabs
  reserve = task_env$reserve

  string_overhead = 17  # GC header (8) + len (4) + hash value (4) + NUL (1)
  strings %>% mutate(obj_len = str_len + string_overhead) -> strings

  # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
  # big/small
  #
  # And then zoom in on distributions as well

  num_allocs = nrow(untyped)
  total_bytes = sum(untyped$obj_len)

  # Histogram of allocation sizes: one row per distinct obj_len
  untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
  #print(untyped_hist)

  # Cumulative distribution over the sizes
  untyped_hist %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_allocs) ->
    alloc_sizes

  a24 = untyped_hist %>% filter(obj_len <= 24)
  a48 = untyped_hist %>% filter(obj_len <= 48)
  a96 = untyped_hist %>% filter(obj_len <= 96)

  allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
  allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
  allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs

  Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)

  # NOTE: this is a global option; it stays set after this function returns.
  options(tibble.print_min=25)

  Log('')
  Log('All allocations')
  print(alloc_sizes %>% head(22))
  print(alloc_sizes %>% tail(5))

  Log('')
  Log('Common Sizes')
  print(untyped_hist %>% arrange(desc(n)) %>% head(8))

  Log('')
  Log(' %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
  Log('')

  Log('Typed allocations')

  num_typed = nrow(typed)

  typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
    mutate(percent = n * 100.0 / num_typed) %>%
    arrange(desc(n)) -> most_common_types

  print(most_common_types %>% head(20))
  print(most_common_types %>% tail(5))

  lists = typed %>% filter(str_starts(func_name, ('List<')))
  #print(lists)

  num_lists = nrow(lists)
  total_list_bytes = num_lists * 24  # sizeof List<T> head is hard-coded

  Log('')
  Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
  Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
  Log('')

  #
  # Strings
  #

  num_strings = nrow(strings)
  total_string_bytes = sum(strings$obj_len)

  strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_strings) ->
    string_lengths

  # Percentage of strings whose length is at most 'limit', as a plain number.
  #
  # Count with <= rather than looking up the histogram row where
  # str_len == limit: that row doesn't exist when no string has exactly that
  # length, even though the cumulative percentage is still well defined.
  StrPercentAtMost = function(limit) {
    sum(strings$str_len <= limit, na.rm = TRUE) * 100.0 / num_strings
  }

  strs_6_bytes_or_less = StrPercentAtMost(6)
  strs_14_bytes_or_less = StrPercentAtMost(14)

  # Parse workload
  # 62% of strings <= 6 bytes
  # 84% of strings <= 14 bytes

  Log('Str - NewStr() and OverAllocatedStr()')
  print(string_lengths %>% head(16))
  print(string_lengths %>% tail(5))
  Log('')

  Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
      commas(sum(strings$str_len)), commas(total_string_bytes))
  Log('')
  Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
  Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
  Log('')

  #
  # Slabs
  #

  Log('NewSlab()')

  num_slabs = nrow(slabs)
  slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_slabs) ->
    slab_lengths

  slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
    arrange(desc(n)) -> slab_types

  Log(' Lengths')
  print(slab_lengths %>% head())
  print(slab_lengths %>% tail(5))
  Log('')

  Log(' Slab Types')
  print(slab_types %>% head())
  print(slab_types %>% tail(5))
  Log('')

  total_slab_items = sum(slabs$slab_len)

  Log('%s slabs, total items = %s', commas(num_slabs),
      commas(total_slab_items))
  Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
  Log('')

  #
  # reserve() calls
  #

  # There should be strictly more List::reserve() calls than NewSlab

  Log('::reserve(int n)')
  Log('')

  num_reserve = nrow(reserve)
  reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_reserve) ->
    reserve_args

  Log(' Num Items')
  print(reserve_args %>% head(15))
  print(reserve_args %>% tail(5))
  Log('')

  Log('%s reserve() calls, total items = %s', commas(num_reserve),
      commas(sum(reserve$num_items)))
  Log('')

  # Accounting for all allocations!
  Log('Untyped: %s', commas(num_allocs))
  Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
  Log('')

  num_other_typed = num_typed - num_lists

  # Summary table: a single row of pre-formatted strings for this task
  stats = tibble(task = task_name,
                 total_bytes_ = commas(total_bytes),
                 num_allocs_ = commas(num_allocs),
                 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
                 num_reserve_calls = commas(num_reserve),

                 percent_list_allocs = Percent(num_lists, num_allocs),
                 percent_slab_allocs = Percent(num_slabs, num_allocs),
                 percent_string_allocs = Percent(num_strings, num_allocs),
                 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),

                 percent_list_bytes = Percent(total_list_bytes, total_bytes),
                 percent_string_bytes = Percent(total_string_bytes, total_bytes),

                 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
                 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
                 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),

                 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
                 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less))

  # summaries is an environment, so these assignments are visible to the caller
  summaries$stats[[task_name]] = stats

  summaries$most_common_types[[task_name]] = most_common_types
}
1200
LoadUftraceTsv = function(in_dir, env) {
  ### Load the TSV tables of every task found under in_dir.
  ###
  ### For each entry 'task' of in_dir, env[[task]] is set to a new environment
  ### holding 5 tables, and a summary of each table is printed.

  # Field of the per-task environment -> file name inside the task's dir.
  # These are TSV files, not CSV.
  table_files = c(untyped = 'all-untyped.tsv',
                  typed = 'typed.tsv',
                  strings = 'strings.tsv',
                  slabs = 'slabs.tsv',
                  reserve = 'reserve.tsv')
  fields = names(table_files)

  for (task in list.files(in_dir)) {
    Log('Loading data for task %s', task)
    base_dir = file.path(in_dir, task)

    task_env = new.env()
    env[[task]] = task_env

    # Read all the tables before printing any summary
    for (field in fields) {
      task_env[[field]] = readTsv(file.path(base_dir, table_files[[field]]))
    }

    # The heading of each summary is the upper-cased field name.
    # median string length is 4, mean is 9.5!
    for (field in fields) {
      Log(toupper(field))
      print(summary(task_env[[field]]))
      Log('')
    }
  }
}
1238
Percent = function(n, total) {
  ### Format n as a percentage of total with one decimal place, e.g. '12.5%'.
  as_percent = n * 100.0 / total
  sprintf('%.1f%%', as_percent)
}
1242
PrettyPrintLong = function(d) {
  ### Print a data frame in "long" form: one output line per column of d.
  ###
  ### Each line is the column name right-aligned to 26 characters, followed by
  ### that column's values, each right-aligned to 20 characters.  A blank line
  ### is printed after certain columns to group the output.

  tr = t(d)  # transpose
  row_names = rownames(tr)

  # Extra spacing goes after these rows
  group_ends = c('num_reserve_calls',
                 'percent_string_bytes',
                 'percent_other_typed_allocs',
                 'allocs_96_bytes_or_less')

  # seq_len() rather than 1:nrow(tr): when d has no columns, 1:0 would iterate
  # over c(1, 0) and fail with an out-of-bounds subscript instead of printing
  # nothing.
  for (i in seq_len(nrow(tr))) {
    row_name = row_names[i]
    cat(sprintf('%26s', row_name))  # calculated min width manually
    cat(sprintf('%20s', tr[i,]))
    cat('\n')

    if (row_name %in% group_ends) {
      cat('\n')
    }
  }
}
1263
1264
UftraceReport = function(env, out_dir) {
  ### Write a report for each task in env, plus a summary of all tasks.
  ###
  ### Args:
  ###   env: environment whose names are task names
  ###   out_dir: directory that receives <task_name>.txt for each task, and
  ###     summary.txt
  ###
  ### Returns a list with one element, stats: the per-task summary rows bound
  ### into a single table.

  # summaries$stats should be a list of 1-row data frames
  # summaries$most_common_types should be a list of per-task tables
  summaries = new.env()

  for (task_name in names(env)) {
    report_out = file.path(out_dir, paste0(task_name, '.txt'))

    Log('Making report for task %s -> %s', task_name, report_out)

    # Redirect output to the report file.  'finally' undoes the redirect even
    # if the report raises, so output isn't left diverted afterward.
    sink(file = report_out)
    tryCatch(
      UftraceTaskReport(env, task_name, summaries),
      finally = sink()  # reset
    )
  }
  Log('')

  # Concatenate all the data frames added to the summary
  stats = bind_rows(as.list(summaries$stats))

  sink(file = file.path(out_dir, 'summary.txt'))
  tryCatch({
    #print(stats)
    #Log('')

    PrettyPrintLong(stats)
    Log('')

    mct = summaries$most_common_types
    for (task_name in names(mct)) {
      Log('Common types in workload %s', task_name)
      Log('')

      print(mct[[task_name]] %>% head(5))
      Log('')
    }
  }, finally = sink())  # reset

  # For the REPL
  return(list(stats = stats))
}
1304
main = function(argv) {
  ### Run the report selected by the command line.
  ###
  ### Args:
  ###   argv: character vector; the first 3 elements are the action, the input
  ###     directory and the output directory.  Extra elements are ignored.
  ###
  ### Stops with a usage message when fewer than 3 arguments are given.  For
  ### an unknown action, logs a message and quits with status 1.

  # Fail with a readable message rather than 'subscript out of bounds'
  if (length(argv) < 3) {
    stop('Usage: benchmarks/report.R ACTION IN_DIR OUT_DIR', call. = FALSE)
  }

  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  switch(action,
    'osh-parser' = ParserReport(in_dir, out_dir),
    'osh-runtime' = RuntimeReport(in_dir, out_dir),
    'vm-baseline' = VmBaselineReport(in_dir, out_dir),
    'ovm-build' = OvmBuildReport(in_dir, out_dir),
    'compute' = ComputeReport(in_dir, out_dir),
    'gc' = GcReport(in_dir, out_dir),
    'gc-cachegrind' = GcCachegrindReport(in_dir, out_dir),
    'mycpp' = MyCppReport(in_dir, out_dir),
    'uftrace' = {
      # Load everything into an environment first, then report on it
      d = new.env()
      LoadUftraceTsv(in_dir, d)
      UftraceReport(d, out_dir)
    },
    {
      # Default: no action matched
      Log("Invalid action '%s'", action)
      quit(status = 1)
    }
  )
  Log('PID %d done', Sys.getpid())
}
1345
# Call main() with the command line arguments, but only when there are no
# active function frames, i.e. this code is running at the top level.
if (identical(length(sys.frames()), 0L)) {
  # increase ggplot font size globally
  #theme_set(theme_grey(base_size = 20))

  main(commandArgs(trailingOnly = TRUE))
}