OILS / benchmarks / report.R View on Github | oilshell.org

1351 lines, 936 significant
1#!/usr/bin/env Rscript
2#
3# benchmarks/report.R -- Analyze data collected by shell scripts.
4#
5# Usage:
6# benchmarks/report.R OUT_DIR [TIMES_CSV...]
7
8# Suppress warnings about functions masked from 'package:stats' and 'package:base'
9# filter, lag
10# intersect, setdiff, setequal, union
11library(dplyr, warn.conflicts = FALSE)
12library(tidyr) # spread()
13library(stringr)
14
15source('benchmarks/common.R')
16
17options(stringsAsFactors = F)
18
19# For pretty printing
20commas = function(x) {
21 format(x, big.mark=',')
22}
23
24sourceUrl = function(path) {
25 sprintf('https://github.com/oilshell/oil/blob/master/%s', path)
26}
27
28# Takes a filename, not a path.
29sourceUrl2 = function(filename) {
30 sprintf(
31 'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s',
32 filename)
33}
34
35mycppUrl = function(path) {
36 sprintf('https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py', path)
37}
38
39
40# TODO: Set up cgit because Github links are slow.
41benchmarkDataLink = function(subdir, name, suffix) {
42 #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
43 sprintf('https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s',
44 subdir, name, suffix)
45}
46
47provenanceLink = function(subdir, name, suffix) {
48 sprintf('../%s/%s%s', subdir, name, suffix)
49}
50
51
52GetOshLabel = function(shell_hash, prov_dir) {
53 ### Given a string, return another string.
54
55 path = sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)
56
57 if (file.exists(path)) {
58 Log('Reading %s', path)
59 lines = readLines(path)
60 if (length(grep('_bin/osh', lines)) > 0) {
61 label = 'osh-ovm'
62 } else if (length(grep('bin/osh', lines)) > 0) {
63 label = 'osh-cpython'
64 } else if (length(grep('_bin/.*/osh', lines)) > 0) {
65 label = 'osh-native'
66 } else {
67 stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
68 }
69 } else {
70 stop(sprintf("%s doesn't exist", path))
71 }
72 return(label)
73}
74
75opt_suffix1 = '_bin/cxx-opt/osh'
76opt_suffix2 = '_bin/cxx-opt-sh/osh'
77
78ShellLabels = function(shell_name, shell_hash, num_hosts) {
79 ### Given 2 vectors, return a vector of readable labels.
80
81 # TODO: Clean up callers. Some metrics all this function with a
82 # shell/runtime BASENAME, and others a PATH
83 # - e.g. ComputeReport calls this with runtime_name which is actually a PATH
84
85 #Log('name %s', shell_name)
86 #Log('hash %s', shell_hash)
87
88 if (num_hosts == 1) {
89 prov_dir = '_tmp'
90 } else {
91 prov_dir = '../benchmark-data/'
92 }
93
94 labels = c()
95 for (i in 1:length(shell_name)) {
96 sh = shell_name[i]
97 if (sh == 'osh') {
98 label = GetOshLabel(shell_hash[i], prov_dir)
99
100 } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
101 label = 'opt/osh'
102
103 } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
104 label = 'bumpleak/osh'
105
106 } else {
107 label = sh
108 }
109
110 Log('[%s] [%s]', shell_name[i], label)
111 labels = c(labels, label)
112 }
113
114 return(labels)
115}
116
117# Simple version of the above, used by benchmarks/gc
118ShellLabelFromPath = function(sh_path) {
119 labels = c()
120 for (i in 1:length(sh_path)) {
121 sh = sh_path[i]
122
123 if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
124 # the opt binary is osh-native
125 label = 'osh-native'
126
127 } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
128 label = 'bumpleak/osh'
129
130 } else if (endsWith(sh, '_bin/osh')) { # the app bundle
131 label = 'osh-ovm'
132
133 } else if (endsWith(sh, 'bin/osh')) {
134 label = 'osh-cpython'
135
136 } else {
137 label = sh
138 }
139 labels = c(labels, label)
140 }
141 return(labels)
142}
143
144DistinctHosts = function(t) {
145 t %>% distinct(host_name, host_hash) -> distinct_hosts
146 # The label is just the name
147 distinct_hosts$host_label = distinct_hosts$host_name
148 return(distinct_hosts)
149}
150
151DistinctShells = function(t, num_hosts = -1) {
152 t %>% distinct(shell_name, shell_hash) -> distinct_shells
153
154 Log('')
155 Log('Labeling shells')
156
157 # Calculate it if not passed
158 if (num_hosts == -1) {
159 num_hosts = nrow(DistinctHosts(t))
160 }
161
162 distinct_shells$shell_label = ShellLabels(distinct_shells$shell_name,
163 distinct_shells$shell_hash,
164 num_hosts)
165 return(distinct_shells)
166}
167
168ParserReport = function(in_dir, out_dir) {
169 times = read.csv(file.path(in_dir, 'times.csv'))
170 lines = read.csv(file.path(in_dir, 'lines.csv'))
171 raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))
172
173 cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))
174
175 # For joining by filename
176 lines_by_filename = tibble(
177 num_lines = lines$num_lines,
178 filename = basename(lines$path)
179 )
180
181 # Remove failures
182 times %>% filter(status == 0) %>% select(-c(status)) -> times
183 cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind
184
185 # Add the number of lines, joining on path, and compute lines/ms
186 times %>%
187 left_join(lines, by = c('path')) %>%
188 mutate(filename = basename(path), filename_HREF = sourceUrl(path),
189 max_rss_MB = max_rss_KiB * 1024 / 1e6,
190 elapsed_ms = elapsed_secs * 1000,
191 user_ms = user_secs * 1000,
192 sys_ms = sys_secs * 1000,
193 lines_per_ms = num_lines / elapsed_ms) %>%
194 select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
195 joined_times
196
197 #print(head(times))
198 #print(head(lines))
199 #print(head(vm))
200 #print(head(joined_times))
201
202 print(summary(joined_times))
203
204 #
205 # Find distinct shells and hosts, and label them for readability.
206 #
207
208 distinct_hosts = DistinctHosts(joined_times)
209 Log('')
210 Log('Distinct hosts')
211 print(distinct_hosts)
212
213 distinct_shells = DistinctShells(joined_times)
214 Log('')
215 Log('Distinct shells')
216 print(distinct_shells)
217
218 # Replace name/hash combinations with labels.
219 joined_times %>%
220 left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
221 left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
222 select(-c(host_name, host_hash, shell_name, shell_hash)) ->
223 joined_times
224
225 # Like 'times', but do shell_label as one step
226 # Hack: we know benchmarks/auto.sh runs this on one machine
227 distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
228 cachegrind %>%
229 left_join(lines, by = c('path')) %>%
230 select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
231 left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
232 select(-c(shell_name, shell_hash)) %>%
233 mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
234 select(-c(path)) ->
235 joined_cachegrind
236
237 Log('summary(joined_times):')
238 print(summary(joined_times))
239 Log('head(joined_times):')
240 print(head(joined_times))
241
242 # Summarize rates by platform/shell
243 joined_times %>%
244 mutate(host_label = paste("host", host_label)) %>%
245 group_by(host_label, shell_label) %>%
246 summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
247 mutate(lines_per_ms = total_lines / total_ms) %>%
248 select(-c(total_ms)) %>%
249 spread(key = host_label, value = lines_per_ms) ->
250 times_summary
251
252 # Sort by parsing rate on the fast machine
253 if ("host lenny" %in% colnames(times_summary)) {
254 times_summary %>% arrange(desc(`host lenny`)) -> times_summary
255 } else {
256 times_summary %>% arrange(desc(`host no-host`)) -> times_summary
257 }
258
259 Log('times_summary:')
260 print(times_summary)
261
262 # Summarize cachegrind by platform/shell
263 # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
264 joined_cachegrind %>%
265 group_by(shell_label) %>%
266 summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
267 mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
268 select(-c(total_irefs)) ->
269 cachegrind_summary
270
271 if ("no-host" %in% distinct_hosts$host_label) {
272
273 # We don't have all the shells
274 elapsed = NULL
275 rate = NULL
276 max_rss = NULL
277 instructions = NULL
278
279 joined_times %>%
280 select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
281 num_lines, filename, filename_HREF)) %>%
282 arrange(filename, elapsed_ms) ->
283 times_flat
284
285 joined_cachegrind %>%
286 select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
287 arrange(filename, irefs) ->
288 cachegrind_flat
289
290 } else {
291
292 times_flat = NULL
293 cachegrind_flat = NULL
294
295 # Elapsed seconds for each shell by platform and file
296 joined_times %>%
297 select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
298 spread(key = shell_label, value = elapsed_ms) %>%
299 arrange(host_label, num_lines) %>%
300 mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
301 select(c(host_label, bash, dash, mksh, zsh,
302 `osh-ovm`, `osh-cpython`, `osh-native`,
303 osh_to_bash_ratio, num_lines, filename, filename_HREF)) ->
304 elapsed
305
306 Log('\n')
307 Log('ELAPSED')
308 print(elapsed)
309
310 # Rates by file and shell
311 joined_times %>%
312 select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
313 spread(key = shell_label, value = lines_per_ms) %>%
314 arrange(host_label, num_lines) %>%
315 select(c(host_label, bash, dash, mksh, zsh,
316 `osh-ovm`, `osh-cpython`, `osh-native`,
317 num_lines, filename, filename_HREF)) ->
318 rate
319
320 Log('\n')
321 Log('RATE')
322 print(rate)
323
324 # Memory usage by file
325 joined_times %>%
326 select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
327 spread(key = shell_label, value = max_rss_MB) %>%
328 arrange(host_label, num_lines) %>%
329 select(c(host_label, bash, dash, mksh, zsh,
330 `osh-ovm`, `osh-cpython`, `osh-native`,
331 num_lines, filename, filename_HREF)) ->
332 max_rss
333
334 Log('\n')
335 Log('MAX RSS')
336 print(max_rss)
337
338 Log('\n')
339 Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
340 print(joined_cachegrind)
341 #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))
342
343 # Cachegrind instructions by file
344 joined_cachegrind %>%
345 mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
346 select(-c(irefs)) %>%
347 spread(key = shell_label, value = thousand_irefs_per_line) %>%
348 arrange(num_lines) %>%
349 select(c(bash, dash, mksh, `osh-native`,
350 num_lines, filename, filename_HREF)) ->
351 instructions
352
353 Log('\n')
354 Log('instructions has %d rows', nrow(instructions))
355 print(instructions)
356 }
357
358 WriteProvenance(distinct_hosts, distinct_shells, out_dir)
359
360 raw_data_table = tibble(
361 filename = basename(as.character(raw_data$path)),
362 filename_HREF = benchmarkDataLink('osh-parser', filename, '')
363 )
364 #print(raw_data_table)
365
366 writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))
367
368 precision = SamePrecision(0) # lines per ms
369 writeCsv(times_summary, file.path(out_dir, 'summary'), precision)
370
371 precision = ColumnPrecision(list(), default = 1)
372 writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)
373
374 if (!is.null(times_flat)) {
375 precision = SamePrecision(0)
376 writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
377 }
378
379 if (!is.null(cachegrind_flat)) {
380 precision = SamePrecision(0)
381 writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
382 }
383
384 if (!is.null(elapsed)) { # equivalent to no-host
385 # Round to nearest millisecond, but the ratio has a decimal point.
386 precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
387 writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)
388
389 precision = SamePrecision(0)
390 writeCsv(rate, file.path(out_dir, 'rate'), precision)
391
392 writeCsv(max_rss, file.path(out_dir, 'max_rss'))
393
394 precision = SamePrecision(1)
395 writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
396 }
397
398 Log('Wrote %s', out_dir)
399}
400
401WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = F) {
402
403 num_hosts = nrow(distinct_hosts)
404 if (num_hosts == 1) {
405 linkify = provenanceLink
406 } else {
407 linkify = benchmarkDataLink
408 }
409
410 Log('distinct_hosts')
411 print(distinct_hosts)
412 Log('')
413
414 Log('distinct_shells')
415 print(distinct_shells)
416 Log('')
417
418 # Should be:
419 # host_id_url
420 # And then csv_to_html will be smart enough? It should take --url flag?
421 host_table = tibble(
422 host_label = distinct_hosts$host_label,
423 host_id = paste(distinct_hosts$host_name,
424 distinct_hosts$host_hash, sep='-'),
425 host_id_HREF = linkify('host-id', host_id, '/')
426 )
427 Log('host_table')
428 print(host_table)
429 Log('')
430
431 shell_table = tibble(
432 shell_label = distinct_shells$shell_label,
433 shell_id = paste(distinct_shells$shell_name,
434 distinct_shells$shell_hash, sep='-'),
435 shell_id_HREF = linkify('shell-id', shell_id, '/')
436 )
437
438 Log('shell_table')
439 print(shell_table)
440 Log('')
441
442 if (tsv) {
443 writeTsv(host_table, file.path(out_dir, 'hosts'))
444 writeTsv(shell_table, file.path(out_dir, 'shells'))
445 } else {
446 writeCsv(host_table, file.path(out_dir, 'hosts'))
447 writeCsv(shell_table, file.path(out_dir, 'shells'))
448 }
449}
450
451WriteSimpleProvenance = function(provenance, out_dir) {
452 Log('provenance')
453 print(provenance)
454 Log('')
455
456 # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
457 # benchmarks/id.sh publish-shell-id uses
458 provenance %>%
459 mutate(shell_name = basename(sh_path)) %>%
460 distinct(shell_label, shell_name, shell_hash) ->
461 distinct_shells
462
463 Log('distinct_shells')
464 print(distinct_shells)
465 Log('')
466
467 provenance %>% distinct(host_label, host_name, host_hash) -> distinct_hosts
468
469 WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
470}
471
472RuntimeReport = function(in_dir, out_dir) {
473 times = readTsv(file.path(in_dir, 'times.tsv'))
474
475 gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
476 provenance = readTsv(file.path(in_dir, 'provenance.tsv'))
477
478 times %>% filter(status != 0) -> failed
479 if (nrow(failed) != 0) {
480 print(failed)
481 stop('Some osh-runtime tasks failed')
482 }
483
484 # Joins:
485 # times <= sh_path => provenance
486 # times <= join_id, host_name => gc_stats
487
488 # TODO: provenance may have rows from 2 machines. Could validate them and
489 # deduplicate.
490
491 # It should have (host_label, host_name, host_hash)
492 # (shell_label, sh_path, shell_hash)
493 provenance %>%
494 mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
495 provenance
496
497 provenance %>% distinct(sh_path, shell_label) -> label_lookup
498
499 Log('label_lookup')
500 print(label_lookup)
501
502 # Join with provenance for host label and shell label
503 times %>%
504 select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
505 host_name, sh_path, workload)) %>%
506 mutate(elapsed_ms = elapsed_secs * 1000,
507 user_ms = user_secs * 1000,
508 sys_ms = sys_secs * 1000,
509 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
510 select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
511 left_join(label_lookup, by = c('sh_path')) %>%
512 select(-c(sh_path)) %>%
513 # we want to compare workloads on adjacent rows
514 arrange(workload) ->
515 details
516
517 times %>%
518 select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults, swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
519 left_join(label_lookup, by = c('sh_path')) %>%
520 select(-c(sh_path)) %>%
521 # we want to compare workloads on adjacent rows
522 arrange(workload) ->
523 details_io
524
525 Log('details')
526 print(details)
527
528 # Elapsed time comparison
529 details %>%
530 select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
531 spread(key = shell_label, value = elapsed_ms) %>%
532 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
533 mutate(native_bash_ratio = `osh-native` / bash) %>%
534 arrange(workload, host_name) %>%
535 select(c(workload, host_name,
536 bash, dash, `osh-cpython`, `osh-native`,
537 py_bash_ratio, native_bash_ratio)) ->
538
539 elapsed
540
541 Log('elapsed')
542 print(elapsed)
543
544 # Minor Page Faults Comparison
545 details_io %>%
546 select(c(host_name, shell_label, workload, minor_faults)) %>%
547 spread(key = shell_label, value = minor_faults) %>%
548 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
549 mutate(native_bash_ratio = `osh-native` / bash) %>%
550 arrange(workload, host_name) %>%
551 select(c(workload, host_name,
552 bash, dash, `osh-cpython`, `osh-native`,
553 py_bash_ratio, native_bash_ratio)) ->
554 page_faults
555
556 Log('page_faults')
557 print(page_faults)
558
559 # Max RSS comparison
560 details %>%
561 select(c(host_name, shell_label, workload, max_rss_MB)) %>%
562 spread(key = shell_label, value = max_rss_MB) %>%
563 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
564 mutate(native_bash_ratio = `osh-native` / bash) %>%
565 arrange(workload, host_name) %>%
566 select(c(workload, host_name,
567 bash, dash, `osh-cpython`, `osh-native`,
568 py_bash_ratio, native_bash_ratio)) ->
569 max_rss
570
571 Log('max rss')
572 print(max_rss)
573
574 details %>%
575 select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
576 mutate(join_id = sprintf("gc-%d", task_id)) %>%
577 select(-c(task_id)) ->
578 gc_details
579
580 Log('GC details')
581 print(gc_details)
582 Log('')
583
584 Log('GC stats')
585 print(gc_stats)
586 Log('')
587
588 gc_stats %>%
589 left_join(gc_details, by = c('join_id', 'host_name')) %>%
590 select(-c(join_id, roots_capacity, objs_capacity)) %>%
591 # Do same transformations as GcReport()
592 mutate(allocated_MB = bytes_allocated / 1e6) %>%
593 select(-c(bytes_allocated)) %>%
594 rename(num_gc_done = num_collections) %>%
595 # Put these columns first
596 relocate(workload, host_name,
597 elapsed_ms, max_gc_millis, total_gc_millis,
598 allocated_MB, max_rss_MB, num_allocated) ->
599 gc_stats
600
601 Log('After GC stats')
602 print(gc_stats)
603 Log('')
604
605 WriteSimpleProvenance(provenance, out_dir)
606
607 # milliseconds don't need decimal digit
608 precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
609 `osh-native` = 0, py_bash_ratio = 2,
610 native_bash_ratio = 2))
611 writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
612 writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)
613
614 precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
615 writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
616
617 precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
618 default = 0)
619 writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)
620
621 writeTsv(details, file.path(out_dir, 'details'), precision3)
622 writeTsv(details_io, file.path(out_dir, 'details_io'))
623
624 Log('Wrote %s', out_dir)
625}
626
627VmBaselineReport = function(in_dir, out_dir) {
628 vm = readTsv(file.path(in_dir, 'vm-baseline.tsv'))
629 #print(vm)
630
631 # Not using DistinctHosts() because field host_hash isn't collected
632 num_hosts = nrow(vm %>% distinct(host))
633
634 vm %>%
635 rename(kib = metric_value) %>%
636 mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
637 megabytes = kib * 1024 / 1e6) %>%
638 select(-c(shell_name, kib)) %>%
639 spread(key = c(metric_name), value = megabytes) %>%
640 rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
641 select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
642 arrange(shell_label, shell_hash, host, VmPeak_MB) ->
643 vm
644
645 print(vm)
646
647 writeTsv(vm, file.path(out_dir, 'vm-baseline'))
648}
649
650WriteOvmBuildDetails = function(distinct_hosts, distinct_compilers, out_dir) {
651 host_table = tibble(
652 host_label = distinct_hosts$host_label,
653 host_id = paste(distinct_hosts$host_name,
654 distinct_hosts$host_hash, sep='-'),
655 host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
656 )
657 print(host_table)
658
659 dc = distinct_compilers
660 compiler_table = tibble(
661 compiler_label = dc$compiler_label,
662 compiler_id = paste(dc$compiler_label, dc$compiler_hash, sep='-'),
663 compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
664 )
665 print(compiler_table)
666
667 writeTsv(host_table, file.path(out_dir, 'hosts'))
668 writeTsv(compiler_table, file.path(out_dir, 'compilers'))
669}
670
671OvmBuildReport = function(in_dir, out_dir) {
672 times = readTsv(file.path(in_dir, 'times.tsv'))
673 bytecode_size = readTsv(file.path(in_dir, 'bytecode-size.tsv'))
674 bin_sizes = readTsv(file.path(in_dir, 'bin-sizes.tsv'))
675 native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
676 raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))
677
678 times %>% filter(status != 0) -> failed
679 if (nrow(failed) != 0) {
680 print(failed)
681 stop('Some ovm-build tasks failed')
682 }
683
684 times %>% distinct(host_name, host_hash) -> distinct_hosts
685 distinct_hosts$host_label = distinct_hosts$host_name
686
687 times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
688 distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)
689
690 #print(distinct_hosts)
691 #print(distinct_compilers)
692
693 WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)
694
695 times %>%
696 select(-c(status)) %>%
697 left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
698 left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
699 select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
700 mutate(src_dir = basename(src_dir),
701 host_label = paste("host ", host_label),
702 is_conf = str_detect(action, 'configure'),
703 is_ovm = str_detect(action, 'oil.ovm'),
704 is_dbg = str_detect(action, 'dbg'),
705 ) %>%
706 select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
707 elapsed_secs) %>%
708 spread(key = c(host_label), value = elapsed_secs) %>%
709 arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
710 select(-c(is_conf, is_ovm, is_dbg)) ->
711 times
712
713 #print(times)
714
715 bytecode_size %>%
716 rename(bytecode_size = num_bytes) %>%
717 select(-c(path)) ->
718 bytecode_size
719
720 bin_sizes %>%
721 # reorder
722 select(c(host_label, path, num_bytes)) %>%
723 left_join(bytecode_size, by = c('host_label')) %>%
724 mutate(native_code_size = num_bytes - bytecode_size) ->
725 sizes
726
727 # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
728 native_sizes %>%
729 select(c(host_label, path, num_bytes)) %>%
730 mutate(host_label = paste("host ", host_label),
731 binary = basename(path),
732 compiler = basename(dirname(path)),
733 ) %>%
734 select(-c(path)) %>%
735 spread(key = c(host_label), value = num_bytes) %>%
736 arrange(compiler, binary) ->
737 native_sizes
738
739 # NOTE: These don't have the host and compiler.
740 writeTsv(times, file.path(out_dir, 'times'))
741 writeTsv(bytecode_size, file.path(out_dir, 'bytecode-size'))
742 writeTsv(sizes, file.path(out_dir, 'sizes'))
743 writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))
744
745 # TODO: I want a size report too
746 #writeCsv(sizes, file.path(out_dir, 'sizes'))
747}
748
749unique_stdout_md5sum = function(t, num_expected) {
750 u = n_distinct(t$stdout_md5sum)
751 if (u != num_expected) {
752 t %>% select(c(host_name, task_name, arg1, arg2, runtime_name, stdout_md5sum)) %>% print()
753 stop(sprintf('Expected %d unique md5sums, got %d', num_expected, u))
754 }
755}
756
757ComputeReport = function(in_dir, out_dir) {
758 # TSV file, not CSV
759 times = read.table(file.path(in_dir, 'times.tsv'), header=T)
760 print(times)
761
762 times %>% filter(status != 0) -> failed
763 if (nrow(failed) != 0) {
764 print(failed)
765 stop('Some compute tasks failed')
766 }
767
768 #
769 # Check correctness
770 #
771
772 times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
773 times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
774 times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
775 # 3 different inputs
776 times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)
777
778 times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)
779
780 # TODO:
781 # - oils_cpp doesn't implement unicode LANG=C
782 # - bash behaves differently on your desktop vs. in the container
783 # - might need layer-locales in the image?
784
785 #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
786 # Ditto here
787 #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)
788
789 #
790 # Find distinct shells and hosts, and label them for readability.
791 #
792
793 # Runtimes are called shells, as a hack for code reuse
794 times %>%
795 mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
796 select(c(host_name, host_hash, shell_name, shell_hash)) ->
797 tmp
798
799 distinct_hosts = DistinctHosts(tmp)
800 Log('')
801 Log('Distinct hosts')
802 print(distinct_hosts)
803
804 distinct_shells = DistinctShells(tmp)
805 Log('')
806 Log('Distinct runtimes')
807 print(distinct_shells)
808
809 num_hosts = nrow(distinct_hosts)
810
811 times %>%
812 select(-c(status, stdout_md5sum, stdout_filename, host_hash, runtime_hash)) %>%
813 mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
814 elapsed_ms = elapsed_secs * 1000,
815 user_ms = user_secs * 1000,
816 sys_ms = sys_secs * 1000,
817 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
818 select(-c(runtime_name, elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
819 arrange(host_name, task_name, arg1, arg2, user_ms) ->
820 details
821
822 times %>%
823 mutate(
824 runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
825 stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
826 select(c(host_name, task_name, arg1, arg2, runtime_label,
827 stdout_md5sum, stdout_md5sum_HREF)) ->
828 stdout_files
829
830 details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
831 details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
832 details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
833 # There's no arg2
834 details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help
835
836 details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
837 details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% select(-c(task_name)) -> palindrome
838
839 precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
840 writeTsv(details, file.path(out_dir, 'details'), precision)
841
842 writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)
843
844 writeTsv(hello, file.path(out_dir, 'hello'), precision)
845 writeTsv(fib, file.path(out_dir, 'fib'), precision)
846 writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
847 writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)
848
849 writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
850 writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)
851
852 WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
853}
854
855WriteOneTask = function(times, out_dir, task_name, precision) {
856 times %>%
857 filter(task == task_name) %>%
858 select(-c(task)) -> subset
859
860 writeTsv(subset, file.path(out_dir, task_name), precision)
861}
862
863SHELL_ORDER = c('dash',
864 'bash',
865 'zsh',
866 '_bin/cxx-opt+bumpleak/osh',
867 '_bin/cxx-opt+bumproot/osh',
868 '_bin/cxx-opt+bumpsmall/osh',
869 '_bin/cxx-opt/osh',
870 '_bin/cxx-opt+nopool/osh')
871
872GcReport = function(in_dir, out_dir) {
873 times = read.table(file.path(in_dir, 'raw/times.tsv'), header=T)
874 gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=T)
875
876 times %>% filter(status != 0) -> failed
877 if (nrow(failed) != 0) {
878 print(failed)
879 stop('Some gc tasks failed')
880 }
881
882 # Change units and order columns
883 times %>%
884 arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
885 mutate(elapsed_ms = elapsed_secs * 1000,
886 user_ms = user_secs * 1000,
887 sys_ms = sys_secs * 1000,
888 max_rss_MB = max_rss_KiB * 1024 / 1e6,
889 shell_label = ShellLabelFromPath(sh_path)
890 ) %>%
891 select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
892 shell_runtime_opts)) ->
893 times
894
895 # Join and order columns
896 gc_stats %>% left_join(times, by = c('join_id')) %>%
897 arrange(desc(task)) %>%
898 mutate(allocated_MB = bytes_allocated / 1e6) %>%
899 # try to make the table skinnier
900 rename(num_gc_done = num_collections) %>%
901 select(task, elapsed_ms, max_gc_millis, total_gc_millis,
902 allocated_MB, max_rss_MB, num_allocated,
903 num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
904 shell_label) ->
905 gc_stats
906
907 times %>% select(-c(join_id)) -> times
908
909
910 precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
911 default = 0)
912
913 writeTsv(times, file.path(out_dir, 'times'), precision)
914 writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)
915
916 tasks = c('parse.configure-coreutils',
917 'parse.configure-cpython',
918 'parse.abuild',
919 'ex.compute-fib',
920 'ex.bashcomp-parse-help',
921 'ex.abuild-print-help')
922 # Write out separate rows
923 for (task in tasks) {
924 WriteOneTask(times, out_dir, task, precision)
925 }
926}
927
928GcCachegrindReport = function(in_dir, out_dir) {
929 times = readTsv(file.path(in_dir, 'raw/times.tsv'))
930 counts = readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))
931
932 times %>% filter(status != 0) -> failed
933 if (nrow(failed) != 0) {
934 print(failed)
935 stop('Some gc tasks failed')
936 }
937
938 print(times)
939 print(counts)
940
941 counts %>% left_join(times, by = c('join_id')) %>%
942 mutate(million_irefs = irefs / 1e6) %>%
943 select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
944 arrange(factor(sh_path, levels = SHELL_ORDER)) ->
945 counts
946
947 precision = NULL
948 tasks = c('parse.abuild', 'ex.compute-fib')
949 for (task in tasks) {
950 WriteOneTask(counts, out_dir, task, precision)
951 }
952}
953
954MyCppReport = function(in_dir, out_dir) {
955 times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
956 print(times)
957
958 times %>% filter(status != 0) -> failed
959 if (nrow(failed) != 0) {
960 print(failed)
961 stop('Some mycpp tasks failed')
962 }
963
964 # Don't care about elapsed and system
965 times %>% select(-c(status, elapsed_secs, bin, task_out)) %>%
966 mutate(example_name_HREF = mycppUrl(example_name),
967 user_ms = user_secs * 1000,
968 sys_ms = sys_secs * 1000,
969 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
970 select(-c(user_secs, sys_secs, max_rss_KiB)) ->
971 details
972
973 details %>% select(-c(sys_ms, max_rss_MB)) %>%
974 spread(key = impl, value = user_ms) %>%
975 mutate(`C++ : Python` = `C++` / Python) %>%
976 arrange(`C++ : Python`) ->
977 user_time
978
979 details %>% select(-c(user_ms, max_rss_MB)) %>%
980 spread(key = impl, value = sys_ms) %>%
981 mutate(`C++ : Python` = `C++` / Python) %>%
982 arrange(`C++ : Python`) ->
983 sys_time
984
985 details %>% select(-c(user_ms, sys_ms)) %>%
986 spread(key = impl, value = max_rss_MB) %>%
987 mutate(`C++ : Python` = `C++` / Python) %>%
988 arrange(`C++ : Python`) ->
989 max_rss
990
991 # Sometimes it speeds up by more than 10x
992 precision1 = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
993 writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
994 writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)
995
996 precision2 = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
997 writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
998
999 writeTsv(details, file.path(out_dir, 'details'))
1000}
1001
1002UftraceTaskReport = function(env, task_name, summaries) {
1003 # Need this again after redirect
1004 MaybeDisableColor(stdout())
1005
1006 task_env = env[[task_name]]
1007
1008 untyped = task_env$untyped
1009 typed = task_env$typed
1010 strings = task_env$strings
1011 slabs = task_env$slabs
1012 reserve = task_env$reserve
1013
1014 string_overhead = 17 # GC header (8) + len (4) + hash value (4) + NUL (1)
1015 strings %>% mutate(obj_len = str_len + string_overhead) -> strings
1016
1017 # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
1018 # big/small
1019 #
1020 # And then zoom in on distributions as well
1021
1022 num_allocs = nrow(untyped)
1023 total_bytes = sum(untyped$obj_len)
1024
1025 untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
1026 #print(untyped_hist)
1027
1028 untyped_hist %>%
1029 mutate(n_less_than = cumsum(n),
1030 percent = n_less_than * 100.0 / num_allocs) ->
1031 alloc_sizes
1032
1033 a24 = untyped_hist %>% filter(obj_len <= 24)
1034 a48 = untyped_hist %>% filter(obj_len <= 48)
1035 a96 = untyped_hist %>% filter(obj_len <= 96)
1036
1037 allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
1038 allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
1039 allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs
1040
1041 Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)
1042
1043 options(tibble.print_min=25)
1044
1045 Log('')
1046 Log('All allocations')
1047 print(alloc_sizes %>% head(22))
1048 print(alloc_sizes %>% tail(5))
1049
1050 Log('')
1051 Log('Common Sizes')
1052 print(untyped_hist %>% arrange(desc(n)) %>% head(8))
1053
1054 Log('')
1055 Log(' %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
1056 Log('')
1057
1058 Log('Typed allocations')
1059
1060 num_typed = nrow(typed)
1061
1062 typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
1063 mutate(percent = n * 100.0 / num_typed) %>%
1064 arrange(desc(n)) -> most_common_types
1065
1066 print(most_common_types %>% head(20))
1067 print(most_common_types %>% tail(5))
1068
1069 lists = typed %>% filter(str_starts(func_name, ('List<')))
1070 #print(lists)
1071
1072 num_lists = nrow(lists)
1073 total_list_bytes = num_lists * 24 # sizeof List<T> head is hard-coded
1074
1075 Log('')
1076 Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
1077 Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
1078 Log('')
1079
1080 #
1081 # Strings
1082 #
1083
1084 num_strings = nrow(strings)
1085 total_string_bytes = sum(strings$obj_len)
1086
1087 strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
1088 mutate(n_less_than = cumsum(n),
1089 percent = n_less_than * 100.0 / num_strings) ->
1090 string_lengths
1091
1092 strs_6_bytes_or_less = string_lengths %>% filter(str_len == 6) %>% select(percent)
1093 strs_14_bytes_or_less = string_lengths %>% filter(str_len == 14) %>% select(percent)
1094
1095 # Parse workload
1096 # 62% of strings <= 6 bytes
1097 # 84% of strings <= 14 bytes
1098
1099 Log('Str - NewStr() and OverAllocatedStr()')
1100 print(string_lengths %>% head(16))
1101 print(string_lengths %>% tail(5))
1102 Log('')
1103
1104 Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
1105 commas(sum(strings$str_len)), commas(total_string_bytes))
1106 Log('')
1107 Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
1108 Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
1109 Log('')
1110
1111 #
1112 # Slabs
1113 #
1114
1115 Log('NewSlab()')
1116
1117 num_slabs = nrow(slabs)
1118 slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
1119 mutate(n_less_than = cumsum(n),
1120 percent = n_less_than * 100.0 / num_slabs) ->
1121 slab_lengths
1122
1123 slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
1124 arrange(desc(n)) -> slab_types
1125
1126 Log(' Lengths')
1127 print(slab_lengths %>% head())
1128 print(slab_lengths %>% tail(5))
1129 Log('')
1130
1131 Log(' Slab Types')
1132 print(slab_types %>% head())
1133 print(slab_types %>% tail(5))
1134 Log('')
1135
1136 total_slab_items = sum(slabs$slab_len)
1137
1138 Log('%s slabs, total items = %s', commas(num_slabs),
1139 commas(sum(slabs$slab_len)))
1140 Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
1141 Log('')
1142
1143 #
1144 # reserve() calls
1145 #
1146
1147 # There should be strictly more List::reserve() calls than NewSlab
1148
1149 Log('::reserve(int n)')
1150 Log('')
1151
1152 num_reserve = nrow(reserve)
1153 reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
1154 mutate(n_less_than = cumsum(n),
1155 percent = n_less_than * 100.0 / num_reserve) ->
1156 reserve_args
1157
1158 Log(' Num Items')
1159 print(reserve_args %>% head(15))
1160 print(reserve_args %>% tail(5))
1161 Log('')
1162
1163 Log('%s reserve() calls, total items = %s', commas(num_reserve),
1164 commas(sum(reserve$num_items)))
1165 Log('')
1166
1167 # Accounting for all allocations!
1168 Log('Untyped: %s', commas(num_allocs))
1169 Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
1170 Log('')
1171
1172 num_other_typed = num_typed - num_lists
1173
1174 # Summary table
1175 stats = tibble(task = task_name,
1176 total_bytes_ = commas(total_bytes),
1177 num_allocs_ = commas(num_allocs),
1178 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
1179 num_reserve_calls = commas(num_reserve),
1180
1181 percent_list_allocs = Percent(num_lists, num_allocs),
1182 percent_slab_allocs = Percent(num_slabs, num_allocs),
1183 percent_string_allocs = Percent(num_strings, num_allocs),
1184 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),
1185
1186 percent_list_bytes = Percent(total_list_bytes, total_bytes),
1187 percent_string_bytes = Percent(total_string_bytes, total_bytes),
1188
1189 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
1190 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
1191 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),
1192
1193 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
1194 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less),
1195 )
1196 summaries$stats[[task_name]] = stats
1197
1198 summaries$most_common_types[[task_name]] = most_common_types
1199}
1200
1201LoadUftraceTsv = function(in_dir, env) {
1202 for (task in list.files(in_dir)) {
1203 Log('Loading data for task %s', task)
1204 base_dir = file.path(in_dir, task)
1205
1206 task_env = new.env()
1207 env[[task]] = task_env
1208
1209 # TSV file, not CSV
1210 task_env$untyped = readTsv(file.path(base_dir, 'all-untyped.tsv'))
1211 task_env$typed = readTsv(file.path(base_dir, 'typed.tsv'))
1212 task_env$strings = readTsv(file.path(base_dir, 'strings.tsv'))
1213 task_env$slabs = readTsv(file.path(base_dir, 'slabs.tsv'))
1214 task_env$reserve = readTsv(file.path(base_dir, 'reserve.tsv'))
1215
1216 # median string length is 4, mean is 9.5!
1217 Log('UNTYPED')
1218 print(summary(task_env$untyped))
1219 Log('')
1220
1221 Log('TYPED')
1222 print(summary(task_env$typed))
1223 Log('')
1224
1225 Log('STRINGS')
1226 print(summary(task_env$strings))
1227 Log('')
1228
1229 Log('SLABS')
1230 print(summary(task_env$slabs))
1231 Log('')
1232
1233 Log('RESERVE')
1234 print(summary(task_env$reserve))
1235 Log('')
1236 }
1237}
1238
1239Percent = function(n, total) {
1240 sprintf('%.1f%%', n * 100.0 / total)
1241}
1242
1243PrettyPrintLong = function(d) {
1244 tr = t(d) # transpose
1245
1246 row_names = rownames(tr)
1247
1248 for (i in 1:nrow(tr)) {
1249 row_name = row_names[i]
1250 cat(sprintf('%26s', row_name)) # calculated min width manually
1251 cat(sprintf('%20s', tr[i,]))
1252 cat('\n')
1253
1254 # Extra spacing
1255 if (row_name %in% c('num_reserve_calls',
1256 'percent_string_bytes',
1257 'percent_other_typed_allocs',
1258 'allocs_96_bytes_or_less')) {
1259 cat('\n')
1260 }
1261 }
1262}
1263
1264
1265UftraceReport = function(env, out_dir) {
1266 # summaries$stats should be a list of 1-row data frames
1267 # summaries$top_types should be a list of types
1268 summaries = new.env()
1269
1270 for (task_name in names(env)) {
1271 report_out = file.path(out_dir, paste0(task_name, '.txt'))
1272
1273 Log('Making report for task %s -> %s', task_name, report_out)
1274
1275 sink(file = report_out)
1276 UftraceTaskReport(env, task_name, summaries)
1277 sink() # reset
1278 }
1279 Log('')
1280
1281 # Concate all the data frames added to summary
1282 stats = bind_rows(as.list(summaries$stats))
1283
1284 sink(file = file.path(out_dir, 'summary.txt'))
1285 #print(stats)
1286 #Log('')
1287
1288 PrettyPrintLong(stats)
1289 Log('')
1290
1291 mct = summaries$most_common_types
1292 for (task_name in names(mct)) {
1293 Log('Common types in workload %s', task_name)
1294 Log('')
1295
1296 print(mct[[task_name]] %>% head(5))
1297 Log('')
1298 }
1299 sink()
1300
1301 # For the REPL
1302 return(list(stats = stats))
1303}
1304
1305main = function(argv) {
1306 action = argv[[1]]
1307 in_dir = argv[[2]]
1308 out_dir = argv[[3]]
1309
1310 if (action == 'osh-parser') {
1311 ParserReport(in_dir, out_dir)
1312
1313 } else if (action == 'osh-runtime') {
1314 RuntimeReport(in_dir, out_dir)
1315
1316 } else if (action == 'vm-baseline') {
1317 VmBaselineReport(in_dir, out_dir)
1318
1319 } else if (action == 'ovm-build') {
1320 OvmBuildReport(in_dir, out_dir)
1321
1322 } else if (action == 'compute') {
1323 ComputeReport(in_dir, out_dir)
1324
1325 } else if (action == 'gc') {
1326 GcReport(in_dir, out_dir)
1327
1328 } else if (action == 'gc-cachegrind') {
1329 GcCachegrindReport(in_dir, out_dir)
1330
1331 } else if (action == 'mycpp') {
1332 MyCppReport(in_dir, out_dir)
1333
1334 } else if (action == 'uftrace') {
1335 d = new.env()
1336 LoadUftraceTsv(in_dir, d)
1337 UftraceReport(d, out_dir)
1338
1339 } else {
1340 Log("Invalid action '%s'", action)
1341 quit(status = 1)
1342 }
1343 Log('PID %d done', Sys.getpid())
1344}
1345
1346if (length(sys.frames()) == 0) {
1347 # increase ggplot font size globally
1348 #theme_set(theme_grey(base_size = 20))
1349
1350 main(commandArgs(TRUE))
1351}