1 | #!/usr/bin/env Rscript
2 | #
3 | # benchmarks/report.R -- Analyze data collected by shell scripts.
4 | #
5 | # Usage:
6 | # benchmarks/report.R OUT_DIR [TIMES_CSV...]
7 |
8 | # Suppress warnings about functions masked from 'package:stats' and 'package:base'
9 | # filter, lag
10 | # intersect, setdiff, setequal, union
11 | library(dplyr, warn.conflicts = FALSE)
12 | library(tidyr) # spread()
13 | library(stringr)
14 |
15 | source('benchmarks/common.R')
16 |
# Keep strings as character vectors rather than factors when data frames are
# built.  FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
options(stringsAsFactors = FALSE)
18 |
19 | # For pretty printing
commas <- function(x) {
  ### Format x with ',' as the thousands separator.
  with_separators <- format(x, big.mark = ',')
  with_separators
}
23 |
sourceUrl <- function(path) {
  ### Build the GitHub URL for a path relative to the oil repository root.
  url_format <- 'https://github.com/oilshell/oil/blob/master/%s'
  sprintf(url_format, path)
}
27 |
28 | # Takes a filename, not a path.
sourceUrl2 <- function(filename) {
  ### Build the GitHub URL for a file under benchmarks/testdata/.  The argument
  ### is a bare filename, not a path.
  url_format <- 'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s'
  sprintf(url_format, filename)
}
34 |
mycppUrl <- function(path) {
  ### Build the GitHub URL of a mycpp example; '.py' is appended to the name.
  url_format <- 'https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py'
  sprintf(url_format, path)
}
38 |
39 |
40 | # TODO: Set up cgit because Github links are slow.
benchmarkDataLink <- function(subdir, name, suffix) {
  ### Absolute link into the oilshell/benchmark-data repository on GitHub:
  ### <repo>/<subdir>/<name><suffix>
  url_format <- 'https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s'
  sprintf(url_format, subdir, name, suffix)
}
46 |
provenanceLink <- function(subdir, name, suffix) {
  ### Relative link one directory up: ../<subdir>/<name><suffix>
  relative_format <- '../%s/%s%s'
  sprintf(relative_format, subdir, name, suffix)
}
50 |
51 |
GetOshLabel <- function(shell_hash, prov_dir) {
  ### Map an osh shell hash to a label ('osh-ovm', 'osh-cpython' or
  ### 'osh-native') by inspecting the sh-path.txt recorded for that shell under
  ### prov_dir.  Stops if the file is missing or matches no known pattern.

  sh_path_file <- sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)

  if (!file.exists(sh_path_file)) {
    stop(sprintf("%s doesn't exist", sh_path_file))
  }

  Log('Reading %s', sh_path_file)
  contents <- readLines(sh_path_file)

  # TRUE when at least one line of the file matches the regex
  mentions <- function(pattern) {
    length(grep(pattern, contents)) > 0
  }

  # Order matters: '_bin/osh' must be tested before 'bin/osh', which would
  # also match it.
  if (mentions('_bin/osh')) {
    return('osh-ovm')
  }
  if (mentions('bin/osh')) {
    return('osh-cpython')
  }
  if (mentions('_bin/.*/osh')) {
    return('osh-native')
  }
  stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
}
74 |
# Path suffixes of the 'opt' osh binary.  ShellLabels() and
# ShellLabelFromPath() compare shell paths against these with endsWith().
opt_suffix1 = '_bin/cxx-opt/osh'
opt_suffix2 = '_bin/cxx-opt-sh/osh'
77 |
ShellLabels = function(shell_name, shell_hash, num_hosts) {
  ### Given 2 parallel vectors (shell names and shell hashes), return a
  ### character vector of readable labels, one per element.
  ###
  ### shell_hash is only consulted for elements named exactly 'osh'.
  ### num_hosts selects where provenance is read from: '_tmp' for a single
  ### host, '../benchmark-data/' otherwise.
  ### An empty shell_name yields character(0).

  # TODO: Clean up callers.  Some metrics call this function with a
  # shell/runtime BASENAME, and others a PATH
  # - e.g. ComputeReport calls this with runtime_name which is actually a PATH

  #Log('name %s', shell_name)
  #Log('hash %s', shell_hash)

  if (num_hosts == 1) {
    prov_dir = '_tmp'
  } else {
    prov_dir = '../benchmark-data/'
  }

  # Preallocate instead of growing with c().  seq_along() rather than
  # 1:length(), which would iterate over c(1, 0) for empty input.
  labels = character(length(shell_name))
  for (i in seq_along(shell_name)) {
    sh = shell_name[i]
    if (sh == 'osh') {
      label = GetOshLabel(shell_hash[i], prov_dir)

    } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      label = 'opt/osh'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label = 'bumpleak/osh'

    } else {
      # Anything unrecognized is labeled by its own name
      label = sh
    }

    Log('[%s] [%s]', shell_name[i], label)
    labels[i] = label
  }

  return(labels)
}
116 |
117 | # Simple version of the above, used by benchmarks/gc
ShellLabelFromPath = function(sh_path) {
  ### Given a vector of shell paths, return a character vector of labels of
  ### the same length.  Paths that match no known osh suffix are returned
  ### unchanged.  An empty sh_path yields character(0), so the result can
  ### always be used as a column.

  # Preallocate instead of growing with c().  seq_along() rather than
  # 1:length(), which would iterate over c(1, 0) for empty input.
  labels = character(length(sh_path))
  for (i in seq_along(sh_path)) {
    sh = sh_path[i]

    if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      # the opt binary is osh-native
      label = 'osh-native'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label = 'bumpleak/osh'

    } else if (endsWith(sh, '_bin/osh')) {  # the app bundle
      label = 'osh-ovm'

    } else if (endsWith(sh, 'bin/osh')) {
      # Must come after the '_bin/osh' test, which this suffix also matches
      label = 'osh-cpython'

    } else {
      label = sh
    }
    labels[i] = label
  }
  return(labels)
}
143 |
DistinctHosts <- function(t) {
  ### One row per distinct (host_name, host_hash) pair in t, with an added
  ### host_label column.
  hosts <- distinct(t, host_name, host_hash)
  # A host is labeled simply by its name
  hosts$host_label <- hosts$host_name
  hosts
}
150 |
DistinctShells <- function(t, num_hosts = -1) {
  ### One row per distinct (shell_name, shell_hash) pair in t, with an added
  ### shell_label column computed by ShellLabels().
  ### num_hosts = -1 means "not given": the hosts in t are counted instead.
  shells <- distinct(t, shell_name, shell_hash)

  Log('')
  Log('Labeling shells')

  host_count <- if (num_hosts == -1) nrow(DistinctHosts(t)) else num_hosts

  shells$shell_label <- ShellLabels(shells$shell_name, shells$shell_hash,
                                    host_count)
  shells
}
167 |
ParserReport = function(in_dir, out_dir) {
  ### Build the osh-parser report.
  ###
  ### Reads times.csv, lines.csv, raw-data.csv and cachegrind.tsv from in_dir.
  ### Always writes to out_dir: hosts and shells (via WriteProvenance),
  ### raw-data, summary and cachegrind_summary.
  ### If a host is labeled 'no-host', also writes the flat tables times_flat
  ### and cachegrind_flat; otherwise writes the per-shell tables elapsed,
  ### rate, max_rss and instructions.

  times = read.csv(file.path(in_dir, 'times.csv'))
  lines = read.csv(file.path(in_dir, 'lines.csv'))
  raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))

  cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))

  # Remove failures
  times %>% filter(status == 0) %>% select(-c(status)) -> times
  cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind

  # Add the number of lines, joining on path, and compute lines/ms
  times %>%
    left_join(lines, by = c('path')) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path),
           max_rss_MB = max_rss_KiB * 1024 / 1e6,  # KiB -> bytes -> MB
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           lines_per_ms = num_lines / elapsed_ms) %>%
    select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
    joined_times

  print(summary(joined_times))

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  distinct_hosts = DistinctHosts(joined_times)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(joined_times)
  Log('')
  Log('Distinct shells')
  print(distinct_shells)

  # Replace name/hash combinations with labels.
  joined_times %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
    select(-c(host_name, host_hash, shell_name, shell_hash)) ->
    joined_times

  # Like 'times', but do shell_label as one step
  # Hack: we know benchmarks/auto.sh runs this on one machine
  distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
  cachegrind %>%
    left_join(lines, by = c('path')) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
    select(-c(shell_name, shell_hash)) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
    select(-c(path)) ->
    joined_cachegrind

  Log('summary(joined_times):')
  print(summary(joined_times))
  Log('head(joined_times):')
  print(head(joined_times))

  # Summarize rates by platform/shell: one row per shell, one column per host
  joined_times %>%
    mutate(host_label = paste("host", host_label)) %>%
    group_by(host_label, shell_label) %>%
    summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
    mutate(lines_per_ms = total_lines / total_ms) %>%
    select(-c(total_ms)) %>%
    spread(key = host_label, value = lines_per_ms) ->
    times_summary

  # Sort by parsing rate on the fast machine
  if ("host lenny" %in% colnames(times_summary)) {
    times_summary %>% arrange(desc(`host lenny`)) -> times_summary
  } else {
    times_summary %>% arrange(desc(`host no-host`)) -> times_summary
  }

  Log('times_summary:')
  print(times_summary)

  # Summarize cachegrind by platform/shell
  # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
  joined_cachegrind %>%
    group_by(shell_label) %>%
    summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
    mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
    select(-c(total_irefs)) ->
    cachegrind_summary

  if ("no-host" %in% distinct_hosts$host_label) {

    # We don't have all the shells, so skip the per-shell (wide) tables and
    # emit flat tables instead.
    elapsed = NULL
    rate = NULL
    max_rss = NULL
    instructions = NULL

    joined_times %>%
      select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
               num_lines, filename, filename_HREF)) %>%
      arrange(filename, elapsed_ms) ->
      times_flat

    joined_cachegrind %>%
      select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
      arrange(filename, irefs) ->
      cachegrind_flat

  } else {

    times_flat = NULL
    cachegrind_flat = NULL

    # Elapsed seconds for each shell by platform and file
    joined_times %>%
      select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = elapsed_ms) %>%
      arrange(host_label, num_lines) %>%
      mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               osh_to_bash_ratio, num_lines, filename, filename_HREF)) ->
      elapsed

    Log('\n')
    Log('ELAPSED')
    print(elapsed)

    # Rates by file and shell
    joined_times %>%
      select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = lines_per_ms) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF)) ->
      rate

    Log('\n')
    Log('RATE')
    print(rate)

    # Memory usage by file
    joined_times %>%
      select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
      spread(key = shell_label, value = max_rss_MB) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF)) ->
      max_rss

    Log('\n')
    Log('MAX RSS')
    print(max_rss)

    Log('\n')
    Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
    print(joined_cachegrind)

    # Cachegrind instructions by file
    joined_cachegrind %>%
      mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
      select(-c(irefs)) %>%
      spread(key = shell_label, value = thousand_irefs_per_line) %>%
      arrange(num_lines) %>%
      select(c(bash, dash, mksh, `osh-native`,
               num_lines, filename, filename_HREF)) ->
      instructions

    Log('\n')
    Log('instructions has %d rows', nrow(instructions))
    print(instructions)
  }

  WriteProvenance(distinct_hosts, distinct_shells, out_dir)

  raw_data_table = tibble(
    filename = basename(as.character(raw_data$path)),
    filename_HREF = benchmarkDataLink('osh-parser', filename, '')
  )
  #print(raw_data_table)

  writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))

  precision = SamePrecision(0)  # lines per ms
  writeCsv(times_summary, file.path(out_dir, 'summary'), precision)

  precision = ColumnPrecision(list(), default = 1)
  writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)

  if (!is.null(times_flat)) {
    precision = SamePrecision(0)
    writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
  }

  if (!is.null(cachegrind_flat)) {
    precision = SamePrecision(0)
    writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
  }

  if (!is.null(elapsed)) {  # set only when there is no 'no-host' host
    # Round to nearest millisecond, but the ratio has a decimal point.
    precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
    writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)

    precision = SamePrecision(0)
    writeCsv(rate, file.path(out_dir, 'rate'), precision)

    writeCsv(max_rss, file.path(out_dir, 'max_rss'))

    precision = SamePrecision(1)
    writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
  }

  Log('Wrote %s', out_dir)
}
400 |
WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = FALSE) {
  ### Write the 'hosts' and 'shells' provenance tables to out_dir.
  ###
  ### distinct_hosts needs host_label, host_name, host_hash;
  ### distinct_shells needs shell_label, shell_name, shell_hash.
  ### IDs are "<name>-<hash>".  With exactly one host the links are relative
  ### (provenanceLink); otherwise they point at the benchmark-data repository
  ### (benchmarkDataLink).
  ### tsv: write with writeTsv() when TRUE, writeCsv() otherwise.

  num_hosts = nrow(distinct_hosts)
  if (num_hosts == 1) {
    linkify = provenanceLink
  } else {
    linkify = benchmarkDataLink
  }

  Log('distinct_hosts')
  print(distinct_hosts)
  Log('')

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  # Should be:
  # host_id_url
  # And then csv_to_html will be smart enough?  It should take --url flag?
  host_table = tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    # refers to the host_id column defined just above
    host_id_HREF = linkify('host-id', host_id, '/')
  )
  Log('host_table')
  print(host_table)
  Log('')

  shell_table = tibble(
    shell_label = distinct_shells$shell_label,
    shell_id = paste(distinct_shells$shell_name,
                     distinct_shells$shell_hash, sep='-'),
    # refers to the shell_id column defined just above
    shell_id_HREF = linkify('shell-id', shell_id, '/')
  )

  Log('shell_table')
  print(shell_table)
  Log('')

  if (tsv) {
    writeTsv(host_table, file.path(out_dir, 'hosts'))
    writeTsv(shell_table, file.path(out_dir, 'shells'))
  } else {
    writeCsv(host_table, file.path(out_dir, 'hosts'))
    writeCsv(shell_table, file.path(out_dir, 'shells'))
  }
}
450 |
WriteSimpleProvenance = function(provenance, out_dir) {
  ### Derive the distinct hosts and shells from a provenance table and write
  ### them to out_dir as TSV via WriteProvenance().
  ###
  ### provenance needs host_label, host_name, host_hash, shell_label,
  ### sh_path and shell_hash.

  Log('provenance')
  print(provenance)
  Log('')

  # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
  # benchmarks/id.sh publish-shell-id uses
  provenance %>%
    mutate(shell_name = basename(sh_path)) %>%
    distinct(shell_label, shell_name, shell_hash) ->
    distinct_shells

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  provenance %>% distinct(host_label, host_name, host_hash) -> distinct_hosts

  # TRUE rather than T: T is an ordinary variable that can be reassigned
  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
471 |
RuntimeReport = function(in_dir, out_dir) {
  ### Build the osh-runtime report.
  ###
  ### Reads times.tsv, gc_stats.tsv and provenance.tsv from in_dir and stops
  ### if any task in times.tsv has a nonzero status.
  ### Writes to out_dir: hosts and shells (via WriteSimpleProvenance),
  ### elapsed, page_faults, max_rss, gc_stats, details and details_io.

  times = readTsv(file.path(in_dir, 'times.tsv'))

  gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
  provenance = readTsv(file.path(in_dir, 'provenance.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some osh-runtime tasks failed')
  }

  # Joins:
  # times <= sh_path => provenance
  # times <= join_id, host_name => gc_stats

  # TODO: provenance may have rows from 2 machines.  Could validate them and
  # deduplicate.

  # It should have (host_label, host_name, host_hash)
  # (shell_label, sh_path, shell_hash)
  provenance %>%
    mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
    provenance

  # sh_path -> shell_label lookup table, used by the joins below
  provenance %>% distinct(sh_path, shell_label) -> label_lookup

  Log('label_lookup')
  print(label_lookup)

  # Join with provenance for host label and shell label
  times %>%
    select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
             host_name, sh_path, workload)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details

  # Same shape as 'details', but carrying the fault / I/O / context-switch
  # counters instead of the timing columns
  times %>%
    select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults, swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details_io

  Log('details')
  print(details)

  # Elapsed time comparison: one column per shell label, plus ratios to bash
  details %>%
    select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio)) ->

    elapsed

  Log('elapsed')
  print(elapsed)

  # Minor Page Faults Comparison
  details_io %>%
    select(c(host_name, shell_label, workload, minor_faults)) %>%
    spread(key = shell_label, value = minor_faults) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio)) ->
    page_faults

  Log('page_faults')
  print(page_faults)

  # Max RSS comparison
  details %>%
    select(c(host_name, shell_label, workload, max_rss_MB)) %>%
    spread(key = shell_label, value = max_rss_MB) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio)) ->
    max_rss

  Log('max rss')
  print(max_rss)

  # Build the "gc-<task_id>" key that gc_stats is joined on below
  details %>%
    select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
    mutate(join_id = sprintf("gc-%d", task_id)) %>%
    select(-c(task_id)) ->
    gc_details

  Log('GC details')
  print(gc_details)
  Log('')

  Log('GC stats')
  print(gc_stats)
  Log('')

  gc_stats %>%
    left_join(gc_details, by = c('join_id', 'host_name')) %>%
    select(-c(join_id, roots_capacity, objs_capacity)) %>%
    # Do same transformations as GcReport()
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    select(-c(bytes_allocated)) %>%
    rename(num_gc_done = num_collections) %>%
    # Put these columns first
    relocate(workload, host_name,
             elapsed_ms, max_gc_millis, total_gc_millis,
             allocated_MB, max_rss_MB, num_allocated) ->
    gc_stats

  Log('After GC stats')
  print(gc_stats)
  Log('')

  WriteSimpleProvenance(provenance, out_dir)

  # milliseconds don't need decimal digit
  precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
                                   `osh-native` = 0, py_bash_ratio = 2,
                                   native_bash_ratio = 2))
  writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
  # page_faults has the same columns as elapsed, so the same precision applies
  writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)

  precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                               default = 0)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)

  writeTsv(details, file.path(out_dir, 'details'), precision3)
  writeTsv(details_io, file.path(out_dir, 'details_io'))

  Log('Wrote %s', out_dir)
}
626 |
VmBaselineReport <- function(in_dir, out_dir) {
  ### Read vm-baseline.tsv from in_dir, label each shell, convert the KiB
  ### metrics to MB, pivot VmRSS / VmPeak into columns, and write the result
  ### to out_dir as 'vm-baseline'.
  raw <- readTsv(file.path(in_dir, 'vm-baseline.tsv'))

  # Not using DistinctHosts() because field host_hash isn't collected
  num_hosts <- nrow(distinct(raw, host))

  # One row per (shell, host, metric), in megabytes
  in_megabytes <- raw %>%
    rename(kib = metric_value) %>%
    mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
           megabytes = kib * 1024 / 1e6) %>%
    select(-c(shell_name, kib))

  # One column per metric
  vm <- in_megabytes %>%
    spread(key = c(metric_name), value = megabytes) %>%
    rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
    select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
    arrange(shell_label, shell_hash, host, VmPeak_MB)

  print(vm)

  writeTsv(vm, file.path(out_dir, 'vm-baseline'))
}
649 |
WriteOvmBuildDetails <- function(distinct_hosts, distinct_compilers, out_dir) {
  ### Write the 'hosts' and 'compilers' tables for the ovm-build report as
  ### TSV.  IDs are "<name>-<hash>" and link into the benchmark-data repo.
  host_table <- tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name, distinct_hosts$host_hash,
                    sep='-'),
    host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
  )
  print(host_table)

  compiler_table <- tibble(
    compiler_label = distinct_compilers$compiler_label,
    compiler_id = paste(distinct_compilers$compiler_label,
                        distinct_compilers$compiler_hash, sep='-'),
    compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
  )
  print(compiler_table)

  writeTsv(host_table, file.path(out_dir, 'hosts'))
  writeTsv(compiler_table, file.path(out_dir, 'compilers'))
}
670 |
OvmBuildReport = function(in_dir, out_dir) {
  ### Build the ovm-build report.
  ###
  ### Reads times.tsv, bytecode-size.tsv, bin-sizes.tsv, native-sizes.tsv and
  ### raw-data.tsv from in_dir and stops if any task in times.tsv has a
  ### nonzero status.
  ### Writes to out_dir: hosts and compilers (via WriteOvmBuildDetails),
  ### times, bytecode-size, sizes and native-sizes.

  times = readTsv(file.path(in_dir, 'times.tsv'))
  bytecode_size = readTsv(file.path(in_dir, 'bytecode-size.tsv'))
  bin_sizes = readTsv(file.path(in_dir, 'bin-sizes.tsv'))
  native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
  # NOTE(review): raw_data is read but not used below -- confirm whether the
  # read is only meant to check that the file exists.
  raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some ovm-build tasks failed')
  }

  # Hosts are labeled by name, compilers by the basename of their path
  times %>% distinct(host_name, host_hash) -> distinct_hosts
  distinct_hosts$host_label = distinct_hosts$host_name

  times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
  distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)

  #print(distinct_hosts)
  #print(distinct_compilers)

  WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)

  # Elapsed seconds per (src_dir, compiler, action), one column per host.
  # The is_* flags exist only to order the rows and are dropped afterwards.
  # NOTE(review): paste("host ", ...) uses the default sep = " ", so the
  # column names get two spaces ("host  <label>"); ParserReport uses
  # paste("host", ...).  Confirm which is intended.
  times %>%
    select(-c(status)) %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
    select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
    mutate(src_dir = basename(src_dir),
           host_label = paste("host ", host_label),
           is_conf = str_detect(action, 'configure'),
           is_ovm = str_detect(action, 'oil.ovm'),
           is_dbg = str_detect(action, 'dbg'),
           ) %>%
    select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
           elapsed_secs) %>%
    spread(key = c(host_label), value = elapsed_secs) %>%
    arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
    select(-c(is_conf, is_ovm, is_dbg)) ->
    times

  #print(times)

  bytecode_size %>%
    rename(bytecode_size = num_bytes) %>%
    select(-c(path)) ->
    bytecode_size

  # Native code size = binary size minus bytecode size, matched per host
  bin_sizes %>%
    # reorder
    select(c(host_label, path, num_bytes)) %>%
    left_join(bytecode_size, by = c('host_label')) %>%
    mutate(native_code_size = num_bytes - bytecode_size) ->
    sizes

  # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
  # so basename(path) is the binary and basename(dirname(path)) the compiler
  native_sizes %>%
    select(c(host_label, path, num_bytes)) %>%
    mutate(host_label = paste("host ", host_label),
           binary = basename(path),
           compiler = basename(dirname(path)),
           ) %>%
    select(-c(path)) %>%
    spread(key = c(host_label), value = num_bytes) %>%
    arrange(compiler, binary) ->
    native_sizes

  # NOTE: These don't have the host and compiler.
  writeTsv(times, file.path(out_dir, 'times'))
  writeTsv(bytecode_size, file.path(out_dir, 'bytecode-size'))
  writeTsv(sizes, file.path(out_dir, 'sizes'))
  writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))

  # TODO: I want a size report too
  #writeCsv(sizes, file.path(out_dir, 'sizes'))
}
748 |
unique_stdout_md5sum <- function(t, num_expected) {
  ### Check that t$stdout_md5sum has exactly num_expected distinct values.
  ### On a mismatch, print the identifying columns and stop.
  num_unique <- n_distinct(t$stdout_md5sum)
  if (num_unique == num_expected) {
    return(invisible(NULL))
  }

  print(select(t, c(host_name, task_name, arg1, arg2, runtime_name, stdout_md5sum)))
  stop(sprintf('Expected %d unique md5sums, got %d', num_expected, num_unique))
}
756 |
ComputeReport = function(in_dir, out_dir) {
  ### Build the compute benchmark report.
  ###
  ### Reads times.tsv from in_dir; stops if any task has a nonzero status or
  ### if a task's stdout md5sums don't have the expected number of distinct
  ### values.
  ### Writes to out_dir: details, stdout_files, one table per task (hello,
  ### fib, word_freq, parse_help, bubble_sort, palindrome), and hosts/shells
  ### (via WriteProvenance, as TSV).

  # TSV file, not CSV
  times = read.table(file.path(in_dir, 'times.tsv'), header=TRUE)
  print(times)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some compute tasks failed')
  }

  #
  # Check correctness
  #

  times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
  # 3 different inputs
  times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)

  times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)

  # TODO:
  # - oils_cpp doesn't implement unicode LANG=C
  # - bash behaves differently on your desktop vs. in the container
  #   - might need layer-locales in the image?

  #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
  # Ditto here
  #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  # Runtimes are called shells, as a hack for code reuse
  times %>%
    mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
    select(c(host_name, host_hash, shell_name, shell_hash)) ->
    tmp

  distinct_hosts = DistinctHosts(tmp)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(tmp)
  Log('')
  Log('Distinct runtimes')
  print(distinct_shells)

  num_hosts = nrow(distinct_hosts)

  # runtime_hash must still be a column when ShellLabels() is evaluated (it is
  # read for any runtime named 'osh'), so it is dropped in the second select()
  # rather than the first.  The resulting columns and their order are the same.
  times %>%
    select(-c(status, stdout_md5sum, stdout_filename, host_hash)) %>%
    mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(runtime_name, runtime_hash,
              elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    arrange(host_name, task_name, arg1, arg2, user_ms) ->
    details

  # Links from each stdout md5sum to the saved output file
  times %>%
    mutate(
      runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
      stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
    select(c(host_name, task_name, arg1, arg2, runtime_label,
             stdout_md5sum, stdout_md5sum_HREF)) ->
    stdout_files

  details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
  details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
  details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
  # There's no arg2
  details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help

  details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
  details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% select(-c(task_name)) -> palindrome

  precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
  writeTsv(details, file.path(out_dir, 'details'), precision)

  writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)

  writeTsv(hello, file.path(out_dir, 'hello'), precision)
  writeTsv(fib, file.path(out_dir, 'fib'), precision)
  writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
  writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)

  writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
  writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
854 |
WriteOneTask <- function(times, out_dir, task_name, precision) {
  ### Write the rows of `times` whose task equals task_name, without the
  ### task column, to <out_dir>/<task_name> as TSV.
  rows_for_task <- filter(times, task == task_name)
  without_task <- select(rows_for_task, -c(task))

  writeTsv(without_task, file.path(out_dir, task_name), precision)
}
862 |
# Row order for shells in the GC reports.  GcReport() and GcCachegrindReport()
# sort with arrange(factor(sh_path, levels = SHELL_ORDER)).
SHELL_ORDER = c('dash',
                'bash',
                'zsh',
                '_bin/cxx-opt+bumpleak/osh',
                '_bin/cxx-opt+bumproot/osh',
                '_bin/cxx-opt+bumpsmall/osh',
                '_bin/cxx-opt/osh',
                '_bin/cxx-opt+nopool/osh')
871 |
GcReport = function(in_dir, out_dir) {
  ### Build the GC benchmark report.
  ###
  ### Reads raw/times.tsv and stage1/gc_stats.tsv from in_dir and stops if
  ### any task has a nonzero status.
  ### Writes to out_dir: times, gc_stats, and one table per task listed in
  ### `tasks` below.

  # TRUE rather than T: T is an ordinary variable that can be reassigned
  times = read.table(file.path(in_dir, 'raw/times.tsv'), header=TRUE)
  gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=TRUE)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  # Change units and order columns
  times %>%
    arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           shell_label = ShellLabelFromPath(sh_path)
           ) %>%
    select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
             shell_runtime_opts)) ->
    times

  # Join and order columns
  gc_stats %>% left_join(times, by = c('join_id')) %>%
    arrange(desc(task)) %>%
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    # try to make the table skinnier
    rename(num_gc_done = num_collections) %>%
    select(task, elapsed_ms, max_gc_millis, total_gc_millis,
           allocated_MB, max_rss_MB, num_allocated,
           num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
           shell_label) ->
    gc_stats

  # join_id was only needed for the join above
  times %>% select(-c(join_id)) -> times

  precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                              default = 0)

  writeTsv(times, file.path(out_dir, 'times'), precision)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)

  tasks = c('parse.configure-coreutils',
            'parse.configure-cpython',
            'parse.abuild',
            'ex.compute-fib',
            'ex.bashcomp-parse-help',
            'ex.abuild-print-help')
  # Write out separate rows
  for (task in tasks) {
    WriteOneTask(times, out_dir, task, precision)
  }
}
927 |
GcCachegrindReport <- function(in_dir, out_dir) {
  ### Read raw/times.tsv and stage1/cachegrind.tsv from in_dir, stop if any
  ### task failed, and write instruction counts (in millions) for selected
  ### tasks to out_dir, one table per task.
  times <- readTsv(file.path(in_dir, 'raw/times.tsv'))
  counts <- readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))

  failed <- filter(times, status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  print(times)
  print(counts)

  # Attach task metadata to each count and sort the shells in SHELL_ORDER
  joined <- left_join(counts, times, by = c('join_id'))
  counts <- joined %>%
    mutate(million_irefs = irefs / 1e6) %>%
    select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER))

  for (task_to_write in c('parse.abuild', 'ex.compute-fib')) {
    # NULL precision, as in the other unformatted tables
    WriteOneTask(counts, out_dir, task_to_write, NULL)
  }
}
953 |
MyCppReport = function(in_dir, out_dir) {
  ### Compare the C++ and Python implementations of the mycpp examples.
  ###
  ### Reads benchmark-table.tsv from in_dir, aborts if any task has a
  ### nonzero status, and writes four tables to out_dir: user_time, sys_time,
  ### max_rss (each with one column per impl plus a 'C++ : Python' ratio) and
  ### details (the long-format data they were derived from).

  times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
  print(times)

  failed = filter(times, status != 0)
  if (nrow(failed) > 0) {
    print(failed)
    stop('Some mycpp tasks failed')
  }

  # Adds the C++ / Python ratio to a wide table and sorts by it, ascending
  WithRatio = function(wide) {
    wide = mutate(wide, `C++ : Python` = `C++` / Python)
    arrange(wide, `C++ : Python`)
  }

  # Elapsed time is dropped; user time, system time and memory are kept,
  # converted to ms / MB.
  details = times %>%
    select(-c(status, elapsed_secs, bin, task_out)) %>%
    mutate(example_name_HREF = mycppUrl(example_name),
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(user_secs, sys_secs, max_rss_KiB))

  # For each metric: drop the other two metrics, then make one column per impl
  user_time = WithRatio(
    spread(select(details, -c(sys_ms, max_rss_MB)),
           key = impl, value = user_ms))

  sys_time = WithRatio(
    spread(select(details, -c(user_ms, max_rss_MB)),
           key = impl, value = sys_ms))

  max_rss = WithRatio(
    spread(select(details, -c(user_ms, sys_ms)),
           key = impl, value = max_rss_MB))

  # Sometimes it speeds up by more than 10x
  time_precision = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
  writeTsv(user_time, file.path(out_dir, 'user_time'), time_precision)
  writeTsv(sys_time, file.path(out_dir, 'sys_time'), time_precision)

  rss_precision = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), rss_precision)

  writeTsv(details, file.path(out_dir, 'details'))
}
1001 |
UftraceTaskReport = function(env, task_name, summaries) {
  ### Print an allocation report for one task and record its summary.
  ###
  ### Args:
  ###   env: environment mapping task name -> environment holding the data
  ###     frames untyped (obj_len), typed (func_name), strings (str_len),
  ###     slabs (slab_len, func_name) and reserve (num_items)
  ###   task_name: which entry of env to report on
  ###   summaries: environment used as an output parameter.  A 1-row tibble
  ###     is stored in summaries$stats[[task_name]] and the per-type counts
  ###     in summaries$most_common_types[[task_name]].
  ###
  ### The report itself is emitted with print() and Log().

  # Need this again after redirect
  MaybeDisableColor(stdout())

  task_env = env[[task_name]]

  untyped = task_env$untyped
  typed = task_env$typed
  strings = task_env$strings
  slabs = task_env$slabs
  reserve = task_env$reserve

  # Percentage of 'values' that are <= limit.  Counting directly (rather than
  # looking up a histogram row with exactly that value) still works when no
  # element is exactly equal to 'limit'.
  PercentAtMost = function(values, limit) {
    sum(values <= limit, na.rm = TRUE) * 100.0 / length(values)
  }

  string_overhead = 17  # GC header (8) + len (4) + hash value (4) + NUL (1)
  strings %>% mutate(obj_len = str_len + string_overhead) -> strings

  # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
  # big/small
  #
  # And then zoom in on distributions as well

  num_allocs = nrow(untyped)
  total_bytes = sum(untyped$obj_len)

  untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
  #print(untyped_hist)

  # Cumulative distribution of allocation sizes
  untyped_hist %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_allocs) ->
    alloc_sizes

  allocs_24_bytes_or_less = PercentAtMost(untyped$obj_len, 24)
  allocs_48_bytes_or_less = PercentAtMost(untyped$obj_len, 48)
  allocs_96_bytes_or_less = PercentAtMost(untyped$obj_len, 96)

  Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)

  options(tibble.print_min=25)

  Log('')
  Log('All allocations')
  print(alloc_sizes %>% head(22))
  print(alloc_sizes %>% tail(5))

  Log('')
  Log('Common Sizes')
  print(untyped_hist %>% arrange(desc(n)) %>% head(8))

  Log('')
  Log('    %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
  Log('')

  Log('Typed allocations')

  num_typed = nrow(typed)

  typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
    mutate(percent = n * 100.0 / num_typed) %>%
    arrange(desc(n)) -> most_common_types

  print(most_common_types %>% head(20))
  print(most_common_types %>% tail(5))

  lists = typed %>% filter(str_starts(func_name, ('List<')))
  #print(lists)

  num_lists = nrow(lists)
  total_list_bytes = num_lists * 24  # sizeof List<T> head is hard-coded

  Log('')
  Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
  Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
  Log('')

  #
  # Strings
  #

  num_strings = nrow(strings)
  total_string_bytes = sum(strings$obj_len)

  # Cumulative distribution of string lengths
  strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_strings) ->
    string_lengths

  # Plain numbers, computed with <= so that they are defined even when the
  # workload has no string of exactly 6 or 14 bytes.
  strs_6_bytes_or_less = PercentAtMost(strings$str_len, 6)
  strs_14_bytes_or_less = PercentAtMost(strings$str_len, 14)

  # Parse workload
  # 62% of strings <= 6 bytes
  # 84% of strings <= 14 bytes

  Log('Str - NewStr() and OverAllocatedStr()')
  print(string_lengths %>% head(16))
  print(string_lengths %>% tail(5))
  Log('')

  Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
      commas(sum(strings$str_len)), commas(total_string_bytes))
  Log('')
  Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
  Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
  Log('')

  #
  # Slabs
  #

  Log('NewSlab()')

  num_slabs = nrow(slabs)
  slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_slabs) ->
    slab_lengths

  slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
    arrange(desc(n)) -> slab_types

  Log('  Lengths')
  print(slab_lengths %>% head())
  print(slab_lengths %>% tail(5))
  Log('')

  Log('  Slab Types')
  print(slab_types %>% head())
  print(slab_types %>% tail(5))
  Log('')

  total_slab_items = sum(slabs$slab_len)

  Log('%s slabs, total items = %s', commas(num_slabs),
      commas(total_slab_items))
  Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
  Log('')

  #
  # reserve() calls
  #

  # There should be strictly more List::reserve() calls than NewSlab

  Log('::reserve(int n)')
  Log('')

  num_reserve = nrow(reserve)
  reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_reserve) ->
    reserve_args

  Log('  Num Items')
  print(reserve_args %>% head(15))
  print(reserve_args %>% tail(5))
  Log('')

  Log('%s reserve() calls, total items = %s', commas(num_reserve),
      commas(sum(reserve$num_items)))
  Log('')

  # Accounting for all allocations!
  Log('Untyped: %s', commas(num_allocs))
  Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
  Log('')

  num_other_typed = num_typed - num_lists

  # Summary table: a single row, with every value already formatted as a
  # string for display
  stats = tibble(task = task_name,
                 total_bytes_ = commas(total_bytes),
                 num_allocs_ = commas(num_allocs),
                 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
                 num_reserve_calls = commas(num_reserve),

                 percent_list_allocs = Percent(num_lists, num_allocs),
                 percent_slab_allocs = Percent(num_slabs, num_allocs),
                 percent_string_allocs = Percent(num_strings, num_allocs),
                 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),

                 percent_list_bytes = Percent(total_list_bytes, total_bytes),
                 percent_string_bytes = Percent(total_string_bytes, total_bytes),

                 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
                 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
                 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),

                 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
                 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less)
                 )
  summaries$stats[[task_name]] = stats

  summaries$most_common_types[[task_name]] = most_common_types
}
1200 |
LoadUftraceTsv = function(in_dir, env) {
  ### Load the uftrace tables of every task found under in_dir into env.
  ###
  ### Each entry of in_dir is treated as a task.  For every task a new
  ### environment is stored at env[[task]], holding the data frames untyped,
  ### typed, strings, slabs and reserve.  A summary() of each is printed.

  # Data frame name -> TSV file (not CSV), in load and print order
  tsv_files = c(untyped = 'all-untyped.tsv',
                typed = 'typed.tsv',
                strings = 'strings.tsv',
                slabs = 'slabs.tsv',
                reserve = 'reserve.tsv')

  for (task in list.files(in_dir)) {
    Log('Loading data for task %s', task)
    base_dir = file.path(in_dir, task)

    task_env = new.env()
    env[[task]] = task_env

    # Read everything first, so a missing file fails before any summary
    for (table_name in names(tsv_files)) {
      task_env[[table_name]] = readTsv(file.path(base_dir, tsv_files[[table_name]]))
    }

    # median string length is 4, mean is 9.5!
    for (table_name in names(tsv_files)) {
      Log(toupper(table_name))  # UNTYPED, TYPED, STRINGS, SLABS, RESERVE
      print(summary(task_env[[table_name]]))
      Log('')
    }
  }
}
1238 |
Percent = function(n, total) {
  ### Format n as a percentage of total with one decimal, e.g. '25.0%'.
  ### Vectorized over both arguments, like sprintf().

  # Multiply before dividing, so rounding matches n * 100.0 / total exactly
  pct = (n * 100.0) / total
  sprintf('%.1f%%', pct)
}
1242 |
PrettyPrintLong = function(d) {
  ### Print a data frame transposed: one output line per column of d.
  ###
  ### Each line is the column name right-aligned to 26 characters, followed
  ### by that column's values (one per row of d), each right-aligned to 20
  ### characters.  A blank line is printed after certain columns to group
  ### related stats.  Prints nothing when d has no columns.

  tr = t(d)  # transpose

  row_names = rownames(tr)

  # Rows that end a visual group
  group_ends = c('num_reserve_calls',
                 'percent_string_bytes',
                 'percent_other_typed_allocs',
                 'allocs_96_bytes_or_less')

  # seq_len() rather than 1:nrow(tr): the latter yields c(1, 0) for an empty
  # table and then fails indexing row 1.
  for (i in seq_len(nrow(tr))) {
    row_name = row_names[i]
    cat(sprintf('%26s', row_name))  # calculated min width manually
    cat(sprintf('%20s', tr[i,]))
    cat('\n')

    # Extra spacing
    if (row_name %in% group_ends) {
      cat('\n')
    }
  }
}
1263 |
1264 |
UftraceReport = function(env, out_dir) {
  ### Write one report per task in env, plus a cross-task summary.
  ###
  ### Args:
  ###   env: environment mapping task name -> per-task data
  ###   out_dir: directory that receives TASK.txt for each task and
  ###     summary.txt
  ###
  ### Returns:
  ###   list(stats = ...), the per-task summary rows bound into one data frame.
  ###
  ### Output is captured with sink().  Each redirect is undone in a 'finally'
  ### clause, so an error inside a report doesn't leave stdout diverted to a
  ### file (which would silence an interactive session).

  # summaries$stats is a list of 1-row data frames, keyed by task name
  # summaries$most_common_types is a list of per-type count tables
  summaries = new.env()

  for (task_name in names(env)) {
    report_out = file.path(out_dir, paste0(task_name, '.txt'))

    Log('Making report for task %s -> %s', task_name, report_out)

    sink(file = report_out)
    tryCatch(
      UftraceTaskReport(env, task_name, summaries),
      finally = sink()  # reset, even on error
    )
  }
  Log('')

  # Concatenate all the data frames added to summaries
  stats = bind_rows(as.list(summaries$stats))

  sink(file = file.path(out_dir, 'summary.txt'))
  tryCatch({
    #print(stats)
    #Log('')

    PrettyPrintLong(stats)
    Log('')

    mct = summaries$most_common_types
    for (task_name in names(mct)) {
      Log('Common types in workload %s', task_name)
      Log('')

      print(mct[[task_name]] %>% head(5))
      Log('')
    }
  }, finally = sink())  # reset, even on error

  # For the REPL
  return(list(stats = stats))
}
1304 |
main = function(argv) {
  ### Entry point: argv is (ACTION, IN_DIR, OUT_DIR).
  ###
  ### Runs the report named by ACTION.  An unknown action is logged and the
  ### process exits with status 1.

  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  switch(action,
    'osh-parser' = ParserReport(in_dir, out_dir),
    'osh-runtime' = RuntimeReport(in_dir, out_dir),
    'vm-baseline' = VmBaselineReport(in_dir, out_dir),
    'ovm-build' = OvmBuildReport(in_dir, out_dir),
    'compute' = ComputeReport(in_dir, out_dir),
    'gc' = GcReport(in_dir, out_dir),
    'gc-cachegrind' = GcCachegrindReport(in_dir, out_dir),
    'mycpp' = MyCppReport(in_dir, out_dir),
    'uftrace' = {
      # This one loads its data into an environment first
      d = new.env()
      LoadUftraceTsv(in_dir, d)
      UftraceReport(d, out_dir)
    },
    {
      # Default branch: no action matched
      Log("Invalid action '%s'", action)
      quit(status = 1)
    }
  )

  Log('PID %d done', Sys.getpid())
}
1345 |
# Run main() only when there are no enclosing call frames, i.e. when this
# file is executed directly rather than loaded from inside a function call
# such as source().
if (identical(length(sys.frames()), 0L)) {
  # increase ggplot font size globally
  #theme_set(theme_grey(base_size = 20))

  main(commandArgs(trailingOnly = TRUE))
}