#!/usr/bin/env Rscript
#
# benchmarks/report.R -- Analyze data collected by shell scripts.
#
# Usage:
#   benchmarks/report.R ACTION IN_DIR OUT_DIR

# Suppress warnings about functions masked from 'package:stats' and 'package:base'
#   filter, lag
#   intersect, setdiff, setequal, union
library(dplyr, warn.conflicts = FALSE)
library(tidyr)  # spread()
library(stringr)

source('benchmarks/common.R')

options(stringsAsFactors = F)

# For pretty printing
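# e.g. commas(1234567) -> "1,234,567"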
commas = function(x) {
  format(x, big.mark=',')
}

sourceUrl = function(path) {
  sprintf('https://github.com/oilshell/oil/blob/master/%s', path)
}

# Takes a filename, not a path.
sourceUrl2 = function(filename) {
  sprintf(
      'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s',
      filename)
}

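# e.g. mycppUrl('foo') ->
#   https://github.com/oilshell/oil/blob/master/mycpp/examples/foo.py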
mycppUrl = function(path) {
  sprintf('https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py', path)
}


# TODO: Set up cgit because GitHub links are slow.
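# e.g. benchmarkDataLink('shell-id', 'bash-aa0e69ec', '/') (illustrative id) ->
#   https://github.com/oilshell/benchmark-data/blob/master/shell-id/bash-aa0e69ec/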
benchmarkDataLink = function(subdir, name, suffix) {
  #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
  sprintf('https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s',
          subdir, name, suffix)
}

provenanceLink = function(subdir, name, suffix) {
  sprintf('../%s/%s%s', subdir, name, suffix)
}


GetOshLabel = function(shell_hash, prov_dir) {
  ### Given a shell hash and a provenance dir, return a readable label.
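  # sh-path.txt is expected to contain a shell path matching _bin/osh, bin/osh,
  # or _bin/.*/osh (checked below).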

  path = sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)

  if (file.exists(path)) {
    Log('Reading %s', path)
    lines = readLines(path)
    if (length(grep('_bin/osh', lines)) > 0) {
      label = 'osh-ovm'
    } else if (length(grep('bin/osh', lines)) > 0) {
      label = 'osh-cpython'
    } else if (length(grep('_bin/.*/osh', lines)) > 0) {
      label = 'osh-native'
    } else {
      stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
    }
  } else {
    stop(sprintf("%s doesn't exist", path))
  }
  return(label)
}

opt_suffix1 = '_bin/cxx-opt/osh'
opt_suffix2 = '_bin/cxx-opt-sh/osh'

ShellLabels = function(shell_name, shell_hash, num_hosts) {
  ### Given vectors of shell names and hashes, return readable labels.

  # TODO: Clean up callers.  Some metrics call this function with a
  # shell/runtime BASENAME, and others a PATH
  # - e.g. ComputeReport calls this with runtime_name, which is actually a PATH

  #Log('name %s', shell_name)
  #Log('hash  %s', shell_hash)

  if (num_hosts == 1) {
    prov_dir = '_tmp'
  } else {
    prov_dir = '../benchmark-data/'
  }

  labels = c()
  for (i in 1:length(shell_name)) {
    sh = shell_name[i]
    if (sh == 'osh') {
      label = GetOshLabel(shell_hash[i], prov_dir)

    } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      label = 'opt/osh'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label = 'bumpleak/osh'

    } else {
      label = sh
    }

    Log('[%s] [%s]', shell_name[i], label)
    labels = c(labels, label)
  }

  return(labels)
}

# Simple version of the above, used by benchmarks/gc
ShellLabelFromPath = function(sh_path) {
  labels = c()
  for (i in 1:length(sh_path)) {
    sh = sh_path[i]

    if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      # the opt binary is osh-native
      label = 'osh-native'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label = 'bumpleak/osh'

    } else if (endsWith(sh, '_bin/osh')) {  # the app bundle
      label = 'osh-ovm'

    } else if (endsWith(sh, 'bin/osh')) {
      label = 'osh-cpython'

    } else {
      label = sh
    }
    labels = c(labels, label)
  }
  return(labels)
}

DistinctHosts = function(t) {
  t %>% distinct(host_name, host_hash) -> distinct_hosts
  # The label is just the name
  distinct_hosts$host_label = distinct_hosts$host_name
  return(distinct_hosts)
}

DistinctShells = function(t, num_hosts = -1) {
  t %>% distinct(shell_name, shell_hash) -> distinct_shells

  Log('')
  Log('Labeling shells')

  # Calculate it if not passed
  if (num_hosts == -1) {
    num_hosts = nrow(DistinctHosts(t))
  }

  distinct_shells$shell_label = ShellLabels(distinct_shells$shell_name,
                                            distinct_shells$shell_hash,
                                            num_hosts)
  return(distinct_shells)
}

ParserReport = function(in_dir, out_dir) {
  times = read.csv(file.path(in_dir, 'times.csv'))
  lines = read.csv(file.path(in_dir, 'lines.csv'))
  raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))

  cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))

  # For joining by filename
  lines_by_filename = tibble(
      num_lines = lines$num_lines,
      filename = basename(lines$path)
  )

  # Remove failures
  times %>% filter(status == 0) %>% select(-c(status)) -> times
  cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind

  # Add the number of lines, joining on path, and compute lines/ms
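  # Note: max_rss_KiB * 1024 / 1e6 converts KiB to MB (1e6 bytes).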
  times %>%
    left_join(lines, by = c('path')) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path),
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           lines_per_ms = num_lines / elapsed_ms) %>%
    select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
    joined_times

  #print(head(times))
  #print(head(lines))
  #print(head(vm))
  #print(head(joined_times))

  print(summary(joined_times))

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  distinct_hosts = DistinctHosts(joined_times)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(joined_times)
  Log('')
  Log('Distinct shells')
  print(distinct_shells)

  # Replace name/hash combinations with labels.
  joined_times %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
    select(-c(host_name, host_hash, shell_name, shell_hash)) ->
    joined_times

  # Like the 'times' join above, but do shell_label in one step
  # Hack: we know benchmarks/auto.sh runs this on one machine
  distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
  cachegrind %>%
    left_join(lines, by = c('path')) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
    select(-c(shell_name, shell_hash)) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
    select(-c(path)) ->
    joined_cachegrind

  Log('summary(joined_times):')
  print(summary(joined_times))
  Log('head(joined_times):')
  print(head(joined_times))

  # Summarize rates by platform/shell
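  # (spread() below yields one row per shell_label, with a lines_per_ms column
  # per host)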
  joined_times %>%
    mutate(host_label = paste("host", host_label)) %>%
    group_by(host_label, shell_label) %>%
    summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
    mutate(lines_per_ms = total_lines / total_ms) %>%
    select(-c(total_ms)) %>%
    spread(key = host_label, value = lines_per_ms) ->
    times_summary

  # Sort by parsing rate on the fast machine
  if ("host lenny" %in% colnames(times_summary)) {
    times_summary %>% arrange(desc(`host lenny`)) -> times_summary
  } else {
    times_summary %>% arrange(desc(`host no-host`)) -> times_summary
  }

  Log('times_summary:')
  print(times_summary)

  # Summarize cachegrind by platform/shell
  # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
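  # thousand_irefs_per_line = cachegrind instruction refs (irefs) per source
  # line, in thousands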
  joined_cachegrind %>%
    group_by(shell_label) %>%
    summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
    mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
    select(-c(total_irefs)) ->
    cachegrind_summary

  if ("no-host" %in% distinct_hosts$host_label) {

    # We don't have all the shells
    elapsed = NULL
    rate = NULL
    max_rss = NULL
    instructions = NULL

    joined_times %>%
      select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
               num_lines, filename, filename_HREF)) %>%
      arrange(filename, elapsed_ms) ->
      times_flat

    joined_cachegrind %>%
      select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
      arrange(filename, irefs) ->
      cachegrind_flat

  } else {

    times_flat = NULL
    cachegrind_flat = NULL

    # Elapsed milliseconds for each shell by platform and file
    joined_times %>%
      select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = elapsed_ms) %>%
      arrange(host_label, num_lines) %>%
      mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               osh_to_bash_ratio, num_lines, filename, filename_HREF)) ->
      elapsed

    Log('\n')
    Log('ELAPSED')
    print(elapsed)

    # Rates by file and shell
    joined_times %>%
      select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = lines_per_ms) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF)) ->
      rate

    Log('\n')
    Log('RATE')
    print(rate)

    # Memory usage by file
    joined_times %>%
      select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
      spread(key = shell_label, value = max_rss_MB) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF)) ->
      max_rss

    Log('\n')
    Log('MAX RSS')
    print(max_rss)

    Log('\n')
    Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
    print(joined_cachegrind)
    #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))

    # Cachegrind instructions by file
    joined_cachegrind %>%
      mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
      select(-c(irefs)) %>%
      spread(key = shell_label, value = thousand_irefs_per_line) %>%
      arrange(num_lines) %>%
      select(c(bash, dash, mksh, `osh-native`,
               num_lines, filename, filename_HREF)) ->
      instructions

    Log('\n')
    Log('instructions has %d rows', nrow(instructions))
    print(instructions)
  }

  WriteProvenance(distinct_hosts, distinct_shells, out_dir)

  raw_data_table = tibble(
    filename = basename(as.character(raw_data$path)),
    filename_HREF = benchmarkDataLink('osh-parser', filename, '')
  )
  #print(raw_data_table)

  writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))

  precision = SamePrecision(0)  # lines per ms
  writeCsv(times_summary, file.path(out_dir, 'summary'), precision)

  precision = ColumnPrecision(list(), default = 1)
  writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)

  if (!is.null(times_flat)) {
    precision = SamePrecision(0)
    writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
  }

  if (!is.null(cachegrind_flat)) {
    precision = SamePrecision(0)
    writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
  }

  if (!is.null(elapsed)) {  # i.e. the branch above with real hosts, not 'no-host'
    # Round to the nearest millisecond, but the ratio has a decimal point.
    precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
    writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)

    precision = SamePrecision(0)
    writeCsv(rate, file.path(out_dir, 'rate'), precision)

    writeCsv(max_rss, file.path(out_dir, 'max_rss'))

    precision = SamePrecision(1)
    writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
  }

  Log('Wrote %s', out_dir)
}

WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = F) {

  num_hosts = nrow(distinct_hosts)
  if (num_hosts == 1) {
    linkify = provenanceLink
  } else {
    linkify = benchmarkDataLink
  }

  Log('distinct_hosts')
  print(distinct_hosts)
  Log('')

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  # Should be:
  # host_id_url
  # And then csv_to_html will be smart enough?  It should take a --url flag?
  host_table = tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    host_id_HREF = linkify('host-id', host_id, '/')
  )
  Log('host_table')
  print(host_table)
  Log('')

  shell_table = tibble(
    shell_label = distinct_shells$shell_label,
    shell_id = paste(distinct_shells$shell_name,
                     distinct_shells$shell_hash, sep='-'),
    shell_id_HREF = linkify('shell-id', shell_id, '/')
  )

  Log('shell_table')
  print(shell_table)
  Log('')

  if (tsv) {
    writeTsv(host_table, file.path(out_dir, 'hosts'))
    writeTsv(shell_table, file.path(out_dir, 'shells'))
  } else {
    writeCsv(host_table, file.path(out_dir, 'hosts'))
    writeCsv(shell_table, file.path(out_dir, 'shells'))
  }
}

WriteSimpleProvenance = function(provenance, out_dir) {
  Log('provenance')
  print(provenance)
  Log('')

  # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
  # benchmarks/id.sh publish-shell-id uses
  provenance %>%
    mutate(shell_name = basename(sh_path)) %>%
    distinct(shell_label, shell_name, shell_hash) ->
    distinct_shells

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  provenance %>% distinct(host_label, host_name, host_hash) -> distinct_hosts

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
}

RuntimeReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'times.tsv'))

  gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
  provenance = readTsv(file.path(in_dir, 'provenance.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some osh-runtime tasks failed')
  }

  # Joins:
  # times <= sh_path => provenance
  # times <= join_id, host_name => gc_stats
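  # (concretely, label_lookup is joined on sh_path below, and gc_stats is
  # joined on join_id + host_name)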

  # TODO: provenance may have rows from 2 machines.  Could validate them and
  # deduplicate.

  # It should have (host_label, host_name, host_hash)
  #                (shell_label, sh_path, shell_hash)
  provenance %>%
    mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
    provenance

  provenance %>% distinct(sh_path, shell_label) -> label_lookup

  Log('label_lookup')
  print(label_lookup)

  # Join with provenance for host label and shell label
  times %>%
    select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
             host_name, sh_path, workload)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details

  times %>%
    select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults,
             swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details_io

  Log('details')
  print(details)

  # Elapsed time comparison
  details %>%
    select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio)) ->
    elapsed

  Log('elapsed')
  print(elapsed)

  # Minor Page Faults Comparison
  details_io %>%
    select(c(host_name, shell_label, workload, minor_faults)) %>%
    spread(key = shell_label, value = minor_faults) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio)) ->
    page_faults

  Log('page_faults')
  print(page_faults)

  # Max RSS comparison
  details %>%
    select(c(host_name, shell_label, workload, max_rss_MB)) %>%
    spread(key = shell_label, value = max_rss_MB) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio)) ->
    max_rss

  Log('max rss')
  print(max_rss)

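  # Build join_id like 'gc-<task_id>' to match the join_id column in gc_stats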
  details %>%
    select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
    mutate(join_id = sprintf("gc-%d", task_id)) %>%
    select(-c(task_id)) ->
    gc_details

  Log('GC details')
  print(gc_details)
  Log('')

  Log('GC stats')
  print(gc_stats)
  Log('')

  gc_stats %>%
    left_join(gc_details, by = c('join_id', 'host_name')) %>%
    select(-c(join_id, roots_capacity, objs_capacity)) %>%
    # Do the same transformations as GcReport()
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    select(-c(bytes_allocated)) %>%
    rename(num_gc_done = num_collections) %>%
    # Put these columns first
    relocate(workload, host_name,
             elapsed_ms, max_gc_millis, total_gc_millis,
             allocated_MB, max_rss_MB, num_allocated) ->
    gc_stats

  Log('After GC stats')
  print(gc_stats)
  Log('')

  WriteSimpleProvenance(provenance, out_dir)

  # milliseconds don't need a decimal digit
  precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
                                   `osh-native` = 0, py_bash_ratio = 2,
                                   native_bash_ratio = 2))
  writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
  writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)

  precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                               default = 0)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)

  writeTsv(details, file.path(out_dir, 'details'), precision3)
  writeTsv(details_io, file.path(out_dir, 'details_io'))

  Log('Wrote %s', out_dir)
}

VmBaselineReport = function(in_dir, out_dir) {
  vm = readTsv(file.path(in_dir, 'vm-baseline.tsv'))
  #print(vm)

  # Not using DistinctHosts() because field host_hash isn't collected
  num_hosts = nrow(vm %>% distinct(host))

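  # metric_value is in KiB; convert to MB (1 KiB = 1024 bytes, 1 MB = 1e6 bytes)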
  vm %>%
    rename(kib = metric_value) %>%
    mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
           megabytes = kib * 1024 / 1e6) %>%
    select(-c(shell_name, kib)) %>%
    spread(key = c(metric_name), value = megabytes) %>%
    rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
    select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
    arrange(shell_label, shell_hash, host, VmPeak_MB) ->
    vm

  print(vm)

  writeTsv(vm, file.path(out_dir, 'vm-baseline'))
}

WriteOvmBuildDetails = function(distinct_hosts, distinct_compilers, out_dir) {
  host_table = tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
  )
  print(host_table)

  dc = distinct_compilers
  compiler_table = tibble(
    compiler_label = dc$compiler_label,
    compiler_id = paste(dc$compiler_label, dc$compiler_hash, sep='-'),
    compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
  )
  print(compiler_table)

  writeTsv(host_table, file.path(out_dir, 'hosts'))
  writeTsv(compiler_table, file.path(out_dir, 'compilers'))
}

OvmBuildReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'times.tsv'))
  bytecode_size = readTsv(file.path(in_dir, 'bytecode-size.tsv'))
  bin_sizes = readTsv(file.path(in_dir, 'bin-sizes.tsv'))
  native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
  raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some ovm-build tasks failed')
  }

  times %>% distinct(host_name, host_hash) -> distinct_hosts
  distinct_hosts$host_label = distinct_hosts$host_name

  times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
  distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)

  #print(distinct_hosts)
  #print(distinct_compilers)

  WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)

  times %>%
    select(-c(status)) %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
    select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
    mutate(src_dir = basename(src_dir),
           host_label = paste("host ", host_label),
           is_conf = str_detect(action, 'configure'),
           is_ovm = str_detect(action, 'oil.ovm'),
           is_dbg = str_detect(action, 'dbg'),
           ) %>%
    select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
           elapsed_secs) %>%
    spread(key = c(host_label), value = elapsed_secs) %>%
    arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
    select(-c(is_conf, is_ovm, is_dbg)) ->
    times

  #print(times)

  bytecode_size %>%
    rename(bytecode_size = num_bytes) %>%
    select(-c(path)) ->
    bytecode_size

  bin_sizes %>%
    # reorder
    select(c(host_label, path, num_bytes)) %>%
    left_join(bytecode_size, by = c('host_label')) %>%
    mutate(native_code_size = num_bytes - bytecode_size) ->
    sizes

  # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
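  # so 'binary' below is e.g. 'oils_cpp.stripped' and 'compiler' is 'clang'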
  native_sizes %>%
    select(c(host_label, path, num_bytes)) %>%
    mutate(host_label = paste("host ", host_label),
           binary = basename(path),
           compiler = basename(dirname(path)),
           ) %>%
    select(-c(path)) %>%
    spread(key = c(host_label), value = num_bytes) %>%
    arrange(compiler, binary) ->
    native_sizes

  # NOTE: These don't have the host and compiler.
  writeTsv(times, file.path(out_dir, 'times'))
  writeTsv(bytecode_size, file.path(out_dir, 'bytecode-size'))
  writeTsv(sizes, file.path(out_dir, 'sizes'))
  writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))

  # TODO: I want a size report too
  #writeCsv(sizes, file.path(out_dir, 'sizes'))
}

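# Assert that the given table has exactly num_expected distinct values in its
# stdout_md5sum column.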
unique_stdout_md5sum = function(t, num_expected) {
  u = n_distinct(t$stdout_md5sum)
  if (u != num_expected) {
    t %>%
      select(c(host_name, task_name, arg1, arg2, runtime_name, stdout_md5sum)) %>%
      print()
    stop(sprintf('Expected %d unique md5sums, got %d', num_expected, u))
  }
}

ComputeReport = function(in_dir, out_dir) {
  # TSV file, not CSV
  times = read.table(file.path(in_dir, 'times.tsv'), header=T)
  print(times)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some compute tasks failed')
  }

  #
  # Check correctness
  #

  times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
  # 3 different inputs
  times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)

  times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)

  # TODO:
  # - oils_cpp doesn't implement unicode LANG=C
  # - bash behaves differently on your desktop vs. in the container
  #   - might need layer-locales in the image?

  #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
  # Ditto here
  #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  # Runtimes are called shells, as a hack for code reuse
  times %>%
    mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
    select(c(host_name, host_hash, shell_name, shell_hash)) ->
    tmp

  distinct_hosts = DistinctHosts(tmp)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(tmp)
  Log('')
  Log('Distinct runtimes')
  print(distinct_shells)

  num_hosts = nrow(distinct_hosts)

  times %>%
    select(-c(status, stdout_md5sum, stdout_filename, host_hash)) %>%
    mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(runtime_name, runtime_hash,
              elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    arrange(host_name, task_name, arg1, arg2, user_ms) ->
    details

  times %>%
    mutate(
      runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
      stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
    select(c(host_name, task_name, arg1, arg2, runtime_label,
             stdout_md5sum, stdout_md5sum_HREF)) ->
    stdout_files

  details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
  details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
  details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
  # There's no arg2
  details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help

  details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
  details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% select(-c(task_name)) -> palindrome

  precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
  writeTsv(details, file.path(out_dir, 'details'), precision)

  writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)

  writeTsv(hello, file.path(out_dir, 'hello'), precision)
  writeTsv(fib, file.path(out_dir, 'fib'), precision)
  writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
  writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)

  writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
  writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
}

WriteOneTask = function(times, out_dir, task_name, precision) {
  times %>%
    filter(task == task_name) %>%
    select(-c(task)) -> subset

  writeTsv(subset, file.path(out_dir, task_name), precision)
}

SHELL_ORDER = c('dash',
                'bash',
                'zsh',
                '_bin/cxx-opt+bumpleak/osh',
                '_bin/cxx-opt+bumproot/osh',
                '_bin/cxx-opt+bumpsmall/osh',
                '_bin/cxx-opt/osh',
                '_bin/cxx-opt+nopool/osh')

GcReport = function(in_dir, out_dir) {
  times = read.table(file.path(in_dir, 'raw/times.tsv'), header=T)
  gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=T)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  # Change units and order columns
  times %>%
    arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           shell_label = ShellLabelFromPath(sh_path)
           ) %>%
    select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
             shell_runtime_opts)) ->
    times

  # Join and order columns
  gc_stats %>% left_join(times, by = c('join_id')) %>%
    arrange(desc(task)) %>%
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    # try to make the table skinnier
    rename(num_gc_done = num_collections) %>%
    select(task, elapsed_ms, max_gc_millis, total_gc_millis,
           allocated_MB, max_rss_MB, num_allocated,
           num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
           shell_label) ->
    gc_stats

  times %>% select(-c(join_id)) -> times

  precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                              default = 0)

  writeTsv(times, file.path(out_dir, 'times'), precision)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)

  tasks = c('parse.configure-coreutils',
            'parse.configure-cpython',
            'parse.abuild',
            'ex.compute-fib',
            'ex.bashcomp-parse-help',
            'ex.abuild-print-help')
  # Write a separate file for each task
  for (task in tasks) {
    WriteOneTask(times, out_dir, task, precision)
  }
}

GcCachegrindReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'raw/times.tsv'))
  counts = readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  print(times)
  print(counts)

  counts %>% left_join(times, by = c('join_id')) %>%
    mutate(million_irefs = irefs / 1e6) %>%
    select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER)) ->
    counts

  precision = NULL
  tasks = c('parse.abuild', 'ex.compute-fib')
  for (task in tasks) {
    WriteOneTask(counts, out_dir, task, precision)
  }
}

MyCppReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
  print(times)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some mycpp tasks failed')
  }

  # Drop elapsed (wall clock) time; compare user time, system time, and max RSS
  times %>% select(-c(status, elapsed_secs, bin, task_out)) %>%
    mutate(example_name_HREF = mycppUrl(example_name),
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(user_secs, sys_secs, max_rss_KiB)) ->
    details

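  # For each metric below, spread() yields one column per impl (C++, Python);
  # a `C++ : Python` ratio < 1 means the C++ version used less.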
  details %>% select(-c(sys_ms, max_rss_MB)) %>%
    spread(key = impl, value = user_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    user_time

  details %>% select(-c(user_ms, max_rss_MB)) %>%
    spread(key = impl, value = sys_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    sys_time

  details %>% select(-c(user_ms, sys_ms)) %>%
    spread(key = impl, value = max_rss_MB) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    max_rss

  # Sometimes it speeds up by more than 10x
  precision1 = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
  writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
  writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)

  precision2 = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  writeTsv(details, file.path(out_dir, 'details'))
}

UftraceTaskReport = function(env, task_name, summaries) {
  # Need this again after redirect
  MaybeDisableColor(stdout())

  task_env = env[[task_name]]

  untyped = task_env$untyped
  typed = task_env$typed
  strings = task_env$strings
  slabs = task_env$slabs
  reserve = task_env$reserve

  string_overhead = 17  # GC header (8) + len (4) + hash value (4) + NUL (1)
  strings %>% mutate(obj_len = str_len + string_overhead) -> strings

  # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
  # big/small
  #
  # And then zoom in on distributions as well

  num_allocs = nrow(untyped)
  total_bytes = sum(untyped$obj_len)

  untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
  #print(untyped_hist)

  untyped_hist %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_allocs) ->
    alloc_sizes

  a24 = untyped_hist %>% filter(obj_len <= 24)
  a48 = untyped_hist %>% filter(obj_len <= 48)
  a96 = untyped_hist %>% filter(obj_len <= 96)

  allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
  allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
  allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs

  Log('Percentage of allocs 48 bytes or less: %.1f', allocs_48_bytes_or_less)

  options(tibble.print_min=25)

  Log('')
  Log('All allocations')
  print(alloc_sizes %>% head(22))
  print(alloc_sizes %>% tail(5))

  Log('')
  Log('Common Sizes')
  print(untyped_hist %>% arrange(desc(n)) %>% head(8))

  Log('')
  Log('    %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
  Log('')

  Log('Typed allocations')

  num_typed = nrow(typed)

  typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
    mutate(percent = n * 100.0 / num_typed) %>%
    arrange(desc(n)) -> most_common_types

  print(most_common_types %>% head(20))
  print(most_common_types %>% tail(5))

  lists = typed %>% filter(str_starts(func_name, 'List<'))
  #print(lists)

  num_lists = nrow(lists)
  total_list_bytes = num_lists * 24  # the size of the List<T> header is hard-coded

  Log('')
  Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
  Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
  Log('')

  #
  # Strings
  #

  num_strings = nrow(strings)
  total_string_bytes = sum(strings$obj_len)

  strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_strings) ->
    string_lengths

  strs_6_bytes_or_less = string_lengths %>% filter(str_len == 6) %>% pull(percent)
  strs_14_bytes_or_less = string_lengths %>% filter(str_len == 14) %>% pull(percent)

  # Parse workload
  # 62% of strings <= 6 bytes
  # 84% of strings <= 14 bytes

  Log('Str - NewStr() and OverAllocatedStr()')
  print(string_lengths %>% head(16))
  print(string_lengths %>% tail(5))
  Log('')

  Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
      commas(sum(strings$str_len)), commas(total_string_bytes))
  Log('')
  Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
  Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
  Log('')

  #
  # Slabs
  #

  Log('NewSlab()')

  num_slabs = nrow(slabs)
  slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_slabs) ->
    slab_lengths

  slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
    arrange(desc(n)) -> slab_types

  Log('  Lengths')
  print(slab_lengths %>% head())
  print(slab_lengths %>% tail(5))
  Log('')

  Log('  Slab Types')
  print(slab_types %>% head())
  print(slab_types %>% tail(5))
  Log('')

  total_slab_items = sum(slabs$slab_len)

  Log('%s slabs, total items = %s', commas(num_slabs),
      commas(sum(slabs$slab_len)))
  Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
  Log('')

  #
  # reserve() calls
  #

  # There should be strictly more List::reserve() calls than NewSlab() calls

  Log('::reserve(int n)')
  Log('')

  num_reserve = nrow(reserve)
  reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_reserve) ->
    reserve_args

  Log('  Num Items')
  print(reserve_args %>% head(15))
  print(reserve_args %>% tail(5))
  Log('')

  Log('%s reserve() calls, total items = %s', commas(num_reserve),
      commas(sum(reserve$num_items)))
  Log('')

  # Accounting for all allocations!
  Log('Untyped: %s', commas(num_allocs))
  Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
  Log('')

  num_other_typed = num_typed - num_lists

  # Summary table
  stats = tibble(task = task_name,
                 total_bytes_ = commas(total_bytes),
                 num_allocs_ = commas(num_allocs),
                 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
                 num_reserve_calls = commas(num_reserve),

                 percent_list_allocs = Percent(num_lists, num_allocs),
                 percent_slab_allocs = Percent(num_slabs, num_allocs),
                 percent_string_allocs = Percent(num_strings, num_allocs),
                 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),

                 percent_list_bytes = Percent(total_list_bytes, total_bytes),
                 percent_string_bytes = Percent(total_string_bytes, total_bytes),

                 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
                 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
                 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),

                 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
                 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less),
                 )
  summaries$stats[[task_name]] = stats

  summaries$most_common_types[[task_name]] = most_common_types
}

LoadUftraceTsv = function(in_dir, env) {
  for (task in list.files(in_dir)) {
    Log('Loading data for task %s', task)
    base_dir = file.path(in_dir, task)

    task_env = new.env()
    env[[task]] = task_env

    # TSV file, not CSV
    task_env$untyped = readTsv(file.path(base_dir, 'all-untyped.tsv'))
    task_env$typed = readTsv(file.path(base_dir, 'typed.tsv'))
    task_env$strings = readTsv(file.path(base_dir, 'strings.tsv'))
    task_env$slabs = readTsv(file.path(base_dir, 'slabs.tsv'))
    task_env$reserve = readTsv(file.path(base_dir, 'reserve.tsv'))

    # median string length is 4, mean is 9.5!
    Log('UNTYPED')
    print(summary(task_env$untyped))
    Log('')

    Log('TYPED')
    print(summary(task_env$typed))
    Log('')

    Log('STRINGS')
    print(summary(task_env$strings))
    Log('')

    Log('SLABS')
    print(summary(task_env$slabs))
    Log('')

    Log('RESERVE')
    print(summary(task_env$reserve))
    Log('')
  }
}

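# e.g. Percent(1, 3) -> "33.3%"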
Percent = function(n, total) {
  sprintf('%.1f%%', n * 100.0 / total)
}

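# Print a stats tibble (one row per task) transposed, so there is one output
# line per column and tasks go across.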
PrettyPrintLong = function(d) {
  tr = t(d)  # transpose

  row_names = rownames(tr)

  for (i in 1:nrow(tr)) {
    row_name = row_names[i]
    cat(sprintf('%26s', row_name))  # calculated min width manually
    cat(sprintf('%20s', tr[i,]))
    cat('\n')

    # Extra spacing
    if (row_name %in% c('num_reserve_calls',
                        'percent_string_bytes',
                        'percent_other_typed_allocs',
                        'allocs_96_bytes_or_less')) {
      cat('\n')
    }
  }
}


UftraceReport = function(env, out_dir) {
  # summaries$stats should be a list of 1-row data frames
  # summaries$most_common_types should be a list of data frames of type counts
  summaries = new.env()

  for (task_name in names(env)) {
    report_out = file.path(out_dir, paste0(task_name, '.txt'))

    Log('Making report for task %s -> %s', task_name, report_out)

    sink(file = report_out)
    UftraceTaskReport(env, task_name, summaries)
    sink()  # reset
  }
  Log('')

  # Concatenate all the data frames added to the summary
  stats = bind_rows(as.list(summaries$stats))

  sink(file = file.path(out_dir, 'summary.txt'))
  #print(stats)
  #Log('')

  PrettyPrintLong(stats)
  Log('')

  mct = summaries$most_common_types
  for (task_name in names(mct)) {
    Log('Common types in workload %s', task_name)
    Log('')

    print(mct[[task_name]] %>% head(5))
    Log('')
  }
  sink()

  # For the REPL
  return(list(stats = stats))
}

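# Example invocation (paths are illustrative):
#   benchmarks/report.R osh-parser _tmp/osh-parser/stage1 _tmp/osh-parser/stage2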
main = function(argv) {
  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  if (action == 'osh-parser') {
    ParserReport(in_dir, out_dir)

  } else if (action == 'osh-runtime') {
    RuntimeReport(in_dir, out_dir)

  } else if (action == 'vm-baseline') {
    VmBaselineReport(in_dir, out_dir)

  } else if (action == 'ovm-build') {
    OvmBuildReport(in_dir, out_dir)

  } else if (action == 'compute') {
    ComputeReport(in_dir, out_dir)

  } else if (action == 'gc') {
    GcReport(in_dir, out_dir)

  } else if (action == 'gc-cachegrind') {
    GcCachegrindReport(in_dir, out_dir)

  } else if (action == 'mycpp') {
    MyCppReport(in_dir, out_dir)

  } else if (action == 'uftrace') {
    d = new.env()
    LoadUftraceTsv(in_dir, d)
    UftraceReport(d, out_dir)

  } else {
    Log("Invalid action '%s'", action)
    quit(status = 1)
  }
  Log('PID %d done', Sys.getpid())
}

if (length(sys.frames()) == 0) {
  # increase ggplot font size globally
  #theme_set(theme_grey(base_size = 20))

  main(commandArgs(TRUE))
}