OILS / benchmarks / time_.py View on Github | oilshell.org

267 lines, 186 significant
1#!/usr/bin/env python2
2"""
3time.py -- Replacement for coreutils 'time'.
4
5TODO:
6(Must be Python 3 because it's used before a Python 2 WEDGE can be installed.)
7
8The interface of this program is modelled after:
9
10/usr/bin/time --append --output foo.txt --format '%x %e'
11
12Why we're not using /usr/bin/time:
13- We need to print extra TSV fields at the end of the row, but it prints a newline
14- It prints extraneous output: 'Command exited with non-zero status 1'
15- Elapsed time only has 2 digits of precision. Apparently it uses times()
16 rather than getrusage()? https://unix.stackexchange.com/questions/70653/increase-e-precision-with-usr-bin-time-shell-command
17
18Problems with resource.getrusage() in Python:
19
20 The memory usage of dash and bash get obscured by Python! Because
21 subprocess.call() does a fork(), which includes Python's address space.
22 # https://stackoverflow.com/questions/13880724/python-getrusage-with-rusage-children-behaves-stangely
23
24Problems with bash time builtin
25- has no way to get the exit code
26- writes to stderr, so you it's annoying to get both process stderr and the
27 timing
28
29This program also writes CSV and TSV directly.
30- CSV values get escaped
31- TSV values can't have tabs
32
33So we use a tiny C program time-helper.c to do it, and not /usr/bin/time.
34"""
35from __future__ import print_function
36
37import csv
38try:
39 import hashlib # PY3
40except ImportError:
41 hashlib = None
42 import md5
43
44import optparse
45import os
46import sys
47import subprocess
48import time
49
50THIS_DIR = os.path.dirname(os.path.abspath(sys.argv[0]))
51
52time1 = os.path.abspath(os.path.join(THIS_DIR, '../_devbuild/bin/time-helper'))
53# Pre-built one
54time2 = '/wedge/oils-for-unix.org/pkg/time-helper/2023-02-28/time-helper'
55
56if os.path.exists(time1):
57 TIME_HELPER = time1
58elif os.path.exists(time2):
59 TIME_HELPER = time2
60else:
61 raise AssertionError('time-helper not found')
62
63
64def log(msg, *args):
65 if args:
66 msg = msg % args
67 print(msg, file=sys.stderr)
68
69
70def Options():
71 """Returns an option parser instance."""
72 p = optparse.OptionParser('time.py [options] ARGV...')
73 p.add_option('--tsv',
74 dest='tsv',
75 default=False,
76 action='store_true',
77 help='Write output in TSV format')
78 p.add_option(
79 '--rusage',
80 dest='rusage',
81 default=False,
82 action='store_true',
83 help='Also show user time, system time, and max resident set size')
84 p.add_option('--rusage-2',
85 dest='rusage_2',
86 default=False,
87 action='store_true',
88 help='Also show page faults, context switches, etc.')
89 p.add_option('--time-span',
90 dest='time_span',
91 default=False,
92 action='store_true',
93 help='Also show start_time and end_time')
94 p.add_option('-o',
95 '--output',
96 dest='output',
97 default=None,
98 help='Name of output file to write to to')
99 p.add_option('-a',
100 '--append',
101 dest='append',
102 default=False,
103 action='store_true',
104 help='Append to the file instead of overwriting it')
105 p.add_option(
106 '--field',
107 dest='fields',
108 default=[],
109 action='append',
110 help='A string to append to each row, after the exit code and status')
111 p.add_option('--time-fmt',
112 dest='time_fmt',
113 default='%.4f',
114 help='sprintf format for elapsed seconds (float)')
115 p.add_option(
116 '--stdout',
117 dest='stdout',
118 default=None,
119 help='Save stdout to this file, and add a column for its md5 checksum')
120 p.add_option(
121 '--print-header',
122 dest='print_header',
123 default=False,
124 action='store_true',
125 help=
126 'Print an XSV header, respecting --time-span, --rusage, --rusage-2, --stdout, --field, and --tsv'
127 )
128 return p
129
130
131def MakeTableOutput(f, tsv):
132 if tsv: # TSV output.
133 out = csv.writer(f,
134 delimiter='\t',
135 lineterminator='\n',
136 doublequote=False,
137 quoting=csv.QUOTE_NONE)
138 else:
139 out = csv.writer(f)
140 return out
141
142
143def main(argv):
144 (opts, child_argv) = Options().parse_args(argv[1:])
145
146 # Used only for 'time' format string. For --field, we use our own.
147 sep = '\t' if opts.tsv else ','
148
149 if opts.print_header:
150 if child_argv:
151 raise RuntimeError('No arguments allowed with --print-header')
152 names = ['status', 'elapsed_secs']
153 if opts.time_span:
154 names.extend(['start_time', 'end_time'])
155 if opts.rusage:
156 names.extend(['user_secs', 'sys_secs', 'max_rss_KiB'])
157 if opts.rusage_2:
158 names.extend([
159 'minor_faults', 'major_faults', 'swaps', 'in_block',
160 'out_block', 'signals', 'voluntary_ctx', 'involuntary_ctx'
161 ])
162 if opts.stdout:
163 names.append('stdout_md5sum')
164 names.extend(opts.fields)
165
166 if opts.output:
167 f = open(opts.output, 'w')
168 else:
169 f = sys.stdout
170 table_out = MakeTableOutput(f, opts.tsv)
171 table_out.writerow(names)
172 return 0
173
174 if not child_argv:
175 raise RuntimeError('Expected a command')
176
177 # built by build/dev.sh all
178 time_argv = [TIME_HELPER, '-d', sep]
179
180 if opts.append:
181 time_argv.append('-a')
182
183 if opts.output:
184 time_argv.extend(['-o', opts.output])
185 else:
186 pass
187 # Somehow this doesn't work? Getting binary data?
188 # time_argv.extend(['-o', '/dev/stdout'])
189
190 # %x: exit status
191 # %e: elapsed
192 time_argv.extend(['-x', '-e'])
193 if opts.time_span:
194 # begin and end time
195 time_argv.extend(['-y', '-z'])
196 if opts.rusage:
197 # %U: user time
198 # %S: sys time
199 # %M: Max RSS
200 time_argv.extend(['-U', '-S', '-M'])
201 if opts.rusage_2:
202 time_argv.extend(['-m'])
203
204 time_argv.append('--')
205 time_argv.extend(child_argv)
206 #log('time_argv %s', time_argv)
207
208 start_time = time.time()
209 try:
210 if opts.stdout:
211 # We don't want to intermingle 'time' stdout with the program's stdout
212 if not opts.output:
213 raise RuntimeError('Flag -o is required when --stdout')
214 with open(opts.stdout, 'w') as f:
215 exit_code = subprocess.call(time_argv, stdout=f)
216 else:
217 exit_code = subprocess.call(time_argv)
218 except OSError as e:
219 log('Error executing %s: %s', time_argv, e)
220 return 1
221
222 elapsed = time.time() - start_time
223 if opts.stdout:
224 if hashlib:
225 m = hashlib.md5() # PY3
226 else:
227 m = md5.new() # PY2
228 with open(opts.stdout, 'rb') as f:
229 while True:
230 chunk = f.read(4096)
231 if not chunk:
232 break
233 m.update(chunk)
234 maybe_stdout = [m.hexdigest()]
235 else:
236 maybe_stdout = [] # no field
237
238 more_cells = maybe_stdout + opts.fields
239
240 if opts.output:
241 with open(opts.output, 'a') as f: # append
242 if more_cells:
243 f.write(sep) # tab or comma for more cells
244 table_out = MakeTableOutput(f, opts.tsv)
245 table_out.writerow(more_cells)
246 else:
247 f.write('\n') # end the row
248 else:
249 if more_cells:
250 log("More cells that -o would have written: %s", more_cells)
251
252 # Preserve the command's exit code. (This means you can't distinguish
253 # between a failure of time.py and the command, but that's better than
254 # swallowing the error.)
255 return exit_code
256
257
258if __name__ == '__main__':
259 try:
260 status = main(sys.argv)
261 except KeyboardInterrupt as e:
262 print('%s: interrupted with Ctrl-C' % sys.argv[0], file=sys.stderr)
263 sys.exit(1)
264 except RuntimeError as e:
265 log('time_.py: %s', e)
266 status = 2
267 sys.exit(status)