OILS / tools / xargs / xargs.py View on Github | oilshell.org

390 lines, 282 significant
1#!/usr/bin/env python2
2# Copyright 2019 Wilke Schwiedop. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8
9from __future__ import print_function
10
11import argparse
12import collections
13import itertools
14import os
15# TODO docs.python.org suggests https://pypi.org/project/subprocess32/
16# for POSIX users
17import shlex
18import subprocess
19import sys
20
class GNUXargsQuirks(argparse.Action):
    """
    argparse action that stores an option value and then clears the
    options that conflict with it, so that the option given last wins.

    Handled destinations:
      replace_str -- clears max_args and max_lines
      max_lines   -- clears max_args and replace_str
      max_args    -- clears max_lines; additionally, a max_args of 1
                     combined with an existing replace_str is dropped
                     (replace_str is kept), any other value clears
                     replace_str
      max_chars   -- stored as is, nothing is cleared

    Raises AssertionError if the action is attached to any other dest.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)
        if self.dest == 'replace_str':
            namespace.max_args = None
            namespace.max_lines = None
        elif self.dest == 'max_lines':
            namespace.max_args = None
            namespace.replace_str = None
        elif self.dest == 'max_args':
            namespace.max_lines = None
            if namespace.max_args == 1 and namespace.replace_str:
                namespace.max_args = None
            else:
                namespace.replace_str = None
        elif self.dest == 'max_chars':
            pass
        else:
            # Raised explicitly (instead of `assert False`) so that the
            # check is not stripped when Python runs with -O.
            raise AssertionError("dest '%s' not handled" % self.dest)
42
# Command-line interface of the tool. Options that interact with each
# other (-I/-i, -L/-l, -n, -s) are routed through GNUXargsQuirks.
xargs = argparse.ArgumentParser(prog='xargs')

# --- input source and logical end-of-file -------------------------------
xargs.add_argument(
    '-a', '--arg-file',
    metavar='file',
    nargs=1,
    default='-',
    help='read arguments from FILE, not standard input',
)
xargs.add_argument(
    '-E',
    metavar='eof-str',
    dest='eof_str',
    help='set logical EOF string; if END occurs as a line of input, the rest of the input is ignored (ignored if -0 or -d was specified)',
)
xargs.add_argument(
    '-e', '--eof',
    metavar='eof-str',
    nargs='?',
    dest='eof_str',
    help='equivalent to -E END if END is specified; otherwise, there is no end-of-file string',
)

# --- how the input is split into items ----------------------------------
xargs.add_argument(
    '-0', '--null',
    dest='delimiter',
    action='store_const',
    const='\0',
    help='items are separated by a null, not whitespace; disables quote and backslash processing and logical EOF processing',
)
xargs.add_argument(
    '-d', '--delimiter',
    metavar='delimiter',
    dest='delimiter',
    help='items in input stream are separated by CHARACTER, not by whitespace; disables quote and backslash processing and logical EOF processing',
)

# --- how items are grouped into command lines ---------------------------
xargs.add_argument(
    '-I',
    metavar='replace-str',
    dest='replace_str',
    action=GNUXargsQuirks,
    help='same as --replace=R',
)
xargs.add_argument(
    '-i', '--replace',
    metavar='replace-str',
    nargs='?',
    const='{}',
    dest='replace_str',
    action=GNUXargsQuirks,
    help='replace R in INITIAL-ARGS with names read from standard input; if R is unspecified, assume {}',
)
xargs.add_argument(
    '-L',
    metavar='max-lines',
    dest='max_lines',
    type=int,
    action=GNUXargsQuirks,
    help='use at most MAX-LINES non-blank input lines per command line',
)
xargs.add_argument(
    '-l', '--max-lines',
    metavar='max-lines',
    nargs='?',
    const=1,
    dest='max_lines',
    type=int,
    action=GNUXargsQuirks,
    help='similar to -L but defaults to at most one non-blank input line if MAX-LINES is not specified',
)
xargs.add_argument(
    '-n', '--max-args',
    metavar='max-args',
    dest='max_args',
    type=int,
    action=GNUXargsQuirks,
    help='use at most MAX-ARGS arguments per command line',
)
xargs.add_argument(
    '-s', '--max-chars',
    metavar='max-chars',
    dest='max_chars',
    type=int,
    action=GNUXargsQuirks,
    help='limit length of command line to MAX-CHARS',
)

# --- execution ------------------------------------------------------------
xargs.add_argument(
    '-P', '--max-procs',
    metavar='max-procs',
    default=1,
    dest='max_procs',
    type=int,
    help='run at most MAX-PROCS processes at a time',
)
xargs.add_argument(
    '--process-slot-var',
    metavar='name',
    help='set environment variable VAR in child processes',
)
xargs.add_argument(
    '-p', '--interactive',
    action='store_true',
    help='prompt before running commands',
)
xargs.add_argument(
    '-t', '--verbose',
    action='store_true',
    help='print commands before executing them',
)
xargs.add_argument(
    '-x', '--exit',
    action='store_true',
    help='exit if the size (see -s) is exceeded',
)
xargs.add_argument(
    '-r', '--no-run-if-empty',
    action='store_true',
    help='if there are no arguments, then do not run COMMAND; if this option is not given, COMMAND will be run at least once',
)

# --- informational --------------------------------------------------------
xargs.add_argument(
    '--show-limits',
    action='store_true',
    help='show limits on command-line length',
)
xargs.add_argument(
    '--version',
    action='version',
    version='%(prog)s 0.0.1',
    help='output version information and exit',
)

# --- positional: the command to run and its fixed leading arguments -------
xargs.add_argument('command', nargs='?', default='echo')
xargs.add_argument('initial_arguments', nargs=argparse.REMAINDER)
65
class PeekableIterator(object):
    """
    Wrap an iterator and allow looking at the next item without
    consuming it.

    Implements both the Python 2 (`next`) and Python 3 (`__next__`)
    iterator protocols, so the builtin next() and for-loops work on
    either interpreter.
    """

    def __init__(self, iterator):
        self.iterator = iterator
        self.peeked = False  # True while self.item holds a looked-ahead item
        self.item = None

    def peek(self):
        """
        Return the next item but does not advance the iterator further.
        Raise StopIteration if there is no such item.
        """
        if not self.peeked:
            self.item = next(self.iterator)
            self.peeked = True
        return self.item

    def next(self):
        """
        Return the next item and advance the iterator.
        Raise StopIteration if there is no such item.
        """
        if self.peeked:
            self.peeked = False
            # hand the buffered item out and drop our reference to it
            item, self.item = self.item, None
            return item
        return next(self.iterator)

    # Python 3 spelling of the iterator protocol
    __next__ = next

    def __iter__(self):
        return self
91
def read_lines_eof(eof_str, input):
    # type: (str, Iterable[str]) -> Iterator[str]
    """
    Yield lines from input until a line equals eof_str or input ends.

    The line holding eof_str is not yielded. It is recognised with or
    without a trailing newline, so a marker on an unterminated last line
    is honoured as well. input may be any iterable of lines; it is read
    lazily, one line at a time.
    """
    terminated = eof_str + '\n'
    for line in input:
        if line == terminated or line == eof_str:
            return
        yield line
96
def str_memsize(*strings):
    # type: (*str) -> int
    """
    Return the space the given strings occupy in an argv: the length of
    every string plus one extra unit per string (presumably the
    terminating NUL byte).
    """
    total = 0
    for text in strings:
        total += len(text) + 1
    return total
101
def is_complete_line(line):
    # type: (str) -> bool
    """
    Tell whether line counts as one complete input line.

    A line is complete when it is not blank and its last character
    before the line terminator is neither a space nor a tab; a trailing
    blank means the logical line continues on the next one. The final
    line of the input may lack the terminating newline and is judged by
    its own last character.
    """
    content = line[:-1] if line.endswith('\n') else line
    return bool(content) and content[-1] not in (' ', '\t')
105
def argsplit_ws(lines):
    # type: (Iterable[str]) -> Iterator[str]
    """
    Yield the arguments found on each line, one at a time.

    Every line is tokenised with shlex.split, i.e. split on whitespace
    with shell-like quote and backslash handling.
    """
    # TODO this might require some more testing
    tokens = (token for text in lines for token in shlex.split(text))
    for token in tokens:
        yield token
113
def argsplit_delim(delim, lines):
    # type: (str, Iterable[str]) -> Iterator[str]
    """
    Split the concatenation of lines at every occurrence of the
    character delim and yield the pieces.

    Pieces between two delimiters are yielded even when empty; text
    left over after the last delimiter is yielded only if non-empty.
    """
    pending = []
    for ch in itertools.chain.from_iterable(lines):
        if ch != delim:
            pending.append(ch)
            continue
        yield "".join(pending)
        pending = []
    if pending:
        yield "".join(pending)
126
def read_n_xargs_lines(linec, line_iter):
    # type: (int, Iterator[str]) -> Iterator[str]
    """
    Yield lines from line_iter until linec complete lines were seen or
    the input is exhausted.

    Lines that is_complete_line() rejects are yielded too but do not
    count towards linec. No line beyond the last counted one is consumed
    from line_iter.
    """
    while linec > 0:
        try:
            line = next(line_iter)
        except StopIteration:
            # End the generator explicitly; a StopIteration escaping a
            # generator body becomes a RuntimeError under PEP 479.
            return
        yield line
        if is_complete_line(line):
            linec -= 1
134
def take_chars(charc, iterator):
    # type: (int, Iterator[str]) -> Iterator[str]
    """
    Yield items from iterator for as long as their accumulated
    str_memsize() stays within charc.

    iterator must provide peek(): an item is only consumed once it is
    known to fit, so the first item that does not fit stays available
    to the caller. Ends as well when iterator is exhausted.
    """
    while True:
        try:
            upcoming = iterator.peek()
        except StopIteration:
            # input exhausted; return explicitly instead of letting the
            # StopIteration leak out of the generator (PEP 479)
            return
        charc -= str_memsize(upcoming)
        if charc < 0:
            return
        yield next(iterator)
141
def take(n, iterator):
    # type: (int, Iterator[Any]) -> Iterator[Any]
    """
    Yield at most n items from iterator.

    Stops early, without error, when iterator runs out of items. Items
    are consumed lazily, one per item yielded.
    """
    for _ in range(n):
        try:
            item = next(iterator)
        except StopIteration:
            # A StopIteration escaping a generator body is a RuntimeError
            # under PEP 479, so finish explicitly.
            return
        yield item
146
def group_args_lines(max_lines, input):
    # type: (int, Iterator[str]) -> Iterator[List[str]]
    """
    Yield lists of arguments, each built from up to max_lines complete
    lines of input.

    Lines are read with read_n_xargs_lines() and tokenised with
    argsplit_ws(). Iteration ends with the first group that yields no
    arguments at all.
    """
    while True:
        group = list(argsplit_ws(read_n_xargs_lines(max_lines, input)))
        if not group:
            # Nothing left to read. Return explicitly rather than relying
            # on a StopIteration escaping the generator (PEP 479).
            return
        yield group
154
def group_args(max_chars, max_args, arg_iter):
    # type: (Optional[int], Optional[int], Iterator[str]) -> Iterator[List[str]]
    """
    Yield lists of arguments taken from arg_iter.

    Each list holds as many arguments as fit within max_chars (measured
    by take_chars) and max_args (counted by take). A limit that is None
    or 0 is not applied; with no limit at all, a single list holding
    every argument is produced. Nothing is yielded for empty input.

    Exits the program with status 1 and a message on stderr when no
    argument can be placed in a group although input remains, e.g. when
    a single argument is larger than max_chars. Without that check the
    generator would yield empty lists forever.
    """
    arg_iter = PeekableIterator(arg_iter)
    while True:
        try:
            arg_iter.peek()
        except StopIteration:
            # input exhausted; finish explicitly (PEP 479)
            return
        it = arg_iter
        if max_chars:
            it = take_chars(max_chars, it)
        if max_args:
            it = take(max_args, it)
        group = list(it)
        if not group:
            # An argument is pending (peek succeeded) but the limits let
            # none through, so looping again could never make progress.
            sys.exit("xargs: argument line too long")
        yield group
165
def replace_args(initial_arguments, replace_str, additional_arguments):
    # type: (Sequence[str], str, Iterable[str]) -> Iterator[str]
    """
    Yield initial_arguments with replace_str substituted.

    - An argument equal to replace_str is replaced by all of
      additional_arguments, each as an argument of its own.
    - An argument that merely contains replace_str has every occurrence
      replaced by the additional arguments joined with single spaces
      (previously such arguments were passed through unchanged, so e.g.
      `-I {} echo pre-{}` never substituted anything).
    - Any other argument is yielded unchanged.
    """
    additional_arguments = list(additional_arguments)
    joined = None  # built lazily, only if an embedded occurrence shows up
    for arg in initial_arguments:
        if arg == replace_str:
            for x in additional_arguments:
                yield x
        elif replace_str and replace_str in arg:
            # guard on replace_str: '' is "contained" in every string
            if joined is None:
                joined = ' '.join(additional_arguments)
            yield arg.replace(replace_str, joined)
        else:
            yield arg
175
def build_cmdlines_replace(command, initial_arguments, replace_str, arggroup_iter):
    # type: (str, Sequence[str], str, Iterator[Iterator[str]]) -> Iterator[List[str]]
    """
    Yield one argv list (suitable for subprocess.Popen) per argument
    group: command followed by initial_arguments in which replace_str
    has been substituted through replace_args().
    """
    for group in arggroup_iter:
        argv = [command]
        argv += replace_args(initial_arguments, replace_str, group)
        yield argv
193
def build_cmdlines(command, initial_arguments, arggroup_iter):
    # type: (str, Sequence[str], Iterator[Iterator[str]]) -> Iterator[List[str]]
    """
    Yield one argv list (suitable for subprocess.Popen) per argument
    group: command, then initial_arguments, then the group's arguments.
    """
    prefix = [command] + list(initial_arguments)
    for group in arggroup_iter:
        # concatenation builds a fresh list for every command line
        yield prefix + list(group)
203
def check_items(p, on_false, cmdline_iter):
    """
    Yield the items of cmdline_iter for which the predicate p returns a
    true value; for every other item call on_false() (without
    arguments) and skip the item.
    """
    for candidate in cmdline_iter:
        if not p(candidate):
            on_false()
            continue
        yield candidate
210
def tee_cmdline(cmdline_iter):
    # type: (Iterator[List[str]]) -> Iterator[List[str]]
    """
    Pass every command line through unchanged, echoing it to stderr
    (arguments separated by single spaces) just before it is yielded.
    """
    for argv in cmdline_iter:
        print(*argv, file=sys.stderr)
        yield argv
217
def prompt_user(cmdline_iter):
    # type: (Iterator[List[str]]) -> Iterator[List[str]]
    """
    Prompt the user for each cmdline and yield only the confirmed ones.

    The command line is printed to stderr followed by ' ?...' and the
    answer is read from /dev/tty. A command line is yielded only if the
    answer starts with 'y' or 'Y'; anything else, including end-of-file
    on the terminal (an empty read), skips it.
    """
    with open("/dev/tty", 'r') as tty:
        for cmdline in cmdline_iter:
            print(*cmdline, end=' ?...', file=sys.stderr)
            # the prompt has no newline; make sure it is visible before
            # blocking on the answer
            sys.stderr.flush()
            response = tty.readline()
            # startswith() is safe on '' (EOF), unlike response[0]
            if response.startswith(('y', 'Y')):
                yield cmdline
228
def wait_open_slot(processes):
    # type: (List[Optional[Any]]) -> Optional[int]
    """
    Block until a slot in processes is free and return its index.

    A slot is free when it holds None or a process whose poll() reports
    an exit status. Returns None if processes is empty.

    While waiting, os.wait() is used to sleep until any child exits.
    os.wait() reaps that child, so the Popen object can no longer fetch
    the status by itself (poll() would then report 0). The status
    returned by os.wait() is therefore stored on the matching Popen
    object: the exit code, or the negated signal number if the child was
    killed by a signal.
    """
    while processes:
        for i, p in enumerate(processes):
            # process doesn't yet exist or has finished
            if p is None or p.poll() is not None:
                return i
        pid, status = os.wait()
        for p in processes:
            if p is not None and p.pid == pid and p.returncode is None:
                if os.WIFSIGNALED(status):
                    p.returncode = -os.WTERMSIG(status)
                elif os.WIFEXITED(status):
                    p.returncode = os.WEXITSTATUS(status)
                break
237
def map_errcode(rc):
    # type: (int) -> int
    """
    Translate the returncode of a child process into the exit status of
    the main process:

      negative -> 125
      0        -> 0
      1..125   -> 123
      255      -> 124
      others   -> 1
    """
    if rc < 0:
        return 125
    if rc == 0:
        return 0
    if rc <= 125:
        return 123
    if rc == 255:
        return 124
    return 1
250
def main(xargs_args):
    """
    Run xargs with the parsed (and normalised) options in xargs_args and
    return the exit status for the process.

    Arguments are read from stdin, in which case the commands get
    /dev/null as their stdin, or from the file named by
    xargs_args.arg_file, in which case the commands inherit stdin.
    arg_file may be a plain string or the one-element list that argparse
    produces for nargs=1. Exits with a message if the file cannot be
    opened. The file opened here is closed again before returning.
    """
    # phase 1: read input
    arg_file = xargs_args.arg_file
    if isinstance(arg_file, (list, tuple)):
        # argparse wraps the value in a list because of nargs=1
        arg_file = arg_file[0]

    if arg_file == '-':
        xargs_input = sys.stdin
        opened = cmd_input = open(os.devnull, 'r')
    else:
        try:
            opened = xargs_input = open(arg_file, 'r')
        except (IOError, OSError) as e:
            sys.exit("xargs: cannot open input file: %s" % e)
        cmd_input = sys.stdin

    try:
        return _run_xargs(xargs_args, xargs_input, cmd_input)
    finally:
        opened.close()


def _run_xargs(xargs_args, xargs_input, cmd_input):
    """Group the arguments read from xargs_input, build the command
    lines and execute them with cmd_input as stdin; return the exit
    status."""
    if xargs_args.eof_str:
        xargs_input = read_lines_eof(xargs_args.eof_str, xargs_input)

    # phase 2: parse and group args
    if xargs_args.max_lines:
        # invariants established by the option normalisation in __main__
        assert not xargs_args.max_args
        assert not xargs_args.delimiter
        assert xargs_args.exit
        arggroup_iter = group_args_lines(xargs_args.max_lines, xargs_input)
    else:
        if xargs_args.delimiter:
            arg_iter = argsplit_delim(xargs_args.delimiter, xargs_input)
        else:
            arg_iter = argsplit_ws(xargs_input)
        # if exit is True, max_chars is checked later
        arggroup_iter = group_args(
            xargs_args.max_chars if not xargs_args.exit else None,
            xargs_args.max_args,
            arg_iter
        )

    arggroup_iter = PeekableIterator(arggroup_iter)
    if xargs_args.no_run_if_empty:
        try:
            x = arggroup_iter.peek()
            # TODO not even sure how the interaction with -I is supposed to work
            # echo | xargs -I {} echo {} : dont run
            # echo | xargs -I {} echo {} "x" : dont run
            # echo | xargs -I {} echo "x" : dont run
            # echo x | xargs -I {} echo : run
            # echo xx | xargs -I {} -d 'x' echo {} : run 3 times ('', '', '\n')
            # (a stricter variant would be: if not x or not x[0])
            if not x:
                return 0
        except StopIteration:
            return 0
    else:
        try:
            arggroup_iter.peek()
        except StopIteration:
            # no input at all: still run the command once, without
            # additional arguments
            arggroup_iter = [[]]

    # phase 3: build command-lines
    if xargs_args.replace_str:
        cmdline_iter = build_cmdlines_replace(
            xargs_args.command,
            xargs_args.initial_arguments,
            xargs_args.replace_str,
            arggroup_iter
        )
    else:
        cmdline_iter = build_cmdlines(
            xargs_args.command,
            xargs_args.initial_arguments,
            arggroup_iter
        )

    if xargs_args.max_chars is not None and xargs_args.exit:
        # NOTE(review): __main__ already subtracts the size of the command
        # and initial arguments from max_chars (when -I is not used), yet
        # the whole command line is measured here -- confirm whether the
        # base size is meant to be counted twice.
        cmdline_iter = check_items(
            lambda c: str_memsize(*c) < xargs_args.max_chars,
            lambda: sys.exit(1),
            cmdline_iter
        )

    if xargs_args.interactive:
        cmdline_iter = prompt_user(cmdline_iter)
    elif xargs_args.verbose:
        cmdline_iter = tee_cmdline(cmdline_iter)

    # phase 4: execute command-lines
    if xargs_args.max_procs > 1:
        ps = [None] * xargs_args.max_procs
        environ = os.environ.copy()
        for cmdline in cmdline_iter:
            i = wait_open_slot(ps)
            # stop launching once the process that held this slot failed
            if ps[i] is not None and ps[i].returncode:
                break
            if xargs_args.process_slot_var:
                environ[xargs_args.process_slot_var] = str(i)
            ps[i] = subprocess.Popen(cmdline, stdin=cmd_input, env=environ)
        # The leading 0 covers the case that no command was started at
        # all (max() of an empty sequence raises ValueError).
        return max([0] + [map_errcode(p.wait()) for p in ps if p is not None])
    else:
        for cmdline in cmdline_iter:
            p = subprocess.Popen(cmdline, stdin=cmd_input)
            if p.wait():
                return map_errcode(p.returncode)
        return 0
348
if __name__ == "__main__":
    # Parse the command line, validate it and normalise the options that
    # imply one another before handing over to main(). Invalid input ends
    # the program with exit status 1 and a message on stderr.
    xargs_args = xargs.parse_args()

    if xargs_args.delimiter:
        # allow escape sequences such as '\n' or '\x00' as the delimiter
        try:
            xargs_args.delimiter = xargs_args.delimiter.decode('string_escape')
        except ValueError as e:
            sys.exit("xargs: invalid input delimiter specification: %s" % e)
        if len(xargs_args.delimiter) > 1:
            sys.exit(
                "xargs: the input delimiter must be a single character "
                "or a single escape sequence"
            )
    if xargs_args.max_chars and not xargs_args.replace_str:
        # reserve the room taken by the command and its initial arguments
        base = str_memsize(xargs_args.command, *xargs_args.initial_arguments)
        if base > xargs_args.max_chars:
            sys.exit(
                "xargs: the command and its initial arguments do not fit "
                "within the size limit given by -s"
            )
        xargs_args.max_chars -= base

    # TODO warnings when appropriate
    # -d disables -e
    if xargs_args.delimiter and xargs_args.eof_str:
        xargs_args.eof_str = None
    # -I implies -L 1 (and transitively -x)
    if xargs_args.replace_str and xargs_args.max_lines != 1:
        xargs_args.max_lines = 1
    # -I implies -d '\n'
    if xargs_args.replace_str and xargs_args.delimiter != '\n':
        xargs_args.delimiter = '\n'
    # -L implies -x
    if xargs_args.max_lines is not None and not xargs_args.exit:
        xargs_args.exit = True
    # -p implies -t
    if xargs_args.interactive and not xargs_args.verbose:
        xargs_args.verbose = True

    # (undocumented)
    # if -d then -L equals -n
    if xargs_args.delimiter and xargs_args.max_lines:
        xargs_args.max_args = xargs_args.max_lines
        xargs_args.max_lines = None
    # TODO? -I implies -r
    if xargs_args.replace_str and not xargs_args.no_run_if_empty:
        xargs_args.no_run_if_empty = True

    sys.exit(main(xargs_args))