1 | #!/usr/bin/env python2
|
2 | # Copyright 2019 Wilke Schwiedop. All rights reserved.
|
3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
4 | # you may not use this file except in compliance with the License.
|
5 | # You may obtain a copy of the License at
|
6 | #
|
7 | # http://www.apache.org/licenses/LICENSE-2.0
|
8 |
|
9 | from __future__ import print_function
|
10 |
|
11 | import argparse
|
12 | import collections
|
13 | import itertools
|
14 | import os
|
15 | # TODO docs.python.org suggests https://pypi.org/project/subprocess32/
|
16 | # for POSIX users
|
17 | import shlex
|
18 | import subprocess
|
19 | import sys
|
20 |
|
class GNUXargsQuirks(argparse.Action):
    """Store an option value and reset the options it conflicts with.

    The action is shared by the replace_str, max_lines, max_args and
    max_chars options.  After storing the parsed value it clears the
    attributes of the other grouping options on the namespace, so the
    option given last on the command line takes effect.
    """

    def __init__(self, option_strings, dest, **kwargs):
        super(GNUXargsQuirks, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        target = self.dest
        setattr(namespace, target, values)

        if target == 'max_chars':
            # max_chars does not reset any other option.
            return

        if target == 'replace_str':
            namespace.max_args = namespace.max_lines = None
            return

        if target == 'max_lines':
            namespace.max_args = namespace.replace_str = None
            return

        if target == 'max_args':
            namespace.max_lines = None
            # A max_args of 1 combined with an earlier replace_str is
            # dropped; any other value discards the replace_str instead.
            keeps_replace = namespace.max_args == 1 and namespace.replace_str
            if keeps_replace:
                namespace.max_args = None
            else:
                namespace.replace_str = None
            return

        assert False, "dest '%s' not handled" % self.dest
|
42 |
|
# Command-line interface of the script.
xargs = argparse.ArgumentParser(prog='xargs')

# Input source and item splitting.
# NOTE: because of nargs=1, argparse stores a one-element list when -a is
# given on the command line; the default stays the plain string '-'.
xargs.add_argument(
    '-a', '--arg-file',
    metavar='file',
    nargs=1,
    default='-',
    help='read arguments from FILE, not standard input',
)
xargs.add_argument(
    '-E',
    metavar='eof-str',
    dest='eof_str',
    help='set logical EOF string; if END occurs as a line of input, the rest of the input is ignored (ignored if -0 or -d was specified)',
)
xargs.add_argument(
    '-e', '--eof',
    metavar='eof-str',
    nargs='?',
    dest='eof_str',
    help='equivalent to -E END if END is specified; otherwise, there is no end-of-file string',
)
xargs.add_argument(
    '-0', '--null',
    dest='delimiter',
    action='store_const',
    const='\0',
    help='items are separated by a null, not whitespace; disables quote and backslash processing and logical EOF processing',
)
xargs.add_argument(
    '-d', '--delimiter',
    metavar='delimiter',
    dest='delimiter',
    help='items in input stream are separated by CHARACTER, not by whitespace; disables quote and backslash processing and logical EOF processing',
)

# Grouping options; they share GNUXargsQuirks so that they can reset
# each other while the command line is parsed.
xargs.add_argument(
    '-I',
    metavar='replace-str',
    dest='replace_str',
    action=GNUXargsQuirks,
    help='same as --replace=R',
)
xargs.add_argument(
    '-i', '--replace',
    metavar='replace-str',
    nargs='?',
    const='{}',
    dest='replace_str',
    action=GNUXargsQuirks,
    help='replace R in INITIAL-ARGS with names read from standard input; if R is unspecified, assume {}',
)
xargs.add_argument(
    '-L',
    metavar='max-lines',
    dest='max_lines',
    type=int,
    action=GNUXargsQuirks,
    help='use at most MAX-LINES non-blank input lines per command line',
)
xargs.add_argument(
    '-l', '--max-lines',
    metavar='max-lines',
    nargs='?',
    const=1,
    dest='max_lines',
    type=int,
    action=GNUXargsQuirks,
    help='similar to -L but defaults to at most one non-blank input line if MAX-LINES is not specified',
)
xargs.add_argument(
    '-n', '--max-args',
    metavar='max-args',
    dest='max_args',
    type=int,
    action=GNUXargsQuirks,
    help='use at most MAX-ARGS arguments per command line',
)
xargs.add_argument(
    '-s', '--max-chars',
    metavar='max-chars',
    dest='max_chars',
    type=int,
    action=GNUXargsQuirks,
    help='limit length of command line to MAX-CHARS',
)

# Execution control.
xargs.add_argument(
    '-P', '--max-procs',
    metavar='max-procs',
    default=1,
    dest='max_procs',
    type=int,
    help='run at most MAX-PROCS processes at a time',
)
xargs.add_argument(
    '--process-slot-var',
    metavar='name',
    help='set environment variable VAR in child processes',
)
xargs.add_argument(
    '-p', '--interactive',
    action='store_true',
    help='prompt before running commands',
)
xargs.add_argument(
    '-t', '--verbose',
    action='store_true',
    help='print commands before executing them',
)
xargs.add_argument(
    '-x', '--exit',
    action='store_true',
    help='exit if the size (see -s) is exceeded',
)
xargs.add_argument(
    '-r', '--no-run-if-empty',
    action='store_true',
    help='if there are no arguments, then do not run COMMAND; if this option is not given, COMMAND will be run at least once',
)
xargs.add_argument(
    '--show-limits',
    action='store_true',
    help='show limits on command-line length',
)
xargs.add_argument(
    '--version',
    action='version',
    version='%(prog)s 0.0.1',
    help='output version information and exit',
)

# Positionals: the command to run (defaults to echo) and its fixed arguments.
xargs.add_argument('command', nargs='?', default='echo')
xargs.add_argument('initial_arguments', nargs=argparse.REMAINDER)
|
65 |
|
class PeekableIterator(object):
    """Iterator wrapper that can look at the next item without consuming it.

    The wrapper implements both the Python 2 (``next``) and the Python 3
    (``__next__``) iterator protocol, so ``next(it)`` and ``for`` loops
    work on either interpreter.
    """

    def __init__(self, iterator):
        self.iterator = iterator
        # True while `item` holds a value fetched by peek() that has not
        # been handed out by next() yet.
        self.peeked = False
        self.item = None

    def peek(self):
        """
        Return the next item but does not advance the iterator further.
        Raise StopIteration if there is no such item.
        """
        if not self.peeked:
            self.item = next(self.iterator)
            self.peeked = True
        return self.item

    def next(self):
        """
        Return the next item and advance the iterator.
        Raise StopIteration if there is no such item.
        """
        if self.peeked:
            self.peeked = False
            return self.item
        return next(self.iterator)

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def __iter__(self):
        return self
|
91 |
|
def read_lines_eof(eof_str, input):
    # type: (str, Iterable[str]) -> Iterator[str]
    """Yield lines from input until a line equals eof_str or input ends.

    A line matches when it is eof_str followed by a newline, or eof_str
    alone (a final line without a trailing newline).  The matching line
    and everything after it are not yielded.  Any iterable of lines is
    accepted.
    """
    terminated = eof_str + '\n'
    for line in input:
        if line == terminated or line == eof_str:
            return
        yield line
|
96 |
|
def str_memsize(*strings):
    # type: (*str) -> int
    """Return the size of the strings when stored in an argv.

    Every string is counted with its length plus one extra unit.
    """
    total_length = sum(map(len, strings))
    return total_length + len(strings)
|
101 |
|
def is_complete_line(line):
    # type: (str) -> bool
    """Return True if line is non-blank and does not end in a blank.

    A single trailing newline is ignored, so a final line without a
    newline is judged by its real last character.  An empty line, and a
    line whose last character is a space or a tab, is not complete.
    """
    content = line[:-1] if line.endswith('\n') else line
    return bool(content) and content[-1] not in (' ', '\t')
|
105 |
|
def argsplit_ws(lines):
    # type: (Iterable[str]) -> Iterator[str]
    """Yield the arguments found on each line, split with shlex.split."""
    # TODO this might require some more testing
    tokenized = (shlex.split(raw) for raw in lines)
    for token in itertools.chain.from_iterable(tokenized):
        yield token
|
113 |
|
def argsplit_delim(delim, lines):
    # type: (str, Iterable[str]) -> Iterator[str]
    """Yield the items of lines, separated by the single character delim.

    The input is scanned character by character across line boundaries.
    An item is yielded at every delimiter, even when empty; a trailing
    item without a delimiter is yielded only when it is non-empty.
    """
    pending = []
    for ch in itertools.chain.from_iterable(lines):
        if ch != delim:
            pending.append(ch)
            continue
        yield "".join(pending)
        pending = []
    if pending:
        yield "".join(pending)
|
126 |
|
def read_n_xargs_lines(linec, line_iter):
    # type: (int, Iterator[str]) -> Iterator[str]
    """Yield lines from line_iter until linec complete lines were seen.

    Every line read is yielded, but only lines for which
    is_complete_line() is true count towards linec.  The generator also
    ends when line_iter is exhausted.
    """
    remaining = linec
    while remaining > 0:
        try:
            line = next(line_iter)
        except StopIteration:
            # End the generator explicitly: a StopIteration escaping a
            # generator body is turned into RuntimeError by PEP 479.
            return
        yield line
        if is_complete_line(line):
            remaining -= 1
|
134 |
|
def take_chars(charc, iterator):
    # type: (int, PeekableIterator) -> Iterator[str]
    """Yield items from iterator while their total size fits in charc.

    The size of each item is computed with str_memsize().  The first
    item that does not fit is left in the iterator, which therefore has
    to provide peek().  The generator also ends when the iterator is
    exhausted.
    """
    while True:
        try:
            upcoming = iterator.peek()
        except StopIteration:
            # End the generator explicitly: a StopIteration escaping a
            # generator body is turned into RuntimeError by PEP 479.
            return
        charc -= str_memsize(upcoming)
        if charc < 0:
            return
        # Cannot raise StopIteration: peek() just succeeded.
        yield next(iterator)
|
141 |
|
def take(n, iterator):
    # type: (int, Iterator[Any]) -> Iterator[Any]
    """Yield at most n items from iterator.

    Fewer items are yielded when the iterator is exhausted early.
    """
    for _ in range(n):
        try:
            item = next(iterator)
        except StopIteration:
            # End the generator explicitly: a StopIteration escaping a
            # generator body is turned into RuntimeError by PEP 479.
            return
        yield item
|
146 |
|
def group_args_lines(max_lines, input):
    # type: (int, Iterator[str]) -> Iterator[List[str]]
    """Yield lists of arguments, each built from up to max_lines lines.

    The lines for one group are selected by read_n_xargs_lines() and
    split into arguments by argsplit_ws().  Grouping stops at the first
    group that contains no argument.
    """
    while True:
        group = list(argsplit_ws(read_n_xargs_lines(max_lines, input)))
        if not group:
            # Return instead of letting a StopIteration escape the
            # generator body, which PEP 479 turns into RuntimeError.
            return
        yield group
|
154 |
|
def group_args(max_chars, max_args, arg_iter):
    # type: (Optional[int], Optional[int], Iterable[str]) -> Iterator[List[str]]
    """Group arguments into lists limited by size and/or count.

    max_chars: maximum total size of a group, counting len(arg) + 1 per
        argument (the same accounting as str_memsize); falsy disables
        the limit.
    max_args: maximum number of arguments per group; falsy disables the
        limit.
    arg_iter: the arguments to group.

    Yields non-empty lists.  Nothing is yielded for empty input.  Without
    any limit all arguments end up in a single group.

    Raises ValueError if a single argument is larger than max_chars.
    Such an argument can never be placed in a group; grouping cannot
    make progress past it.
    """
    group = []
    budget = max_chars
    for arg in arg_iter:
        if max_chars:
            cost = len(arg) + 1
            if group and cost > budget:
                # The argument does not fit any more: close the group.
                yield group
                group = []
                budget = max_chars
            if cost > budget:
                # Even an empty group cannot hold this argument.
                raise ValueError("argument line too long")
            budget -= cost
        group.append(arg)
        if max_args and len(group) >= max_args:
            # Hand the group out as soon as it is full, without reading
            # further input first.
            yield group
            group = []
            budget = max_chars
    if group:
        yield group
|
165 |
|
def replace_args(initial_arguments, replace_str, additional_arguments):
    # type: (Sequence[str], str, Iterable[str]) -> Iterator[str]
    """Yield initial_arguments with replace_str substituted.

    An argument equal to replace_str is replaced by all of
    additional_arguments, each as a separate argument.  An argument that
    merely contains replace_str gets every occurrence replaced by the
    additional arguments joined with single spaces, and stays a single
    argument.  All other arguments are passed through unchanged.
    """
    additional_arguments = list(additional_arguments)
    joined = ' '.join(additional_arguments)
    for arg in initial_arguments:
        if arg == replace_str:
            for x in additional_arguments:
                yield x
        elif replace_str and replace_str in arg:
            # The guard on replace_str keeps an empty replace string from
            # matching inside every argument.
            yield arg.replace(replace_str, joined)
        else:
            yield arg
|
175 |
|
def build_cmdlines_replace(command, initial_arguments, replace_str, arggroup_iter):
    # type: (str, Sequence[str], str, Iterator[Iterator[str]]) -> Iterator[List[str]]
    """
    Yield one command-line (a list suitable for subprocess.Popen) per
    argument group: the command followed by initial_arguments as
    rewritten by replace_args() for that group.
    """
    for group in arggroup_iter:
        substituted = replace_args(initial_arguments, replace_str, group)
        argv = [command]
        argv.extend(substituted)
        yield argv
|
193 |
|
def build_cmdlines(command, initial_arguments, arggroup_iter):
    # type: (str, Sequence[str], Iterator[Iterator[str]]) -> Iterator[List[str]]
    """Yield one command-line (a list suitable for subprocess.Popen) per
    argument group: command, then initial_arguments, then the group."""
    prefix = [command] + list(initial_arguments)
    for group in arggroup_iter:
        yield prefix + list(group)
|
203 |
|
def check_items(p, on_false, cmdline_iter):
    # type: (Callable[[Any], Any], Callable[[], Any], Iterable[Any]) -> Iterator[Any]
    """Yield the items for which p(item) is true.

    For every other item on_false() is called (without arguments) and
    the item is dropped.
    """
    for candidate in cmdline_iter:
        if not p(candidate):
            on_false()
            continue
        yield candidate
|
210 |
|
def tee_cmdline(cmdline_iter):
    # type: (Iterator[List[str]]) -> Iterator[List[str]]
    """Pass every cmdline through unchanged, echoing it to stderr first."""
    def _echo(argv):
        # Items are printed separated by single spaces, one cmdline per line.
        print(*argv, file=sys.stderr)
        return argv

    for argv in cmdline_iter:
        yield _echo(argv)
|
217 |
|
def prompt_user(cmdline_iter):
    # type: (Iterator[List[str]]) -> Iterator[List[str]]
    """Prompt the user for each cmdline.

    The cmdline is printed to stderr followed by ' ?...', and one line
    is read from /dev/tty.  The cmdline is yielded only if the answer
    starts with 'y' or 'Y'; any other answer, including an empty read
    (end of file on the terminal), skips it.
    """
    with open("/dev/tty", 'r') as tty:
        for cmdline in cmdline_iter:
            print(*cmdline, end=' ?...', file=sys.stderr)
            # The prompt does not end in a newline; flush so that it is
            # visible before blocking on the answer.
            sys.stderr.flush()
            response = tty.readline()
            # Slice instead of indexing: readline() returns '' at EOF.
            if response[:1] in ('y', 'Y'):
                yield cmdline
|
228 |
|
def wait_open_slot(processes):
    # type: (List[Optional[Any]]) -> Optional[int]
    """Return the index of a slot that is unused or whose process finished.

    processes: list of subprocess.Popen objects or None (unused slot).

    Blocks in os.wait() while every slot holds a running process.
    Returns None if processes is empty.
    """
    while processes:
        for slot, proc in enumerate(processes):
            # process doesn't yet exist or has finished
            if proc is None or proc.poll() is not None:
                return slot
        pid, status = os.wait()
        # os.wait() has reaped the child, so its Popen object can no longer
        # obtain the exit status itself (poll()/wait() would report 0).
        # Record the status on the matching object to keep it available.
        for proc in processes:
            if proc is not None and proc.pid == pid and proc.returncode is None:
                if os.WIFSIGNALED(status):
                    # Same convention as subprocess: negative signal number.
                    proc.returncode = -os.WTERMSIG(status)
                else:
                    proc.returncode = os.WEXITSTATUS(status)
                break
|
237 |
|
def map_errcode(rc):
    # type: (int) -> int
    """Translate the returncode of a child-process into the returncode of the main process.

    0 stays 0, 1..125 becomes 123, 255 becomes 124, a negative value
    becomes 125 and everything else becomes 1.
    """
    if rc < 0:
        return 125
    if rc == 0:
        return 0
    if rc <= 125:
        return 123
    return 124 if rc == 255 else 1
|
250 |
|
def main(xargs_args):
    # type: (argparse.Namespace) -> int
    """Run the commands described by the parsed options.

    The work is done in four phases: select the input, split and group
    it into arguments, build the command-lines, and execute them
    (sequentially, or with up to max_procs processes in parallel).

    Returns the exit status for the script: 0 on success, otherwise the
    value map_errcode() derives from a failed child's returncode.
    """
    # phase 1: read input
    arg_file = xargs_args.arg_file
    if isinstance(arg_file, (list, tuple)):
        # argparse stores the value of an option declared with nargs=1
        # as a one-element list.
        arg_file = arg_file[0]
    if arg_file == '-':
        xargs_input = sys.stdin
        # Arguments come from stdin, so the commands must not read it.
        cmd_input = open(os.devnull, 'r')
    else:
        if hasattr(arg_file, 'readline'):
            # Already an open stream; use it as it is.
            xargs_input = arg_file
        else:
            xargs_input = open(arg_file, 'r')
        cmd_input = sys.stdin

    if xargs_args.eof_str:
        xargs_input = read_lines_eof(xargs_args.eof_str, xargs_input)

    # phase 2: parse and group args
    if xargs_args.max_lines:
        assert not xargs_args.max_args
        assert not xargs_args.delimiter
        assert xargs_args.exit
        arggroup_iter = group_args_lines(xargs_args.max_lines, xargs_input)
    else:
        if xargs_args.delimiter:
            arg_iter = argsplit_delim(xargs_args.delimiter, xargs_input)
        else:
            arg_iter = argsplit_ws(xargs_input)
        # if exit is True, max_chars is checked later
        arggroup_iter = group_args(
            xargs_args.max_chars if not xargs_args.exit else None,
            xargs_args.max_args,
            arg_iter
        )

    arggroup_iter = PeekableIterator(arggroup_iter)
    if xargs_args.no_run_if_empty:
        try:
            x = arggroup_iter.peek()
            # TODO not even sure how the interaction with -I is supposed to work
            # echo | xargs -I {} echo {} : dont run
            # echo | xargs -I {} echo {} "x" : dont run
            # echo | xargs -I {} echo "x" : dont run
            # echo x | xargs -I {} echo : run
            # echo xx | xargs -I {} -d 'x' echo {} : run 3 times ('', '', '\n')

            # if not x or not x[0]:
            if not x:
                return 0
        except StopIteration:
            return 0
    else:
        try:
            arggroup_iter.peek()
        except StopIteration:
            # No input at all: still run the command once, without
            # additional arguments.
            arggroup_iter = [[]]

    # phase 3: build command-lines
    if xargs_args.replace_str:
        cmdline_iter = build_cmdlines_replace(
            xargs_args.command,
            xargs_args.initial_arguments,
            xargs_args.replace_str,
            arggroup_iter
        )
    else:
        cmdline_iter = build_cmdlines(
            xargs_args.command,
            xargs_args.initial_arguments,
            arggroup_iter
        )

    if xargs_args.max_chars is not None and xargs_args.exit:
        # Exit with status 1 at the first command-line that is too long.
        cmdline_iter = check_items(
            lambda c: str_memsize(*c) < xargs_args.max_chars,
            lambda: sys.exit(1),
            cmdline_iter
        )

    if xargs_args.interactive:
        cmdline_iter = prompt_user(cmdline_iter)
    elif xargs_args.verbose:
        cmdline_iter = tee_cmdline(cmdline_iter)

    # phase 4: execute command-lines
    if xargs_args.max_procs > 1:
        ps = [None] * xargs_args.max_procs
        environ = os.environ.copy()
        for cmdline in cmdline_iter:
            i = wait_open_slot(ps)
            # Stop starting commands once a finished one has failed.
            if ps[i] is not None and ps[i].returncode:
                break
            if xargs_args.process_slot_var:
                environ[xargs_args.process_slot_var] = str(i)
            ps[i] = subprocess.Popen(cmdline, stdin=cmd_input, env=environ)
        codes = [map_errcode(p.wait()) for p in ps if p is not None]
        # No command may have been started at all (e.g. every prompt was
        # declined); max() of an empty sequence would raise.
        return max(codes) if codes else 0
    else:
        for cmdline in cmdline_iter:
            p = subprocess.Popen(cmdline, stdin=cmd_input)
            if p.wait():
                return map_errcode(p.returncode)
        return 0
|
348 |
|
# Script entry point: parse the command line, normalise the options so that
# main() sees a consistent combination, then run main() and exit with its
# return value.  The normalisation steps below depend on their order.
if __name__ == "__main__":
    xargs_args = xargs.parse_args()

    if xargs_args.delimiter:
        # Interpret backslash escapes in the delimiter (str.decode with the
        # 'string_escape' codec is available on Python 2 only).
        xargs_args.delimiter = xargs_args.delimiter.decode('string_escape')
        if len(xargs_args.delimiter) > 1:
            # Only single-character delimiters are supported.
            # TODO error
            sys.exit(1)
    if xargs_args.max_chars and not xargs_args.replace_str:
        # Reserve the room taken by the command and its initial arguments;
        # afterwards max_chars is the budget left for additional arguments.
        base = str_memsize(xargs_args.command, *xargs_args.initial_arguments)
        if base > xargs_args.max_chars:
            # TODO error
            sys.exit(1)
        xargs_args.max_chars -= base

    # TODO warnings when appropriate
    # -d disables -e
    if xargs_args.delimiter and xargs_args.eof_str:
        xargs_args.eof_str = None
    # -I implies -L 1 (and transitively -x)
    if xargs_args.replace_str and xargs_args.max_lines != 1:
        xargs_args.max_lines = 1
    # -I implies -d '\n'
    if xargs_args.replace_str and xargs_args.delimiter != '\n':
        xargs_args.delimiter = '\n'
    # -L implies -x
    if xargs_args.max_lines is not None and not xargs_args.exit:
        xargs_args.exit = True
    # -p implies -t
    if xargs_args.interactive and not xargs_args.verbose:
        xargs_args.verbose = True

    # (undocumented)
    # if -d then -L equals -n
    # With the two -I rules above this turns -I into "one argument per
    # command", because the delimiter is set by then.
    if xargs_args.delimiter and xargs_args.max_lines:
        xargs_args.max_args = xargs_args.max_lines
        xargs_args.max_lines = None
    # TODO? -I implies -r
    if xargs_args.replace_str and not xargs_args.no_run_if_empty:
        xargs_args.no_run_if_empty = True

    sys.exit(main(xargs_args))
|