| 1 | #!/usr/bin/env python2
 | 
| 2 | # Copyright 2019 Wilke Schwiedop. All rights reserved.
 | 
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 | 
| 4 | # you may not use this file except in compliance with the License.
 | 
| 5 | # You may obtain a copy of the License at
 | 
| 6 | #
 | 
| 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 | 
| 8 | 
 | 
| 9 | from __future__ import print_function
 | 
| 10 | 
 | 
| 11 | import argparse
 | 
| 12 | import collections
 | 
| 13 | import itertools
 | 
| 14 | import os
 | 
| 15 | # TODO docs.python.org suggests https://pypi.org/project/subprocess32/
 | 
| 16 | #      for POSIX users
 | 
| 17 | import shlex
 | 
| 18 | import subprocess
 | 
| 19 | import sys
 | 
| 20 | 
 | 
class GNUXargsQuirks(argparse.Action):
	"""
	argparse action for the mutually overriding xargs options.

	The parsed value is stored under ``dest`` and the options that conflict
	with it are reset to None on the namespace:

	* replace_str clears max_args and max_lines
	* max_lines clears max_args and replace_str
	* max_args clears max_lines; it also clears replace_str, unless the value
	  is 1 and a replace_str is set, in which case max_args itself is cleared
	* max_chars clears nothing
	"""
	def __init__(self, option_strings, dest, **kwargs):
		super(GNUXargsQuirks, self).__init__(option_strings, dest, **kwargs)

	def __call__(self, parser, namespace, values, option_string=None):
		target = self.dest
		setattr(namespace, target, values)

		if target == 'max_chars':
			# the size limit does not interact with the other options
			return
		if target == 'replace_str':
			namespace.max_args = None
			namespace.max_lines = None
			return
		if target == 'max_lines':
			namespace.max_args = None
			namespace.replace_str = None
			return
		if target != 'max_args':
			# the action was attached to an option it does not know about
			assert False, "dest '%s' not handled" % self.dest
			return

		namespace.max_lines = None
		if namespace.max_args == 1 and namespace.replace_str:
			# a max_args of 1 next to a replace string keeps the replace string
			namespace.max_args = None
		else:
			namespace.replace_str = None
 | 
| 42 | 
 | 
# Command-line interface of the xargs clone.
xargs = argparse.ArgumentParser(prog='xargs')

# --- where the items come from and where the input logically ends
xargs.add_argument(
	'-a', '--arg-file',
	metavar='file', nargs=1, default='-',
	help='read arguments from FILE, not standard input'
)
xargs.add_argument(
	'-E',
	metavar='eof-str', dest='eof_str',
	help='set logical EOF string; if END occurs as a line of input, the rest of the input is ignored (ignored if -0 or -d was specified)'
)
xargs.add_argument(
	'-e', '--eof',
	metavar='eof-str', nargs='?', dest='eof_str',
	help='equivalent to -E END if END is specified; otherwise, there is no end-of-file string'
)

# --- how the input is split into items (both options share dest 'delimiter')
xargs.add_argument(
	'-0', '--null',
	dest='delimiter', action='store_const', const='\0',
	help='items are separated by a null, not whitespace; disables quote and backslash processing and logical EOF processing'
)
xargs.add_argument(
	'-d', '--delimiter',
	metavar='delimiter', dest='delimiter',
	help='items in input stream are separated by CHARACTER, not by whitespace; disables quote and backslash processing and logical EOF processing'
)

# --- options handled by GNUXargsQuirks
xargs.add_argument(
	'-I',
	metavar='replace-str', dest='replace_str', action=GNUXargsQuirks,
	help='same as --replace=R'
)
xargs.add_argument(
	'-i', '--replace',
	metavar='replace-str', nargs='?', const='{}', dest='replace_str', action=GNUXargsQuirks,
	help='replace R in INITIAL-ARGS with names read from standard input; if R is unspecified, assume {}'
)
xargs.add_argument(
	'-L',
	metavar='max-lines', dest='max_lines', type=int, action=GNUXargsQuirks,
	help='use at most MAX-LINES non-blank input lines per command line'
)
xargs.add_argument(
	'-l', '--max-lines',
	metavar='max-lines', nargs='?', const=1, dest='max_lines', type=int, action=GNUXargsQuirks,
	help='similar to -L but defaults to at most one non-blank input line if MAX-LINES is not specified'
)
xargs.add_argument(
	'-n', '--max-args',
	metavar='max-args', dest='max_args', type=int, action=GNUXargsQuirks,
	help='use at most MAX-ARGS arguments per command line'
)
xargs.add_argument(
	'-s', '--max-chars',
	metavar='max-chars', dest='max_chars', type=int, action=GNUXargsQuirks,
	help='limit length of command line to MAX-CHARS'
)

# --- execution control
xargs.add_argument(
	'-P', '--max-procs',
	metavar='max-procs', default=1, dest='max_procs', type=int,
	help='run at most MAX-PROCS processes at a time'
)
xargs.add_argument(
	'--process-slot-var',
	metavar='name',
	help='set environment variable VAR in child processes'
)
xargs.add_argument(
	'-p', '--interactive',
	action='store_true',
	help='prompt before running commands'
)
xargs.add_argument(
	'-t', '--verbose',
	action='store_true',
	help='print commands before executing them'
)
xargs.add_argument(
	'-x', '--exit',
	action='store_true',
	help='exit if the size (see -s) is exceeded'
)
xargs.add_argument(
	'-r', '--no-run-if-empty',
	action='store_true',
	help='if there are no arguments, then do not run COMMAND; if this option is not given, COMMAND will be run at least once'
)

# --- informational
xargs.add_argument(
	'--show-limits',
	action='store_true',
	help='show limits on command-line length'
)
xargs.add_argument(
	'--version',
	action='version', version='%(prog)s 0.0.1',
	help='output version information and exit'
)

# --- positionals: the command to run (default 'echo') and its fixed arguments
xargs.add_argument('command', nargs='?', default='echo')
xargs.add_argument('initial_arguments', nargs=argparse.REMAINDER)
 | 
| 65 | 
 | 
class PeekableIterator(object):
	"""
	Wrap an iterator so that the upcoming item can be inspected without
	consuming it.

	The wrapper is itself an iterator. It implements both the Python 2
	(``next``) and the Python 3 (``__next__``) iterator protocol, so the
	builtin ``next()`` and ``for`` loops work on either interpreter.
	"""
	def __init__(self, iterator):
		self.iterator = iterator
		# True while self.item holds an item that was peeked but not yet consumed
		self.peeked = False
		self.item = None

	def peek(self):
		"""
		Return the next item but does not advance the iterator further.
		Raise StopIteration if there is no such item.
		"""
		if not self.peeked:
			self.item = next(self.iterator)
			self.peeked = True
		return self.item

	def next(self):
		"""
		Return the next item and advance the iterator.
		Raise StopIteration if there is no such item.
		"""
		if self.peeked:
			# hand out the buffered item before touching the wrapped iterator
			self.peeked = False
			return self.item
		return next(self.iterator)

	# Python 3 looks up __next__ instead of next
	__next__ = next

	def __iter__(self):
		return self
 | 
| 91 | 
 | 
def read_lines_eof(eof_str, input):
	# type: (str, Iterable[str]) -> Iterator[str]
	"""
	Yield lines from input until a line equals eof_str or input is exhausted.

	The line matching eof_str is consumed but not yielded. A line matches
	with or without its trailing newline, so an EOF string on an unterminated
	last line is recognised as well. Any iterable of lines is accepted.
	"""
	terminated = eof_str + '\n'
	for line in input:
		if line == terminated or line == eof_str:
			return
		yield line
 | 
| 96 | 
 | 
| 97 | def str_memsize(*strings):
 | 
| 98 | 	# type: (*str) -> int
 | 
| 99 | 	"""Calculate the amount of memory required to store the strings in an argv."""
 | 
| 100 | 	return sum(len(s) + 1 for s in strings)
 | 
| 101 | 
 | 
def is_complete_line(line):
	# type: (str) -> bool
	"""
	Tell whether line counts as a complete input line.

	A line is complete when it is non-empty and does not end in a blank
	(space or tab) once a single trailing newline, if present, is removed.
	The newline is optional so that an unterminated last line of the input
	is judged by its real last character.
	"""
	if line.endswith('\n'):
		line = line[:-1]
	return len(line) > 0 and line[-1] not in (' ', '\t')
 | 
| 105 | 
 | 
def argsplit_ws(lines):
	# type: (Iterable[str]) -> Iterator[str]
	"""Yield the arguments of every line, as tokenized by shlex.split."""
	# TODO this might require some more testing
	tokenize = shlex.split
	for text in lines:
		tokens = tokenize(text)
		for token in tokens:
			yield token
 | 
| 113 | 
 | 
def argsplit_delim(delim, lines):
	# type: (str, Iterable[str]) -> Iterator[str]
	"""
	Yield the arguments found in lines, treating the single character delim
	as the terminator of each argument.

	Arguments may span several lines. Text after the last delimiter is
	yielded as a final argument unless it is empty.
	"""
	pending = []
	for text in lines:
		for char in text:
			if char != delim:
				pending.append(char)
				continue
			yield "".join(pending)
			pending = []
	if pending:
		yield "".join(pending)
 | 
| 126 | 
 | 
def read_n_xargs_lines(linec, line_iter):
	# type: (int, Iterator[str]) -> Iterator[str]
	"""
	Yield lines from line_iter until linec complete lines have been yielded
	or line_iter is exhausted.

	Every line read is yielded, but only the ones is_complete_line() accepts
	count towards linec.
	"""
	while linec > 0:
		try:
			line = next(line_iter)
		except StopIteration:
			# End cleanly: a StopIteration escaping a generator is an error
			# on interpreters that implement PEP 479.
			return
		yield line
		if is_complete_line(line):
			linec -= 1
 | 
| 134 | 
 | 
def take_chars(charc, iterator):
	# type: (int, Iterator[str]) -> Iterator[str]
	"""
	Yield items from iterator for as long as their accumulated
	str_memsize() stays within charc.

	iterator must provide peek(). The first item that does not fit is left
	unconsumed. Nothing is yielded if even the first item exceeds charc.
	"""
	while True:
		try:
			upcoming = iterator.peek()
		except StopIteration:
			# Input exhausted. Return instead of letting StopIteration
			# escape the generator (an error under PEP 479).
			return
		charc -= str_memsize(upcoming)
		if charc < 0:
			return
		yield next(iterator)
 | 
| 141 | 
 | 
def take(n, iterator):
	# type: (int, Iterator[Any]) -> Iterator[Any]
	"""
	Yield up to n items from iterator.

	Stops early, without error, when iterator runs out. Items beyond the
	n-th are not consumed.
	"""
	for _ in range(n):
		try:
			item = next(iterator)
		except StopIteration:
			# Return instead of letting StopIteration escape the generator
			# (an error under PEP 479).
			return
		yield item
 | 
| 146 | 
 | 
def group_args_lines(max_lines, input):
	# type: (int, Iterator[str]) -> Iterator[List[str]]
	"""
	Yield lists holding the arguments of up to max_lines complete input lines.

	Lines are taken with read_n_xargs_lines() and split with argsplit_ws().
	Iteration ends with the first group that contains no argument.
	"""
	while True:
		group = list(argsplit_ws(read_n_xargs_lines(max_lines, input)))
		if not group:
			# Nothing left to read. Returning explicitly avoids relying on a
			# StopIteration escaping the generator (an error under PEP 479).
			return
		yield group
 | 
| 154 | 
 | 
def group_args(max_chars, max_args, arg_iter):
	# type: (Optional[int], Optional[int], Iterator[str]) -> Iterator[List[str]]
	"""
	Yield lists of arguments taken from arg_iter.

	A group is limited to max_args items and to max_chars of str_memsize();
	a limit that is None (or 0) is not applied. Without any limit all
	arguments end up in a single group. Nothing is yielded for empty input.

	Exits the program with an error message (exit status 1) if the limits
	do not even admit the next argument.
	"""
	arg_iter = PeekableIterator(arg_iter)
	while True:
		try:
			arg_iter.peek()
		except StopIteration:
			# Input exhausted. Return instead of letting StopIteration
			# escape the generator (an error under PEP 479).
			return
		it = arg_iter
		if max_chars:
			it = take_chars(max_chars, it)
		if max_args:
			it = take(max_args, it)
		group = list(it)
		if not group:
			# An argument is available but was not taken, e.g. because it is
			# larger than max_chars on its own. No progress is possible and
			# without this check empty groups would be produced forever.
			sys.exit("xargs: argument line too long")
		yield group
 | 
| 165 | 
 | 
def replace_args(initial_arguments, replace_str, additional_arguments):
	# type: (Sequence[str], str, Iterable[str]) -> Iterator[str]
	"""
	Yield initial_arguments with replace_str substituted.

	* An argument equal to replace_str expands to all additional_arguments,
	  each as an argument of its own.
	* An argument that merely contains replace_str (e.g. '{}.bak') has every
	  occurrence replaced by the additional arguments joined with a space.
	* All other arguments are passed through untouched.
	"""
	additional_arguments = list(additional_arguments)
	for arg in initial_arguments:
		if arg == replace_str:
			for x in additional_arguments:
				yield x
		elif replace_str and replace_str in arg:
			# An empty replace_str is excluded: it would match everywhere.
			yield arg.replace(replace_str, ' '.join(additional_arguments))
		else:
			yield arg
 | 
| 175 | 
 | 
def build_cmdlines_replace(command, initial_arguments, replace_str, arggroup_iter):
	# type: (str, Sequence[str], str, Iterator[Iterator[str]]) -> Iterator[List[str]]
	"""
	Yield one command-line (a list usable with subprocess.Popen) per argument
	group: command followed by initial_arguments as rewritten by
	replace_args() for that group.
	"""
	for group in arggroup_iter:
		argv = [command]
		substituted = replace_args(initial_arguments, replace_str, group)
		argv.extend(substituted)
		yield argv
 | 
| 193 | 
 | 
def build_cmdlines(command, initial_arguments, arggroup_iter):
	# type: (str, Sequence[str], Iterator[Iterator[str]]) -> Iterator[List[str]]
	"""
	Yield one command-line (a list usable with subprocess.Popen) per argument
	group: command, then initial_arguments, then the arguments of the group.
	"""
	prefix = [command] + list(initial_arguments)
	for group in arggroup_iter:
		yield prefix + list(group)
 | 
| 203 | 
 | 
def check_items(p, on_false, cmdline_iter):
	"""
	Yield the items of cmdline_iter for which the predicate p is true.
	For every rejected item on_false() is called (without arguments) and
	the item is dropped.
	"""
	for candidate in cmdline_iter:
		if not p(candidate):
			on_false()
			continue
		yield candidate
 | 
| 210 | 
 | 
def tee_cmdline(cmdline_iter):
	# type: (Iterator[List[str]]) -> Iterator[List[str]]
	"""
	Pass every command-line through unchanged, printing its
	space-separated words to stderr just before it is yielded.
	"""
	for argv in cmdline_iter:
		print(*argv, file=sys.stderr)
		yield argv
 | 
| 217 | 
 | 
def prompt_user(cmdline_iter):
	# type: (Iterator[List[str]]) -> Iterator[List[str]]
	"""
	Prompt the user for each cmdline.

	The command-line is printed to stderr and an answer is read from
	/dev/tty. Only command-lines whose answer starts with 'y' or 'Y' are
	yielded. Any other answer, including an empty one or end-of-file on the
	terminal, skips the command-line.
	"""
	with open("/dev/tty", 'r') as tty:
		for cmdline in cmdline_iter:
			print(*cmdline, end=' ?...', file=sys.stderr)
			# the prompt does not end in a newline, so push it out explicitly
			sys.stderr.flush()
			response = tty.readline()
			# slicing (instead of indexing) tolerates the empty string
			# readline() returns at end-of-file
			if response[:1] in ('y', 'Y'):
				yield cmdline
 | 
| 228 | 
 | 
def wait_open_slot(processes):
	# type: (List[Optional[Any]]) -> int
	"""
	Block until a slot of processes is free and return its index.

	A slot is free when it holds None or a process whose poll() reports an
	exit status. Returns None immediately if processes is empty.
	"""
	# Local import: the module is only needed here.
	import time
	while processes:
		for i, p in enumerate(processes):
			# process doesn't yet exist or has finished
			if p is None or p.poll() is not None:
				return i
		# Poll instead of calling os.wait(): os.wait() collects the exit
		# status of whichever child finished, after which the owning Popen
		# object can no longer retrieve it and the failure would be lost.
		time.sleep(0.01)
 | 
| 237 | 
 | 
def map_errcode(rc):
	# type: (int) -> int
	"""
	Translate the returncode of a child-process into the returncode of the
	main process:

	    0        -> 0
	    1..125   -> 123
	    255      -> 124
	    negative -> 125
	    other    -> 1
	"""
	if rc < 0:
		return 125
	if rc == 0:
		return 0
	if rc <= 125:
		return 123
	return 124 if rc == 255 else 1
 | 
| 250 | 
 | 
def _run_xargs(xargs_args, xargs_input, cmd_input):
	"""
	Phases 2-4 of main(): group the arguments read from xargs_input, build
	the command-lines and execute them with cmd_input as their stdin.
	Return the exit status for the program.
	"""
	# phase 2: parse and group args
	if xargs_args.max_lines:
		assert not xargs_args.max_args
		assert not xargs_args.delimiter
		assert xargs_args.exit
		arggroup_iter = group_args_lines(xargs_args.max_lines, xargs_input)
	else:
		if xargs_args.delimiter:
			arg_iter = argsplit_delim(xargs_args.delimiter, xargs_input)
		else:
			arg_iter = argsplit_ws(xargs_input)
		# if exit is True, max_chars is checked later
		arggroup_iter = group_args(
			xargs_args.max_chars if not xargs_args.exit else None,
			xargs_args.max_args,
			arg_iter
		)

	arggroup_iter = PeekableIterator(arggroup_iter)
	if xargs_args.no_run_if_empty:
		try:
			x = arggroup_iter.peek()
			# TODO not even sure how the interaction with -I is supposed to work
			# echo   | xargs -I {} echo {}		: dont run
			# echo   | xargs -I {} echo {} "x"	: dont run
			# echo   | xargs -I {} echo    "x"	: dont run
			# echo x | xargs -I {} echo 		: run
			# echo xx | xargs -I {} -d 'x' echo {}	: run 3 times ('', '', '\n')
			if not x:
				return 0
		except StopIteration:
			return 0
	else:
		try:
			arggroup_iter.peek()
		except StopIteration:
			# no input at all: still run the command once, without arguments
			arggroup_iter = [[]]

	# phase 3: build command-lines
	if xargs_args.replace_str:
		cmdline_iter = build_cmdlines_replace(
			xargs_args.command,
			xargs_args.initial_arguments,
			xargs_args.replace_str,
			arggroup_iter
		)
	else:
		cmdline_iter = build_cmdlines(
			xargs_args.command,
			xargs_args.initial_arguments,
			arggroup_iter
		)

	if xargs_args.max_chars is not None and xargs_args.exit:
		# stop the whole program at the first command-line that is too large
		cmdline_iter = check_items(
			lambda c: str_memsize(*c) < xargs_args.max_chars,
			lambda: sys.exit(1),
			cmdline_iter
		)

	if xargs_args.interactive:
		cmdline_iter = prompt_user(cmdline_iter)
	elif xargs_args.verbose:
		cmdline_iter = tee_cmdline(cmdline_iter)

	# phase 4: execute command-lines
	if xargs_args.max_procs > 1:
		ps = [None] * xargs_args.max_procs
		environ = os.environ.copy()
		for cmdline in cmdline_iter:
			i = wait_open_slot(ps)
			# stop starting new commands once a finished one has failed
			if ps[i] is not None and ps[i].returncode:
				break
			if xargs_args.process_slot_var:
				environ[xargs_args.process_slot_var] = str(i)
			ps[i] = subprocess.Popen(cmdline, stdin=cmd_input, env=environ)
		codes = [map_errcode(p.wait()) for p in ps if p is not None]
		# no command may have been started at all (e.g. every prompt was
		# declined); max() of an empty sequence would raise
		return max(codes) if codes else 0
	else:
		for cmdline in cmdline_iter:
			p = subprocess.Popen(cmdline, stdin=cmd_input)
			if p.wait():
				return map_errcode(p.returncode)
	return 0

def main(xargs_args):
	"""
	Run xargs as described by the parsed arguments xargs_args and return
	the exit status for the program.

	Arguments are read from stdin when arg_file is '-', in which case the
	commands get the null device as stdin. Otherwise arguments are read
	from arg_file and the commands inherit stdin. arg_file may be a path,
	a one-element list holding a path (what argparse produces for nargs=1)
	or an already opened file.
	"""
	# files opened here; closed again before returning
	opened = []
	try:
		# phase 1: read input
		arg_file = xargs_args.arg_file
		if isinstance(arg_file, (list, tuple)):
			arg_file = arg_file[0]
		if arg_file == '-':
			xargs_input = sys.stdin
			cmd_input = open(os.devnull, 'r')
			opened.append(cmd_input)
		else:
			if hasattr(arg_file, 'read'):
				xargs_input = arg_file
			else:
				xargs_input = open(arg_file, 'r')
				opened.append(xargs_input)
			cmd_input = sys.stdin

		if xargs_args.eof_str:
			xargs_input = read_lines_eof(xargs_args.eof_str, xargs_input)

		return _run_xargs(xargs_args, xargs_input, cmd_input)
	finally:
		for handle in opened:
			handle.close()
 | 
| 348 | 
 | 
# Script entry point: parse the command line, normalise the options the way
# the notes below describe, then run main() and exit with its status.
if __name__ == "__main__":
	xargs_args = xargs.parse_args()

	if xargs_args.delimiter:
		# interpret backslash escapes such as '\n' or '\0'
		# (str.decode with 'string_escape' exists on Python 2 only)
		xargs_args.delimiter = xargs_args.delimiter.decode('string_escape')
		if len(xargs_args.delimiter) > 1:
			# a string argument is printed to stderr and gives exit status 1
			sys.exit("xargs: the delimiter must be a single character")
	if xargs_args.max_chars and not xargs_args.replace_str:
		base = str_memsize(xargs_args.command, *xargs_args.initial_arguments)
		if base > xargs_args.max_chars:
			sys.exit("xargs: the command and its initial arguments exceed the size limit given with -s")
		# Without -x the limit is applied to the additional arguments only,
		# so the fixed part is taken off the budget here. With -x (given, or
		# implied by -L further down) main() measures the complete
		# command-line against max_chars; subtracting base as well would
		# count the fixed part twice.
		if not (xargs_args.exit or xargs_args.max_lines is not None):
			xargs_args.max_chars -= base

	# TODO warnings when appropriate
	# -d disables -e
	if xargs_args.delimiter and xargs_args.eof_str:
		xargs_args.eof_str = None
	# -I implies -L 1 (and transitively -x)
	if xargs_args.replace_str and xargs_args.max_lines != 1:
		xargs_args.max_lines = 1
	# -I implies -d '\n'
	if xargs_args.replace_str and xargs_args.delimiter != '\n':
		xargs_args.delimiter = '\n'
	# -L implies -x
	if xargs_args.max_lines is not None and not xargs_args.exit:
		xargs_args.exit = True
	# -p implies -t
	if xargs_args.interactive and not xargs_args.verbose:
		xargs_args.verbose = True

	# (undocumented)
	# if -d then -L equals -n
	if xargs_args.delimiter and xargs_args.max_lines:
		xargs_args.max_args = xargs_args.max_lines
		xargs_args.max_lines = None
	# TODO? -I implies -r
	if xargs_args.replace_str and not xargs_args.no_run_if_empty:
		xargs_args.no_run_if_empty = True

	sys.exit(main(xargs_args))
 |