builtin/read

OILS / builtin / read_osh.py View on Github | oilshell.org

521 lines, 305 significant

1	from __future__ import print_function
2
3	from errno import EINTR
4
5	from _devbuild.gen import arg_types
6	from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
7	from _devbuild.gen.syntax_asdl import source, loc_t
8	from _devbuild.gen.value_asdl import value, LeftName
9	from core import alloc
10	from core import error
11	from core.error import e_die
12	from core import pyos
13	from core import pyutil
14	from core import state
15	from core import ui
16	from core import vm
17	from frontend import flag_util
18	from frontend import reader
19	from frontend import typed_args
20	from mycpp import mops
21	from mycpp import mylib
22	from mycpp.mylib import log, STDIN_FILENO
23
24	import posix_ as posix
25
26	from typing import Tuple, List, Any, TYPE_CHECKING
27	if TYPE_CHECKING:
28	from _devbuild.gen.runtime_asdl import span_t
29	from frontend.parse_lib import ParseContext
30	from frontend import args
31	from osh.cmd_eval import CommandEvaluator
32	from osh.split import SplitContext
33
34	_ = log
35
36	# The read builtin splits using IFS.
37	#
38	# Summary:
39	# - Split with IFS, except \ can escape them! This is different than the
40	# algorithm for splitting words (at least the way I've represented it.)
41
42	# Bash manual:
43	# - If there are more words than names, the remaining words and their
44	# intervening delimiters are assigned to the last name.
45	# - If there are fewer words read from the input stream than names, the
46	# remaining names are assigned empty values.
47	# - The characters in the value of the IFS variable are used to split the line
48	# into words using the same rules the shell uses for expansion (described
49	# above in Word Splitting).
50	# - The backslash character '\' may be used to remove any special meaning for
51	# the next character read and for line continuation.
52
53
54	def _AppendParts(
55	s, # type: str
56	spans, # type: List[Tuple[span_t, int]]
57	max_results, # type: int
58	join_next, # type: bool
59	parts, # type: List[mylib.BufWriter]
60	):
61	# type: (...) -> Tuple[bool, bool]
62	"""Append to 'parts', for the 'read' builtin.
63
64	Similar to _SpansToParts in osh/split.py
65
66	Args:
67	s: The original string
68	spans: List of (span, end_index)
69	max_results: the maximum number of parts we want
70	join_next: Whether to join the next span to the previous part. This
71	happens in two cases:
72	- when we have '\ '
73	- and when we have more spans # than max_results.
74	"""
75	start_index = 0
76	# If the last span was black, and we get a backslash, set join_next to merge
77	# two black spans.
78	last_span_was_black = False
79
80	for span_type, end_index in spans:
81	if span_type == span_e.Black:
82	if join_next and len(parts):
83	parts[-1].write(s[start_index:end_index])
84	join_next = False
85	else:
86	buf = mylib.BufWriter()
87	buf.write(s[start_index:end_index])
88	parts.append(buf)
89	last_span_was_black = True
90
91	elif span_type == span_e.Delim:
92	if join_next:
93	parts[-1].write(s[start_index:end_index])
94	join_next = False
95	last_span_was_black = False
96
97	elif span_type == span_e.Backslash:
98	if last_span_was_black:
99	join_next = True
100	last_span_was_black = False
101
102	if max_results and len(parts) >= max_results:
103	join_next = True
104
105	start_index = end_index
106
107	done = True
108	if len(spans):
109	#log('%s %s', s, spans)
110	#log('%s', spans[-1])
111	last_span_type, _ = spans[-1]
112	if last_span_type == span_e.Backslash:
113	done = False
114
115	#log('PARTS %s', parts)
116	return done, join_next
117
118
119	#
120	# Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
121	# _ReadPortion, and ReadLineSlowly
122	#
123
124
125	def _ReadN(num_bytes, cmd_ev):
126	# type: (int, CommandEvaluator) -> str
127	chunks = [] # type: List[str]
128	bytes_left = num_bytes
129	while bytes_left > 0:
130	n, err_num = pyos.Read(STDIN_FILENO, bytes_left,
131	chunks) # read up to n bytes
132
133	if n < 0:
134	if err_num == EINTR:
135	cmd_ev.RunPendingTraps()
136	# retry after running traps
137	else:
138	raise pyos.ReadError(err_num)
139
140	elif n == 0: # EOF
141	break
142
143	else:
144	bytes_left -= n
145
146	return ''.join(chunks)
147
148
def _ReadPortion(delim_byte, max_chars, cmd_ev):
    # type: (int, int, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of stdin, one byte at a time.

    Reads until the delimiter, EOF, or max_chars collected bytes, whichever
    comes first.  max_chars is ignored if it's negative (callers pass -1 for
    "unset").

    The delimiter is read but not included in the result.

    Args:
      delim_byte: byte value that terminates the portion
      max_chars: maximum number of bytes to collect, or negative for no limit
      cmd_ev: its RunPendingTraps() is called whenever a read fails with EINTR

    Returns:
      (portion, eof).  eof is True only when the EOF sentinel was read.

    Raises:
      pyos.ReadError: for any read error other than EINTR
    """
    eof = False
    ch_array = []  # type: List[int]
    while True:
        # Only bytes that were actually stored count toward the limit.  A read
        # that was interrupted by a signal and then retried must not use up
        # part of the budget, or 'read -n N' would come back short.
        if max_chars >= 0 and len(ch_array) >= max_chars:
            break

        ch, err_num = pyos.ReadByte(STDIN_FILENO)
        if ch < 0:
            if err_num == EINTR:
                cmd_ev.RunPendingTraps()
                # retry after running traps
            else:
                raise pyos.ReadError(err_num)

        elif ch == pyos.EOF_SENTINEL:
            eof = True
            break

        elif ch == delim_byte:
            break

        else:
            ch_array.append(ch)

    return pyutil.ChArrayToString(ch_array), eof
186
187
def ReadLineSlowly(cmd_ev, with_eol=True):
    # type: (CommandEvaluator, bool) -> Tuple[str, bool]
    """Read one line from stdin a byte at a time, with no buffering.

    sys.stdin.readline() in Python has its own buffering which is incompatible
    with shell semantics.  dash, mksh, and zsh all read a single byte at a time
    with read(0, 1).

    Args:
      cmd_ev: its RunPendingTraps() is called whenever a read fails with EINTR
      with_eol: whether the terminating newline is kept in the result

    Returns:
      (line, eof).  eof is True when the EOF sentinel was read before a
      newline; the bytes collected so far are still returned.

    Raises:
      pyos.ReadError: for any read error other than EINTR
    """
    hit_eof = False
    line_bytes = []  # type: List[int]
    while True:
        byte, err_num = pyos.ReadByte(0)

        if byte < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            cmd_ev.RunPendingTraps()
            # retry after running traps

        elif byte == pyos.EOF_SENTINEL:
            hit_eof = True
            break

        else:
            line_bytes.append(byte)

        if byte != pyos.NEWLINE_CH:
            continue

        # The newline was appended above; drop it again if it isn't wanted.
        if not with_eol:
            line_bytes.pop()
        break

    return pyutil.ChArrayToString(line_bytes), hit_eof
221
222
def ReadAll():
    # type: () -> str
    """Read all of stdin, in requests of up to 4096 bytes, until EOF.

    Similar to command sub in core/executor.py.

    Returns:
      The joined pieces collected in the list handed to pyos.Read().

    Raises:
      pyos.ReadError: for any read error other than EINTR
    """
    pieces = []  # type: List[str]
    while True:
        got, err_num = pyos.Read(0, 4096, pieces)

        if got == 0:  # EOF
            break

        # On EINTR we retry only.  Like read --line (and command sub), read
        # --all doesn't run traps.  It would be a bit weird to run every 4096
        # bytes.
        if got < 0 and err_num != EINTR:
            raise pyos.ReadError(err_num)

    return ''.join(pieces)
246
247
class ctx_TermAttrs(object):
    """Context manager pairing pyos.PushTermAttrs with pyos.PopTermAttrs.

    PushTermAttrs is called when the object is constructed (not in
    __enter__), and PopTermAttrs is called with its results on exit.
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None
        """Push terminal attributes for fd.

        Args:
          fd: file descriptor handed to PushTermAttrs / PopTermAttrs
          local_modes: value handed to PushTermAttrs
        """
        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, saved_attrs = pyos.PushTermAttrs(fd, local_modes)

        # Workaround: destructured assignment into members doesn't work
        self.fd = fd
        self.orig_local_modes = saved_modes
        self.term_attrs = saved_attrs

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the push already happened in __init__.
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Hand back exactly what PushTermAttrs gave us.
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
268
269
class Read(vm._Builtin):
    """The 'read' builtin.

    Run() is the entry point.  Flags --raw-line, --all and --num-bytes are
    handled by _ReadYsh(), which stores one string into a place; everything
    else goes through _Read(), which splits the input and assigns shell
    variables by name.
    """

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.splitter = splitter  # provides SplitForRead()
        self.mem = mem  # where variables / places are set
        self.parse_ctx = parse_ctx  # only used by the disabled code below
        self.cmd_ev = cmd_ev  # runs pending traps when a read gets EINTR
        self.errfmt = errfmt  # prints read errors
        self.stdin_ = mylib.Stdin()  # used for the isatty() check

    # Was --qsn, might be restored as --j8-word or --j8-line
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They may
            # contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used for
            # $''.
            # Important: we don't generate Id.Unknown_Backslash because that is valid
            # in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and redirects, and
            # pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Run the builtin, turning read failures into a message and status 1.

        Returns:
          The status from _Run(), or 1 if pyos.ReadError, IOError or OSError
          was raised while reading.
        """
        try:
            status = self._Run(cmd_val)
        except pyos.ReadError as e:  # different paths for read -d, etc.
            # don't quote code since YSH errexit will likely quote
            self.errfmt.PrintMessage("Oils read error: %s" %
                                     posix.strerror(e.err_num))
            status = 1
        except (IOError, OSError) as e:  # different paths for read -d, etc.
            self.errfmt.PrintMessage("Oils read I/O error: %s" %
                                     pyutil.strerror(e))
            status = 1
        return status

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """Handle read --num-bytes, --raw-line and --all.

        Usage:

          read --all               # sets _reply
          read --all (&x)          # sets x

        Invalid for now:

          read (&x)                # YSH doesn't have token splitting
                                   # we probably want read --row too

        Returns:
          1 if --raw-line hit EOF, otherwise 0.

        Raises:
          error.Usage: if a word argument remains after the flags
        """
        place = None  # type: value.Place

        if cmd_val.typed_args:  # read --flag (&x)
            rd = typed_args.ReaderForProc(cmd_val)
            place = rd.PosPlace()
            rd.Done()

            blame_loc = cmd_val.typed_args.left  # type: loc_t

        else:  # read --flag
            var_name = '_reply'

            #log('VAR %s', var_name)
            blame_loc = cmd_val.arg_locs[0]
            place = value.Place(LeftName(var_name, blame_loc),
                                self.mem.TopNamespace())

        next_arg, next_loc = arg_r.Peek2()
        if next_arg is not None:
            raise error.Usage('got extra argument', next_loc)

        # The flags are checked in this order, so --num-bytes wins over
        # --raw-line, which wins over --all.
        num_bytes = mops.BigTruncate(arg.num_bytes)
        if num_bytes != -1:  # read --num-bytes
            contents = _ReadN(num_bytes, self.cmd_ev)
            status = 0

        elif arg.raw_line:  # read --raw-line is unbuffered
            contents, eof = ReadLineSlowly(self.cmd_ev, with_eol=arg.with_eol)
            status = 1 if eof else 0

        elif arg.all:  # read --all
            contents = ReadAll()
            status = 0

        else:
            # _Run() only dispatches here when one of the three flags is set
            raise AssertionError()

        self.mem.SetPlace(place, value.Str(contents), blame_loc)
        return status

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags, dispatch to _ReadYsh() or _Read(), and set up the tty.

        Returns:
          The exit status of the builtin.

        Raises:
          error.Usage: if typed args are given without one of the flags that
            accept them
        """
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        if arg.raw_line or arg.all or mops.BigTruncate(arg.num_bytes) != -1:
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.typed_args:
            # Keep this list of flags in sync with the condition above.
            raise error.Usage(
                "doesn't accept typed args without --all, --raw-line, or --num-bytes",
                cmd_val.typed_args.left)

        if arg.t >= 0.0:
            if arg.t != 0.0:
                e_die("read -t isn't implemented (except t=0)")
            else:
                # -t 0 only reports whether input is available
                return 0 if pyos.InputAvailable(STDIN_FILENO) else 1

        # Local-mode bits to turn off while reading; only relevant on a tty.
        bits = 0
        if self.stdin_.isatty():
            # -d and -n should be unbuffered
            if arg.d is not None or mops.BigTruncate(arg.n) >= 0:
                bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if bits == 0:
            status = self._Read(arg, names)
        else:
            # The original attributes are popped again when the block exits
            with ctx_TermAttrs(STDIN_FILENO, ~bits):
                status = self._Read(arg, names)
        return status

    def _Read(self, arg, names):
        # type: (arg_types.read, List[str]) -> int
        """Read input, optionally split it, and assign the named variables.

        Args:
          arg: parsed flags
          names: variable names to assign; 'REPLY' is appended to this list
                 if it is empty

        Returns:
          With -N: 0 if exactly that many bytes were read, else 1.
          Otherwise: 1 if EOF was hit, else 0.
        """
        # read a certain number of bytes, NOT respecting delimiter (-1 means
        # unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            s = _ReadN(arg_N, self.cmd_ev)

            if len(names):
                name = names[0]  # ignore other names

                # Clear extra names, as bash does
                for i in xrange(1, len(names)):
                    state.BuiltinSetString(self.mem, names[i], '')
            else:
                name = 'REPLY'  # default variable name

            state.BuiltinSetString(self.mem, name, s)

            # Did we read all the bytes we wanted?
            return 0 if len(s) == arg_N else 1

        do_split = False

        if len(names):
            do_split = True  # read myvar does word splitting
        else:
            # read without args does NOT split, and fills in $REPLY
            names.append('REPLY')

        if arg.a is not None:
            max_results = 0  # array can hold all parts
            do_split = True
        else:
            # Assign one part to each variable name; leftovers are assigned to
            # the last name
            max_results = len(names)

        if arg.Z:  # -0 is synonym for -r -d ''
            raw = True
            delim_byte = 0
        else:
            raw = arg.r
            if arg.d is not None:
                if len(arg.d):
                    delim_byte = ord(arg.d[0])
                else:
                    delim_byte = 0  # -d '' delimits by NUL
            else:
                delim_byte = pyos.NEWLINE_CH  # read a line

        # -n limit, the same for every portion (negative means no limit)
        max_chars = mops.BigTruncate(arg.n)

        # Read MORE THAN ONE line for \ line continuation (and not read -r)
        parts = []  # type: List[mylib.BufWriter]
        join_next = False
        status = 0
        while True:
            chunk, eof = _ReadPortion(delim_byte, max_chars, self.cmd_ev)

            if eof:
                # status 1 to terminate loop.  (This is true even though we set
                # variables).
                status = 1

            #log('LINE %r', chunk)
            if len(chunk) == 0:
                break

            spans = self.splitter.SplitForRead(chunk, not raw, do_split)
            done, join_next = _AppendParts(chunk, spans, max_results,
                                           join_next, parts)

            #log('PARTS %s continued %s', parts, continued)
            if done:
                break

        entries = [buf.getvalue() for buf in parts]
        num_parts = len(entries)
        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
        else:
            # Here max_results == len(names), so every name gets assigned
            for i in xrange(max_results):
                if i < num_parts:
                    s = entries[i]
                else:
                    s = ''  # if there are too many variables
                var_name = names[i]
                #log('read: %s = %s', var_name, s)
                state.BuiltinSetString(self.mem, var_name, s)

        return status