builtin/read

OILS / builtin / read_osh.py View on Github | oilshell.org

530 lines, 310 significant

1	from __future__ import print_function
2
3	from errno import EINTR
4
5	from _devbuild.gen import arg_types
6	from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
7	from _devbuild.gen.syntax_asdl import source, loc_t
8	from _devbuild.gen.value_asdl import value, LeftName
9	from core import alloc
10	from core import error
11	from core.error import e_die
12	from core import pyos
13	from core import pyutil
14	from core import state
15	from core import ui
16	from core import vm
17	from frontend import flag_util
18	from frontend import reader
19	from frontend import typed_args
20	from mycpp import mops
21	from mycpp import mylib
22	from mycpp.mylib import log, STDIN_FILENO
23
24	import posix_ as posix
25
26	from typing import Tuple, List, Any, TYPE_CHECKING
27	if TYPE_CHECKING:
28	from _devbuild.gen.runtime_asdl import span_t
29	from frontend.parse_lib import ParseContext
30	from frontend import args
31	from osh.cmd_eval import CommandEvaluator
32	from osh.split import SplitContext
33
34	_ = log
35
36	# The Read builtin splits using IFS.
37	#
38	# Summary:
39	# - Split with IFS, except \ can escape them! This is different than the
40	# algorithm for splitting words (at least the way I've represented it.)
41
42	# Bash manual:
43	# - If there are more words than names, the remaining words and their
44	# intervening delimiters are assigned to the last name.
45	# - If there are fewer words read from the input stream than names, the
46	# remaining names are assigned empty values.
47	# - The characters in the value of the IFS variable are used to split the line
48	# into words using the same rules the shell uses for expansion (described
49	# above in Word Splitting).
50	# - The backslash character '\' may be used to remove any special meaning for
51	# the next character read and for line continuation.
52
53
def _AppendParts(
        s,  # type: str
        spans,  # type: List[Tuple[span_t, int]]
        max_results,  # type: int
        join_next,  # type: bool
        parts,  # type: List[mylib.BufWriter]
):
    # type: (...) -> Tuple[bool, bool]
    """Fold the spans of one input chunk into 'parts', for the 'read' builtin.

    Similar to _SpansToParts in osh/split.py

    Args:
      s: The original string that the spans index into
      spans: List of (span type, end_index); each span starts where the
             previous one ended, and the first starts at index 0
      max_results: the maximum number of parts we want; 0 means no limit
      join_next: Whether the next span should be glued onto the last part
                 instead of starting a new one.  This happens in two cases:
                 - when a backslash follows a black (non-delimiter) span
                 - when we already have max_results parts
      parts: Output list of buffers; mutated in place

    Returns:
      (done, join_next).  'done' is False only when the last span is a
      Backslash, i.e. the chunk ends in a line continuation.  'join_next' is
      the updated state to pass to the next call.
    """
    prev_end = 0
    # If the previous span was black and we then see a backslash, the next
    # black span must be merged into the same part.
    prev_was_black = False

    for kind, end in spans:
        if kind == span_e.Backslash:
            if prev_was_black:
                join_next = True
            prev_was_black = False

        elif kind == span_e.Delim:
            # Delimiters are only kept when they are being glued onto an
            # existing part; otherwise they are dropped.
            if join_next:
                parts[-1].write(s[prev_end:end])
                join_next = False
            prev_was_black = False

        elif kind == span_e.Black:
            if join_next and len(parts) > 0:
                parts[-1].write(s[prev_end:end])
                join_next = False
            else:
                writer = mylib.BufWriter()
                writer.write(s[prev_end:end])
                parts.append(writer)
            prev_was_black = True

        # Once we have as many parts as requested, everything that follows
        # is appended to the last one.
        if max_results != 0 and len(parts) >= max_results:
            join_next = True

        prev_end = end

    if len(spans) == 0:
        return True, join_next

    last_kind, _ = spans[-1]
    return last_kind != span_e.Backslash, join_next
117
118
119	#
120	# Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
121	# _ReadPortion, and ReadLineSlowly
122	#
123
124
def _ReadN(num_bytes, cmd_ev):
    # type: (int, CommandEvaluator) -> str
    """Read up to num_bytes bytes from stdin, running traps on EINTR.

    Keeps calling pyos.Read() until the requested number of bytes has been
    read or EOF is hit, so the result may be shorter than num_bytes.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    pieces = []  # type: List[str]
    remaining = num_bytes
    while remaining > 0:
        # Asks for at most 'remaining' bytes; the data goes into 'pieces'
        n, err_num = pyos.Read(STDIN_FILENO, remaining, pieces)

        if n == 0:  # EOF
            break

        if n > 0:
            remaining -= n
            continue

        # n < 0: the read failed
        if err_num != EINTR:
            raise pyos.ReadError(err_num)

        # Interrupted by a signal: run traps, then retry
        cmd_ev.RunPendingTraps()

    return ''.join(pieces)
147
148
def _ReadPortion(delim_byte, max_chars, cmd_ev):
    # type: (int, int, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of stdin, one byte at a time.

    Reads until the delimiter or until max_chars bytes have been stored,
    whichever comes first.  A negative max_chars (e.g. -1) means no limit.

    The delimiter is not included in the result.

    Args:
      delim_byte: byte value that terminates the portion
      max_chars: maximum number of bytes to return, or negative for no limit
      cmd_ev: used to run pending traps when the read is interrupted

    Returns:
      (contents, eof), where eof is True only if the end of input was reached
      before the delimiter or the limit.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    eof = False
    ch_array = []  # type: List[int]
    while True:
        # Only bytes that were actually stored count toward the limit.  An
        # EINTR retry consumes no input, so it must not use up the budget;
        # otherwise 'read -n N' could return fewer than N characters when a
        # signal arrives.
        if max_chars >= 0 and len(ch_array) >= max_chars:
            break

        ch, err_num = pyos.ReadByte(0)
        if ch < 0:
            if err_num == EINTR:
                cmd_ev.RunPendingTraps()
                # retry after running traps
            else:
                raise pyos.ReadError(err_num)

        elif ch == pyos.EOF_SENTINEL:
            eof = True
            break

        elif ch == delim_byte:
            break

        else:
            ch_array.append(ch)

    return pyutil.ChArrayToString(ch_array), eof
186
187
def ReadLineSlowly(cmd_ev, with_eol=True):
    # type: (CommandEvaluator, bool) -> Tuple[str, bool]
    """Read one line from stdin without buffering, a byte at a time.

    sys.stdin.readline() in Python has its own buffering which is incompatible
    with shell semantics.  dash, mksh, and zsh all read a single byte at a
    time with read(0, 1).

    Args:
      cmd_ev: used to run pending traps when the read is interrupted
      with_eol: if False, the trailing newline is stripped from the result

    Returns:
      (line, eof), where eof is True if the end of input was hit before a
      newline was seen.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    hit_eof = False
    line_bytes = []  # type: List[int]
    while True:
        ch, err_num = pyos.ReadByte(0)

        if ch < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            # Interrupted by a signal: run traps, then retry
            cmd_ev.RunPendingTraps()

        elif ch == pyos.EOF_SENTINEL:
            hit_eof = True
            break

        else:
            line_bytes.append(ch)

        if ch == pyos.NEWLINE_CH:
            # The newline was appended above; drop it again if the caller
            # doesn't want it.
            if not with_eol:
                line_bytes.pop()
            break

    line = pyutil.ChArrayToString(line_bytes)
    return line, hit_eof
221
222
def ReadAll():
    # type: () -> str
    """Read all of stdin, until EOF, and return it as one string.

    Similar to command sub in core/executor.py.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    pieces = []  # type: List[str]
    while True:
        n, err_num = pyos.Read(0, 4096, pieces)

        if n == 0:  # EOF
            break

        # On EINTR we only retry.  Like read --line (and command sub), read
        # --all doesn't run traps.  It would be a bit weird to run every 4096
        # bytes.
        if n < 0 and err_num != EINTR:
            raise pyos.ReadError(err_num)

    return ''.join(pieces)
246
247
class ctx_TermAttrs(object):
    """Context manager that changes terminal attributes and restores them.

    The constructor calls pyos.PushTermAttrs() with the given local modes, and
    leaving the 'with' block calls pyos.PopTermAttrs() with the values that
    were saved.
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None
        self.fd = fd

        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, saved_attrs = pyos.PushTermAttrs(fd, local_modes)

        # Workaround: destructured assignment into members doesn't work, so
        # unpack into locals first
        self.orig_local_modes = saved_modes
        self.term_attrs = saved_attrs

    def __enter__(self):
        # type: () -> None
        # All the work happens in the constructor
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Runs on both normal and exceptional exit
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
268
269
class Read(vm._Builtin):
    """The 'read' builtin.

    Handles the shell-style flags (-N, -n, -d, -0, -r, -a, -s, -p, -t) with
    IFS splitting, and dispatches the YSH-style flags (--all, --raw-line,
    --num-bytes) to _ReadYsh().
    """

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.stdin_ = mylib.Stdin()
        self.errfmt = errfmt
        self.cmd_ev = cmd_ev
        self.parse_ctx = parse_ctx
        self.mem = mem
        self.splitter = splitter

    # Was --qsn, might be restored as --j8-word or --j8-line
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They may
            # contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used for
            # $''.
            # Important: we don't generate Id.Unknown_Backslash because that is valid
            # in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and redirects, and
            # pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Entry point: run 'read', turning read and I/O errors into status 1.

        The different code paths (read -d, etc.) can fail in different ways,
        so both pyos.ReadError and IOError/OSError are reported here.
        """
        try:
            return self._Run(cmd_val)
        except pyos.ReadError as e:
            # don't quote code since YSH errexit will likely quote
            msg = "Oils read error: %s" % posix.strerror(e.err_num)
            self.errfmt.PrintMessage(msg)
        except (IOError, OSError) as e:
            msg = "Oils read I/O error: %s" % pyutil.strerror(e)
            self.errfmt.PrintMessage(msg)
        return 1

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """Handle the YSH-style flags.

        Usage:

          read --all         # sets _reply
          read --all (&x)    # sets x

        Invalid for now:

          read (&x)          # YSH doesn't have token splitting
                             # we probably want read --row too

        Returns:
          The exit status: 1 if read --raw-line hit EOF, otherwise 0.
        """
        place = None  # type: value.Place

        if cmd_val.typed_args:  # read --flag (&x)
            place_reader = typed_args.ReaderForProc(cmd_val)
            place = place_reader.PosPlace()
            place_reader.Done()

            blame_loc = cmd_val.typed_args.left  # type: loc_t

        else:  # read --flag
            # No place was given, so the result goes to the default variable
            blame_loc = cmd_val.arg_locs[0]
            lval = LeftName('_reply', blame_loc)
            place = value.Place(lval, self.mem.TopNamespace())

        extra_arg, extra_loc = arg_r.Peek2()
        if extra_arg is not None:
            raise error.Usage('got extra argument', extra_loc)

        if arg.line:  # read --line is buffered, calls getline()
            raise error.Usage(
                "no longer supports --line; please use read -r instead (unbuffered I/O)",
                extra_loc)

        status = 0
        num_bytes = mops.BigTruncate(arg.num_bytes)
        if num_bytes != -1:  # read --num-bytes
            contents = _ReadN(num_bytes, self.cmd_ev)

        elif arg.raw_line:  # read --raw-line is unbuffered
            contents, eof = ReadLineSlowly(self.cmd_ev, with_eol=arg.with_eol)
            if eof:
                status = 1

        elif arg.all:  # read --all
            contents = ReadAll()

        else:
            # The caller only dispatches here when one of the flags is set
            raise AssertionError()

        self.mem.SetPlace(place, value.Str(contents), blame_loc)
        return status

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags, then dispatch to _ReadYsh() or _Read().

        When stdin is a terminal, the terminal modes are adjusted around the
        read for -d / -n (unbuffered) and -s (silent), and the -p prompt is
        written to stderr.
        """
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        #if arg.q and not arg.line:
        #  e_usage('--qsn can only be used with --line', loc.Missing)

        if (arg.line or arg.raw_line or arg.all or
                mops.BigTruncate(arg.num_bytes) != -1):
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.typed_args:
            raise error.Usage(
                "doesn't accept typed args without --all, or --num-bytes",
                cmd_val.typed_args.left)

        if arg.t >= 0.0:
            if arg.t != 0.0:
                e_die("read -t isn't implemented (except t=0)")
            else:
                # -t 0 only polls: succeed iff input is available
                if pyos.InputAvailable(STDIN_FILENO):
                    return 0
                return 1

        # Terminal local-mode bits to turn off for the duration of the read
        clear_bits = 0
        if self.stdin_.isatty():
            # -d and -n should be unbuffered
            unbuffered = (arg.d is not None or mops.BigTruncate(arg.n) >= 0)
            if unbuffered:
                clear_bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                clear_bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if clear_bits == 0:
            return self._Read(arg, names)

        with ctx_TermAttrs(STDIN_FILENO, ~clear_bits):
            status = self._Read(arg, names)
        return status

    def _Read(self, arg, names):
        # type: (arg_types.read, List[str]) -> int
        """Read input and assign it to variables, for the shell-style flags.

        Args:
          arg: parsed flags
          names: variable names to assign to.  If empty, 'REPLY' is appended
                 to this list (except with -N, which leaves it untouched).

        Returns:
          The exit status: with -N, 0 only if exactly N bytes were read;
          otherwise 1 if EOF was hit, else 0.
        """
        # read a certain number of bytes, NOT respecting delimiter (-1 means
        # unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            data = _ReadN(arg_N, self.cmd_ev)

            if len(names) == 0:
                target = 'REPLY'  # default variable name
            else:
                target = names[0]  # ignore other names

                # Clear extra names, as bash does
                for i in xrange(1, len(names)):
                    state.BuiltinSetString(self.mem, names[i], '')

            state.BuiltinSetString(self.mem, target, data)

            # Did we read all the bytes we wanted?
            if len(data) == arg_N:
                return 0
            return 1

        # read myvar does word splitting; read without args does NOT split,
        # and fills in $REPLY
        do_split = len(names) > 0
        if not do_split:
            names.append('REPLY')

        if arg.a is None:
            # Assign one part to each variable name; leftovers are assigned to
            # the last name
            max_results = len(names)
        else:
            max_results = 0  # array can hold all parts
            do_split = True

        delim_byte = pyos.NEWLINE_CH  # by default, read a line
        if arg.Z:  # -0 is synonym for -r -d ''
            raw = True
            delim_byte = 0
        else:
            raw = arg.r
            if arg.d is not None:
                # -d '' delimits by NUL
                delim_byte = ord(arg.d[0]) if len(arg.d) else 0

        # Read MORE THAN ONE line for \ line continuation (and not read -r)
        parts = []  # type: List[mylib.BufWriter]
        join_next = False
        status = 0
        while True:
            chunk, eof = _ReadPortion(delim_byte, mops.BigTruncate(arg.n),
                                      self.cmd_ev)

            if eof:
                # status 1 to terminate loop.  (This is true even though we
                # set variables).
                status = 1

            #log('LINE %r', chunk)
            if len(chunk) == 0:
                break

            spans = self.splitter.SplitForRead(chunk, not raw, do_split)
            done, join_next = _AppendParts(chunk, spans, max_results,
                                           join_next, parts)

            #log('PARTS %s continued %s', parts, continued)
            if done:
                break

        entries = []  # type: List[str]
        for buf in parts:
            entries.append(buf.getvalue())
        num_parts = len(entries)

        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
            return status

        for i in xrange(max_results):
            # If there are more variables than parts, the rest become empty
            text = entries[i] if i < num_parts else ''
            #log('read: %s = %s', names[i], text)
            state.BuiltinSetString(self.mem, names[i], text)

        return status