| 1 | from __future__ import print_function
|
| 2 |
|
| 3 | from errno import EINTR
|
| 4 |
|
| 5 | from _devbuild.gen import arg_types
|
| 6 | from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
|
| 7 | from _devbuild.gen.syntax_asdl import source, loc_t
|
| 8 | from _devbuild.gen.value_asdl import value, LeftName
|
| 9 | from core import alloc
|
| 10 | from core import error
|
| 11 | from core.error import e_die
|
| 12 | from core import pyos
|
| 13 | from core import pyutil
|
| 14 | from core import state
|
| 15 | from core import ui
|
| 16 | from core import vm
|
| 17 | from frontend import flag_util
|
| 18 | from frontend import reader
|
| 19 | from frontend import typed_args
|
| 20 | from mycpp import mops
|
| 21 | from mycpp import mylib
|
| 22 | from mycpp.mylib import log, STDIN_FILENO
|
| 23 |
|
| 24 | import posix_ as posix
|
| 25 |
|
| 26 | from typing import Tuple, List, Any, TYPE_CHECKING
|
| 27 | if TYPE_CHECKING:
|
| 28 | from _devbuild.gen.runtime_asdl import span_t
|
| 29 | from frontend.parse_lib import ParseContext
|
| 30 | from frontend import args
|
| 31 | from osh.cmd_eval import CommandEvaluator
|
| 32 | from osh.split import SplitContext
|
| 33 |
|
| 34 | _ = log
|
| 35 |
|
| 36 | # The Read builtin splits using IFS.
|
| 37 | #
|
| 38 | # Summary:
|
| 39 | # - Split with IFS, except \ can escape them! This is different than the
|
| 40 | # algorithm for splitting words (at least the way I've represented it.)
|
| 41 |
|
| 42 | # Bash manual:
|
| 43 | # - If there are more words than names, the remaining words and their
|
| 44 | # intervening delimiters are assigned to the last name.
|
| 45 | # - If there are fewer words read from the input stream than names, the
|
| 46 | # remaining names are assigned empty values.
|
| 47 | # - The characters in the value of the IFS variable are used to split the line
|
| 48 | # into words using the same rules the shell uses for expansion (described
|
| 49 | # above in Word Splitting).
|
| 50 | # - The backslash character '\' may be used to remove any special meaning for
|
| 51 | # the next character read and for line continuation.
|
| 52 |
|
| 53 |
|
def _AppendParts(
        s,  # type: str
        spans,  # type: List[Tuple[span_t, int]]
        max_results,  # type: int
        join_next,  # type: bool
        parts,  # type: List[mylib.BufWriter]
):
    # type: (...) -> Tuple[bool, bool]
    """Fold the spans of one chunk into 'parts', for the 'read' builtin.

    Similar to _SpansToParts in osh/split.py

    Args:
      s: The original string that the spans index into
      spans: List of (span type, end_index); each span starts where the
             previous one ended
      max_results: the maximum number of parts we want; 0 disables the limit
      join_next: Whether the next span should be appended to the last
                 existing part rather than starting a new one.  This is set
                 in two cases:
                 - a backslash directly follows a black span
                 - we already have max_results parts
      parts: output list, mutated in place

    Returns:
      (done, join_next)
      done is False only when the last span is a backslash.
      join_next is the updated flag, to be passed to the next call.
    """
    begin = 0
    # Remember whether the previous span was black: a backslash right after
    # it merges the following black span into the same part.
    prev_black = False

    for kind, end in spans:
        if kind == span_e.Backslash:
            if prev_black:
                join_next = True
            prev_black = False

        elif kind == span_e.Delim:
            # Delimiters are normally dropped, but kept verbatim when we are
            # joining onto the last part.
            if join_next:
                parts[-1].write(s[begin:end])
                join_next = False
            prev_black = False

        elif kind == span_e.Black:
            if not join_next or len(parts) == 0:
                fresh = mylib.BufWriter()
                fresh.write(s[begin:end])
                parts.append(fresh)
            else:
                parts[-1].write(s[begin:end])
                join_next = False
            prev_black = True

        # Once the limit is reached, everything else goes onto the last part.
        if max_results != 0 and len(parts) >= max_results:
            join_next = True

        begin = end

    if len(spans) == 0:
        return True, join_next

    last_kind, _unused_end = spans[-1]
    return last_kind != span_e.Backslash, join_next
|
| 117 |
|
| 118 |
|
| 119 | #
|
| 120 | # Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
|
| 121 | # _ReadPortion, and ReadLineSlowly
|
| 122 | #
|
| 123 |
|
| 124 |
|
def _ReadN(num_bytes, cmd_ev):
    # type: (int, CommandEvaluator) -> str
    """Read up to num_bytes bytes from stdin.

    Keeps calling pyos.Read() until the requested count is reached or EOF is
    seen.  When a read is interrupted (EINTR), pending traps are run and the
    read is retried.

    Returns:
      The bytes read, which is shorter than num_bytes if EOF came first.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    pieces = []  # type: List[str]
    remaining = num_bytes
    while remaining > 0:
        # Reads up to 'remaining' bytes, appending to pieces
        n, err_num = pyos.Read(STDIN_FILENO, remaining, pieces)

        if n == 0:  # EOF
            break

        if n > 0:
            remaining -= n
            continue

        # n < 0: error
        if err_num != EINTR:
            raise pyos.ReadError(err_num)
        cmd_ev.RunPendingTraps()
        # loop around to retry after running traps

    return ''.join(pieces)
|
| 147 |
|
| 148 |
|
def _ReadPortion(delim_byte, max_chars, cmd_ev):
    # type: (int, int, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of stdin, one byte at a time.

    Reads until the delimiter, EOF, or max_chars stored bytes, whichever comes
    first.  The limit is ignored when max_chars is negative (e.g. -1).

    The delimiter is not included in the result.

    Args:
      delim_byte: byte value that terminates the portion
      max_chars: maximum number of bytes to collect, or negative for no limit
      cmd_ev: used to run pending traps when a read is interrupted (EINTR)

    Returns:
      (portion, eof): the bytes collected, and whether EOF ended the read

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    eof = False
    ch_array = []  # type: List[int]
    bytes_read = 0
    while True:
        if max_chars >= 0 and bytes_read >= max_chars:
            break

        ch, err_num = pyos.ReadByte(0)
        if ch < 0:
            if err_num == EINTR:
                cmd_ev.RunPendingTraps()
                # retry after running traps
            else:
                raise pyos.ReadError(err_num)

        elif ch == pyos.EOF_SENTINEL:
            eof = True
            break

        elif ch == delim_byte:
            break

        else:
            ch_array.append(ch)
            # Count only bytes that were actually stored.  An interrupted
            # read delivers no data, so it must not use up the max_chars
            # budget; otherwise 'read -n N' could return fewer than N bytes
            # just because a signal arrived.
            bytes_read += 1

    return pyutil.ChArrayToString(ch_array), eof
|
| 186 |
|
| 187 |
|
| 188 | # sys.stdin.readline() in Python has its own buffering which is incompatible
|
| 189 | # with shell semantics. dash, mksh, and zsh all read a single byte at a
|
| 190 | # time with read(0, 1).
|
| 191 |
|
| 192 | # TODO:
|
| 193 | # - ReadLineSlowly should have keep_newline (mapfile -t)
|
| 194 | # - this halves memory usage!
|
| 195 |
|
| 196 |
|
def ReadLineSlowly(cmd_ev):
    # type: (CommandEvaluator) -> str
    """Read a line from stdin, one byte at a time.

    Stops after a newline (which is kept in the result) or at EOF.  When a
    read is interrupted (EINTR), pending traps are run and the read is
    retried.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    line_bytes = []  # type: List[int]
    while True:
        ch, err_num = pyos.ReadByte(0)

        if ch < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            cmd_ev.RunPendingTraps()
            # fall through; the loop retries after running traps

        elif ch == pyos.EOF_SENTINEL:
            break

        else:
            line_bytes.append(ch)

        # TODO: Add option to omit newline
        if ch == pyos.NEWLINE_CH:
            break

    return pyutil.ChArrayToString(line_bytes)
|
| 222 |
|
| 223 |
|
def ReadAll():
    # type: () -> str
    """Read all of stdin, until EOF.

    Similar to command sub in core/executor.py.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    pieces = []  # type: List[str]
    while True:
        n, err_num = pyos.Read(0, 4096, pieces)

        if n == 0:  # EOF
            break

        # On EINTR we retry only.  Like read --line (and command sub),
        # read --all doesn't run traps.  It would be a bit weird to run every
        # 4096 bytes.
        if n < 0 and err_num != EINTR:
            raise pyos.ReadError(err_num)

    return ''.join(pieces)
|
| 246 |
|
| 247 |
|
class ctx_TermAttrs(object):
    """Context manager around pyos.PushTermAttrs() / pyos.PopTermAttrs().

    The terminal attributes are pushed when the object is constructed, and
    popped when the 'with' block exits.
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None
        """Push local_modes onto fd and remember what is needed to undo it."""
        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, saved_attrs = pyos.PushTermAttrs(fd, local_modes)

        # Workaround: destructured assignment into members doesn't work, so
        # unpack into locals first
        self.fd = fd
        self.orig_local_modes = saved_modes
        self.term_attrs = saved_attrs

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the attributes were already pushed in __init__
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        """Pop the terminal attributes using the values saved in __init__."""
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
|
| 268 |
|
| 269 |
|
class Read(vm._Builtin):
    """The 'read' builtin.

    Run() is the entry point.  _Run() parses flags and dispatches to either
    _ReadYsh() (--all, --num-bytes) or _Read() (everything else).
    """

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.splitter = splitter
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.cmd_ev = cmd_ev
        self.errfmt = errfmt
        self.stdin_ = mylib.Stdin()

    # Was --qsn, might be restored as --j8-word or --j8-line
    # Disabled code, kept for reference.
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They may
            # contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used
            # for $''.
            # Important: we don't generate Id.Unknown_Backslash because that
            # is valid in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and redirects,
            # and pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Run the builtin, turning read failures into a message and status 1.

        Both pyos.ReadError and IOError/OSError are handled, because there
        are different paths for read -d, etc.
        """
        try:
            return self._Run(cmd_val)

        except pyos.ReadError as e:
            # don't quote code since YSH errexit will likely quote
            msg = "Oils read error: %s" % posix.strerror(e.err_num)
            self.errfmt.PrintMessage(msg)
            return 1

        except (IOError, OSError) as e:
            msg = "Oils read I/O error: %s" % pyutil.strerror(e)
            self.errfmt.PrintMessage(msg)
            return 1

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """Handle the YSH flags, storing the result in a place.

        Usage:

          read --all               # sets _reply
          read --all (&x)          # sets x

        Invalid for now:

          read (&x)                # YSH doesn't have token splitting
                                   # we probably want read --row too

        Raises:
          error.Usage: on an extra argument, or when --line is passed
        """
        place = None  # type: value.Place

        if cmd_val.typed_args:  # read --flag (&x)
            typed_rd = typed_args.ReaderForProc(cmd_val)
            place = typed_rd.PosPlace()
            typed_rd.Done()

            blame_loc = cmd_val.typed_args.left  # type: loc_t

        else:  # read --flag
            # No place given, so fall back to the _reply variable
            blame_loc = cmd_val.arg_locs[0]
            reply_name = LeftName('_reply', blame_loc)
            place = value.Place(reply_name, self.mem.TopNamespace())

        next_arg, next_loc = arg_r.Peek2()
        if next_arg is not None:
            raise error.Usage('got extra argument', next_loc)

        if arg.line:  # read --line is buffered, calls getline()
            raise error.Usage(
                "no longer supports --line; please use read -r instead (unbuffered I/O)",
                next_loc)

        num_bytes = mops.BigTruncate(arg.num_bytes)
        if num_bytes != -1:  # read --num-bytes
            contents = _ReadN(num_bytes, self.cmd_ev)
        elif arg.all:  # read --all
            contents = ReadAll()
        else:
            # The caller only dispatches here when one of the flags is set
            raise AssertionError()

        self.mem.SetPlace(place, value.Str(contents), blame_loc)
        return 0

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags, then dispatch to _ReadYsh() or _Read().

        When stdin is a terminal, the prompt (-p) is written to stderr, and
        terminal modes are adjusted around _Read() for -d / -n / -s.
        """
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        #if arg.q and not arg.line:
        #  e_usage('--qsn can only be used with --line', loc.Missing)

        wants_ysh = (arg.line or arg.all or
                     mops.BigTruncate(arg.num_bytes) != -1)
        if wants_ysh:
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.typed_args:
            raise error.Usage(
                "doesn't accept typed args without --all, or --num-bytes",
                cmd_val.typed_args.left)

        if arg.t >= 0.0:
            if arg.t == 0.0:
                # -t 0 only tests whether input is available
                return 0 if pyos.InputAvailable(STDIN_FILENO) else 1
            e_die("read -t isn't implemented (except t=0)")

        # Local mode bits to pass (inverted) to ctx_TermAttrs; 0 means leave
        # the terminal alone
        bits = 0
        if self.stdin_.isatty():
            # -d and -n should be unbuffered
            if arg.d is not None or mops.BigTruncate(arg.n) >= 0:
                bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if bits != 0:
            with ctx_TermAttrs(STDIN_FILENO, ~bits):
                status = self._Read(arg, names)
        else:
            status = self._Read(arg, names)
        return status

    def _Read(self, arg, names):
        # type: (arg_types.read, List[str]) -> int
        """Read from stdin and assign to variables or an array.

        Args:
          arg: parsed flags
          names: variable names to assign to.  If empty, 'REPLY' is appended
                 to this list.

        Returns:
          With -N: 0 if exactly that many bytes were read, else 1.
          Otherwise: 1 if EOF was hit, else 0.
        """
        # read a certain number of bytes, NOT respecting delimiter (-1 means
        # unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            s = _ReadN(arg_N, self.cmd_ev)

            name = 'REPLY'  # default variable name
            if len(names) > 0:
                name = names[0]  # ignore other names

                # Clear extra names, as bash does
                for k in xrange(1, len(names)):
                    state.BuiltinSetString(self.mem, names[k], '')

            state.BuiltinSetString(self.mem, name, s)

            # Did we read all the bytes we wanted?
            return 0 if len(s) == arg_N else 1

        # read myvar does word splitting.  read without args does NOT split,
        # and fills in $REPLY.
        do_split = len(names) > 0
        if not do_split:
            names.append('REPLY')

        if arg.a is None:
            # Assign one part to each variable name; leftovers are assigned to
            # the last name
            max_results = len(names)
        else:
            max_results = 0  # array can hold all parts
            do_split = True

        if arg.Z:  # -0 is synonym for -r -d ''
            raw = True
            delim_byte = 0
        else:
            raw = arg.r
            if arg.d is None:
                delim_byte = pyos.NEWLINE_CH  # read a line
            elif len(arg.d) == 0:
                delim_byte = 0  # -d '' delimits by NUL
            else:
                delim_byte = ord(arg.d[0])

        # Read MORE THAN ONE line for \ line continuation (and not read -r)
        parts = []  # type: List[mylib.BufWriter]
        join_next = False
        status = 0
        while True:
            chunk, eof = _ReadPortion(delim_byte, mops.BigTruncate(arg.n),
                                      self.cmd_ev)

            if eof:
                # status 1 to terminate loop.  (This is true even though we
                # set variables).
                status = 1

            if len(chunk) == 0:
                break

            spans = self.splitter.SplitForRead(chunk, not raw, do_split)
            done, join_next = _AppendParts(chunk, spans, max_results,
                                           join_next, parts)
            if done:
                break

        entries = [buf.getvalue() for buf in parts]
        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
            return status

        num_parts = len(entries)
        for i in xrange(max_results):
            # Names beyond the number of parts get the empty string
            val = entries[i] if i < num_parts else ''
            state.BuiltinSetString(self.mem, names[i], val)

        return status
|