| 1 | """
|
| 2 | string_ops.py - String library functions that can be exposed with a saner syntax.
|
| 3 |
|
| 4 | OSH:
|
| 5 |
|
| 6 | local y=${x//a*/b}
|
| 7 |
|
| 8 | YSH:
|
| 9 |
|
| 10 | var y = x => sub('a*', 'b', :ALL)
|
| 11 |
|
| 12 | Pass x => sub('a*', 'b', :ALL) => var y
|
| 13 | """
|
| 14 |
|
| 15 | from _devbuild.gen.id_kind_asdl import Id
|
| 16 | from _devbuild.gen.syntax_asdl import loc, Token, suffix_op
|
| 17 | from core import pyutil
|
| 18 | from core import ui
|
| 19 | from core import error
|
| 20 | from core.error import e_die, e_strict
|
| 21 | from mycpp.mylib import log
|
| 22 | from mycpp import mylib
|
| 23 | from osh import glob_
|
| 24 |
|
| 25 | import libc
|
| 26 | import fastfunc
|
| 27 |
|
| 28 | from typing import List, Tuple
|
| 29 |
|
| 30 | _ = log

# Error types returned by fastfunc.Utf8DecodeOne
# Derived from Utf8Error enum from data_lang/utf8.h
UTF8_ERR_OVERLONG = -1  # Encodes a codepoint in more bytes than necessary
UTF8_ERR_SURROGATE = -2  # Encodes a codepoint in the surrogate range (0xD800 to 0xDFFF)
UTF8_ERR_TOO_LARGE = -3  # Encodes a value greater than the max codepoint U+10FFFF
UTF8_ERR_BAD_ENCODING = -4  # Encoding doesn't conform to the UTF-8 bit patterns
UTF8_ERR_TRUNCATED_BYTES = -5  # It looks like there is another codepoint, but it has been truncated


def Utf8Error_str(error):
    # type: (int) -> str
    if error == UTF8_ERR_OVERLONG:
        return "UTF-8 Error: Decoded Overlong"
    if error == UTF8_ERR_SURROGATE:
        return "UTF-8 Error: Decoded Surrogate"
    if error == UTF8_ERR_TOO_LARGE:
        return "UTF-8 Error: Decoded Invalid Codepoint"
    if error == UTF8_ERR_BAD_ENCODING:
        return "UTF-8 Error: Bad Encoding"
    if error == UTF8_ERR_TRUNCATED_BYTES:
        return "UTF-8 Error: Truncated Bytes"

    raise AssertionError(0)


def DecodeUtf8Char(s, start):
    # type: (str, int) -> int
    """Given a string and start index, decode the Unicode char immediately
    following the start index.  The start location is in bytes and should be
    found using a function like NextUtf8Char or PreviousUtf8Char.

    If the codepoint is invalid, we raise an `error.Expr`.  (This is different
    from {Next,Previous}Utf8Char, which raise an `error.Strict` on encoding
    errors.)
    """
    codepoint_or_error, _bytes_read = fastfunc.Utf8DecodeOne(s, start)
    if codepoint_or_error < 0:
        raise error.Expr(
            "%s at byte index %d in string of length %d" %
            (Utf8Error_str(codepoint_or_error), start, len(s)), loc.Missing)
    return codepoint_or_error


def NextUtf8Char(s, i):
    # type: (str, int) -> int
    """Given a string and a byte offset, returns the byte position after the
    character at this position.  Usually this is the position of the next
    character, but for the last character in the string, it's the position just
    past the end of the string.

    Validates UTF-8.
    """
    codepoint_or_error, bytes_read = fastfunc.Utf8DecodeOne(s, i)
    if codepoint_or_error < 0:
        e_strict(
            "%s at byte index %d in string of length %d" %
            (Utf8Error_str(codepoint_or_error), i, len(s)), loc.Missing)
    return i + bytes_read


_INVALID_START = 'Invalid start of UTF-8 sequence'


def _Utf8CharLen(starting_byte):
    # type: (int) -> int
    if (starting_byte >> 7) == 0b0:
        return 1
    elif (starting_byte >> 5) == 0b110:
        return 2
    elif (starting_byte >> 4) == 0b1110:
        return 3
    elif (starting_byte >> 3) == 0b11110:
        return 4
    else:
        e_strict(_INVALID_START, loc.Missing)


def PreviousUtf8Char(s, i):
    # type: (str, int) -> int
    """Given a string and a byte offset, returns the position of the character
    before that offset.  To start (find the first byte of the last character),
    pass len(s) for the initial value of i.

    Validates UTF-8.
    """
    # All bytes in a valid UTF-8 string have one of the following formats:
    #
    #   0xxxxxxx (1-byte char)
    #   110xxxxx (start of 2-byte char)
    #   1110xxxx (start of 3-byte char)
    #   11110xxx (start of 4-byte char)
    #   10xxxxxx (continuation byte)
    #
    # Any byte that starts with 10... MUST be a continuation byte,
    # otherwise it must be the start of a character (or just invalid
    # data).
    #
    # Walking backward, we stop at the first non-continuation byte
    # found.  We try to interpret it as a valid UTF-8 character starting
    # byte, and check that it indicates the correct length, based on how
    # far we've moved from the original byte.  Possible problems:
    #   * the byte we stopped on does not have a valid value (e.g., 11111111)
    #   * the start byte indicates more or fewer continuation bytes than we've seen
    #   * no start byte at the beginning of the array
    #
    # Note that because we are going backward, on malformed input, we
    # won't error out in the same place as when parsing the string
    # forwards as normal.
    orig_i = i

    while i > 0:
        i -= 1
        byte_as_int = mylib.ByteAt(s, i)
        if (byte_as_int >> 6) != 0b10:
            offset = orig_i - i
            if offset != _Utf8CharLen(byte_as_int):
                # Leaving a generic error for now, but if we want to, it's not
                # hard to calculate the position where things go wrong.  Note
                # that offset might be more than 4, for an invalid utf-8 string.
                e_strict(_INVALID_START, loc.Missing)
            return i

    e_strict(_INVALID_START, loc.Missing)
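
# A small worked example of the UTF-8 byte-walking helpers above (illustration
# only, not used by the module).  For the 2-byte character U+00E9, encoded as
# b'\xc3\xa9', the string 'a\xc3\xa9' walks as follows:
#
#   NextUtf8Char('a\xc3\xa9', 0)      == 1     # past the 1-byte 'a'
#   NextUtf8Char('a\xc3\xa9', 1)      == 3     # past the 2-byte \xc3\xa9
#   PreviousUtf8Char('a\xc3\xa9', 3)  == 1     # back to the start byte \xc3
#   DecodeUtf8Char('a\xc3\xa9', 1)    == 0xE9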


def CountUtf8Chars(s):
    # type: (str) -> int
    """Returns the number of utf-8 characters in the byte string 's'.

    TODO: Raise exception rather than returning a string, so we can set the
    exit code of the command to 1?

    $ echo ${#bad}
    Invalid utf-8 at index 3 of string 'bad': 'ab\xffd'
    $ echo $?
    1
    """
    num_chars = 0
    num_bytes = len(s)
    i = 0
    while i < num_bytes:
        i = NextUtf8Char(s, i)
        num_chars += 1
    return num_chars


def AdvanceUtf8Chars(s, num_chars, byte_offset):
    # type: (str, int, int) -> int
    """Starting from a byte offset, advance by N UTF-8 runes.

    Returns a byte offset.

    Used for shell slicing.
    """
    num_bytes = len(s)
    i = byte_offset  # current byte position

    for _ in xrange(num_chars):
        # Neither bash nor zsh checks out of bounds for slicing, either begin
        # or length.
        if i >= num_bytes:
            return i
            #raise RuntimeError('Out of bounds')

        i = NextUtf8Char(s, i)

    return i
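
# Small illustration of the two counting helpers above (added example).  For
# s = 'a\xc3\xa9x', which holds the three characters 'a', U+00E9, 'x':
#
#   CountUtf8Chars('a\xc3\xa9x')         == 3
#   AdvanceUtf8Chars('a\xc3\xa9x', 2, 0) == 3   # byte offset after 2 runes
#
# A slice like ${s:2} presumably starts at that byte offset rather than at
# raw byte index 2.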


# Limited Unicode codepoints for whitespace characters.
# Oils intentionally does not include characters from <USP>, as that set
# depends on the version of the Unicode standard used.
#
# See discussion on the original pull request which added this list here:
#
#   https://github.com/oilshell/oil/pull/1836#issuecomment-1942173520
#
# See also the Mozilla Javascript documentation, and the note on how
# changes to the standard affected Javascript:
#
#   https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space

SPACES = [
    0x0009,  # Horizontal tab (\t)
    0x000A,  # Newline (\n)
    0x000B,  # Vertical tab (\v)
    0x000C,  # Form feed (\f)
    0x000D,  # Carriage return (\r)
    0x0020,  # Normal space
    0x00A0,  # No-break space <NBSP>
    0xFEFF,  # Zero-width no-break space <ZWNBSP>
]


def _IsSpace(codepoint):
    # type: (int) -> bool
    return codepoint in SPACES


def StartsWithWhitespaceByteRange(s):
    # type: (str) -> Tuple[int, int]
    """Returns the range of 's' which has leading whitespace characters.

    If 's' has no leading whitespace, a valid but empty range is returned.

    The returned range is given as byte positions, and is a half-open range
    "[start, end)" which is returned as a tuple.

    Used for shell functions like 'trimStart' to match then trim whitespace.
    """
    len_s = len(s)
    i = 0
    while i < len_s:
        codepoint = DecodeUtf8Char(s, i)
        if not _IsSpace(codepoint):
            break

        try:
            i = NextUtf8Char(s, i)
        except error.Strict:
            assert False, "DecodeUtf8Char should have caught any encoding errors"

    start = 0
    end = i
    return (start, end)


def EndsWithWhitespaceByteRange(s):
    # type: (str) -> Tuple[int, int]
    """Returns the range of 's' which has trailing whitespace characters.

    If 's' has no trailing whitespace, a valid but empty range is returned.

    The returned range is given as byte positions, and is a half-open range
    "[start, end)" which is returned as a tuple.

    Used for shell functions like 'trimEnd' to match then trim whitespace.
    """
    len_s = len(s)
    i = len_s
    while i > 0:
        # TODO: Gracefully handle surrogate pairs and overlong encodings when
        # finding the start of each character.
        prev = PreviousUtf8Char(s, i)

        codepoint = DecodeUtf8Char(s, prev)
        if not _IsSpace(codepoint):
            break

        i = prev

    start = i
    end = len_s
    return (start, end)
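
# Quick illustration of the half-open byte ranges above (added example):
#
#   StartsWithWhitespaceByteRange('  hi')  == (0, 2)   # leading "  "
#   EndsWithWhitespaceByteRange('hi \t')   == (2, 4)   # trailing " \t"
#
# A caller like trimStart / trimEnd can then slice the string using these
# offsets.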


# Implementation without Python regex:
#
# (1) PatSub: I think we fill in GlobToExtendedRegex, then use regcomp and
#     regexec in a loop.  fnmatch() does NOT give positions of matches.
#
# (2) Strip -- % %% # ## -
#
#     a. Fast path for constant strings.
#     b. Convert to POSIX extended regex, to see if it matches at ALL.  If it
#        doesn't match, short circuit out?  We can't do this with fnmatch.
#     c. If it does match, call fnmatch() iteratively over prefixes / suffixes.
#
#     - #  shortest prefix - [:1], [:2], [:3] until it matches
#     - ## longest prefix  - [:-1], [:-2], [:-3].  Works because fnmatch does
#       not match prefixes, it matches EXACTLY.
#     - %  shortest suffix - [-1:] [-2:] [-3:] ...
#     - %% longest suffix  - [1:] [2:] [3:]
#
# See remove_pattern() in subst.c for bash, and trimsub() in eval.c for
# mksh.  Dash doesn't implement it.

# TODO:
# - Unicode support: Convert the pattern, string, and replacement to unicode,
#   then convert the result back at the end.
# - Compile time errors for [[:space:]] ?


def DoUnarySuffixOp(s, op_tok, arg, is_extglob):
    # type: (str, Token, str, bool) -> str
    """Helper for ${x#prefix} and family."""

    id_ = op_tok.id

    # Fast path for constant strings.
    # TODO: Should be LooksLikeExtendedGlob!
    if not is_extglob and not glob_.LooksLikeGlob(arg):
        # It doesn't look like a glob, but we glob-escaped it (e.g. [ -> \[).
        # So reverse it.  NOTE: We also do this check in Globber.Expand().  It
        # would be nice to somehow store the original string rather than
        # escaping/unescaping.
        arg = glob_.GlobUnescape(arg)

        if id_ in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
            # explicit check for non-empty arg (len for mycpp)
            if len(arg) and s.startswith(arg):
                return s[len(arg):]
            else:
                return s

        elif id_ in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
            # need explicit check for non-empty arg (len for mycpp)
            if len(arg) and s.endswith(arg):
                return s[:-len(arg)]
            else:
                return s

        # These operators take glob arguments, we don't implement that obscure
        # case.
        elif id_ == Id.VOp1_Comma:  # Only lowercase the first letter
            if arg != '':
                e_die("%s can't have an argument" % ui.PrettyId(id_), op_tok)
            if len(s):
                return s[0].lower() + s[1:]
            else:
                return s

        elif id_ == Id.VOp1_DComma:
            if arg != '':
                e_die("%s can't have an argument" % ui.PrettyId(id_), op_tok)
            return s.lower()

        elif id_ == Id.VOp1_Caret:  # Only uppercase the first letter
            if arg != '':
                e_die("%s can't have an argument" % ui.PrettyId(id_), op_tok)
            if len(s):
                return s[0].upper() + s[1:]
            else:
                return s

        elif id_ == Id.VOp1_DCaret:
            if arg != '':
                e_die("%s can't have an argument" % ui.PrettyId(id_), op_tok)
            return s.upper()

        else:  # e.g. ^ ^^ , ,,
            raise AssertionError(id_)

    # For patterns, do fnmatch() in a loop.
    #
    # TODO:
    # - Another potential fast path:
    #   v=aabbccdd
    #   echo ${v#*b}  # strip shortest prefix
    #
    # If the whole thing doesn't match '*b*', then no test can succeed.  So we
    # can fail early.  Conversely echo ${v%%c*} and '*c*'.
    #
    # (Although honestly this whole construct is nuts and should be deprecated.)

    n = len(s)

    if id_ == Id.VOp1_Pound:  # shortest prefix
        # 'abcd': match '', 'a', 'ab', 'abc', ...
        i = 0
        while True:
            assert i <= n
            #log('Matching pattern %r with %r', arg, s[:i])
            if libc.fnmatch(arg, s[:i]):
                return s[i:]
            if i >= n:
                break
            i = NextUtf8Char(s, i)
        return s

    elif id_ == Id.VOp1_DPound:  # longest prefix
        # 'abcd': match 'abc', 'ab', 'a'
        i = n
        while True:
            assert i >= 0
            #log('Matching pattern %r with %r', arg, s[:i])
            if libc.fnmatch(arg, s[:i]):
                return s[i:]
            if i == 0:
                break
            i = PreviousUtf8Char(s, i)
        return s

    elif id_ == Id.VOp1_Percent:  # shortest suffix
        # 'abcd': match 'abcd', 'abc', 'ab', 'a'
        i = n
        while True:
            assert i >= 0
            #log('Matching pattern %r with %r', arg, s[:i])
            if libc.fnmatch(arg, s[i:]):
                return s[:i]
            if i == 0:
                break
            i = PreviousUtf8Char(s, i)
        return s

    elif id_ == Id.VOp1_DPercent:  # longest suffix
        # 'abcd': match 'abc', 'bc', 'c', ...
        i = 0
        while True:
            assert i <= n
            #log('Matching pattern %r with %r', arg, s[:i])
            if libc.fnmatch(arg, s[i:]):
                return s[:i]
            if i >= n:
                break
            i = NextUtf8Char(s, i)
        return s

    else:
        raise NotImplementedError(ui.PrettyId(id_))
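
# Shell-level illustration of the four pattern-stripping ops handled above
# (added example):
#
#   v=aabbcc
#   echo ${v#*b}    # -> bcc   (shortest matching prefix 'aab' removed)
#   echo ${v##*b}   # -> cc    (longest matching prefix 'aabb' removed)
#   echo ${v%b*}    # -> aab   (shortest matching suffix 'bcc' removed)
#   echo ${v%%b*}   # -> aa    (longest matching suffix 'bbcc' removed)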


def _AllMatchPositions(s, regex):
    # type: (str, str) -> List[Tuple[int, int]]
    """Returns a list of all (start, end) match positions of the regex against
    s.

    (If there are no matches, it returns the empty list.)
    """
    matches = []  # type: List[Tuple[int, int]]
    pos = 0
    n = len(s)
    while pos < n:  # needed to prevent infinite loop in (.*) case
        m = libc.regex_first_group_match(regex, s, pos)
        if m is None:
            break
        matches.append(m)
        start, end = m
        pos = end  # advance position
    return matches


def _PatSubAll(s, regex, replace_str):
    # type: (str, str, str) -> str
    parts = []  # type: List[str]
    prev_end = 0
    for start, end in _AllMatchPositions(s, regex):
        parts.append(s[prev_end:start])
        parts.append(replace_str)
        prev_end = end
    parts.append(s[prev_end:])
    return ''.join(parts)


class GlobReplacer(object):

    def __init__(self, regex, replace_str, slash_tok):
        # type: (str, str, Token) -> None

        # TODO: It would be nice to cache the compilation of the regex here,
        # instead of just the string.  That would require more sophisticated
        # use of the Python/C API in libc.c, which we might want to avoid.
        self.regex = regex
        self.replace_str = replace_str
        self.slash_tok = slash_tok

    def __repr__(self):
        # type: () -> str
        return '<_GlobReplacer regex %r r %r>' % (self.regex, self.replace_str)

    def Replace(self, s, op):
        # type: (str, suffix_op.PatSub) -> str

        regex = '(%s)' % self.regex  # make it a group

        if op.replace_mode == Id.Lit_Slash:
            # Avoid infinite loop when replacing all copies of empty string
            if len(self.regex) == 0:
                return s

            try:
                # loop over matches
                return _PatSubAll(s, regex, self.replace_str)
            except RuntimeError as e:
                # Not sure if this is possible since we convert from glob:
                # libc.regex_first_group_match raises RuntimeError on regex
                # syntax error.
                msg = e.message  # type: str
                e_die('Error matching regex %r: %s' % (regex, msg),
                      self.slash_tok)

        if op.replace_mode == Id.Lit_Pound:
            regex = '^' + regex
        elif op.replace_mode == Id.Lit_Percent:
            regex = regex + '$'

        m = libc.regex_first_group_match(regex, s, 0)
        #log('regex = %r, s = %r, match = %r', regex, s, m)
        if m is None:
            return s
        start, end = m
        return s[:start] + self.replace_str + s[end:]
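
# Shell-level illustration of the ${x/pat/replacement} variants handled by
# GlobReplacer.Replace (added example):
#
#   x=foo.py.py
#   echo ${x/.py/.cc}    # -> foo.cc.py   (first match only)
#   echo ${x//.py/.cc}   # -> foo.cc.cc   (Lit_Slash: replace all matches)
#   echo ${x/#foo/bar}   # -> bar.py.py   (Lit_Pound: anchored at the start)
#   echo ${x/%.py/.cc}   # -> foo.py.cc   (Lit_Percent: anchored at the end)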


def ShellQuoteB(s):
    # type: (str) -> str
    """Quote by adding backslashes.

    Used for autocompletion, so it's friendlier for display on the
    command line.  We use the strategy above for other use cases.
    """
    # There's no way to escape a newline!  Bash prints ^J for some reason, but
    # we're more explicit.  This will happen if there's a newline on a file
    # system or a completion plugin returns a newline.

    # NOTE: tabs CAN be escaped with \.
    s = s.replace('\r', '<INVALID CR>').replace('\n', '<INVALID NEWLINE>')

    # ~ for home dir
    # ! for history
    # * [] ? for glob
    # {} for brace expansion
    # space because it separates words
    return pyutil.BackslashEscape(s, ' `~!$&*()[]{}\\|;\'"<>?')
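
# Illustration of ShellQuoteB (added example): characters that are special on
# an interactive command line get a backslash, so a completion candidate like
#
#   'foo bar$x'   becomes   'foo\ bar\$x'
#
# while newlines, which cannot be backslash-escaped, are replaced with the
# <INVALID NEWLINE> marker above.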
|