| 1 | """
|
| 2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
| 3 | """
|
| 4 |
|
| 5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
| 6 | from _devbuild.gen.syntax_asdl import (
|
| 7 | Token,
|
| 8 | CompoundWord,
|
| 9 | DoubleQuoted,
|
| 10 | SingleQuoted,
|
| 11 | word,
|
| 12 | word_e,
|
| 13 | word_t,
|
| 14 | word_str,
|
| 15 | word_part,
|
| 16 | word_part_t,
|
| 17 | word_part_e,
|
| 18 | AssocPair,
|
| 19 | )
|
| 20 | from frontend import consts
|
| 21 | from frontend import lexer
|
| 22 | from mycpp import mylib
|
| 23 | from mycpp.mylib import tagswitch, log
|
| 24 |
|
| 25 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
| 26 | if TYPE_CHECKING:
|
| 27 | from osh.word_parse import WordParser
|
| 28 |
|
| 29 | _ = log


def LiteralId(p):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used for Id.KW_For, Id.RBrace, etc.
    """
    UP_part = p
    if p.tag() == word_part_e.Literal:
        return cast(Token, UP_part).id
    else:
        return Id.Undefined_Tok  # unequal to any other Id


def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c} to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
          evaluated, then we return false.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid calling
            # this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.ShArrayLiteral, word_part_e.BashAssocLiteral,
                  word_part_e.ZshVarSub, word_part_e.CommandSub,
                  word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSub, word_part_e.ArithSub,
                  word_part_e.ExtGlob, word_part_e.Splice,
                  word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())


def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """Detects the common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) a single quoted word like 'foo'

    Other patterns we could detect are:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case
                # e.g. + is Lit_Other, and it's a Token in 'expr'
                # Right now it's Lit_Chars (e.g. ls -l) and [ and ], because I
                # know those are common
                # { } are not as common
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None
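
# Illustrative sketch (added for exposition; hypothetical call site): the
# word evaluator can try this fast path first, and fall back to the general
# evaluator when FastStrEval() returns None.
#
#   s = FastStrEval(w)
#   if s is None:
#       s = word_ev.EvalWordToString(w).s   # slow path, handles every part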


def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound at PARSE TIME."""
    quoted = False

    # e.g. for ( instead of for (( is a token word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted
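
# Illustrative sketch (hypothetical names): how callers consume the 3-tuple,
# e.g. for here doc delimiters, where quoting decides whether the body gets
# $expansion.
#
#   ok, delimiter, quoted = StaticEval(delim_word)
#   if not ok:
#       ...                       # delimiter must be a constant word
#   do_expansion = not quoted     # <<EOF expands; <<'EOF' does not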


# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...If none of the characters in the tilde-prefix are quoted, the
# characters in the tilde-prefix following the tilde shall be treated as a
# possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.
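
# A rough sketch of such a regex (illustrative only; the code below uses
# tokens, not this pattern).  Per TILDE_END above, the prefix runs until
# '\0', '/', or ':'.
#
#   TILDE_PREFIX_RE = r'~([^/:]*)'   # group 1 is the possible login name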


def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word.  Otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)


def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we have
    a List[CompoundWord].

    Tilde detection:

    YES:
      ~       ~/
      ~bob    ~bob/

    NO:
      ~bob#   ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    part0 = w.parts[0]
    id0 = LiteralId(part0)
    if id0 != Id.Lit_Tilde:
        return None  # $x is not TildeSub

    tok0 = cast(Token, part0)

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)
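
# Worked example (illustrative token breakdown): a word like ~bob/src arrives
# roughly as
#
#   CompoundWord([~ (Lit_Tilde), bob (Lit_Chars), / (Lit_Slash), src ...])
#
# and TildeDetect2() folds the first two parts into a word_part.TildeSub for
# 'bob', keeping /src and the rest for ordinary evaluation.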


def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there are no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 tokens
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip tilde in middle of word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # If ~ ends the string, part1 is one of the None entries we
            # appended above
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    parts.pop()
    parts.pop()

    # Mutate argument
    w.parts = new_parts
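
# Worked example (illustrative): in a=~:~/src, the RHS parts are roughly
#
#   ~ (Lit_Tilde)  : (Lit_Colon)  ~ (Lit_Tilde)  / (Lit_Slash)  src ...
#
# Both tildes become word_part.TildeSub, because each one is at the start of
# the value or directly after a : separator.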


def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out
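
# Illustrative usage (assuming braces.BraceExpandWords() from osh/braces.py):
# tilde detection runs on each word produced by brace expansion.
#
#   words = braces.BraceExpandWords(node.words)
#   words = TildeDetectAll(words)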


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.ShArrayLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

    FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    UP_part0 = w.parts[0]
    if LiteralId(UP_part0) != Id.Lit_ArithVarLike:
        return None

    return cast(Token, UP_part0)


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

    func() { echo hi; }
    func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                   # assignment
      close_token  # Lit_ArrayLhsClose if it was detected, or None
      part_offset  # where to start the value word, 0 if not an assignment

    Cases:

    s=1
    s+=1
    s[x]=1
    s[x]+=1

    a=()
    a+=()
    a[x]=()
    a[x]+=()  # We parse this (as bash does), but it's never valid because
              # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    UP_part0 = w.parts[0]
    id0 = LiteralId(UP_part0)
    if id0 == Id.Lit_VarLike:
        tok = cast(Token, UP_part0)
        return tok, no_token, 1  # everything after the first token is the value

    if id0 == Id.Lit_ArrayLhsOpen:
        tok0 = cast(Token, UP_part0)
        # NOTE that a[]=x should be an error.  We don't want to silently decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            UP_part = w.parts[i]
            if LiteralId(UP_part) == Id.Lit_ArrayLhsClose:
                tok_close = cast(Token, UP_part)
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar' or 'a[x+1+2/' without the closing ].
    return no_token, no_token, 0
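
# Illustrative sketch of the contract (hypothetical names):
#
#   left, close, offset = DetectShAssignment(w)
#   if left is None:
#       ...   # not an assignment word
#   elif left.id == Id.Lit_VarLike:
#       ...   # s=1 or a+=(1 2); the value starts at w.parts[1]
#   else:
#       ...   # a[x]=1; left and close bracket the index expression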


def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out
    word_part.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the
    [k] syntax is only used for associative array literals, as opposed
    to indexed array literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value)

    return None


def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() == word_part_e.Literal:
        return cast(Token, part0)

    return None


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()


def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start."""
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)


#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    """Used by shell arithmetic parsing."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())


def CommandId(w):
    # type: (word_t) -> Id_t
    """Used by CommandParser."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Fine-grained categorization of SINGLE literal parts
            if len(w.parts) != 1:
                return Id.Word_Compound  # generic word

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # Not Kind.Lit, generic word

            if token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                              Id.Lit_TDot):
                # - { } are for YSH braces
                # - = is for the = keyword
                # - ... is to start multiline mode
                #
                # TODO: Should we use Op_{LBrace,RBrace} and Kind.Op when
                # parse_brace?  Lit_Equals could be KW_Equals?
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # Id.KW_Var, etc.

            return Id.Word_Compound  # generic word

        else:
            raise AssertionError(w.tag())


def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser.

    NOTE: This is inconsistent with CommandId(), because we never return
    Kind.KW or Kind.Lit.  But the CommandParser is easier to write this way.

    For example, these are valid redirects to a Kind.Word, and the parser
    checks:

    echo hi > =
    echo hi > {

    Invalid:
    echo hi > (
    echo hi > ;
    """
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        # CommandParser uses Kind.Redir, Kind.Op, Kind.Eof, etc.
        return consts.GetKind(tok.id)

    return Kind.Word


# Stubs for converting RHS of assignment to expression mode.
# For osh2oil.py
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)
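
# Illustrative usage (hypothetical call site): both context managers flip a
# WordParser flag for the duration of a 'with' block.
#
#   with ctx_Multiline(self.w_parser):
#       node = self.ParseCommandTerm()   # may now span multiple lines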
|