1 | """
|
2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
3 | """
|
4 |
|
5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
6 | from _devbuild.gen.syntax_asdl import (
|
7 | Token,
|
8 | CompoundWord,
|
9 | DoubleQuoted,
|
10 | SingleQuoted,
|
11 | word,
|
12 | word_e,
|
13 | word_t,
|
14 | word_str,
|
15 | word_part,
|
16 | word_part_t,
|
17 | word_part_e,
|
18 | AssocPair,
|
19 | )
|
20 | from frontend import consts
|
21 | from frontend import lexer
|
22 | from mycpp import mylib
|
23 | from mycpp.mylib import tagswitch, log
|
24 |
|
25 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
26 | if TYPE_CHECKING:
|
27 | from osh.word_parse import WordParser
|
28 |
|
29 | _ = log
|
30 |
|
31 |
|
32 | def LiteralId(p):
|
33 | # type: (word_part_t) -> Id_t
|
34 | """If the WordPart consists of a single literal token, return its Id.
|
35 |
|
36 | Used for Id.KW_For, or Id.RBrace, etc.
|
37 | """
|
38 | UP_part = p
|
39 | if p.tag() == word_part_e.Literal:
|
40 | return cast(Token, UP_part).id
|
41 | else:
|
42 | return Id.Undefined_Tok # unequal to any other Id
|
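
# Rough usage sketch (hypothetical; real parts come from the WordParser).
# lexer.DummyToken() is used elsewhere in this file to make a synthetic Token:
#
#   part = lexer.DummyToken(Id.Lit_Chars, 'foo')
#   LiteralId(part)          # -> Id.Lit_Chars
#   LiteralId(var_sub_part)  # -> Id.Undefined_Tok for any non-literal part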


def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c} to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
            evaluated, then we return false.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid calling
            # this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.ShArrayLiteral, word_part_e.BashAssocLiteral,
                  word_part_e.ZshVarSub, word_part_e.CommandSub,
                  word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSub, word_part_e.ArithSub,
                  word_part_e.ExtGlob, word_part_e.Splice,
                  word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())


def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """
    Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) single quoted word like 'foo'

    Other patterns we could detect are:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case
                # e.g. + is Lit_Other, and it's a Token in 'expr'
                # Right now it's Lit_Chars (e.g. ls -l) and [ and ] because I
                # know those are common
                # { } are not as common
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None
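
# Illustrative sketch (informal; these words stand for parser output):
#
#   foo    - CompoundWord([Token Lit_Chars 'foo']) -> 'foo' via lexer.LazyStr()
#   'foo'  - single SingleQuoted part              -> part0.sval, i.e. 'foo'
#   "$x"   - DoubleQuoted part                     -> None; needs real evaluation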


def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound at PARSE TIME."""
    quoted = False

    # e.g. for ( instead of for (( is a token word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted
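
# A minimal sketch of the contract (assumes lexer.TokenVal() can recover the
# string from a lexer.DummyToken(); words from the real parser behave the same):
#
#   w = CompoundWord([lexer.DummyToken(Id.Lit_Chars, 'EOF')])
#   ok, s, quoted = StaticEval(w)  # -> (True, 'EOF', False)
#
# Words containing $var, $(command), etc. return ok=False.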


# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...If none of the characters in the tilde-prefix are quoted, the
# characters in the tilde-prefix following the tilde shall be treated as a
# possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.


def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word; otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)


def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we
    have a List[CompoundWord].

    Tilde detection:

    YES:
        ~         ~/
        ~bob      ~bob/

    NO:
        ~bob#     ~bob#/
        ~bob$x
        ~$x

    Pattern to match (all must be word_part_e.Literal):

        Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    part0 = w.parts[0]
    id0 = LiteralId(part0)
    if id0 != Id.Lit_Tilde:
        return None  # $x is not TildeSub

    tok0 = cast(Token, part0)

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)
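
# Sketch of the rewrite (token values shown informally):
#
#   ~bob/src parses as CompoundWord([Lit_Tilde, Lit_Chars 'bob', Lit_Slash, ...])
#   and TildeDetect2() returns a new
#                      CompoundWord([TildeSub(user='bob'), Lit_Slash, ...])
#
#   ~$x returns None, because $x isn't a literal login name.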


def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

        Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there are no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 tokens
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip tilde in middle of word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # Look ahead up to 2 parts; if ~ ends the string, part1 is None
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    parts.pop()
    parts.pop()

    # Mutate argument
    w.parts = new_parts
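
# Rough sketch of the mutation (token values shown informally):
#
#   a=~:~/src parses with value parts
#       [Lit_Tilde, Lit_Colon, Lit_Tilde, Lit_Slash, Lit_Chars 'src']
#   and after TildeDetectAssign(w), w.parts is
#       [TildeSub, Lit_Colon, TildeSub, Lit_Slash, Lit_Chars 'src']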


def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.ShArrayLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

    FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    UP_part0 = w.parts[0]
    if LiteralId(UP_part0) != Id.Lit_ArithVarLike:
        return None

    return cast(Token, UP_part0)


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

        func() { echo hi; }
        func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                   # assignment
      close_token  # Lit_ArrayLhsClose if it was detected, or None
      part_offset  # where to start the value word, 0 if not an assignment

    Cases:

        s=1
        s+=1
        s[x]=1
        s[x]+=1

        a=()
        a+=()
        a[x]=()
        a[x]+=()  # We parse this (as bash does), but it's never valid
                  # because arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    UP_part0 = w.parts[0]
    id0 = LiteralId(UP_part0)
    if id0 == Id.Lit_VarLike:
        tok = cast(Token, UP_part0)
        return tok, no_token, 1  # everything after first token is the value

    if id0 == Id.Lit_ArrayLhsOpen:
        tok0 = cast(Token, UP_part0)
        # NOTE that a[]=x should be an error.  We don't want to silently
        # decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            UP_part = w.parts[i]
            if LiteralId(UP_part) == Id.Lit_ArrayLhsClose:
                tok_close = cast(Token, UP_part)
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar' or a[x+1+2/' without the closing ].
    return no_token, no_token, 0
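
# Sketch of the three return shapes (token values shown informally):
#
#   FOO=bar  -> (Token Lit_VarLike 'FOO=', None, 1)
#   a[i]=x   -> (Token Lit_ArrayLhsOpen 'a[', Token Lit_ArrayLhsClose ']=', k)
#               where k is the part index just after ']='
#   echo     -> (None, None, 0)  # not an assignment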


def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out the word
    parts.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k]
    syntax is only used for associative array literals, as opposed to
    indexed array literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value)

    return None
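
# Sketch: inside A=( ['k']=v ), the word ['k']=v has parts roughly like
#
#   [Token Lit_LBracket '[', SingleQuoted 'k', Token Lit_ArrayLhsClose ']=',
#    Token Lit_Chars 'v']
#
# and DetectAssocPair() returns AssocPair(key=CompoundWord([...'k'...]),
# value=CompoundWord([...'v'...])).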


def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() == word_part_e.Literal:
        return cast(Token, part0)

    return None


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Id.Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()


def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS A CompoundWord containing just a keyword, return
    the single token at the start."""
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)


#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    """Used by shell arithmetic parsing."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())


def CommandId(w):
    # type: (word_t) -> Id_t
    """Used by CommandParser."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Fine-grained categorization of SINGLE literal parts
            if len(w.parts) != 1:
                return Id.Word_Compound  # generic word

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # Not Kind.Lit, generic word

            if token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                              Id.Lit_TDot):
                # - { } are for YSH braces
                # - = is for the = keyword
                # - ... is to start multiline mode
                #
                # TODO: Should we use Op_{LBrace,RBrace} and Kind.Op when
                # parse_brace?  Lit_Equals could be KW_Equals?
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # Id.KW_Var, etc.

            return Id.Word_Compound  # generic word

        else:
            raise AssertionError(w.tag())
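
# Classification sketch (informal):
#
#   if     -> Id.KW_If          (single literal part whose Kind is Kind.KW)
#   {      -> Id.Lit_LBrace     (special-cased single token)
#   echo   -> Id.Word_Compound  (generic word)
#   "$x"y  -> Id.Word_Compound  (more than one part)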


def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser.

    NOTE: This is inconsistent with CommandId(), because we never return
    Kind.KW or Kind.Lit.  But the CommandParser is easier to write this way.

    For example, these are valid redirects to a Kind.Word, and the parser
    checks:

        echo hi > =
        echo hi > {

    Invalid:

        echo hi > (
        echo hi > ;
    """
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        # CommandParser uses Kind.Redir, Kind.Op, Kind.Eof, etc.
        return consts.GetKind(tok.id)

    return Kind.Word


# Stubs for converting RHS of assignment to expression mode.
# For osh2oil.py
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)
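
# Typical usage (sketch; w_parser is a WordParser from osh/word_parse.py):
#
#   with ctx_Multiline(w_parser):
#       ...  # parse while multiline mode is enabled
#   # Multiline(False) runs on exit, even if parsing raises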
|