| 1 | #!/usr/bin/env python2
 | 
| 2 | # Copyright 2016 Andy Chu. All rights reserved.
 | 
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 | 
| 4 | # you may not use this file except in compliance with the License.
 | 
| 5 | # You may obtain a copy of the License at
 | 
| 6 | #
 | 
| 7 | #   http://www.apache.org/licenses/LICENSE-2.0
 | 
| 8 | """
 | 
| 9 | id_kind_def.py - Id and Kind definitions, stored in Token
 | 
| 10 | 
 | 
| 11 | NOTE: If this file changes, rebuild it with build/py.sh all
 | 
| 12 | """
 | 
| 13 | from __future__ import print_function
 | 
| 14 | 
 | 
| 15 | from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
 | 
| 16 | #from mycpp.mylib import log
 | 
| 17 | 
 | 
| 18 | from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
 | 
| 19 | if TYPE_CHECKING:  # avoid circular build deps
 | 
| 20 |     from _devbuild.gen.id_kind_asdl import Id_t, Kind_t
 | 
| 21 | 
 | 
| 22 | 
 | 
class IdSpec(object):
    """Builder that hands out the integer Ids and Kinds of the shell's tokens.

    Every Id belongs to exactly one Kind.  Ids and Kinds are each numbered
    sequentially, starting at 1, in the order they are registered, so the
    order of the Add*() calls determines the integer values.
    """

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        """
        Args:
          kind_lookup: dict that gets filled with Id int -> Kind int
          bool_ops: dict that gets filled with Id int -> bool_arg_type_t
        """
        # Tables owned by the caller; they are mutated in place.
        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Name -> integer, for both Ids and Kinds
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        # Parallel lists, in Kind registration order
        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        # Kind int -> [(is_regex, pattern, Id int), ...]
        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]

        # Next integers to hand out.
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        """Return a new list of the (is_regex, pattern, Id) triples of a Kind.

        Raises KeyError if no pairs were registered for that Kind.
        """
        return [(is_regex, pat, id_)
                for is_regex, pat, id_ in self.lexer_pairs[kind]]

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """Register one Id and return the integer assigned to it.

        Args:
          id_name: e.g. BoolBinary_Equal
          kind: Kind integer to file the Id under.  Defaults to the Kind that
                is currently being built; AddBoolBinaryForBuiltin overrides it.
        """
        new_id = self.id_index

        self.id_str2int[id_name] = new_id
        self.kind_lookup[new_id] = self.kind_index if kind is None else kind

        # Advance the counter only after the tables are updated
        self.id_index = new_id + 1
        return new_id

    def _AddKind(self, kind_name):
        # type: (str) -> None
        """Give kind_name the current Kind integer, then advance the counter."""
        kind_int = self.kind_index
        self.kind_str2int[kind_name] = kind_int
        self.kind_name_list.append(kind_name)
        self.kind_index = kind_int + 1

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        """Register a Kind whose Ids are named <kind_name>_<token>.

        These Ids get no lexer pairs.
        """
        assert isinstance(tokens, list), tokens

        # The Ids come first: _AddId() files them under the current
        # kind_index, which _AddKind() increments.
        for tok in tokens:
            self._AddId('%s_%s' % (kind_name, tok))

        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        """Register a Kind from (name, constant string) pairs.

        Each pair becomes the Id <kind_name>_<name>, and a non-regex lexer
        pair matching the constant string is recorded for the Kind.
        """
        assert isinstance(pairs, list), pairs

        self.lexer_pairs[self.kind_index] = [
            (False, pat, self._AddId('%s_%s' % (kind_name, name)))  # Constant
            for name, pat in pairs
        ]

        # Only now may the Kind counter move on
        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """Register a Kind of boolean operators, grouped by argument type.

        Args:
          kind_name: string, e.g. BoolUnary
          arg_type_pairs: list of (bool_arg_type_e value, pairs), where pairs
            is a list of (name, constant string) like in AddKindPairs
        """
        constants = []
        total = 0
        for arg_type, pairs in arg_type_pairs:
            for name, pat in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                op_id = self._AddId('%s_%s' % (kind_name, name))
                self.AddBoolOp(op_id, arg_type)  # register type
                constants.append((False, pat, op_id))  # constant
            total += len(pairs)

        self.lexer_pairs[self.kind_index] = constants

        # Must come after every _AddId() call
        self._AddKind(kind_name)
        self.kind_sizes.append(total)  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """Add BoolBinary_<id_name> under an existing Kind; return its Id int.

        For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.
        """
        op_id = self._AddId('BoolBinary_%s' % id_name, kind=kind)
        self.AddBoolOp(op_id, bool_arg_type_e.Str)
        return op_id

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Record the bool_arg_type_e value that goes with an Id integer."""
        self.bool_ops[id_int] = arg_type
 | 
| 152 | 
 | 
| 153 | 
 | 
def AddKinds(spec):
    # type: (IdSpec) -> None
    """Register the non-boolean Kinds, and the Ids in each, on spec.

    The order of the calls and of the names within each list is significant:
    IdSpec numbers Ids and Kinds sequentially as they are added, so inserting,
    removing, or reordering entries changes the integers that are assigned.

    AddKind() registers names only; AddKindPairs() also records the constant
    string for each name as a lexer pair.
    """

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y)  should use === or ~==
    spec.AddKind('Unknown', ['Tok', 'Backslash', 'DEqual'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # Ignored_Newline is for J8 lexing to count lines
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'Newline'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'CharsWithoutPrefix',  # for stripping leading whitespace
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',  # \* is escaped
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  # / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'SemiAmp',  # ;& for case
            'DSemiAmp',  # ;;& for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (registered above)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDot',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @()  $() (words in lex_mode_e.ShCommand)
            # and ${}  ''  ""  (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Func',  # For function literals
            'Capture',
            'As',
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # For lex_mode_e.BashRegex
    # Bash treats ( | ) as special, and space is allowed within ()
    # Note Id.Op_RParen -> Id.Right_BashRegex with lexer hint
    spec.AddKind('BashRegex', ['LParen', 'AllowedInParens'])

    spec.AddKind(
        'Eggex',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >|  POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus', # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word, so it
    # doesn't get that.)
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'JDoubleQuote',  # j" for J8 notation
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'DollarTDoubleQuote',  # $""" """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarBraceZsh',  # ${(foo)
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'ShArrayLiteral',  # )
            'ExtGlob',  # )
            'BashRegexGroup',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It could
    # be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@  or  [@] for array subscripting
            'Pound',  # $#  or  ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpYsh',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  #  / for replacement
            ('Colon', ':'),  #  : for slicing
            ('LBracket', '['),  #  [ for indexing
            ('RBracket', ']'),  #  ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they
            # will be under Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Typed',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For format strings.
    # NOTE(review): this comment used to read "For C-escaped strings.", the
    # same as the one on Kind.Char.  The token names (Percent, Flag, Num,
    # Type) look like printf-style format specs -- confirm.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we recognize
            # strings in the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',
            'Newline',  # J8 Lines only, similar to Op_Newline
            'Tab',  # Reserved for TSV8

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])
 | 
| 709 | 
 | 
| 710 | 
 | 
# Shared between [[ and test/[.
#
# Each character of the _UNARY_* strings is a one-letter operator name.
# AddBoolKinds() passes them through _Dash() to get pairs like ('z', '-z'),
# and SetupTestBuiltin() looks up the resulting BoolUnary_<letter> Ids.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

# Binary operator names; they also get a leading dash, e.g. ('ef', '-ef').
_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']
 | 
| 718 | 
 | 
| 719 | 
 | 
def _Dash(strs):
    # type: (List[str]) -> List[Tuple[str, str]]
    """Pair each operator name with its dashed spelling.

    Returns a list of (token name, string to match), e.g. 'eq' gives
    ('eq', '-eq').
    """
    pairs = []  # type: List[Tuple[str, str]]
    for op_name in strs:
        pairs.append((op_name, '-' + op_name))
    return pairs
 | 
| 724 | 
 | 
| 725 | 
 | 
def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    """Register the BoolUnary and BoolBinary kinds, and tag existing Ids.

    Besides adding the two kinds, this records a bool_arg_type_e value for
    Op_DAmp, Op_DPipe, KW_Bang, Op_Less and Op_Great, so those Ids must have
    been registered on spec already.
    """
    # Unary operators: every one is a dash followed by a single letter.
    unary_groups = [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ]
    spec.AddBoolKind('BoolUnary', unary_groups)

    # Binary operators: string comparisons first, then the dashed ones.
    binary_groups = [
        (bool_arg_type_e.Str, [
            ('GlobEqual', '='),
            ('GlobDEqual', '=='),
            ('GlobNEqual', '!='),
            ('EqualTilde', '=~'),
        ]),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ]
    spec.AddBoolKind('BoolBinary', binary_groups)

    ids = spec.id_str2int

    # logical, arity, arg_type
    for logical_name in ('Op_DAmp', 'Op_DPipe', 'KW_Bang'):
        spec.AddBoolOp(ids[logical_name], bool_arg_type_e.Undefined)

    # < and > take string arguments
    for compare_name in ('Op_Less', 'Op_Great'):
        spec.AddBoolOp(ids[compare_name], bool_arg_type_e.Str)
 | 
| 753 | 
 | 
| 754 | 
 | 
def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Fill in the operator string -> Id tables for test/[.

    The three lookup dicts are mutated in place.  Three new Ids
    (BoolBinary_Equal, BoolBinary_DEqual, BoolBinary_NEqual) are also added to
    id_spec under the existing BoolBinary kind.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)
    """
    ids = id_spec.id_str2int
    kinds = id_spec.kind_str2int

    # -z, -n, -o, ...: the same letters that [[ accepts
    for chars in (_UNARY_STR_CHARS, _UNARY_OTHER_CHARS, _UNARY_PATH_CHARS):
        for letter in chars:
            unary_lookup['-' + letter] = ids['BoolUnary_%s' % letter]

    # -ef, -nt, ... and -eq, -ne, ...
    for group in (_BINARY_PATH, _BINARY_INT):
        for op_name in group:
            binary_lookup['-' + op_name] = ids['BoolBinary_%s' % op_name]

    # Like the [[ definition above, but without globbing and without =~ .
    bool_binary = kinds['BoolBinary']
    for op_name, op_str in (('Equal', '='), ('DEqual', '=='), ('NEqual', '!=')):
        binary_lookup[op_str] = id_spec.AddBoolBinaryForBuiltin(
            op_name, bool_binary)

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    for op_str, id_name in (('<', 'Op_Less'), ('>', 'Op_Great')):
        binary_lookup[op_str] = ids[id_name]

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    for op_str, id_name in (
        ('!', 'KW_Bang'),  # like [[ !
        ('(', 'Op_LParen'),
        (')', 'Op_RParen'),
        (']', 'Arith_RBracket'),  # For closing ]
    ):
        other_lookup[op_str] = ids[id_name]
 |