| 1 | #!/usr/bin/env python2
 | 
| 2 | """Consts.py."""
 | 
| 3 | from __future__ import print_function
 | 
| 4 | 
 | 
| 5 | from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
 | 
| 6 |                                       bool_arg_type_t, opt_group_i)
 | 
| 7 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
 | 
| 8 | from frontend import builtin_def
 | 
| 9 | from frontend import lexer_def
 | 
| 10 | from frontend import option_def
 | 
| 11 | 
 | 
| 12 | from typing import Tuple, Optional, TYPE_CHECKING
 | 
| 13 | if TYPE_CHECKING:
 | 
| 14 |     from _devbuild.gen.option_asdl import option_t, builtin_t
 | 
| 15 | 
 | 
# 0 means "not found".  It is what the Lookup*Builtin() functions and
# OptionGroupNum() in this module return when the name is unknown.
NO_INDEX = 0

# Names re-exported from option_def so they can be used as consts::STRICT_ALL,
# etc.  Each one is assigned explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE
PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS
 | 
| 25 | 
 | 
# Options from option_def._SORTED, partitioned by their `builtin` field.
# Each *_NUMS list lines up index-for-index with the matching *_NAMES list,
# since both are filtered with the same condition.

# builtin == 'set'
SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

# builtin == 'shopt'
SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

# Used to print
VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS

# Used by builtin_comp.py
BUILTIN_NAMES = builtin_def.BUILTIN_NAMES
 | 
| 43 | 
 | 
# Keyword names, for introspection with bash 'compgen' and 'type'.
# Start with everything the lexer knows about ...
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]

# ... then add the ones it doesn't produce as keywords.
OSH_KEYWORD_NAMES.extend([
    '{',  # not handled by our lexer
    '=',  # YSH keyword not handled by our lexer
    # bash considers these closing delimiters keywords
    '}',
    ']]',
])
 | 
| 52 | 
 | 
| 53 | 
 | 
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """Return the Kind of a token Id, for coarse-grained parsing decisions.

    Looks the Id up in the generated ID_TO_KIND table.
    """
    # Imported here, not at module level, to break a circular dependency.
    from _devbuild.gen.id_kind import ID_TO_KIND

    kind = ID_TO_KIND[id_]
    return kind
 | 
| 60 | 
 | 
| 61 | 
 | 
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Return the bool_arg_type_t recorded for an Id in BOOL_ARG_TYPES."""
    # Imported here, not at module level, to break a circular dependency.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES

    arg_type = BOOL_ARG_TYPES[id_]
    return arg_type
 | 
| 67 | 
 | 
| 68 | 
 | 
| 69 | #
 | 
| 70 | # Redirect Tables associated with IDs
 | 
| 71 | #
 | 
| 72 | 
 | 
# The file descriptor a redirect operator applies to when none is written
# explicitly.  Unlike REDIR_ARG_TYPES, this table includes here docs.
REDIR_DEFAULT_FD = {
    #
    # Operators that default to fd 0
    #
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word
    Id.Redir_DLess: 0,  # here doc
    Id.Redir_DLessDash: 0,  # here doc

    #
    # Operators that default to fd 1
    #
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_AndGreat: 1,  # bash &>
    Id.Redir_AndDGreat: 1,  # bash &>>
    Id.Redir_GreatAnd: 1,  # echo >&2  means echo 1>&2
}
 | 
| 93 | 
 | 
# What kind of argument follows each redirect operator.
# NOTE: here docs (Redir_DLess, Redir_DLessDash) aren't included.
REDIR_ARG_TYPES = {
    # The argument is a filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    Id.Redir_AndGreat: redir_arg_type_e.Path,  # bash &>
    Id.Redir_AndDGreat: redir_arg_type_e.Path,  # bash &>>

    # The argument is a descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # The argument is a here word
    Id.Redir_TLess: redir_arg_type_e.Here,
}
 | 
| 111 | 
 | 
| 112 | 
 | 
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the argument type of a redirect operator.

    Raises KeyError for an Id that isn't a key of REDIR_ARG_TYPES.
    """
    arg_type = REDIR_ARG_TYPES[id_]
    return arg_type
 | 
| 116 | 
 | 
| 117 | 
 | 
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the default file descriptor of a redirect operator.

    Raises KeyError for an Id that isn't a key of REDIR_DEFAULT_FD.
    """
    fd = REDIR_DEFAULT_FD[id_]
    return fd
 | 
| 121 | 
 | 
| 122 | 
 | 
| 123 | #
 | 
| 124 | # Builtins
 | 
| 125 | #
 | 
| 126 | 
 | 
# Builtin name -> builtin spec; shared by the three Lookup*Builtin() functions.
_BUILTIN_DICT = builtin_def.BuiltinDict()


def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the special builtin named argv0.

    Returns NO_INDEX if argv0 isn't a builtin, or is a builtin whose kind
    isn't 'special'.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'special':
        return NO_INDEX
    return spec.index
 | 
| 138 | 
 | 
| 139 | 
 | 
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the assignment builtin named argv0.

    Returns NO_INDEX if argv0 isn't a builtin, or is a builtin whose kind
    isn't 'assign'.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'assign':
        return NO_INDEX
    return spec.index
 | 
| 148 | 
 | 
| 149 | 
 | 
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the normal builtin named argv0.

    Returns NO_INDEX if argv0 isn't a builtin, or is a builtin whose kind
    isn't 'normal'.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'normal':
        return NO_INDEX
    return spec.index
 | 
| 158 | 
 | 
| 159 | 
 | 
def OptionName(opt_num):
    # type: (option_t) -> str
    """Map an option index to its name, via option_def.OPTION_NAMES."""
    name = option_def.OPTION_NAMES[opt_num]
    return name
 | 
| 164 | 
 | 
| 165 | 
 | 
# Option group name -> opt_group_i value.  See OptionGroupNum().
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}
 | 
| 175 | 
 | 
| 176 | 
 | 
def OptionGroupNum(s):
    # type: (str) -> int
    """Return the group number for a name like 'strict:all'.

    Returns NO_INDEX (0) if s isn't a key of OPTION_GROUPS.
    """
    group_num = OPTION_GROUPS.get(s, NO_INDEX)
    return group_num
 | 
| 180 | 
 | 
| 181 | 
 | 
# Lookup table built by option_def.OptionDict(); OptionNum() queries it by
# option name.
_OPTION_DICT = option_def.OptionDict()


def OptionNum(s):
    # type: (str) -> int
    """Return the option number for the name s, or 0 if it isn't found."""
    opt_num = _OPTION_DICT.get(s, 0)
    return opt_num
 | 
| 188 | 
 | 
| 189 | 
 | 
# Two views of lexer_def.CONTROL_FLOW, filled in a single pass:
#   _CONTROL_FLOW_NAMES   the names, in definition order
#   _CONTROL_FLOW_LOOKUP  Id -> name
_CONTROL_FLOW_NAMES = []
_CONTROL_FLOW_LOOKUP = {}
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_NAMES.append(name)
    _CONTROL_FLOW_LOOKUP[id_] = name
 | 
| 194 | 
 | 
| 195 | 
 | 
def ControlFlowName(id_):
    # type: (int) -> str
    """Return the name of a control flow Id.  Used for tracing.

    Raises KeyError if id_ isn't one of the Ids in lexer_def.CONTROL_FLOW.
    """
    name = _CONTROL_FLOW_LOOKUP[id_]
    return name
 | 
| 200 | 
 | 
| 201 | 
 | 
def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if name is one of the names in lexer_def.CONTROL_FLOW."""
    found = name in _CONTROL_FLOW_NAMES
    return found
 | 
| 205 | 
 | 
| 206 | 
 | 
def IsKeyword(name):
    # type: (str) -> bool
    """Return True if name is listed in OSH_KEYWORD_NAMES."""
    found = name in OSH_KEYWORD_NAMES
    return found
 | 
| 210 | 
 | 
| 211 | 
 | 
| 212 | #
 | 
| 213 | # osh/prompt.py and osh/word_compile.py
 | 
| 214 | #
 | 
| 215 | 
 | 
# The character that follows a backslash -> the character the escape denotes.
_ONE_CHAR_C = {
    # Control characters
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',

    # ESC, in either case
    'e': '\x1b',
    'E': '\x1b',

    # Characters that stand for themselves
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Return the character denoted by the one-character escape c.

    Fatal if not present: an unknown c raises KeyError.
    """
    decoded = _ONE_CHAR_C[c]
    return decoded
 | 
| 238 | 
 | 
| 239 | 
 | 
# One-character escapes recognized in prompts.
#
# NOTE: Prompt chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.
_ONE_CHAR_PROMPT = {
    '\\': '\\',
    'a': '\a',
    'e': '\x1b',
    'n': '\n',
    'r': '\r',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Return the character denoted by the prompt escape c.

    Returns None if not present.
    """
    return _ONE_CHAR_PROMPT.get(c, None)
 | 
| 255 | 
 | 
| 256 | 
 | 
| 257 | #
 | 
| 258 | # Constants used by osh/split.py
 | 
| 259 | #
 | 
| 260 | 
 | 
| 261 | # IFS splitting is complicated in general.  We handle it with three concepts:
 | 
| 262 | #
 | 
| 263 | # - CH.* - Kinds of characters (edge labels)
 | 
| 264 | # - ST.* - States (node labels)
 | 
# - EMIT.* - Actions
 | 
| 266 | #
 | 
# The Split() loop in osh/split.py classifies characters, follows state
# transitions, and emits spans.  A span is an (ignored Bool, end_index Int)
# pair.
 | 
| 269 | 
 | 
| 270 | # As an example, consider this string:
 | 
| 271 | # 'a _ b'
 | 
| 272 | #
 | 
| 273 | # The character classes are:
 | 
| 274 | #
 | 
| 275 | # a      ' '        _        ' '        b
 | 
| 276 | # Black  DE_White   DE_Gray  DE_White   Black
 | 
| 277 | #
 | 
| 278 | # The states are:
 | 
| 279 | #
 | 
| 280 | # a      ' '        _        ' '        b
 | 
| 281 | # Black  DE_White1  DE_Gray  DE_White2  Black
 | 
| 282 | #
 | 
| 283 | # DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
 | 
| 284 | #
 | 
| 285 | # The spans emitted are:
 | 
| 286 | #
 | 
| 287 | # (part 'a', ignored ' _ ', part 'b')
 | 
| 288 | 
 | 
| 289 | # SplitForRead() will check if the last two spans are a \ and \\n.  Easy.
 | 
| 290 | 
 | 
| 291 | # Shorter names for state machine enums
 | 
| 292 | from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
 | 
| 293 | from _devbuild.gen.runtime_asdl import emit_i as EMIT
 | 
| 294 | from _devbuild.gen.runtime_asdl import char_kind_i as CH
 | 
| 295 | from _devbuild.gen.runtime_asdl import state_i as ST
 | 
| 296 | 
 | 
# Transition table of the IFS state machine:
#
#   (current state, kind of the next character) -> (next state, action)
#
# The trailing comment on each edge is an example input that takes it.
_IFS_EDGES = {
    #
    # From ST.Start
    #
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''

    #
    # From ST.DE_White1
    #
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # '  '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '

    #
    # From ST.DE_Gray
    #
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): every other non-backslash state moves to ST.Backslash on
    # CH.Backslash; this edge moves to ST.Black.  Confirm it is intended.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '

    #
    # From ST.DE_White2
    #
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_  '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '

    #
    # From ST.Black
    #
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    #
    # From ST.Backslash
    #
    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
 | 
| 340 | 
 | 
| 341 | 
 | 
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow one edge of the IFS state machine.

    Returns the (next state, action) pair that _IFS_EDGES stores for the
    current state and character kind.  Raises KeyError if there is no such
    edge.
    """
    edge_key = (state, ch)
    return _IFS_EDGES[edge_key]
 |