1 | #!/usr/bin/env python2
|
2 | """Consts.py."""
|
3 | from __future__ import print_function
|
4 |
|
5 | from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
|
6 | bool_arg_type_t, opt_group_i)
|
7 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
|
8 | from frontend import builtin_def
|
9 | from frontend import lexer_def
|
10 | from frontend import option_def
|
11 |
|
12 | from typing import Tuple, Optional, TYPE_CHECKING
|
13 | if TYPE_CHECKING:
|
14 | from _devbuild.gen.option_asdl import option_t, builtin_t
|
15 |
|
# Index value meaning "not found"; also used for Resolve.
NO_INDEX = 0

# Re-exported so they can be referenced as consts::STRICT_ALL, etc.  The
# assignments are spelled out one by one to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

# Indices and names of the options whose builtin is 'set'.
SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

# Indices and names of the options whose builtin is 'shopt'.
SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py
|
43 |
|
# Keyword names, for introspection with bash 'compgen' and 'type'.
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.extend([
    '{',  # not handled by our lexer
    '=',  # YSH keyword not handled by our lexer
    # bash considers these closing delimiters keywords
    '}',
    ']]',
])
|
52 |
|
53 |
|
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """Return the Kind of an Id, to make coarse-grained parsing decisions."""
    # Imported inside the function to break a circular dependency.
    from _devbuild.gen.id_kind import ID_TO_KIND

    kind = ID_TO_KIND[id_]
    return kind
|
60 |
|
61 |
|
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Look up the bool_arg_type_t entry for `id_` in BOOL_ARG_TYPES."""
    # Imported inside the function to break a circular dependency.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES

    arg_type = BOOL_ARG_TYPES[id_]
    return arg_type
|
67 |
|
68 |
|
69 | #
|
70 | # Redirect Tables associated with IDs
|
71 | #
|
72 |
|
# Maps a redirect operator Id to the file descriptor it applies to when the
# command does not name one explicitly.
REDIR_DEFAULT_FD = {
    # --- filename ---
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # --- descriptor ---
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word

    # --- here docs (included in this table) ---
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}
|
93 |
|
# Maps a redirect operator Id to the kind of argument that follows it.
REDIR_ARG_TYPES = {
    # --- filename ---
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # --- descriptor ---
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,
    Id.Redir_TLess: redir_arg_type_e.Here,  # here word
    # note: here docs (Redir_DLess, Redir_DLessDash) aren't included
}
|
111 |
|
112 |
|
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the argument type for a redirect Id.

    Raises KeyError if `id_` has no entry in REDIR_ARG_TYPES.
    """
    arg_type = REDIR_ARG_TYPES[id_]
    return arg_type
|
116 |
|
117 |
|
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the default file descriptor for a redirect Id.

    Raises KeyError if `id_` has no entry in REDIR_DEFAULT_FD.
    """
    fd = REDIR_DEFAULT_FD[id_]
    return fd
|
121 |
|
122 |
|
123 | #
|
124 | # Builtins
|
125 | #
|
126 |
|
# Builtin table from builtin_def, queried by name in the Lookup*Builtin
# functions.
_BUILTIN_DICT = builtin_def.BuiltinDict()


def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the special builtin named argv0, else NO_INDEX."""
    spec = _BUILTIN_DICT.get(argv0)
    if not spec:
        return NO_INDEX
    if spec.kind != 'special':
        return NO_INDEX
    return spec.index
|
138 |
|
139 |
|
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the assignment builtin named argv0, else NO_INDEX."""
    spec = _BUILTIN_DICT.get(argv0)
    if not spec:
        return NO_INDEX
    if spec.kind != 'assign':
        return NO_INDEX
    return spec.index
|
148 |
|
149 |
|
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the 'normal' builtin named argv0, else NO_INDEX."""
    spec = _BUILTIN_DICT.get(argv0)
    if not spec:
        return NO_INDEX
    if spec.kind != 'normal':
        return NO_INDEX
    return spec.index
|
158 |
|
159 |
|
def OptionName(opt_num):
    # type: (option_t) -> str
    """Return the option name stored at index `opt_num`."""
    opt_name = option_def.OPTION_NAMES[opt_num]
    return opt_name
|
164 |
|
165 |
|
# Maps an option group name to its opt_group_i number.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,

    # Aliases to deprecate
    # NOTE(review): the oil:* and ysh:* names map to the same groups; confirm
    # which spelling is the alias slated for deprecation.
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,
}
|
175 |
|
176 |
|
def OptionGroupNum(s):
    # type: (str) -> int
    """Return the group number for group name `s`, or NO_INDEX (0) if unknown."""
    group_num = OPTION_GROUPS.get(s, NO_INDEX)
    return group_num
|
180 |
|
181 |
|
# Option table from option_def, queried by name in OptionNum().
_OPTION_DICT = option_def.OptionDict()


def OptionNum(s):
    # type: (str) -> int
    """Return the option number for option name `s`.

    Returns NO_INDEX (0) when the name is not found, the same sentinel that
    OptionGroupNum() and the Lookup*Builtin() functions use.
    """
    return _OPTION_DICT.get(s, NO_INDEX)
|
188 |
|
189 |
|
# Control flow names, for membership tests.
_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]

# Id -> name, for ControlFlowName().
_CONTROL_FLOW_LOOKUP = {
    id_: name for _, name, id_ in lexer_def.CONTROL_FLOW
}
|
194 |
|
195 |
|
def ControlFlowName(id_):
    # type: (int) -> str
    """Return the control flow name for an Id; used for tracing.

    Raises KeyError if `id_` is not in lexer_def.CONTROL_FLOW.
    """
    flow_name = _CONTROL_FLOW_LOOKUP[id_]
    return flow_name
|
200 |
|
201 |
|
def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if `name` is one of the names in lexer_def.CONTROL_FLOW."""
    if name in _CONTROL_FLOW_NAMES:
        return True
    return False
|
205 |
|
206 |
|
def IsKeyword(name):
    # type: (str) -> bool
    """Return True if `name` is listed in OSH_KEYWORD_NAMES."""
    if name in OSH_KEYWORD_NAMES:
        return True
    return False
|
210 |
|
211 |
|
212 | #
|
213 | # osh/prompt.py and osh/word_compile.py
|
214 | #
|
215 |
|
# Maps the character following a backslash to the character it denotes.
_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Translate a one-character escape code; KeyError if `c` is unknown."""
    translated = _ONE_CHAR_C[c]
    return translated
|
238 |
|
239 |
|
240 | # NOTE: Prompts chars and printf are inconsistent, e.g. \E is \e in printf, but
|
241 | # not in PS1.
|
# Maps the character following a backslash in a prompt string to the
# character it denotes.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Translate a one-character prompt escape code; None if `c` is unknown."""
    translated = _ONE_CHAR_PROMPT.get(c)
    return translated
|
255 |
|
256 |
|
257 | #
|
258 | # Constants used by osh/split.py
|
259 | #
|
260 |
|
261 | # IFS splitting is complicated in general. We handle it with three concepts:
|
262 | #
|
# - CH.*   - Kinds of characters (edge labels)
# - ST.*   - States (node labels)
# - EMIT.* - Actions
#
# The Split() loop that consumes these constants (in osh/split.py, not in this
# file) classifies characters, follows state transitions, and emits spans.  A
# span is an (ignored Bool, end_index Int) pair.
|
269 |
|
270 | # As an example, consider this string:
|
271 | # 'a _ b'
|
272 | #
|
273 | # The character classes are:
|
274 | #
|
275 | # a ' ' _ ' ' b
|
276 | # Black DE_White DE_Gray DE_White Black
|
277 | #
|
278 | # The states are:
|
279 | #
|
280 | # a ' ' _ ' ' b
|
281 | # Black DE_White1 DE_Gray DE_White2 Black
|
282 | #
|
283 | # DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
|
284 | #
|
285 | # The spans emitted are:
|
286 | #
|
287 | # (part 'a', ignored ' _ ', part 'b')
|
288 |
|
289 | # SplitForRead() will check if the last two spans are a \ and \\n. Easy.
|
290 |
|
291 | # Shorter names for state machine enums
|
292 | from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
|
293 | from _devbuild.gen.runtime_asdl import emit_i as EMIT
|
294 | from _devbuild.gen.runtime_asdl import char_kind_i as CH
|
295 | from _devbuild.gen.runtime_asdl import state_i as ST
|
296 |
|
# Transition table of the IFS state machine:
#
#     (current state, character kind) -> (next state, action to emit)
#
# The trailing comment on each entry is an example input that takes that edge,
# where '_' stands for a non-whitespace IFS character and 'a' for an ordinary
# character.
_IFS_EDGES = {
    # --- from ST.Start ---
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''

    # --- from ST.DE_White1 ---
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # '  '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '

    # --- from ST.DE_Gray ---
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): every other backslash edge out of a non-Backslash state
    # goes to ST.Backslash; this one goes to ST.Black.  Confirm it is intended.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '

    # --- from ST.DE_White2 ---
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_  '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '

    # --- from ST.Black ---
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # --- from ST.Backslash ---
    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
|
340 |
|
341 |
|
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow one edge of the IFS state machine.

    Returns the (next state, action) pair for being in `state` and reading a
    character of kind `ch`.  Raises KeyError if _IFS_EDGES has no such edge.
    """
    edge_key = (state, ch)
    return _IFS_EDGES[edge_key]
|
346 |
|
347 |
|
348 | # Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
|
349 | #
|
350 | # We want submatch extraction, which would need a new type of binding, and
|
351 | # doing it with libc seems easy enough.
|
352 |
|
# Groups: 1 = variable name, 2 = the optional '=value' / '+=value' suffix,
# 3 = the operator ('=' or '+='), 4 = the value.
#
# The second fragment is a raw string so that \+ reaches the regex engine
# verbatim instead of relying on Python passing through an unrecognized string
# escape.  The resulting pattern is identical.
ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'
|
354 |
|
355 | # Eggex equivalent:
|
356 | #
|
357 | # VarName = /
|
358 | # [a-z A-Z _ ]
|
359 | # [a-z A-Z 0-9 _ ]*
|
360 | # /
|
361 | #
|
362 | # SplitArg = /
|
363 | # %begin
|
364 | # < capture VarName >
|
365 | # < capture
|
366 | # < capture '=' | '+=' > < capture dot* >
|
367 | # > ?
|
368 | # %end
|
369 | #
|
370 | # Note: we use < > for grouping because ERE has no non-capturing group.
|