frontend/consts.py

OILS / frontend / consts.py View on Github | oilshell.org

383 lines, 182 significant

1	#!/usr/bin/env python2
2	"""Consts.py."""
3	from __future__ import print_function
4
5	from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
6	bool_arg_type_t, opt_group_i)
7	from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
8	from frontend import builtin_def
9	from frontend import lexer_def
10	from frontend import option_def
11
12	from typing import Tuple, Optional, TYPE_CHECKING
13	if TYPE_CHECKING:
14	from _devbuild.gen.option_asdl import option_t, builtin_t
15
# Index value that means "not found" (for Resolve).
NO_INDEX = 0

# Re-exported from option_def so they can be used as consts::STRICT_ALL, etc.
# The assignments are spelled out explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

# Options whose 'builtin' field is 'set': indices and names, both in the
# order of option_def._SORTED.
SET_OPTION_NUMS = [opt.index for opt in option_def._SORTED
                   if opt.builtin == 'set']
SET_OPTION_NAMES = [opt.name for opt in option_def._SORTED
                    if opt.builtin == 'set']

# Same, for options whose 'builtin' field is 'shopt'.
SHOPT_OPTION_NUMS = [opt.index for opt in option_def._SORTED
                     if opt.builtin == 'shopt']
SHOPT_OPTION_NAMES = [opt.name for opt in option_def._SORTED
                      if opt.builtin == 'shopt']

# Used to print
VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS

# Used by builtin_comp.py
BUILTIN_NAMES = builtin_def.BUILTIN_NAMES
43
# Keyword names exposed for introspection via bash 'compgen' and 'type'.
# Starts with every name in lexer_def.KEYWORDS, then adds a few extras.
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.extend([
    '{',  # not handled by our lexer
    '=',  # YSH keyword not handled by our lexer
    # bash considers these closing delimiters keywords
    '}',
    ']]',
])
52
53
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """Return the Kind of a token Id, for coarse-grained parsing decisions.

    Raises KeyError if id_ has no entry in ID_TO_KIND.
    """
    # Imported here rather than at module level to break a circular dep.
    from _devbuild.gen.id_kind import ID_TO_KIND

    kind = ID_TO_KIND[id_]
    return kind
60
61
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Return the BOOL_ARG_TYPES entry for the given Id.

    Raises KeyError if id_ has no entry in that table.
    """
    # Imported here rather than at module level to break a circular dep.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES

    arg_type = BOOL_ARG_TYPES[id_]
    return arg_type
67
68
69	#
70	# Redirect Tables associated with IDs
71	#
72
# Redirect operator Id -> file descriptor used when none is written
# explicitly.
REDIR_DEFAULT_FD = {
    # Operators that take a filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write

    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # Operators that take a descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think

    # Here word
    Id.Redir_TLess: 0,

    # Here docs are included in this table
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}
93
# Redirect operator Id -> kind of argument that follows the operator.
REDIR_ARG_TYPES = {
    # Operators that take a filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,

    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # Operators that take a descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # Here word
    Id.Redir_TLess: redir_arg_type_e.Here,

    # note: here docs aren't included
}
111
112
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the argument type for a redirect operator Id.

    Raises KeyError for Ids missing from REDIR_ARG_TYPES (here docs are not
    in that table).
    """
    arg_type = REDIR_ARG_TYPES[id_]
    return arg_type
116
117
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the default file descriptor for a redirect operator Id.

    Raises KeyError for Ids missing from REDIR_DEFAULT_FD.
    """
    fd = REDIR_DEFAULT_FD[id_]
    return fd
121
122
123	#
124	# Builtins
125	#
126
# Builtin lookup table keyed by argv0, built once at import time.
_BUILTIN_DICT = builtin_def.BuiltinDict()


def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the special builtin named argv0.

    Returns NO_INDEX when argv0 is not a builtin, or is a builtin of another
    kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'special':
        return NO_INDEX
    return spec.index
138
139
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the assignment builtin named argv0.

    Returns NO_INDEX when argv0 is not a builtin, or is a builtin of another
    kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'assign':
        return NO_INDEX
    return spec.index
148
149
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the normal builtin named argv0.

    Returns NO_INDEX when argv0 is not a builtin, or is a builtin of another
    kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'normal':
        return NO_INDEX
    return spec.index
158
159
def OptionName(opt_num):
    # type: (option_t) -> str
    """Look up an option's name in option_def.OPTION_NAMES by its index."""
    name = option_def.OPTION_NAMES[opt_num]
    return name
164
165
# Option group name (as written by the user) -> opt_group_i value.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,

    # Aliases to deprecate
    # NOTE(review): presumably this refers to the oil:* spellings, which map
    # to the same groups as ysh:* -- confirm.
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,
}
175
176
def OptionGroupNum(s):
    # type: (str) -> int
    """Return the group number for option group name s.

    Returns NO_INDEX (0) when s is not a key of OPTION_GROUPS.
    """
    if s in OPTION_GROUPS:
        return OPTION_GROUPS[s]
    return NO_INDEX  # 0 for not found
180
181
# Option lookup table keyed by name, built once at import time.
_OPTION_DICT = option_def.OptionDict()


def OptionNum(s):
    # type: (str) -> int
    """Return the option number for name s.

    Returns NO_INDEX (0) when s is not a known option name.  The named
    sentinel is used instead of a bare 0 so this agrees with OptionGroupNum()
    and the builtin lookup functions.
    """
    return _OPTION_DICT.get(s, NO_INDEX)
188
189
# Names of the control flow keywords, in the order of lexer_def.CONTROL_FLOW.
_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]

# Reverse mapping: Id -> name.  If an Id appears more than once, the last
# entry wins.
_CONTROL_FLOW_LOOKUP = dict(
    (id_, name) for _, name, id_ in lexer_def.CONTROL_FLOW)
194
195
def ControlFlowName(id_):
    # type: (int) -> str
    """Return the keyword name for a control flow Id.  For tracing.

    Raises KeyError if id_ is not in the control flow table.
    """
    name = _CONTROL_FLOW_LOOKUP[id_]
    return name
200
201
def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if name is one of the control flow keyword names."""
    if name in _CONTROL_FLOW_NAMES:
        return True
    return False
205
206
def IsKeyword(name):
    # type: (str) -> bool
    """Return True if name is listed in OSH_KEYWORD_NAMES."""
    if name in OSH_KEYWORD_NAMES:
        return True
    return False
210
211
212	#
213	# osh/prompt.py and osh/word_compile.py
214	#
215
# Character that follows a backslash -> the character the escape stands for.
_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Translate a one-character C-style escape code to its character.

    Fatal if not present: an unknown code raises KeyError.
    """
    translated = _ONE_CHAR_C[c]
    return translated
238
239
# NOTE: Prompts chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.  That is why this table is separate from, and smaller than,
# _ONE_CHAR_C.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Translate a one-character prompt escape code to its character.

    Returns None if not present.
    """
    if c in _ONE_CHAR_PROMPT:
        return _ONE_CHAR_PROMPT[c]
    return None
255
256
257	#
258	# Constants used by osh/split.py
259	#
260
261	# IFS splitting is complicated in general. We handle it with three concepts:
262	#
263	# - CH.* - Kinds of characters (edge labels)
264	# - ST.* - States (node labels)
265	# - EMIT.* Actions
266	#
267	# The Split() loop below classifies characters, follows state transitions, and
268	# emits spans. A span is a (ignored Bool, end_index Int) pair.
269
270	# As an example, consider this string:
271	# 'a _ b'
272	#
273	# The character classes are:
274	#
275	# a ' ' _ ' ' b
276	# Black DE_White DE_Gray DE_White Black
277	#
278	# The states are:
279	#
280	# a ' ' _ ' ' b
281	# Black DE_White1 DE_Gray DE_White2 Black
282	#
283	# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
284	#
285	# The spans emitted are:
286	#
287	# (part 'a', ignored ' _ ', part 'b')
288
289	# SplitForRead() will check if the last two spans are a \ and \\n. Easy.
290
291	# Shorter names for state machine enums
292	from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
293	from _devbuild.gen.runtime_asdl import emit_i as EMIT
294	from _devbuild.gen.runtime_asdl import char_kind_i as CH
295	from _devbuild.gen.runtime_asdl import state_i as ST
296
# Transition table of the IFS splitting state machine:
#
#   (current state, kind of next char) -> (next state, action to emit)
#
# Entries are grouped by current state.  The trailing comment on each line
# shows example input that takes that edge.
_IFS_EDGES = {
    #
    # From ST.Start
    #
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''

    #
    # From ST.DE_White1
    #
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # '  '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '

    #
    # From ST.DE_Gray
    #
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): from every other non-Backslash state, a backslash leads
    # to ST.Backslash; this edge leads to ST.Black.  Confirm it is intended.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '

    #
    # From ST.DE_White2
    #
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_  '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '

    #
    # From ST.Black
    #
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    #
    # From ST.Backslash
    #
    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
340
341
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow one edge of the IFS state machine.

    Returns the (next state, emit action) pair stored in _IFS_EDGES for the
    current state and character kind.  Raises KeyError if there is no such
    edge.
    """
    edge_key = (state, ch)
    return _IFS_EDGES[edge_key]
346
347
# Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.
#
# Groups: 1 = NAME, 2 = the optional operator-plus-value suffix,
# 3 = the operator ('=' or '+='), 4 = the value.
#
# The alternation must be a bare '|'.  Written as '\|' it is a literal pipe
# character in an extended regex, and NAME=value would never match.

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'
354
# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   <capture VarName>
#   (
#     <capture '=' | '+='> <capture dot*>
#   )?
#   %end
# /
369
# Weird rules for brackets: put ] first
NOT_BRACKETS = '[^][]*'

# Matches NAME or NAME[subscript], where the subscript contains no brackets.
# Groups: 1 = NAME, 2 = the optional '[...]' suffix, 3 = the subscript text.
#
# Every fragment that contains a backslash is a raw string, so that '\[' and
# '\]' reach the regex engine as written and are not flagged by Python as
# invalid string escapes.
TEST_V_RE = '^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS + r')\])?$'
373
# NotBrackets = / ![ ']' '[' ]* /
#
# TestV = /
#   %begin
#   <capture VarName>
#   (
#     '[' <capture NotBrackets> ']'
#   )?
#   %end
# /