core/alloc.py

OILS / core / alloc.py View on Github | oilshell.org

320 lines, 137 significant

1	"""
2	alloc.py - strategies for managing SourceLine and Token
3
4	"""
5
6	from _devbuild.gen.syntax_asdl import source_t, Token, SourceLine, loc
7	from asdl import runtime
8	from core import error
9	from mycpp.mylib import log
10
11	from typing import List, Dict, Any
12
13	_ = log
14
15
def SnipCodeBlock(left, right, lines):
    # type: (Token, Token, List[SourceLine]) -> str
    """Return the source text strictly between two one-character tokens.

    The tokens are expected to be { and }, and neither is part of the result.
    The result starts with (left.col + 1) spaces, so text on the first line
    stays at the column it had in the original source.

    Used for Hay evaluation.  Similar to SnipCodeString(), which is INCLUSIVE.

    Args:
      left: the opening token; must have length 1
      right: the closing token; must have length 1
      lines: lines to search, in order, for left.line and right.line

    Asserts that both tokens have length 1 and that both of their lines are
    found in 'lines'.
    """
    assert left.length == 1, "{ expected"
    assert right.length == 1, "} expected"

    # Blank padding stands in for everything up to and including the left
    # token, so column numbers aren't off.
    chunks = [' ' * (left.col + 1)]  # type: List[str]

    # Offset of the first character after the left token
    after_left = left.col + left.length

    if left.line == right.line:
        # Both tokens are on one line: a single slice is the whole answer.
        for src_line in lines:
            if src_line == left.line:
                chunks.append(src_line.content[after_left:right.col])
                return ''.join(chunks)

    seen_left = False
    seen_right = False
    for src_line in lines:
        if src_line == left.line:
            # Keep the tail of the opening line
            seen_left = True
            chunks.append(src_line.content[after_left:])
        elif src_line == right.line:
            # Keep the head of the closing line, then we're done
            seen_right = True
            chunks.append(src_line.content[:right.col])
            break
        elif seen_left:
            # A whole line between the two tokens
            chunks.append(src_line.content)

    assert seen_left, "Couldn't find left token"
    assert seen_right, "Couldn't find right token"
    return ''.join(chunks)
70
71
class ctx_SourceCode(object):
    """Context manager that keeps a source pushed on an arena for its lifetime.

    The source is pushed when the object is constructed, not in __enter__(),
    and popped again in __exit__().
    """

    def __init__(self, arena, src):
        # type: (Arena, source_t) -> None
        arena.PushSource(src)
        # Remembered so that __exit__() can undo the push
        self.arena = arena

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the push already happened in __init__()
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.arena.PopSource()
86
87
class Arena(object):
    """Manages source_t, SourceLine, Token.

    The arena holds:
    - a stack of source_t instances; the top one is attached to every line
      passed to AddLine()
    - the lines added since the last DiscardLines()
    - when save_tokens is set, every Token created by NewToken(), so that it
      can be looked up by span ID and vice versa
    """

    def __init__(self, save_tokens=False):
        # type: (bool) -> None

        # When True, NewToken() records each token in self.tokens and
        # self.span_id_lookup
        self.save_tokens = save_tokens

        # indexed by span_id
        self.tokens = []  # type: List[Token]
        self.num_tokens = 0

        # Only used in tools
        self.span_id_lookup = {}  # type: Dict[Token, int]

        # All lines that haven't been discarded.  For LST formatting.
        self.lines_list = []  # type: List[SourceLine]

        # reuse these instances in many line_span instances
        self.source_instances = []  # type: List[source_t]

    def SaveTokens(self):
        # type: () -> None
        """Start recording tokens created by NewToken().

        Used by --tool X.  Do we need LosslessArena?
        """
        self.save_tokens = True

    def PushSource(self, src):
        # type: (source_t) -> None
        """Make 'src' the source attached to lines added from now on."""
        self.source_instances.append(src)

    def PopSource(self):
        # type: () -> None
        """Undo the most recent PushSource()."""
        self.source_instances.pop()

    def AddLine(self, line, line_num):
        # type: (str, int) -> SourceLine
        """Save a physical line and return the SourceLine created for it.

        The line number is 1-based.  The line is associated with the source
        on top of the stack, so PushSource() must have been called first.
        """
        src_line = SourceLine(line_num, line, self.source_instances[-1])
        self.lines_list.append(src_line)
        return src_line

    def DiscardLines(self):
        # type: () -> None
        """Remove references to lines we've accumulated.

        - This makes the linear search in SnipCodeString() shorter.
        - It removes the ARENA's references to all lines.  The TOKENS still
          reference some lines.
        """
        del self.lines_list[:]

    def SaveLinesAndDiscard(self, left, right):
        # type: (Token, Token) -> List[SourceLine]
        """Save the lines between two tokens, e.g. for { and }

        Returns the lines from left.line through right.line, inclusive, and
        then discards ALL lines held by the arena.

        Why?
        - In between { }, we want to preserve lines not pointed to by a token,
          e.g. comment lines.
        - But we don't want to save all lines in an interactive shell:
            echo 1
            echo 2
            ...
            echo 500000
            echo 500001

        The lines should be freed after execution takes place.
        """
        saved = []  # type: List[SourceLine]
        saving = False
        for li in self.lines_list:
            if li == left.line:
                saving = True

            # These lines are PERMANENT, and never deleted.  What if you
            # overwrite a function name?  You might want to save those in the
            # function record ITSELF.
            #
            # This is for INLINE hay blocks that can be evaluated at any
            # point.  In contrast, parse_hay(other_file) uses ParseWholeFile,
            # and we could save all lines.

            # TODO: consider creating a new Arena for each CommandParser?  Or
            # rename it to 'BackingLines' or something.

            # TODO: We should mutate li.line_id here so it's the index into
            # saved_lines?
            if saving:
                saved.append(li)

            # Checked AFTER saving, so the right token's line is included
            if li == right.line:
                saving = False
                break

        self.DiscardLines()
        return saved

    def SnipCodeString(self, left, right):
        # type: (Token, Token) -> str
        """Return the code string between left and right tokens, INCLUSIVE.

        Used for ALIAS expansion, which happens in the PARSER.

        The argument to aliases can span multiple lines, like this:

        $ myalias '1 2 3'

        Asserts that the lines of both tokens are still held by the arena,
        i.e. they haven't been discarded.
        """
        if left.line == right.line:
            for li in self.lines_list:
                if li == left.line:
                    piece = li.content[left.col:right.col + right.length]
                    return piece

        pieces = []  # type: List[str]
        saving = False
        found_left = False
        found_right = False
        for li in self.lines_list:
            if li == left.line:
                found_left = True
                saving = True

                # Save everything from the left token onward
                piece = li.content[left.col:]
                pieces.append(piece)
                continue

            if li == right.line:
                found_right = True

                # Save everything up to the end of the right token
                piece = li.content[:right.col + right.length]
                pieces.append(piece)

                saving = False
                break

            if saving:
                pieces.append(li.content)

        assert found_left, "Couldn't find left token"
        assert found_right, "Couldn't find right token"
        return ''.join(pieces)

    def NewToken(self, id_, col, length, src_line):
        # type: (int, int, int, SourceLine) -> Token
        """Create a Token, recording it under a new span ID if save_tokens.

        Raises:
          error.Parse: if length is 65536 or more.
        """
        if length >= 65536:
            raise error.Parse(
                '',  # ignored message
                loc.TokenTooLong(src_line, id_, length, col))

        tok = Token(id_, length, col, src_line, None)
        if self.save_tokens:
            span_id = self.num_tokens
            self.num_tokens += 1

            self.tokens.append(tok)
            self.span_id_lookup[tok] = span_id
        return tok

    def UnreadOne(self):
        # type: () -> None
        """Reuse the last span ID."""
        if self.save_tokens:
            tok = self.tokens.pop()
            # Forget the reverse mapping too.  Otherwise the stale token would
            # share a span ID with the next token from NewToken(), and the
            # dict would keep it alive.
            if tok in self.span_id_lookup:
                del self.span_id_lookup[tok]
            self.num_tokens -= 1

    def GetToken(self, span_id):
        # type: (int) -> Token
        """Return the saved Token with the given span ID."""
        assert span_id != runtime.NO_SPID, span_id
        assert span_id < len(self.tokens), \
            'Span ID out of range: %d is not less than %d' % (span_id, len(self.tokens))
        return self.tokens[span_id]

    def GetSpanId(self, tok):
        # type: (Token) -> int
        """Given a Token, return its sequence number (span ID).

        Only works for tokens recorded while save_tokens was set.
        """
        assert tok in self.span_id_lookup
        return self.span_id_lookup[tok]

    def LastSpanId(self):
        # type: () -> int
        """Return one past the last span ID."""
        return len(self.tokens)
289
290
class LosslessArena(Arena):
    """Placeholder for an Arena that upholds the lossless invariant.

    TODO: not implemented yet; for now this adds nothing to Arena.

    Intended for:
      --tool fmt
      --tool ysh-ify

    It should retain all SourceLine and Token.

    Open question: should re-parsing somehow be disallowed?  Is that
    equivalent to ctx_SourceCode()?
    """
    pass
304
305
class DynamicArena(Arena):
    """Placeholder for the Arena used by the batch and interactive shell.

    For now this adds nothing to Arena.

    TODO:
    - Test that SourceLine and Token are GC'd

    Even so, it should support:
    - SnipCodeString() for aliases
    - SnipCodeBlock() for Hay

    Open question: are either of those necessary in the LosslessArena?  We
    might have different utilities there.
    """
    pass