OILS / core / alloc.py View on Github | oilshell.org

320 lines, 137 significant
1"""
2alloc.py - strategies for managing SourceLine and Token
3
4"""
5
6from _devbuild.gen.syntax_asdl import source_t, Token, SourceLine, loc
7from asdl import runtime
8from core import error
9from mycpp.mylib import log
10
11from typing import List, Dict, Any
12
13_ = log
14
15
def SnipCodeBlock(left, right, lines):
    # type: (Token, Token, List[SourceLine]) -> str
    """Return the code string between left and right tokens, EXCLUSIVE.

    Meaning { } are not included.

    Used for Hay evaluation.  Similar to SnipCodeString().

    Args:
      left: the opening token; its length must be 1
      right: the closing token; its length must be 1
      lines: the lines to search; must contain left.line and right.line

    Returns:
      The text strictly between the two tokens, prefixed with (left.col + 1)
      spaces, so the first snipped character sits at the same column it had
      on its original line.

    Both the single-line and the multi-line case fail an assertion when a
    token's line is not present in `lines`.
    """
    assert left.length == 1, "{ expected"
    assert right.length == 1, "} expected"

    pieces = []  # type: List[str]

    # Pad with spaces so column numbers aren't off
    pieces.append(' ' * (left.col + 1))

    # Index of the first character after the left token
    body_start = left.col + left.length

    if left.line == right.line:
        # Both tokens are on one line: take the slice between them.
        found = False
        for li in lines:
            if li == left.line:
                found = True
                pieces.append(li.content[body_start:right.col])
                break  # nothing more to collect

        # Fail loudly, like the multi-line case below, instead of silently
        # returning only the padding.
        assert found, "Couldn't find left token"
        return ''.join(pieces)

    saving = False
    found_left = False
    found_right = False
    for li in lines:
        if li == left.line:
            found_left = True
            saving = True

            # Save everything after the left token
            pieces.append(li.content[body_start:])
            continue

        if li == right.line:
            found_right = True

            # Save everything before the right token, then stop
            pieces.append(li.content[:right.col])
            saving = False
            break

        if saving:
            # A whole line in between the two tokens
            pieces.append(li.content)

    assert found_left, "Couldn't find left token"
    assert found_right, "Couldn't find right token"
    return ''.join(pieces)
70
71
class ctx_SourceCode(object):
    """Context manager that makes `src` the arena's current source.

    The source is pushed when this object is CONSTRUCTED, not in __enter__(),
    and it is popped again in __exit__().
    """

    def __init__(self, arena, src):
        # type: (Arena, source_t) -> None
        self.arena = arena
        # The push happens here; __enter__() below does nothing.
        self.arena.PushSource(src)

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Undo the push from __init__().  The exception info is ignored.
        self.arena.PopSource()
86
87
class Arena(object):
    """Manages source_t, SourceLine, Token."""

    def __init__(self, save_tokens=False):
        # type: (bool) -> None

        # When true, NewToken() records each token it creates.
        self.save_tokens = save_tokens

        # indexed by span_id
        self.tokens = []  # type: List[Token]
        self.num_tokens = 0

        # Token -> span_id.  Only used in tools
        self.span_id_lookup = {}  # type: Dict[Token, int]

        # All lines that haven't been discarded.  For LST formatting.
        self.lines_list = []  # type: List[SourceLine]

        # Stack of sources.  The top entry is shared by every SourceLine
        # created while it is current.
        self.source_instances = []  # type: List[source_t]

    def SaveTokens(self):
        # type: () -> None
        """Turn on token saving.

        Used by --tool X.  Do we need LosslessArena?
        """
        self.save_tokens = True

    def PushSource(self, src):
        # type: (source_t) -> None
        """Make src the current source for lines added from now on."""
        self.source_instances.append(src)

    def PopSource(self):
        # type: () -> None
        """Drop the current source, restoring the one pushed before it."""
        self.source_instances.pop()

    def AddLine(self, line, line_num):
        # type: (str, int) -> SourceLine
        """Save a physical line and return the SourceLine created for it.

        The line number is 1-based.  The line is attached to the source on top
        of the source stack.
        """
        current_src = self.source_instances[-1]
        src_line = SourceLine(line_num, line, current_src)
        self.lines_list.append(src_line)
        return src_line

    def DiscardLines(self):
        # type: () -> None
        """Remove references to lines we've accumulated.

        - This makes the linear search in SnipCodeString() shorter.
        - It removes the ARENA's references to all lines.  The TOKENS still
          reference some lines.
        """
        del self.lines_list[:]

    def SaveLinesAndDiscard(self, left, right):
        # type: (Token, Token) -> List[SourceLine]
        """Save the lines between two tokens, e.g. for { and }

        Returns the lines from left.line through right.line (both included),
        then discards every line the arena holds.

        Why?
        - In between { }, we want to preserve lines not pointed to by a token,
          e.g. comment lines.
        - But we don't want to save all lines in an interactive shell:
          echo 1
          echo 2
          ...
          echo 500000
          echo 500001

        The lines should be freed after execution takes place.
        """
        kept = []  # type: List[SourceLine]
        in_block = False
        for li in self.lines_list:
            if li == left.line:
                in_block = True

            # These lines are PERMANENT, and never deleted.  What if you
            # overwrite a function name?  You might want to save those in the
            # function record ITSELF.
            #
            # This is for INLINE hay blocks that can be evaluated at any point.
            # In contrast, parse_hay(other_file) uses ParseWholeFile, and we
            # could save all lines.

            # TODO: consider creating a new Arena for each CommandParser?  Or
            # rename it to 'BackingLines' or something.

            # TODO: We should mutate li.line_id here so it's the index into
            # saved_lines?
            if in_block:
                kept.append(li)

            # Stop at the right token's line, whether or not we were saving.
            if li == right.line:
                break

        self.DiscardLines()
        return kept

    def SnipCodeString(self, left, right):
        # type: (Token, Token) -> str
        """Return the code string between left and right tokens, INCLUSIVE.

        Used for ALIAS expansion, which happens in the PARSER.

        The argument to aliases can span multiple lines, like this:

        $ myalias '1 2 3'
        """
        first_line = left.line
        last_line = right.line
        # One past the last character of the right token
        end_col = right.col + right.length

        if first_line == last_line:
            for li in self.lines_list:
                if li == first_line:
                    return li.content[left.col:end_col]
            # Not found: fall through, and the assertions below fail.

        parts = []  # type: List[str]
        found_left = False
        found_right = False
        for li in self.lines_list:
            if li == first_line:
                found_left = True
                # Save everything from the left token onward
                parts.append(li.content[left.col:])
            elif li == last_line:
                found_right = True
                # Save everything up to the end of the right token, then stop
                parts.append(li.content[:end_col])
                break
            elif found_left:
                # A whole line in between the two tokens
                parts.append(li.content)

        assert found_left, "Couldn't find left token"
        assert found_right, "Couldn't find right token"
        return ''.join(parts)

    def NewToken(self, id_, col, length, src_line):
        # type: (int, int, int, SourceLine) -> Token
        """Create a Token, recording it when token saving is on.

        Raises:
          error.Parse: if length >= 65536
        """
        if length >= 65536:
            raise error.Parse(
                '',  # ignored message
                loc.TokenTooLong(src_line, id_, length, col))

        tok = Token(id_, length, col, src_line, None)
        if not self.save_tokens:
            return tok

        # The running token count is the new token's span ID.
        span_id = self.num_tokens
        self.tokens.append(tok)
        self.span_id_lookup[tok] = span_id
        self.num_tokens = span_id + 1
        return tok

    def UnreadOne(self):
        # type: () -> None
        """Reuse the last span ID."""
        if not self.save_tokens:
            return

        self.tokens.pop()
        self.num_tokens -= 1

    def GetToken(self, span_id):
        # type: (int) -> Token
        """Return the saved token with the given span ID."""
        assert span_id != runtime.NO_SPID, span_id

        num_saved = len(self.tokens)
        assert span_id < num_saved, \
            'Span ID out of range: %d is greater than %d' % (span_id, num_saved)
        return self.tokens[span_id]

    def GetSpanId(self, tok):
        # type: (Token) -> int
        """Given a Token, return its sequence number."""
        assert tok in self.span_id_lookup
        return self.span_id_lookup[tok]

    def LastSpanId(self):
        # type: () -> int
        """Return one past the last span ID."""
        return len(self.tokens)
289
290
class LosslessArena(Arena):
    """Placeholder for an arena with the lossless invariant.

    TODO: not implemented yet -- this class currently adds nothing to Arena.

    Intended to have the lossless invariant, for
      --tool fmt
      --tool ysh-ify

    Retains all SourceLine and Token

    Open questions: Somehow disallow re-parsing?  Is that equivalent to
    ctx_SourceCode()?
    """
    pass
304
305
class DynamicArena(Arena):
    """Placeholder for the arena used by the batch and interactive shell.

    This class currently adds nothing to Arena.

    TODO:
    - Test that SourceLine and Token are GC'd

    However, it should support:
    - SnipCodeString() for aliases
    - SnipCodeBlock() for Hay

    Is neither of those necessary in the LosslessArena?  We might have
    different utilities there.
    """
    pass