OILS / demo / old / ovm2 / oheap2.py View on Github | oilshell.org

402 lines, 215 significant
1#!/usr/bin/env python2
2"""
3oheap2.py
4
5OVM2 stores objects in a data structure called OHeap2. It has the following
6concepts.
7
8- Handles: 4-byte / 32-bit integers that point to Cells.
9- Cells: 16-byte structs with a type tag, in a contiguous array. This array is
10 scanned by the GC.
11- Slabs: Variable-length arrays of memory that are owned by a Cell. Each slab
12 may be malloc'd individually.
13 - In the case of a string, it's opaque to the GC.
14 - In the case of a tuple or struct, it must be scanned by the GC.
15
16This is called the "4-16-N design".
17
18Every cell has a type tag, which is simply an integer. Negative integers
19indicate native types implemented in C. Positive integers are for user-defined
20types.
21
22Cells also have an is_slab bit and a length for small. TODO: is_slab should be
23is_small?
24
25Operations on OHeap
26-------------------
27
281. Allocate a new cell, possibly resizing
29
302. Garbage Collect
31 - Walk all the cells. Free unused cells along with their slabs.
32
333. Save to file
34 Slabs: Native Pointers to Offsets
35
363a. Copy an object tree to a fresh oheap? e.g. for IPC. I guess this is like "Save".
37 - Walk all the cells and slabs
38
394. Load from file into fresh OHeap
40 - Patch Slab Offsets to Native Pointers
41
424a. Load from file and merge into existing OHeap?
43 - Inside a namespace?
44 - Import?
45 - Patch all cell refs!
46
475. Make permanent? Move to another "generation"? OPy classes and bytecode
48perhaps fit in this category.
49
50TODO
51----
52
53- Bits on handle: whether it's const or not?
54 - negative handles could be const?
55
56- Singleton values for None/True/False
57- interning of strings
58- hash tables, and hashes of strings
59
60Introspection
61-------------
62
63These operations should work:
64
65 type()
66 getattr() -- hm in OVM I don't want to look at base classes
67 dir() -- list all attributes. ditto -- don't look at base classes?
68 hm except this returns different stuff for modules, classes, and objects
69 it looks at superclasses
70 isinstance(obj, typ) - check if the type tag of obj points at typ or a superclass
71"""
72from __future__ import print_function
73
74import struct
75import sys
76import types
77
78from core.util import log
79
80
81TAG_NONE = -1
82TAG_BOOL = -2
83TAG_INT = -3
84TAG_FLOAT = -4
85TAG_STR = -5
86TAG_TUPLE = -6
87TAG_CODE = -7
88
89MIN_SMALL_INT = -(1 << 63)
90MAX_SMALL_INT = (1 << 63) - 1
91
92MAX_LEN_SMALL_STR = 11 # 12 bytes - 1 for NUL
93
94MAX_LEN_SMALL_TUPLE = 3 # 4 Handles in 12 bytes?
95 # We can steal a bit from the tag for the small/big
96 # bit.
97
98def u8(i): # 1 byte unsigned
99 return struct.pack('B', i)
100
101def i16(i): # 2 bytes
102 return struct.pack('h', i)
103
104def i32(i): # 4 bytes
105 return struct.pack('i', i)
106
107def i64(i): # 8 bytes (long long)
108 return struct.pack('q', i)
109
110def f64(i): # 8 byte double
111 return struct.pack('d', i)
112
113
114def Align4(i):
115 """Round up to the nearest multiple of 4. See unit tests."""
116 return ((i-1) | 3) + 1
117
118
119def Align16(i):
120 """Round up to the nearest multiple of 16. See unit tests."""
121 return ((i-1) | 15) + 1
122
123
124class Encoder(object):
125 """
126 Write objects into an OHeap2 structure that can be lazily loaded.
127
128 First pass:
129 Append to cells and append to slabs
130 Second pass:
131 Write slabs as bytes, and then patch offsets in cells?
132 Write all the cells
133 Write the root object at the front of the file? Or do it at the end?
134 OHeap writes it at the beginning after
135 """
136
137 def __init__(self):
138 self.chunk = bytearray()
139 # An array of cells
140 self.cells = []
141 # Write all these first? So that the cells can point to them.
142 self.slabs = []
143
144 def Any(self, obj):
145 """
146 Encode an object and return its id.
147 """
148 id_ = len(self.cells)
149
150 # TODO: Enforce that None is a singleton. But what about joining OHeaps?
151 if isinstance(obj, types.NoneType):
152 self.cells.append((TAG_NONE, False, None))
153
154 # TODO: Enforce that True/False is a singleton. But what about joining
155 # OHeaps?
156 elif isinstance(obj, bool):
157 self.cells.append((TAG_BOOL, False, obj))
158
159 elif isinstance(obj, int):
160 i = obj
161 if MIN_SMALL_INT < i < MAX_SMALL_INT:
162 self.cells.append((TAG_INT, False, i))
163 else:
164 raise NotImplementedError
165
166 elif isinstance(obj, float):
167 raise NotImplementedError
168
169 # TODO: Identical strings could be "interned" here.
170 elif isinstance(obj, str):
171 s = obj
172 n = len(s)
173 if n < MAX_LEN_SMALL_STR:
174 self.cells.append((TAG_STR, False, s))
175 else:
176 # length and they payload
177 slab_index = len(self.slabs)
178 self.slabs.append((n, s))
179 self.cells.append((TAG_STR, True, slab_index))
180
181 elif isinstance(obj, tuple):
182 t = obj
183 refs = []
184 for item in t:
185 refs.append(self.Any(item)) # Depth-first.
186
187 # Compute ID after adding all the children.
188 id_ = len(self.cells)
189
190 n = len(t)
191 if n < MAX_LEN_SMALL_TUPLE:
192 # TODO: How do we know how long it is?
193 self.cells.append((TAG_TUPLE, False, refs))
194 else:
195 slab_index = len(self.slabs)
196 self.slabs.append((n, refs))
197 self.cells.append((TAG_TUPLE, True, slab_index))
198
199 elif isinstance(obj, types.CodeType):
200 co = obj
201 refs = []
202
203 # Ints first
204 refs.append(self.Any(co.co_argcount))
205 refs.append(self.Any(co.co_nlocals))
206 refs.append(self.Any(co.co_stacksize))
207 refs.append(self.Any(co.co_flags))
208 refs.append(self.Any(co.co_firstlineno))
209
210 # Strings
211 refs.append(self.Any(co.co_name))
212 refs.append(self.Any(co.co_filename))
213 refs.append(self.Any(co.co_code)) # bytecode
214
215 # Tuples
216 refs.append(self.Any(co.co_names))
217 refs.append(self.Any(co.co_varnames))
218 refs.append(self.Any(co.co_consts))
219
220 slab_index = len(self.slabs)
221 self.slabs.append((len(refs), refs))
222 self.cells.append((TAG_CODE, True, slab_index))
223
224 else:
225 raise AssertionError
226
227 return id_
228
229 def Write(self, f):
230 f.write('OHP2') # magic header
231 f.write(i32(0)) # placeholder for cell offset
232 f.write(i32(0)) # placeholder for root object. NOTE: We could always
233 # fseek(SEEK_END, -4)? But I think we want it to more
234 # self-describing.
235
236 # Write slabs first, so we know their offsets.
237 slab_offsets = []
238 pos = 0 # Reader should patch relative to 12
239
240 for length, payload in self.slabs:
241
242 if isinstance(payload, str):
243 slab_offsets.append(pos)
244
245 f.write(i32(length)) # length in bytes
246 pos += 4
247
248 n = len(payload)
249 aligned = Align4(n+1) # at least NUL byte for terminator
250 f.write(payload)
251 f.write('\0' * (aligned - n)) # padding
252 pos += aligned
253
254 elif isinstance(payload, list): # list of references
255 slab_offsets.append(pos)
256
257 # length in references. Could be unsigned?
258 f.write(i32(length))
259
260 # NOTE: There is no GC offset, since all of them are scanned?
261 for ref in payload:
262 f.write(i32(ref))
263 pos += 4
264 pos += len(payload) * 4
265
266 else:
267 raise AssertionError(payload)
268
269 log('Slab offsets: %s', slab_offsets)
270
271 # Pad out the slabs so that the cells begins at a multiple of 16.
272 total_slab_size = Align16(pos) # including pad, but not including header.
273 f.write('\0' * (total_slab_size - pos)) # Write the pad
274
275 # Encode it into 16 bytes
276 for tag, is_slab, val in self.cells:
277 #log('%s %s %s', tag, is_slab, val)
278
279 # 4 byte tag. This may be patched into a type Handle?
280 # Reserve LSB for is_slab? Or highest bit?
281 # The tag could be (i >> 1) then?
282 f.write(i16(tag))
283
284 if tag == TAG_NONE:
285 f.write(i16(0))
286 f.write(i32(0))
287 f.write(i32(0))
288 f.write(i32(0))
289
290 elif tag == TAG_BOOL:
291 f.write(i16(0)) # pad
292 f.write(i32(0)) # pad
293 f.write(i32(0)) # pad
294 f.write(i32(int(val))) # 0 or 1
295
296 elif tag == TAG_INT:
297 assert not is_slab, val
298 f.write(i16(0)) # Padding
299 f.write(i32(0)) # Padding
300 f.write(i64(val))
301
302 elif tag == TAG_FLOAT:
303 assert not is_slab, val
304 f.write(i16(0)) # Padding
305 f.write(i32(0)) # Padding
306 f.write(f64(val))
307
308 elif tag == TAG_STR:
309 if is_slab:
310 # For a string, a big/small bit could technically be in the last
311 # byte. To reuse NUL terminator. But I want the patching process to
312 # be consistent.
313 slab_index = val
314 offset = slab_offsets[slab_index]
315 f.write(u8(1)) # is_slab
316 f.write(u8(0)) # length stored in slab
317 f.write(i32(0)) # pad
318 f.write(i32(0)) # pad
319 f.write(i32(offset))
320 else:
321 n = len(val)
322 f.write(u8(0)) # is_slab
323 f.write(u8(n)) # length stored here
324 f.write(val)
325 num_pad = 12 - n # at least one NUL
326 f.write('\0' * num_pad)
327
328 elif tag == TAG_TUPLE:
329 if is_slab:
330 slab_index = val
331 offset = slab_offsets[slab_index]
332 f.write(u8(1)) # is_slab
333 f.write(u8(0)) # length stored in slab
334 f.write(i32(0)) # pad
335 f.write(i32(0)) # pad
336 f.write(i32(offset))
337 else:
338 n = len(val)
339 f.write(u8(0)) # is_slab
340 f.write(u8(n))
341 # TODO: how is length represented?
342 for ref in val:
343 f.write(i32(ref))
344 num_pad = 3 - len(val)
345 for i in xrange(num_pad):
346 f.write(i32(0))
347
348 elif tag == TAG_CODE:
349 assert is_slab, val
350 slab_index = val
351 #log('slab_index %d', slab_index)
352 offset = slab_offsets[slab_index]
353 f.write(u8(1)) # is_slab
354 f.write(u8(0)) # length stored in slab
355 f.write(i32(0)) # pad
356 f.write(i32(0)) # pad
357 f.write(i32(offset))
358
359 else:
360 raise AssertionError(tag)
361
362 log('')
363 log('slabs')
364 for slab in self.slabs:
365 log('\t%r', slab)
366
367 log('cells')
368 for c in self.cells:
369 #log('\t%r', c)
370 pass
371
372 log('%d slabs in %d bytes', len(self.slabs), total_slab_size)
373 log('%d cells in %d bytes', len(self.cells), f.tell() - 12 - total_slab_size)
374
375 # Fill in the cell position
376 f.seek(4)
377 f.write(i32(total_slab_size))
378 f.write(i32(len(self.cells)))
379
380
381def Write(co, f):
382 print(co)
383 enc = Encoder()
384 enc.Any(co)
385 enc.Write(f)
386
387
388def main(argv):
389 chunk = bytearray()
390 chunk.extend('hello')
391 chunk.append('\0')
392
393 print('Hello from oheap2.py')
394 print(repr(chunk))
395
396
397if __name__ == '__main__':
398 try:
399 main(sys.argv)
400 except RuntimeError as e:
401 print('FATAL: %s' % e, file=sys.stderr)
402 sys.exit(1)