demo/old/ovm2/oheap2.py

OILS / demo / old / ovm2 / oheap2.py View on Github | oilshell.org

402 lines, 215 significant

1	#!/usr/bin/env python2
2	"""
3	oheap2.py
4
5	OVM2 stores objects in a data structure called OHeap2. It has the following
6	concepts.
7
8	- Handles: 4-byte / 32-bit integers that point to Cells.
9	- Cells: 16-byte structs with a type tag, in a contiguous array. This array is
10	scanned by the GC.
11	- Slabs: Variable-length arrays of memory that are owned by a Cell. Each slab
12	may be malloc'd individually.
13	- In the case of a string, it's opaque to the GC.
14	- In the case of a tuple or struct, it must be scanned by the GC.
15
16	This is called the "4-16-N design".
17
18	Every cell has a type tag, which is simply an integer. Negative integers
19	indicate native types implemented in C. Positive integers are for user-defined
20	types.
21
22	Cells also have an is_slab bit and a length for small. TODO: is_slab should be
23	is_small?
24
25	Operations on OHeap
26	-------------------
27
28	1. Allocate a new cell, possibly resizing
29
30	2. Garbage Collect
31	- Walk all the cells. Free unused cells along with their slabs.
32
33	3. Save to file
34	Slabs: Native Pointers to Offsets
35
36	3a. Copy an object tree to a fresh oheap? e.g. for IPC. I guess this is like "Save".
37	- Walk all the cells and slabs
38
39	4. Load from file into fresh OHeap
40	- Patch Slab Offsets to Native Pointers
41
42	4a. Load from file and merge into existing OHeap?
43	- Inside a namespace?
44	- Import?
45	- Patch all cell refs!
46
47	5. Make permanent? Move to another "generation"? OPy classes and bytecode
48	perhaps fit in this category.
49
50	TODO
51	----
52
53	- Bits on handle: whether it's const or not?
54	- negative handles could be const?
55
56	- Singleton values for None/True/False
57	- interning of strings
58	- hash tables, and hashes of strings
59
60	Introspection
61	-------------
62
63	These operations should work:
64
65	type()
66	getattr() -- hm in OVM I don't want to look at base classes
67	dir() -- list all attributes. ditto -- don't look at base classes?
68	hm except this returns different stuff for modules, classes, and objects
69	it looks at superclasses
70	isinstance(obj, typ) - check if the type tag of obj points at typ or a superclass
71	"""
72	from __future__ import print_function
73
74	import struct
75	import sys
76	import types
77
78	from core.util import log
79
80
81	TAG_NONE = -1
82	TAG_BOOL = -2
83	TAG_INT = -3
84	TAG_FLOAT = -4
85	TAG_STR = -5
86	TAG_TUPLE = -6
87	TAG_CODE = -7
88
89	MIN_SMALL_INT = -(1 << 63)
90	MAX_SMALL_INT = (1 << 63) - 1
91
92	MAX_LEN_SMALL_STR = 11 # 12 bytes - 1 for NUL
93
94	MAX_LEN_SMALL_TUPLE = 3 # 4 Handles in 12 bytes?
95	# We can steal a bit from the tag for the small/big
96	# bit.
97
98	def u8(i): # 1 byte unsigned
99	return struct.pack('B', i)
100
101	def i16(i): # 2 bytes
102	return struct.pack('h', i)
103
104	def i32(i): # 4 bytes
105	return struct.pack('i', i)
106
107	def i64(i): # 8 bytes (long long)
108	return struct.pack('q', i)
109
110	def f64(i): # 8 byte double
111	return struct.pack('d', i)
112
113
114	def Align4(i):
115	"""Round up to the nearest multiple of 4. See unit tests."""
116	return ((i-1) \| 3) + 1
117
118
119	def Align16(i):
120	"""Round up to the nearest multiple of 16. See unit tests."""
121	return ((i-1) \| 15) + 1
122
123
124	class Encoder(object):
125	"""
126	Write objects into an OHeap2 structure that can be lazily loaded.
127
128	First pass:
129	Append to cells and append to slabs
130	Second pass:
131	Write slabs as bytes, and then patch offsets in cells?
132	Write all the cells
133	Write the root object at the front of the file? Or do it at the end?
134	OHeap writes it at the beginning after
135	"""
136
137	def __init__(self):
138	self.chunk = bytearray()
139	# An array of cells
140	self.cells = []
141	# Write all these first? So that the cells can point to them.
142	self.slabs = []
143
144	def Any(self, obj):
145	"""
146	Encode an object and return its id.
147	"""
148	id_ = len(self.cells)
149
150	# TODO: Enforce that None is a singleton. But what about joining OHeaps?
151	if isinstance(obj, types.NoneType):
152	self.cells.append((TAG_NONE, False, None))
153
154	# TODO: Enforce that True/False is a singleton. But what about joining
155	# OHeaps?
156	elif isinstance(obj, bool):
157	self.cells.append((TAG_BOOL, False, obj))
158
159	elif isinstance(obj, int):
160	i = obj
161	if MIN_SMALL_INT < i < MAX_SMALL_INT:
162	self.cells.append((TAG_INT, False, i))
163	else:
164	raise NotImplementedError
165
166	elif isinstance(obj, float):
167	raise NotImplementedError
168
169	# TODO: Identical strings could be "interned" here.
170	elif isinstance(obj, str):
171	s = obj
172	n = len(s)
173	if n < MAX_LEN_SMALL_STR:
174	self.cells.append((TAG_STR, False, s))
175	else:
176	# length and they payload
177	slab_index = len(self.slabs)
178	self.slabs.append((n, s))
179	self.cells.append((TAG_STR, True, slab_index))
180
181	elif isinstance(obj, tuple):
182	t = obj
183	refs = []
184	for item in t:
185	refs.append(self.Any(item)) # Depth-first.
186
187	# Compute ID after adding all the children.
188	id_ = len(self.cells)
189
190	n = len(t)
191	if n < MAX_LEN_SMALL_TUPLE:
192	# TODO: How do we know how long it is?
193	self.cells.append((TAG_TUPLE, False, refs))
194	else:
195	slab_index = len(self.slabs)
196	self.slabs.append((n, refs))
197	self.cells.append((TAG_TUPLE, True, slab_index))
198
199	elif isinstance(obj, types.CodeType):
200	co = obj
201	refs = []
202
203	# Ints first
204	refs.append(self.Any(co.co_argcount))
205	refs.append(self.Any(co.co_nlocals))
206	refs.append(self.Any(co.co_stacksize))
207	refs.append(self.Any(co.co_flags))
208	refs.append(self.Any(co.co_firstlineno))
209
210	# Strings
211	refs.append(self.Any(co.co_name))
212	refs.append(self.Any(co.co_filename))
213	refs.append(self.Any(co.co_code)) # bytecode
214
215	# Tuples
216	refs.append(self.Any(co.co_names))
217	refs.append(self.Any(co.co_varnames))
218	refs.append(self.Any(co.co_consts))
219
220	slab_index = len(self.slabs)
221	self.slabs.append((len(refs), refs))
222	self.cells.append((TAG_CODE, True, slab_index))
223
224	else:
225	raise AssertionError
226
227	return id_
228
229	def Write(self, f):
230	f.write('OHP2') # magic header
231	f.write(i32(0)) # placeholder for cell offset
232	f.write(i32(0)) # placeholder for root object. NOTE: We could always
233	# fseek(SEEK_END, -4)? But I think we want it to more
234	# self-describing.
235
236	# Write slabs first, so we know their offsets.
237	slab_offsets = []
238	pos = 0 # Reader should patch relative to 12
239
240	for length, payload in self.slabs:
241
242	if isinstance(payload, str):
243	slab_offsets.append(pos)
244
245	f.write(i32(length)) # length in bytes
246	pos += 4
247
248	n = len(payload)
249	aligned = Align4(n+1) # at least NUL byte for terminator
250	f.write(payload)
251	f.write('\0' * (aligned - n)) # padding
252	pos += aligned
253
254	elif isinstance(payload, list): # list of references
255	slab_offsets.append(pos)
256
257	# length in references. Could be unsigned?
258	f.write(i32(length))
259
260	# NOTE: There is no GC offset, since all of them are scanned?
261	for ref in payload:
262	f.write(i32(ref))
263	pos += 4
264	pos += len(payload) * 4
265
266	else:
267	raise AssertionError(payload)
268
269	log('Slab offsets: %s', slab_offsets)
270
271	# Pad out the slabs so that the cells begins at a multiple of 16.
272	total_slab_size = Align16(pos) # including pad, but not including header.
273	f.write('\0' * (total_slab_size - pos)) # Write the pad
274
275	# Encode it into 16 bytes
276	for tag, is_slab, val in self.cells:
277	#log('%s %s %s', tag, is_slab, val)
278
279	# 4 byte tag. This may be patched into a type Handle?
280	# Reserve LSB for is_slab? Or highest bit?
281	# The tag could be (i >> 1) then?
282	f.write(i16(tag))
283
284	if tag == TAG_NONE:
285	f.write(i16(0))
286	f.write(i32(0))
287	f.write(i32(0))
288	f.write(i32(0))
289
290	elif tag == TAG_BOOL:
291	f.write(i16(0)) # pad
292	f.write(i32(0)) # pad
293	f.write(i32(0)) # pad
294	f.write(i32(int(val))) # 0 or 1
295
296	elif tag == TAG_INT:
297	assert not is_slab, val
298	f.write(i16(0)) # Padding
299	f.write(i32(0)) # Padding
300	f.write(i64(val))
301
302	elif tag == TAG_FLOAT:
303	assert not is_slab, val
304	f.write(i16(0)) # Padding
305	f.write(i32(0)) # Padding
306	f.write(f64(val))
307
308	elif tag == TAG_STR:
309	if is_slab:
310	# For a string, a big/small bit could technically be in the last
311	# byte. To reuse NUL terminator. But I want the patching process to
312	# be consistent.
313	slab_index = val
314	offset = slab_offsets[slab_index]
315	f.write(u8(1)) # is_slab
316	f.write(u8(0)) # length stored in slab
317	f.write(i32(0)) # pad
318	f.write(i32(0)) # pad
319	f.write(i32(offset))
320	else:
321	n = len(val)
322	f.write(u8(0)) # is_slab
323	f.write(u8(n)) # length stored here
324	f.write(val)
325	num_pad = 12 - n # at least one NUL
326	f.write('\0' * num_pad)
327
328	elif tag == TAG_TUPLE:
329	if is_slab:
330	slab_index = val
331	offset = slab_offsets[slab_index]
332	f.write(u8(1)) # is_slab
333	f.write(u8(0)) # length stored in slab
334	f.write(i32(0)) # pad
335	f.write(i32(0)) # pad
336	f.write(i32(offset))
337	else:
338	n = len(val)
339	f.write(u8(0)) # is_slab
340	f.write(u8(n))
341	# TODO: how is length represented?
342	for ref in val:
343	f.write(i32(ref))
344	num_pad = 3 - len(val)
345	for i in xrange(num_pad):
346	f.write(i32(0))
347
348	elif tag == TAG_CODE:
349	assert is_slab, val
350	slab_index = val
351	#log('slab_index %d', slab_index)
352	offset = slab_offsets[slab_index]
353	f.write(u8(1)) # is_slab
354	f.write(u8(0)) # length stored in slab
355	f.write(i32(0)) # pad
356	f.write(i32(0)) # pad
357	f.write(i32(offset))
358
359	else:
360	raise AssertionError(tag)
361
362	log('')
363	log('slabs')
364	for slab in self.slabs:
365	log('\t%r', slab)
366
367	log('cells')
368	for c in self.cells:
369	#log('\t%r', c)
370	pass
371
372	log('%d slabs in %d bytes', len(self.slabs), total_slab_size)
373	log('%d cells in %d bytes', len(self.cells), f.tell() - 12 - total_slab_size)
374
375	# Fill in the cell position
376	f.seek(4)
377	f.write(i32(total_slab_size))
378	f.write(i32(len(self.cells)))
379
380
381	def Write(co, f):
382	print(co)
383	enc = Encoder()
384	enc.Any(co)
385	enc.Write(f)
386
387
388	def main(argv):
389	chunk = bytearray()
390	chunk.extend('hello')
391	chunk.append('\0')
392
393	print('Hello from oheap2.py')
394	print(repr(chunk))
395
396
397	if __name__ == '__main__':
398	try:
399	main(sys.argv)
400	except RuntimeError as e:
401	print('FATAL: %s' % e, file=sys.stderr)
402	sys.exit(1)