OILS / opy / _regtest / src / pickle.py View on Github | oilshell.org

1397 lines, 912 significant
1"""Create portable serialized representations of Python objects.
2
3See module cPickle for a (much) faster implementation.
4See module copy_reg for a mechanism for registering custom picklers.
5See module pickletools source for extensive comments.
6
7Classes:
8
9 Pickler
10 Unpickler
11
12Functions:
13
14 dump(object, file)
15 dumps(object) -> string
16 load(file) -> object
17 loads(string) -> object
18
19Misc variables:
20
21 __version__
22 format_version
23 compatible_formats
24
25"""
26
27__version__ = "$Revision: 72223 $" # Code version
28
29from types import *
30from copy_reg import dispatch_table
31from copy_reg import _extension_registry, _inverted_registry, _extension_cache
32import marshal
33import sys
34import struct
35import re
36
37__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
38 "Unpickler", "dump", "dumps", "load", "loads"]
39
# These are purely informational; no code uses these.
format_version = "2.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      ]                 # Old format versions we can read

# Keep in synch with cPickle.  This is the highest protocol number we
# know how to read (and the protocol selected by a negative protocol arg).
HIGHEST_PROTOCOL = 2

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
# mloads is used by the binary load_* methods of Unpickler below.
mloads = marshal.loads
57
class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
61
class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
68
class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
79
80# An instance of _Stop is raised by Unpickler.load_stop() in response to
81# the STOP opcode, passing the object that is the result of unpickling.
82class _Stop(Exception):
83 def __init__(self, value):
84 self.value = value
85
# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    # Not running under Jython; save_dict is registered for dict only.
    PyStringMap = None

# UnicodeType may or may not be exported (normally imported from types)
try:
    UnicodeType
except NameError:
    # Interpreter built without unicode support; save_unicode is then
    # never registered in the dispatch table.
    UnicodeType = None
97
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.  Each opcode is a 1-character str;
# the Unpickler dispatch table below is keyed on these characters.

MARK            = '('   # push special markobject on stack
STOP            = '.'   # every pickle ends with STOP
POP             = '0'   # discard topmost stack item
POP_MARK        = '1'   # discard stack top through topmost markobject
DUP             = '2'   # duplicate top stack item
FLOAT           = 'F'   # push float object; decimal string argument
INT             = 'I'   # push integer or bool; decimal string argument
BININT          = 'J'   # push four-byte signed int
BININT1         = 'K'   # push 1-byte unsigned int
LONG            = 'L'   # push long; decimal string argument
BININT2         = 'M'   # push 2-byte unsigned int
NONE            = 'N'   # push None
PERSID          = 'P'   # push persistent object; id is taken from string arg
BINPERSID       = 'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE          = 'R'   # apply callable to argtuple, both on stack
STRING          = 'S'   # push string; NL-terminated string argument
BINSTRING       = 'T'   # push string; counted binary string argument
SHORT_BINSTRING = 'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE         = 'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE      = 'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND          = 'a'   # append stack top to list below it
BUILD           = 'b'   # call __setstate__ or __dict__.update()
GLOBAL          = 'c'   # push self.find_class(modname, name); 2 string args
DICT            = 'd'   # build a dict from stack items
EMPTY_DICT      = '}'   # push empty dict
APPENDS         = 'e'   # extend list on stack by topmost stack slice
GET             = 'g'   # push item from memo on stack; index is string arg
BINGET          = 'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST            = 'i'   # build & push class instance
LONG_BINGET     = 'j'   # push item from memo on stack; index is 4-byte arg
LIST            = 'l'   # build list from topmost stack items
EMPTY_LIST      = ']'   # push empty list
OBJ             = 'o'   # build & push class instance
PUT             = 'p'   # store stack top in memo; index is string arg
BINPUT          = 'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT     = 'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM         = 's'   # add key+value pair to dict
TUPLE           = 't'   # build tuple from topmost stack items
EMPTY_TUPLE     = ')'   # push empty tuple
SETITEMS        = 'u'   # modify dict by adding topmost key+value pairs
BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding

TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2 (new opcodes live in the \x80-\x8b range)

PROTO           = '\x80'  # identify pickle protocol
NEWOBJ          = '\x81'  # build object by applying cls.__new__ to argtuple
EXT1            = '\x82'  # push object from extension registry; 1-byte index
EXT2            = '\x83'  # ditto, but 2-byte index
EXT4            = '\x84'  # ditto, but 4-byte index
TUPLE1          = '\x85'  # build 1-tuple from stack top
TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
NEWTRUE         = '\x88'  # push True
NEWFALSE        = '\x89'  # push False
LONG1           = '\x8a'  # push long from < 256 bytes
LONG4           = '\x8b'  # push really big long

# Index n gives the opcode that builds an n-tuple (n = 0..3), proto 2 only.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]


# Export every ALL_CAPS opcode/constant name defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x
167
168
169# Pickling machinery
170
class Pickler:
    """Writes a pickle data stream (protocols 0, 1 and 2) to a file."""

    def __init__(self, file, protocol=None):
        """This takes a file-like object for writing a pickle data stream.

        The optional protocol argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2.  The default
        protocol is 0, to be backwards compatible.  (Protocol 0 is the
        only protocol that can be written to a file opened in text
        mode and read back successfully.  When using a protocol higher
        than 0, make sure the file is opened in binary mode, both when
        pickling and unpickling.)

        Protocol 1 is more efficient than protocol 0; protocol 2 is
        more efficient than protocol 1.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The file parameter must have a write() method that accepts a single
        string argument.  It can thus be an open file object, a StringIO
        object, or any other custom object that meets this interface.

        """
        if protocol is None:
            protocol = 0
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        self.write = file.write
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1   # binary opcodes allowed for protocol >= 1
        self.fast = 0              # if true, skip memoization (see memoize)

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects are
        pickled by reference and not by value.  This method is useful when
        re-using picklers.

        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        if self.proto >= 2:
            # Protocol 2 streams begin with a PROTO opcode + version byte.
            self.write(PROTO + chr(self.proto))
        self.save(obj)
        self.write(STOP)

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            return
        assert id(obj) not in self.memo
        memo_len = len(self.memo)
        self.write(self.put(memo_len))
        self.memo[id(obj)] = memo_len, obj

    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINPUT + chr(i)
            else:
                return LONG_BINPUT + pack("<i", i)

        return PUT + repr(i) + '\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINGET + chr(i)
            else:
                return LONG_BINGET + pack("<i", i)

        return GET + repr(i) + '\n'

    def save(self, obj):
        # Central dispatcher: emits the opcode sequence for one object,
        # trying each pickling strategy in priority order.

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None:
            self.save_pers(pid)
            return

        # Check the memo: already-seen objects become GET references.
        x = self.memo.get(id(obj))
        if x:
            self.write(self.get(x[0]))
            return

        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f:
            f(self, obj) # Call unbound method with explicit self
            return

        # Check copy_reg.dispatch_table
        reduce = dispatch_table.get(t)
        if reduce:
            rv = reduce(obj)
        else:
            # Check for a class with a custom metaclass; treat as regular class
            try:
                issc = issubclass(t, TypeType)
            except TypeError: # t is not a class (old Boost; see SF #502085)
                issc = 0
            if issc:
                self.save_global(obj)
                return

            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if type(rv) is StringType:
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if type(rv) is not TupleType:
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        # This exists so a subclass can override it.  Returning non-None
        # makes save() emit a persistent-id reference instead of the object.
        return None

    def save_pers(self, pid):
        # Save a persistent id reference
        if self.bin:
            self.save(pid)
            self.write(BINPERSID)
        else:
            self.write(PERSID + str(pid) + '\n')

    def save_reduce(self, func, args, state=None,
                    listitems=None, dictitems=None, obj=None):
        # This API is called by some subclasses

        # Assert that args is a tuple or None
        if not isinstance(args, TupleType):
            raise PicklingError("args from reduce() should be a tuple")

        # Assert that func is callable
        if not hasattr(func, '__call__'):
            raise PicklingError("func from reduce should be callable")

        save = self.save
        write = self.write

        # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # new-style class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)

    # Methods below this point are dispatched through the dispatch table

    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[NoneType] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            self.write(obj and NEWTRUE or NEWFALSE)
        else:
            # Protocols 0/1 encode bools as the special INT strings
            # 'I01\n' / 'I00\n' (see TRUE/FALSE above).
            self.write(obj and TRUE or FALSE)
    dispatch[bool] = save_bool

    def save_int(self, obj, pack=struct.pack):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + chr(obj))
                    return
                if obj <= 0xffff:
                    self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
                    return
            # Next check for 4-byte signed ints:
            high_bits = obj >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
                self.write(BININT + pack("<i", obj))
                return
        # Text pickle, or int too big to fit in signed 4-byte format.
        self.write(INT + repr(obj) + '\n')
    dispatch[IntType] = save_int

    def save_long(self, obj, pack=struct.pack):
        if self.proto >= 2:
            # encode_long is defined later in this module (little-endian
            # 256's-complement byte string).
            bytes = encode_long(obj)
            n = len(bytes)
            if n < 256:
                self.write(LONG1 + chr(n) + bytes)
            else:
                self.write(LONG4 + pack("<i", n) + bytes)
            return
        self.write(LONG + repr(obj) + '\n')
    dispatch[LongType] = save_long

    def save_float(self, obj, pack=struct.pack):
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj) + '\n')
    dispatch[FloatType] = save_float

    def save_string(self, obj, pack=struct.pack):
        if self.bin:
            n = len(obj)
            if n < 256:
                self.write(SHORT_BINSTRING + chr(n) + obj)
            else:
                self.write(BINSTRING + pack("<i", n) + obj)
        else:
            self.write(STRING + repr(obj) + '\n')
        self.memoize(obj)
    dispatch[StringType] = save_string

    def save_unicode(self, obj, pack=struct.pack):
        if self.bin:
            encoding = obj.encode('utf-8')
            n = len(encoding)
            self.write(BINUNICODE + pack("<i", n) + encoding)
        else:
            # Escape backslash and newline so the value survives the
            # line-oriented text protocol.
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\n", "\\u000a")
            self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
        self.memoize(obj)
    dispatch[UnicodeType] = save_unicode

    if StringType is UnicodeType:
        # This is true for Jython, where str and unicode are the same
        # type; this replacement save_string handles both encodings.
        def save_string(self, obj, pack=struct.pack):
            unicode = obj.isunicode()

            if self.bin:
                if unicode:
                    obj = obj.encode("utf-8")
                l = len(obj)
                if l < 256 and not unicode:
                    self.write(SHORT_BINSTRING + chr(l) + obj)
                else:
                    s = pack("<i", l)
                    if unicode:
                        self.write(BINUNICODE + s + obj)
                    else:
                        self.write(BINSTRING + s + obj)
            else:
                if unicode:
                    obj = obj.replace("\\", "\\u005c")
                    obj = obj.replace("\n", "\\u000a")
                    obj = obj.encode('raw-unicode-escape')
                    self.write(UNICODE + obj + '\n')
                else:
                    self.write(STRING + repr(obj) + '\n')
            self.memoize(obj)
        dispatch[StringType] = save_string

    def save_tuple(self, obj):
        write = self.write
        proto = self.proto

        n = len(obj)
        if n == 0:
            if proto:
                write(EMPTY_TUPLE)
            else:
                write(MARK + TUPLE)
            return

        save = self.save
        memo = self.memo
        if n <= 3 and proto >= 2:
            # Protocol 2 has dedicated TUPLE1/TUPLE2/TUPLE3 opcodes.
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                write(POP * n + get)
            else:
                write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if proto:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        self.write(TUPLE)
        self.memoize(obj)

    dispatch[TupleType] = save_tuple

    # save_empty_tuple() isn't used by anything in Python 2.3.  However, I
    # found a Pickler subclass in Zope3 that calls it, so it's not harmless
    # to remove it.
    def save_empty_tuple(self, obj):
        self.write(EMPTY_TUPLE)

    def save_list(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_LIST)
        else:   # proto 0 -- can't use EMPTY_LIST
            write(MARK + LIST)

        # Memoize before saving the items so recursive lists resolve to
        # a GET of the (empty) list created above.
        self.memoize(obj)
        self._batch_appends(iter(obj))

    dispatch[ListType] = save_list

    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
    # out of synch, though.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            # Proto 0 has no APPENDS; emit one APPEND per item.
            for x in items:
                save(x)
                write(APPEND)
            return

        r = xrange(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    x = items.next()
                    tmp.append(x)
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done

    def save_dict(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_DICT)
        else:   # proto 0 -- can't use EMPTY_DICT
            write(MARK + DICT)

        # Memoize before saving the items so recursive dicts resolve to
        # a GET of the (empty) dict created above.
        self.memoize(obj)
        self._batch_setitems(obj.iteritems())

    dispatch[DictionaryType] = save_dict
    if not PyStringMap is None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        r = xrange(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    tmp.append(items.next())
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done

    def save_inst(self, obj):
        # Old-style (classic) class instances: INST/OBJ + BUILD.
        cls = obj.__class__

        memo = self.memo
        write = self.write
        save = self.save

        if hasattr(obj, '__getinitargs__'):
            args = obj.__getinitargs__()
            len(args) # XXX Assert it's a sequence
            _keep_alive(args, memo)
        else:
            args = ()

        write(MARK)

        if self.bin:
            save(cls)
            for arg in args:
                save(arg)
            write(OBJ)
        else:
            for arg in args:
                save(arg)
            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

        self.memoize(obj)

        try:
            getstate = obj.__getstate__
        except AttributeError:
            stuff = obj.__dict__
        else:
            stuff = getstate()
            _keep_alive(stuff, memo)
        save(stuff)
        write(BUILD)

    dispatch[InstanceType] = save_inst

    def save_global(self, obj, name=None, pack=struct.pack):
        # Classes, functions and other module-level names are pickled by
        # reference: module name + attribute name (GLOBAL), or an EXTn
        # code if registered in copy_reg's extension registry.
        write = self.write
        memo = self.memo

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            # The looked-up object must be identical, or unpickling would
            # silently yield a different object.
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        if self.proto >= 2:
            code = _extension_registry.get((module, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + chr(code))
                elif code <= 0xffff:
                    write("%c%c%c" % (EXT2, code&0xff, code>>8))
                else:
                    write(EXT4 + pack("<i", code))
                return

        write(GLOBAL + module + '\n' + name + '\n')
        self.memoize(obj)

    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
    dispatch[TypeType] = save_global
780
781# Pickling helpers
782
783def _keep_alive(x, memo):
784 """Keeps a reference to the object x in the memo.
785
786 Because we remember objects by their id, we have
787 to assure that possibly temporary objects are kept
788 alive by referencing them.
789 We store a reference at the id of the memo, which should
790 normally not be used unless someone tries to deepcopy
791 the memo itself...
792 """
793 try:
794 memo[id(memo)].append(x)
795 except KeyError:
796 # aha, this is the first one :-)
797 memo[id(memo)]=[x]
798
799
# A cache for whichmodule(), mapping a function object to the name of
# the module in which the function was found.  Only consulted for
# objects lacking a __module__ attribute.

classmap = {} # called classmap for backwards compatibility
804
def whichmodule(func, funcname):
    """Figure out the module in which a function occurs.

    Search sys.modules for the module.
    Cache in classmap.
    Return a module name.
    If the function cannot be found, return "__main__".
    """
    # Python functions should always get an __module__ from their globals.
    mod = getattr(func, "__module__", None)
    if mod is not None:
        return mod
    if func in classmap:
        return classmap[func]

    # Linear scan of loaded modules, identity-matching the attribute.
    found = '__main__'
    for name, module in sys.modules.items():
        if module is None:
            continue  # skip dummy package entries
        if name != '__main__' and getattr(module, funcname, None) is func:
            found = name
            break
    classmap[func] = found
    return found
829
830
831# Unpickling machinery
832
833class Unpickler:
834
835 def __init__(self, file):
836 """This takes a file-like object for reading a pickle data stream.
837
838 The protocol version of the pickle is detected automatically, so no
839 proto argument is needed.
840
841 The file-like object must have two methods, a read() method that
842 takes an integer argument, and a readline() method that requires no
843 arguments. Both methods should return a string. Thus file-like
844 object can be a file object opened for reading, a StringIO object,
845 or any other custom object that meets this interface.
846 """
847 self.readline = file.readline
848 self.read = file.read
849 self.memo = {}
850
851 def load(self):
852 """Read a pickled object representation from the open file.
853
854 Return the reconstituted object hierarchy specified in the file.
855 """
856 self.mark = object() # any new unique object
857 self.stack = []
858 self.append = self.stack.append
859 read = self.read
860 dispatch = self.dispatch
861 try:
862 while 1:
863 key = read(1)
864 dispatch[key](self)
865 except _Stop, stopinst:
866 return stopinst.value
867
    # Return largest index k such that self.stack[k] is self.mark.
    # If the stack doesn't contain a mark, eventually raises IndexError.
    # This could be sped by maintaining another stack, of indices at which
    # the mark appears.  For that matter, the latter stack would suffice,
    # and we wouldn't need to push mark objects on self.stack at all.
    # Doing so is probably a good thing, though, since if the pickle is
    # corrupt (or hostile) we may get a clue from finding self.mark embedded
    # in unpickled objects.
    def marker(self):
        # NOTE: identity comparison ('is') is essential -- the mark is an
        # arbitrary object() and must never compare equal to pickled data.
        stack = self.stack
        mark = self.mark
        k = len(stack)-1
        while stack[k] is not mark: k = k-1
        return k
882
    # Maps a 1-character opcode to its unbound handler method; populated
    # as the load_* methods below are defined.
    dispatch = {}

    def load_eof(self):
        # read(1) returned '' -- the stream ended before a STOP opcode.
        raise EOFError
    dispatch[''] = load_eof
888
889 def load_proto(self):
890 proto = ord(self.read(1))
891 if not 0 <= proto <= 2:
892 raise ValueError, "unsupported pickle protocol: %d" % proto
893 dispatch[PROTO] = load_proto
894
895 def load_persid(self):
896 pid = self.readline()[:-1]
897 self.append(self.persistent_load(pid))
898 dispatch[PERSID] = load_persid
899
900 def load_binpersid(self):
901 pid = self.stack.pop()
902 self.append(self.persistent_load(pid))
903 dispatch[BINPERSID] = load_binpersid
904
905 def load_none(self):
906 self.append(None)
907 dispatch[NONE] = load_none
908
909 def load_false(self):
910 self.append(False)
911 dispatch[NEWFALSE] = load_false
912
913 def load_true(self):
914 self.append(True)
915 dispatch[NEWTRUE] = load_true
916
917 def load_int(self):
918 data = self.readline()
919 if data == FALSE[1:]:
920 val = False
921 elif data == TRUE[1:]:
922 val = True
923 else:
924 try:
925 val = int(data)
926 except ValueError:
927 val = long(data)
928 self.append(val)
929 dispatch[INT] = load_int
930
931 def load_binint(self):
932 self.append(mloads('i' + self.read(4)))
933 dispatch[BININT] = load_binint
934
935 def load_binint1(self):
936 self.append(ord(self.read(1)))
937 dispatch[BININT1] = load_binint1
938
939 def load_binint2(self):
940 self.append(mloads('i' + self.read(2) + '\000\000'))
941 dispatch[BININT2] = load_binint2
942
943 def load_long(self):
944 self.append(long(self.readline()[:-1], 0))
945 dispatch[LONG] = load_long
946
947 def load_long1(self):
948 n = ord(self.read(1))
949 bytes = self.read(n)
950 self.append(decode_long(bytes))
951 dispatch[LONG1] = load_long1
952
953 def load_long4(self):
954 n = mloads('i' + self.read(4))
955 bytes = self.read(n)
956 self.append(decode_long(bytes))
957 dispatch[LONG4] = load_long4
958
959 def load_float(self):
960 self.append(float(self.readline()[:-1]))
961 dispatch[FLOAT] = load_float
962
963 def load_binfloat(self, unpack=struct.unpack):
964 self.append(unpack('>d', self.read(8))[0])
965 dispatch[BINFLOAT] = load_binfloat
966
    def load_string(self):
        # STRING: the repr() of a str, NL-terminated.  The quote checks
        # reject input that a naive eval()-based reader could have
        # executed as code, hence "insecure string pickle".
        rep = self.readline()[:-1]
        for q in "\"'": # double or single quote
            if rep.startswith(q):
                if len(rep) < 2 or not rep.endswith(q):
                    raise ValueError, "insecure string pickle"
                rep = rep[len(q):-len(q)]  # strip the matched quote pair
                break
        else:
            # Neither quote character at the front: not a str repr.
            raise ValueError, "insecure string pickle"
        self.append(rep.decode("string-escape"))
    dispatch[STRING] = load_string
979
980 def load_binstring(self):
981 len = mloads('i' + self.read(4))
982 self.append(self.read(len))
983 dispatch[BINSTRING] = load_binstring
984
985 def load_unicode(self):
986 self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
987 dispatch[UNICODE] = load_unicode
988
989 def load_binunicode(self):
990 len = mloads('i' + self.read(4))
991 self.append(unicode(self.read(len),'utf-8'))
992 dispatch[BINUNICODE] = load_binunicode
993
994 def load_short_binstring(self):
995 len = ord(self.read(1))
996 self.append(self.read(len))
997 dispatch[SHORT_BINSTRING] = load_short_binstring
998
999 def load_tuple(self):
1000 k = self.marker()
1001 self.stack[k:] = [tuple(self.stack[k+1:])]
1002 dispatch[TUPLE] = load_tuple
1003
1004 def load_empty_tuple(self):
1005 self.stack.append(())
1006 dispatch[EMPTY_TUPLE] = load_empty_tuple
1007
1008 def load_tuple1(self):
1009 self.stack[-1] = (self.stack[-1],)
1010 dispatch[TUPLE1] = load_tuple1
1011
1012 def load_tuple2(self):
1013 self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1014 dispatch[TUPLE2] = load_tuple2
1015
1016 def load_tuple3(self):
1017 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1018 dispatch[TUPLE3] = load_tuple3
1019
1020 def load_empty_list(self):
1021 self.stack.append([])
1022 dispatch[EMPTY_LIST] = load_empty_list
1023
1024 def load_empty_dictionary(self):
1025 self.stack.append({})
1026 dispatch[EMPTY_DICT] = load_empty_dictionary
1027
1028 def load_list(self):
1029 k = self.marker()
1030 self.stack[k:] = [self.stack[k+1:]]
1031 dispatch[LIST] = load_list
1032
1033 def load_dict(self):
1034 k = self.marker()
1035 d = {}
1036 items = self.stack[k+1:]
1037 for i in range(0, len(items), 2):
1038 key = items[i]
1039 value = items[i+1]
1040 d[key] = value
1041 self.stack[k:] = [d]
1042 dispatch[DICT] = load_dict
1043
    # INST and OBJ differ only in how they get a class object.  It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, k):
        args = tuple(self.stack[k+1:])
        del self.stack[k:]
        instantiated = 0
        # Old-style classes with no args and no __getinitargs__ are built
        # without running __init__, by swapping __class__ on a dummy.
        if (not args and
            type(klass) is ClassType and
            not hasattr(klass, "__getinitargs__")):
            try:
                value = _EmptyClass()
                value.__class__ = klass
                instantiated = 1
            except RuntimeError:
                # In restricted execution, assignment to inst.__class__ is
                # prohibited
                pass
        if not instantiated:
            try:
                value = klass(*args)
            except TypeError, err:
                # Re-raise with the class name added, keeping the original
                # traceback (three-argument raise).
                raise TypeError, "in constructor for %s: %s" % (
                    klass.__name__, str(err)), sys.exc_info()[2]
        self.append(value)
1071
1072 def load_inst(self):
1073 module = self.readline()[:-1]
1074 name = self.readline()[:-1]
1075 klass = self.find_class(module, name)
1076 self._instantiate(klass, self.marker())
1077 dispatch[INST] = load_inst
1078
1079 def load_obj(self):
1080 # Stack is ... markobject classobject arg1 arg2 ...
1081 k = self.marker()
1082 klass = self.stack.pop(k+1)
1083 self._instantiate(klass, k)
1084 dispatch[OBJ] = load_obj
1085
1086 def load_newobj(self):
1087 args = self.stack.pop()
1088 cls = self.stack[-1]
1089 obj = cls.__new__(cls, *args)
1090 self.stack[-1] = obj
1091 dispatch[NEWOBJ] = load_newobj
1092
1093 def load_global(self):
1094 module = self.readline()[:-1]
1095 name = self.readline()[:-1]
1096 klass = self.find_class(module, name)
1097 self.append(klass)
1098 dispatch[GLOBAL] = load_global
1099
1100 def load_ext1(self):
1101 code = ord(self.read(1))
1102 self.get_extension(code)
1103 dispatch[EXT1] = load_ext1
1104
1105 def load_ext2(self):
1106 code = mloads('i' + self.read(2) + '\000\000')
1107 self.get_extension(code)
1108 dispatch[EXT2] = load_ext2
1109
1110 def load_ext4(self):
1111 code = mloads('i' + self.read(4))
1112 self.get_extension(code)
1113 dispatch[EXT4] = load_ext4
1114
1115 def get_extension(self, code):
1116 nil = []
1117 obj = _extension_cache.get(code, nil)
1118 if obj is not nil:
1119 self.append(obj)
1120 return
1121 key = _inverted_registry.get(code)
1122 if not key:
1123 raise ValueError("unregistered extension code %d" % code)
1124 obj = self.find_class(*key)
1125 _extension_cache[code] = obj
1126 self.append(obj)
1127
1128 def find_class(self, module, name):
1129 # Subclasses may override this
1130 __import__(module)
1131 mod = sys.modules[module]
1132 klass = getattr(mod, name)
1133 return klass
1134
1135 def load_reduce(self):
1136 stack = self.stack
1137 args = stack.pop()
1138 func = stack[-1]
1139 value = func(*args)
1140 stack[-1] = value
1141 dispatch[REDUCE] = load_reduce
1142
1143 def load_pop(self):
1144 del self.stack[-1]
1145 dispatch[POP] = load_pop
1146
1147 def load_pop_mark(self):
1148 k = self.marker()
1149 del self.stack[k:]
1150 dispatch[POP_MARK] = load_pop_mark
1151
1152 def load_dup(self):
1153 self.append(self.stack[-1])
1154 dispatch[DUP] = load_dup
1155
1156 def load_get(self):
1157 self.append(self.memo[self.readline()[:-1]])
1158 dispatch[GET] = load_get
1159
1160 def load_binget(self):
1161 i = ord(self.read(1))
1162 self.append(self.memo[repr(i)])
1163 dispatch[BINGET] = load_binget
1164
1165 def load_long_binget(self):
1166 i = mloads('i' + self.read(4))
1167 self.append(self.memo[repr(i)])
1168 dispatch[LONG_BINGET] = load_long_binget
1169
1170 def load_put(self):
1171 self.memo[self.readline()[:-1]] = self.stack[-1]
1172 dispatch[PUT] = load_put
1173
1174 def load_binput(self):
1175 i = ord(self.read(1))
1176 self.memo[repr(i)] = self.stack[-1]
1177 dispatch[BINPUT] = load_binput
1178
1179 def load_long_binput(self):
1180 i = mloads('i' + self.read(4))
1181 self.memo[repr(i)] = self.stack[-1]
1182 dispatch[LONG_BINPUT] = load_long_binput
1183
1184 def load_append(self):
1185 stack = self.stack
1186 value = stack.pop()
1187 list = stack[-1]
1188 list.append(value)
1189 dispatch[APPEND] = load_append
1190
1191 def load_appends(self):
1192 stack = self.stack
1193 mark = self.marker()
1194 list = stack[mark - 1]
1195 list.extend(stack[mark + 1:])
1196 del stack[mark:]
1197 dispatch[APPENDS] = load_appends
1198
1199 def load_setitem(self):
1200 stack = self.stack
1201 value = stack.pop()
1202 key = stack.pop()
1203 dict = stack[-1]
1204 dict[key] = value
1205 dispatch[SETITEM] = load_setitem
1206
1207 def load_setitems(self):
1208 stack = self.stack
1209 mark = self.marker()
1210 dict = stack[mark - 1]
1211 for i in range(mark + 1, len(stack), 2):
1212 dict[stack[i]] = stack[i + 1]
1213
1214 del stack[mark:]
1215 dispatch[SETITEMS] = load_setitems
1216
    def load_build(self):
        """BUILD: apply the pickled state to the object on top of the stack.

        Prefers inst.__setstate__(state) when defined; otherwise treats
        state as a dict for inst.__dict__ (and, for protocol 2 two-tuple
        states, a second dict assigned via setattr for slots).
        """
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate:
            # The object knows how to restore itself; hand over the state.
            setstate(state)
            return
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            # Protocol 2 convention: (dict-state, slot-state) pair.
            state, slotstate = state
        if state:
            try:
                d = inst.__dict__
                try:
                    # intern() the keys so repeated attribute names share
                    # one string object across many unpickled instances.
                    for k, v in state.iteritems():
                        d[intern(k)] = v
                # keys in state don't have to be strings
                # don't blow up, but don't go out of our way
                except TypeError:
                    d.update(state)

            except RuntimeError:
                # XXX In restricted execution, the instance's __dict__
                # is not accessible.  Use the old way of unpickling
                # the instance variables.  This is a semantic
                # difference when unpickling in restricted
                # vs. unrestricted modes.
                # Note, however, that cPickle has never tried to do the
                # .update() business, and always uses
                # PyObject_SetItem(inst.__dict__, key, value) in a
                # loop over state.items().
                for k, v in state.items():
                    setattr(inst, k, v)
        if slotstate:
            # Slot values cannot go through __dict__; assign directly.
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD] = load_build
1255
1256 def load_mark(self):
1257 self.append(self.mark)
1258 dispatch[MARK] = load_mark
1259
1260 def load_stop(self):
1261 value = self.stack.pop()
1262 raise _Stop(value)
1263 dispatch[STOP] = load_stop
1264
# Helper class for load_inst/load_obj

class _EmptyClass:
    """Empty old-style shell; _instantiate reassigns its __class__ to build
    an instance without invoking the target class's __init__."""
    pass
1269
1270# Encode/decode longs in linear time.
1271
1272import binascii as _binascii
1273
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''
    if x > 0:
        # Positive: hex() gives big-endian nibbles; strip the "0x" prefix
        # (and a trailing "L" when present) and pad so the top bit reads
        # as a sign bit correctly.
        ashex = hex(x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement: (1L << nbytes) + x. The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1L << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            # Carry propagation shortened the hex form; left-pad with zeros
            # back to the intended width.
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    binary = _binascii.unhexlify(ashex)
    # hex digits were big-endian; reverse to little-endian byte order.
    return binary[::-1]
1339
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long('')
    0L
    >>> decode_long("\xff\x00")
    255L
    >>> decode_long("\xff\x7f")
    32767L
    >>> decode_long("\x00\xff")
    -256L
    >>> decode_long("\x00\x80")
    -32768L
    >>> decode_long("\x80")
    -128L
    >>> decode_long("\x7f")
    127L
    """

    nbytes = len(data)
    if nbytes == 0:
        # Inverse of encode_long(0L): the empty string means zero.
        return 0L
    ashex = _binascii.hexlify(data[::-1])
    n = long(ashex, 16) # quadratic time before Python 2.3; linear now
    if data[-1] >= '\x80':
        # High bit of the most significant byte set: the value is
        # negative, so undo the two's complement bias of 2**(8*nbytes).
        n -= 1L << (nbytes * 8)
    return n
1367
1368# Shorthands
1369
1370try:
1371 from cStringIO import StringIO
1372except ImportError:
1373 from StringIO import StringIO
1374
def dump(obj, file, protocol=None):
    """Pickle *obj* to the open file object *file* using *protocol*."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
1377
def dumps(obj, protocol=None):
    """Return the pickle of *obj* as a string."""
    buf = StringIO()
    Pickler(buf, protocol).dump(obj)
    return buf.getvalue()
1382
def load(file):
    """Read one pickled object from the open file object *file*."""
    unpickler = Unpickler(file)
    return unpickler.load()
1385
def loads(str):
    """Reconstruct the object pickled in the string *str*.

    The parameter name shadows the builtin but is kept for API
    compatibility with callers using loads(str=...).
    """
    return Unpickler(StringIO(str)).load()
1389
1390# Doctest
1391
1392def _test():
1393 import doctest
1394 return doctest.testmod()
1395
# When run as a script, exercise the encode_long/decode_long doctests.
if __name__ == "__main__":
    _test()