opy/_regtest/src/encodings/__init_

OILS / opy / _regtest / src / encodings / __init__.py View on Github | oilshell.org

157 lines, 75 significant

1	""" Standard "encodings" Package
2
3	Standard Python encoding modules are stored in this package
4	directory.
5
6	Codec modules must have names corresponding to normalized encoding
7	names as defined in the normalize_encoding() function below, e.g.
8	'utf-8' must be implemented by the module 'utf_8.py'.
9
10	Each codec module must export the following interface:
11
12	* getregentry() -> codecs.CodecInfo object
13	The getregentry() API must return a CodecInfo object with encoder, decoder,
14	incrementalencoder, incrementaldecoder, streamwriter and streamreader
15	attributes which adhere to the Python Codec Interface Standard.
16
17	In addition, a module may optionally also define the following
18	APIs which are then used by the package's codec search function:
19
20	* getaliases() -> sequence of encoding name strings to use as aliases
21
22	Alias names returned by getaliases() must be normalized encoding
23	names as defined by normalize_encoding().
24
25	Written by Marc-Andre Lemburg (mal@lemburg.com).
26
27	(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
28
29	"""#"
30
31	import codecs
32	from encodings import aliases
33	import __builtin__
34
# Results of search_function(), keyed by the encoding name exactly as it was
# passed in.  Values are codecs.CodecInfo objects, or None for names that
# could not be resolved to a codec module.
_cache = {}

# Sentinel used as the default for _cache.get(), so that "never looked up"
# can be told apart from a cached None (a remembered miss).
_unknown = '--unknown--'

# Passed as ``fromlist`` to __import__(); a non-empty fromlist makes
# __import__ return the 'encodings.<name>' submodule itself rather than the
# top-level 'encodings' package.
_import_tail = ['*']

# 256-entry translation table for 8-bit str.translate(): ASCII letters,
# digits and '.' map to themselves, every other byte maps to a space.
# The pieces are spelled with explicit repeat counts so that the table is
# exactly 256 characters long and each kept character sits at its own
# ordinal position.
_norm_encoding_map = (
    ' ' * 46 + '.' + ' ' +                      # 0x00-0x2F ('.' is 0x2E)
    '0123456789' + ' ' * 7 +                    # 0x30-0x40
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + ' ' * 6 +    # 0x41-0x60
    'abcdefghijklmnopqrstuvwxyz' + ' ' * 133    # 0x61-0xFF
)

# Shared alias table (normalized alias name -> codec module name).  This is
# the same dict object as encodings.aliases.aliases; search_function() adds
# entries to it.
_aliases = aliases.aliases
45
class CodecRegistryError(LookupError, SystemError):
    """Raised by search_function() when a codec module's registry entry
    is malformed (wrong length or non-callable members).

    Derives from both LookupError and SystemError, so handlers for either
    base class will catch it.
    """
48
def normalize_encoding(encoding):
    """Return the normalized form of an encoding name.

    Every run of characters that are neither alphanumeric nor a dot
    (dots are kept because they appear in Python package names) is
    collapsed into a single underscore, so ' -;#' becomes '_'.
    Leading and trailing underscores are dropped.

    Encoding names should be plain ASCII; any non-ASCII characters
    they contain must at least be representable in Latin-1.
    """
    # .translate() behaves differently on Unicode strings, so work on an
    # 8-bit string.  Encoding to 'latin-1' does not go through the codec
    # registry (see PyUnicode_AsEncodedString() in unicodeobject.c), so
    # there is no recursion back into the search function here.
    unicode_type = getattr(__builtin__, "unicode", None)
    if unicode_type is not None and isinstance(encoding, unicode_type):
        encoding = encoding.encode('latin-1')

    # Unwanted characters become spaces; split() then discards the runs.
    pieces = encoding.translate(_norm_encoding_map).split()
    return '_'.join(pieces)
70
def _codec_info_from_legacy(entry, mod):
    """Validate a legacy tuple-style registry entry and wrap it in CodecInfo.

    entry -- sequence of 4 to 7 items as returned by an old-style
             getregentry(): the first two items must be callable, items
             2..5 (where present) must each be callable or None, and the
             optional seventh item is the codec name.
    mod   -- the codec module the entry came from; used for error
             messages and to derive a name when the entry has none.

    Raises CodecRegistryError if the entry has the wrong length or holds
    non-callable members.
    """
    if not 4 <= len(entry) <= 7:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register'
            % (mod.__name__, mod.__file__))

    required = entry[:2]
    optional = entry[2:6]
    if (not all(hasattr(func, '__call__') for func in required) or
            any(func is not None and not hasattr(func, '__call__')
                for func in optional)):
        raise CodecRegistryError(
            'incompatible codecs in module "%s" (%s)'
            % (mod.__name__, mod.__file__))

    if len(entry) < 7 or entry[6] is None:
        # Pad the optional slots with None and put the module's short name
        # in the name slot.  Slicing to six items first means a 7-item
        # entry whose name is None gets the name *replaced*; appending to
        # it would yield eight items and make CodecInfo() fail.
        codec_name = mod.__name__.split(".", 1)[1]
        entry = (tuple(entry[:6]) + (None,) * (6 - len(entry)) +
                 (codec_name,))
    return codecs.CodecInfo(*entry)


def search_function(encoding):
    """Look up the codec for *encoding* among the 'encodings' submodules.

    Returns the codec's codecs.CodecInfo, or None when no matching codec
    module is found.  Both outcomes are stored in _cache under the name
    exactly as given, so repeated lookups are answered from the cache.

    Raises CodecRegistryError when a module's getregentry() returns a
    malformed legacy tuple.
    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding name and
    # look up the module using the aliased name, then fall back to the
    # normalized name itself.
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = (_aliases.get(norm_encoding) or
                        _aliases.get(norm_encoding.replace('.', '_')))
    if aliased_encoding is not None:
        modnames = [aliased_encoding, norm_encoding]
    else:
        modnames = [norm_encoding]

    for modname in modnames:
        # Empty or dotted names are never looked up as codec modules.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import
            # of a module with side-effects that is not in the 'encodings'
            # package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            pass
        else:
            break
    else:
        mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module (this also covers mod being None)
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        entry = _codec_info_from_legacy(entry, mod)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry
155
# Register search_function in the Python codec registry.  This runs as a
# side effect of importing this package.
codecs.register(search_function)