""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python specific codec
        aliases have also been added.

"""
# Maps a normalized alias (key) to the name of the codec module that
# implements it (value).  Keys must already be in normalized form because
# the lookup is performed after normalization (see the module docstring).
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646': 'ascii',
    'ansi_x3.4_1968': 'ascii',
    'ansi_x3_4_1968': 'ascii',  # some email headers use this non-standard name
    'ansi_x3.4_1986': 'ascii',
    'cp367': 'ascii',
    'csascii': 'ascii',
    'ibm367': 'ascii',
    'iso646_us': 'ascii',
    'iso_646.irv_1991': 'ascii',
    'iso_ir_6': 'ascii',
    'us': 'ascii',
    'us_ascii': 'ascii',

    # base64_codec codec
    'base64': 'base64_codec',
    'base_64': 'base64_codec',

    # big5 codec
    'big5_tw': 'big5',
    'csbig5': 'big5',

    # big5hkscs codec
    'big5_hkscs': 'big5hkscs',
    'hkscs': 'big5hkscs',

    # bz2_codec codec
    'bz2': 'bz2_codec',

    # cp037 codec
    '037': 'cp037',
    'csibm037': 'cp037',
    'ebcdic_cp_ca': 'cp037',
    'ebcdic_cp_nl': 'cp037',
    'ebcdic_cp_us': 'cp037',
    'ebcdic_cp_wt': 'cp037',
    'ibm037': 'cp037',
    'ibm039': 'cp037',

    # cp1026 codec
    '1026': 'cp1026',
    'csibm1026': 'cp1026',
    'ibm1026': 'cp1026',

    # cp1140 codec
    '1140': 'cp1140',
    'ibm1140': 'cp1140',

    # cp1250 codec
    '1250': 'cp1250',
    'windows_1250': 'cp1250',

    # cp1251 codec
    '1251': 'cp1251',
    'windows_1251': 'cp1251',

    # cp1252 codec
    '1252': 'cp1252',
    'windows_1252': 'cp1252',

    # cp1253 codec
    '1253': 'cp1253',
    'windows_1253': 'cp1253',

    # cp1254 codec
    '1254': 'cp1254',
    'windows_1254': 'cp1254',

    # cp1255 codec
    '1255': 'cp1255',
    'windows_1255': 'cp1255',

    # cp1256 codec
    '1256': 'cp1256',
    'windows_1256': 'cp1256',

    # cp1257 codec
    '1257': 'cp1257',
    'windows_1257': 'cp1257',

    # cp1258 codec
    '1258': 'cp1258',
    'windows_1258': 'cp1258',

    # cp424 codec
    '424': 'cp424',
    'csibm424': 'cp424',
    'ebcdic_cp_he': 'cp424',
    'ibm424': 'cp424',

    # cp437 codec
    '437': 'cp437',
    'cspc8codepage437': 'cp437',
    'ibm437': 'cp437',

    # cp500 codec
    '500': 'cp500',
    'csibm500': 'cp500',
    'ebcdic_cp_be': 'cp500',
    'ebcdic_cp_ch': 'cp500',
    'ibm500': 'cp500',

    # cp775 codec
    '775': 'cp775',
    'cspc775baltic': 'cp775',
    'ibm775': 'cp775',

    # cp850 codec
    '850': 'cp850',
    'cspc850multilingual': 'cp850',
    'ibm850': 'cp850',

    # cp852 codec
    '852': 'cp852',
    'cspcp852': 'cp852',
    'ibm852': 'cp852',

    # cp855 codec
    '855': 'cp855',
    'csibm855': 'cp855',
    'ibm855': 'cp855',

    # cp857 codec
    '857': 'cp857',
    'csibm857': 'cp857',
    'ibm857': 'cp857',

    # cp858 codec
    '858': 'cp858',
    'csibm858': 'cp858',
    'ibm858': 'cp858',

    # cp860 codec
    '860': 'cp860',
    'csibm860': 'cp860',
    'ibm860': 'cp860',

    # cp861 codec
    '861': 'cp861',
    'cp_is': 'cp861',
    'csibm861': 'cp861',
    'ibm861': 'cp861',

    # cp862 codec
    '862': 'cp862',
    'cspc862latinhebrew': 'cp862',
    'ibm862': 'cp862',

    # cp863 codec
    '863': 'cp863',
    'csibm863': 'cp863',
    'ibm863': 'cp863',

    # cp864 codec
    '864': 'cp864',
    'csibm864': 'cp864',
    'ibm864': 'cp864',

    # cp865 codec
    '865': 'cp865',
    'csibm865': 'cp865',
    'ibm865': 'cp865',

    # cp866 codec
    '866': 'cp866',
    'csibm866': 'cp866',
    'ibm866': 'cp866',

    # cp869 codec
    '869': 'cp869',
    'cp_gr': 'cp869',
    'csibm869': 'cp869',
    'ibm869': 'cp869',

    # cp932 codec
    '932': 'cp932',
    'ms932': 'cp932',
    'mskanji': 'cp932',
    'ms_kanji': 'cp932',

    # cp949 codec
    '949': 'cp949',
    'ms949': 'cp949',
    'uhc': 'cp949',

    # cp950 codec
    '950': 'cp950',
    'ms950': 'cp950',

    # euc_jis_2004 codec
    'jisx0213': 'euc_jis_2004',
    'eucjis2004': 'euc_jis_2004',
    'euc_jis2004': 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213': 'euc_jisx0213',

    # euc_jp codec
    'eucjp': 'euc_jp',
    'ujis': 'euc_jp',
    'u_jis': 'euc_jp',

    # euc_kr codec
    'euckr': 'euc_kr',
    'korean': 'euc_kr',
    'ksc5601': 'euc_kr',
    'ks_c_5601': 'euc_kr',
    'ks_c_5601_1987': 'euc_kr',
    'ksx1001': 'euc_kr',
    'ks_x_1001': 'euc_kr',

    # gb18030 codec
    'gb18030_2000': 'gb18030',

    # gb2312 codec
    'chinese': 'gb2312',
    'csiso58gb231280': 'gb2312',
    'euc_cn': 'gb2312',
    'euccn': 'gb2312',
    'eucgb2312_cn': 'gb2312',
    'gb2312_1980': 'gb2312',
    'gb2312_80': 'gb2312',
    'iso_ir_58': 'gb2312',

    # gbk codec
    '936': 'gbk',
    'cp936': 'gbk',
    'ms936': 'gbk',

    # hex_codec codec
    'hex': 'hex_codec',

    # hp_roman8 codec
    'roman8': 'hp_roman8',
    'r8': 'hp_roman8',
    # Key is lowercase like every other alias here: lookups use the
    # normalized name, so the former mixed-case 'csHPRoman8' never matched.
    'cshproman8': 'hp_roman8',

    # hz codec
    'hzgb': 'hz',
    'hz_gb': 'hz',
    'hz_gb_2312': 'hz',

    # iso2022_jp codec
    'csiso2022jp': 'iso2022_jp',
    'iso2022jp': 'iso2022_jp',
    'iso_2022_jp': 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1': 'iso2022_jp_1',
    'iso_2022_jp_1': 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2': 'iso2022_jp_2',
    'iso_2022_jp_2': 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004': 'iso2022_jp_2004',
    'iso2022jp_2004': 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3': 'iso2022_jp_3',
    'iso_2022_jp_3': 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext': 'iso2022_jp_ext',
    'iso_2022_jp_ext': 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr': 'iso2022_kr',
    'iso2022kr': 'iso2022_kr',
    'iso_2022_kr': 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6': 'iso8859_10',
    'iso_8859_10': 'iso8859_10',
    'iso_8859_10_1992': 'iso8859_10',
    'iso_ir_157': 'iso8859_10',
    'l6': 'iso8859_10',
    'latin6': 'iso8859_10',

    # iso8859_11 codec
    'thai': 'iso8859_11',
    'iso_8859_11': 'iso8859_11',
    'iso_8859_11_2001': 'iso8859_11',

    # iso8859_13 codec
    'iso_8859_13': 'iso8859_13',
    'l7': 'iso8859_13',
    'latin7': 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14': 'iso8859_14',
    'iso_8859_14_1998': 'iso8859_14',
    'iso_celtic': 'iso8859_14',
    'iso_ir_199': 'iso8859_14',
    'l8': 'iso8859_14',
    'latin8': 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15': 'iso8859_15',
    'l9': 'iso8859_15',
    'latin9': 'iso8859_15',

    # iso8859_16 codec
    'iso_8859_16': 'iso8859_16',
    'iso_8859_16_2001': 'iso8859_16',
    'iso_ir_226': 'iso8859_16',
    'l10': 'iso8859_16',
    'latin10': 'iso8859_16',

    # iso8859_2 codec
    'csisolatin2': 'iso8859_2',
    'iso_8859_2': 'iso8859_2',
    'iso_8859_2_1987': 'iso8859_2',
    'iso_ir_101': 'iso8859_2',
    'l2': 'iso8859_2',
    'latin2': 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3': 'iso8859_3',
    'iso_8859_3': 'iso8859_3',
    'iso_8859_3_1988': 'iso8859_3',
    'iso_ir_109': 'iso8859_3',
    'l3': 'iso8859_3',
    'latin3': 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4': 'iso8859_4',
    'iso_8859_4': 'iso8859_4',
    'iso_8859_4_1988': 'iso8859_4',
    'iso_ir_110': 'iso8859_4',
    'l4': 'iso8859_4',
    'latin4': 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic': 'iso8859_5',
    'cyrillic': 'iso8859_5',
    'iso_8859_5': 'iso8859_5',
    'iso_8859_5_1988': 'iso8859_5',
    'iso_ir_144': 'iso8859_5',

    # iso8859_6 codec
    'arabic': 'iso8859_6',
    'asmo_708': 'iso8859_6',
    'csisolatinarabic': 'iso8859_6',
    'ecma_114': 'iso8859_6',
    'iso_8859_6': 'iso8859_6',
    'iso_8859_6_1987': 'iso8859_6',
    'iso_ir_127': 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek': 'iso8859_7',
    'ecma_118': 'iso8859_7',
    'elot_928': 'iso8859_7',
    'greek': 'iso8859_7',
    'greek8': 'iso8859_7',
    'iso_8859_7': 'iso8859_7',
    'iso_8859_7_1987': 'iso8859_7',
    'iso_ir_126': 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew': 'iso8859_8',
    'hebrew': 'iso8859_8',
    'iso_8859_8': 'iso8859_8',
    'iso_8859_8_1988': 'iso8859_8',
    'iso_ir_138': 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5': 'iso8859_9',
    'iso_8859_9': 'iso8859_9',
    'iso_8859_9_1989': 'iso8859_9',
    'iso_ir_148': 'iso8859_9',
    'l5': 'iso8859_9',
    'latin5': 'iso8859_9',

    # johab codec
    'cp1361': 'johab',
    'ms1361': 'johab',

    # koi8_r codec
    'cskoi8r': 'koi8_r',

    # latin_1 codec
    #
    # Note that the latin_1 codec is implemented internally in C and a
    # lot faster than the charmap codec iso8859_1 which uses the same
    # encoding. This is why we discourage the use of the iso8859_1
    # codec and alias it to latin_1 instead.
    #
    '8859': 'latin_1',
    'cp819': 'latin_1',
    'csisolatin1': 'latin_1',
    'ibm819': 'latin_1',
    'iso8859': 'latin_1',
    'iso8859_1': 'latin_1',
    'iso_8859_1': 'latin_1',
    'iso_8859_1_1987': 'latin_1',
    'iso_ir_100': 'latin_1',
    'l1': 'latin_1',
    'latin': 'latin_1',
    'latin1': 'latin_1',

    # mac_cyrillic codec
    'maccyrillic': 'mac_cyrillic',

    # mac_greek codec
    'macgreek': 'mac_greek',

    # mac_iceland codec
    'maciceland': 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope': 'mac_latin2',
    'maclatin2': 'mac_latin2',

    # mac_roman codec
    'macroman': 'mac_roman',

    # mac_turkish codec
    'macturkish': 'mac_turkish',

    # mbcs codec
    'dbcs': 'mbcs',

    # ptcp154 codec
    'csptcp154': 'ptcp154',
    'pt154': 'ptcp154',
    'cp154': 'ptcp154',
    'cyrillic_asian': 'ptcp154',

    # quopri_codec codec
    'quopri': 'quopri_codec',
    'quoted_printable': 'quopri_codec',
    'quotedprintable': 'quopri_codec',

    # rot_13 codec
    'rot13': 'rot_13',

    # shift_jis codec
    'csshiftjis': 'shift_jis',
    'shiftjis': 'shift_jis',
    'sjis': 'shift_jis',
    's_jis': 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004': 'shift_jis_2004',
    'sjis_2004': 'shift_jis_2004',
    's_jis_2004': 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213': 'shift_jisx0213',
    'sjisx0213': 'shift_jisx0213',
    's_jisx0213': 'shift_jisx0213',

    # tactis codec
    # NOTE(review): no 'tactis' codec is referenced anywhere else in this
    # file -- confirm that an encodings.tactis module actually exists.
    'tis260': 'tactis',

    # tis_620 codec
    'tis620': 'tis_620',
    'tis_620_0': 'tis_620',
    'tis_620_2529_0': 'tis_620',
    'tis_620_2529_1': 'tis_620',
    'iso_ir_166': 'tis_620',

    # utf_16 codec
    'u16': 'utf_16',
    'utf16': 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked': 'utf_16_be',
    'utf_16be': 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked': 'utf_16_le',
    'utf_16le': 'utf_16_le',

    # utf_32 codec
    'u32': 'utf_32',
    'utf32': 'utf_32',

    # utf_32_be codec
    'utf_32be': 'utf_32_be',

    # utf_32_le codec
    'utf_32le': 'utf_32_le',

    # utf_7 codec
    'u7': 'utf_7',
    'utf7': 'utf_7',
    'unicode_1_1_utf_7': 'utf_7',

    # utf_8 codec
    'u8': 'utf_8',
    'utf': 'utf_8',
    'utf8': 'utf_8',
    'utf8_ucs2': 'utf_8',
    'utf8_ucs4': 'utf_8',

    # uu_codec codec
    'uu': 'uu_codec',

    # zlib_codec codec
    'zip': 'zlib_codec',
    'zlib': 'zlib_codec',

}