OILS / frontend / consts_gen.py View on Github | oilshell.org

639 lines, 327 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9consts_gen.py - Code generation for consts.py, id_kind_def.py, etc.
10"""
11from __future__ import print_function
12
13import collections
14import os
15import sys
16
17from asdl import gen_cpp
18from mycpp.mylib import log
19from frontend import id_kind_def
20from frontend import builtin_def
21from frontend import option_def
22
23
def _CreateModule(id_spec, ids):
    """Build a synthetic ASDL module declaring the Id and Kind sum types.

    Args:
      id_spec: object whose kind_name_list attribute supplies the Kind names.
      ids: iterable of (name, value) pairs; only the names are used here.

    Returns:
      An ast.Module named 'id_kind' holding the 'Id' and 'Kind' declarations.
    """
    from asdl import ast

    # Id = Word_Compound | Arith_Semi | Arith_Comma | ...
    id_constructors = []
    for id_name, _unused in ids:
        id_constructors.append(ast.Constructor(id_name))
    # 'uint16' packs the Id type in 16 bits
    id_options = ['uint16', 'no_namespace_suffix']
    id_type = ast.SimpleSum(id_constructors, generate=id_options)

    kind_constructors = []
    for kind_name in id_spec.kind_name_list:
        kind_constructors.append(ast.Constructor(kind_name))
    kind_type = ast.SimpleSum(kind_constructors,
                              generate=['no_namespace_suffix'])

    declarations = [
        ast.TypeDecl('Id', id_type),
        ast.TypeDecl('Kind', kind_type),
    ]
    return ast.Module('id_kind', [], declarations)
42
43
# Every builtin definition returned by builtin_def.All(), evaluated once at
# import time.  GenBuiltinLookup() filters this list by each entry's `kind`.
_BUILTINS = builtin_def.All()
45
46
def GenBuiltinLookup(func_name, kind, f):
    """Emit a C++ string -> builtin_t lookup for the builtins of one kind.

    Args:
      func_name: name of the generated C++ function.
      kind: only entries of _BUILTINS whose `kind` equals this are included.
      f: writable file-like object that receives the generated code.
    """
    selected = []
    for builtin in _BUILTINS:
        if builtin.kind == kind:
            selected.append((builtin.name, builtin.index))

    GenStringLookup('builtin_t', func_name, selected, f)
53
54
def GenStringLookup(type_name, func_name, pairs, f):
    """Write a C++ function mapping a BigStr* to an integer index.

    The generated function switches on the first byte of the string and then
    compares length and bytes against every candidate sharing that first
    byte.  It returns 0 (consts.NO_INDEX) for the empty string and for any
    string that matches no candidate.

    Args:
      type_name: C++ return type of the generated function.
      func_name: name of the generated function.
      pairs: iterable of (name, index) tuples; every name must be non-empty
        because it is grouped by name[0].
      f: writable file-like object that receives the generated code.
    """
    # Group candidates by first character.  Within a group, the order of
    # `pairs` is preserved.
    by_first_char = collections.defaultdict(list)
    for name, index in pairs:
        by_first_char[name[0]].append((name, index))

    # Note: we could optimize the length check, e.g. have a second level
    # switch.  But we would need to measure the difference.  Caching the id on
    # AST nodes is probably a bigger win, e.g. for loops.
    #
    # Size optimization: don't repeat constants literally?

    f.write("""\
%s %s(BigStr* s) {
 int length = len(s);
 if (length == 0) return 0; // consts.NO_INDEX

 const char* data = s->data_;
 switch (data[0]) {
""" % (type_name, func_name))

    for first_char in sorted(by_first_char):
        f.write(" case '%s':\n" % first_char)
        for name, index in by_first_char[first_char]:
            # NOTE: we have to check the length because they're not
            # NUL-terminated
            f.write('''\
 if (length == %d && memcmp("%s", data, %d) == 0) return %d;
''' % (len(name), name, len(name), index))
        f.write(' break;\n')

    f.write("""\
 }

 return 0; // consts.NO_INDEX
}

""")
100
101
def GenIntStrLookup(func_name, int2str, f):
    """Write a C++ function mapping an int to a global BigStr*.

    One GLOBAL_STR named k<func_name>_<int> is emitted per entry, followed by
    a function that switches on the integer and returns the matching global.
    The generated default case calls FAIL(kShouldNotGetHere).

    Args:
      func_name: name of the generated function, also used in global names.
      int2str: dict of int -> str.
      f: writable file-like object that receives the generated code.
    """
    # NOTE: quoting doesn't work, strings must be Identifier Names here
    ordered_keys = sorted(int2str)

    for key in ordered_keys:
        decl = 'GLOBAL_STR(k%s_%d, "%s");\n' % (func_name, key, int2str[key])
        f.write(decl)

    header = """\

BigStr* %s(int i) {
 switch (i) {
""" % func_name
    f.write(header)

    for key in ordered_keys:
        f.write(' case %d:\n return k%s_%d;\n break;\n' %
                (key, func_name, key))

    footer = """\
 default:
 FAIL(kShouldNotGetHere);
 }
}

"""
    f.write(footer)
127
128
def GenStringMembership(func_name, strs, f):
    """Write a C++ predicate testing whether a BigStr* is one of `strs`.

    The generated function switches on the first byte and then compares
    length and bytes against each candidate with that first byte.  It returns
    false for the empty string and for any string that matches no candidate.

    Args:
      func_name: name of the generated function.
      strs: iterable of non-empty strings (each is grouped by its first
        character).
      f: writable file-like object that receives the generated code.
    """
    buckets = {}
    for candidate in strs:
        buckets.setdefault(candidate[0], []).append(candidate)

    prologue = """\
bool %s(BigStr* s) {
 int length = len(s);
 if (length == 0) return false;

 const char* data = s->data_;
 switch (data[0]) {
""" % func_name
    f.write(prologue)

    for first_char in sorted(buckets):
        f.write(" case '%s':\n" % first_char)
        for candidate in buckets[first_char]:
            size = len(candidate)
            # NOTE: we have to check the length because they're not
            # NUL-terminated
            f.write('''\
 if (length == %d && memcmp("%s", data, %d) == 0) return true;
''' % (size, candidate, size))
        f.write(' break;\n')

    epilogue = """\
 }

 return false;
}

"""
    f.write(epilogue)
161
162
# Characters that need a backslash escape inside a C character or string
# literal, mapped to the escape sequence to emit.
C_CHAR = {
    # quoting characters and the backslash itself
    "'": r"\'",  # '\'' is a single quote in C
    '"': r'\"',
    '\\': r'\\',
    # whitespace
    '\t': r'\t',
    '\r': r'\r',
    '\n': r'\n',
    '\v': r'\v',
    '\f': r'\f',
    # other control characters
    '\0': r'\0',
    '\a': r'\a',
    '\b': r'\b',
    '\x1b': r'\x1b',
}


def CChar(c):
    """Return the C escape sequence for `c`, or `c` itself if none is listed."""
    if c in C_CHAR:
        return C_CHAR[c]
    return c
182
183
def GenCharLookup(func_name, lookup, f, required=False):
    """Write a C++ function mapping a one-character BigStr* to another.

    Args:
      func_name: name of the generated function.
      lookup: dict of single character -> single character.  Keys and values
        are both passed through CChar() before being written.
      f: writable file-like object that receives the generated code.
      required: if true, the generated default case is assert(0); otherwise
        it returns nullptr.
    """
    preamble = """\
BigStr* %s(BigStr* c) {
 assert(len(c) == 1);

 char ch = c->data_[0];

 // TODO-intern: return value
 switch (ch) {
""" % func_name
    f.write(preamble)

    for key in sorted(lookup):
        f.write(" case '%s':\n" % CChar(key))
        f.write(' return StrFromC("%s", 1);\n' % CChar(lookup[key]))
        f.write(" break;\n")

    f.write(" default:\n")
    default_stmt = " assert(0);\n" if required else " return nullptr;\n"
    f.write(default_stmt)

    f.write("""
 }
}
""")
210
211
def GenStrList(l, name, out):
    """Emit a GLOBAL_LIST of BigStr* together with one GLOBAL_STR per element.

    Args:
      l: sized iterable of strings; element i becomes the global k<name>_<i>.
      name: name of the generated list, also used in element global names.
      out: callable out(fmt, *args) that emits one formatted line.
    """
    idents = []
    for position, text in enumerate(l):
        ident = "k%s_%d" % (name, position)
        out('GLOBAL_STR(%s, "%s");', ident, text)
        idents.append(ident)

    # COMMA is written instead of a literal ',' between the elements
    out('GLOBAL_LIST(%s, BigStr*, %d, {%s});\n', name, len(l),
        ' COMMA '.join(idents))
221
222
def main(argv):
    """Generate code for the action named by argv[1].

    Actions:
      c           Print '#define id__NAME VALUE' for every Id to stdout.
      cpp         Write argv[2] + '.h' and argv[2] + '.cc' with the Id and
                  Kind types, using the gen_cpp visitors.
      mypy        Write Python definitions of Id and Kind to stdout, using
                  gen_python.GenMyPyVisitor.
      cpp-consts  Write argv[2] + '.h' and argv[2] + '.cc' with the C++
                  'consts' namespace: lookup functions, global lists and
                  strings.
      py-consts   Print Python dict literals (BOOL_ARG_TYPES, TEST_*_LOOKUP,
                  ID_TO_KIND) to stdout.

    Args:
      argv: full argument vector; argv[1] is the action, and argv[2] is the
        output path prefix for the 'cpp' and 'cpp-consts' actions.

    Raises:
      RuntimeError: if the action is missing or not one of the above.
      IndexError: if 'cpp' or 'cpp-consts' is given without argv[2].
    """
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError('Action required')

    # TODO: Remove duplication in core/meta.py
    # These dicts are handed to id_kind_def, and read back by the
    # 'cpp-consts' and 'py-consts' actions below.
    ID_TO_KIND = {}
    BOOL_ARG_TYPES = {}
    TEST_UNARY_LOOKUP = {}
    TEST_BINARY_LOOKUP = {}
    TEST_OTHER_LOOKUP = {}

    ID_SPEC = id_kind_def.IdSpec(ID_TO_KIND, BOOL_ARG_TYPES)

    id_kind_def.AddKinds(ID_SPEC)
    id_kind_def.AddBoolKinds(ID_SPEC)  # must come second

    id_kind_def.SetupTestBuiltin(ID_SPEC, TEST_UNARY_LOOKUP,
                                 TEST_BINARY_LOOKUP, TEST_OTHER_LOOKUP)

    # The in-place sort relies on Python 2, where dict.items() returns a list
    ids = ID_SPEC.id_str2int.items()
    ids.sort(key=lambda pair: pair[1])  # Sort by ID

    if action == 'c':
        for name, id_int in ids:
            print('#define id__%s %s' % (name, id_int))

    elif action == 'cpp':
        schema_ast = _CreateModule(ID_SPEC, ids)

        out_prefix = argv[2]

        # Header: declarations
        with open(out_prefix + '.h', 'w') as f:
            f.write("""\
#ifndef ID_KIND_ASDL_H
#define ID_KIND_ASDL_H

#include <stdint.h> // uint16_t

class BigStr;

namespace id_kind_asdl {

#define ASDL_NAMES struct
""")

            v = gen_cpp.ClassDefVisitor(f)
            v.VisitModule(schema_ast)

            f.write("""
} // namespace id_kind_asdl

#endif // ID_KIND_ASDL_H
""")

        # Implementation: definitions
        with open(out_prefix + '.cc', 'w') as f:
            f.write("""\
#include <assert.h>
#include "_gen/frontend/id_kind.asdl.h"
#include "mycpp/gc_alloc.h" // StrFromC()

namespace id_kind_asdl {

""")

            v = gen_cpp.MethodDefVisitor(f)

            v.VisitModule(schema_ast)

            f.write('} // namespace id_kind_asdl\n')

    elif action == 'mypy':
        from asdl import gen_python

        schema_ast = _CreateModule(ID_SPEC, ids)
        #print(schema_ast)

        f = sys.stdout

        f.write("""\
from asdl import pybase

""")
        # Minor style issue: we want Id and Kind, not Id_e and Kind_e
        v = gen_python.GenMyPyVisitor(f)
        v.VisitModule(schema_ast)

    elif action == 'cpp-consts':

        # Break circular deps

        from core import pyutil
        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        # Names of attributes of `consts` that are emitted as List<int>*
        # globals: declared in the header and defined in the .cc file.
        LIST_INT = [
            'STRICT_ALL',
            'YSH_UPGRADE',
            'YSH_ALL',
            'DEFAULT_TRUE',
            'PARSE_OPTION_NUMS',
            'SHOPT_OPTION_NUMS',
            'SET_OPTION_NUMS',
            'VISIBLE_SHOPT_NUMS',
        ]

        prefix = argv[2]

        with open(prefix + '.h', 'w') as f:

            # Print one %-formatted line to the header file
            def out(fmt, *args):
                print(fmt % args, file=f)

            out("""\
#ifndef CONSTS_H
#define CONSTS_H

#include "mycpp/runtime.h"

#include "_gen/frontend/id_kind.asdl.h"
#include "_gen/frontend/option.asdl.h"
#include "_gen/core/runtime.asdl.h"
#include "_gen/frontend/types.asdl.h"

namespace consts {
""")

            for name in LIST_INT:
                out('extern List<int>* %s;', name)

            out('extern List<BigStr*>* BUILTIN_NAMES;')
            out('extern List<BigStr*>* OSH_KEYWORD_NAMES;')
            out('extern List<BigStr*>* SET_OPTION_NAMES;')
            out('extern List<BigStr*>* SHOPT_OPTION_NAMES;')

            out("""\

extern int NO_INDEX;

extern BigStr* gVersion;

int RedirDefaultFd(id_kind_asdl::Id_t id);
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id);
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id);
id_kind_asdl::Kind GetKind(id_kind_asdl::Id_t id);

types_asdl::opt_group_t OptionGroupNum(BigStr* s);
option_asdl::option_t OptionNum(BigStr* s);
option_asdl::builtin_t LookupNormalBuiltin(BigStr* s);
option_asdl::builtin_t LookupAssignBuiltin(BigStr* s);
option_asdl::builtin_t LookupSpecialBuiltin(BigStr* s);
bool IsControlFlow(BigStr* s);
BigStr* ControlFlowName(int i);
bool IsKeyword(BigStr* s);
BigStr* LookupCharC(BigStr* c);
BigStr* LookupCharPrompt(BigStr* c);

BigStr* OptionName(option_asdl::option_t opt_num);

Tuple2<runtime_asdl::state_t, runtime_asdl::emit_t> IfsEdge(runtime_asdl::state_t state, runtime_asdl::char_kind_t ch);

extern BigStr* ASSIGN_ARG_RE;

} // namespace consts

#endif // CONSTS_H
""")
        with open(prefix + '.cc', 'w') as f:

            # Print one %-formatted line to the .cc file.  Several calls
            # below pass an already-formatted string and no args.
            def out(fmt, *args):
                print(fmt % args, file=f)

            out("""\
#include "_gen/frontend/consts.h"

using id_kind_asdl::Id;
using id_kind_asdl::Kind;
using types_asdl::redir_arg_type_e;
using types_asdl::bool_arg_type_e;
using option_asdl::builtin_t;

namespace consts {

int NO_INDEX = 0; // duplicated from frontend/consts.py
""")

            # Generate gVersion, which is read by pyutil::GetVersion()
            # The root is the parent of the directory holding the running
            # script; note this reads sys.argv[0], not the argv parameter.
            this_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
            root_dir = os.path.join(this_dir, '..')  # ~/git/oilshell/oil
            loader = pyutil._FileResourceLoader(root_dir)

            version_str = pyutil.GetVersion(loader)
            out('GLOBAL_STR(gVersion, "%s");' % version_str)
            out('')

            # Note: could use opt_num:: instead of raw ints
            for name in LIST_INT:
                val = getattr(consts, name)
                val_str = ' COMMA '.join(str(i) for i in val)
                out('GLOBAL_LIST(%s, int, %d, {%s});', name, len(val), val_str)

            # The next four functions are each a switch over Id values with
            # one 'case ...: return ...;' line per dict entry.
            out("""\

int RedirDefaultFd(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_DEFAULT_FD):
                # Id_str() uses '.' as the separator; C++ needs '::'
                a = Id_str(id_).replace('.', '::')
                b = consts.REDIR_DEFAULT_FD[id_]
                out(' case %s: return %s;' % (a, b))
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(consts.REDIR_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # redir_arg_type_e::Path, etc.
                b = redir_arg_type_str(consts.REDIR_ARG_TYPES[id_]).replace(
                    '.', '_e::')
                out(' case %s: return %s;' % (a, b))
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(BOOL_ARG_TYPES):
                a = Id_str(id_).replace('.', '::')
                # bool_arg_type_e::BigStr, etc.
                b = bool_arg_type_str(BOOL_ARG_TYPES[id_]).replace('.', '_e::')
                out(' case %s: return %s;' % (a, b))
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            out("""\
Kind GetKind(id_kind_asdl::Id_t id) {
 // relies on "switch lowering"
 switch (id) {
""")
            for id_ in sorted(ID_TO_KIND):
                a = Id_str(id_).replace('.', '::')
                b = Kind_str(ID_TO_KIND[id_]).replace('.', '::')
                out(' case %s: return %s;' % (a, b))
            out("""\
 }
 FAIL(kShouldNotGetHere);
}
""")

            # String -> integer lookups.  These helpers write to f directly
            # rather than going through out().
            pairs = consts.OPTION_GROUPS.items()
            GenStringLookup('types_asdl::opt_group_t', 'OptionGroupNum', pairs,
                            f)

            pairs = [(opt.name, opt.index) for opt in option_def.All()]
            GenStringLookup('option_asdl::option_t', 'OptionNum', pairs, f)

            GenBuiltinLookup('LookupNormalBuiltin', 'normal', f)
            GenBuiltinLookup('LookupAssignBuiltin', 'assign', f)
            GenBuiltinLookup('LookupSpecialBuiltin', 'special', f)

            GenStringMembership('IsControlFlow', consts._CONTROL_FLOW_NAMES, f)
            GenIntStrLookup('ControlFlowName', consts._CONTROL_FLOW_LOOKUP, f)

            GenStringMembership('IsKeyword', consts.OSH_KEYWORD_NAMES, f)

            GenCharLookup('LookupCharC', consts._ONE_CHAR_C, f, required=True)
            GenCharLookup('LookupCharPrompt', consts._ONE_CHAR_PROMPT, f)

            # Inverse of OptionNum: option index -> option name
            opt_int2str = {}
            for opt in option_def.All():
                opt_int2str[opt.index] = opt.name
            GenIntStrLookup('OptionName', opt_int2str, f)

            #
            # Generate a tightly packed 2D array for C, from a Python dict.
            #

            # The dict is keyed by (state, char_kind) tuples; the maxima
            # give the array dimensions.
            edges = consts._IFS_EDGES
            max_state = max(edge[0] for edge in edges)
            max_char_kind = max(edge[1] for edge in edges)

            edge_array = []
            for i in xrange(max_state + 1):
                # unused cells get -1
                edge_array.append(['-1'] * (max_char_kind + 1))

            for i in xrange(max_state + 1):
                for j in xrange(max_char_kind + 1):
                    entry = edges.get((i, j))
                    if entry is not None:
                        # pack (new_state, action) into 32 bits
                        edge_array[i][j] = '(%d<<16)|%d' % entry

            # One brace-enclosed row of C initializers per state
            parts = []
            for i in xrange(max_state + 1):
                parts.append(' {')
                parts.append(', '.join('%10s' % cell
                                       for cell in edge_array[i]))
                parts.append(' },\n')

            out("""\
int _IFS_EDGE[%d][%d] = {
%s
};
""" % (max_state + 1, max_char_kind + 1, ''.join(parts)))

            # The generated IfsEdge() unpacks a cell: high bits are the new
            # state, low 16 bits are the emit action.
            out("""\
// Note: all of these are integers, e.g. state_i, emit_i, char_kind_i
using runtime_asdl::state_t;
using runtime_asdl::emit_t;
using runtime_asdl::char_kind_t;

Tuple2<state_t, emit_t> IfsEdge(state_t state, runtime_asdl::char_kind_t ch) {
 int cell = _IFS_EDGE[state][ch];
 state_t new_state = cell >> 16;
 emit_t emit = cell & 0xFFFF;
 return Tuple2<state_t, emit_t>(new_state, emit);
}
""")

            GenStrList(consts.BUILTIN_NAMES, 'BUILTIN_NAMES', out)
            GenStrList(consts.OSH_KEYWORD_NAMES, 'OSH_KEYWORD_NAMES', out)
            GenStrList(consts.SET_OPTION_NAMES, 'SET_OPTION_NAMES', out)
            GenStrList(consts.SHOPT_OPTION_NAMES, 'SHOPT_OPTION_NAMES', out)

            def _CString(s):
                # Hack that does backslash escaping, e.g. \\
                # We could also use C++ strings
                import json
                return json.dumps(s)

            GLOBAL_STRINGS = ['ASSIGN_ARG_RE']
            for var_name in GLOBAL_STRINGS:
                out('GLOBAL_STR(%s, %s);', var_name, _CString(getattr(consts, var_name)))

            out("""\
} // namespace consts
""")

    elif action == 'py-consts':
        # It's kind of weird to use the generated code to generate more code.
        # Can we do this instead with the parsed module for "id" and "types.asdl"?

        from frontend import consts
        from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
        from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

        # Imports needed by the generated Python module
        print("""
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import redir_arg_type_e, bool_arg_type_e
""")

        # Each dict below is printed as a Python literal with sorted keys
        print('')
        print('BOOL_ARG_TYPES = {')
        for id_ in sorted(BOOL_ARG_TYPES):
            v = BOOL_ARG_TYPES[id_]
            # HACK
            v = bool_arg_type_str(v).replace('.', '_e.')
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

        print('')
        print('TEST_UNARY_LOOKUP = {')
        for op_str in sorted(TEST_UNARY_LOOKUP):
            v = Id_str(TEST_UNARY_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('TEST_BINARY_LOOKUP = {')
        for op_str in sorted(TEST_BINARY_LOOKUP):
            v = Id_str(TEST_BINARY_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('TEST_OTHER_LOOKUP = {')
        for op_str in sorted(TEST_OTHER_LOOKUP):
            v = Id_str(TEST_OTHER_LOOKUP[op_str])
            print(' %r: %s,' % (op_str, v))
        print('}')

        print('')
        print('ID_TO_KIND = {')
        for id_ in sorted(ID_TO_KIND):
            v = Kind_str(ID_TO_KIND[id_])
            print(' %s: %s,' % (Id_str(id_), v))
        print('}')

    else:
        raise RuntimeError('Invalid action %r' % action)
632
633
# Script entry point: a RuntimeError raised by main() is reported as a
# one-line FATAL message on stderr and the process exits with status 1.
if __name__ == '__main__':
    exit_status = 0
    try:
        main(sys.argv)
    except RuntimeError as e:
        sys.stderr.write('FATAL: %s\n' % e)
        exit_status = 1
    if exit_status:
        sys.exit(exit_status)