| 1 | #!/usr/bin/env python2
 | 
| 2 | """Lex_gen.py."""
 | 
| 3 | from __future__ import print_function
 | 
| 4 | 
 | 
| 5 | from _devbuild.gen.id_kind import (TEST_UNARY_LOOKUP, TEST_BINARY_LOOKUP,
 | 
| 6 |                                    TEST_OTHER_LOOKUP)
 | 
| 7 | from _devbuild.gen.id_kind_asdl import Id_str
 | 
| 8 | from _devbuild.gen.types_asdl import lex_mode_str
 | 
| 9 | 
 | 
| 10 | import cStringIO
 | 
| 11 | import sys
 | 
| 12 | import sre_parse
 | 
| 13 | import sre_constants
 | 
| 14 | 
 | 
| 15 | from frontend import lexer_def
 | 
| 16 | 
 | 
| 17 | 
 | 
| 18 | def PrintTree(re_tree, depth=2):
 | 
| 19 |     """
 | 
| 20 |   re_tree: List of children
 | 
| 21 |   """
 | 
| 22 |     for child in re_tree:
 | 
| 23 |         name, arg = child
 | 
| 24 |         sys.stdout.write(depth * '\t')
 | 
| 25 |         sys.stdout.write(name)
 | 
| 26 |         sys.stdout.write(' ')
 | 
| 27 |         if name == 'in':  # character class
 | 
| 28 |             print('{')
 | 
| 29 |             PrintTree(arg, depth=depth + 1)
 | 
| 30 |             sys.stdout.write(depth * '\t')
 | 
| 31 |             print('}')
 | 
| 32 |         elif name == 'max_repeat':  # repetition
 | 
| 33 |             min_, max_, children = arg
 | 
| 34 |             # min = 0 means *, min = 1 means +
 | 
| 35 |             assert min_ in (0, 1), min_
 | 
| 36 |             print(min_, max_, '{')
 | 
| 37 |             PrintTree(children, depth=depth + 1)
 | 
| 38 |             sys.stdout.write(depth * '\t')
 | 
| 39 |             print()
 | 
| 40 |         elif name == 'negate':  # Oh this is a ^.  It doesn't form a node.
 | 
| 41 |             assert arg is None
 | 
| 42 |             print()
 | 
| 43 |         elif name == 'literal':  # Quote \ and " in re2c syntax
 | 
| 44 |             print(repr(chr(arg)))
 | 
| 45 |         elif name == 'not_literal':  # ditto
 | 
| 46 |             print(repr(chr(arg)))
 | 
| 47 |         elif name == 'range':  # ascii range
 | 
| 48 |             begin, end = arg
 | 
| 49 |             print(repr(chr(begin)), repr(chr(end)))
 | 
| 50 |         elif name == 'any':  # This is the '.' character
 | 
| 51 |             assert arg is None
 | 
| 52 |             print()
 | 
| 53 |         else:
 | 
| 54 |             raise AssertionError(name)
 | 
| 55 | 
 | 
| 56 |     # NOTE: negate and not_literal are sort of duplicated
 | 
| 57 | 
 | 
| 58 | 
 | 
| 59 | def PrintRegex(pat):
 | 
| 60 |     re_tree = sre_parse.parse(pat)
 | 
| 61 |     print('\t\t[')
 | 
| 62 |     PrintTree(re_tree)
 | 
| 63 |     print('\t\t]')
 | 
| 64 | 
 | 
| 65 | 
 | 
| 66 | # re2c literals are inside double quotes, so we need to escape \ and "
 | 
| 67 | # But we don't need to do anything with the quoted literal ^
 | 
| 68 | LITERAL_META = ['\\', '"']
 | 
| 69 | LITERAL_META_CODES = [ord(c) for c in LITERAL_META]
 | 
| 70 | 
 | 
| 71 | 
 | 
| 72 | def _Literal(arg, char_escapes=LITERAL_META_CODES):
 | 
| 73 |     if arg == ord('\n'):
 | 
| 74 |         s = r'\n'
 | 
| 75 |     elif arg == ord('\r'):
 | 
| 76 |         s = r'\r'
 | 
| 77 |     elif arg == ord('\t'):
 | 
| 78 |         s = r'\t'
 | 
| 79 |     elif arg >= 0x80 or arg < 0x20:  # 0x1f and below
 | 
| 80 |         s = '\\x%02x' % arg  # for \x80-\xff
 | 
| 81 |     elif arg in char_escapes:
 | 
| 82 |         s = '\\' + chr(arg)
 | 
| 83 |     else:
 | 
| 84 |         s = chr(arg)
 | 
| 85 |     return s
 | 
| 86 | 
 | 
| 87 | 
 | 
| 88 | # - means range.  Note that re2c gives an error we uselessly escape \^.
 | 
| 89 | CHAR_CLASS_META = ['\\', '-', ']']
 | 
| 90 | CHAR_CLASS_META_CODES = [ord(c) for c in CHAR_CLASS_META]
 | 
| 91 | 
 | 
| 92 | 
 | 
| 93 | def _CharClassLiteral(arg):
 | 
| 94 |     return _Literal(arg, char_escapes=CHAR_CLASS_META_CODES)
 | 
| 95 | 
 | 
| 96 | 
 | 
| 97 | def TranslateConstant(pat):
 | 
| 98 |     return '"' + ''.join(_Literal(ord(c)) for c in pat) + '"'
 | 
| 99 | 
 | 
| 100 | 
 | 
| 101 | def TranslateTree(re_tree, f, in_char_class=False):
 | 
| 102 |     """
 | 
| 103 |   re_tree: List of children
 | 
| 104 |   """
 | 
| 105 |     for child in re_tree:
 | 
| 106 |         name, arg = child
 | 
| 107 |         if name == 'in':  # character class
 | 
| 108 |             f.write('[')
 | 
| 109 |             TranslateTree(arg, f,
 | 
| 110 |                           in_char_class=True)  # list of literals/ranges
 | 
| 111 |             f.write(']')
 | 
| 112 | 
 | 
| 113 |         elif name == 'branch':  # |
 | 
| 114 |             _, branches = arg
 | 
| 115 |             for i, branch in enumerate(branches):
 | 
| 116 |                 if i != 0:
 | 
| 117 |                     f.write(' | ')
 | 
| 118 |                 TranslateTree(branch, f)
 | 
| 119 | 
 | 
| 120 |         elif name == 'max_repeat':  # repetition
 | 
| 121 |             min_, max_, children = arg
 | 
| 122 |             # min = 0 means *, min = 1 means +
 | 
| 123 |             #assert min_ in (0, 1), min_
 | 
| 124 |             TranslateTree(children, f)
 | 
| 125 | 
 | 
| 126 |             if min_ == 0 and max_ == 1:
 | 
| 127 |                 f.write('? ')
 | 
| 128 | 
 | 
| 129 |             elif max_ == sre_constants.MAXREPEAT:
 | 
| 130 |                 if min_ == 0:
 | 
| 131 |                     f.write('* ')
 | 
| 132 |                 elif min_ == 1:
 | 
| 133 |                     f.write('+ ')
 | 
| 134 |                 else:
 | 
| 135 |                     assert 0, min_
 | 
| 136 | 
 | 
| 137 |             else:  # re2c also supports [0-7]{1,2} syntax
 | 
| 138 |                 # note: might generated {2,2}
 | 
| 139 |                 f.write('{%d,%d} ' % (min_, max_))
 | 
| 140 | 
 | 
| 141 |         elif name == 'negate':  # ^ in [^a-z]
 | 
| 142 |             assert arg is None
 | 
| 143 |             f.write('^')
 | 
| 144 | 
 | 
| 145 |         elif name == 'literal':  # Quote \ and " in re2c syntax
 | 
| 146 |             # TODO: it matters if we're inside a character class
 | 
| 147 |             #print("literal ARG %r" % arg)
 | 
| 148 | 
 | 
| 149 |             if in_char_class:
 | 
| 150 |                 s = _CharClassLiteral(arg)
 | 
| 151 |             else:
 | 
| 152 |                 s = '"%s" ' % _Literal(arg)
 | 
| 153 |             f.write(s)
 | 
| 154 | 
 | 
| 155 |         elif name == 'not_literal':  # ditto
 | 
| 156 |             assert not in_char_class
 | 
| 157 |             f.write('[^%s]' % _CharClassLiteral(arg))
 | 
| 158 | 
 | 
| 159 |         elif name == 'range':  # ascii range
 | 
| 160 |             begin, end = arg
 | 
| 161 |             f.write('%s-%s' %
 | 
| 162 |                     (_CharClassLiteral(begin), _CharClassLiteral(end)))
 | 
| 163 | 
 | 
| 164 |         elif name == 'any':  # This is the '.' character
 | 
| 165 |             assert arg is None
 | 
| 166 |             f.write('.')
 | 
| 167 | 
 | 
| 168 |         elif name == 'subpattern':
 | 
| 169 |             _, children = arg  # Not sure what the _ is, but it works
 | 
| 170 |             f.write('(')
 | 
| 171 |             TranslateTree(children, f)
 | 
| 172 |             f.write(')')
 | 
| 173 | 
 | 
| 174 |         else:
 | 
| 175 |             if 0:
 | 
| 176 |                 from mycpp.mylib import log
 | 
| 177 |                 log('child %s', child)
 | 
| 178 |             raise RuntimeError("I don't understand regex construct: %r" % name)
 | 
| 179 | 
 | 
| 180 |     # NOTE: negate and not_literal are sort of duplicated
 | 
| 181 | 
 | 
| 182 | 
 | 
| 183 | def TranslateRegex(pat):
 | 
| 184 |     re_tree = sre_parse.parse(pat)
 | 
| 185 |     # For debugging
 | 
| 186 |     #import pprint
 | 
| 187 |     #print(pprint.pformat(re_tree), file=sys.stderr)
 | 
| 188 |     f = cStringIO.StringIO()
 | 
| 189 |     try:
 | 
| 190 |         TranslateTree(re_tree, f)
 | 
| 191 |     except RuntimeError:
 | 
| 192 |         print('Error translating %r' % pat, file=sys.stderr)
 | 
| 193 |         raise
 | 
| 194 |     return f.getvalue()
 | 
| 195 | 
 | 
| 196 | 
 | 
| 197 | # This explains the sentinel method, which we will use.
 | 
| 198 | # http://re2c.org/examples/example_01.html
 | 
| 199 | #
 | 
| 200 | # TODO: Change ParseTuple to use 's' rather than '#s' ?
 | 
| 201 | 
 | 
| 202 | # I don't think we need this YYFILL mechanism, because we lex a line at a
 | 
| 203 | # time.
 | 
| 204 | # http://re2c.org/examples/example_03.html
 | 
| 205 | 
 | 
| 206 | 
 | 
| 207 | def TranslateSimpleLexer(func_name, lexer_def):
 | 
| 208 |     print(r"""
 | 
| 209 | static inline void %s(const unsigned char* line, int line_len,
 | 
| 210 |     int start_pos, int* id, int* end_pos) {
 | 
| 211 |   assert(start_pos <= line_len);  /* caller should have checked */
 | 
| 212 | 
 | 
| 213 |   const unsigned char* p = line + start_pos;  /* modified by re2c */
 | 
| 214 | 
 | 
| 215 |   /* Echo and History lexer apparently need this, but others don't */
 | 
| 216 |   __attribute__((unused)) const unsigned char* YYMARKER;
 | 
| 217 | 
 | 
| 218 |   for (;;) {
 | 
| 219 |     /*!re2c
 | 
| 220 | """ % func_name)
 | 
| 221 | 
 | 
| 222 |     for is_regex, pat, id_ in lexer_def:
 | 
| 223 |         if is_regex:
 | 
| 224 |             re2c_pat = TranslateRegex(pat)
 | 
| 225 |         else:
 | 
| 226 |             re2c_pat = TranslateConstant(pat)
 | 
| 227 |         id_name = Id_str(id_).split('.')[-1]  # e.g. Undefined_Tok
 | 
| 228 |         print('      %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))
 | 
| 229 | 
 | 
| 230 |     # EARLY RETURN: Do NOT advance past the NUL terminator.
 | 
| 231 |     print('      %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }' % \
 | 
| 232 |         r'"\x00"')
 | 
| 233 | 
 | 
| 234 |     print("""
 | 
| 235 |     */
 | 
| 236 |   }
 | 
| 237 |   *end_pos = p - line;  /* relative */
 | 
| 238 | }
 | 
| 239 | """)
 | 
| 240 | 
 | 
| 241 | 
 | 
| 242 | def TranslateBracket(func_name, token_dict):
 | 
| 243 |     print(r"""
 | 
| 244 | static inline int %s(const unsigned char* s, int len) {
 | 
| 245 |   const unsigned char* p = s;  /* modified by re2c */
 | 
| 246 |   const unsigned char* end = s + len;
 | 
| 247 | 
 | 
| 248 |   __attribute__((unused)) const unsigned char* YYMARKER;
 | 
| 249 |   int id;
 | 
| 250 | 
 | 
| 251 |   for (;;) {
 | 
| 252 |     /*!re2c
 | 
| 253 | """ % func_name)
 | 
| 254 | 
 | 
| 255 |     for pat in sorted(token_dict):
 | 
| 256 |         id_ = token_dict[pat]
 | 
| 257 |         re2c_pat = TranslateConstant(pat)
 | 
| 258 |         id_name = Id_str(id_).split('.')[-1]  # e.g. Undefined_Tok
 | 
| 259 |         print('      %-30s { id = id__%s; break; }' % (re2c_pat, id_name))
 | 
| 260 | 
 | 
| 261 |     # EARLY RETURN: Do NOT advance past other chars, including the NUL
 | 
| 262 |     # terminator.
 | 
| 263 |     print('      %-30s { return id__Undefined_Tok; }' % '*')
 | 
| 264 | 
 | 
| 265 |     print("""
 | 
| 266 |     */
 | 
| 267 |   }
 | 
| 268 |   // must be an exact match
 | 
| 269 |   return (p == end) ? id : id__Undefined_Tok;
 | 
| 270 | }
 | 
| 271 | """)
 | 
| 272 | 
 | 
| 273 | 
 | 
| 274 | def StringToInt(func_name, name_def):
 | 
| 275 |     print(r"""
 | 
| 276 | static inline void %s(const unsigned char* s, int len, int* id) {
 | 
| 277 |   const unsigned char* p = s;  /* modified by re2c */
 | 
| 278 |   const unsigned char* end = s + len;
 | 
| 279 | 
 | 
| 280 |   //fprintf(stderr, "*** s = %%s\n", s);
 | 
| 281 | 
 | 
| 282 |   for (;;) {
 | 
| 283 |     /*!re2c
 | 
| 284 | """ % func_name)
 | 
| 285 | 
 | 
| 286 |     for name, enum in name_def:
 | 
| 287 |         re2c_pat = TranslateConstant(name)
 | 
| 288 |         print('      %-30s { *id = %s; break; }' % (re2c_pat, enum))
 | 
| 289 | 
 | 
| 290 |     # Not found.  * matches anything else.
 | 
| 291 |     print('      %-30s { *id = 0; return; }' % \
 | 
| 292 |         r'*')
 | 
| 293 | 
 | 
| 294 |     print(r"""
 | 
| 295 |     */
 | 
| 296 |   }
 | 
| 297 |   if (p != end) {
 | 
| 298 |     //fprintf(stderr, "EXTRA CHARS\n", s);
 | 
| 299 |     *id = 0;  // Not an exact match
 | 
| 300 |   }
 | 
| 301 | }
 | 
| 302 | """)
 | 
| 303 | 
 | 
| 304 |     # TODO: Check that we're at the END OF THE STRING
 | 
| 305 | 
 | 
| 306 | 
 | 
| 307 | def TranslateOshLexer(lexer_def):
 | 
| 308 |     # https://stackoverflow.com/questions/12836171/difference-between-an-inline-function-and-static-inline-function
 | 
| 309 |     # Has to be 'static inline' rather than 'inline', otherwise the
 | 
| 310 |     # _bin/oil.ovm-dbg build fails (but the _bin/oil.ovm doesn't!).
 | 
| 311 |     # Since we reference this function in exactly one translation unit --
 | 
| 312 |     # fastlex.c, the difference is moot, and we just satisfy the compiler.
 | 
| 313 | 
 | 
| 314 |     print(r"""
 | 
| 315 | /* Common stuff */
 | 
| 316 | 
 | 
| 317 | /*!re2c
 | 
| 318 |   re2c:define:YYCTYPE = "unsigned char";
 | 
| 319 |   re2c:define:YYCURSOR = p;
 | 
| 320 |   re2c:yyfill:enable = 0;  // generated code doesn't ask for more input
 | 
| 321 | */
 | 
| 322 | 
 | 
| 323 | static inline void MatchOshToken(int lex_mode, const unsigned char* line, int line_len,
 | 
| 324 |                               int start_pos, int* id, int* end_pos) {
 | 
| 325 |   assert(start_pos <= line_len);  /* caller should have checked */
 | 
| 326 | 
 | 
| 327 |   const unsigned char* p = line + start_pos;  /* modified by re2c */
 | 
| 328 |   //printf("p: %p q: %p\n", p, q);
 | 
| 329 | 
 | 
| 330 |   __attribute__((unused)) const unsigned char* YYMARKER;  /* why do we need this? */
 | 
| 331 |   switch (lex_mode)  {
 | 
| 332 | """)
 | 
| 333 | 
 | 
| 334 |     # TODO: Should be ordered by most common?  Or will profile-directed feedback
 | 
| 335 |     # help?
 | 
| 336 |     for state, pat_list in lexer_def.iteritems():
 | 
| 337 |         # e.g. lex_mode.DQ => lex_mode__DQ
 | 
| 338 |         print('  case %s:' % lex_mode_str(state).replace('.', '__'))
 | 
| 339 |         print('    for (;;) {')
 | 
| 340 |         print('      /*!re2c')
 | 
| 341 | 
 | 
| 342 |         for is_regex, pat, id_ in pat_list:
 | 
| 343 |             if is_regex:
 | 
| 344 |                 re2c_pat = TranslateRegex(pat)
 | 
| 345 |             else:
 | 
| 346 |                 re2c_pat = TranslateConstant(pat)
 | 
| 347 |             id_name = Id_str(id_).split('.')[-1]  # e.g. Undefined_Tok
 | 
| 348 |             print('      %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))
 | 
| 349 | 
 | 
| 350 |         # EARLY RETURN: Do NOT advance past the NUL terminator.
 | 
| 351 |         print('      %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }' % \
 | 
| 352 |             r'"\x00"')
 | 
| 353 | 
 | 
| 354 |         print('      */')
 | 
| 355 |         print('    }')
 | 
| 356 |         print('    break;')
 | 
| 357 |         print()
 | 
| 358 | 
 | 
| 359 |     # This is literal code without generation:
 | 
| 360 |     """
 | 
| 361 |   case lex_mode__OUTER:
 | 
| 362 |     for (;;) {
 | 
| 363 |       /*!re2c
 | 
| 364 |       literal_chunk = [a-zA-Z0-9_/.-]+;
 | 
| 365 |       var_like    = [a-zA-Z_][a-zA-Z0-9_]* "=";  // might be NAME=val
 | 
| 366 |       comment     = [ \t\r]* "#" [^\000\r\n]*;
 | 
| 367 |       space       = [ \t\r]+;
 | 
| 368 |       nul = "\000";
 | 
| 369 | 
 | 
| 370 |       literal_chunk { *id = id__Lit_Chars; break; }
 | 
| 371 |       var_like      { *id = id__Lit_VarLike; break; }
 | 
| 372 | 
 | 
| 373 |       [ \t\r]* "\n" { *id = id__Op_Newline; break; }
 | 
| 374 |       space         { *id = id__WS_Space; break; }
 | 
| 375 | 
 | 
| 376 |       nul           { *id = id__Eof_Real; break; }
 | 
| 377 | 
 | 
| 378 |       // anything else
 | 
| 379 |       *             { *id = id__Lit_Other; break; }
 | 
| 380 | 
 | 
| 381 |       */
 | 
| 382 |     }
 | 
| 383 | 
 | 
| 384 |     *end_pos = p - line;
 | 
| 385 |     break;
 | 
| 386 | 
 | 
| 387 |   case lex_mode__COMMENT:
 | 
| 388 |     *id = id__Lit_Other;
 | 
| 389 |     *end_pos = 6;
 | 
| 390 |     break;
 | 
| 391 |   """
 | 
| 392 | 
 | 
| 393 |     print("""\
 | 
| 394 |   default:
 | 
| 395 |     assert(0);
 | 
| 396 | 
 | 
| 397 |   }
 | 
| 398 |   *end_pos = p - line;  /* relative */
 | 
| 399 | }
 | 
| 400 | """)
 | 
| 401 | 
 | 
| 402 | 
 | 
| 403 | def TranslateRegexToPredicate(py_regex, func_name):
 | 
| 404 |     re2c_pat = TranslateRegex(py_regex)
 | 
| 405 |     print(r"""
 | 
| 406 | static inline int %s(const unsigned char* s, int len) {
 | 
| 407 |   const unsigned char* p = s;  /* modified by re2c */
 | 
| 408 |   const unsigned char* end = s + len;
 | 
| 409 | 
 | 
| 410 |   /* MatchBraceRangeToken needs this, but others don't */
 | 
| 411 |   __attribute__((unused)) const unsigned char* YYMARKER;
 | 
| 412 | 
 | 
| 413 |   /*!re2c
 | 
| 414 |   re2c:define:YYCTYPE = "unsigned char";
 | 
| 415 |   re2c:define:YYCURSOR = p;
 | 
| 416 |   %-30s { return p == end; }  // Match must be anchored right, like $
 | 
| 417 |   *     { return 0; }
 | 
| 418 |   */
 | 
| 419 | }
 | 
| 420 | """ % (func_name, re2c_pat))
 | 
| 421 | 
 | 
| 422 | 
 | 
| 423 |     # note: use YYCURSOR and YYLIMIT
 | 
| 424 |     # limit should be the end of string
 | 
| 425 |     # line + line_len
 | 
| 426 | def main(argv):
 | 
| 427 |     action = argv[1]
 | 
| 428 |     if action == 'c':
 | 
| 429 |         # Code is printed to stdout
 | 
| 430 | 
 | 
| 431 |         TranslateOshLexer(lexer_def.LEXER_DEF)
 | 
| 432 | 
 | 
| 433 |         TranslateSimpleLexer('MatchEchoToken', lexer_def.ECHO_E_DEF)
 | 
| 434 |         TranslateSimpleLexer('MatchGlobToken', lexer_def.GLOB_DEF)
 | 
| 435 |         TranslateSimpleLexer('MatchPS1Token', lexer_def.PS1_DEF)
 | 
| 436 |         TranslateSimpleLexer('MatchHistoryToken', lexer_def.HISTORY_DEF)
 | 
| 437 |         TranslateSimpleLexer('MatchBraceRangeToken', lexer_def.BRACE_RANGE_DEF)
 | 
| 438 |         TranslateSimpleLexer('MatchJ8Token', lexer_def.J8_DEF)
 | 
| 439 |         TranslateSimpleLexer('MatchJ8LinesToken', lexer_def.J8_LINES_DEF)
 | 
| 440 |         TranslateSimpleLexer('MatchJ8StrToken', lexer_def.J8_STR_DEF)
 | 
| 441 |         TranslateSimpleLexer('MatchJsonStrToken', lexer_def.JSON_STR_DEF)
 | 
| 442 | 
 | 
| 443 |         TranslateRegexToPredicate(lexer_def.VAR_NAME_RE, 'IsValidVarName')
 | 
| 444 |         TranslateRegexToPredicate(lexer_def.SHOULD_HIJACK_RE, 'ShouldHijack')
 | 
| 445 |         TranslateRegexToPredicate(lexer_def.LOOKS_LIKE_INTEGER,
 | 
| 446 |                                   'LooksLikeInteger')
 | 
| 447 |         TranslateRegexToPredicate(lexer_def.LOOKS_LIKE_FLOAT, 'LooksLikeFloat')
 | 
| 448 | 
 | 
| 449 |         TranslateBracket('BracketUnary', TEST_UNARY_LOOKUP)
 | 
| 450 |         TranslateBracket('BracketBinary', TEST_BINARY_LOOKUP)
 | 
| 451 |         TranslateBracket('BracketOther', TEST_OTHER_LOOKUP)
 | 
| 452 | 
 | 
| 453 |     elif action == 'print-all':
 | 
| 454 |         # Top level is a switch statement.
 | 
| 455 |         for state, pat_list in lexer_def.LEXER_DEF.iteritems():
 | 
| 456 |             print(state)
 | 
| 457 |             # This level is re2c patterns.
 | 
| 458 |             for is_regex, pat, token_id in pat_list:
 | 
| 459 |                 print('\t%r  ->  %r' % (pat, token_id))
 | 
| 460 |                 if is_regex:
 | 
| 461 |                     #print re_tree
 | 
| 462 |                     _ = TranslateRegex(pat)
 | 
| 463 |                     #print out_pat
 | 
| 464 | 
 | 
| 465 |             print()
 | 
| 466 | 
 | 
| 467 |     elif action == 'print-regex':
 | 
| 468 |         unique = set()
 | 
| 469 | 
 | 
| 470 |         num_regexes = 0
 | 
| 471 |         for state, pat_list in lexer_def.LEXER_DEF.iteritems():
 | 
| 472 |             print(state)
 | 
| 473 |             # This level is re2c patterns.
 | 
| 474 |             for is_regex, pat, token_id in pat_list:
 | 
| 475 |                 #print '\t%r  ->  %r' % (pat, token_id)
 | 
| 476 |                 if is_regex:
 | 
| 477 |                     print('\t' + pat)
 | 
| 478 |                     print('\t' + TranslateRegex(pat))
 | 
| 479 |                     print()
 | 
| 480 |                     #PrintRegex(pat)
 | 
| 481 |                     num_regexes += 1
 | 
| 482 |                     unique.add(pat)
 | 
| 483 |                 else:
 | 
| 484 |                     print('\t' + TranslateConstant(pat))
 | 
| 485 | 
 | 
| 486 |             print()
 | 
| 487 | 
 | 
| 488 |         print('Printed %d regexes (%d unique)' % (num_regexes, len(unique)))
 | 
| 489 | 
 | 
| 490 | 
 | 
| 491 | if __name__ == '__main__':
 | 
| 492 |     try:
 | 
| 493 |         main(sys.argv)
 | 
| 494 |     except RuntimeError as e:
 | 
| 495 |         print('FATAL: %s' % e, file=sys.stderr)
 | 
| 496 |         sys.exit(1)
 |