| 1 | """
 | 
| 2 | tdop_lexer.py
 | 
| 3 | """
 | 
| 4 | from __future__ import print_function
 | 
| 5 | 
 | 
| 6 | import re
 | 
| 7 | from typing import Iterator, Tuple, cast, TYPE_CHECKING
 | 
| 8 | 
 | 
| 9 | from asdl.examples.tdop import Token
 | 
| 10 | 
 | 
# TupleStr4 names the 4-tuple of strings that Tokenize() casts each
# TOKEN_RE.findall() result to.  The real tuple type is only defined while
# type checking; at runtime the name is bound to None.
if TYPE_CHECKING:
  TupleStr4 = Tuple[str, str, str, str]
else:
  TupleStr4 = None  # Using runtime stub
 | 
| 15 | 
 | 
| 16 | #
 | 
| 17 | # Using the pattern here: http://effbot.org/zone/xml-scanner.htm
 | 
| 18 | #
 | 
| 19 | 
 | 
# Each match skips leading whitespace and then fills exactly one of four
# capture groups, in this order:
#   1. a run of digits
#   2. a run of word characters
#   3. a run of operator/punctuation characters
#   4. a single bracket character: ( ) [ or ]
#
# NOTE: () and [] need to be on their own so (-1+2) works
# (group 4 has no '+', so each bracket is matched one character at a time).
TOKEN_RE = re.compile(r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""", re.VERBOSE)
 | 
| 24 | 
 | 
def Tokenize(s):
  # type: (str) -> Iterator[Token]
  """Lex the string s into a stream of Token objects.

  TOKEN_RE.findall() produces one 4-tuple of capture groups per match, and
  only one of the four alternatives can match at a time.  Digit runs become
  'number' tokens and word runs become 'name' tokens; operators and brackets
  use their own text as the token type.  Text that TOKEN_RE does not match
  is skipped by findall().
  """
  for groups in TOKEN_RE.findall(s):
    # findall() is untyped, so tell the checker what shape the groups have.
    number, name, operator, bracket = cast(TupleStr4, groups)

    if number:
      yield Token('number', number)
      continue

    if name:
      yield Token('name', name)
      continue

    # Operators and brackets: the matched text doubles as the token type.
    lexeme = operator or bracket
    yield Token(lexeme, lexeme)
 |