| 1 | """
|
| 2 | tdop_lexer.py
|
| 3 | """
|
| 4 | from __future__ import print_function
|
| 5 |
|
| 6 | import re
|
| 7 | from typing import Iterator, Tuple, cast, TYPE_CHECKING
|
| 8 |
|
| 9 | from asdl.examples.tdop import Token
|
| 10 |
|
# TupleStr4 names the 4-tuple of strings that the tokenizer casts each regex
# match to.  The real alias is only visible to the static type checker; at
# runtime the name is bound to a None placeholder.
if not TYPE_CHECKING:
    TupleStr4 = None  # runtime placeholder
else:
    TupleStr4 = Tuple[str, str, str, str]
|
| 15 |
|
| 16 | #
|
| 17 | # Using the pattern here: http://effbot.org/zone/xml-scanner.htm
|
| 18 | #
|
| 19 |
|
# Token pattern, compiled in verbose mode.  After optional leading whitespace
# exactly one of four capture groups matches:
#   group 1: a run of digits
#   group 2: a run of word characters
#   group 3: a run of one or more operator characters
#   group 4: a single bracket: ( ) [ ]
# NOTE: brackets get their own single-character group, separate from the
# operator run, so that input such as (-1+2) lexes "(" apart from "-".
_TOKEN_PATTERN = r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
"""

TOKEN_RE = re.compile(_TOKEN_PATTERN, flags=re.VERBOSE)
|
| 24 |
|
def Tokenize(s):
    # type: (str) -> Iterator[Token]
    """Lazily yield one Token per match of TOKEN_RE in s.

    Digit runs become Token('number', text) and word runs become
    Token('name', text).  Operator runs and single brackets use their own
    text as both the token type and the token value.

    Text that TOKEN_RE does not match (whitespace, and any character that
    fits none of its four groups) produces no token.
    """
    for groups in TOKEN_RE.findall(s):
        # findall() gives one tuple of the four capture groups per match;
        # the cast only tells the type checker what shape that tuple has.
        number, name, operator, bracket = cast(TupleStr4, groups)

        if number:
            typ, val = 'number', number
        elif name:
            typ, val = 'name', name
        elif operator:
            typ = val = operator
        elif bracket:
            typ = val = bracket

        yield Token(typ, val)
|