1 | """
|
2 | tdop_lexer.py
|
3 | """
|
4 | from __future__ import print_function
|
5 |
|
6 | import re
|
7 | from typing import Iterator, Tuple, cast, TYPE_CHECKING
|
8 |
|
9 | from asdl.examples.tdop import Token
|
10 |
|
# TupleStr4 is the static type handed to cast() for one findall() item: a
# tuple of four strings, one per capture group.  It only matters to the type
# checker, so outside of type checking the name is bound to a None stub.
if not TYPE_CHECKING:
    TupleStr4 = None  # Using runtime stub
else:
    TupleStr4 = Tuple[str, str, str, str]
|
15 |
|
16 | #
|
17 | # Using the pattern here: http://effbot.org/zone/xml-scanner.htm
|
18 | #
|
19 |
|
# Lexer pattern.  Leading whitespace is skipped, then exactly one of the four
# capture groups matches:
#   group 1: a run of digits
#   group 2: a run of word characters
#   group 3: a run of operator characters
#   group 4: one single bracket character
# NOTE: ( ) [ ] are matched one at a time, separately from the operator run,
# so that input such as (-1+2) works.
TOKEN_RE = re.compile(
    r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""",
    re.VERBOSE,
)
|
24 |
|
def Tokenize(s):
    # type: (str) -> Iterator[Token]
    """Lazily yield one Token per match of TOKEN_RE in s.

    Each match fills exactly one of four capture groups, which decides how
    the Token is built:

      digits             -> Token('number', <digits>)
      word characters    -> Token('name', <word>)
      operator run       -> Token(<text>, <text>)
      single bracket     -> Token(<text>, <text>)

    For operators and brackets the matched text serves as both the token
    type and the token value.  Only matches are reported: findall() returns
    nothing for text that TOKEN_RE does not match.
    """
    for groups in TOKEN_RE.findall(s):
        # findall() gives a tuple with one string per capture group, but the
        # type checker cannot infer that, hence the cast before unpacking.
        number, name, operator, bracket = cast(TupleStr4, groups)

        if number:
            yield Token('number', number)
        elif name:
            yield Token('name', name)
        elif operator:
            yield Token(operator, operator)
        elif bracket:
            yield Token(bracket, bracket)
|