OILS / asdl / examples / tdop_lexer.py View on Github | oilshell.org

42 lines, 26 significant
1"""
2tdop_lexer.py
3"""
4from __future__ import print_function
5
6import re
7from typing import Iterator, Tuple, cast, TYPE_CHECKING
8
9from asdl.examples.tdop import Token
10
if TYPE_CHECKING:
    # Seen only by the type checker: a tuple of four strings, the type that
    # each regex findall() result is cast to in this module.
    TupleStr4 = Tuple[str, str, str, str]
else:
    # Runtime stub: the name only has to exist so it can be passed to cast().
    TupleStr4 = None
15
16#
17# Using the pattern here: http://effbot.org/zone/xml-scanner.htm
18#
19
# Four capture groups, tried in order after optional leading whitespace:
#   1. a run of digits
#   2. a run of word characters
#   3. a run of operator characters
#   4. exactly one bracket character
# NOTE: ( ) [ ] are kept in their own single-character group, separate from
# the operator run, so that input like (-1+2) lexes '(' and '-' as two tokens.
TOKEN_RE = re.compile(r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""", re.VERBOSE)
24
def Tokenize(s):
    # type: (str) -> Iterator[Token]
    """Yield a Token for each lexeme that TOKEN_RE finds in s.

    Every findall() result carries one string per capture group, and only the
    group that matched is non-empty:

      group 0 (digits)          -> Token('number', text)
      group 1 (word characters) -> Token('name', text)
      group 2 (operator run)    -> Token(text, text)
      group 3 (single bracket)  -> Token(text, text)

    Because findall() scans for matches, input that fits none of the
    alternatives is skipped rather than reported.
    """
    for groups in TOKEN_RE.findall(s):
        # The type checker can't infer that findall() returns 4-tuples here.
        number, name, op, bracket = cast(TupleStr4, groups)
        if number:
            typ, val = 'number', number
        elif name:
            typ, val = 'name', name
        elif op:
            # Operators and brackets use their own text as the token type.
            typ, val = op, op
        elif bracket:
            typ, val = bracket, bracket
        yield Token(typ, val)