| 1 | #!/usr/bin/env python
 | 
| 2 | """
 | 
| 3 | tdop.py
 | 
| 4 | """
 | 
| 5 | 
 | 
| 6 | import re
 | 
| 7 | 
 | 
| 8 | 
 | 
class ParseError(Exception):
  """Raised for any syntax error found while tokenizing or parsing."""
 | 
| 11 | 
 | 
| 12 | 
 | 
| 13 | #
 | 
| 14 | # Default parsing functions give errors
 | 
| 15 | #
 | 
| 16 | 
 | 
def NullError(p, token, bp):
  """Default null denotation: reject a token that can't begin an expression."""
  raise ParseError("%s can't be used in prefix position" % token)
 | 
| 19 | 
 | 
| 20 | 
 | 
def LeftError(p, token, left, rbp):
  """Default left denotation: reject a token that can't follow an expression.

  (Hm is this not called because of binding power?)
  """
  raise ParseError("%s can't be used in infix position" % token)
 | 
| 24 | 
 | 
| 25 | 
 | 
| 26 | #
 | 
| 27 | # Input
 | 
| 28 | #
 | 
| 29 | 
 | 
class Token:
  """A lexical token produced by Tokenize().

  Attributes:
    type: token type ('number', 'name', 'eof', or the operator/bracket text)
    val: token value; an int for 'number' tokens, otherwise the lexeme
    loc: optional source location tuple, or None
  """

  def __init__(self, type, val, loc=None):
    self.type = type
    self.val = val
    # BUG FIX: loc was accepted but silently discarded; keep it so callers
    # (e.g. error reporting) can actually use the location they pass in.
    self.loc = loc

  def __repr__(self):
    return '<Token %s %s>' % (self.type, self.val)
 | 
| 37 | 
 | 
| 38 | 
 | 
| 39 | #
 | 
| 40 | # Using the pattern here: http://effbot.org/zone/xml-scanner.htm
 | 
| 41 | #
 | 
| 42 | 
 | 
# NOTE: () and [] need to be on their own so (-1+2) works
# BUG FIX: the pattern must be a raw string — without r'' the backslash
# sequences (\s, \d, \w, \-, ...) are invalid string escapes and raise
# DeprecationWarning/SyntaxWarning on Python 3.
TOKEN_RE = re.compile(r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""", re.VERBOSE)
 | 
| 47 | 
 | 
def Tokenize(s):
  """Yield Token objects for the expression string s.

  Characters the regex doesn't recognize are silently skipped.
  """
  for num, name, op, bracket in TOKEN_RE.findall(s):
    if num:
      tok = Token('number', int(num), loc=(0, 0))
    elif name:
      tok = Token('name', name, loc=(0, 0))
    elif op:
      tok = Token(op, op, loc=(0, 0))
    else:
      tok = Token(bracket, bracket, loc=(0, 0))
    yield tok
 | 
| 63 | 
 | 
| 64 | 
 | 
| 65 | #
 | 
| 66 | # Simple and Composite AST nodes
 | 
| 67 | #
 | 
| 68 | 
 | 
class Node(object):
  """Leaf node of the AST: wraps a single token.

  Args:
    token: the Token this leaf represents; val only matters for
      number and string tokens.
  """

  def __init__(self, token):
    self.token = token

  def __repr__(self):
    return str(self.token.val)
 | 
| 80 | 
 | 
| 81 | 
 | 
class CompositeNode(Node):
  """Interior AST node: an operator token plus a list of child nodes."""

  def __init__(self, token, children):
    """
    Args:
      token: operator token; its type labels this node.
      children: list of Node/CompositeNode operands.
    """
    Node.__init__(self, token)
    self.children = children

  def __repr__(self):
    # Lisp-style rendering, e.g. (+ 1 (* 2 3))
    parts = [self.token.type]
    parts.extend(repr(c) for c in self.children)
    return '(' + ' '.join(parts) + ')'
 | 
| 94 | 
 | 
| 95 | 
 | 
| 96 | #
 | 
| 97 | # Parser definition
 | 
| 98 | #
 | 
| 99 | 
 | 
class LeftInfo(object):
  """Parse-table row for a token in infix (left) position.

  In C++ this should be a big array.
  """

  def __init__(self, led=None, lbp=0, rbp=0):
    # Fall back to the error handler when no led is supplied.
    self.led = led if led else LeftError
    self.lbp = lbp  # left binding power
    self.rbp = rbp  # right binding power
 | 
| 109 | 
 | 
| 110 | 
 | 
class NullInfo(object):
  """Parse-table row for a token in prefix (null) position.

  In C++ this should be a big array.
  """

  def __init__(self, nud=None, bp=0):
    # Fall back to the error handler when no nud is supplied.
    self.nud = nud if nud else NullError
    self.bp = bp  # binding power used when parsing the operand
 | 
| 119 | 
 | 
| 120 | 
 | 
class ParserSpec(object):
  """Specification for a TDOP parser.

  Maps token types to their prefix (null) and infix (left) parse rules.
  """

  def __init__(self):
    self.null_lookup = {}  # token type -> NullInfo
    self.left_lookup = {}  # token type -> LeftInfo

  def Null(self, bp, nud, tokens):
    """Register a token that doesn't take anything on the left.

    Examples: constant, prefix operator, error.
    """
    for tok in tokens:
      self.null_lookup[tok] = NullInfo(nud=nud, bp=bp)
      # Using such a token in infix position is an error by default.
      self.left_lookup.setdefault(tok, LeftInfo())

  def _RegisterLed(self, lbp, rbp, led, tokens):
    for tok in tokens:
      # Using such a token in prefix position is an error by default.
      self.null_lookup.setdefault(tok, NullInfo(NullError))
      self.left_lookup[tok] = LeftInfo(lbp=lbp, rbp=rbp, led=led)

  def Left(self, bp, led, tokens):
    """Register a token that takes an expression on the left."""
    self._RegisterLed(bp, bp, led, tokens)

  def LeftRightAssoc(self, bp, led, tokens):
    """Register a right associative operator."""
    self._RegisterLed(bp, bp - 1, led, tokens)

  def LookupNull(self, token):
    """Get the parsing function and precedence for a null position token."""
    info = self.null_lookup.get(token)
    if info is None:
      raise ParseError('Unexpected token %r' % token)
    return info

  def LookupLeft(self, token):
    """Get the parsing function and precedence for a left position token."""
    info = self.left_lookup.get(token)
    if info is None:
      raise ParseError('Unexpected token %r' % token)
    return info
 | 
| 167 | 
 | 
| 168 | 
 | 
EOF_TOKEN = Token('eof', 'eof')  # sentinel the parser sees when the lexer is exhausted
 | 
| 170 | 
 | 
| 171 | 
 | 
class Parser(object):
  """Recursive TDOP (top-down operator precedence) parser."""

  def __init__(self, spec, lexer):
    """
    Args:
      spec: ParserSpec holding the null/left parse tables.
      lexer: iterator yielding Token objects.
    """
    self.spec = spec
    self.lexer = lexer  # iterable
    self.token = None  # current token

  def AtToken(self, token_type):
    """Test if we are looking at a token of the given type."""
    return self.token.type == token_type

  def Next(self):
    """Move to the next token, substituting EOF_TOKEN when input runs out."""
    try:
      # BUG FIX: self.lexer.next() is Python-2-only; the next() builtin
      # (available since 2.6) works on both Python 2 and 3.
      t = next(self.lexer)
    except StopIteration:
      t = EOF_TOKEN
    self.token = t

  def Eat(self, val):
    """Assert the type of the current token, then move to the next token."""
    if val and not self.AtToken(val):
      raise ParseError('expected %s, got %s' % (val, self.token))
    self.Next()

  def ParseUntil(self, rbp):
    """
    Parse to the right, eating tokens until we encounter a token with binding
    power LESS THAN OR EQUAL TO rbp.

    Returns:
      Node/CompositeNode for the parsed expression.
    Raises:
      ParseError: on unexpected end of input or an unregistered token.
    """
    if self.AtToken('eof'):
      raise ParseError('Unexpected end of input')

    t = self.token
    self.Next()  # skip over the token, e.g. ! ~ + -

    null_info = self.spec.LookupNull(t.type)
    node = null_info.nud(self, t, null_info.bp)

    while True:
      t = self.token
      left_info = self.spec.LookupLeft(t.type)

      # Examples:
      # If we see 1*2+  , rbp = 27 and lbp = 25, so stop.
      # If we see 1+2+  , rbp = 25 and lbp = 25, so stop.
      # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
      if rbp >= left_info.lbp:
        break
      self.Next()  # skip over the token, e.g. / *

      node = left_info.led(self, t, node, left_info.rbp)

    return node

  def Parse(self):
    """Parse a complete expression, starting from the first token."""
    self.Next()
    return self.ParseUntil(0)
 |