OILS / pgen2 / driver.py View on Github | oilshell.org

101 lines, 61 significant
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4# Modifications:
5# Copyright 2006 Google, Inc. All Rights Reserved.
6# Licensed to PSF under a Contributor Agreement.
7from __future__ import print_function
8
9"""Parser driver.
10
11A high-level interface to parse a file into a syntax tree.
12"""
13
14__author__ = "Guido van Rossum <guido@python.org>"
15
16
17import sys
18
19from . import parse, pnode, token, tokenize
20
21
def log(msg, *args):
    """Write a message to stderr, %-formatting it only when args are given."""
    print(msg % args if args else msg, file=sys.stderr)
27
def classify(gr, typ, value):
    """Map a (token type, token value) pair to a grammar label. (Internal)"""
    if typ == token.NAME:
        # Keep a listing of all used names
        # OIL note: removed because it's only used by lib2to3
        #self.used_names.add(value)

        # Reserved words get their own dedicated labels.
        keyword_label = gr.keywords.get(value)
        if keyword_label is not None:
            return keyword_label
    # Everything else is classified by token type alone.
    label = gr.tokens.get(typ)
    if label is None:
        raise parse.ParseError("bad token", typ, value)
    return label
43
44
def PushTokens(p, tokens, gr, start_symbol, opmap=token.opmap, debug=False):
    """Feed a stream of tokens into parser p and return the syntax tree.

    NOTE: This function is specific to Python's lexer.
    """
    # XXX Move the prefix computation into a wrapper around tokenize.
    # NOTE: It's mainly for lib2to3.

    p.setup(gr.symbol2number[start_symbol], pnode.PNodeAllocator())

    # Current position in the source; used to reconstruct whitespace between
    # tokens as the "prefix" attached to the next significant token.
    cur_line = 1
    cur_col = 0
    type_ = value = start = end = line_text = None
    prefix = ""
    for type_, value, start, end, line_text in tokens:
        #log('token %s %r', type_, value)
        if (cur_line, cur_col) != start:
            assert (cur_line, cur_col) <= start, ((cur_line, cur_col), start)
            tok_line, tok_col = start
            if cur_line < tok_line:
                # Skipped whole lines become newlines in the prefix.
                prefix += "\n" * (tok_line - cur_line)
                cur_line = tok_line
                cur_col = 0
            if cur_col < tok_col:
                # Intra-line gap: copy the skipped source text verbatim.
                prefix += line_text[cur_col:tok_col]
                cur_col = tok_col
        if type_ in (tokenize.COMMENT, tokenize.NL):
            # Comments and non-logical newlines are prefix, not real tokens.
            prefix += value
            cur_line, cur_col = end
            if value.endswith("\n"):
                cur_line += 1
                cur_col = 0
            continue

        if type_ == token.OP:
            type_ = opmap[value]

        if debug:
            log("%s %r (prefix=%r)", token.tok_name[type_], value, prefix)

        if p.addtoken(type_, (value, prefix, start), classify(gr, type_, value)):
            if debug:
                log("Stop.")
            break
        prefix = ""
        cur_line, cur_col = end
        if value.endswith("\n"):
            cur_line += 1
            cur_col = 0
    else:
        # We never broke out -- EOF is too soon (how can this happen???)
        raise parse.ParseError("incomplete input", type_, (value, prefix, start))
    return p.rootnode