OILS / pgen2 / driver.py View on Github | oilshell.org

101 lines, 61 significant
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4# Modifications:
5# Copyright 2006 Google, Inc. All Rights Reserved.
6# Licensed to PSF under a Contributor Agreement.
7from __future__ import print_function
8
9"""Parser driver.
10
11A high-level interface to parse a file into a syntax tree.
12"""
13
14__author__ = "Guido van Rossum <guido@python.org>"
15
16
17import sys
18
19from . import parse, pnode, token, tokenize
20
21
def log(msg, *args):
    """Write a message to stderr, %-formatting it only when args are given."""
    print(msg % args if args else msg, file=sys.stderr)
27
def classify(gr, typ, value):
    """Map a (token type, token value) pair to a grammar label. (Internal)"""
    if typ == token.NAME:
        # Keep a listing of all used names
        # OIL note: removed because it's only used by lib2to3
        #self.used_names.add(value)

        # Reserved words get their own dedicated labels.
        keyword_label = gr.keywords.get(value)
        if keyword_label is not None:
            return keyword_label
    # Everything else is classified by token type alone.
    label = gr.tokens.get(typ)
    if label is None:
        raise parse.ParseError("bad token", typ, value)
    return label
43
44
def PushTokens(p, tokens, gr, start_symbol, opmap=token.opmap, debug=False):
    """Feed a stream of tokens into parser p and return the syntax tree.

    NOTE: This function is specific to Python's lexer.
    """
    # XXX Move the prefix computation into a wrapper around tokenize.
    # NOTE: It's mainly for lib2to3.

    p.setup(gr.symbol2number[start_symbol], pnode.PNodeAllocator())

    # Current position in the source; used to reconstruct whitespace between
    # tokens as the "prefix" attached to the next significant token.
    cur_line = 1
    cur_col = 0
    type_ = value = start = end = line_text = None
    prefix = ""
    for type_, value, start, end, line_text in tokens:
        #log('token %s %r', type_, value)
        if (cur_line, cur_col) != start:
            assert (cur_line, cur_col) <= start, ((cur_line, cur_col), start)
            tok_line, tok_col = start
            if cur_line < tok_line:
                # Skipped whole lines become newlines in the prefix.
                prefix += "\n" * (tok_line - cur_line)
                cur_line = tok_line
                cur_col = 0
            if cur_col < tok_col:
                # Intra-line gap: copy the skipped source text verbatim.
                prefix += line_text[cur_col:tok_col]
                cur_col = tok_col
        if type_ in (tokenize.COMMENT, tokenize.NL):
            # Comments and non-logical newlines are prefix, not real tokens.
            prefix += value
            cur_line, cur_col = end
            if value.endswith("\n"):
                cur_line += 1
                cur_col = 0
            continue

        if type_ == token.OP:
            type_ = opmap[value]

        if debug:
            log("%s %r (prefix=%r)", token.tok_name[type_], value, prefix)

        if p.addtoken(type_, (value, prefix, start), classify(gr, type_, value)):
            if debug:
                log("Stop.")
            break
        prefix = ""
        cur_line, cur_col = end
        if value.endswith("\n"):
            cur_line += 1
            cur_col = 0
    else:
        # We never broke out -- EOF is too soon (how can this happen???)
        raise parse.ParseError("incomplete input", type_, (value, prefix, start))
    return p.rootnode