OILS / pyext / fastlex.c View on Github | oilshell.org

351 lines, 238 significant
1/*
2 * Fast lexer using re2c.
3 */
4
5#include <stdarg.h> // va_list, etc.
6#include <stdio.h> // printf
7
8#include <Python.h>
9
10#include "_gen/frontend/id_kind.asdl_c.h"
11#include "_gen/frontend/types.asdl_c.h" // for lex_mode_e
12#include "_gen/frontend/match.re2c.h"
13
14// TODO: Should this be shared among all extensions?
15// Log messages to stderr.
16#if 0
// Write a printf-style formatted message to stderr, followed by a newline.
static void debug(const char* fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  // Terminate the message so consecutive log lines don't run together.
  fputs("\n", stderr);
}
24#endif
25
26static PyObject *
27fastlex_MatchOshToken(PyObject *self, PyObject *args) {
28 int lex_mode;
29
30 unsigned char* line;
31 int line_len;
32
33 int start_pos;
34 if (!PyArg_ParseTuple(args, "is#i",
35 &lex_mode, &line, &line_len, &start_pos)) {
36 return NULL;
37 }
38
39 // Bounds checking. It's OK to be called with a start_pos looking at \0.
40 // Eol_Tok is inserted everywhere.
41 if (start_pos > line_len) {
42 PyErr_Format(PyExc_ValueError,
43 "Invalid MatchOshToken call (start_pos = %d, line_len = %d)",
44 start_pos, line_len);
45 return NULL;
46 }
47
48 int id;
49 int end_pos;
50 MatchOshToken(lex_mode, line, line_len, start_pos, &id, &end_pos);
51 return Py_BuildValue("(ii)", id, end_pos);
52}
53
54static PyObject *
55fastlex_MatchEchoToken(PyObject *self, PyObject *args) {
56 unsigned char* line;
57 int line_len;
58
59 int start_pos;
60 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
61 return NULL;
62 }
63
64 // Bounds checking.
65 if (start_pos > line_len) {
66 PyErr_Format(PyExc_ValueError,
67 "Invalid MatchEchoToken call (start_pos = %d, line_len = %d)",
68 start_pos, line_len);
69 return NULL;
70 }
71
72 int id;
73 int end_pos;
74 MatchEchoToken(line, line_len, start_pos, &id, &end_pos);
75 return Py_BuildValue("(ii)", id, end_pos);
76}
77
78static PyObject *
79fastlex_MatchGlobToken(PyObject *self, PyObject *args) {
80 unsigned char* line;
81 int line_len;
82
83 int start_pos;
84 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
85 return NULL;
86 }
87
88 // Bounds checking.
89 if (start_pos > line_len) {
90 PyErr_Format(PyExc_ValueError,
91 "Invalid MatchGlobToken call (start_pos = %d, line_len = %d)",
92 start_pos, line_len);
93 return NULL;
94 }
95
96 int id;
97 int end_pos;
98 MatchGlobToken(line, line_len, start_pos, &id, &end_pos);
99 return Py_BuildValue("(ii)", id, end_pos);
100}
101
102static PyObject *
103fastlex_MatchPS1Token(PyObject *self, PyObject *args) {
104 unsigned char* line;
105 int line_len;
106
107 int start_pos;
108 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
109 return NULL;
110 }
111
112 // Bounds checking.
113 if (start_pos > line_len) {
114 PyErr_Format(PyExc_ValueError,
115 "Invalid MatchPS1Token call (start_pos = %d, line_len = %d)",
116 start_pos, line_len);
117 return NULL;
118 }
119
120 int id;
121 int end_pos;
122 MatchPS1Token(line, line_len, start_pos, &id, &end_pos);
123 return Py_BuildValue("(ii)", id, end_pos);
124}
125
126static PyObject *
127fastlex_MatchHistoryToken(PyObject *self, PyObject *args) {
128 unsigned char* line;
129 int line_len;
130
131 int start_pos;
132 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
133 return NULL;
134 }
135
136 // Bounds checking.
137 if (start_pos > line_len) {
138 PyErr_Format(PyExc_ValueError,
139 "Invalid MatchHistoryToken call (start_pos = %d, line_len = %d)",
140 start_pos, line_len);
141 return NULL;
142 }
143
144 int id;
145 int end_pos;
146 MatchHistoryToken(line, line_len, start_pos, &id, &end_pos);
147 return Py_BuildValue("(ii)", id, end_pos);
148}
149
150static PyObject *
151fastlex_MatchBraceRangeToken(PyObject *self, PyObject *args) {
152 unsigned char* line;
153 int line_len;
154
155 int start_pos;
156 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
157 return NULL;
158 }
159
160 // Bounds checking.
161 if (start_pos > line_len) {
162 PyErr_Format(PyExc_ValueError,
163 "Invalid MatchBraceRangeToken call (start_pos = %d, line_len = %d)",
164 start_pos, line_len);
165 return NULL;
166 }
167
168 int id;
169 int end_pos;
170 MatchBraceRangeToken(line, line_len, start_pos, &id, &end_pos);
171 return Py_BuildValue("(ii)", id, end_pos);
172}
173
174static PyObject *
175fastlex_MatchJ8Token(PyObject *self, PyObject *args) {
176 unsigned char* line;
177 int line_len;
178
179 int start_pos;
180 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
181 return NULL;
182 }
183
184 // Bounds checking.
185 if (start_pos > line_len) {
186 PyErr_Format(PyExc_ValueError,
187 "Invalid MatchJ8Token call (start_pos = %d, line_len = %d)",
188 start_pos, line_len);
189 return NULL;
190 }
191
192 int id;
193 int end_pos;
194 MatchJ8Token(line, line_len, start_pos, &id, &end_pos);
195 return Py_BuildValue("(ii)", id, end_pos);
196}
197
198static PyObject *
199fastlex_MatchJ8LinesToken(PyObject *self, PyObject *args) {
200 unsigned char* line;
201 int line_len;
202
203 int start_pos;
204 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
205 return NULL;
206 }
207
208 // Bounds checking.
209 if (start_pos > line_len) {
210 PyErr_Format(PyExc_ValueError,
211 "Invalid MatchJ8LinesToken call (start_pos = %d, line_len = %d)",
212 start_pos, line_len);
213 return NULL;
214 }
215
216 int id;
217 int end_pos;
218 MatchJ8LinesToken(line, line_len, start_pos, &id, &end_pos);
219 return Py_BuildValue("(ii)", id, end_pos);
220}
221
222
223static PyObject *
224fastlex_MatchJ8StrToken(PyObject *self, PyObject *args) {
225 unsigned char* line;
226 int line_len;
227
228 int start_pos;
229 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
230 return NULL;
231 }
232
233 // Bounds checking.
234 if (start_pos > line_len) {
235 PyErr_Format(PyExc_ValueError,
236 "Invalid MatchJ8StrToken call (start_pos = %d, line_len = %d)",
237 start_pos, line_len);
238 return NULL;
239 }
240
241 int id;
242 int end_pos;
243 MatchJ8StrToken(line, line_len, start_pos, &id, &end_pos);
244 return Py_BuildValue("(ii)", id, end_pos);
245}
246
247static PyObject *
248fastlex_MatchJsonStrToken(PyObject *self, PyObject *args) {
249 unsigned char* line;
250 int line_len;
251
252 int start_pos;
253 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
254 return NULL;
255 }
256
257 // Bounds checking.
258 if (start_pos > line_len) {
259 PyErr_Format(PyExc_ValueError,
260 "Invalid MatchJsonStrToken call (start_pos = %d, line_len = %d)",
261 start_pos, line_len);
262 return NULL;
263 }
264
265 int id;
266 int end_pos;
267 MatchJsonStrToken(line, line_len, start_pos, &id, &end_pos);
268 return Py_BuildValue("(ii)", id, end_pos);
269}
270
271static PyObject *
272fastlex_IsValidVarName(PyObject *self, PyObject *args) {
273 unsigned char *name;
274 int len;
275
276 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
277 return NULL;
278 }
279 return PyBool_FromLong(IsValidVarName(name, len));
280}
281
282static PyObject *
283fastlex_ShouldHijack(PyObject *self, PyObject *args) {
284 unsigned char *name;
285 int len;
286
287 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
288 return NULL;
289 }
290 return PyBool_FromLong(ShouldHijack(name, len));
291}
292
293static PyObject *
294fastlex_LooksLikeInteger(PyObject *self, PyObject *args) {
295 unsigned char *name;
296 int len;
297
298 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
299 return NULL;
300 }
301 return PyBool_FromLong(LooksLikeInteger(name, len));
302}
303
304static PyObject *
305fastlex_LooksLikeFloat(PyObject *self, PyObject *args) {
306 unsigned char *name;
307 int len;
308
309 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
310 return NULL;
311 }
312 return PyBool_FromLong(LooksLikeFloat(name, len));
313}
314
315#ifdef OVM_MAIN
316#include "pyext/fastlex.c/methods.def"
317#else
318static PyMethodDef methods[] = {
319 {"MatchOshToken", fastlex_MatchOshToken, METH_VARARGS,
320 "(lexer mode, line, start_pos) -> (id, end_pos)."},
321 {"MatchEchoToken", fastlex_MatchEchoToken, METH_VARARGS,
322 "(line, start_pos) -> (id, end_pos)."},
323 {"MatchGlobToken", fastlex_MatchGlobToken, METH_VARARGS,
324 "(line, start_pos) -> (id, end_pos)."},
325 {"MatchPS1Token", fastlex_MatchPS1Token, METH_VARARGS,
326 "(line, start_pos) -> (id, end_pos)."},
327 {"MatchHistoryToken", fastlex_MatchHistoryToken, METH_VARARGS,
328 "(line, start_pos) -> (id, end_pos)."},
329 {"MatchBraceRangeToken", fastlex_MatchBraceRangeToken, METH_VARARGS,
330 "(line, start_pos) -> (id, end_pos)."},
331 {"MatchJ8Token", fastlex_MatchJ8Token, METH_VARARGS,
332 "(line, start_pos) -> (id, end_pos)."},
333 {"MatchJ8LinesToken", fastlex_MatchJ8LinesToken, METH_VARARGS,
334 "(line, start_pos) -> (id, end_pos)."},
335 {"MatchJ8StrToken", fastlex_MatchJ8StrToken, METH_VARARGS,
336 "(line, start_pos) -> (id, end_pos)."},
337 {"MatchJsonStrToken", fastlex_MatchJsonStrToken, METH_VARARGS,
338 "(line, start_pos) -> (id, end_pos)."},
339 {"IsValidVarName", fastlex_IsValidVarName, METH_VARARGS,
340 "Is it a valid var name?"},
341 // Should we hijack this shebang line?
342 {"ShouldHijack", fastlex_ShouldHijack, METH_VARARGS, ""},
343 {"LooksLikeInteger", fastlex_LooksLikeInteger, METH_VARARGS, ""},
344 {"LooksLikeFloat", fastlex_LooksLikeFloat, METH_VARARGS, ""},
345 {NULL, NULL},
346};
347#endif
348
349void initfastlex(void) {
350 Py_InitModule("fastlex", methods);
351}