OILS / pyext / libc.c View on Github | oilshell.org

421 lines, 244 significant
1/*
2 * Python interface to libc functions.
3 */
4
5// - Enable GNU extensions in fnmatch.h for extended glob.
6// - It's also apparently needed for wchar.h in combination with Python.
7// https://github.com/python-pillow/Pillow/issues/1850
8// - It's currently hard-coded in pyconfig.h.
9#define _GNU_SOURCE 1
10
#include <stdarg.h> // va_list, etc.
#include <stdio.h> // printf
#include <limits.h>
#include <wchar.h>
#include <stdlib.h>
#include <string.h> // strlen, strerror
#include <sys/ioctl.h>
#include <locale.h>
#include <fnmatch.h>
#include <glob.h>
#include <regex.h>

#include <Python.h>
23
// Write a printf-style diagnostic message, followed by a newline, to stderr.
//
// The body is compiled in only when LIBC_VERBOSE is defined; otherwise the
// function does nothing.
static void debug(const char* fmt, ...) {
#ifdef LIBC_VERBOSE
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fputs("\n", stderr);
#endif
}
34
35static PyObject *
36func_realpath(PyObject *self, PyObject *args) {
37 const char *symlink;
38
39 if (!PyArg_ParseTuple(args, "s", &symlink)) {
40 return NULL;
41 }
42 char target[PATH_MAX + 1];
43 char *status = realpath(symlink, target);
44
45 // TODO: Throw exception like IOError here
46 if (status == NULL) {
47 debug("error from realpath()");
48 Py_RETURN_NONE;
49 }
50
51 return PyString_FromString(target);
52}
53
54static PyObject *
55func_fnmatch(PyObject *self, PyObject *args) {
56 const char *pattern;
57 const char *str;
58 int flags = 0;
59
60 if (!PyArg_ParseTuple(args, "ss|i", &pattern, &str, &flags)) {
61 return NULL;
62 }
63
64 // NOTE: Testing for __GLIBC__ is the version detection anti-pattern. We
65 // should really use feature detection in our configure script. But I plan
66 // to get rid of the dependency on FNM_EXTMATCH because it doesn't work on
67 // musl libc (or OS X). Instead we should compile extended globs to extended
68 // regex syntax.
69#ifdef __GLIBC__
70 flags |= FNM_EXTMATCH;
71#else
72 debug("Warning: FNM_EXTMATCH is not defined");
73#endif
74
75 int ret = fnmatch(pattern, str, flags);
76
77 switch (ret) {
78 case 0:
79 debug("matched: %s", str);
80 return PyLong_FromLong(1);
81 break;
82 case FNM_NOMATCH:
83 debug("no match: %s", str);
84 return PyLong_FromLong(0);
85 break;
86 default:
87 debug("other error: %s", str);
88 return PyLong_FromLong(-1);
89 break;
90 }
91}
92
// Error callback that can be handed to glob().  It reports the failing path
// and the errno description on stderr, then returns 0 so that glob() keeps
// going.
//
// It is not passed to glob() at present because it produced spurious errors.
// For example, sed -i s/.*// (without quotes) is OK, but it would be treated
// as a glob, and an error would be printed if the directory 's' doesn't exist.
//
// Bash does its own globbing -- it doesn't use libc.  Likewise, I think dash
// and mksh do their own globbing.

int globerr(const char *path, int errno_) {
  const char *reason = strerror(errno_);

  fprintf(stderr, "globerr: %s: %s\n", path, reason);
  return 0;  // let glob() keep going
}
106
107static PyObject *
108func_glob(PyObject *self, PyObject *args) {
109 const char* pattern;
110 if (!PyArg_ParseTuple(args, "s", &pattern)) {
111 return NULL;
112 }
113
114 glob_t results;
115 // Hm, it's weird that the first one can't be called with GLOB_APPEND. You
116 // get a segfault.
117 int flags = 0;
118 // int flags = GLOB_APPEND;
119 //flags |= GLOB_NOMAGIC;
120 int ret = glob(pattern, flags, NULL, &results);
121
122 const char *err_str = NULL;
123 switch (ret) {
124 case 0: // no error
125 break;
126 case GLOB_ABORTED:
127 err_str = "read error";
128 break;
129 case GLOB_NOMATCH:
130 // No error, because not matching isn't necessarily a problem.
131 // NOTE: This can be turned on to log overaggressive calls to glob().
132 //err_str = "nothing matched";
133 break;
134 case GLOB_NOSPACE:
135 err_str = "no dynamic memory";
136 break;
137 default:
138 err_str = "unknown problem";
139 break;
140 }
141 if (err_str) {
142 //fprintf(stderr, "func_glob: %s: %s\n", pattern, err_str);
143 PyErr_SetString(PyExc_RuntimeError, err_str);
144 return NULL;
145 }
146
147 // http://stackoverflow.com/questions/3512414/does-this-pylist-appendlist-py-buildvalue-leak
148 size_t n = results.gl_pathc;
149 PyObject* matches = PyList_New(n);
150
151 // Print array of results
152 size_t i;
153 for (i = 0; i < n; i++) {
154 //printf("%s\n", results.gl_pathv[i]);
155 PyObject* m = Py_BuildValue("s", results.gl_pathv[i]);
156 PyList_SetItem(matches, i, m);
157 }
158 globfree(&results);
159
160 return matches;
161}
162
163static PyObject *
164func_regex_search(PyObject *self, PyObject *args) {
165 const char* pattern;
166 const char* str;
167 int cflags = 0;
168 int eflags = 0;
169 int pos = 0;
170
171 if (!PyArg_ParseTuple(args, "sisi|i", &pattern, &cflags, &str, &eflags, &pos)) {
172 return NULL;
173 }
174
175 cflags |= REG_EXTENDED;
176 regex_t pat;
177 int status = regcomp(&pat, pattern, cflags);
178 if (status != 0) {
179 char error_desc[50];
180 regerror(status, &pat, error_desc, 50);
181
182 char error_message[80];
183 snprintf(error_message, 80, "Invalid regex %s (%s)", pattern, error_desc);
184
185 PyErr_SetString(PyExc_ValueError, error_message);
186 return NULL;
187 }
188
189 int num_groups = pat.re_nsub + 1;
190 PyObject *ret = PyList_New(num_groups * 2);
191
192 if (ret == NULL) {
193 regfree(&pat);
194 return NULL;
195 }
196
197 regmatch_t *pmatch = (regmatch_t*) malloc(sizeof(regmatch_t) * num_groups);
198 int match = regexec(&pat, str + pos, num_groups, pmatch, eflags);
199 if (match == 0) {
200 int i;
201 for (i = 0; i < num_groups; i++) {
202 int start = pmatch[i].rm_so;
203 if (start != -1) {
204 start += pos;
205 }
206 PyList_SetItem(ret, 2*i, PyInt_FromLong(start));
207
208 int end = pmatch[i].rm_eo;
209 if (end != -1) {
210 end += pos;
211 }
212 PyList_SetItem(ret, 2*i + 1, PyInt_FromLong(end));
213 }
214 }
215
216 free(pmatch);
217 regfree(&pat);
218
219 if (match != 0) {
220 Py_RETURN_NONE;
221 }
222
223 return ret;
224}
225
226// For ${//}, the number of groups is always 1, so we want 2 match position
227// results -- the whole regex (which we ignore), and then first group.
228//
229// For [[ =~ ]], do we need to count how many matches the user gave?
230
231#define NMATCH 2
232
233static PyObject *
234func_regex_first_group_match(PyObject *self, PyObject *args) {
235 const char* pattern;
236 const char* str;
237 int pos;
238 if (!PyArg_ParseTuple(args, "ssi", &pattern, &str, &pos)) {
239 return NULL;
240 }
241
242 regex_t pat;
243 regmatch_t m[NMATCH];
244
245 // Could have been checked by regex_parse for [[ =~ ]], but not for glob
246 // patterns like ${foo/x*/y}.
247
248 int status = regcomp(&pat, pattern, REG_EXTENDED);
249 if (status != 0) {
250 char error_string[80];
251 regerror(status, &pat, error_string, 80);
252 PyErr_SetString(PyExc_RuntimeError, error_string);
253 return NULL;
254 }
255
256 debug("first_group_match pat %s str %s pos %d", pattern, str, pos);
257
258 // Match at offset 'pos'
259 int result = regexec(&pat, str + pos, NMATCH, m, 0 /*flags*/);
260 regfree(&pat);
261
262 if (result != 0) {
263 Py_RETURN_NONE; // no match
264 }
265
266 // Assume there is a match
267 regoff_t start = m[1].rm_so;
268 regoff_t end = m[1].rm_eo;
269 return Py_BuildValue("(i,i)", pos + start, pos + end);
270}
271
272// We do this in C so we can remove '%f' % 0.1 from the CPython build. That
273// involves dtoa.c and pystrod.c, which are thousands of lines of code.
274static PyObject *
275func_print_time(PyObject *self, PyObject *args) {
276 double real, user, sys;
277 if (!PyArg_ParseTuple(args, "ddd", &real, &user, &sys)) {
278 return NULL;
279 }
280 fprintf(stderr, "real\t%.3f\n", real);
281 fprintf(stderr, "user\t%.3f\n", user);
282 fprintf(stderr, "sys\t%.3f\n", sys);
283 Py_RETURN_NONE;
284}
285
286// A copy of socket.gethostname() from socketmodule.c. That module brings in
287// too many dependencies.
288
289static PyObject *errno_error;
290
291static PyObject *
292socket_gethostname(PyObject *self, PyObject *unused)
293{
294 char buf[1024];
295 int res;
296 Py_BEGIN_ALLOW_THREADS
297 res = gethostname(buf, (int) sizeof buf - 1);
298 //res = gethostname(buf, 0); // For testing errors
299 Py_END_ALLOW_THREADS
300 if (res < 0)
301 return PyErr_SetFromErrno(errno_error);
302 buf[sizeof buf - 1] = '\0';
303 return PyString_FromString(buf);
304}
305
306static PyObject *
307func_get_terminal_width(PyObject *self, PyObject *unused) {
308 struct winsize w;
309 int res;
310 res = ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
311 if (res < 0)
312 return PyErr_SetFromErrno(errno_error);
313 return PyLong_FromLong(w.ws_col);
314}
315
316static PyObject *
317func_wcswidth(PyObject *self, PyObject *args){
318 char *string;
319 if (!PyArg_ParseTuple(args, "s", &string)) {
320 return NULL;
321 }
322
323 int num_wide_chars = mbstowcs(NULL, string, 0);
324 if (num_wide_chars == -1) {
325 PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 1");
326 return NULL;
327 }
328 int buf_size = (num_wide_chars + 1) * sizeof(wchar_t);
329 wchar_t* wide_chars = (wchar_t*)malloc(buf_size);
330 assert(wide_chars != NULL);
331
332 num_wide_chars = mbstowcs(wide_chars, string, num_wide_chars);
333 if (num_wide_chars == -1) {
334 PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 2");
335 return NULL;
336 }
337
338 int width = wcswidth(wide_chars, num_wide_chars);
339 if (width == -1) {
340 PyErr_SetString(PyExc_UnicodeError, "wcswidth()");
341 return NULL;
342 }
343
344 return PyInt_FromLong(width);
345}
346
347static PyObject *
348func_cpython_reset_locale(PyObject *self, PyObject *unused)
349{
350 // From man setlocale:
351 // The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.
352 // On startup of the main program, the portable "C" locale is selected as default.
353
354 // Python overrides this, so we set it back.
355 if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) {
356 // Our CI machines don't work with C.UTF-8, even though it's supposed
357 // to exist?
358 if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) {
359 PyErr_SetString(PyExc_SystemError, "Couldn't set locale to C.UTF-8 or en_US.UTF-8");
360 return NULL;
361 }
362 }
363 Py_RETURN_NONE;
364}
365
366#ifdef OVM_MAIN
367#include "pyext/libc.c/methods.def"
368#else
369static PyMethodDef methods[] = {
370 // Return the canonical version of a path with symlinks, or None if there is
371 // an error.
372 {"realpath", func_realpath, METH_VARARGS, ""},
373
374 // Return whether a string matches a pattern."
375 {"fnmatch", func_fnmatch, METH_VARARGS, ""},
376
377 // Return a list of files that match a pattern.
378 // We need this since Python's glob doesn't have char classes.
379 {"glob", func_glob, METH_VARARGS, ""},
380
381 // Search a string for regex. Returns a list of matches, None if no
382 // match. Raises RuntimeError if the regex is invalid.
383 {"regex_search", func_regex_search, METH_VARARGS, ""},
384
385 // If the regex matches the string, return the start and end position of the
386 // first group. Returns None if there is no match. Raises RuntimeError if
387 // the regex is invalid.
388 {"regex_first_group_match", func_regex_first_group_match, METH_VARARGS, ""},
389
390 // "Print three floating point values for the 'time' builtin.
391 {"print_time", func_print_time, METH_VARARGS, ""},
392
393 {"gethostname", socket_gethostname, METH_NOARGS, ""},
394
395 // ioctl() to get the terminal width.
396 {"get_terminal_width", func_get_terminal_width, METH_NOARGS, ""},
397
398 // Get the display width of a string. Throw an exception if the string is invalid UTF8.
399 {"wcswidth", func_wcswidth, METH_VARARGS, ""},
400
401 // Workaround for CPython's calling setlocale() in pythonrun.c. ONLY used
402 // by tests and bin/oil.py.
403 {"cpython_reset_locale", func_cpython_reset_locale, METH_NOARGS, ""},
404 {NULL, NULL},
405};
406#endif
407
408void initlibc(void) {
409 PyObject *module;
410
411 module = Py_InitModule("libc", methods);
412 if (module != NULL) {
413 PyModule_AddIntConstant(module, "FNM_CASEFOLD", FNM_CASEFOLD);
414 PyModule_AddIntConstant(module, "REG_ICASE", REG_ICASE);
415 PyModule_AddIntConstant(module, "REG_NEWLINE", REG_NEWLINE);
416 PyModule_AddIntConstant(module, "REG_NOTBOL", REG_NOTBOL);
417 }
418
419 errno_error = PyErr_NewException("libc.error",
420 PyExc_IOError, NULL);
421}