| 1 | /*
 | 
| 2 |  * Python interface to libc functions.
 | 
| 3 |  */
 | 
| 4 | 
 | 
| 5 | // - Enable GNU extensions in fnmatch.h for extended glob.
 | 
| 6 | // - It's also apparently needed for wchar.h in combination with Python.
 | 
| 7 | //   https://github.com/python-pillow/Pillow/issues/1850
 | 
| 8 | //   - It's currently hard-coded in pyconfig.h.
 | 
| 9 | #define _GNU_SOURCE 1
 | 
| 10 | 
 | 
#include <stdarg.h>  // va_list, etc.
#include <stdio.h>  // printf
#include <limits.h>
#include <wchar.h>
#include <stdlib.h>
#include <string.h>  // strlen, strerror
#include <sys/ioctl.h>
#include <locale.h>
#include <fnmatch.h>
#include <glob.h>
#include <regex.h>

#include <Python.h>
 | 
| 23 | 
 | 
// Write one printf-style diagnostic line (newline appended) to stderr.
//
// The body is only compiled in when LIBC_VERBOSE is defined; otherwise every
// call is a no-op.
static void debug(const char* fmt, ...) {
#ifdef LIBC_VERBOSE
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fputc('\n', stderr);
#endif
}
 | 
| 34 | 
 | 
| 35 | static PyObject *
 | 
| 36 | func_realpath(PyObject *self, PyObject *args) {
 | 
| 37 |   const char *symlink;
 | 
| 38 | 
 | 
| 39 |   if (!PyArg_ParseTuple(args, "s", &symlink)) {
 | 
| 40 |     return NULL;
 | 
| 41 |   }
 | 
| 42 |   char target[PATH_MAX + 1];
 | 
| 43 |   char *status = realpath(symlink, target);
 | 
| 44 | 
 | 
| 45 |   // TODO: Throw exception like IOError here
 | 
| 46 |   if (status == NULL) {
 | 
| 47 |     debug("error from realpath()");
 | 
| 48 |     Py_RETURN_NONE;
 | 
| 49 |   }
 | 
| 50 | 
 | 
| 51 |   return PyString_FromString(target);
 | 
| 52 | }
 | 
| 53 | 
 | 
| 54 | static PyObject *
 | 
| 55 | func_fnmatch(PyObject *self, PyObject *args) {
 | 
| 56 |   const char *pattern;
 | 
| 57 |   const char *str;
 | 
| 58 |   int flags = 0;
 | 
| 59 | 
 | 
| 60 |   if (!PyArg_ParseTuple(args, "ss|i", &pattern, &str, &flags)) {
 | 
| 61 |     return NULL;
 | 
| 62 |   }
 | 
| 63 | 
 | 
| 64 |   // NOTE: Testing for __GLIBC__ is the version detection anti-pattern.  We
 | 
| 65 |   // should really use feature detection in our configure script.  But I plan
 | 
| 66 |   // to get rid of the dependency on FNM_EXTMATCH because it doesn't work on
 | 
| 67 |   // musl libc (or OS X).  Instead we should compile extended globs to extended
 | 
| 68 |   // regex syntax.
 | 
| 69 | #ifdef __GLIBC__
 | 
| 70 |   flags |= FNM_EXTMATCH;
 | 
| 71 | #else
 | 
| 72 |   debug("Warning: FNM_EXTMATCH is not defined");
 | 
| 73 | #endif
 | 
| 74 | 
 | 
| 75 |   int ret = fnmatch(pattern, str, flags);
 | 
| 76 | 
 | 
| 77 |   switch (ret) {
 | 
| 78 |   case 0:
 | 
| 79 |     debug("matched: %s", str);
 | 
| 80 |     return PyLong_FromLong(1);
 | 
| 81 |     break;
 | 
| 82 |   case FNM_NOMATCH:
 | 
| 83 |     debug("no match: %s", str);
 | 
| 84 |     return PyLong_FromLong(0);
 | 
| 85 |     break;
 | 
| 86 |   default:
 | 
| 87 |     debug("other error: %s", str);
 | 
| 88 |     return PyLong_FromLong(-1);
 | 
| 89 |     break;
 | 
| 90 |   }
 | 
| 91 | }
 | 
| 92 | 
 | 
// Error callback suitable for passing to glob().
//
// It is not installed (func_glob passes NULL instead) because it produced
// spurious errors.  For example, an unquoted  sed -i s/.*//  is fine, but the
// argument is treated as a glob, and an error gets printed when the directory
// 's' doesn't exist.
//
// Bash does its own globbing -- it doesn't use libc.  dash and mksh probably
// do their own globbing as well.

int globerr(const char *path, int errno_) {
  const char *reason = strerror(errno_);

  fprintf(stderr, "globerr: %s: %s\n", path, reason);

  // A zero return tells glob() to keep going.
  return 0;
}
 | 
| 106 | 
 | 
| 107 | static PyObject *
 | 
| 108 | func_glob(PyObject *self, PyObject *args) {
 | 
| 109 |   const char* pattern;
 | 
| 110 |   if (!PyArg_ParseTuple(args, "s", &pattern)) {
 | 
| 111 |     return NULL;
 | 
| 112 |   }
 | 
| 113 | 
 | 
| 114 |   glob_t results;
 | 
| 115 |   // Hm, it's weird that the first one can't be called with GLOB_APPEND.  You
 | 
| 116 |   // get a segfault.
 | 
| 117 |   int flags = 0;
 | 
| 118 |   // int flags = GLOB_APPEND;
 | 
| 119 |   //flags |= GLOB_NOMAGIC;
 | 
| 120 |   int ret = glob(pattern, flags, NULL, &results);
 | 
| 121 | 
 | 
| 122 |   const char *err_str = NULL;
 | 
| 123 |   switch (ret) {
 | 
| 124 |   case 0:  // no error
 | 
| 125 |     break;
 | 
| 126 |   case GLOB_ABORTED:
 | 
| 127 |     err_str = "read error";
 | 
| 128 |     break;
 | 
| 129 |   case GLOB_NOMATCH:
 | 
| 130 |     // No error, because not matching isn't necessarily a problem.
 | 
| 131 |     // NOTE: This can be turned on to log overaggressive calls to glob().
 | 
| 132 |     //err_str = "nothing matched";
 | 
| 133 |     break;
 | 
| 134 |   case GLOB_NOSPACE:
 | 
| 135 |     err_str = "no dynamic memory";
 | 
| 136 |     break;
 | 
| 137 |   default:
 | 
| 138 |     err_str = "unknown problem";
 | 
| 139 |     break;
 | 
| 140 |   }
 | 
| 141 |   if (err_str) {
 | 
| 142 |     //fprintf(stderr, "func_glob: %s: %s\n", pattern, err_str);
 | 
| 143 |     PyErr_SetString(PyExc_RuntimeError, err_str);
 | 
| 144 |     return NULL;
 | 
| 145 |   }
 | 
| 146 | 
 | 
| 147 |   // http://stackoverflow.com/questions/3512414/does-this-pylist-appendlist-py-buildvalue-leak
 | 
| 148 |   size_t n = results.gl_pathc;
 | 
| 149 |   PyObject* matches = PyList_New(n);
 | 
| 150 | 
 | 
| 151 |   // Print array of results
 | 
| 152 |   size_t i;
 | 
| 153 |   for (i = 0; i < n; i++) {
 | 
| 154 |     //printf("%s\n", results.gl_pathv[i]);
 | 
| 155 |     PyObject* m = Py_BuildValue("s", results.gl_pathv[i]);
 | 
| 156 |     PyList_SetItem(matches, i, m);
 | 
| 157 |   }
 | 
| 158 |   globfree(&results);
 | 
| 159 | 
 | 
| 160 |   return matches;
 | 
| 161 | }
 | 
| 162 | 
 | 
| 163 | static PyObject *
 | 
| 164 | func_regex_search(PyObject *self, PyObject *args) {
 | 
| 165 |   const char* pattern;
 | 
| 166 |   const char* str;
 | 
| 167 |   int cflags = 0;
 | 
| 168 |   int eflags = 0;
 | 
| 169 |   int pos = 0;
 | 
| 170 | 
 | 
| 171 |   if (!PyArg_ParseTuple(args, "sisi|i", &pattern, &cflags, &str, &eflags, &pos)) {
 | 
| 172 |     return NULL;
 | 
| 173 |   }
 | 
| 174 | 
 | 
| 175 |   cflags |= REG_EXTENDED;
 | 
| 176 |   regex_t pat;
 | 
| 177 |   int status = regcomp(&pat, pattern, cflags);
 | 
| 178 |   if (status != 0) {
 | 
| 179 |     char error_desc[50];
 | 
| 180 |     regerror(status, &pat, error_desc, 50);
 | 
| 181 | 
 | 
| 182 |     char error_message[80];
 | 
| 183 |     snprintf(error_message, 80, "Invalid regex %s (%s)", pattern, error_desc);
 | 
| 184 | 
 | 
| 185 |     PyErr_SetString(PyExc_ValueError, error_message);
 | 
| 186 |     return NULL;
 | 
| 187 |   }
 | 
| 188 | 
 | 
| 189 |   int num_groups = pat.re_nsub + 1;
 | 
| 190 |   PyObject *ret = PyList_New(num_groups * 2);
 | 
| 191 | 
 | 
| 192 |   if (ret == NULL) {
 | 
| 193 |     regfree(&pat);
 | 
| 194 |     return NULL;
 | 
| 195 |   }
 | 
| 196 | 
 | 
| 197 |   regmatch_t *pmatch = (regmatch_t*) malloc(sizeof(regmatch_t) * num_groups);
 | 
| 198 |   int match = regexec(&pat, str + pos, num_groups, pmatch, eflags);
 | 
| 199 |   if (match == 0) {
 | 
| 200 |     int i;
 | 
| 201 |     for (i = 0; i < num_groups; i++) {
 | 
| 202 |       int start = pmatch[i].rm_so;
 | 
| 203 |       if (start != -1) {
 | 
| 204 |         start += pos;
 | 
| 205 |       }
 | 
| 206 |       PyList_SetItem(ret, 2*i, PyInt_FromLong(start));
 | 
| 207 | 
 | 
| 208 |       int end = pmatch[i].rm_eo;
 | 
| 209 |       if (end != -1) {
 | 
| 210 |         end += pos;
 | 
| 211 |       }
 | 
| 212 |       PyList_SetItem(ret, 2*i + 1, PyInt_FromLong(end));
 | 
| 213 |     }
 | 
| 214 |   }
 | 
| 215 | 
 | 
| 216 |   free(pmatch);
 | 
| 217 |   regfree(&pat);
 | 
| 218 | 
 | 
| 219 |   if (match != 0) {
 | 
| 220 |     Py_RETURN_NONE;
 | 
| 221 |   }
 | 
| 222 | 
 | 
| 223 |   return ret;
 | 
| 224 | }
 | 
| 225 | 
 | 
| 226 | // For ${//}, the number of groups is always 1, so we want 2 match position
 | 
| 227 | // results -- the whole regex (which we ignore), and then first group.
 | 
| 228 | //
 | 
| 229 | // For [[ =~ ]], do we need to count how many matches the user gave?
 | 
| 230 | 
 | 
| 231 | #define NMATCH 2
 | 
| 232 | 
 | 
| 233 | static PyObject *
 | 
| 234 | func_regex_first_group_match(PyObject *self, PyObject *args) {
 | 
| 235 |   const char* pattern;
 | 
| 236 |   const char* str;
 | 
| 237 |   int pos;
 | 
| 238 |   if (!PyArg_ParseTuple(args, "ssi", &pattern, &str, &pos)) {
 | 
| 239 |     return NULL;
 | 
| 240 |   }
 | 
| 241 | 
 | 
| 242 |   regex_t pat;
 | 
| 243 |   regmatch_t m[NMATCH];
 | 
| 244 | 
 | 
| 245 |   // Could have been checked by regex_parse for [[ =~ ]], but not for glob
 | 
| 246 |   // patterns like ${foo/x*/y}.
 | 
| 247 | 
 | 
| 248 |   int status = regcomp(&pat, pattern, REG_EXTENDED);
 | 
| 249 |   if (status != 0) {
 | 
| 250 |     char error_string[80];
 | 
| 251 |     regerror(status, &pat, error_string, 80);
 | 
| 252 |     PyErr_SetString(PyExc_RuntimeError, error_string);
 | 
| 253 |     return NULL;
 | 
| 254 |   }
 | 
| 255 | 
 | 
| 256 |   debug("first_group_match pat %s str %s pos %d", pattern, str, pos);
 | 
| 257 | 
 | 
| 258 |   // Match at offset 'pos'
 | 
| 259 |   int result = regexec(&pat, str + pos, NMATCH, m, 0 /*flags*/);
 | 
| 260 |   regfree(&pat);
 | 
| 261 | 
 | 
| 262 |   if (result != 0) {
 | 
| 263 |     Py_RETURN_NONE;  // no match
 | 
| 264 |   }
 | 
| 265 | 
 | 
| 266 |   // Assume there is a match
 | 
| 267 |   regoff_t start = m[1].rm_so;
 | 
| 268 |   regoff_t end = m[1].rm_eo;
 | 
| 269 |   return Py_BuildValue("(i,i)", pos + start, pos + end);
 | 
| 270 | }
 | 
| 271 | 
 | 
| 272 | // We do this in C so we can remove '%f' % 0.1 from the CPython build.  That
 | 
| 273 | // involves dtoa.c and pystrod.c, which are thousands of lines of code.
 | 
| 274 | static PyObject *
 | 
| 275 | func_print_time(PyObject *self, PyObject *args) {
 | 
| 276 |   double real, user, sys;
 | 
| 277 |   if (!PyArg_ParseTuple(args, "ddd", &real, &user, &sys)) {
 | 
| 278 |     return NULL;
 | 
| 279 |   }
 | 
| 280 |   fprintf(stderr, "real\t%.3f\n", real);
 | 
| 281 |   fprintf(stderr, "user\t%.3f\n",  user);
 | 
| 282 |   fprintf(stderr, "sys\t%.3f\n", sys);
 | 
| 283 |   Py_RETURN_NONE;
 | 
| 284 | }
 | 
| 285 | 
 | 
| 286 | // A copy of socket.gethostname() from socketmodule.c.  That module brings in
 | 
| 287 | // too many dependencies.
 | 
| 288 | 
 | 
| 289 | static PyObject *errno_error;
 | 
| 290 | 
 | 
| 291 | static PyObject *
 | 
| 292 | socket_gethostname(PyObject *self, PyObject *unused)
 | 
| 293 | {
 | 
| 294 |     char buf[1024];
 | 
| 295 |     int res;
 | 
| 296 |     Py_BEGIN_ALLOW_THREADS
 | 
| 297 |     res = gethostname(buf, (int) sizeof buf - 1);
 | 
| 298 |     //res = gethostname(buf, 0);  // For testing errors
 | 
| 299 |     Py_END_ALLOW_THREADS
 | 
| 300 |     if (res < 0)
 | 
| 301 |         return PyErr_SetFromErrno(errno_error);
 | 
| 302 |     buf[sizeof buf - 1] = '\0';
 | 
| 303 |     return PyString_FromString(buf);
 | 
| 304 | }
 | 
| 305 | 
 | 
| 306 | static PyObject *
 | 
| 307 | func_get_terminal_width(PyObject *self, PyObject *unused) {
 | 
| 308 |   struct winsize w;
 | 
| 309 |   int res;
 | 
| 310 |   res = ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
 | 
| 311 |   if (res < 0)
 | 
| 312 |     return PyErr_SetFromErrno(errno_error);
 | 
| 313 |   return PyLong_FromLong(w.ws_col);
 | 
| 314 | }
 | 
| 315 | 
 | 
| 316 | static PyObject *
 | 
| 317 | func_wcswidth(PyObject *self, PyObject *args){
 | 
| 318 |     char *string;
 | 
| 319 |     if (!PyArg_ParseTuple(args, "s", &string)) {
 | 
| 320 |         return NULL;
 | 
| 321 |     }
 | 
| 322 | 
 | 
| 323 |     int num_wide_chars = mbstowcs(NULL, string, 0);
 | 
| 324 |     if (num_wide_chars == -1) {
 | 
| 325 |         PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 1");
 | 
| 326 |         return NULL;
 | 
| 327 |     }
 | 
| 328 |     int buf_size = (num_wide_chars + 1) * sizeof(wchar_t);
 | 
| 329 |     wchar_t* wide_chars = (wchar_t*)malloc(buf_size);
 | 
| 330 |     assert(wide_chars != NULL);
 | 
| 331 | 
 | 
| 332 |     num_wide_chars = mbstowcs(wide_chars, string, num_wide_chars);
 | 
| 333 |     if (num_wide_chars == -1) {
 | 
| 334 |         PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 2");
 | 
| 335 |         return NULL;
 | 
| 336 |     }
 | 
| 337 | 
 | 
| 338 |     int width = wcswidth(wide_chars, num_wide_chars);
 | 
| 339 |     if (width == -1) {
 | 
| 340 |         PyErr_SetString(PyExc_UnicodeError, "wcswidth()");
 | 
| 341 |         return NULL;
 | 
| 342 |     }
 | 
| 343 | 
 | 
| 344 |     return PyInt_FromLong(width);
 | 
| 345 | }
 | 
| 346 | 
 | 
| 347 | static PyObject *
 | 
| 348 | func_cpython_reset_locale(PyObject *self, PyObject *unused)
 | 
| 349 | {
 | 
| 350 |     // From man setlocale:
 | 
| 351 |     //   The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.
 | 
| 352 |     //   On startup of the main program, the portable "C" locale is selected as default.
 | 
| 353 | 
 | 
| 354 |     // Python overrides this, so we set it back.
 | 
| 355 |     if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) {
 | 
| 356 |         // Our CI machines don't work with C.UTF-8, even though it's supposed
 | 
| 357 |         // to exist?
 | 
| 358 |         if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) {
 | 
| 359 |             PyErr_SetString(PyExc_SystemError, "Couldn't set locale to C.UTF-8 or en_US.UTF-8");
 | 
| 360 |             return NULL;
 | 
| 361 |         }
 | 
| 362 |     }
 | 
| 363 |     Py_RETURN_NONE;
 | 
| 364 | }
 | 
| 365 | 
 | 
| 366 | #ifdef OVM_MAIN
 | 
| 367 | #include "pyext/libc.c/methods.def"
 | 
| 368 | #else
 | 
| 369 | static PyMethodDef methods[] = {
 | 
| 370 |   // Return the canonical version of a path with symlinks, or None if there is
 | 
| 371 |   // an error.
 | 
| 372 |   {"realpath", func_realpath, METH_VARARGS, ""},
 | 
| 373 | 
 | 
| 374 |   // Return whether a string matches a pattern."
 | 
| 375 |   {"fnmatch", func_fnmatch, METH_VARARGS, ""},
 | 
| 376 | 
 | 
| 377 |   // Return a list of files that match a pattern.
 | 
| 378 |   // We need this since Python's glob doesn't have char classes.
 | 
| 379 |   {"glob", func_glob, METH_VARARGS, ""},
 | 
| 380 | 
 | 
| 381 |   // Search a string for regex.  Returns a list of matches, None if no
 | 
| 382 |   // match.  Raises RuntimeError if the regex is invalid.
 | 
| 383 |   {"regex_search", func_regex_search, METH_VARARGS, ""},
 | 
| 384 | 
 | 
| 385 |   // If the regex matches the string, return the start and end position of the
 | 
| 386 |   // first group.  Returns None if there is no match.  Raises RuntimeError if
 | 
| 387 |   // the regex is invalid.
 | 
| 388 |   {"regex_first_group_match", func_regex_first_group_match, METH_VARARGS, ""},
 | 
| 389 | 
 | 
| 390 |   // "Print three floating point values for the 'time' builtin.
 | 
| 391 |   {"print_time", func_print_time, METH_VARARGS, ""},
 | 
| 392 | 
 | 
| 393 |   {"gethostname", socket_gethostname, METH_NOARGS, ""},
 | 
| 394 | 
 | 
| 395 |   // ioctl() to get the terminal width.
 | 
| 396 |   {"get_terminal_width", func_get_terminal_width, METH_NOARGS, ""},
 | 
| 397 | 
 | 
| 398 |   // Get the display width of a string. Throw an exception if the string is invalid UTF8.
 | 
| 399 |   {"wcswidth", func_wcswidth, METH_VARARGS, ""},
 | 
| 400 | 
 | 
| 401 |   // Workaround for CPython's calling setlocale() in pythonrun.c.  ONLY used
 | 
| 402 |   // by tests and bin/oil.py.
 | 
| 403 |   {"cpython_reset_locale", func_cpython_reset_locale, METH_NOARGS, ""},
 | 
| 404 |   {NULL, NULL},
 | 
| 405 | };
 | 
| 406 | #endif
 | 
| 407 | 
 | 
| 408 | void initlibc(void) {
 | 
| 409 |   PyObject *module;
 | 
| 410 | 
 | 
| 411 |   module = Py_InitModule("libc", methods);
 | 
| 412 |   if (module != NULL) {
 | 
| 413 |       PyModule_AddIntConstant(module, "FNM_CASEFOLD", FNM_CASEFOLD);
 | 
| 414 |       PyModule_AddIntConstant(module, "REG_ICASE", REG_ICASE);
 | 
| 415 |       PyModule_AddIntConstant(module, "REG_NEWLINE", REG_NEWLINE);
 | 
| 416 |       PyModule_AddIntConstant(module, "REG_NOTBOL", REG_NOTBOL);
 | 
| 417 |   }
 | 
| 418 | 
 | 
| 419 |   errno_error = PyErr_NewException("libc.error",
 | 
| 420 |                                     PyExc_IOError, NULL);
 | 
| 421 | }
 |