| 1 | // libc.cc: Replacement for pyext/libc.c
 | 
| 2 | 
 | 
| 3 | #include "cpp/libc.h"
 | 
| 4 | 
 | 
| 5 | #include <errno.h>
 | 
| 6 | #include <fnmatch.h>
 | 
| 7 | #include <glob.h>
 | 
| 8 | #include <locale.h>
 | 
| 9 | #include <regex.h>
 | 
| 10 | #include <sys/ioctl.h>
 | 
| 11 | #include <unistd.h>  // gethostname()
 | 
| 12 | #include <wchar.h>
 | 
| 13 | 
 | 
| 14 | namespace libc {
 | 
| 15 | 
 | 
| 16 | BigStr* gethostname() {
 | 
| 17 |   // Note: Fixed issue #1656 - OS X and FreeBSD don't have HOST_NAME_MAX
 | 
| 18 |   // https://reviews.freebsd.org/D30062
 | 
| 19 |   BigStr* result = OverAllocatedStr(_POSIX_HOST_NAME_MAX);
 | 
| 20 |   int status = ::gethostname(result->data_, _POSIX_HOST_NAME_MAX);
 | 
| 21 |   if (status != 0) {
 | 
| 22 |     throw Alloc<OSError>(errno);
 | 
| 23 |   }
 | 
| 24 |   // Important: set the length of the string!
 | 
| 25 |   result->MaybeShrink(strlen(result->data_));
 | 
| 26 |   return result;
 | 
| 27 | }
 | 
| 28 | 
 | 
| 29 | BigStr* realpath(BigStr* path) {
 | 
| 30 |   BigStr* result = OverAllocatedStr(PATH_MAX);
 | 
| 31 |   char* p = ::realpath(path->data_, result->data_);
 | 
| 32 |   if (p == nullptr) {
 | 
| 33 |     throw Alloc<OSError>(errno);
 | 
| 34 |   }
 | 
| 35 |   result->MaybeShrink(strlen(result->data_));
 | 
| 36 |   return result;
 | 
| 37 | }
 | 
| 38 | 
 | 
| 39 | int fnmatch(BigStr* pat, BigStr* str, int flags) {
 | 
| 40 | #ifdef FNM_EXTMATCH
 | 
| 41 |   flags |= FNM_EXTMATCH;
 | 
| 42 | #else
 | 
| 43 |   // TODO: We should detect this at ./configure time, and then maybe flag these
 | 
| 44 |   // at parse time, not runtime
 | 
| 45 | #endif
 | 
| 46 | 
 | 
| 47 |   int result = ::fnmatch(pat->data_, str->data_, flags);
 | 
| 48 |   switch (result) {
 | 
| 49 |   case 0:
 | 
| 50 |     return 1;
 | 
| 51 |   case FNM_NOMATCH:
 | 
| 52 |     return 0;
 | 
| 53 |   default:
 | 
| 54 |     // Other error
 | 
| 55 |     return -1;
 | 
| 56 |   }
 | 
| 57 | }
 | 
| 58 | 
 | 
| 59 | List<BigStr*>* glob(BigStr* pat) {
 | 
| 60 |   glob_t results;
 | 
| 61 |   // Hm, it's weird that the first one can't be called with GLOB_APPEND.  You
 | 
| 62 |   // get a segfault.
 | 
| 63 |   int flags = 0;
 | 
| 64 |   // int flags = GLOB_APPEND;
 | 
| 65 |   // flags |= GLOB_NOMAGIC;
 | 
| 66 |   int ret = glob(pat->data_, flags, NULL, &results);
 | 
| 67 | 
 | 
| 68 |   const char* err_str = NULL;
 | 
| 69 |   switch (ret) {
 | 
| 70 |   case 0:  // no error
 | 
| 71 |     break;
 | 
| 72 |   case GLOB_ABORTED:
 | 
| 73 |     err_str = "read error";
 | 
| 74 |     break;
 | 
| 75 |   case GLOB_NOMATCH:
 | 
| 76 |     // No error, because not matching isn't necessarily a problem.
 | 
| 77 |     // NOTE: This can be turned on to log overaggressive calls to glob().
 | 
| 78 |     // err_str = "nothing matched";
 | 
| 79 |     break;
 | 
| 80 |   case GLOB_NOSPACE:
 | 
| 81 |     err_str = "no dynamic memory";
 | 
| 82 |     break;
 | 
| 83 |   default:
 | 
| 84 |     err_str = "unknown problem";
 | 
| 85 |     break;
 | 
| 86 |   }
 | 
| 87 |   if (err_str) {
 | 
| 88 |     throw Alloc<RuntimeError>(StrFromC(err_str));
 | 
| 89 |   }
 | 
| 90 | 
 | 
| 91 |   // http://stackoverflow.com/questions/3512414/does-this-pylist-appendlist-py-buildvalue-leak
 | 
| 92 |   size_t n = results.gl_pathc;
 | 
| 93 |   auto matches = NewList<BigStr*>();
 | 
| 94 | 
 | 
| 95 |   // Print array of results
 | 
| 96 |   size_t i;
 | 
| 97 |   for (i = 0; i < n; i++) {
 | 
| 98 |     const char* m = results.gl_pathv[i];
 | 
| 99 |     matches->append(StrFromC(m));
 | 
| 100 |   }
 | 
| 101 |   globfree(&results);
 | 
| 102 | 
 | 
| 103 |   return matches;
 | 
| 104 | }
 | 
| 105 | 
 | 
| 106 | // Raises RuntimeError if the pattern is invalid.  TODO: Use a different
 | 
| 107 | // exception?
 | 
| 108 | List<int>* regex_search(BigStr* pattern, int cflags, BigStr* str, int eflags,
 | 
| 109 |                         int pos) {
 | 
| 110 |   cflags |= REG_EXTENDED;
 | 
| 111 |   regex_t pat;
 | 
| 112 |   int status = regcomp(&pat, pattern->data_, cflags);
 | 
| 113 |   if (status != 0) {
 | 
| 114 |     char error_desc[50];
 | 
| 115 |     regerror(status, &pat, error_desc, 50);
 | 
| 116 | 
 | 
| 117 |     char error_message[80];
 | 
| 118 |     snprintf(error_message, 80, "Invalid regex %s (%s)", pattern->data_,
 | 
| 119 |              error_desc);
 | 
| 120 | 
 | 
| 121 |     throw Alloc<ValueError>(StrFromC(error_message));
 | 
| 122 |   }
 | 
| 123 |   // log("pat = %d, str = %d", len(pattern), len(str));
 | 
| 124 | 
 | 
| 125 |   int num_groups = pat.re_nsub + 1;  // number of captures
 | 
| 126 | 
 | 
| 127 |   List<int>* indices = NewList<int>();
 | 
| 128 |   indices->reserve(num_groups * 2);
 | 
| 129 | 
 | 
| 130 |   const char* s = str->data_;
 | 
| 131 |   regmatch_t* pmatch =
 | 
| 132 |       static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * num_groups));
 | 
| 133 |   bool match = regexec(&pat, s + pos, num_groups, pmatch, eflags) == 0;
 | 
| 134 |   if (match) {
 | 
| 135 |     int i;
 | 
| 136 |     for (i = 0; i < num_groups; i++) {
 | 
| 137 |       int start = pmatch[i].rm_so;
 | 
| 138 |       if (start != -1) {
 | 
| 139 |         start += pos;
 | 
| 140 |       }
 | 
| 141 |       indices->append(start);
 | 
| 142 | 
 | 
| 143 |       int end = pmatch[i].rm_eo;
 | 
| 144 |       if (end != -1) {
 | 
| 145 |         end += pos;
 | 
| 146 |       }
 | 
| 147 |       indices->append(end);
 | 
| 148 |     }
 | 
| 149 |   }
 | 
| 150 | 
 | 
| 151 |   free(pmatch);
 | 
| 152 |   regfree(&pat);
 | 
| 153 | 
 | 
| 154 |   if (!match) {
 | 
| 155 |     return nullptr;
 | 
| 156 |   }
 | 
| 157 | 
 | 
| 158 |   return indices;
 | 
| 159 | }
 | 
| 160 | 
 | 
| 161 | // For ${//}, the number of groups is always 1, so we want 2 match position
 | 
| 162 | // results -- the whole regex (which we ignore), and then first group.
 | 
| 163 | //
 | 
| 164 | // For [[ =~ ]], do we need to count how many matches the user gave?
 | 
| 165 | 
 | 
| 166 | const int NMATCH = 2;
 | 
| 167 | 
 | 
| 168 | // Odd: This a Tuple2* not Tuple2 because it's Optional[Tuple2]!
 | 
| 169 | Tuple2<int, int>* regex_first_group_match(BigStr* pattern, BigStr* str,
 | 
| 170 |                                           int pos) {
 | 
| 171 |   regex_t pat;
 | 
| 172 |   regmatch_t m[NMATCH];
 | 
| 173 | 
 | 
| 174 |   // Could have been checked by regex_parse for [[ =~ ]], but not for glob
 | 
| 175 |   // patterns like ${foo/x*/y}.
 | 
| 176 | 
 | 
| 177 |   if (regcomp(&pat, pattern->data_, REG_EXTENDED) != 0) {
 | 
| 178 |     throw Alloc<RuntimeError>(
 | 
| 179 |         StrFromC("Invalid regex syntax (func_regex_first_group_match)"));
 | 
| 180 |   }
 | 
| 181 | 
 | 
| 182 |   // Match at offset 'pos'
 | 
| 183 |   int result = regexec(&pat, str->data_ + pos, NMATCH, m, 0 /*flags*/);
 | 
| 184 |   regfree(&pat);
 | 
| 185 | 
 | 
| 186 |   if (result != 0) {
 | 
| 187 |     return nullptr;
 | 
| 188 |   }
 | 
| 189 | 
 | 
| 190 |   // Assume there is a match
 | 
| 191 |   regoff_t start = m[1].rm_so;
 | 
| 192 |   regoff_t end = m[1].rm_eo;
 | 
| 193 |   Tuple2<int, int>* tup = Alloc<Tuple2<int, int>>(pos + start, pos + end);
 | 
| 194 | 
 | 
| 195 |   return tup;
 | 
| 196 | }
 | 
| 197 | 
 | 
| 198 | int wcswidth(BigStr* s) {
 | 
| 199 |   // Behavior of mbstowcs() depends on LC_CTYPE
 | 
| 200 | 
 | 
| 201 |   // Calculate length first
 | 
| 202 |   int num_wide_chars = ::mbstowcs(NULL, s->data_, 0);
 | 
| 203 |   if (num_wide_chars == -1) {
 | 
| 204 |     throw Alloc<UnicodeError>(StrFromC("mbstowcs() 1"));
 | 
| 205 |   }
 | 
| 206 | 
 | 
| 207 |   // Allocate buffer
 | 
| 208 |   int buf_size = (num_wide_chars + 1) * sizeof(wchar_t);
 | 
| 209 |   wchar_t* wide_chars = static_cast<wchar_t*>(malloc(buf_size));
 | 
| 210 |   DCHECK(wide_chars != nullptr);
 | 
| 211 | 
 | 
| 212 |   // Convert to wide chars
 | 
| 213 |   num_wide_chars = ::mbstowcs(wide_chars, s->data_, num_wide_chars);
 | 
| 214 |   if (num_wide_chars == -1) {
 | 
| 215 |     free(wide_chars);  // cleanup
 | 
| 216 | 
 | 
| 217 |     throw Alloc<UnicodeError>(StrFromC("mbstowcs() 2"));
 | 
| 218 |   }
 | 
| 219 | 
 | 
| 220 |   // Find number of columns
 | 
| 221 |   int width = ::wcswidth(wide_chars, num_wide_chars);
 | 
| 222 |   if (width == -1) {
 | 
| 223 |     free(wide_chars);  // cleanup
 | 
| 224 | 
 | 
| 225 |     // unprintable chars
 | 
| 226 |     throw Alloc<UnicodeError>(StrFromC("wcswidth()"));
 | 
| 227 |   }
 | 
| 228 | 
 | 
| 229 |   free(wide_chars);
 | 
| 230 |   return width;
 | 
| 231 | }
 | 
| 232 | 
 | 
| 233 | int get_terminal_width() {
 | 
| 234 |   struct winsize w;
 | 
| 235 |   if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == -1) {
 | 
| 236 |     throw Alloc<IOError>(errno);
 | 
| 237 |   }
 | 
| 238 |   return w.ws_col;
 | 
| 239 | }
 | 
| 240 | 
 | 
| 241 | }  // namespace libc
 |