OILS / cpp / libc_test.cc View on Github | oilshell.org

403 lines, 237 significant
1#include "cpp/libc.h"
2
3#include <locale.h> // setlocale()
4#include <regex.h> // regcomp()
5#include <unistd.h> // gethostname()
6#include <wctype.h> // towupper()
7
8#include "mycpp/runtime.h"
9#include "vendor/greatest.h"
10
11TEST hostname_test() {
12 BigStr* s0 = libc::gethostname();
13 ASSERT(s0 != nullptr);
14
15 char buf[1024];
16 ASSERT(gethostname(buf, HOST_NAME_MAX) == 0);
17 ASSERT(str_equals(s0, StrFromC(buf)));
18
19 PASS();
20}
21
22TEST realpath_test() {
23 BigStr* result = libc::realpath(StrFromC("/"));
24 ASSERT(str_equals(StrFromC("/"), result));
25
26 bool caught = false;
27 try {
28 libc::realpath(StrFromC("/nonexistent_ZZZ"));
29 } catch (IOError_OSError* e) {
30 caught = true;
31 }
32 ASSERT(caught);
33
34 PASS();
35}
36
37TEST libc_test() {
38 log("sizeof(wchar_t) = %d", sizeof(wchar_t));
39
40 int width = 0;
41
42 // TODO: enable this test. Is it not picking LC_CTYPE?
43 // Do we have to do some initialization like libc.cpython_reset_locale() ?
44#if 0
45 try {
46 // mu character \u{03bc} in utf-8
47 width = libc::wcswidth(StrFromC("\xce\xbc"));
48 } catch (UnicodeError* e) {
49 log("UnicodeError %s", e->message->data_);
50 }
51 ASSERT_EQ_FMT(2, width, "%d");
52#endif
53
54 BigStr* h = libc::gethostname();
55 log("gethostname() = %s %d", h->data_, len(h));
56
57 width = libc::wcswidth(StrFromC("foo"));
58 ASSERT_EQ(3, width);
59
60 libc::print_time(0.1, 0.2, 0.3);
61
62 PASS();
63}
64
65static List<BigStr*>* Groups(BigStr* s, List<int>* indices) {
66 List<BigStr*>* groups = NewList<BigStr*>();
67 int n = len(indices) / 2;
68 for (int i = 0; i < n; ++i) {
69 int start = indices->at(2 * i);
70 int end = indices->at(2 * i + 1);
71 if (start == -1) {
72 groups->append(nullptr);
73 } else {
74 groups->append(s->slice(start, end));
75 }
76 }
77 return groups;
78}
79
80TEST regex_wrapper_test() {
81 BigStr* s1 = StrFromC("-abaacaaa");
82 List<int>* indices = libc::regex_search(StrFromC("(a+).(a+)"), 0, s1, 0);
83 List<BigStr*>* results = Groups(s1, indices);
84 ASSERT_EQ_FMT(3, len(results), "%d");
85 ASSERT(str_equals(StrFromC("abaa"), results->at(0))); // whole match
86 ASSERT(str_equals(StrFromC("a"), results->at(1)));
87 ASSERT(str_equals(StrFromC("aa"), results->at(2)));
88
89 indices = libc::regex_search(StrFromC("z+"), 0, StrFromC("abaacaaa"), 0);
90 ASSERT_EQ(nullptr, indices);
91
92 // Alternation gives unmatched group
93 BigStr* s2 = StrFromC("b");
94 indices = libc::regex_search(StrFromC("(a)|(b)"), 0, s2, 0);
95 results = Groups(s2, indices);
96 ASSERT_EQ_FMT(3, len(results), "%d");
97 ASSERT(str_equals(StrFromC("b"), results->at(0))); // whole match
98 ASSERT_EQ(nullptr, results->at(1));
99 ASSERT(str_equals(StrFromC("b"), results->at(2)));
100
101 // Like Unicode test below
102 indices = libc::regex_search(StrFromC("_._"), 0, StrFromC("_x_"), 0);
103 ASSERT(indices != nullptr);
104 ASSERT_EQ_FMT(2, len(indices), "%d");
105 ASSERT_EQ_FMT(0, indices->at(0), "%d");
106 ASSERT_EQ_FMT(3, indices->at(1), "%d");
107
108 // TODO(unicode)
109#if 0
110 //indices = libc::regex_search(StrFromC("_._"), 0, StrFromC("_\u03bc_"), 0);
111 indices = libc::regex_search(StrFromC("_._"), 0, StrFromC("_μ_"), 0);
112 ASSERT(indices != nullptr);
113 ASSERT_EQ_FMT(2, len(indices), "%d");
114 ASSERT_EQ_FMT(0, indices->at(0), "%d");
115 ASSERT_EQ_FMT(0, indices->at(0), "%d");
116#endif
117
118 Tuple2<int, int>* result;
119 BigStr* s = StrFromC("oXooXoooXoX");
120 result = libc::regex_first_group_match(StrFromC("(X.)"), s, 0);
121 ASSERT_EQ_FMT(1, result->at0(), "%d");
122 ASSERT_EQ_FMT(3, result->at1(), "%d");
123
124 result = libc::regex_first_group_match(StrFromC("(X.)"), s, 3);
125 ASSERT_EQ_FMT(4, result->at0(), "%d");
126 ASSERT_EQ_FMT(6, result->at1(), "%d");
127
128 result = libc::regex_first_group_match(StrFromC("(X.)"), s, 6);
129 ASSERT_EQ_FMT(8, result->at0(), "%d");
130 ASSERT_EQ_FMT(10, result->at1(), "%d");
131
132 PASS();
133}
134
135TEST glob_test() {
136 // This depends on the file system
137 auto files = libc::glob(StrFromC("*.testdata"));
138 // 3 files are made by the shell wrapper
139 ASSERT_EQ_FMT(3, len(files), "%d");
140
141 print(files->at(0));
142
143 auto files2 = libc::glob(StrFromC("*.pyzzz"));
144 ASSERT_EQ_FMT(0, len(files2), "%d");
145
146 PASS();
147}
148
149TEST fnmatch_test() {
150 BigStr* s1 = (StrFromC("foo.py "))->strip();
151 ASSERT(libc::fnmatch(StrFromC("*.py"), s1));
152 ASSERT(!libc::fnmatch(StrFromC("*.py"), StrFromC("foo.p")));
153
154 // Unicode - ? is byte or code point?
155 ASSERT(libc::fnmatch(StrFromC("_?_"), StrFromC("_x_")));
156
157 // TODO(unicode)
158 // ASSERT(libc::fnmatch(StrFromC("_?_"), StrFromC("_\u03bc_")));
159 // ASSERT(libc::fnmatch(StrFromC("_?_"), StrFromC("_μ_")));
160
161 // extended glob
162 ASSERT(libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.py")));
163 ASSERT(!libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.p")));
164
165 PASS();
166}
167
168TEST for_test_coverage() {
169 // Sometimes we're not connected to a terminal
170 try {
171 libc::get_terminal_width();
172 } catch (IOError_OSError* e) {
173 }
174
175 PASS();
176}
177
178void FindAll(const char* p, const char* s) {
179 regex_t pat;
180
181 int cflags = REG_EXTENDED;
182 if (regcomp(&pat, p, cflags) != 0) {
183 FAIL();
184 }
185 int outlen = pat.re_nsub + 1; // number of captures
186
187 // TODO: Could statically allocate 99, and assert that re_nsub is less than
188 // 99. Would speed up loops.
189 regmatch_t* pmatch =
190 static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * outlen));
191
192 int cur_pos = 0;
193 // int n = strlen(s);
194 while (true) {
195 // Necessary so ^ doesn't match in the middle!
196 int eflags = cur_pos == 0 ? 0 : REG_NOTBOL;
197 bool match = regexec(&pat, s + cur_pos, outlen, pmatch, eflags) == 0;
198
199 if (!match) {
200 break;
201 }
202 int i;
203 for (i = 0; i < outlen; i++) {
204 int start = pmatch[i].rm_so;
205 int end = pmatch[i].rm_eo;
206 int len = end - start;
207 BigStr* m = StrFromC(s + cur_pos + start, len);
208 log("%d GROUP %d (%d .. %d) = [%s]", cur_pos, i, start, end, m->data_);
209 }
210 log("");
211 int match_len = pmatch[0].rm_eo;
212 if (match_len == 0) {
213 break;
214 }
215 cur_pos += match_len;
216 }
217
218 free(pmatch);
219 regfree(&pat);
220}
221
// Shared input for the FindAll() tests below.  It contains adjacent matches
// ("a345y-" immediately followed by "axy-") as well as matches separated by
// other text.
const char* s = "a345y-axy- there b789y- cy-";
224
225TEST regex_unanchored() {
226 const char* unanchored = "[abc]([0-9]*)(x?)(y)-";
227 FindAll(unanchored, s);
228
229 PASS();
230}
231
232TEST regex_caret() {
233 const char* anchored = "^[abc]([0-9]*)(x?)(y)-";
234 FindAll(anchored, s);
235
236 PASS();
237}
238
239TEST regex_lexer() {
240 // like the Yaks / Make-a-Lisp pattern
241 const char* lexer = "([a-z]+)|([0-9]+)|([ ]+)|([+-])";
242 FindAll(lexer, s);
243
244 PASS();
245}
246
247TEST regex_repeat_with_capture() {
248 const char* lexer = "(([a-z]+)([0-9]+)-)*((A+)|(Z+))*";
249 FindAll(lexer, "a0-b1-c2-AAZZZA");
250 // Groups are weird
251 // whole match 0: a0-b1-c2-
252 // 1: c2- # last repetition
253 // 2: c # last one
254 // 3: 2 # last one
255 //
256 // And then there's an empty match
257 //
258 // Ideas:
259 // - disallow nested groups in Eggex?
260 // - I really care about the inner ones -- groups 2 and 3
261 // - I want flat groups
262
263 PASS();
264}
265
266// Disallow this in eggex, as well as the above
267TEST regex_nested_capture() {
268 const char* lexer = "(([a-z]+)([0-9]+))";
269 FindAll(lexer, "a0");
270 PASS();
271}
272
273// I think we allow this in eggex
274TEST regex_alt_with_capture() {
275 const char* lexer = "([a-z]+)|([0-9]+)(-)";
276 FindAll(lexer, "x-");
277 FindAll(lexer, "7-");
278 PASS();
279}
280
281TEST regex_unicode() {
282 regex_t pat;
283
284 // 1 or 2 bytes
285 // const char* p = "_..?_";
286 // const char* p = "_[^a]_";
287 const char* p = "_._"; // 1 byte, not code point?
288
289 if (regcomp(&pat, p, REG_EXTENDED) != 0) {
290 FAIL();
291 }
292 int outlen = pat.re_nsub + 1; // number of captures
293 regmatch_t* pmatch =
294 static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * outlen));
295
296 int result;
297
298 const char* bad = "_xyz_";
299 result = regexec(&pat, bad, outlen, pmatch, 0);
300 ASSERT_EQ_FMT(1, result, "%d"); // does not match
301
302 const char* a = "_x_";
303 result = regexec(&pat, a, outlen, pmatch, 0);
304 ASSERT_EQ_FMT(0, result, "%d");
305
306 // Doesn't change anything
307 // int lc_what = LC_ALL;
308 int lc_what = LC_CTYPE;
309
310 // char* saved_locale = setlocale(LC_ALL, "");
311 // char* saved_locale = setlocale(LC_ALL, NULL);
312
313 // char* saved_locale = setlocale(lc_what, NULL);
314
315#if 0
316 // Doesn't change anything?
317 //if (setlocale(LC_CTYPE, "C.utf8") == NULL) {
318 if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) {
319 log("Couldn't set locale to C.utf8");
320 FAIL();
321 }
322#endif
323
324 // const char* u = "_μ_";
325 const char* u = "_\u03bc_";
326 log("a = %d bytes", strlen(a));
327 log("u = %d bytes", strlen(u));
328 result = regexec(&pat, u, outlen, pmatch, 0);
329
330#if 0
331 if (setlocale(lc_what, saved_locale) == NULL) {
332 log("Couldn't restore locale");
333 FAIL();
334 }
335#endif
336
337 free(pmatch); // Clean up before test failures
338 regfree(&pat);
339
340 // TODO(unicode)
341 // ASSERT_EQ_FMT(0, result, "%d");
342
343 PASS();
344}
345
346TEST casefold_test() {
347#if 0
348 // Turkish
349 if (setlocale(LC_CTYPE, "tr_TR.utf8") == NULL) {
350 log("Couldn't set locale to tr_TR.utf8");
351 FAIL();
352 }
353#endif
354
355 // LC_CTYPE_MASK instead of LC_CTYPE
356 locale_t turkish = newlocale(LC_CTYPE_MASK, "tr_TR.utf8", NULL);
357
358 int u = toupper('i');
359 int wu = towupper('i');
360 int wul = towupper_l('i', turkish);
361
362 // Regular: upper case i is I, 73
363 // Turkish: upper case is 304
364 log("upper = %d", u);
365 log("wide upper = %d", wu);
366 log("wide upper locale = %d", wul);
367
368 freelocale(turkish);
369
370 PASS();
371}
372
// greatest's main definitions; placed at file scope before main()
GREATEST_MAIN_DEFS();

// Test driver: initializes gHeap, then runs the enabled tests in the order
// listed below.
int main(int argc, char** argv) {
  gHeap.Init();

  GREATEST_MAIN_BEGIN();

  // Tests of the libc:: wrappers
  RUN_TEST(hostname_test);
  RUN_TEST(realpath_test);
  RUN_TEST(libc_test);
  RUN_TEST(regex_wrapper_test);
  RUN_TEST(glob_test);
  RUN_TEST(fnmatch_test);
  RUN_TEST(for_test_coverage);

  // Tests that call regcomp() / regexec() directly
  RUN_TEST(regex_unanchored);
  RUN_TEST(regex_caret);
  RUN_TEST(regex_lexer);
  RUN_TEST(regex_repeat_with_capture);
  RUN_TEST(regex_alt_with_capture);
  RUN_TEST(regex_nested_capture);
  RUN_TEST(regex_unicode);

  // Crashes in CI? Because of Turkish locale?
  // RUN_TEST(casefold_test);

  gHeap.CleanProcessExit();

  GREATEST_MAIN_END();
  return 0;
}