cpp/libc_test.cc

OILS / cpp / libc_test.cc View on Github | oilshell.org

403 lines, 237 significant

1	#include "cpp/libc.h"
2
3	#include <locale.h> // setlocale()
4	#include <regex.h> // regcomp()
5	#include <unistd.h> // gethostname()
6	#include <wctype.h> // towupper()
7
8	#include "mycpp/runtime.h"
9	#include "vendor/greatest.h"
10
11	TEST hostname_test() {
12	BigStr* s0 = libc::gethostname();
13	ASSERT(s0 != nullptr);
14
15	char buf[1024];
16	ASSERT(gethostname(buf, HOST_NAME_MAX) == 0);
17	ASSERT(str_equals(s0, StrFromC(buf)));
18
19	PASS();
20	}
21
22	TEST realpath_test() {
23	BigStr* result = libc::realpath(StrFromC("/"));
24	ASSERT(str_equals(StrFromC("/"), result));
25
26	bool caught = false;
27	try {
28	libc::realpath(StrFromC("/nonexistent_ZZZ"));
29	} catch (IOError_OSError* e) {
30	caught = true;
31	}
32	ASSERT(caught);
33
34	PASS();
35	}
36
37	TEST libc_test() {
38	log("sizeof(wchar_t) = %d", sizeof(wchar_t));
39
40	int width = 0;
41
42	// TODO: enable this test. Is it not picking LC_CTYPE?
43	// Do we have to do some initialization like libc.cpython_reset_locale() ?
44	#if 0
45	try {
46	// mu character \u{03bc} in utf-8
47	width = libc::wcswidth(StrFromC("\xce\xbc"));
48	} catch (UnicodeError* e) {
49	log("UnicodeError %s", e->message->data_);
50	}
51	ASSERT_EQ_FMT(2, width, "%d");
52	#endif
53
54	BigStr* h = libc::gethostname();
55	log("gethostname() = %s %d", h->data_, len(h));
56
57	width = libc::wcswidth(StrFromC("foo"));
58	ASSERT_EQ(3, width);
59
60	libc::print_time(0.1, 0.2, 0.3);
61
62	PASS();
63	}
64
65	static List<BigStr> Groups(BigStr* s, List<int>* indices) {
66	List<BigStr> groups = NewList<BigStr*>();
67	int n = len(indices) / 2;
68	for (int i = 0; i < n; ++i) {
69	int start = indices->at(2 * i);
70	int end = indices->at(2 * i + 1);
71	if (start == -1) {
72	groups->append(nullptr);
73	} else {
74	groups->append(s->slice(start, end));
75	}
76	}
77	return groups;
78	}
79
80	TEST regex_wrapper_test() {
81	BigStr* s1 = StrFromC("-abaacaaa");
82	List<int>* indices = libc::regex_search(StrFromC("(a+).(a+)"), 0, s1, 0);
83	List<BigStr> results = Groups(s1, indices);
84	ASSERT_EQ_FMT(3, len(results), "%d");
85	ASSERT(str_equals(StrFromC("abaa"), results->at(0))); // whole match
86	ASSERT(str_equals(StrFromC("a"), results->at(1)));
87	ASSERT(str_equals(StrFromC("aa"), results->at(2)));
88
89	indices = libc::regex_search(StrFromC("z+"), 0, StrFromC("abaacaaa"), 0);
90	ASSERT_EQ(nullptr, indices);
91
92	// Alternation gives unmatched group
93	BigStr* s2 = StrFromC("b");
94	indices = libc::regex_search(StrFromC("(a)\|(b)"), 0, s2, 0);
95	results = Groups(s2, indices);
96	ASSERT_EQ_FMT(3, len(results), "%d");
97	ASSERT(str_equals(StrFromC("b"), results->at(0))); // whole match
98	ASSERT_EQ(nullptr, results->at(1));
99	ASSERT(str_equals(StrFromC("b"), results->at(2)));
100
101	// Like Unicode test below
102	indices = libc::regex_search(StrFromC("_._"), 0, StrFromC("_x_"), 0);
103	ASSERT(indices != nullptr);
104	ASSERT_EQ_FMT(2, len(indices), "%d");
105	ASSERT_EQ_FMT(0, indices->at(0), "%d");
106	ASSERT_EQ_FMT(3, indices->at(1), "%d");
107
108	// TODO(unicode)
109	#if 0
110	//indices = libc::regex_search(StrFromC("_._"), 0, StrFromC("_\u03bc_"), 0);
111	indices = libc::regex_search(StrFromC("_._"), 0, StrFromC("_μ_"), 0);
112	ASSERT(indices != nullptr);
113	ASSERT_EQ_FMT(2, len(indices), "%d");
114	ASSERT_EQ_FMT(0, indices->at(0), "%d");
115	ASSERT_EQ_FMT(0, indices->at(0), "%d");
116	#endif
117
118	Tuple2<int, int>* result;
119	BigStr* s = StrFromC("oXooXoooXoX");
120	result = libc::regex_first_group_match(StrFromC("(X.)"), s, 0);
121	ASSERT_EQ_FMT(1, result->at0(), "%d");
122	ASSERT_EQ_FMT(3, result->at1(), "%d");
123
124	result = libc::regex_first_group_match(StrFromC("(X.)"), s, 3);
125	ASSERT_EQ_FMT(4, result->at0(), "%d");
126	ASSERT_EQ_FMT(6, result->at1(), "%d");
127
128	result = libc::regex_first_group_match(StrFromC("(X.)"), s, 6);
129	ASSERT_EQ_FMT(8, result->at0(), "%d");
130	ASSERT_EQ_FMT(10, result->at1(), "%d");
131
132	PASS();
133	}
134
135	TEST glob_test() {
136	// This depends on the file system
137	auto files = libc::glob(StrFromC("*.testdata"));
138	// 3 files are made by the shell wrapper
139	ASSERT_EQ_FMT(3, len(files), "%d");
140
141	print(files->at(0));
142
143	auto files2 = libc::glob(StrFromC("*.pyzzz"));
144	ASSERT_EQ_FMT(0, len(files2), "%d");
145
146	PASS();
147	}
148
149	TEST fnmatch_test() {
150	BigStr* s1 = (StrFromC("foo.py "))->strip();
151	ASSERT(libc::fnmatch(StrFromC("*.py"), s1));
152	ASSERT(!libc::fnmatch(StrFromC("*.py"), StrFromC("foo.p")));
153
154	// Unicode - ? is byte or code point?
155	ASSERT(libc::fnmatch(StrFromC("_?_"), StrFromC("_x_")));
156
157	// TODO(unicode)
158	// ASSERT(libc::fnmatch(StrFromC("_?_"), StrFromC("_\u03bc_")));
159	// ASSERT(libc::fnmatch(StrFromC("_?_"), StrFromC("_μ_")));
160
161	// extended glob
162	ASSERT(libc::fnmatch(StrFromC("*(foo\|bar).py"), StrFromC("foo.py")));
163	ASSERT(!libc::fnmatch(StrFromC("*(foo\|bar).py"), StrFromC("foo.p")));
164
165	PASS();
166	}
167
168	TEST for_test_coverage() {
169	// Sometimes we're not connected to a terminal
170	try {
171	libc::get_terminal_width();
172	} catch (IOError_OSError* e) {
173	}
174
175	PASS();
176	}
177
178	void FindAll(const char* p, const char* s) {
179	regex_t pat;
180
181	int cflags = REG_EXTENDED;
182	if (regcomp(&pat, p, cflags) != 0) {
183	FAIL();
184	}
185	int outlen = pat.re_nsub + 1; // number of captures
186
187	// TODO: Could statically allocate 99, and assert that re_nsub is less than
188	// 99. Would speed up loops.
189	regmatch_t* pmatch =
190	static_cast<regmatch_t>(malloc(sizeof(regmatch_t) outlen));
191
192	int cur_pos = 0;
193	// int n = strlen(s);
194	while (true) {
195	// Necessary so ^ doesn't match in the middle!
196	int eflags = cur_pos == 0 ? 0 : REG_NOTBOL;
197	bool match = regexec(&pat, s + cur_pos, outlen, pmatch, eflags) == 0;
198
199	if (!match) {
200	break;
201	}
202	int i;
203	for (i = 0; i < outlen; i++) {
204	int start = pmatch[i].rm_so;
205	int end = pmatch[i].rm_eo;
206	int len = end - start;
207	BigStr* m = StrFromC(s + cur_pos + start, len);
208	log("%d GROUP %d (%d .. %d) = [%s]", cur_pos, i, start, end, m->data_);
209	}
210	log("");
211	int match_len = pmatch[0].rm_eo;
212	if (match_len == 0) {
213	break;
214	}
215	cur_pos += match_len;
216	}
217
218	free(pmatch);
219	regfree(&pat);
220	}
221
// Input shared by the FindAll() tests; it begins with two adjacent matches.
const char* s =
    "a345y-axy-"  // adjacent matches
    " there b789y- cy-";
224
225	TEST regex_unanchored() {
226	const char* unanchored = "[abc]([0-9]*)(x?)(y)-";
227	FindAll(unanchored, s);
228
229	PASS();
230	}
231
232	TEST regex_caret() {
233	const char* anchored = "^[abc]([0-9]*)(x?)(y)-";
234	FindAll(anchored, s);
235
236	PASS();
237	}
238
239	TEST regex_lexer() {
240	// like the Yaks / Make-a-Lisp pattern
241	const char* lexer = "([a-z]+)\|([0-9]+)\|([ ]+)\|([+-])";
242	FindAll(lexer, s);
243
244	PASS();
245	}
246
247	TEST regex_repeat_with_capture() {
248	const char* lexer = "(([a-z]+)([0-9]+)-)((A+)\|(Z+))";
249	FindAll(lexer, "a0-b1-c2-AAZZZA");
250	// Groups are weird
251	// whole match 0: a0-b1-c2-
252	// 1: c2- # last repetition
253	// 2: c # last one
254	// 3: 2 # last one
255	//
256	// And then there's an empty match
257	//
258	// Ideas:
259	// - disallow nested groups in Eggex?
260	// - I really care about the inner ones -- groups 2 and 3
261	// - I want flat groups
262
263	PASS();
264	}
265
266	// Disallow this in eggex, as well as the above
267	TEST regex_nested_capture() {
268	const char* lexer = "(([a-z]+)([0-9]+))";
269	FindAll(lexer, "a0");
270	PASS();
271	}
272
273	// I think we allow this in eggex
274	TEST regex_alt_with_capture() {
275	const char* lexer = "([a-z]+)\|([0-9]+)(-)";
276	FindAll(lexer, "x-");
277	FindAll(lexer, "7-");
278	PASS();
279	}
280
281	TEST regex_unicode() {
282	regex_t pat;
283
284	// 1 or 2 bytes
285	// const char* p = "_..?_";
286	// const char* p = "_[^a]_";
287	const char* p = "_._"; // 1 byte, not code point?
288
289	if (regcomp(&pat, p, REG_EXTENDED) != 0) {
290	FAIL();
291	}
292	int outlen = pat.re_nsub + 1; // number of captures
293	regmatch_t* pmatch =
294	static_cast<regmatch_t>(malloc(sizeof(regmatch_t) outlen));
295
296	int result;
297
298	const char* bad = "_xyz_";
299	result = regexec(&pat, bad, outlen, pmatch, 0);
300	ASSERT_EQ_FMT(1, result, "%d"); // does not match
301
302	const char* a = "_x_";
303	result = regexec(&pat, a, outlen, pmatch, 0);
304	ASSERT_EQ_FMT(0, result, "%d");
305
306	// Doesn't change anything
307	// int lc_what = LC_ALL;
308	int lc_what = LC_CTYPE;
309
310	// char* saved_locale = setlocale(LC_ALL, "");
311	// char* saved_locale = setlocale(LC_ALL, NULL);
312
313	// char* saved_locale = setlocale(lc_what, NULL);
314
315	#if 0
316	// Doesn't change anything?
317	//if (setlocale(LC_CTYPE, "C.utf8") == NULL) {
318	if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) {
319	log("Couldn't set locale to C.utf8");
320	FAIL();
321	}
322	#endif
323
324	// const char* u = "_μ_";
325	const char* u = "_\u03bc_";
326	log("a = %d bytes", strlen(a));
327	log("u = %d bytes", strlen(u));
328	result = regexec(&pat, u, outlen, pmatch, 0);
329
330	#if 0
331	if (setlocale(lc_what, saved_locale) == NULL) {
332	log("Couldn't restore locale");
333	FAIL();
334	}
335	#endif
336
337	free(pmatch); // Clean up before test failures
338	regfree(&pat);
339
340	// TODO(unicode)
341	// ASSERT_EQ_FMT(0, result, "%d");
342
343	PASS();
344	}
345
346	TEST casefold_test() {
347	#if 0
348	// Turkish
349	if (setlocale(LC_CTYPE, "tr_TR.utf8") == NULL) {
350	log("Couldn't set locale to tr_TR.utf8");
351	FAIL();
352	}
353	#endif
354
355	// LC_CTYPE_MASK instead of LC_CTYPE
356	locale_t turkish = newlocale(LC_CTYPE_MASK, "tr_TR.utf8", NULL);
357
358	int u = toupper('i');
359	int wu = towupper('i');
360	int wul = towupper_l('i', turkish);
361
362	// Regular: upper case i is I, 73
363	// Turkish: upper case is 304
364	log("upper = %d", u);
365	log("wide upper = %d", wu);
366	log("wide upper locale = %d", wul);
367
368	freelocale(turkish);
369
370	PASS();
371	}
372
373	GREATEST_MAIN_DEFS();
374
375	int main(int argc, char** argv) {
376	gHeap.Init();
377
378	GREATEST_MAIN_BEGIN();
379
380	RUN_TEST(hostname_test);
381	RUN_TEST(realpath_test);
382	RUN_TEST(libc_test);
383	RUN_TEST(regex_wrapper_test);
384	RUN_TEST(glob_test);
385	RUN_TEST(fnmatch_test);
386	RUN_TEST(for_test_coverage);
387
388	RUN_TEST(regex_unanchored);
389	RUN_TEST(regex_caret);
390	RUN_TEST(regex_lexer);
391	RUN_TEST(regex_repeat_with_capture);
392	RUN_TEST(regex_alt_with_capture);
393	RUN_TEST(regex_nested_capture);
394	RUN_TEST(regex_unicode);
395
396	// Crashes in CI? Because of Turkish locale?
397	// RUN_TEST(casefold_test);
398
399	gHeap.CleanProcessExit();
400
401	GREATEST_MAIN_END();
402	return 0;
403	}