mycpp/gc_builtins.cc

OILS / mycpp / gc_builtins.cc View on Github | oilshell.org

458 lines, 247 significant

1	#include <errno.h> // errno
2	#include <float.h> // DBL_MIN, DBL_MAX
3	#include <math.h> // INFINITY
4	#include <stdio.h> // required for readline/readline.h (man readline)
5
6	#include "_build/detected-cpp-config.h"
7	#include "mycpp/runtime.h"
8	#ifdef HAVE_READLINE
9	#include "cpp/frontend_pyreadline.h"
10	#endif
11
12	// Translation of Python's print().
13	void print(BigStr* s) {
14	fputs(s->data_, stdout); // print until first NUL
15	fputc('\n', stdout);
16	}
17
18	BigStr* str(int i) {
19	BigStr* s = OverAllocatedStr(kIntBufSize);
20	int length = snprintf(s->data(), kIntBufSize, "%d", i);
21	s->MaybeShrink(length);
22	return s;
23	}
24
25	// TODO:
26	// - Does libc depend on locale?
27	BigStr* str(double d) {
28	char buf[64]; // overestimate, but we use snprintf() to be safe
29
30	// Problem:
31	// %f prints 3.0000000 and 3.500000
32	// %g prints 3 and 3.5
33	//
34	// We want 3.0 and 3.5, so add '.0' in some cases
35
36	int n = sizeof(buf) - 2; // in case we add '.0'
37
38	// %.9g digits for string that can be converted back to the same FLOAT
39	// (not double)
40	//
41	// See mycpp/float_test.cc for round-tripping test
42	//
43	// https://stackoverflow.com/a/21162120
44	// https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10
45	int length = snprintf(buf, n, "%.9g", d);
46
47	// %a is a hexfloat form, could use that somewhere
48	// int length = snprintf(buf, n, "%a", d);
49
50	if (strchr(buf, 'i') \|\| strchr(buf, 'n')) { // inf, -inf, nan
51	return StrFromC(buf);
52	}
53
54	if (!strchr(buf, '.')) { // 12345 -> 12345.0
55	buf[length] = '.';
56	buf[length + 1] = '0';
57	buf[length + 2] = '\0';
58	}
59
60	return StrFromC(buf);
61	}
62
63	// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
64	// better for getting values out of Token.line without allocating?
65	//
66	// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
67	//
68	// Also for SmallStr, we don't care about interning. Only for HeapStr.
69
70	BigStr* intern(BigStr* s) {
71	// TODO: put in table gHeap.interned_
72	return s;
73	}
74
75	// Print quoted string. Called by StrFormat('%r').
76	// TODO: consider using J8 notation instead, since error messages show that
77	// string.
78	BigStr* repr(BigStr* s) {
79	// Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
80	int n = len(s);
81	int upper_bound = n * 4 + 2;
82
83	BigStr* result = OverAllocatedStr(upper_bound);
84
85	// Single quote by default.
86	char quote = '\'';
87	if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
88	quote = '"';
89	}
90	char* p = result->data_;
91
92	// From PyString_Repr()
93	*p++ = quote;
94	for (int i = 0; i < n; ++i) {
95	unsigned char c = static_cast<unsigned char>(s->data_[i]);
96	if (c == quote \|\| c == '\\') {
97	*p++ = '\\';
98	*p++ = c;
99	} else if (c == '\t') {
100	*p++ = '\\';
101	*p++ = 't';
102	} else if (c == '\n') {
103	*p++ = '\\';
104	*p++ = 'n';
105	} else if (c == '\r') {
106	*p++ = '\\';
107	*p++ = 'r';
108	} else if (0x20 <= c && c < 0x80) {
109	*p++ = c;
110	} else {
111	// Unprintable becomes \xff.
112	// TODO: Consider \yff. This is similar to J8 strings, but we don't
113	// decode UTF-8.
114	sprintf(p, "\\x%02x", c & 0xff);
115	p += 4;
116	}
117	}
118	*p++ = quote;
119	*p = '\0';
120
121	int length = p - result->data_;
122	result->MaybeShrink(length);
123	return result;
124	}
125
126	// Helper functions that don't use exceptions.
127
128	bool StringToInt(const char* s, int length, int base, int* result) {
129	if (length == 0) {
130	return false; // empty string isn't a valid integer
131	}
132
133	// Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
134	// sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
135	// static_assert(sizeof(long) == 8);
136
137	char* pos; // mutated by strtol
138
139	errno = 0;
140	long v = strtol(s, &pos, base);
141
142	if (errno == ERANGE) {
143	switch (v) {
144	case LONG_MIN:
145	return false; // underflow of long, which may be 64 bits
146	case LONG_MAX:
147	return false; // overflow of long
148	}
149	}
150
151	// It should ALSO fit in an int, not just a long
152	if (v > INT_MAX) {
153	return false;
154	}
155	if (v < INT_MIN) {
156	return false;
157	}
158
159	const char* end = s + length;
160	if (pos == end) {
161	*result = v;
162	return true; // strtol() consumed ALL characters.
163	}
164
165	while (pos < end) {
166	if (!IsAsciiWhitespace(*pos)) {
167	return false; // Trailing non-space
168	}
169	pos++;
170	}
171
172	*result = v;
173	return true; // Trailing space is OK
174	}
175
176	bool StringToInt64(const char* s, int length, int base, int64_t* result) {
177	if (length == 0) {
178	return false; // empty string isn't a valid integer
179	}
180
181	// These should be the same type
182	static_assert(sizeof(long long) == sizeof(int64_t));
183
184	char* pos; // mutated by strtol
185
186	errno = 0;
187	long long v = strtoll(s, &pos, base);
188
189	if (errno == ERANGE) {
190	switch (v) {
191	case LLONG_MIN:
192	return false; // underflow
193	case LLONG_MAX:
194	return false; // overflow
195	}
196	}
197
198	const char* end = s + length;
199	if (pos == end) {
200	*result = v;
201	return true; // strtol() consumed ALL characters.
202	}
203
204	while (pos < end) {
205	if (!IsAsciiWhitespace(*pos)) {
206	return false; // Trailing non-space
207	}
208	pos++;
209	}
210
211	*result = v;
212	return true; // Trailing space is OK
213	}
214
215	int to_int(BigStr* s, int base) {
216	int i;
217	if (StringToInt(s->data_, len(s), base, &i)) {
218	return i; // truncated to int
219	} else {
220	throw Alloc<ValueError>();
221	}
222	}
223
224	BigStr* chr(int i) {
225	// NOTE: i should be less than 256, in which we could return an object from
226	// GLOBAL_STR() pool, like StrIter
227	auto result = NewStr(1);
228	result->data_[0] = i;
229	return result;
230	}
231
232	int ord(BigStr* s) {
233	assert(len(s) == 1);
234	// signed to unsigned conversion, so we don't get values like -127
235	uint8_t c = static_cast<uint8_t>(s->data_[0]);
236	return c;
237	}
238
239	bool to_bool(BigStr* s) {
240	return len(s) != 0;
241	}
242
// Converts an int to a double.
double to_float(int i) {
  const double as_double = i;
  return as_double;
}
246
247	double to_float(BigStr* s) {
248	char* begin = s->data_;
249	char* end = begin + len(s);
250
251	errno = 0;
252	double result = strtod(begin, &end);
253
254	if (errno == ERANGE) { // error: overflow or underflow
255	if (result >= HUGE_VAL) {
256	return INFINITY;
257	} else if (result <= -HUGE_VAL) {
258	return -INFINITY;
259	} else if (-DBL_MIN <= result && result <= DBL_MIN) {
260	return 0.0;
261	} else {
262	FAIL("Invalid value after ERANGE");
263	}
264	}
265	if (end == begin) { // error: not a floating point number
266	throw Alloc<ValueError>();
267	}
268
269	return result;
270	}
271
272	// e.g. ('a' in 'abc')
273	bool str_contains(BigStr* haystack, BigStr* needle) {
274	// Common case
275	if (len(needle) == 1) {
276	return memchr(haystack->data_, needle->data_[0], len(haystack));
277	}
278
279	if (len(needle) > len(haystack)) {
280	return false;
281	}
282
283	// General case. TODO: We could use a smarter substring algorithm.
284
285	const char* end = haystack->data_ + len(haystack);
286	const char* last_possible = end - len(needle);
287	const char* p = haystack->data_;
288
289	while (p <= last_possible) {
290	if (memcmp(p, needle->data_, len(needle)) == 0) {
291	return true;
292	}
293	p++;
294	}
295	return false;
296	}
297
298	BigStr* str_repeat(BigStr* s, int times) {
299	// Python allows -1 too, and Oil used that
300	if (times <= 0) {
301	return kEmptyString;
302	}
303	int len_ = len(s);
304	int new_len = len_ * times;
305	BigStr* result = NewStr(new_len);
306
307	char* dest = result->data_;
308	for (int i = 0; i < times; i++) {
309	memcpy(dest, s->data_, len_);
310	dest += len_;
311	}
312	return result;
313	}
314
315	// for os_path.join()
316	// NOTE(Jesse): Perfect candidate for BoundedBuffer
317	BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
318	int a_len = len(a);
319	int b_len = len(b);
320	int c_len = len(c);
321
322	int new_len = a_len + b_len + c_len;
323	BigStr* result = NewStr(new_len);
324	char* pos = result->data_;
325
326	memcpy(pos, a->data_, a_len);
327	pos += a_len;
328
329	memcpy(pos, b->data_, b_len);
330	pos += b_len;
331
332	memcpy(pos, c->data_, c_len);
333
334	assert(pos + c_len == result->data_ + new_len);
335
336	return result;
337	}
338
339	BigStr* str_concat(BigStr* a, BigStr* b) {
340	int a_len = len(a);
341	int b_len = len(b);
342	int new_len = a_len + b_len;
343	BigStr* result = NewStr(new_len);
344	char* buf = result->data_;
345
346	memcpy(buf, a->data_, a_len);
347	memcpy(buf + a_len, b->data_, b_len);
348
349	return result;
350	}
351
352	//
353	// Comparators
354	//
355
356	bool str_equals(BigStr* left, BigStr* right) {
357	// Fast path for identical strings. String deduplication during GC could
358	// make this more likely. String interning could guarantee it, allowing us
359	// to remove memcmp().
360	if (left == right) {
361	return true;
362	}
363
364	// TODO: It would be nice to remove this condition, but I think we need MyPy
365	// strict None checking for it
366	if (left == nullptr \|\| right == nullptr) {
367	return false;
368	}
369
370	if (left->len_ != right->len_) {
371	return false;
372	}
373
374	return memcmp(left->data_, right->data_, left->len_) == 0;
375	}
376
377	bool maybe_str_equals(BigStr* left, BigStr* right) {
378	if (left && right) {
379	return str_equals(left, right);
380	}
381
382	if (!left && !right) {
383	return true; // None == None
384	}
385
386	return false; // one is None and one is a BigStr*
387	}
388
389	bool items_equal(BigStr* left, BigStr* right) {
390	return str_equals(left, right);
391	}
392
393	bool keys_equal(BigStr* left, BigStr* right) {
394	return items_equal(left, right);
395	}
396
397	bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
398	return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
399	}
400
401	bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
402	return items_equal(t1, t2);
403	}
404
405	bool items_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
406	return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
407	}
408
409	bool keys_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
410	return items_equal(t1, t2);
411	}
412
413	bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
414	// Needs SmallStr change
415	if (len(s) == c_len) {
416	return memcmp(s->data_, c_string, c_len) == 0;
417	} else {
418	return false;
419	}
420	}
421
422	bool str_equals0(const char* c_string, BigStr* s) {
423	int n = strlen(c_string);
424	if (len(s) == n) {
425	return memcmp(s->data_, c_string, n) == 0;
426	} else {
427	return false;
428	}
429	}
430
431	int hash(BigStr* s) {
432	return s->hash(fnv1);
433	}
434
// Larger of two ints.
int max(int a, int b) {
  return (a < b) ? b : a;
}
438
// Smaller of two ints.
int min(int a, int b) {
  return (b < a) ? b : a;
}
442
443	int max(List<int>* elems) {
444	int n = len(elems);
445	if (n < 1) {
446	throw Alloc<ValueError>();
447	}
448
449	int ret = elems->at(0);
450	for (int i = 0; i < n; ++i) {
451	int cand = elems->at(i);
452	if (cand > ret) {
453	ret = cand;
454	}
455	}
456
457	return ret;
458	}