mycpp/gc_builtins.cc

OILS / mycpp / gc_builtins.cc View on Github | oilshell.org

456 lines, 247 significant

1	#include <errno.h> // errno
2	#include <float.h> // DBL_MIN, DBL_MAX
3	#include <math.h> // INFINITY
4	#include <stdio.h> // required for readline/readline.h (man readline)
5
6	#include "_build/detected-cpp-config.h"
7	#include "mycpp/runtime.h"
8	#ifdef HAVE_READLINE
9	#include "cpp/frontend_pyreadline.h"
10	#endif
11
12	// Translation of Python's print().
13	void print(BigStr* s) {
14	fputs(s->data_, stdout); // print until first NUL
15	fputc('\n', stdout);
16	}
17
18	BigStr* str(int i) {
19	BigStr* s = OverAllocatedStr(kIntBufSize);
20	int length = snprintf(s->data(), kIntBufSize, "%d", i);
21	s->MaybeShrink(length);
22	return s;
23	}
24
25	// TODO:
26	// - This could use a fancy exact algorithm, not libc
27	// - Does libc depend on locale?
28	BigStr* str(double d) {
29	char buf[64]; // overestimate, but we use snprintf() to be safe
30
31	// Problem:
32	// %f prints 3.0000000 and 3.500000
33	// %g prints 3 and 3.5
34	//
35	// We want literal syntax to indicate float, so add '.'
36
37	int n = sizeof(buf) - 2; // in case we add '.0'
38
39	// %.9g digits for string that can be converted back to the same FLOAT
40	// (not double)
41	// https://stackoverflow.com/a/21162120
42	// https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10
43	int length = snprintf(buf, n, "%.9g", d);
44
45	// %a is a hexfloat form, could use that somewhere
46	// int length = snprintf(buf, n, "%a", d);
47
48	if (strchr(buf, 'i')) { // inf or -inf
49	return StrFromC(buf);
50	}
51
52	if (!strchr(buf, '.')) { // 12345 -> 12345.0
53	buf[length] = '.';
54	buf[length + 1] = '0';
55	buf[length + 2] = '\0';
56	}
57
58	return StrFromC(buf);
59	}
60
61	// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
62	// better for getting values out of Token.line without allocating?
63	//
64	// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
65	//
66	// Also for SmallStr, we don't care about interning. Only for HeapStr.
67
68	BigStr* intern(BigStr* s) {
69	// TODO: put in table gHeap.interned_
70	return s;
71	}
72
73	// Print quoted string. Called by StrFormat('%r').
74	// TODO: consider using J8 notation instead, since error messages show that
75	// string.
76	BigStr* repr(BigStr* s) {
77	// Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
78	int n = len(s);
79	int upper_bound = n * 4 + 2;
80
81	BigStr* result = OverAllocatedStr(upper_bound);
82
83	// Single quote by default.
84	char quote = '\'';
85	if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
86	quote = '"';
87	}
88	char* p = result->data_;
89
90	// From PyString_Repr()
91	*p++ = quote;
92	for (int i = 0; i < n; ++i) {
93	unsigned char c = static_cast<unsigned char>(s->data_[i]);
94	if (c == quote \|\| c == '\\') {
95	*p++ = '\\';
96	*p++ = c;
97	} else if (c == '\t') {
98	*p++ = '\\';
99	*p++ = 't';
100	} else if (c == '\n') {
101	*p++ = '\\';
102	*p++ = 'n';
103	} else if (c == '\r') {
104	*p++ = '\\';
105	*p++ = 'r';
106	} else if (0x20 <= c && c < 0x80) {
107	*p++ = c;
108	} else {
109	// Unprintable becomes \xff.
110	// TODO: Consider \yff. This is similar to J8 strings, but we don't
111	// decode UTF-8.
112	sprintf(p, "\\x%02x", c & 0xff);
113	p += 4;
114	}
115	}
116	*p++ = quote;
117	*p = '\0';
118
119	int length = p - result->data_;
120	result->MaybeShrink(length);
121	return result;
122	}
123
124	// Helper functions that don't use exceptions.
125
126	bool StringToInt(const char* s, int length, int base, int* result) {
127	if (length == 0) {
128	return false; // empty string isn't a valid integer
129	}
130
131	// Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
132	// sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
133	// static_assert(sizeof(long) == 8);
134
135	char* pos; // mutated by strtol
136
137	errno = 0;
138	long v = strtol(s, &pos, base);
139
140	if (errno == ERANGE) {
141	switch (v) {
142	case LONG_MIN:
143	return false; // underflow of long, which may be 64 bits
144	case LONG_MAX:
145	return false; // overflow of long
146	}
147	}
148
149	// It should ALSO fit in an int, not just a long
150	if (v > INT_MAX) {
151	return false;
152	}
153	if (v < INT_MIN) {
154	return false;
155	}
156
157	const char* end = s + length;
158	if (pos == end) {
159	*result = v;
160	return true; // strtol() consumed ALL characters.
161	}
162
163	while (pos < end) {
164	if (!IsAsciiWhitespace(*pos)) {
165	return false; // Trailing non-space
166	}
167	pos++;
168	}
169
170	*result = v;
171	return true; // Trailing space is OK
172	}
173
174	bool StringToInt64(const char* s, int length, int base, int64_t* result) {
175	if (length == 0) {
176	return false; // empty string isn't a valid integer
177	}
178
179	// These should be the same type
180	static_assert(sizeof(long long) == sizeof(int64_t));
181
182	char* pos; // mutated by strtol
183
184	errno = 0;
185	long long v = strtoll(s, &pos, base);
186
187	if (errno == ERANGE) {
188	switch (v) {
189	case LLONG_MIN:
190	return false; // underflow
191	case LLONG_MAX:
192	return false; // overflow
193	}
194	}
195
196	const char* end = s + length;
197	if (pos == end) {
198	*result = v;
199	return true; // strtol() consumed ALL characters.
200	}
201
202	while (pos < end) {
203	if (!IsAsciiWhitespace(*pos)) {
204	return false; // Trailing non-space
205	}
206	pos++;
207	}
208
209	*result = v;
210	return true; // Trailing space is OK
211	}
212
213	int to_int(BigStr* s, int base) {
214	int i;
215	if (StringToInt(s->data_, len(s), base, &i)) {
216	return i; // truncated to int
217	} else {
218	throw Alloc<ValueError>();
219	}
220	}
221
222	BigStr* chr(int i) {
223	// NOTE: i should be less than 256, in which we could return an object from
224	// GLOBAL_STR() pool, like StrIter
225	auto result = NewStr(1);
226	result->data_[0] = i;
227	return result;
228	}
229
230	int ord(BigStr* s) {
231	assert(len(s) == 1);
232	// signed to unsigned conversion, so we don't get values like -127
233	uint8_t c = static_cast<uint8_t>(s->data_[0]);
234	return c;
235	}
236
237	bool to_bool(BigStr* s) {
238	return len(s) != 0;
239	}
240
// Translation of Python's float(int): converts the integer to a double.
double to_float(int i) {
  const double as_double = i;
  return as_double;
}
244
245	double to_float(BigStr* s) {
246	char* begin = s->data_;
247	char* end = begin + len(s);
248
249	errno = 0;
250	double result = strtod(begin, &end);
251
252	if (errno == ERANGE) { // error: overflow or underflow
253	if (result >= HUGE_VAL) {
254	return INFINITY;
255	} else if (result <= -HUGE_VAL) {
256	return -INFINITY;
257	} else if (-DBL_MIN <= result && result <= DBL_MIN) {
258	return 0.0;
259	} else {
260	FAIL("Invalid value after ERANGE");
261	}
262	}
263	if (end == begin) { // error: not a floating point number
264	throw Alloc<ValueError>();
265	}
266
267	return result;
268	}
269
270	// e.g. ('a' in 'abc')
271	bool str_contains(BigStr* haystack, BigStr* needle) {
272	// Common case
273	if (len(needle) == 1) {
274	return memchr(haystack->data_, needle->data_[0], len(haystack));
275	}
276
277	if (len(needle) > len(haystack)) {
278	return false;
279	}
280
281	// General case. TODO: We could use a smarter substring algorithm.
282
283	const char* end = haystack->data_ + len(haystack);
284	const char* last_possible = end - len(needle);
285	const char* p = haystack->data_;
286
287	while (p <= last_possible) {
288	if (memcmp(p, needle->data_, len(needle)) == 0) {
289	return true;
290	}
291	p++;
292	}
293	return false;
294	}
295
296	BigStr* str_repeat(BigStr* s, int times) {
297	// Python allows -1 too, and Oil used that
298	if (times <= 0) {
299	return kEmptyString;
300	}
301	int len_ = len(s);
302	int new_len = len_ * times;
303	BigStr* result = NewStr(new_len);
304
305	char* dest = result->data_;
306	for (int i = 0; i < times; i++) {
307	memcpy(dest, s->data_, len_);
308	dest += len_;
309	}
310	return result;
311	}
312
313	// for os_path.join()
314	// NOTE(Jesse): Perfect candidate for BoundedBuffer
315	BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
316	int a_len = len(a);
317	int b_len = len(b);
318	int c_len = len(c);
319
320	int new_len = a_len + b_len + c_len;
321	BigStr* result = NewStr(new_len);
322	char* pos = result->data_;
323
324	memcpy(pos, a->data_, a_len);
325	pos += a_len;
326
327	memcpy(pos, b->data_, b_len);
328	pos += b_len;
329
330	memcpy(pos, c->data_, c_len);
331
332	assert(pos + c_len == result->data_ + new_len);
333
334	return result;
335	}
336
337	BigStr* str_concat(BigStr* a, BigStr* b) {
338	int a_len = len(a);
339	int b_len = len(b);
340	int new_len = a_len + b_len;
341	BigStr* result = NewStr(new_len);
342	char* buf = result->data_;
343
344	memcpy(buf, a->data_, a_len);
345	memcpy(buf + a_len, b->data_, b_len);
346
347	return result;
348	}
349
350	//
351	// Comparators
352	//
353
354	bool str_equals(BigStr* left, BigStr* right) {
355	// Fast path for identical strings. String deduplication during GC could
356	// make this more likely. String interning could guarantee it, allowing us
357	// to remove memcmp().
358	if (left == right) {
359	return true;
360	}
361
362	// TODO: It would be nice to remove this condition, but I think we need MyPy
363	// strict None checking for it
364	if (left == nullptr \|\| right == nullptr) {
365	return false;
366	}
367
368	if (left->len_ != right->len_) {
369	return false;
370	}
371
372	return memcmp(left->data_, right->data_, left->len_) == 0;
373	}
374
375	bool maybe_str_equals(BigStr* left, BigStr* right) {
376	if (left && right) {
377	return str_equals(left, right);
378	}
379
380	if (!left && !right) {
381	return true; // None == None
382	}
383
384	return false; // one is None and one is a BigStr*
385	}
386
387	bool items_equal(BigStr* left, BigStr* right) {
388	return str_equals(left, right);
389	}
390
391	bool keys_equal(BigStr* left, BigStr* right) {
392	return items_equal(left, right);
393	}
394
395	bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
396	return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
397	}
398
399	bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
400	return items_equal(t1, t2);
401	}
402
403	bool items_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
404	return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
405	}
406
407	bool keys_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
408	return items_equal(t1, t2);
409	}
410
411	bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
412	// Needs SmallStr change
413	if (len(s) == c_len) {
414	return memcmp(s->data_, c_string, c_len) == 0;
415	} else {
416	return false;
417	}
418	}
419
420	bool str_equals0(const char* c_string, BigStr* s) {
421	int n = strlen(c_string);
422	if (len(s) == n) {
423	return memcmp(s->data_, c_string, n) == 0;
424	} else {
425	return false;
426	}
427	}
428
429	int hash(BigStr* s) {
430	return s->hash(fnv1);
431	}
432
// Translation of Python's max(a, b) for two ints.
int max(int a, int b) {
  return (a < b) ? b : a;
}
436
// Translation of Python's min(a, b) for two ints.
int min(int a, int b) {
  return (b < a) ? b : a;
}
440
441	int max(List<int>* elems) {
442	int n = len(elems);
443	if (n < 1) {
444	throw Alloc<ValueError>();
445	}
446
447	int ret = elems->at(0);
448	for (int i = 0; i < n; ++i) {
449	int cand = elems->at(i);
450	if (cand > ret) {
451	ret = cand;
452	}
453	}
454
455	return ret;
456	}