mycpp/gc_builtins.cc

OILS / mycpp / gc_builtins.cc View on Github | oilshell.org

455 lines, 247 significant

1	#include <errno.h> // errno
2	#include <float.h> // DBL_MIN, DBL_MAX
3	#include <math.h> // INFINITY
4	#include <stdio.h> // required for readline/readline.h (man readline)
5
6	#include "_build/detected-cpp-config.h"
7	#include "mycpp/runtime.h"
8	#ifdef HAVE_READLINE
9	#include "cpp/frontend_pyreadline.h"
10	#endif
11
12	// Translation of Python's print().
13	void print(BigStr* s) {
14	fputs(s->data_, stdout); // print until first NUL
15	fputc('\n', stdout);
16	}
17
18	BigStr* str(int i) {
19	BigStr* s = OverAllocatedStr(kIntBufSize);
20	int length = snprintf(s->data(), kIntBufSize, "%d", i);
21	s->MaybeShrink(length);
22	return s;
23	}
24
25	BigStr* str(double d) {
26	char buf[64]; // overestimate, but we use snprintf() to be safe
27
28	int n = sizeof(buf) - 2; // in case we add '.0'
29
30	// See mycpp/float_test.cc for round-tripping test
31	// %.9g - FLOAT round trip
32	// %.17g - DOUBLE round trip
33	//
34	// https://stackoverflow.com/a/21162120
35	// https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10
36
37	int length = snprintf(buf, n, "%.17g", d);
38	// TODO: This may depend on LC_NUMERIC locale!
39
40	if (strchr(buf, 'i') \|\| strchr(buf, 'n')) { // inf, -inf, nan
41	return StrFromC(buf);
42	}
43
44	// Problem:
45	// %f prints 3.0000000 and 3.500000
46	// %g prints 3 and 3.5
47	//
48	// We want 3.0 and 3.5, so add '.0' in some cases
49	if (!strchr(buf, '.')) { // 12345 -> 12345.0
50	buf[length] = '.';
51	buf[length + 1] = '0';
52	buf[length + 2] = '\0';
53	}
54
55	return StrFromC(buf);
56	}
57	// %a is a hexfloat form, probably don't need that
58	// int length = snprintf(buf, n, "%a", d);
59
60	// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
61	// better for getting values out of Token.line without allocating?
62	//
63	// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
64	//
65	// Also for SmallStr, we don't care about interning. Only for HeapStr.
66
67	BigStr* intern(BigStr* s) {
68	// TODO: put in table gHeap.interned_
69	return s;
70	}
71
72	// Print quoted string. Called by StrFormat('%r').
73	// TODO: consider using J8 notation instead, since error messages show that
74	// string.
75	BigStr* repr(BigStr* s) {
76	// Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
77	int n = len(s);
78	int upper_bound = n * 4 + 2;
79
80	BigStr* result = OverAllocatedStr(upper_bound);
81
82	// Single quote by default.
83	char quote = '\'';
84	if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
85	quote = '"';
86	}
87	char* p = result->data_;
88
89	// From PyString_Repr()
90	*p++ = quote;
91	for (int i = 0; i < n; ++i) {
92	unsigned char c = static_cast<unsigned char>(s->data_[i]);
93	if (c == quote \|\| c == '\\') {
94	*p++ = '\\';
95	*p++ = c;
96	} else if (c == '\t') {
97	*p++ = '\\';
98	*p++ = 't';
99	} else if (c == '\n') {
100	*p++ = '\\';
101	*p++ = 'n';
102	} else if (c == '\r') {
103	*p++ = '\\';
104	*p++ = 'r';
105	} else if (0x20 <= c && c < 0x80) {
106	*p++ = c;
107	} else {
108	// Unprintable becomes \xff.
109	// TODO: Consider \yff. This is similar to J8 strings, but we don't
110	// decode UTF-8.
111	sprintf(p, "\\x%02x", c & 0xff);
112	p += 4;
113	}
114	}
115	*p++ = quote;
116	*p = '\0';
117
118	int length = p - result->data_;
119	result->MaybeShrink(length);
120	return result;
121	}
122
123	// Helper functions that don't use exceptions.
124
125	bool StringToInt(const char* s, int length, int base, int* result) {
126	if (length == 0) {
127	return false; // empty string isn't a valid integer
128	}
129
130	// Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
131	// sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
132	// static_assert(sizeof(long) == 8);
133
134	char* pos; // mutated by strtol
135
136	errno = 0;
137	long v = strtol(s, &pos, base);
138
139	if (errno == ERANGE) {
140	switch (v) {
141	case LONG_MIN:
142	return false; // underflow of long, which may be 64 bits
143	case LONG_MAX:
144	return false; // overflow of long
145	}
146	}
147
148	// It should ALSO fit in an int, not just a long
149	if (v > INT_MAX) {
150	return false;
151	}
152	if (v < INT_MIN) {
153	return false;
154	}
155
156	const char* end = s + length;
157	if (pos == end) {
158	*result = v;
159	return true; // strtol() consumed ALL characters.
160	}
161
162	while (pos < end) {
163	if (!IsAsciiWhitespace(*pos)) {
164	return false; // Trailing non-space
165	}
166	pos++;
167	}
168
169	*result = v;
170	return true; // Trailing space is OK
171	}
172
173	bool StringToInt64(const char* s, int length, int base, int64_t* result) {
174	if (length == 0) {
175	return false; // empty string isn't a valid integer
176	}
177
178	// These should be the same type
179	static_assert(sizeof(long long) == sizeof(int64_t));
180
181	char* pos; // mutated by strtol
182
183	errno = 0;
184	long long v = strtoll(s, &pos, base);
185
186	if (errno == ERANGE) {
187	switch (v) {
188	case LLONG_MIN:
189	return false; // underflow
190	case LLONG_MAX:
191	return false; // overflow
192	}
193	}
194
195	const char* end = s + length;
196	if (pos == end) {
197	*result = v;
198	return true; // strtol() consumed ALL characters.
199	}
200
201	while (pos < end) {
202	if (!IsAsciiWhitespace(*pos)) {
203	return false; // Trailing non-space
204	}
205	pos++;
206	}
207
208	*result = v;
209	return true; // Trailing space is OK
210	}
211
212	int to_int(BigStr* s, int base) {
213	int i;
214	if (StringToInt(s->data_, len(s), base, &i)) {
215	return i; // truncated to int
216	} else {
217	throw Alloc<ValueError>();
218	}
219	}
220
221	BigStr* chr(int i) {
222	// NOTE: i should be less than 256, in which we could return an object from
223	// GLOBAL_STR() pool, like StrIter
224	auto result = NewStr(1);
225	result->data_[0] = i;
226	return result;
227	}
228
229	int ord(BigStr* s) {
230	assert(len(s) == 1);
231	// signed to unsigned conversion, so we don't get values like -127
232	uint8_t c = static_cast<uint8_t>(s->data_[0]);
233	return c;
234	}
235
236	bool to_bool(BigStr* s) {
237	return len(s) != 0;
238	}
239
// Converts an int to a double.
double to_float(int i) {
  const double widened = i;
  return widened;
}
243
244	double to_float(BigStr* s) {
245	char* begin = s->data_;
246	char* end = begin + len(s);
247
248	errno = 0;
249	double result = strtod(begin, &end);
250
251	if (errno == ERANGE) { // error: overflow or underflow
252	if (result >= HUGE_VAL) {
253	return INFINITY;
254	} else if (result <= -HUGE_VAL) {
255	return -INFINITY;
256	} else if (-DBL_MIN <= result && result <= DBL_MIN) {
257	return 0.0;
258	} else {
259	FAIL("Invalid value after ERANGE");
260	}
261	}
262	if (end == begin) { // error: not a floating point number
263	throw Alloc<ValueError>();
264	}
265
266	return result;
267	}
268
269	// e.g. ('a' in 'abc')
270	bool str_contains(BigStr* haystack, BigStr* needle) {
271	// Common case
272	if (len(needle) == 1) {
273	return memchr(haystack->data_, needle->data_[0], len(haystack));
274	}
275
276	if (len(needle) > len(haystack)) {
277	return false;
278	}
279
280	// General case. TODO: We could use a smarter substring algorithm.
281
282	const char* end = haystack->data_ + len(haystack);
283	const char* last_possible = end - len(needle);
284	const char* p = haystack->data_;
285
286	while (p <= last_possible) {
287	if (memcmp(p, needle->data_, len(needle)) == 0) {
288	return true;
289	}
290	p++;
291	}
292	return false;
293	}
294
295	BigStr* str_repeat(BigStr* s, int times) {
296	// Python allows -1 too, and Oil used that
297	if (times <= 0) {
298	return kEmptyString;
299	}
300	int len_ = len(s);
301	int new_len = len_ * times;
302	BigStr* result = NewStr(new_len);
303
304	char* dest = result->data_;
305	for (int i = 0; i < times; i++) {
306	memcpy(dest, s->data_, len_);
307	dest += len_;
308	}
309	return result;
310	}
311
312	// for os_path.join()
313	// NOTE(Jesse): Perfect candidate for BoundedBuffer
314	BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
315	int a_len = len(a);
316	int b_len = len(b);
317	int c_len = len(c);
318
319	int new_len = a_len + b_len + c_len;
320	BigStr* result = NewStr(new_len);
321	char* pos = result->data_;
322
323	memcpy(pos, a->data_, a_len);
324	pos += a_len;
325
326	memcpy(pos, b->data_, b_len);
327	pos += b_len;
328
329	memcpy(pos, c->data_, c_len);
330
331	assert(pos + c_len == result->data_ + new_len);
332
333	return result;
334	}
335
336	BigStr* str_concat(BigStr* a, BigStr* b) {
337	int a_len = len(a);
338	int b_len = len(b);
339	int new_len = a_len + b_len;
340	BigStr* result = NewStr(new_len);
341	char* buf = result->data_;
342
343	memcpy(buf, a->data_, a_len);
344	memcpy(buf + a_len, b->data_, b_len);
345
346	return result;
347	}
348
349	//
350	// Comparators
351	//
352
353	bool str_equals(BigStr* left, BigStr* right) {
354	// Fast path for identical strings. String deduplication during GC could
355	// make this more likely. String interning could guarantee it, allowing us
356	// to remove memcmp().
357	if (left == right) {
358	return true;
359	}
360
361	// TODO: It would be nice to remove this condition, but I think we need MyPy
362	// strict None checking for it
363	if (left == nullptr \|\| right == nullptr) {
364	return false;
365	}
366
367	if (left->len_ != right->len_) {
368	return false;
369	}
370
371	return memcmp(left->data_, right->data_, left->len_) == 0;
372	}
373
374	bool maybe_str_equals(BigStr* left, BigStr* right) {
375	if (left && right) {
376	return str_equals(left, right);
377	}
378
379	if (!left && !right) {
380	return true; // None == None
381	}
382
383	return false; // one is None and one is a BigStr*
384	}
385
386	bool items_equal(BigStr* left, BigStr* right) {
387	return str_equals(left, right);
388	}
389
390	bool keys_equal(BigStr* left, BigStr* right) {
391	return items_equal(left, right);
392	}
393
394	bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
395	return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
396	}
397
398	bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
399	return items_equal(t1, t2);
400	}
401
402	bool items_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
403	return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
404	}
405
406	bool keys_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
407	return items_equal(t1, t2);
408	}
409
410	bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
411	// Needs SmallStr change
412	if (len(s) == c_len) {
413	return memcmp(s->data_, c_string, c_len) == 0;
414	} else {
415	return false;
416	}
417	}
418
419	bool str_equals0(const char* c_string, BigStr* s) {
420	int n = strlen(c_string);
421	if (len(s) == n) {
422	return memcmp(s->data_, c_string, n) == 0;
423	} else {
424	return false;
425	}
426	}
427
428	int hash(BigStr* s) {
429	return s->hash(fnv1);
430	}
431
// Larger of two ints.
int max(int a, int b) {
  return (a < b) ? b : a;
}
435
// Smaller of two ints.
int min(int a, int b) {
  return (b < a) ? b : a;
}
439
440	int max(List<int>* elems) {
441	int n = len(elems);
442	if (n < 1) {
443	throw Alloc<ValueError>();
444	}
445
446	int ret = elems->at(0);
447	for (int i = 0; i < n; ++i) {
448	int cand = elems->at(i);
449	if (cand > ret) {
450	ret = cand;
451	}
452	}
453
454	return ret;
455	}