mycpp/gc_builtins.cc

OILS / mycpp / gc_builtins.cc View on Github | oilshell.org

461 lines, 247 significant

1	#include <errno.h> // errno
2	#include <float.h> // DBL_MIN, DBL_MAX
3	#include <math.h> // INFINITY
4	#include <stdio.h> // required for readline/readline.h (man readline)
5
6	#include "_build/detected-cpp-config.h"
7	#include "mycpp/runtime.h"
8	#ifdef HAVE_READLINE
9	#include "cpp/frontend_pyreadline.h"
10	#endif
11
12	// Translation of Python's print().
13	void print(BigStr* s) {
14	fputs(s->data_, stdout); // print until first NUL
15	fputc('\n', stdout);
16	}
17
18	BigStr* str(int i) {
19	BigStr* s = OverAllocatedStr(kIntBufSize);
20	int length = snprintf(s->data(), kIntBufSize, "%d", i);
21	s->MaybeShrink(length);
22	return s;
23	}
24
25	BigStr* str(double d) {
26	char buf[64]; // overestimate, but we use snprintf() to be safe
27
28	int n = sizeof(buf) - 2; // in case we add '.0'
29
30	// The round tripping test in mycpp/float_test.cc tells us:
31	// %.9g - FLOAT round trip
32	// %.17g - DOUBLE round trip
33	// But this causes problems in practice, e.g. for 3.14, or 1/3
34	// int length = snprintf(buf, n, "%.17g", d);
35
36	// So use 1 less digit, which happens to match Python 3 and node.js (but not
37	// Python 2)
38	int length = snprintf(buf, n, "%.16g", d);
39
40	// TODO: This may depend on LC_NUMERIC locale!
41
42	// We may return the strings:
43	// inf -inf nan
44	// But this shouldn't come up much, because Python code changes it to:
45	// INFINITY -INFINITY NAN
46	if (strchr(buf, 'i') \|\| strchr(buf, 'n')) {
47	return StrFromC(buf); // don't add .0
48	}
49
50	// Problem:
51	// %f prints 3.0000000 and 3.500000
52	// %g prints 3 and 3.5
53	//
54	// We want 3.0 and 3.5, so add '.0' in some cases
55	if (!strchr(buf, '.')) { // 12345 -> 12345.0
56	buf[length] = '.';
57	buf[length + 1] = '0';
58	buf[length + 2] = '\0';
59	}
60
61	return StrFromC(buf);
62	}
63	// %a is a hexfloat form, probably don't need that
64	// int length = snprintf(buf, n, "%a", d);
65
66	// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
67	// better for getting values out of Token.line without allocating?
68	//
69	// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
70	//
71	// Also for SmallStr, we don't care about interning. Only for HeapStr.
72
73	BigStr* intern(BigStr* s) {
74	// TODO: put in table gHeap.interned_
75	return s;
76	}
77
78	// Print quoted string. Called by StrFormat('%r').
79	// TODO: consider using J8 notation instead, since error messages show that
80	// string.
81	BigStr* repr(BigStr* s) {
82	// Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
83	int n = len(s);
84	int upper_bound = n * 4 + 2;
85
86	BigStr* result = OverAllocatedStr(upper_bound);
87
88	// Single quote by default.
89	char quote = '\'';
90	if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
91	quote = '"';
92	}
93	char* p = result->data_;
94
95	// From PyString_Repr()
96	*p++ = quote;
97	for (int i = 0; i < n; ++i) {
98	unsigned char c = static_cast<unsigned char>(s->data_[i]);
99	if (c == quote \|\| c == '\\') {
100	*p++ = '\\';
101	*p++ = c;
102	} else if (c == '\t') {
103	*p++ = '\\';
104	*p++ = 't';
105	} else if (c == '\n') {
106	*p++ = '\\';
107	*p++ = 'n';
108	} else if (c == '\r') {
109	*p++ = '\\';
110	*p++ = 'r';
111	} else if (0x20 <= c && c < 0x80) {
112	*p++ = c;
113	} else {
114	// Unprintable becomes \xff.
115	// TODO: Consider \yff. This is similar to J8 strings, but we don't
116	// decode UTF-8.
117	sprintf(p, "\\x%02x", c & 0xff);
118	p += 4;
119	}
120	}
121	*p++ = quote;
122	*p = '\0';
123
124	int length = p - result->data_;
125	result->MaybeShrink(length);
126	return result;
127	}
128
129	// Helper functions that don't use exceptions.
130
131	bool StringToInt(const char* s, int length, int base, int* result) {
132	if (length == 0) {
133	return false; // empty string isn't a valid integer
134	}
135
136	// Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
137	// sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
138	// static_assert(sizeof(long) == 8);
139
140	char* pos; // mutated by strtol
141
142	errno = 0;
143	long v = strtol(s, &pos, base);
144
145	if (errno == ERANGE) {
146	switch (v) {
147	case LONG_MIN:
148	return false; // underflow of long, which may be 64 bits
149	case LONG_MAX:
150	return false; // overflow of long
151	}
152	}
153
154	// It should ALSO fit in an int, not just a long
155	if (v > INT_MAX) {
156	return false;
157	}
158	if (v < INT_MIN) {
159	return false;
160	}
161
162	const char* end = s + length;
163	if (pos == end) {
164	*result = v;
165	return true; // strtol() consumed ALL characters.
166	}
167
168	while (pos < end) {
169	if (!IsAsciiWhitespace(*pos)) {
170	return false; // Trailing non-space
171	}
172	pos++;
173	}
174
175	*result = v;
176	return true; // Trailing space is OK
177	}
178
179	bool StringToInt64(const char* s, int length, int base, int64_t* result) {
180	if (length == 0) {
181	return false; // empty string isn't a valid integer
182	}
183
184	// These should be the same type
185	static_assert(sizeof(long long) == sizeof(int64_t));
186
187	char* pos; // mutated by strtol
188
189	errno = 0;
190	long long v = strtoll(s, &pos, base);
191
192	if (errno == ERANGE) {
193	switch (v) {
194	case LLONG_MIN:
195	return false; // underflow
196	case LLONG_MAX:
197	return false; // overflow
198	}
199	}
200
201	const char* end = s + length;
202	if (pos == end) {
203	*result = v;
204	return true; // strtol() consumed ALL characters.
205	}
206
207	while (pos < end) {
208	if (!IsAsciiWhitespace(*pos)) {
209	return false; // Trailing non-space
210	}
211	pos++;
212	}
213
214	*result = v;
215	return true; // Trailing space is OK
216	}
217
218	int to_int(BigStr* s, int base) {
219	int i;
220	if (StringToInt(s->data_, len(s), base, &i)) {
221	return i; // truncated to int
222	} else {
223	throw Alloc<ValueError>();
224	}
225	}
226
227	BigStr* chr(int i) {
228	// NOTE: i should be less than 256, in which we could return an object from
229	// GLOBAL_STR() pool, like StrIter
230	auto result = NewStr(1);
231	result->data_[0] = i;
232	return result;
233	}
234
235	int ord(BigStr* s) {
236	assert(len(s) == 1);
237	// signed to unsigned conversion, so we don't get values like -127
238	uint8_t c = static_cast<uint8_t>(s->data_[0]);
239	return c;
240	}
241
242	bool to_bool(BigStr* s) {
243	return len(s) != 0;
244	}
245
// Translation of Python's float(int): converts `i` to a double.
double to_float(int i) {
  const double as_double = i;
  return as_double;
}
249
250	double to_float(BigStr* s) {
251	char* begin = s->data_;
252	char* end = begin + len(s);
253
254	errno = 0;
255	double result = strtod(begin, &end);
256
257	if (errno == ERANGE) { // error: overflow or underflow
258	if (result >= HUGE_VAL) {
259	return INFINITY;
260	} else if (result <= -HUGE_VAL) {
261	return -INFINITY;
262	} else if (-DBL_MIN <= result && result <= DBL_MIN) {
263	return 0.0;
264	} else {
265	FAIL("Invalid value after ERANGE");
266	}
267	}
268	if (end == begin) { // error: not a floating point number
269	throw Alloc<ValueError>();
270	}
271
272	return result;
273	}
274
275	// e.g. ('a' in 'abc')
276	bool str_contains(BigStr* haystack, BigStr* needle) {
277	// Common case
278	if (len(needle) == 1) {
279	return memchr(haystack->data_, needle->data_[0], len(haystack));
280	}
281
282	if (len(needle) > len(haystack)) {
283	return false;
284	}
285
286	// General case. TODO: We could use a smarter substring algorithm.
287
288	const char* end = haystack->data_ + len(haystack);
289	const char* last_possible = end - len(needle);
290	const char* p = haystack->data_;
291
292	while (p <= last_possible) {
293	if (memcmp(p, needle->data_, len(needle)) == 0) {
294	return true;
295	}
296	p++;
297	}
298	return false;
299	}
300
301	BigStr* str_repeat(BigStr* s, int times) {
302	// Python allows -1 too, and Oil used that
303	if (times <= 0) {
304	return kEmptyString;
305	}
306	int len_ = len(s);
307	int new_len = len_ * times;
308	BigStr* result = NewStr(new_len);
309
310	char* dest = result->data_;
311	for (int i = 0; i < times; i++) {
312	memcpy(dest, s->data_, len_);
313	dest += len_;
314	}
315	return result;
316	}
317
318	// for os_path.join()
319	// NOTE(Jesse): Perfect candidate for BoundedBuffer
320	BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
321	int a_len = len(a);
322	int b_len = len(b);
323	int c_len = len(c);
324
325	int new_len = a_len + b_len + c_len;
326	BigStr* result = NewStr(new_len);
327	char* pos = result->data_;
328
329	memcpy(pos, a->data_, a_len);
330	pos += a_len;
331
332	memcpy(pos, b->data_, b_len);
333	pos += b_len;
334
335	memcpy(pos, c->data_, c_len);
336
337	assert(pos + c_len == result->data_ + new_len);
338
339	return result;
340	}
341
342	BigStr* str_concat(BigStr* a, BigStr* b) {
343	int a_len = len(a);
344	int b_len = len(b);
345	int new_len = a_len + b_len;
346	BigStr* result = NewStr(new_len);
347	char* buf = result->data_;
348
349	memcpy(buf, a->data_, a_len);
350	memcpy(buf + a_len, b->data_, b_len);
351
352	return result;
353	}
354
355	//
356	// Comparators
357	//
358
359	bool str_equals(BigStr* left, BigStr* right) {
360	// Fast path for identical strings. String deduplication during GC could
361	// make this more likely. String interning could guarantee it, allowing us
362	// to remove memcmp().
363	if (left == right) {
364	return true;
365	}
366
367	// TODO: It would be nice to remove this condition, but I think we need MyPy
368	// strict None checking for it
369	if (left == nullptr \|\| right == nullptr) {
370	return false;
371	}
372
373	if (left->len_ != right->len_) {
374	return false;
375	}
376
377	return memcmp(left->data_, right->data_, left->len_) == 0;
378	}
379
380	bool maybe_str_equals(BigStr* left, BigStr* right) {
381	if (left && right) {
382	return str_equals(left, right);
383	}
384
385	if (!left && !right) {
386	return true; // None == None
387	}
388
389	return false; // one is None and one is a BigStr*
390	}
391
392	bool items_equal(BigStr* left, BigStr* right) {
393	return str_equals(left, right);
394	}
395
396	bool keys_equal(BigStr* left, BigStr* right) {
397	return items_equal(left, right);
398	}
399
400	bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
401	return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
402	}
403
404	bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
405	return items_equal(t1, t2);
406	}
407
408	bool items_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
409	return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
410	}
411
412	bool keys_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
413	return items_equal(t1, t2);
414	}
415
416	bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
417	// Needs SmallStr change
418	if (len(s) == c_len) {
419	return memcmp(s->data_, c_string, c_len) == 0;
420	} else {
421	return false;
422	}
423	}
424
425	bool str_equals0(const char* c_string, BigStr* s) {
426	int n = strlen(c_string);
427	if (len(s) == n) {
428	return memcmp(s->data_, c_string, n) == 0;
429	} else {
430	return false;
431	}
432	}
433
434	int hash(BigStr* s) {
435	return s->hash(fnv1);
436	}
437
// Translation of Python's max(a, b) for ints: the larger of the two.
int max(int a, int b) {
  return (a < b) ? b : a;
}
441
// Translation of Python's min(a, b) for ints: the smaller of the two.
int min(int a, int b) {
  return (b < a) ? b : a;
}
445
446	int max(List<int>* elems) {
447	int n = len(elems);
448	if (n < 1) {
449	throw Alloc<ValueError>();
450	}
451
452	int ret = elems->at(0);
453	for (int i = 0; i < n; ++i) {
454	int cand = elems->at(i);
455	if (cand > ret) {
456	ret = cand;
457	}
458	}
459
460	return ret;
461	}