OILS / mycpp / gc_builtins.cc View on Github | oilshell.org

455 lines, 247 significant
1#include <errno.h> // errno
2#include <float.h> // DBL_MIN, DBL_MAX
3#include <math.h> // INFINITY
4#include <stdio.h> // required for readline/readline.h (man readline)
5
6#include "_build/detected-cpp-config.h"
7#include "mycpp/runtime.h"
8#ifdef HAVE_READLINE
9 #include "cpp/frontend_pyreadline.h"
10#endif
11
12// Translation of Python's print().
13void print(BigStr* s) {
14 fputs(s->data_, stdout); // print until first NUL
15 fputc('\n', stdout);
16}
17
18BigStr* str(int i) {
19 BigStr* s = OverAllocatedStr(kIntBufSize);
20 int length = snprintf(s->data(), kIntBufSize, "%d", i);
21 s->MaybeShrink(length);
22 return s;
23}
24
25BigStr* str(double d) {
26 char buf[64]; // overestimate, but we use snprintf() to be safe
27
28 int n = sizeof(buf) - 2; // in case we add '.0'
29
30 // See mycpp/float_test.cc for round-tripping test
31 // %.9g - FLOAT round trip
32 // %.17g - DOUBLE round trip
33 //
34 // https://stackoverflow.com/a/21162120
35 // https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10
36
37 int length = snprintf(buf, n, "%.17g", d);
38 // TODO: This may depend on LC_NUMERIC locale!
39
40 if (strchr(buf, 'i') || strchr(buf, 'n')) { // inf, -inf, nan
41 return StrFromC(buf);
42 }
43
44 // Problem:
45 // %f prints 3.0000000 and 3.500000
46 // %g prints 3 and 3.5
47 //
48 // We want 3.0 and 3.5, so add '.0' in some cases
49 if (!strchr(buf, '.')) { // 12345 -> 12345.0
50 buf[length] = '.';
51 buf[length + 1] = '0';
52 buf[length + 2] = '\0';
53 }
54
55 return StrFromC(buf);
56}
57// %a is a hexfloat form, probably don't need that
58// int length = snprintf(buf, n, "%a", d);
59
60// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
61// better for getting values out of Token.line without allocating?
62//
63// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
64//
65// Also for SmallStr, we don't care about interning. Only for HeapStr.
66
67BigStr* intern(BigStr* s) {
68 // TODO: put in table gHeap.interned_
69 return s;
70}
71
72// Print quoted string. Called by StrFormat('%r').
73// TODO: consider using J8 notation instead, since error messages show that
74// string.
75BigStr* repr(BigStr* s) {
76 // Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
77 int n = len(s);
78 int upper_bound = n * 4 + 2;
79
80 BigStr* result = OverAllocatedStr(upper_bound);
81
82 // Single quote by default.
83 char quote = '\'';
84 if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
85 quote = '"';
86 }
87 char* p = result->data_;
88
89 // From PyString_Repr()
90 *p++ = quote;
91 for (int i = 0; i < n; ++i) {
92 unsigned char c = static_cast<unsigned char>(s->data_[i]);
93 if (c == quote || c == '\\') {
94 *p++ = '\\';
95 *p++ = c;
96 } else if (c == '\t') {
97 *p++ = '\\';
98 *p++ = 't';
99 } else if (c == '\n') {
100 *p++ = '\\';
101 *p++ = 'n';
102 } else if (c == '\r') {
103 *p++ = '\\';
104 *p++ = 'r';
105 } else if (0x20 <= c && c < 0x80) {
106 *p++ = c;
107 } else {
108 // Unprintable becomes \xff.
109 // TODO: Consider \yff. This is similar to J8 strings, but we don't
110 // decode UTF-8.
111 sprintf(p, "\\x%02x", c & 0xff);
112 p += 4;
113 }
114 }
115 *p++ = quote;
116 *p = '\0';
117
118 int length = p - result->data_;
119 result->MaybeShrink(length);
120 return result;
121}
122
123// Helper functions that don't use exceptions.
124
125bool StringToInt(const char* s, int length, int base, int* result) {
126 if (length == 0) {
127 return false; // empty string isn't a valid integer
128 }
129
130 // Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
131 // sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
132 // static_assert(sizeof(long) == 8);
133
134 char* pos; // mutated by strtol
135
136 errno = 0;
137 long v = strtol(s, &pos, base);
138
139 if (errno == ERANGE) {
140 switch (v) {
141 case LONG_MIN:
142 return false; // underflow of long, which may be 64 bits
143 case LONG_MAX:
144 return false; // overflow of long
145 }
146 }
147
148 // It should ALSO fit in an int, not just a long
149 if (v > INT_MAX) {
150 return false;
151 }
152 if (v < INT_MIN) {
153 return false;
154 }
155
156 const char* end = s + length;
157 if (pos == end) {
158 *result = v;
159 return true; // strtol() consumed ALL characters.
160 }
161
162 while (pos < end) {
163 if (!IsAsciiWhitespace(*pos)) {
164 return false; // Trailing non-space
165 }
166 pos++;
167 }
168
169 *result = v;
170 return true; // Trailing space is OK
171}
172
173bool StringToInt64(const char* s, int length, int base, int64_t* result) {
174 if (length == 0) {
175 return false; // empty string isn't a valid integer
176 }
177
178 // These should be the same type
179 static_assert(sizeof(long long) == sizeof(int64_t));
180
181 char* pos; // mutated by strtol
182
183 errno = 0;
184 long long v = strtoll(s, &pos, base);
185
186 if (errno == ERANGE) {
187 switch (v) {
188 case LLONG_MIN:
189 return false; // underflow
190 case LLONG_MAX:
191 return false; // overflow
192 }
193 }
194
195 const char* end = s + length;
196 if (pos == end) {
197 *result = v;
198 return true; // strtol() consumed ALL characters.
199 }
200
201 while (pos < end) {
202 if (!IsAsciiWhitespace(*pos)) {
203 return false; // Trailing non-space
204 }
205 pos++;
206 }
207
208 *result = v;
209 return true; // Trailing space is OK
210}
211
212int to_int(BigStr* s, int base) {
213 int i;
214 if (StringToInt(s->data_, len(s), base, &i)) {
215 return i; // truncated to int
216 } else {
217 throw Alloc<ValueError>();
218 }
219}
220
221BigStr* chr(int i) {
222 // NOTE: i should be less than 256, in which we could return an object from
223 // GLOBAL_STR() pool, like StrIter
224 auto result = NewStr(1);
225 result->data_[0] = i;
226 return result;
227}
228
229int ord(BigStr* s) {
230 assert(len(s) == 1);
231 // signed to unsigned conversion, so we don't get values like -127
232 uint8_t c = static_cast<uint8_t>(s->data_[0]);
233 return c;
234}
235
236bool to_bool(BigStr* s) {
237 return len(s) != 0;
238}
239
// Widens an int to a double.
double to_float(int i) {
  double widened = i;
  return widened;
}
243
244double to_float(BigStr* s) {
245 char* begin = s->data_;
246 char* end = begin + len(s);
247
248 errno = 0;
249 double result = strtod(begin, &end);
250
251 if (errno == ERANGE) { // error: overflow or underflow
252 if (result >= HUGE_VAL) {
253 return INFINITY;
254 } else if (result <= -HUGE_VAL) {
255 return -INFINITY;
256 } else if (-DBL_MIN <= result && result <= DBL_MIN) {
257 return 0.0;
258 } else {
259 FAIL("Invalid value after ERANGE");
260 }
261 }
262 if (end == begin) { // error: not a floating point number
263 throw Alloc<ValueError>();
264 }
265
266 return result;
267}
268
269// e.g. ('a' in 'abc')
270bool str_contains(BigStr* haystack, BigStr* needle) {
271 // Common case
272 if (len(needle) == 1) {
273 return memchr(haystack->data_, needle->data_[0], len(haystack));
274 }
275
276 if (len(needle) > len(haystack)) {
277 return false;
278 }
279
280 // General case. TODO: We could use a smarter substring algorithm.
281
282 const char* end = haystack->data_ + len(haystack);
283 const char* last_possible = end - len(needle);
284 const char* p = haystack->data_;
285
286 while (p <= last_possible) {
287 if (memcmp(p, needle->data_, len(needle)) == 0) {
288 return true;
289 }
290 p++;
291 }
292 return false;
293}
294
295BigStr* str_repeat(BigStr* s, int times) {
296 // Python allows -1 too, and Oil used that
297 if (times <= 0) {
298 return kEmptyString;
299 }
300 int len_ = len(s);
301 int new_len = len_ * times;
302 BigStr* result = NewStr(new_len);
303
304 char* dest = result->data_;
305 for (int i = 0; i < times; i++) {
306 memcpy(dest, s->data_, len_);
307 dest += len_;
308 }
309 return result;
310}
311
312// for os_path.join()
313// NOTE(Jesse): Perfect candidate for BoundedBuffer
314BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
315 int a_len = len(a);
316 int b_len = len(b);
317 int c_len = len(c);
318
319 int new_len = a_len + b_len + c_len;
320 BigStr* result = NewStr(new_len);
321 char* pos = result->data_;
322
323 memcpy(pos, a->data_, a_len);
324 pos += a_len;
325
326 memcpy(pos, b->data_, b_len);
327 pos += b_len;
328
329 memcpy(pos, c->data_, c_len);
330
331 assert(pos + c_len == result->data_ + new_len);
332
333 return result;
334}
335
336BigStr* str_concat(BigStr* a, BigStr* b) {
337 int a_len = len(a);
338 int b_len = len(b);
339 int new_len = a_len + b_len;
340 BigStr* result = NewStr(new_len);
341 char* buf = result->data_;
342
343 memcpy(buf, a->data_, a_len);
344 memcpy(buf + a_len, b->data_, b_len);
345
346 return result;
347}
348
349//
350// Comparators
351//
352
353bool str_equals(BigStr* left, BigStr* right) {
354 // Fast path for identical strings. String deduplication during GC could
355 // make this more likely. String interning could guarantee it, allowing us
356 // to remove memcmp().
357 if (left == right) {
358 return true;
359 }
360
361 // TODO: It would be nice to remove this condition, but I think we need MyPy
362 // strict None checking for it
363 if (left == nullptr || right == nullptr) {
364 return false;
365 }
366
367 if (left->len_ != right->len_) {
368 return false;
369 }
370
371 return memcmp(left->data_, right->data_, left->len_) == 0;
372}
373
374bool maybe_str_equals(BigStr* left, BigStr* right) {
375 if (left && right) {
376 return str_equals(left, right);
377 }
378
379 if (!left && !right) {
380 return true; // None == None
381 }
382
383 return false; // one is None and one is a BigStr*
384}
385
386bool items_equal(BigStr* left, BigStr* right) {
387 return str_equals(left, right);
388}
389
390bool keys_equal(BigStr* left, BigStr* right) {
391 return items_equal(left, right);
392}
393
394bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
395 return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
396}
397
398bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
399 return items_equal(t1, t2);
400}
401
402bool items_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
403 return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
404}
405
406bool keys_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
407 return items_equal(t1, t2);
408}
409
410bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
411 // Needs SmallStr change
412 if (len(s) == c_len) {
413 return memcmp(s->data_, c_string, c_len) == 0;
414 } else {
415 return false;
416 }
417}
418
419bool str_equals0(const char* c_string, BigStr* s) {
420 int n = strlen(c_string);
421 if (len(s) == n) {
422 return memcmp(s->data_, c_string, n) == 0;
423 } else {
424 return false;
425 }
426}
427
428int hash(BigStr* s) {
429 return s->hash(fnv1);
430}
431
// Larger of two ints.
int max(int a, int b) {
  return (a < b) ? b : a;
}
435
// Smaller of two ints.
int min(int a, int b) {
  return (b < a) ? b : a;
}
439
440int max(List<int>* elems) {
441 int n = len(elems);
442 if (n < 1) {
443 throw Alloc<ValueError>();
444 }
445
446 int ret = elems->at(0);
447 for (int i = 0; i < n; ++i) {
448 int cand = elems->at(i);
449 if (cand > ret) {
450 ret = cand;
451 }
452 }
453
454 return ret;
455}