OILS / cpp / data_lang_test.cc View on Github | oilshell.org

178 lines, 99 significant
1#include "cpp/data_lang.h"
2
3#include <stdio.h>
4
5#include "_gen/core/value.asdl.h"
6#include "data_lang/j8_libc.h" // for comparison
7#include "data_lang/j8_test_lib.h"
8#include "vendor/greatest.h"
9
10TEST PartIsUtf8_test() {
11 BigStr* s = StrFromC("hi");
12
13 ASSERT(pyj8::PartIsUtf8(s, 0, 2));
14
15 // empty string is trivially UTF-8
16 ASSERT(pyj8::PartIsUtf8(s, 0, 0));
17
18 BigStr* binary = StrFromC("h\xff");
19 ASSERT(!pyj8::PartIsUtf8(binary, 0, len(binary)));
20
21 // first byte is UTF-8
22 ASSERT(pyj8::PartIsUtf8(binary, 0, 1));
23 // second byte isn't
24 ASSERT(!pyj8::PartIsUtf8(binary, 1, 2));
25
26 PASS();
27}
28
// Option bit handed to pyj8::WriteString() by the tests in this file.
// TODO: remove duplication (presumably this repeats a definition that lives
// elsewhere in the project -- confirm before deleting it here).
#define LOSSY_JSON (1 << 3)
31
32TEST WriteString_test() {
33 auto buf = Alloc<mylib::BufWriter>();
34
35 for (int i = 0; J8_TEST_CASES[i]; ++i) {
36 const char* s = J8_TEST_CASES[i];
37 BigStr* s2 = StrFromC(s);
38
39 buf = Alloc<mylib::BufWriter>();
40 pyj8::WriteString(s2, LOSSY_JSON, buf);
41
42 BigStr* result = buf->getvalue();
43 log("result = %s", result->data_);
44
45 buf = Alloc<mylib::BufWriter>();
46 pyj8::WriteString(s2, 0, buf);
47
48 result = buf->getvalue();
49 log("result = %s", result->data_);
50 }
51
52 PASS();
53}
54
55TEST compare_c_test() {
56 // Compare two implementations
57
58 auto buf = Alloc<mylib::BufWriter>();
59
60 for (int i = 0; J8_TEST_CASES[i]; ++i) {
61 const char* s = J8_TEST_CASES[i];
62 int input_len = strlen(s);
63 j8_buf_t in = {(unsigned char*)s, input_len};
64
65 j8_buf_t c_result = {0};
66 J8EncodeString(in, &c_result, 0);
67
68 printf("c_result %s\n", c_result.data);
69 printf("c_result.len %d\n", c_result.len);
70
71 BigStr* s2 = StrFromC(s);
72
73 buf = Alloc<mylib::BufWriter>();
74 pyj8::WriteString(s2, LOSSY_JSON, buf);
75
76 BigStr* cpp_result = buf->getvalue();
77
78 // Equal lengths
79 ASSERT_EQ_FMT(c_result.len, len(cpp_result), "%d");
80 // Equal contents
81 ASSERT(memcmp(c_result.data, cpp_result->data_, c_result.len) == 0);
82
83 free(c_result.data);
84
85 //
86 // Encode again with J8 fallback
87 //
88
89 c_result = {0};
90 J8EncodeString(in, &c_result, 1);
91
92 printf("c_result %s\n", c_result.data);
93 printf("c_result.len %d\n", c_result.len);
94
95 buf = Alloc<mylib::BufWriter>();
96 pyj8::WriteString(s2, 0, buf);
97
98 cpp_result = buf->getvalue();
99
100 // Equal lengths
101 ASSERT_EQ_FMT(c_result.len, len(cpp_result), "%d");
102 // Equal contents
103 ASSERT(memcmp(c_result.data, cpp_result->data_, c_result.len) == 0);
104
105 free(c_result.data);
106
107 printf("\n");
108 }
109
110 PASS();
111}
112
113using value_asdl::value;
114using value_asdl::value_t;
115
116TEST heap_id_test() {
117 value_t* val1 = Alloc<value::Str>(kEmptyString);
118 value_t* val2 = Alloc<value::Str>(kEmptyString);
119
120 int id1 = j8::HeapValueId(val1);
121 int id2 = j8::HeapValueId(val2);
122
123 log("id1 = %d, id2 = %d", id1, id2);
124 ASSERT(id1 != id2);
125
126 PASS();
127}
128
129TEST utf8_decode_one_test() {
130#define ASSERT_DECODE(codepoint, bytes_read, string, start) \
131 do { \
132 Tuple2<int, int> result = fastfunc::Utf8DecodeOne((string), (start)); \
133 ASSERT_EQ(result.at0(), (codepoint)); \
134 ASSERT_EQ(result.at1(), (bytes_read)); \
135 } while (false)
136
137 BigStr* s = StrFromC("h\xE2\xA1\x80\xC5\x81");
138 ASSERT_DECODE('h', 1, s, 0);
139 ASSERT_DECODE(0x2840, 3, s, 1);
140 ASSERT_DECODE(0x141, 2, s, 4);
141
142 // UTF8_ERR_OVERLONG = 1
143 ASSERT_DECODE(-1, 2, StrFromC("\xC1\x81"), 0);
144
145 // UTF8_ERR_SURROGATE = 2
146 ASSERT_DECODE(-2, 3, StrFromC("\xED\xBF\x80"), 0);
147
148 // UTF8_ERR_TOO_LARGE = 3
149 ASSERT_DECODE(-3, 4, StrFromC("\xF4\xA0\x80\x80"), 0);
150
151 // UTF8_ERR_BAD_ENCODING = 4
152 ASSERT_DECODE(-4, 2, StrFromC("\xC2\xFF"), 0);
153
154 // UTF8_ERR_TRUNCATED_BYTES = 5
155 ASSERT_DECODE(-5, 1, StrFromC("\xC2"), 0);
156
157 PASS();
158#undef ASSERT_DECODE
159}
160
161GREATEST_MAIN_DEFS();
162
163int main(int argc, char** argv) {
164 gHeap.Init();
165
166 GREATEST_MAIN_BEGIN();
167
168 RUN_TEST(PartIsUtf8_test);
169 RUN_TEST(WriteString_test);
170 RUN_TEST(compare_c_test);
171 RUN_TEST(heap_id_test);
172 RUN_TEST(utf8_decode_one_test);
173
174 gHeap.CleanProcessExit();
175
176 GREATEST_MAIN_END();
177 return 0;
178}