| 1 | #include "cpp/data_lang.h"
 | 
| 2 | 
 | 
| 3 | #include <stdio.h>
 | 
| 4 | 
 | 
| 5 | #include "_gen/core/value.asdl.h"
 | 
| 6 | #include "data_lang/j8_libc.h"  // for comparison
 | 
| 7 | #include "data_lang/j8_test_lib.h"
 | 
| 8 | #include "vendor/greatest.h"
 | 
| 9 | 
 | 
| 10 | TEST PartIsUtf8_test() {
 | 
| 11 |   BigStr* s = StrFromC("hi");
 | 
| 12 | 
 | 
| 13 |   ASSERT(pyj8::PartIsUtf8(s, 0, 2));
 | 
| 14 | 
 | 
| 15 |   // empty string is trivially UTF-8
 | 
| 16 |   ASSERT(pyj8::PartIsUtf8(s, 0, 0));
 | 
| 17 | 
 | 
| 18 |   BigStr* binary = StrFromC("h\xff");
 | 
| 19 |   ASSERT(!pyj8::PartIsUtf8(binary, 0, len(binary)));
 | 
| 20 | 
 | 
| 21 |   // first byte is UTF-8
 | 
| 22 |   ASSERT(pyj8::PartIsUtf8(binary, 0, 1));
 | 
| 23 |   // second byte isn't
 | 
| 24 |   ASSERT(!pyj8::PartIsUtf8(binary, 1, 2));
 | 
| 25 | 
 | 
| 26 |   PASS();
 | 
| 27 | }
 | 
| 28 | 
 | 
// Option bit handed to pyj8::WriteString() by the tests in this file.
// TODO: remove duplication
// NOTE(review): presumably this mirrors a LOSSY_JSON constant defined by the
// code under test -- confirm, then share a single definition.
#define LOSSY_JSON (1 << 3)
 | 
| 31 | 
 | 
| 32 | TEST WriteString_test() {
 | 
| 33 |   auto buf = Alloc<mylib::BufWriter>();
 | 
| 34 | 
 | 
| 35 |   for (int i = 0; J8_TEST_CASES[i]; ++i) {
 | 
| 36 |     const char* s = J8_TEST_CASES[i];
 | 
| 37 |     BigStr* s2 = StrFromC(s);
 | 
| 38 | 
 | 
| 39 |     buf = Alloc<mylib::BufWriter>();
 | 
| 40 |     pyj8::WriteString(s2, LOSSY_JSON, buf);
 | 
| 41 | 
 | 
| 42 |     BigStr* result = buf->getvalue();
 | 
| 43 |     log("result = %s", result->data_);
 | 
| 44 | 
 | 
| 45 |     buf = Alloc<mylib::BufWriter>();
 | 
| 46 |     pyj8::WriteString(s2, 0, buf);
 | 
| 47 | 
 | 
| 48 |     result = buf->getvalue();
 | 
| 49 |     log("result = %s", result->data_);
 | 
| 50 |   }
 | 
| 51 | 
 | 
| 52 |   PASS();
 | 
| 53 | }
 | 
| 54 | 
 | 
| 55 | TEST compare_c_test() {
 | 
| 56 |   // Compare two implementations
 | 
| 57 | 
 | 
| 58 |   auto buf = Alloc<mylib::BufWriter>();
 | 
| 59 | 
 | 
| 60 |   for (int i = 0; J8_TEST_CASES[i]; ++i) {
 | 
| 61 |     const char* s = J8_TEST_CASES[i];
 | 
| 62 |     int input_len = strlen(s);
 | 
| 63 |     j8_buf_t in = {(unsigned char*)s, input_len};
 | 
| 64 | 
 | 
| 65 |     j8_buf_t c_result = {0};
 | 
| 66 |     J8EncodeString(in, &c_result, 0);
 | 
| 67 | 
 | 
| 68 |     printf("c_result %s\n", c_result.data);
 | 
| 69 |     printf("c_result.len %d\n", c_result.len);
 | 
| 70 | 
 | 
| 71 |     BigStr* s2 = StrFromC(s);
 | 
| 72 | 
 | 
| 73 |     buf = Alloc<mylib::BufWriter>();
 | 
| 74 |     pyj8::WriteString(s2, LOSSY_JSON, buf);
 | 
| 75 | 
 | 
| 76 |     BigStr* cpp_result = buf->getvalue();
 | 
| 77 | 
 | 
| 78 |     // Equal lengths
 | 
| 79 |     ASSERT_EQ_FMT(c_result.len, len(cpp_result), "%d");
 | 
| 80 |     // Equal contents
 | 
| 81 |     ASSERT(memcmp(c_result.data, cpp_result->data_, c_result.len) == 0);
 | 
| 82 | 
 | 
| 83 |     free(c_result.data);
 | 
| 84 | 
 | 
| 85 |     //
 | 
| 86 |     // Encode again with J8 fallback
 | 
| 87 |     //
 | 
| 88 | 
 | 
| 89 |     c_result = {0};
 | 
| 90 |     J8EncodeString(in, &c_result, 1);
 | 
| 91 | 
 | 
| 92 |     printf("c_result %s\n", c_result.data);
 | 
| 93 |     printf("c_result.len %d\n", c_result.len);
 | 
| 94 | 
 | 
| 95 |     buf = Alloc<mylib::BufWriter>();
 | 
| 96 |     pyj8::WriteString(s2, 0, buf);
 | 
| 97 | 
 | 
| 98 |     cpp_result = buf->getvalue();
 | 
| 99 | 
 | 
| 100 |     // Equal lengths
 | 
| 101 |     ASSERT_EQ_FMT(c_result.len, len(cpp_result), "%d");
 | 
| 102 |     // Equal contents
 | 
| 103 |     ASSERT(memcmp(c_result.data, cpp_result->data_, c_result.len) == 0);
 | 
| 104 | 
 | 
| 105 |     free(c_result.data);
 | 
| 106 | 
 | 
| 107 |     printf("\n");
 | 
| 108 |   }
 | 
| 109 | 
 | 
| 110 |   PASS();
 | 
| 111 | }
 | 
| 112 | 
 | 
| 113 | using value_asdl::value;
 | 
| 114 | using value_asdl::value_t;
 | 
| 115 | 
 | 
| 116 | TEST heap_id_test() {
 | 
| 117 |   value_t* val1 = Alloc<value::Str>(kEmptyString);
 | 
| 118 |   value_t* val2 = Alloc<value::Str>(kEmptyString);
 | 
| 119 | 
 | 
| 120 |   int id1 = j8::HeapValueId(val1);
 | 
| 121 |   int id2 = j8::HeapValueId(val2);
 | 
| 122 | 
 | 
| 123 |   log("id1 = %d, id2 = %d", id1, id2);
 | 
| 124 |   ASSERT(id1 != id2);
 | 
| 125 | 
 | 
| 126 |   PASS();
 | 
| 127 | }
 | 
| 128 | 
 | 
| 129 | TEST utf8_decode_one_test() {
 | 
| 130 | #define ASSERT_DECODE(codepoint, bytes_read, string, start)               \
 | 
| 131 |   do {                                                                    \
 | 
| 132 |     Tuple2<int, int> result = fastfunc::Utf8DecodeOne((string), (start)); \
 | 
| 133 |     ASSERT_EQ(result.at0(), (codepoint));                                 \
 | 
| 134 |     ASSERT_EQ(result.at1(), (bytes_read));                                \
 | 
| 135 |   } while (false)
 | 
| 136 | 
 | 
| 137 |   BigStr* s = StrFromC("h\xE2\xA1\x80\xC5\x81");
 | 
| 138 |   ASSERT_DECODE('h', 1, s, 0);
 | 
| 139 |   ASSERT_DECODE(0x2840, 3, s, 1);
 | 
| 140 |   ASSERT_DECODE(0x141, 2, s, 4);
 | 
| 141 | 
 | 
| 142 |   // UTF8_ERR_OVERLONG = 1
 | 
| 143 |   ASSERT_DECODE(-1, 2, StrFromC("\xC1\x81"), 0);
 | 
| 144 | 
 | 
| 145 |   // UTF8_ERR_SURROGATE = 2
 | 
| 146 |   ASSERT_DECODE(-2, 3, StrFromC("\xED\xBF\x80"), 0);
 | 
| 147 | 
 | 
| 148 |   // UTF8_ERR_TOO_LARGE = 3
 | 
| 149 |   ASSERT_DECODE(-3, 4, StrFromC("\xF4\xA0\x80\x80"), 0);
 | 
| 150 | 
 | 
| 151 |   // UTF8_ERR_BAD_ENCODING = 4
 | 
| 152 |   ASSERT_DECODE(-4, 2, StrFromC("\xC2\xFF"), 0);
 | 
| 153 | 
 | 
| 154 |   // UTF8_ERR_TRUNCATED_BYTES = 5
 | 
| 155 |   ASSERT_DECODE(-5, 1, StrFromC("\xC2"), 0);
 | 
| 156 | 
 | 
| 157 |   PASS();
 | 
| 158 | #undef ASSERT_DECODE
 | 
| 159 | }
 | 
| 160 | 
 | 
| 161 | GREATEST_MAIN_DEFS();
 | 
| 162 | 
 | 
| 163 | int main(int argc, char** argv) {
 | 
| 164 |   gHeap.Init();
 | 
| 165 | 
 | 
| 166 |   GREATEST_MAIN_BEGIN();
 | 
| 167 | 
 | 
| 168 |   RUN_TEST(PartIsUtf8_test);
 | 
| 169 |   RUN_TEST(WriteString_test);
 | 
| 170 |   RUN_TEST(compare_c_test);
 | 
| 171 |   RUN_TEST(heap_id_test);
 | 
| 172 |   RUN_TEST(utf8_decode_one_test);
 | 
| 173 | 
 | 
| 174 |   gHeap.CleanProcessExit();
 | 
| 175 | 
 | 
| 176 |   GREATEST_MAIN_END();
 | 
| 177 |   return 0;
 | 
| 178 | }
 |