| 1 | #include "data_lang/j8_libc.h"
 | 
| 2 | 
 | 
| 3 | #include "data_lang/j8.h"
 | 
| 4 | #include "data_lang/j8_test_lib.h"
 | 
| 5 | #include "vendor/greatest.h"
 | 
| 6 | 
 | 
| 7 | TEST char_int_test() {
 | 
| 8 |   const char* s = "foo\xff";
 | 
| 9 | 
 | 
| 10 |   // Python uses Py_CHARMASK() macro to avoid this problem!
 | 
| 11 | 
 | 
| 12 |   // Like this
 | 
| 13 |   //     int c = Py_CHARMASK(s[i]);
 | 
| 14 | 
 | 
| 15 |   // Python.h:
 | 
| 16 |   //     #define Py_CHARMASK(c)           ((unsigned char)((c) & 0xff))
 | 
| 17 | 
 | 
| 18 |   // For j8, let's just use unsigned char and make callers cast.
 | 
| 19 | 
 | 
| 20 |   int c = s[3];
 | 
| 21 |   printf("c = %c\n", c);
 | 
| 22 |   printf("c = %d\n", c);
 | 
| 23 | 
 | 
| 24 |   PASS();
 | 
| 25 | }
 | 
| 26 | 
 | 
| 27 | TEST j8_encode_test() {
 | 
| 28 |   for (int i = 0; J8_TEST_CASES[i]; ++i) {
 | 
| 29 |     const char* s = J8_TEST_CASES[i];
 | 
| 30 |     int input_len = strlen(s);
 | 
| 31 |     j8_buf_t in = {(unsigned char*)s, input_len};
 | 
| 32 | 
 | 
| 33 |     // printf("input '%s' %d\n", in.data, input_len);
 | 
| 34 | 
 | 
| 35 |     j8_buf_t result = {0};
 | 
| 36 |     J8EncodeString(in, &result, 0);
 | 
| 37 | 
 | 
| 38 |     printf("result %s\n", result.data);
 | 
| 39 |     printf("result.len %d\n", result.len);
 | 
| 40 | 
 | 
| 41 |     // Some sanity checks
 | 
| 42 |     int n = strlen(s);
 | 
| 43 |     switch (n) {
 | 
| 44 |     case 0:  // empty string -> ""
 | 
| 45 |       ASSERT_EQ_FMT(2, result.len, "%d");
 | 
| 46 |       break;
 | 
| 47 |     case 1: {  // x -> "x"
 | 
| 48 |       unsigned char ch = s[0];
 | 
| 49 |       if (ch < 128) {
 | 
| 50 |         ASSERT_EQ_FMT(3, result.len, "%d");
 | 
| 51 |       }
 | 
| 52 |       break;
 | 
| 53 |     }
 | 
| 54 |     default:
 | 
| 55 |       ASSERT(input_len < result.len);
 | 
| 56 |       break;
 | 
| 57 |     }
 | 
| 58 |     free(result.data);
 | 
| 59 | 
 | 
| 60 |     // Encode again with J8 fallback
 | 
| 61 |     result = {0};
 | 
| 62 |     J8EncodeString(in, &result, 1);
 | 
| 63 | 
 | 
| 64 |     printf("result %s\n", result.data);
 | 
| 65 |     printf("result.len %d\n", result.len);
 | 
| 66 |     free(result.data);
 | 
| 67 | 
 | 
| 68 |     printf("\n");
 | 
| 69 |   }
 | 
| 70 | 
 | 
| 71 |   PASS();
 | 
| 72 | }
 | 
| 73 | 
 | 
| 74 | TEST shell_encode_test() {
 | 
| 75 |   for (int i = 0; J8_TEST_CASES[i]; ++i) {
 | 
| 76 |     const char* s = J8_TEST_CASES[i];
 | 
| 77 |     int input_len = strlen(s);
 | 
| 78 |     j8_buf_t in = {(unsigned char*)s, input_len};
 | 
| 79 | 
 | 
| 80 |     // printf("input '%s' %d\n", in.data, input_len);
 | 
| 81 | 
 | 
| 82 |     j8_buf_t result = {0};
 | 
| 83 |     ShellEncodeString(in, &result, 0);
 | 
| 84 | 
 | 
| 85 |     printf("result %s\n", result.data);
 | 
| 86 |     printf("result.len %d\n", result.len);
 | 
| 87 | 
 | 
| 88 |     // Some sanity checks
 | 
| 89 |     int n = strlen(s);
 | 
| 90 |     switch (n) {
 | 
| 91 |     case 0:  // empty string -> ""
 | 
| 92 |       ASSERT_EQ_FMT(2, result.len, "%d");
 | 
| 93 |       break;
 | 
| 94 |     case 1: {  // x -> "x"
 | 
| 95 |       unsigned char ch = s[0];
 | 
| 96 |       if (ch < 128) {
 | 
| 97 |         ASSERT_EQ_FMT(3, result.len, "%d");
 | 
| 98 |       }
 | 
| 99 |     } break;
 | 
| 100 |     default:
 | 
| 101 |       ASSERT(input_len < result.len);
 | 
| 102 |       break;
 | 
| 103 |     }
 | 
| 104 |     free(result.data);
 | 
| 105 | 
 | 
| 106 |     // Encode again with J8 fallback
 | 
| 107 |     result = {0};
 | 
| 108 |     ShellEncodeString(in, &result, 1);  // YSH fallback
 | 
| 109 | 
 | 
| 110 |     printf("result %s\n", result.data);
 | 
| 111 |     printf("result.len %d\n", result.len);
 | 
| 112 |     free(result.data);
 | 
| 113 | 
 | 
| 114 |     printf("\n");
 | 
| 115 |   }
 | 
| 116 | 
 | 
| 117 |   PASS();
 | 
| 118 | }
 | 
| 119 | 
 | 
| 120 | TEST invalid_utf8_test() {
 | 
| 121 |   {
 | 
| 122 |     // Truncated, should not have \x00 on the end
 | 
| 123 |     const char* s = "\xce";
 | 
| 124 | 
 | 
| 125 |     j8_buf_t in = {(unsigned char*)s, strlen(s)};
 | 
| 126 |     j8_buf_t result = {0};
 | 
| 127 |     ShellEncodeString(in, &result, 0);
 | 
| 128 | 
 | 
| 129 |     printf("%s\n", result.data);
 | 
| 130 |     ASSERT_EQ(0, memcmp("$'\\xce'", result.data, result.len));
 | 
| 131 |     free(result.data);
 | 
| 132 | 
 | 
| 133 |     J8EncodeString(in, &result, 1);
 | 
| 134 |     printf("%s\n", result.data);
 | 
| 135 |     ASSERT_EQ(0, memcmp("b'\\yce'", result.data, result.len));
 | 
| 136 |     free(result.data);
 | 
| 137 |   }
 | 
| 138 | 
 | 
| 139 |   {
 | 
| 140 |     // \U0001f926 with bad byte at the end
 | 
| 141 |     const char* s = "\xf0\x9f\xa4\xff";
 | 
| 142 | 
 | 
| 143 |     j8_buf_t in = {(unsigned char*)s, strlen(s)};
 | 
| 144 |     j8_buf_t result = {0};
 | 
| 145 |     ShellEncodeString(in, &result, 0);
 | 
| 146 | 
 | 
| 147 |     printf("%s\n", result.data);
 | 
| 148 |     ASSERT_EQ(0, memcmp("$'\\xf0\\x9f\\xa4\\xff'", result.data, result.len));
 | 
| 149 |     free(result.data);
 | 
| 150 | 
 | 
| 151 |     J8EncodeString(in, &result, 1);
 | 
| 152 |     printf("%s\n", result.data);
 | 
| 153 |     ASSERT_EQ(0, memcmp("b'\\yf0\\y9f\\ya4\\yff'", result.data, result.len));
 | 
| 154 |     free(result.data);
 | 
| 155 |   }
 | 
| 156 | 
 | 
| 157 |   PASS();
 | 
| 158 | }
 | 
| 159 | 
 | 
| 160 | TEST all_bytes_test() {
 | 
| 161 |   char s[2];
 | 
| 162 |   s[1] = '\0';
 | 
| 163 |   for (int i = 0; i < 256; ++i) {
 | 
| 164 |     s[0] = i;
 | 
| 165 | 
 | 
| 166 |     j8_buf_t in = {(unsigned char*)s, 1};
 | 
| 167 |     j8_buf_t result = {0};
 | 
| 168 |     ShellEncodeString(in, &result, 0);
 | 
| 169 | 
 | 
| 170 |     printf("i %d -> %s\n", i, result.data);
 | 
| 171 |     free(result.data);
 | 
| 172 | 
 | 
| 173 |     J8EncodeString(in, &result, 1);
 | 
| 174 |     // printf("i %d -> %s\n", i, result.data);
 | 
| 175 |     free(result.data);
 | 
| 176 |   }
 | 
| 177 | 
 | 
| 178 |   PASS();
 | 
| 179 | }
 | 
| 180 | 
 | 
| 181 | TEST can_omit_quotes_test() {
 | 
| 182 |   const char* s = "foo";
 | 
| 183 |   ASSERT(CanOmitQuotes((unsigned char*)s, strlen(s)));
 | 
| 184 | 
 | 
| 185 |   s = "foo bar";
 | 
| 186 |   ASSERT(!CanOmitQuotes((unsigned char*)s, strlen(s)));
 | 
| 187 | 
 | 
| 188 |   s = "my-dir/my_file.cc";
 | 
| 189 |   ASSERT(CanOmitQuotes((unsigned char*)s, strlen(s)));
 | 
| 190 | 
 | 
| 191 |   PASS();
 | 
| 192 | }
 | 
| 193 | 
 | 
| 194 | GREATEST_MAIN_DEFS();
 | 
| 195 | 
 | 
| 196 | int main(int argc, char** argv) {
 | 
| 197 |   GREATEST_MAIN_BEGIN();
 | 
| 198 | 
 | 
| 199 |   RUN_TEST(j8_encode_test);
 | 
| 200 |   RUN_TEST(shell_encode_test);
 | 
| 201 |   RUN_TEST(invalid_utf8_test);
 | 
| 202 |   RUN_TEST(all_bytes_test);
 | 
| 203 |   RUN_TEST(char_int_test);
 | 
| 204 |   RUN_TEST(can_omit_quotes_test);
 | 
| 205 | 
 | 
| 206 |   GREATEST_MAIN_END();
 | 
| 207 |   return 0;
 | 
| 208 | }
 |