1 | #include "data_lang/j8_libc.h"
|
2 |
|
3 | #include <stdbool.h> // false
|
4 | #include <stdlib.h> // realloc
|
5 |
|
6 | #include "data_lang/j8.h" // EncodeRuneOrByte
|
7 |
|
8 | void EncodeBString(j8_buf_t in_buf, j8_buf_t* out_buf, int capacity) {
|
9 | // Compute pointers for the inner loop
|
10 | unsigned char* in = (unsigned char*)in_buf.data;
|
11 | unsigned char* in_end = in + in_buf.len;
|
12 |
|
13 | unsigned char* out = out_buf->data; // mutated
|
14 | unsigned char* out_end = out_buf->data + capacity;
|
15 | unsigned char** p_out = &out;
|
16 |
|
17 | J8_OUT('b'); // Left quote b''
|
18 | J8_OUT('\'');
|
19 |
|
20 | while (true) {
|
21 | // printf("B iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
|
22 | // out, out_end);
|
23 | J8EncodeChunk(&in, in_end, &out, out_end, true); // Fill as much as we can
|
24 | out_buf->len = out - out_buf->data; // recompute length
|
25 |
|
26 | if (in >= in_end) {
|
27 | break;
|
28 | }
|
29 |
|
30 | // Same growth policy as below
|
31 | capacity = capacity * 3 / 2;
|
32 | // printf("[2] new capacity %d\n", capacity);
|
33 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
|
34 |
|
35 | // Recompute pointers
|
36 | out = out_buf->data + out_buf->len;
|
37 | out_end = out_buf->data + capacity;
|
38 | p_out = &out;
|
39 | }
|
40 |
|
41 | J8_OUT('\'');
|
42 | out_buf->len = out - out_buf->data;
|
43 |
|
44 | J8_OUT('\0'); // NUL terminate for printf
|
45 | }
|
46 |
|
47 | // $'' escaping
|
48 | // This function is a COPY of EncodeBString() above
|
49 | void EncodeBashDollarString(j8_buf_t in_buf, j8_buf_t* out_buf, int capacity) {
|
50 | // Compute pointers for the inner loop
|
51 | unsigned char* in = (unsigned char*)in_buf.data;
|
52 | unsigned char* in_end = in + in_buf.len;
|
53 |
|
54 | unsigned char* out = out_buf->data; // mutated
|
55 | unsigned char* out_end = out_buf->data + capacity;
|
56 | unsigned char** p_out = &out;
|
57 |
|
58 | J8_OUT('$'); // Left quote b''
|
59 | J8_OUT('\'');
|
60 |
|
61 | while (true) {
|
62 | // printf("B iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
|
63 | // out, out_end);
|
64 | // Fill as much as we can
|
65 | BashDollarEncodeChunk(&in, in_end, &out, out_end);
|
66 | out_buf->len = out - out_buf->data; // recompute length
|
67 |
|
68 | if (in >= in_end) {
|
69 | break;
|
70 | }
|
71 |
|
72 | // Same growth policy as below
|
73 | capacity = capacity * 3 / 2;
|
74 | // printf("[2] new capacity %d\n", capacity);
|
75 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
|
76 |
|
77 | // Recompute pointers
|
78 | out = out_buf->data + out_buf->len;
|
79 | out_end = out_buf->data + capacity;
|
80 | p_out = &out;
|
81 | }
|
82 |
|
83 | J8_OUT('\'');
|
84 | out_buf->len = out - out_buf->data;
|
85 |
|
86 | J8_OUT('\0'); // NUL terminate for printf
|
87 | }
|
88 |
|
89 | void J8EncodeString(j8_buf_t in_buf, j8_buf_t* out_buf, int j8_fallback) {
|
90 | unsigned char* in = (unsigned char*)in_buf.data;
|
91 | unsigned char* in_end = in + in_buf.len;
|
92 |
|
93 | // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
|
94 | int capacity = in_buf.len + 3 + 2; // 3 for quotes, 2 potential \" \n
|
95 | if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE
|
96 | capacity = J8_MIN_CAPACITY;
|
97 | }
|
98 | // printf("[1] capacity %d j8_fallback %d\n", capacity, j8_fallback);
|
99 |
|
100 | out_buf->data = (unsigned char*)malloc(capacity);
|
101 | out_buf->len = 0; // starts out empty
|
102 |
|
103 | unsigned char* out = out_buf->data; // mutated
|
104 | unsigned char* out_end = out_buf->data + capacity;
|
105 | unsigned char** p_out = &out;
|
106 |
|
107 | J8_OUT('"');
|
108 |
|
109 | while (true) {
|
110 | // Fill in as much as we can
|
111 | // printf("J8 iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
|
112 | // out, out_end);
|
113 | int invalid_utf8 = J8EncodeChunk(&in, in_end, &out, out_end, false);
|
114 | if (invalid_utf8 && j8_fallback) {
|
115 | out_buf->len = 0; // rewind to begining
|
116 | // printf("out %p out_end %p capacity %d\n", out, out_end, capacity);
|
117 | EncodeBString(in_buf, out_buf, capacity); // fall back to b''
|
118 | // printf("len %d\n", out_buf->len);
|
119 | return;
|
120 | }
|
121 | out_buf->len = out - out_buf->data; // recompute length
|
122 | // printf("[1] len %d\n", out_buf->len);
|
123 |
|
124 | if (in >= in_end) {
|
125 | break;
|
126 | }
|
127 |
|
128 | // Growth policy: every time through the loop, increase 1.5x
|
129 | //
|
130 | // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
|
131 | // This seems like a reasonable tradeoff between over-allocating and too
|
132 | // many realloc().
|
133 | capacity = capacity * 3 / 2;
|
134 | // printf("[1] new capacity %d\n", capacity);
|
135 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
|
136 |
|
137 | // Recompute pointers
|
138 | out = out_buf->data + out_buf->len;
|
139 | out_end = out_buf->data + capacity;
|
140 | p_out = &out;
|
141 | // printf("[1] out %p out_end %p\n", out, out_end);
|
142 | }
|
143 |
|
144 | J8_OUT('"');
|
145 | out_buf->len = out - out_buf->data;
|
146 |
|
147 | J8_OUT('\0'); // NUL terminate for printf
|
148 | }
|
149 |
|
150 | // Start with '', but fall back on $'' for ASCII control and \'
|
151 | //
|
152 | // Depending on options, fall back to
|
153 | //
|
154 | // EncodeBashDollarString() -- $'\xff'
|
155 | // EncodeBString() -- b'\yff'
|
156 |
|
157 | // Mostly a COPY of the above
|
158 | void ShellEncodeString(j8_buf_t in_buf, j8_buf_t* out_buf, int ysh_fallback) {
|
159 | unsigned char* in = (unsigned char*)in_buf.data;
|
160 | unsigned char* in_end = in + in_buf.len;
|
161 |
|
162 | // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
|
163 | int capacity = in_buf.len + 3 + 2; // 3 for quotes, 2 potential \" \n
|
164 | if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE
|
165 | capacity = J8_MIN_CAPACITY;
|
166 | }
|
167 | // printf("[1] capacity %d j8_fallback %d\n", capacity, j8_fallback);
|
168 |
|
169 | out_buf->data = (unsigned char*)malloc(capacity);
|
170 | out_buf->len = 0; // starts out empty
|
171 |
|
172 | unsigned char* out = out_buf->data; // mutated
|
173 | unsigned char* out_end = out_buf->data + capacity;
|
174 | unsigned char** p_out = &out;
|
175 |
|
176 | J8_OUT('\'');
|
177 |
|
178 | while (true) {
|
179 | // Fill in as much as we can
|
180 | // printf("J8 iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
|
181 | // out, out_end);
|
182 | int cannot_encode = BourneShellEncodeChunk(&in, in_end, &out, out_end);
|
183 | if (cannot_encode) {
|
184 | out_buf->len = 0; // rewind to begining
|
185 | // printf("out %p out_end %p capacity %d\n", out, out_end, capacity);
|
186 | if (ysh_fallback) {
|
187 | EncodeBString(in_buf, out_buf, capacity); // fall back to b''
|
188 | } else {
|
189 | EncodeBashDollarString(in_buf, out_buf, capacity); // fall back to $''
|
190 | }
|
191 | // printf("len %d\n", out_buf->len);
|
192 | return;
|
193 | }
|
194 | out_buf->len = out - out_buf->data; // recompute length
|
195 | // printf("[1] len %d\n", out_buf->len);
|
196 |
|
197 | if (in >= in_end) {
|
198 | break;
|
199 | }
|
200 |
|
201 | // Growth policy: every time through the loop, increase 1.5x
|
202 | //
|
203 | // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
|
204 | // This seems like a reasonable tradeoff between over-allocating and too
|
205 | // many realloc().
|
206 | capacity = capacity * 3 / 2;
|
207 | // printf("[1] new capacity %d\n", capacity);
|
208 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
|
209 |
|
210 | // Recompute pointers
|
211 | out = out_buf->data + out_buf->len;
|
212 | out_end = out_buf->data + capacity;
|
213 | p_out = &out;
|
214 | // printf("[1] out %p out_end %p\n", out, out_end);
|
215 | }
|
216 |
|
217 | J8_OUT('\'');
|
218 | out_buf->len = out - out_buf->data;
|
219 |
|
220 | J8_OUT('\0'); // NUL terminate for printf
|
221 | }
|