OILS / data_lang / j8_libc.c View on Github | oilshell.org

221 lines, 107 significant
#include "data_lang/j8_libc.h"

#include <limits.h>   // INT_MAX
#include <stdbool.h>  // false
#include <stdlib.h>   // realloc

#include "data_lang/j8.h"  // EncodeRuneOrByte
7
8void EncodeBString(j8_buf_t in_buf, j8_buf_t* out_buf, int capacity) {
9 // Compute pointers for the inner loop
10 unsigned char* in = (unsigned char*)in_buf.data;
11 unsigned char* in_end = in + in_buf.len;
12
13 unsigned char* out = out_buf->data; // mutated
14 unsigned char* out_end = out_buf->data + capacity;
15 unsigned char** p_out = &out;
16
17 J8_OUT('b'); // Left quote b''
18 J8_OUT('\'');
19
20 while (true) {
21 // printf("B iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
22 // out, out_end);
23 J8EncodeChunk(&in, in_end, &out, out_end, true); // Fill as much as we can
24 out_buf->len = out - out_buf->data; // recompute length
25
26 if (in >= in_end) {
27 break;
28 }
29
30 // Same growth policy as below
31 capacity = capacity * 3 / 2;
32 // printf("[2] new capacity %d\n", capacity);
33 out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
34
35 // Recompute pointers
36 out = out_buf->data + out_buf->len;
37 out_end = out_buf->data + capacity;
38 p_out = &out;
39 }
40
41 J8_OUT('\'');
42 out_buf->len = out - out_buf->data;
43
44 J8_OUT('\0'); // NUL terminate for printf
45}
46
47// $'' escaping
48// This function is a COPY of EncodeBString() above
49void EncodeBashDollarString(j8_buf_t in_buf, j8_buf_t* out_buf, int capacity) {
50 // Compute pointers for the inner loop
51 unsigned char* in = (unsigned char*)in_buf.data;
52 unsigned char* in_end = in + in_buf.len;
53
54 unsigned char* out = out_buf->data; // mutated
55 unsigned char* out_end = out_buf->data + capacity;
56 unsigned char** p_out = &out;
57
58 J8_OUT('$'); // Left quote b''
59 J8_OUT('\'');
60
61 while (true) {
62 // printf("B iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
63 // out, out_end);
64 // Fill as much as we can
65 BashDollarEncodeChunk(&in, in_end, &out, out_end);
66 out_buf->len = out - out_buf->data; // recompute length
67
68 if (in >= in_end) {
69 break;
70 }
71
72 // Same growth policy as below
73 capacity = capacity * 3 / 2;
74 // printf("[2] new capacity %d\n", capacity);
75 out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
76
77 // Recompute pointers
78 out = out_buf->data + out_buf->len;
79 out_end = out_buf->data + capacity;
80 p_out = &out;
81 }
82
83 J8_OUT('\'');
84 out_buf->len = out - out_buf->data;
85
86 J8_OUT('\0'); // NUL terminate for printf
87}
88
89void J8EncodeString(j8_buf_t in_buf, j8_buf_t* out_buf, int j8_fallback) {
90 unsigned char* in = (unsigned char*)in_buf.data;
91 unsigned char* in_end = in + in_buf.len;
92
93 // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
94 int capacity = in_buf.len + 3 + 2; // 3 for quotes, 2 potential \" \n
95 if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE
96 capacity = J8_MIN_CAPACITY;
97 }
98 // printf("[1] capacity %d j8_fallback %d\n", capacity, j8_fallback);
99
100 out_buf->data = (unsigned char*)malloc(capacity);
101 out_buf->len = 0; // starts out empty
102
103 unsigned char* out = out_buf->data; // mutated
104 unsigned char* out_end = out_buf->data + capacity;
105 unsigned char** p_out = &out;
106
107 J8_OUT('"');
108
109 while (true) {
110 // Fill in as much as we can
111 // printf("J8 iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
112 // out, out_end);
113 int invalid_utf8 = J8EncodeChunk(&in, in_end, &out, out_end, false);
114 if (invalid_utf8 && j8_fallback) {
115 out_buf->len = 0; // rewind to begining
116 // printf("out %p out_end %p capacity %d\n", out, out_end, capacity);
117 EncodeBString(in_buf, out_buf, capacity); // fall back to b''
118 // printf("len %d\n", out_buf->len);
119 return;
120 }
121 out_buf->len = out - out_buf->data; // recompute length
122 // printf("[1] len %d\n", out_buf->len);
123
124 if (in >= in_end) {
125 break;
126 }
127
128 // Growth policy: every time through the loop, increase 1.5x
129 //
130 // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
131 // This seems like a reasonable tradeoff between over-allocating and too
132 // many realloc().
133 capacity = capacity * 3 / 2;
134 // printf("[1] new capacity %d\n", capacity);
135 out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
136
137 // Recompute pointers
138 out = out_buf->data + out_buf->len;
139 out_end = out_buf->data + capacity;
140 p_out = &out;
141 // printf("[1] out %p out_end %p\n", out, out_end);
142 }
143
144 J8_OUT('"');
145 out_buf->len = out - out_buf->data;
146
147 J8_OUT('\0'); // NUL terminate for printf
148}
149
150// Start with '', but fall back on $'' for ASCII control and \'
151//
152// Depending on options, fall back to
153//
154// EncodeBashDollarString() -- $'\xff'
155// EncodeBString() -- b'\yff'
156
157// Mostly a COPY of the above
158void ShellEncodeString(j8_buf_t in_buf, j8_buf_t* out_buf, int ysh_fallback) {
159 unsigned char* in = (unsigned char*)in_buf.data;
160 unsigned char* in_end = in + in_buf.len;
161
162 // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
163 int capacity = in_buf.len + 3 + 2; // 3 for quotes, 2 potential \" \n
164 if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE
165 capacity = J8_MIN_CAPACITY;
166 }
167 // printf("[1] capacity %d j8_fallback %d\n", capacity, j8_fallback);
168
169 out_buf->data = (unsigned char*)malloc(capacity);
170 out_buf->len = 0; // starts out empty
171
172 unsigned char* out = out_buf->data; // mutated
173 unsigned char* out_end = out_buf->data + capacity;
174 unsigned char** p_out = &out;
175
176 J8_OUT('\'');
177
178 while (true) {
179 // Fill in as much as we can
180 // printf("J8 iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data,
181 // out, out_end);
182 int cannot_encode = BourneShellEncodeChunk(&in, in_end, &out, out_end);
183 if (cannot_encode) {
184 out_buf->len = 0; // rewind to begining
185 // printf("out %p out_end %p capacity %d\n", out, out_end, capacity);
186 if (ysh_fallback) {
187 EncodeBString(in_buf, out_buf, capacity); // fall back to b''
188 } else {
189 EncodeBashDollarString(in_buf, out_buf, capacity); // fall back to $''
190 }
191 // printf("len %d\n", out_buf->len);
192 return;
193 }
194 out_buf->len = out - out_buf->data; // recompute length
195 // printf("[1] len %d\n", out_buf->len);
196
197 if (in >= in_end) {
198 break;
199 }
200
201 // Growth policy: every time through the loop, increase 1.5x
202 //
203 // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
204 // This seems like a reasonable tradeoff between over-allocating and too
205 // many realloc().
206 capacity = capacity * 3 / 2;
207 // printf("[1] new capacity %d\n", capacity);
208 out_buf->data = (unsigned char*)realloc(out_buf->data, capacity);
209
210 // Recompute pointers
211 out = out_buf->data + out_buf->len;
212 out_end = out_buf->data + capacity;
213 p_out = &out;
214 // printf("[1] out %p out_end %p\n", out, out_end);
215 }
216
217 J8_OUT('\'');
218 out_buf->len = out - out_buf->data;
219
220 J8_OUT('\0'); // NUL terminate for printf
221}