OILS / data_lang / json-survey.sh View on Github | oilshell.org

393 lines, 216 significant
1#!/usr/bin/env bash
2#
3# Usage:
4# data_lang/json-survey.sh <function name>
5
6set -o nounset
7set -o pipefail
8set -o errexit
9
10source build/dev-shell.sh # python3 in $PATH
11
12decode-int-float() {
13 # This is a float
14 python2 -c 'import json; val = json.loads("1e6"); print(type(val)); print(val)'
15 python2 -c 'import json; val = json.loads("1e-6"); print(type(val)); print(val)'
16 python2 -c 'import json; val = json.loads("0.5"); print(type(val)); print(val)'
17
18 # Int
19 python2 -c 'import json; val = json.loads("42"); print(type(val)); print(val)'
20
21 python3 -c 'import json; val = json.loads("1e6"); print(type(val)); print(val)'
22
23 echo
24 echo
25
26 # JavaScript only has 'number', no Int and Float
27 nodejs -e 'var val = JSON.parse("1e6"); console.log(typeof(val)); console.log(val)'
28}
29
30big-int() {
31 for i in $(seq 1000); do
32 echo -n 1234567890
33 done
34}
35
36# Hm, decoding integers and floats doesn't have overflow cases
37
38decode-huge-int() {
39 local i
40 i=$(big-int)
41 echo $i
42
43 # really big integer causes 100% CPU usage in Python 3
44 echo "$i" | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)'
45
46 # decodes to "Infinity"
47 echo "$i" | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
48}
49
50decode-huge-float() {
51 local f
52 f=$(big-int).99
53 echo $f
54
55 # decodes to "inf"
56 echo "$f" | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)'
57
58 # decodes to "Infinity"
59 echo "$f" | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
60}
61
62decode-syntax-errors() {
63
64 python2 -c 'import json; val = json.loads("{3:4}"); print(type(val)); print(val)' || true
65 echo
66 python2 -c 'import json; val = json.loads("[3:4]"); print(type(val)); print(val)' || true
67
68 echo
69 echo
70
71 # This has good position information
72 # It prints the line number, the line, and points to the token in the line
73 # where the problem happened
74
75 nodejs -e 'var val = JSON.parse("{3: 4}"); console.log(typeof(val)); console.log(val)' || true
76
77 nodejs -e 'var val = JSON.parse("[\n 3: 4\n]"); console.log(typeof(val)); console.log(val)' || true
78
79 nodejs -e 'var val = JSON.parse("[\n\n \"hello "); console.log(typeof(val)); console.log(val)' || true
80}
81
82decode-empty-input() {
83 python3 -c 'import json; val = json.loads(""); print(type(val)); print(val)' || true
84
85 echo
86 echo
87
88 nodejs -e 'var val = JSON.parse(""); console.log(typeof(val)); console.log(val)' || true
89}
90
91decode-trailing-data() {
92 # Extra data
93 python3 -c 'import json; val = json.loads("[]]"); print(type(val)); print(val)' || true
94
95 echo
96 echo
97
98 nodejs -e 'var val = JSON.parse("[]]"); console.log(typeof(val)); console.log(val)' || true
99}
100
101
102decode-invalid-escape() {
103 # single quoted escape not valid
104 cat >_tmp/json.txt <<'EOF'
105"\'"
106EOF
107 local json
108 json=$(cat _tmp/json.txt)
109
110 python3 -c 'import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)' \
111 "$json" || true
112
113 echo
114 echo
115
116 nodejs -e 'var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)' \
117 "$json" || true
118}
119
120decode-whitespace() {
121 # e.g. is carriage return whitespace? Yes, it is allowed
122 local json=$'{"age":\r42}'
123
124 # neither \f nor \v is allowed
125 #local json=$'{"age":\f42}'
126 #local json=$'{"age":\v42}'
127
128 python3 -c 'import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)' \
129 "$json" || true
130
131 echo
132 echo
133
134 nodejs -e 'var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)' \
135 "$json" || true
136}
137
138encode-list-dict-indent() {
139 echo 'PYTHON'
140 python3 -c 'import json; val = {}; print(json.dumps(val, indent=4))'
141 python3 -c 'import json; val = {"a": 42}; print(json.dumps(val, indent=4))'
142 python3 -c 'import json; val = {"a": 42, "b": 43}; print(json.dumps(val, indent=4))'
143 python3 -c 'import json; val = []; print(json.dumps(val, indent=4))'
144 python3 -c 'import json; val = [42]; print(json.dumps(val, indent=4))'
145 echo
146
147 echo 'JS'
148 nodejs -e 'var val = {}; console.log(JSON.stringify(val, null, 4))'
149 nodejs -e 'var val = {"a": 42}; console.log(JSON.stringify(val, null, 4))'
150 nodejs -e 'var val = {"a": 42, "b": 43}; console.log(JSON.stringify(val, null, 4))'
151 nodejs -e 'var val = []; console.log(JSON.stringify(val, null, 4))'
152 nodejs -e 'var val = [42]; console.log(JSON.stringify(val, null, 4))'
153 echo
154}
155
156encode-default() {
157 echo 'PYTHON'
158 python3 -c 'import json; val = {"a": 42, "b": [1, 2, 3]}; print(json.dumps(val))'
159 echo
160
161 echo 'JS'
162 nodejs -e 'var val = {"a": 42, "b": [1, 2, 3]}; console.log(JSON.stringify(val))'
163 echo
164
165 # Hm we indent by default, maybe we should change this
166 #
167 # I think the = operator indents by default, but json/json8 don't?
168 #
169 # PYTHON
170 # {"a": 42, "b": [1, 2, 3]}
171 #
172 # JS
173 # {"a":42,"b":[1,2,3]}
174
175 # Single knob design:
176 #
177 # json write (x) # space=2 by default
178 # json write (x, space=0) # like JS
179}
180
181encode-no-indent() {
182 echo 'PYTHON'
183
184 # has a space
185 python3 -c 'import json; val = {"a": 42, "b": [1, 2, 3]}; print(json.dumps(val, indent=None))'
186 # you control it like this
187 python3 -c 'import json; val = {"a": 42, "b": [1, 2, 3]}; print(json.dumps(val, separators=[",", ":"]))'
188 echo
189
190 # Python: -1 and 0 both mean zero indent, but MULTIPLE lines
191 python3 -c 'import json; val = {"a": 42, "b": [1, 2, 3]}; print(json.dumps(val, indent=-1))'
192 echo
193 python3 -c 'import json; val = {"a": 42, "b": [1, 2, 3]}; print(json.dumps(val, indent=0))'
194 echo
195
196 echo 'JS'
197
198 # JS: -1 and 0 both print on ONE LINE
199 # Second arg is "replacer", which I don't think we need
200 nodejs -e 'var val = {"a": 42, "b": [1, 2, 3]}; console.log(JSON.stringify(val, null, -1))'
201 nodejs -e 'var val = {"a": 42, "b": [1, 2, 3]}; console.log(JSON.stringify(val, null, 0))'
202 # third arg can be a string too
203 nodejs -e 'var val = {"a": 42, "b": [1, 2, 3]}; console.log(JSON.stringify(val, null, "\t"))'
204
205 # Python has indent=0 vs indent=None, and it has separators=[",", ";"]
206 # JS has indent=1 and indent="\t" etc.
207 # - it also clamps strings/indents to 10 chars or less
208
209 # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
210 # indent less than 1 means "no space"
211 #
212 # Which behavior should OSH have for 0 and -1?
213 #
214 # Does indent=0 and indent=null and indent=" " make sense?
215 # I think it could
216}
217
218encode-obj-cycles() {
219 python3 -c 'import json; val = {}; val["k"] = val; print(json.dumps(val))' || true
220 echo
221
222 python3 -c 'import json; val = []; val.append(val); print(json.dumps(val))' || true
223 echo
224
225 # Better error message than Python!
226 # TypeError: Converting circular structure to JSON
227 # --> starting at object with constructor 'Object'
228 # --- property 'k' closes the circle
229 nodejs -e 'var val = {}; val["k"] = val; console.log(JSON.stringify(val))' || true
230 echo
231
232 nodejs -e 'var val = []; val.push(val); console.log(JSON.stringify(val))' || true
233 echo
234}
235
236multiple-refs() {
237 # Python prints a tree
238 python3 -c 'import json; mylist = [1,2,3]; val = [mylist, mylist]; print(repr(val)); print(json.dumps(val))'
239 echo
240
241 # Same with node.js
242 nodejs -e 'var mylist = [1,2,3]; var val = [mylist, mylist]; console.log(val); console.log(JSON.stringify(val))'
243 echo
244
245 # Same with Oils
246 bin/osh -c 'var mylist = [1,2,3]; var val = [mylist, mylist]; = val; json write (val); pp asdl (val)'
247 echo
248}
249
250oils-cycles() {
251 bin/ysh -c 'var d = {}; setvar d.key = d; = d; pp line (d); pp asdl (d); json write (d)'
252}
253
254surrogate-pair() {
255 local json=${1:-'"\ud83e\udd26"'}
256
257 # Hm it actually escapes. I thought it would use raw UTF-8
258 python2 -c 'import json; s = json.loads(r'\'$json\''); print(json.dumps(s))'
259 echo
260
261 python3 -c 'import json; s = json.loads(r'\'$json\''); print(json.dumps(s))'
262 echo
263
264 # This doesn't escape
265 nodejs -e 'var s = JSON.parse('\'$json\''); console.log(JSON.stringify(s))'
266 echo
267}
268
269surrogate-half() {
270 local json='"\ud83e"'
271
272 # Round trips correctly!
273 surrogate-pair "$json"
274}
275
276encode-nan() {
277 # Wow Python doesn't conform to spec!!
278 # https://docs.python.org/3.8/library/json.html#infinite-and-nan-number-values
279
280 # allow_nan=False and parse_constant alter the behavior
281
282 python2 -c 'import json; val = float("nan"); s = json.dumps(val); print(s); print(json.loads(s))' || true
283 echo
284
285 python3 -c 'import json; val = float("nan"); s = json.dumps(val); print(s); print(json.loads(s))' || true
286 echo
287
288 python3 -c 'import json; val = float("nan"); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))' || true
289 echo
290
291 # nodejs uses null
292 nodejs -e 'var val = NaN; var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));' || true
293 echo
294}
295
296encode-inf() {
297 # Again, Python doesn't conform to spec
298
299 python2 -c 'import json; val = float("-inf"); print(val); s = json.dumps(val); print(s); print(json.loads(s))' || true
300 echo
301
302 python3 -c 'import json; val = float("-inf"); print(val); s = json.dumps(val); print(s); print(json.loads(s))' || true
303 echo
304
305 python3 -c 'import json; val = float("-inf"); print(val); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))' || true
306 echo
307
308 # nodejs uses null again
309 nodejs -e 'var val = Number.NEGATIVE_INFINITY; console.log(val); var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));' || true
310 echo
311}
312
313encode-bad-type() {
314 python3 -c 'import json; print(json.dumps(json))' || true
315 echo
316
317 # {} or undefined - BAD!
318 nodejs -e 'console.log(JSON.stringify(JSON));' || true
319 nodejs -e 'function f() { return 42; }; console.log(JSON.stringify(f));' || true
320 echo
321}
322
323encode-binary-data() {
324 # utf-8 codec can't decode byte -- so it does UTF-8 decoding during encoding,
325 # which makes sense
326 python2 -c 'import json; print(json.dumps(b"\xff"))' || true
327 echo
328
329 # can't serialize bytes type
330 python3 -c 'import json; print(json.dumps(b"\xff"))' || true
331 echo
332
333 # there is no bytes type? \xff is a code point in JS
334 nodejs -e 'console.log(JSON.stringify("\xff"));' || true
335 nodejs -e 'console.log(JSON.stringify("\u{ff}"));' || true
336 echo
337}
338
339decode-utf8-in-surrogate-range() {
340 python2 -c 'b = "\xed\xa0\xbe"; print(repr(b.decode("utf-8")))'
341 echo
342
343 # Hm Python 3 gives an error here!
344 python3 -c 'b = b"\xed\xa0\xbe"; print(repr(b.decode("utf-8")))' || true
345 echo
346
347 # valid
348 nodejs -e 'var u = new Uint8Array([0xce, 0xbc]); var string = new TextDecoder("utf-8").decode(u); console.log(string);'
349 echo
350
351 # can't decode!
352 nodejs -e 'var u = new Uint8Array([0xed, 0xa0, 0xbe]); var string = new TextDecoder("utf-8").decode(u); console.log(string);'
353 echo
354}
355
356pairs() {
357 local nums
358 nums=$(seq $1)
359
360 echo -n '['
361 for i in $nums; do
362 echo -n '[42,'
363 done
364 echo -n '43]'
365 for i in $nums; do
366 echo -n ']'
367 done
368}
369
370decode-deeply-nested() {
371 local msg
372 msg=$(pairs 40200)
373
374 # RuntimeError
375 echo "$msg" | python2 -c 'import json, sys; print(repr(json.load(sys.stdin)))' || true
376
377 # RecursionError
378 echo "$msg" | python3 -c 'import json, sys; print(repr(json.load(sys.stdin)))' || true
379
380 # Hm node.js handles it fine? Probably doesn't have a stackful parser.
381 # [ [ [ [Array] ] ] ]
382 echo "$msg" | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));' || true
383
384 echo "$msg" | bin/osh -c 'json read; = _reply' || true
385
386 # Hm this works past 40K in C++! Then segmentation fault. We could put an
387 # artifical limit on it.
388 local osh=_bin/cxx-opt/osh
389 ninja $osh
390 echo "$msg" | $osh -c 'json read; = _reply; echo $[len(_reply)]' || true
391}
392
393"$@"