| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Usage:
|
| 4 | # data_lang/json-survey.sh <function name>
|
| 5 |
|
| 6 | set -o nounset
|
| 7 | set -o pipefail
|
| 8 | set -o errexit
|
| 9 |
|
| 10 | source build/dev-shell.sh # python3 in $PATH
|
| 11 |
|
decode-int-float() {
  # Survey: which number type does each JSON decoder produce?
  # Every Python one-liner ends the same way: show the type, then the value.
  local py_show='print(type(val)); print(val)'
  local literal

  # Python 2: 1e6, 1e-6 and 0.5 are the float cases; 42 is the int case.
  for literal in 1e6 1e-6 0.5 42; do
    python2 -c "import json; val = json.loads(\"$literal\"); $py_show"
  done

  python3 -c "import json; val = json.loads(\"1e6\"); $py_show"

  printf '\n\n'

  # JavaScript only has 'number', no Int and Float
  nodejs -e 'var val = JSON.parse("1e6"); console.log(typeof(val)); console.log(val)'
}
|
| 29 |
|
big-int() {
  # Print the digits 1234567890 repeated COUNT times on one line, with no
  # trailing newline.
  #
  # Arguments: $1 - optional repeat count (default 1000, i.e. 10,000 digits)
  local count=${1:-1000}
  local k  # keep the loop counter out of the caller's scope

  for ((k = 0; k < count; k++)); do
    printf '%s' 1234567890
  done
}
|
| 35 |
|
| 36 | # Hm, decoding integers and floats doesn't have overflow cases
|
| 37 |
|
decode-huge-int() {
  # Feed a 10,000-digit integer (from big-int) to the Python 3 and node.js
  # JSON decoders.
  local digits
  digits=$(big-int)
  printf '%s\n' "$digits"

  # really big integer causes 100% CPU usage in Python 3
  #
  # Newer Python 3 releases cap int <-> str conversion at 4300 digits by
  # default and raise ValueError instead.  '|| true' keeps that from aborting
  # the function under errexit/pipefail before nodejs is tried.
  printf '%s\n' "$digits" \
    | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)' \
    || true

  # decodes to "Infinity"
  printf '%s\n' "$digits" \
    | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
}
|
| 49 |
|
decode-huge-float() {
  # Feed a float with a 10,000-digit integer part (big-int output plus ".99")
  # to the Python 3 and node.js JSON decoders.
  local f
  f=$(big-int).99
  # Quoted so the value is printed verbatim (no word splitting or globbing).
  printf '%s\n' "$f"

  # decodes to "inf"
  printf '%s\n' "$f" \
    | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)'

  # decodes to "Infinity"
  printf '%s\n' "$f" \
    | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
}
|
| 61 |
|
decode-syntax-errors() {
  # Survey: how do Python 2 and node.js report JSON syntax errors?
  # Every command is expected to fail, hence '|| true'.
  local py_show='print(type(val)); print(val)'
  local js_show='console.log(typeof(val)); console.log(val)'

  # Python 2: a non-string object key, then a colon inside a list.
  python2 -c 'import json; val = json.loads("{3:4}"); '"$py_show" || true
  echo
  python2 -c 'import json; val = json.loads("[3:4]"); '"$py_show" || true

  printf '\n\n'

  # This has good position information
  # It prints the line number, the line, and points to the token in the line
  # where the problem happened

  nodejs -e 'var val = JSON.parse("{3: 4}"); '"$js_show" || true

  nodejs -e 'var val = JSON.parse("[\n 3: 4\n]"); '"$js_show" || true

  nodejs -e 'var val = JSON.parse("[\n\n \"hello "); '"$js_show" || true
}
|
| 81 |
|
decode-empty-input() {
  # Survey: decode the empty string with Python 3 and node.js.
  # Failures are tolerated with '|| true' so both decoders always run.
  local py_code='import json; val = json.loads(""); print(type(val)); print(val)'
  local js_code='var val = JSON.parse(""); console.log(typeof(val)); console.log(val)'

  python3 -c "$py_code" || true

  printf '\n\n'

  nodejs -e "$js_code" || true
}
|
| 90 |
|
decode-trailing-data() {
  # Survey: decode a document with an extra closing bracket after a complete
  # value.  Failures are tolerated with '|| true'.
  local py_code='import json; val = json.loads("[]]"); print(type(val)); print(val)'
  local js_code='var val = JSON.parse("[]]"); console.log(typeof(val)); console.log(val)'

  # Extra data
  python3 -c "$py_code" || true

  printf '\n\n'

  nodejs -e "$js_code" || true
}
|
| 100 |
|
| 101 |
|
decode-invalid-escape() {
  # Survey: decode a JSON string containing the escape \' with Python 3 and
  # node.js.  The document goes through a here-doc so the shell does not touch
  # the backslash or quotes; it is then passed to each decoder as an argument.
  #
  # Side effect: (re)writes _tmp/json.txt relative to the current directory.
  local json_file=_tmp/json.txt

  # The scratch directory may not exist yet (e.g. fresh checkout); without it
  # the redirect below fails.
  mkdir -p "${json_file%/*}"

  # single quoted escape not valid
  cat >"$json_file" <<'EOF'
"\'"
EOF
  local json
  json=$(cat "$json_file")

  python3 -c 'import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)' \
    "$json" || true

  echo
  echo

  nodejs -e 'var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)' \
    "$json" || true
}
|
| 119 |
|
decode-whitespace() {
  # Survey: which bytes count as insignificant whitespace between JSON tokens?
  local py_code='import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)'
  local js_code='var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)'

  # e.g. is carriage return whitespace?  Yes, it is allowed
  local json
  json=$'{"age":\r42}'

  # neither \f nor \v is allowed
  #json=$'{"age":\f42}'
  #json=$'{"age":\v42}'

  python3 -c "$py_code" "$json" || true

  printf '\n\n'

  nodejs -e "$js_code" "$json" || true
}
|
| 137 |
|
encode-list-dict-indent() {
  # Survey: how Python 3 and node.js pretty-print empty and small
  # dicts/lists when asked for a 4-space indent.
  local val

  printf '%s\n' 'PYTHON'
  for val in '{}' '{"a": 42}' '{"a": 42, "b": 43}' '[]' '[42]'; do
    python3 -c "import json; val = $val; print(json.dumps(val, indent=4))"
  done
  echo

  printf '%s\n' 'JS'
  for val in '{}' '{"a": 42}' '{"a": 42, "b": 43}' '[]' '[42]'; do
    nodejs -e "var val = $val; console.log(JSON.stringify(val, null, 4))"
  done
  echo
}
|
| 155 |
|
encode-default() {
  # Survey: default (no options) encoder output in Python 3 and node.js.
  # The same literal is valid source in both languages.
  local val='{"a": 42, "b": [1, 2, 3]}'

  printf '%s\n' 'PYTHON'
  python3 -c "import json; val = $val; print(json.dumps(val))"
  echo

  printf '%s\n' 'JS'
  nodejs -e "var val = $val; console.log(JSON.stringify(val))"
  echo

  # Hm we indent by default, maybe we should change this
  #
  # I think the = operator indents by default, but json/json8 don't?
  #
  # Recorded output:
  #
  #   PYTHON
  #   {"a": 42, "b": [1, 2, 3]}
  #
  #   JS
  #   {"a":42,"b":[1,2,3]}

  # Single knob design:
  #
  #   json write (x)           # space=2 by default
  #   json write (x, space=0)  # like JS
}
|
| 180 |
|
encode-no-indent() {
  # Survey: how each encoder behaves with "no indent" style settings
  # (None / -1 / 0 / a string).
  local py_setup='import json; val = {"a": 42, "b": [1, 2, 3]}; '
  local js_setup='var val = {"a": 42, "b": [1, 2, 3]}; '
  local indent

  printf '%s\n' 'PYTHON'

  # has a space
  python3 -c "${py_setup}print(json.dumps(val, indent=None))"
  # you control it like this
  python3 -c "$py_setup"'print(json.dumps(val, separators=[",", ":"]))'
  echo

  # Python: -1 and 0 both mean zero indent, but MULTIPLE lines
  for indent in -1 0; do
    python3 -c "${py_setup}print(json.dumps(val, indent=$indent))"
    echo
  done

  printf '%s\n' 'JS'

  # JS: -1 and 0 both print on ONE LINE
  # Second arg is "replacer", which I don't think we need
  for indent in -1 0; do
    nodejs -e "${js_setup}console.log(JSON.stringify(val, null, $indent))"
  done
  # third arg can be a string too
  nodejs -e "$js_setup"'console.log(JSON.stringify(val, null, "\t"))'

  # Python has indent=0 vs indent=None, and it has separators=[",", ":"]
  # JS has indent=1 and indent="\t" etc.
  # - it also clamps strings/indents to 10 chars or less

  # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
  # indent less than 1 means "no space"
  #
  # Which behavior should OSH have for 0 and -1?
  #
  # Does indent=0 and indent=null and indent=" " make sense?
  # I think it could
}
|
| 217 |
|
encode-obj-cycles() {
  # Survey: encode a dict and a list that contain themselves.
  # Every command is allowed to fail ('|| true') so all four cases run.
  local snippet
  local -a py_cases=(
    'import json; val = {}; val["k"] = val; print(json.dumps(val))'
    'import json; val = []; val.append(val); print(json.dumps(val))'
  )
  local -a js_cases=(
    'var val = {}; val["k"] = val; console.log(JSON.stringify(val))'
    'var val = []; val.push(val); console.log(JSON.stringify(val))'
  )

  for snippet in "${py_cases[@]}"; do
    python3 -c "$snippet" || true
    echo
  done

  # Better error message than Python!
  # TypeError: Converting circular structure to JSON
  #   --> starting at object with constructor 'Object'
  #   --- property 'k' closes the circle
  for snippet in "${js_cases[@]}"; do
    nodejs -e "$snippet" || true
    echo
  done
}
|
| 235 |
|
multiple-refs() {
  # Survey: encode a list that references the same inner list twice
  # (shared reference, not a cycle) in Python 3, node.js and Oils.
  local py_code='import json; mylist = [1,2,3]; val = [mylist, mylist]; print(repr(val)); print(json.dumps(val))'
  local js_code='var mylist = [1,2,3]; var val = [mylist, mylist]; console.log(val); console.log(JSON.stringify(val))'
  local osh_code='var mylist = [1,2,3]; var val = [mylist, mylist]; = val; json write (val); pp asdl (val)'

  # Python prints a tree
  python3 -c "$py_code"
  echo

  # Same with node.js
  nodejs -e "$js_code"
  echo

  # Same with Oils
  bin/osh -c "$osh_code"
  echo
}
|
| 249 |
|
oils-cycles() {
  # Build a dict that contains itself in YSH, then print it four ways:
  # '=', 'pp line', 'pp asdl' and 'json write'.
  local ysh_code='var d = {}; setvar d.key = d; = d; pp line (d); pp asdl (d); json write (d)'

  bin/ysh -c "$ysh_code"
}
|
| 253 |
|
surrogate-pair() {
  # Round-trip a JSON string through decode + encode in Python 2, Python 3
  # and node.js.
  #
  # Arguments: $1 - optional JSON text (default: an escaped surrogate pair)
  #
  # NOTE: the JSON text is spliced into the interpreter source between single
  # quotes, so it must not itself contain a single quote.  Python reads it as
  # a raw string (r'...'); the JS literal is not raw.
  local json=${1:-'"\ud83e\udd26"'}

  # Build each program inside double quotes so $json is inserted verbatim and
  # never word-split or glob-expanded by the shell.
  local py_code="import json; s = json.loads(r'$json'); print(json.dumps(s))"
  local js_code="var s = JSON.parse('$json'); console.log(JSON.stringify(s))"

  # Hm it actually escapes.  I thought it would use raw UTF-8
  python2 -c "$py_code"
  echo

  python3 -c "$py_code"
  echo

  # This doesn't escape
  nodejs -e "$js_code"
  echo
}
|
| 268 |
|
surrogate-half() {
  # Run the surrogate-pair round trip on a lone (unpaired) high surrogate.
  # Round trips correctly!
  surrogate-pair '"\ud83e"'
}
|
| 275 |
|
encode-nan() {
  # Survey: encode NaN and decode the result again in Python 2/3 and node.js.
  #
  # Wow Python doesn't conform to spec!!
  # https://docs.python.org/3.8/library/json.html#infinite-and-nan-number-values

  # allow_nan=False and parse_constant alter the behavior
  local py_default='import json; val = float("nan"); s = json.dumps(val); print(s); print(json.loads(s))'
  local py_strict='import json; val = float("nan"); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))'
  local js_code='var val = NaN; var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));'
  local interp

  # Default settings, both Python versions.
  for interp in python2 python3; do
    "$interp" -c "$py_default" || true
    echo
  done

  python3 -c "$py_strict" || true
  echo

  # nodejs uses null
  nodejs -e "$js_code" || true
  echo
}
|
| 295 |
|
encode-inf() {
  # Survey: encode negative infinity and decode the result again in
  # Python 2/3 and node.js.
  #
  # Again, Python doesn't conform to spec
  local py_default='import json; val = float("-inf"); print(val); s = json.dumps(val); print(s); print(json.loads(s))'
  local py_strict='import json; val = float("-inf"); print(val); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))'
  local js_code='var val = Number.NEGATIVE_INFINITY; console.log(val); var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));'
  local interp

  # Default settings, both Python versions.
  for interp in python2 python3; do
    "$interp" -c "$py_default" || true
    echo
  done

  python3 -c "$py_strict" || true
  echo

  # nodejs uses null again
  nodejs -e "$js_code" || true
  echo
}
|
| 312 |
|
encode-bad-type() {
  # Survey: try to encode values that are not JSON data (a module, the JSON
  # object, a function).  Failures are tolerated with '|| true'.
  local snippet

  python3 -c 'import json; print(json.dumps(json))' || true
  echo

  # {} or undefined - BAD!
  for snippet in \
    'console.log(JSON.stringify(JSON));' \
    'function f() { return 42; }; console.log(JSON.stringify(f));'; do
    nodejs -e "$snippet" || true
  done
  echo
}
|
| 322 |
|
encode-binary-data() {
  # Survey: encode the single byte 0xff.  The same Python source is run under
  # both interpreters; failures are tolerated with '|| true'.
  local py_code='import json; print(json.dumps(b"\xff"))'
  local snippet

  # utf-8 codec can't decode byte -- so it does UTF-8 decoding during encoding,
  # which makes sense
  python2 -c "$py_code" || true
  echo

  # can't serialize bytes type
  python3 -c "$py_code" || true
  echo

  # there is no bytes type?  \xff is a code point in JS
  for snippet in \
    'console.log(JSON.stringify("\xff"));' \
    'console.log(JSON.stringify("\u{ff}"));'; do
    nodejs -e "$snippet" || true
  done
  echo
}
|
| 338 |
|
decode-utf8-in-surrogate-range() {
  # Survey: UTF-8 decode the bytes ed a0 be in Python 2, Python 3 and node.js,
  # with the bytes ce bc as a control case in node.js.
  local js_decode='var string = new TextDecoder("utf-8").decode(u); console.log(string);'

  python2 -c 'b = "\xed\xa0\xbe"; print(repr(b.decode("utf-8")))'
  echo

  # Hm Python 3 gives an error here!
  python3 -c 'b = b"\xed\xa0\xbe"; print(repr(b.decode("utf-8")))' || true
  echo

  # valid
  nodejs -e "var u = new Uint8Array([0xce, 0xbc]); $js_decode"
  echo

  # can't decode!
  nodejs -e "var u = new Uint8Array([0xed, 0xa0, 0xbe]); $js_decode"
  echo
}
|
| 355 |
|
pairs() {
  # Print a JSON array nested DEPTH levels deep around [42,43], wrapped in one
  # outer array, on a single line with no trailing newline.
  # For example, 'pairs 2' prints [[42,[42,43]]].
  #
  # Arguments: $1 - nesting depth (non-negative integer)
  local depth=$1
  local k  # keep the loop counter out of the caller's scope

  printf '%s' '['
  for ((k = 0; k < depth; k++)); do
    printf '%s' '[42,'
  done
  printf '%s' '43]'
  for ((k = 0; k < depth; k++)); do
    printf '%s' ']'
  done
}
|
| 369 |
|
decode-deeply-nested() {
  # Survey: feed a 40200-level nested array (from 'pairs') to each decoder on
  # stdin.  Failures are tolerated with '|| true' so every decoder is tried.
  local msg
  msg=$(pairs 40200)

  local py_code='import json, sys; print(repr(json.load(sys.stdin)))'
  local js_code='var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'

  # RuntimeError
  printf '%s\n' "$msg" | python2 -c "$py_code" || true

  # RecursionError
  printf '%s\n' "$msg" | python3 -c "$py_code" || true

  # Hm node.js handles it fine?  Probably doesn't have a stackful parser.
  # [ [ [ [Array] ] ] ]
  printf '%s\n' "$msg" | nodejs -e "$js_code" || true

  printf '%s\n' "$msg" | bin/osh -c 'json read; = _reply' || true

  # Hm this works past 40K in C++!  Then segmentation fault.  We could put an
  # artificial limit on it.
  local opt_osh=_bin/cxx-opt/osh
  ninja "$opt_osh"  # build the binary before running it
  printf '%s\n' "$msg" | "$opt_osh" -c 'json read; = _reply; echo $[len(_reply)]' || true
}
|
| 392 |
|
# Dispatch: run the command line as a command.  The first argument names one
# of the functions defined above and the remaining arguments are passed to it.
# With no arguments nothing is run.
"$@"
|