OILS / spec / ysh-unicode.test.sh View on Github | oilshell.org

184 lines, 68 significant
1## oils_failures_allowed: 1
2
3#### ${#s} and len(s)
4
5source $REPO_ROOT/spec/testdata/unicode.sh
6
7# bash agrees
8echo "farmer scalars =" ${#farmer}
9
10echo "facepalm scalars =" ${#facepalm}
11
12echo "farmer len =" $[len(farmer)]
13
14echo "facepalm len =" $[len(facepalm)]
15
16## STDOUT:
17farmer scalars = 4
18facepalm scalars = 5
19farmer len = 15
20facepalm len = 17
21## END
22
23
24#### JSON \uXXXX\uYYYY as max code point - can't go above
25
# Reference decoder: parse one JSON document from stdin with Python 2's json
# module and print the decoded value re-encoded as UTF-8. `print` appends a
# trailing newline, which is why the expected hex dump below ends in 0a.
# Requires a python2 interpreter (uses the Python 2 print statement).
py-decode() {
  python2 -c 'import json, sys; print json.load(sys.stdin).encode("utf-8")'
}
29
# Dump stdin as two-digit hexadecimal bytes so byte sequences can be compared
# textually. `-A n` suppresses od's offset column; `-t x1` selects one-byte
# hex units.
to-hex() {
  od -A n -t x1
}
33
34max='"\udbff\udfff"'
35
36# incrementing by one gives invalid surrogates
37# the encoding is "tight"
38# too_big='"\udc00\udfff"'
39
40echo "$max" | py-decode | to-hex
41
42echo "$max" | json read
43echo "$_reply" | to-hex
44
45## STDOUT:
46 f4 8f bf bf 0a
47 f4 8f bf bf 0a
48## END
49
50
51
52#### Parsing data - J8 rejects \u{110000}
53
54json8 read <<EOF
55u'\u{110000}'
56EOF
57echo status=$?
58
59## STDOUT:
60status=1
61## END
62
63
64#### Parsing source code - YSH rejects \u{110000}
65
66# Sanity check first: Python interpreter DOES check big code points,
67# whereas shells don't
68
69max=$(python2 -c 'print u"\U0010ffff".encode("utf-8")')
70echo status max=$?
71
72too_big=$(python2 -c 'print u"\U00110000".encode("utf-8")')
73echo status too_big=$?
74
75#echo py max=$max
76#echo py too_big=$too_big
77
78# python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$max"
79# python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$too_big"
80
81var max = u'\u{10ffff}'
82pp line (max)
83
84var too_big = u'\u{110000}'
85pp line (too_big) # should not get here
86
87# These are errors too
88var max = b'\u{10ffff}'
89var too_big = b'\u{110000}'
90
91## status: 2
92## STDOUT:
93status max=0
94status too_big=1
95(Str) "􏿿"
96## END
97
98
99#### Parsing source code - YSH source code rejects encoded string
100
101max=$(bash <<'EOF'
102echo $'\U0010ffff'
103EOF
104)
105
106# bash allows the bad one
107too_big=$(bash <<'EOF'
108echo $'\U00110000'
109EOF
110)
111
112echo "var x = u'"$max"'; = x" | $SH
113echo status=$?
114#pp line (_reply)
115
116echo "var x = u'"$too_big"'; = x" | $SH
117echo status=$?
118#pp line (_reply)
119
120## STDOUT:
121## END
122
123
124#### JSON and J8 reject encoded string above max code point
125
126max=$(bash <<'EOF'
127echo $'\U0010ffff'
128EOF
129)
130
131# bash allows the bad one
132too_big=$(bash <<'EOF'
133echo $'\U00110000'
134EOF
135)
136
137# JSON string
138
139echo '"'$max'"' | json read
140echo status=$?
141#pp line (_reply)
142
143# Need to propagate the reason here
144
145echo '"'$too_big'"' | json read
146echo status=$?
147#pp line (_reply)
148
149
150# J8 string
151
152echo "u'"$max"'" | json8 read
153echo status=$?
154#pp line (_reply)
155
156echo "u'"$too_big"'" | json8 read
157echo status=$?
158#pp line (_reply)
159
160## STDOUT:
161status=0
162status=1
163status=0
164status=1
165## END
166
167#### Max code point: json, json8, = keyword, pp line
168
169var max = u'\u{10ffff}'
170
171json write (max)
172json8 write (max)
173
174= max
175pp line (max)
176
177#echo "var x = u'"$max"'; = x" | $SH
178
179## STDOUT:
180"􏿿"
181"􏿿"
182(Str) "􏿿"
183(Str) "􏿿"
184## END