1 ## oils_failures_allowed: 1
2
3 #### ${#s} and len(s)
4
# Prints the size of two strings, farmer and facepalm, in two ways: with the
# shell expansion ${#s} and with the YSH expression len(s).
# NOTE(review): farmer and facepalm are not assigned in this case; presumably
# they are defined by the sourced unicode.sh -- confirm.
5 source $REPO_ROOT/spec/testdata/unicode.sh
6
7 # bash agrees
8 echo "farmer scalars =" ${#farmer}
9
10 echo "facepalm scalars =" ${#facepalm}
11
# The expected output has larger numbers for len() (15 and 17) than for ${#s}
# (4 and 5) on the same strings.
# NOTE(review): looks like len() counts UTF-8 bytes while ${#s} counts code
# points -- confirm.
12 echo "farmer len =" $[len(farmer)]
13
14 echo "facepalm len =" $[len(facepalm)]
15
16 ## STDOUT:
17 farmer scalars = 4
18 facepalm scalars = 5
19 farmer len = 15
20 facepalm len = 17
21 ## END
22
23
24 #### JSON \uXXXX\uYYYY as max code point - can't go above
25
# py-decode: reads one JSON document from stdin with Python 2's json module
# and prints the decoded value encoded as UTF-8. The print statement appends a
# trailing newline.
26 py-decode() {
27 python2 -c 'import json, sys; print json.load(sys.stdin).encode("utf-8")'
28 }
29
# to-hex: dumps stdin as hex, one byte per field (-t x1), without the offset
# column (-A n).
30 to-hex() {
31 od -A n -t x1
32 }
33
# A JSON string literal holding the surrogate pair \udbff\udfff. The expected
# output shows that it decodes to the bytes f4 8f bf bf (plus a newline).
34 max='"\udbff\udfff"'
35
36 # Incrementing the first escape by one (\udbff -> \udc00) gives invalid
37 # surrogates, so the encoding is "tight": JSON cannot express anything above max.
38 # too_big='"\udc00\udfff"'
39
# Reference result: decode with Python 2 and show the resulting bytes.
40 echo "$max" | py-decode | to-hex
41
# Same input through 'json read'; the decoded value is then read from $_reply
# and must produce the same bytes as the Python reference.
42 echo "$max" | json read
43 echo "$_reply" | to-hex
44
45 ## STDOUT:
46 f4 8f bf bf 0a
47 f4 8f bf bf 0a
48 ## END
49
50
51
52 #### Parsing data - J8 rejects \u{110000}
53
# Feeds a J8 string containing the escape \u{110000} to 'json8 read' and
# prints its exit status; the expected output is status=1 (rejected).
54 json8 read <<EOF
55 u'\u{110000}'
56 EOF
57 echo status=$?
58
59 ## STDOUT:
60 status=1
61 ## END
62
63
64 #### Parsing source code - YSH rejects \u{110000}
65
# Checks that YSH source code accepts the escape \u{10ffff} but rejects
# \u{110000}; the case as a whole is expected to exit with status 2.
66 # Sanity check first: Python interpreter DOES check big code points,
67 # whereas shells don't
68
69 max=$(python2 -c 'print u"\U0010ffff".encode("utf-8")')
70 echo status max=$?
71
# Python 2 is expected to fail on \U00110000 (status too_big=1 in STDOUT).
72 too_big=$(python2 -c 'print u"\U00110000".encode("utf-8")')
73 echo status too_big=$?
74
75 #echo py max=$max
76 #echo py too_big=$too_big
77
78 # python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$max"
79 # python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$too_big"
80
# The largest escape is accepted and printed; this is the last line of the
# expected STDOUT.
81 var max = u'\u{10ffff}'
82 pp line (max)
83
# One past the maximum: expected to end the script with status 2, so the
# pp line below must not produce output.
84 var too_big = u'\u{110000}'
85 pp line (too_big) # should not get here
86
87 # These are errors too
# NOTE(review): these two statements are presumably never reached, because the
# u'\u{110000}' assignment above is expected to stop the script first.
88 var max = b'\u{10ffff}'
89 var too_big = b'\u{110000}'
90
91 ## status: 2
92 ## STDOUT:
93 status max=0
94 status too_big=1
95 (Str) "􏿿"
96 ## END
97
98
99 #### Parsing source code - YSH source code rejects encoded string
100
# Uses bash's $'\U...' escapes to produce already-encoded characters, embeds
# them raw inside a YSH u'' literal, and runs that text as source code in $SH.
# NOTE(review): the expected STDOUT block of this case is empty even though
# two status= lines are printed; presumably this is the failure permitted by
# 'oils_failures_allowed: 1' at the top of the file -- confirm and fill in the
# expected output once the behavior is decided.
101 max=$(bash <<'EOF'
102 echo $'\U0010ffff'
103 EOF
104 )
105
106 # bash allows the bad one (\U00110000, one above \U0010ffff)
107 too_big=$(bash <<'EOF'
108 echo $'\U00110000'
109 EOF
110 )
111
# Program text is: var x = u'<raw char>'; = x
112 echo "var x = u'"$max"'; = x" | $SH
113 echo status=$?
114 #pp line (_reply)
115
# Same program with the too-big character embedded.
116 echo "var x = u'"$too_big"'; = x" | $SH
117 echo status=$?
118 #pp line (_reply)
119
120 ## STDOUT:
121 ## END
122
123
124 #### JSON and J8 reject encoded string above max code point
125
# Uses bash's $'\U...' escapes to produce already-encoded characters, then
# feeds them (raw, not as \u escapes) inside JSON and J8 string literals to
# 'json read' and 'json8 read'. Expected statuses: 0 for max, 1 for too_big.
126 max=$(bash <<'EOF'
127 echo $'\U0010ffff'
128 EOF
129 )
130
131 # bash allows the bad one (\U00110000, one above \U0010ffff)
132 too_big=$(bash <<'EOF'
133 echo $'\U00110000'
134 EOF
135 )
136
137 # JSON string: the raw character wrapped in double quotes
138
139 echo '"'$max'"' | json read
140 echo status=$?
141 #pp line (_reply)
142
143 # Need to propagate the reason here
144
145 echo '"'$too_big'"' | json read
146 echo status=$?
147 #pp line (_reply)
148
149
150 # J8 string: the raw character wrapped in u'...'
151
152 echo "u'"$max"'" | json8 read
153 echo status=$?
154 #pp line (_reply)
155
156 echo "u'"$too_big"'" | json8 read
157 echo status=$?
158 #pp line (_reply)
159
160 ## STDOUT:
161 status=0
162 status=1
163 status=0
164 status=1
165 ## END
166
167 #### Max code point: json, json8, = keyword, pp line
168
# Prints the string u'\u{10ffff}' with four different printers. Per the
# expected output, each one emits the character itself rather than an escape:
# 'json write', 'json8 write' and 'pp line' use double quotes, '=' uses single
# quotes, and both '=' and 'pp line' add a (Str) prefix.
169 var max = u'\u{10ffff}'
170
171 json write (max)
172 json8 write (max)
173
174 = max
175 pp line (max)
176
177 #echo "var x = u'"$max"'; = x" | $SH
178
179 ## STDOUT:
180 "􏿿"
181 "􏿿"
182 (Str) '􏿿'
183 (Str) "􏿿"
184 ## END