1 | ## oils_failures_allowed: 1
|
2 |
|
3 | #### ${#s} and len(s)
|
4 | # Prints ${#s} and len(s) for two variables, farmer and facepalm (presumably set by the sourced unicode.sh)
|
5 | source $REPO_ROOT/spec/testdata/unicode.sh
|
6 |
|
7 | # bash agrees (presumably: bash reports the same ${#s} values -- confirm)
|
8 | echo "farmer scalars =" ${#farmer}
|
9 |
|
10 | echo "facepalm scalars =" ${#facepalm}
|
11 | # len() on the same variables; the expected output shows larger counts than ${#s}
|
12 | echo "farmer len =" $[len(farmer)]
|
13 |
|
14 | echo "facepalm len =" $[len(facepalm)]
|
15 |
|
16 | ## STDOUT:
|
17 | farmer scalars = 4
|
18 | facepalm scalars = 5
|
19 | farmer len = 15
|
20 | facepalm len = 17
|
21 | ## END
|
22 |
|
23 |
|
24 | #### JSON \uXXXX\uYYYY as max code point - can't go above
|
25 | # py-decode: parse a JSON value from stdin with Python 2's json module and print it UTF-8 encoded
|
26 | py-decode() {
|
27 | python2 -c 'import json, sys; print json.load(sys.stdin).encode("utf-8")'
|
28 | }
|
29 | # to-hex: dump stdin as hex bytes (-t x1) with no offset column (-A n)
|
30 | to-hex() {
|
31 | od -A n -t x1
|
32 | }
|
33 | # JSON string holding the surrogate pair \udbff\udfff; expected bytes below are f4 8f bf bf plus newline
|
34 | max='"\udbff\udfff"'
|
35 |
|
36 | # incrementing by one gives invalid surrogates
|
37 | # the encoding is "tight"
|
38 | # too_big='"\udc00\udfff"'
|
39 |
|
40 | echo "$max" | py-decode | to-hex
|
41 | # Same input through 'json read'; the decoded value is then echoed from $_reply
|
42 | echo "$max" | json read
|
43 | echo "$_reply" | to-hex
|
44 |
|
45 | ## STDOUT:
|
46 | f4 8f bf bf 0a
|
47 | f4 8f bf bf 0a
|
48 | ## END
|
49 |
|
50 |
|
51 |
|
52 | #### Parsing data - J8 rejects \u{110000}
|
53 | # Feed 'json8 read' a u'' string whose escape is one past 10ffff; the expected exit status is 1
|
54 | json8 read <<EOF
|
55 | u'\u{110000}'
|
56 | EOF
|
57 | echo status=$?
|
58 |
|
59 | ## STDOUT:
|
60 | status=1
|
61 | ## END
|
62 |
|
63 |
|
64 | #### Parsing source code - YSH rejects \u{110000}
|
65 |
|
66 | # Sanity check first: Python interpreter DOES check big code points,
|
67 | # whereas shells don't
|
68 |
|
69 | max=$(python2 -c 'print u"\U0010ffff".encode("utf-8")')
|
70 | echo status max=$?
|
71 |
|
72 | too_big=$(python2 -c 'print u"\U00110000".encode("utf-8")')
|
73 | echo status too_big=$?
|
74 |
|
75 | #echo py max=$max
|
76 | #echo py too_big=$too_big
|
77 |
|
78 | # python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$max"
|
79 | # python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$too_big"
|
80 | # YSH u'' literal at 10ffff: accepted, and its pp line output is the last expected STDOUT line
|
81 | var max = u'\u{10ffff}'
|
82 | pp line (max)
|
83 | # One past the maximum: the case is expected to end here with status 2; nothing later appears in STDOUT
|
84 | var too_big = u'\u{110000}'
|
85 | pp line (too_big) # should not get here
|
86 |
|
87 | # These are errors too
|
88 | var max = b'\u{10ffff}'
|
89 | var too_big = b'\u{110000}'
|
90 |
|
91 | ## status: 2
|
92 | ## STDOUT:
|
93 | status max=0
|
94 | status too_big=1
|
95 | (Str) ""
|
96 | ## END
|
97 |
|
98 |
|
99 | #### Parsing source code - YSH source code rejects encoded string
|
100 | # Use bash to produce the $'\U...' strings, then splice them into a u'' literal piped to $SH
|
101 | max=$(bash <<'EOF'
|
102 | echo $'\U0010ffff'
|
103 | EOF
|
104 | )
|
105 |
|
106 | # bash allows the bad one
|
107 | too_big=$(bash <<'EOF'
|
108 | echo $'\U00110000'
|
109 | EOF
|
110 | )
|
111 |
|
112 | echo "var x = u'"$max"'; = x" | $SH
|
113 | echo status=$?
|
114 | #pp line (_reply)
|
115 |
|
116 | echo "var x = u'"$too_big"'; = x" | $SH
|
117 | echo status=$?
|
118 | #pp line (_reply)
|
119 | # NOTE(review): both 'echo status=' lines print, yet expected STDOUT is empty -- presumably the oils_failures_allowed case; confirm
|
120 | ## STDOUT:
|
121 | ## END
|
122 |
|
123 |
|
124 | #### JSON and J8 reject encoded string above max code point
|
125 | # Use bash to produce the $'\U...' strings, then embed them in a JSON "" string and a J8 u'' string
|
126 | max=$(bash <<'EOF'
|
127 | echo $'\U0010ffff'
|
128 | EOF
|
129 | )
|
130 |
|
131 | # bash allows the bad one
|
132 | too_big=$(bash <<'EOF'
|
133 | echo $'\U00110000'
|
134 | EOF
|
135 | )
|
136 |
|
137 | # JSON string
|
138 |
|
139 | echo '"'$max'"' | json read
|
140 | echo status=$?
|
141 | #pp line (_reply)
|
142 |
|
143 | # Need to propagate the reason here
|
144 |
|
145 | echo '"'$too_big'"' | json read
|
146 | echo status=$?
|
147 | #pp line (_reply)
|
148 |
|
149 |
|
150 | # J8 string
|
151 |
|
152 | echo "u'"$max"'" | json8 read
|
153 | echo status=$?
|
154 | #pp line (_reply)
|
155 |
|
156 | echo "u'"$too_big"'" | json8 read
|
157 | echo status=$?
|
158 | #pp line (_reply)
|
159 |
|
160 | ## STDOUT:
|
161 | status=0
|
162 | status=1
|
163 | status=0
|
164 | status=1
|
165 | ## END
|
166 |
|
167 | #### Max code point: json, json8, = keyword, pp line
|
168 | # Print the same u'\u{10ffff}' value through each of the four output paths named in the title
|
169 | var max = u'\u{10ffff}'
|
170 |
|
171 | json write (max)
|
172 | json8 write (max)
|
173 |
|
174 | = max
|
175 | pp line (max)
|
176 |
|
177 | #echo "var x = u'"$max"'; = x" | $SH
|
178 |
|
179 | ## STDOUT:
|
180 | ""
|
181 | ""
|
182 | (Str) ''
|
183 | (Str) ""
|
184 | ## END
|