## oils_failures_allowed: 0
## compare_shells: bash mksh zsh



# Should YSH be different? It would be nice.
# We would have to validate all Lit_Chars tokens, and the like.
#
# The logical place to put that would be in osh/word_parse.py where we read
# single and double quoted strings. Although there might be a global lexer
# hack for Id.Lit_Chars tokens. Would that catch here docs though?

# Test all the lexing contexts: an unquoted word, single quotes, double
# quotes, and $'' quotes. The quoted 'EOF' delimiter keeps the here doc body
# literal, so unicode.sh receives the line exactly as written below.
cat >unicode.sh << 'EOF'
echo μ 'μ' "μ" $'μ'
EOF

# Show that all lexer modes recognize unicode sequences
#
# Oh I guess we need to check here docs too?

#$SH -n unicode.sh

# Run the valid UTF-8 script with the shell under test.
$SH unicode.sh

# Trim off the first byte of mu (0xce of the UTF-8 pair ce bc). What is left
# is a lone 0xbc byte, so not-unicode.sh is no longer valid UTF-8.
# NOTE(review): matching a single byte with \xce presumably needs sed to run
# in a byte-oriented (C) locale -- confirm against the test runner's env.
sed 's/\xce//g' unicode.sh > not-unicode.sh

echo --
# Dump the second run as hex bytes, because its output is not printable text.
$SH not-unicode.sh | od -A n -t x1

## STDOUT:
μ μ μ μ
--
 bc 20 bc 20 bc 20 bc 0a
## END


# dash and ash don't support $''



# Skip this case on the shells that lack $'' support.
case $SH in dash|ash) exit ;; esac

# The same code point, U+03BC (Greek small letter mu), is written with the
# 4-digit \u form and the 8-digit \U form in three different contexts.

# 1. $'' quoting
echo $'\u03bc \U000003bc'

# 2. echo -e
echo -e '\u03bc \U000003bc'

# 3. printf format string
printf '\u03bc \U000003bc\n'

## STDOUT:
μ μ
μ μ
μ μ
## END

## N-I dash/ash STDOUT:
## END



# Skip this case on the shells that lack $'' support.
case $SH in dash|ash) exit ;; esac

# Print the Python 2 repr() of the first argument, so the exact bytes the
# shell produced are visible as \xNN escapes.
# Arguments: $1 - string to show (all arguments are forwarded to python2,
#                 but only sys.argv[1] is printed)
# Outputs:   one line on stdout
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# U+10FFFF, the last Unicode code point, written as an 8-digit \U escape in
# three contexts: $'' quoting, echo -e, and printf.
py-repr $'\U0010ffff'
py-repr $(echo -e '\U0010ffff')
py-repr $(printf '\U0010ffff')

## STDOUT:
'\xf4\x8f\xbf\xbf'
'\xf4\x8f\xbf\xbf'
'\xf4\x8f\xbf\xbf'
## END

## N-I dash/ash STDOUT:
## END

# mksh prints the Unicode replacement char (U+FFFD) for $'' and echo -e

## BUG mksh STDOUT:
'\xef\xbf\xbd'
'\xef\xbf\xbd'
'\xf4\x8f\xbf\xbf'
## END



# Print the Python 2 repr() of the first argument, so the exact bytes the
# shell produced are visible as \xNN escapes.
# Arguments: $1 - string to show (all arguments are forwarded to python2,
#                 but only sys.argv[1] is printed)
# Outputs:   one line on stdout
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# U+110000 is one past U+10FFFF, the last Unicode code point; show what bytes
# $'' quoting produces for it.
py-repr $'\U00110000'

## STDOUT:
'\xf4\x90\x80\x80'
## END

## BUG mksh STDOUT:
'\xef\xbf\xbd'
## END



# Print the Python 2 repr() of the first argument, so the exact bytes the
# shell produced are visible as \xNN escapes.
# Arguments: $1 - string to show (all arguments are forwarded to python2,
#                 but only sys.argv[1] is printed)
# Outputs:   one line on stdout
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# U+DC00 is a surrogate code point; write it in $'' quoting with the 4-digit
# \u form ...
py-repr $'\udc00'

# ... and with the 8-digit \U form.
py-repr $'\U0000dc00'

## STDOUT:
'\xed\xb0\x80'
'\xed\xb0\x80'
## END

## OK zsh status: 1
## OK zsh STDOUT:
## END



# Skip this case entirely on mksh.
case $SH in mksh) exit ;; esac

# Print the Python 2 repr() of the first argument, so the exact bytes the
# shell produced are visible as \xNN escapes.
# Arguments: $1 - string to show (all arguments are forwarded to python2,
#                 but only sys.argv[1] is printed)
# Outputs:   one line on stdout
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# U+110000 is one past U+10FFFF, the last Unicode code point. Capture what
# echo -e prints for it. The status of a plain assignment is the status of its
# command substitution, so $? reports whether echo itself failed.
e="$(echo -e '\U00110000')"
echo status=$?
py-repr "$e"

# Same check for printf.
p="$(printf '\U00110000')"
echo status=$?
py-repr "$p"

## STDOUT:
status=0
'\xf4\x90\x80\x80'
status=0
'\xf4\x90\x80\x80'
## END

## BUG mksh STDOUT:
## END


# Skip this case entirely on mksh.
case $SH in mksh) exit ;; esac

# Print the Python 2 repr() of the first argument, so the exact bytes the
# shell produced are visible as \xNN escapes.
# Arguments: $1 - string to show (all arguments are forwarded to python2,
#                 but only sys.argv[1] is printed)
# Outputs:   one line on stdout
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# U+DC00 is a surrogate code point. Capture what echo -e prints for it in the
# 4-digit \u form. The status of a plain assignment is the status of its
# command substitution, so $? reports whether the builtin itself failed.
e="$(echo -e '\udc00')"
echo status=$?
py-repr "$e"

# echo -e, 8-digit \U form
e="$(echo -e '\U0000dc00')"
echo status=$?
py-repr "$e"

# printf, 4-digit \u form
p="$(printf '\udc00')"
echo status=$?
py-repr "$p"

# printf, 8-digit \U form
p="$(printf '\U0000dc00')"
echo status=$?
py-repr "$p"

## STDOUT:
status=0
'\xed\xb0\x80'
status=0
'\xed\xb0\x80'
status=0
'\xed\xb0\x80'
status=0
'\xed\xb0\x80'
## END

## BUG zsh STDOUT:
status=0
''
status=0
''
status=0
''
status=0
''
## END

## BUG mksh STDOUT:
## END