## oils_failures_allowed: 0
## compare_shells: bash mksh zsh

#### OSH source code doesn't have to be valid Unicode (like other shells)

# Should YSH be different?  It would be nice.
# We would have to validate all Lit_Chars tokens, and the like.
#
# The logical place to put that would be in osh/word_parse.py where we read
# single and double quoted strings.  Although there might be a global lexer
# hack for Id.Lit_Chars tokens.  Would that catch here docs though?

# Test all the lexing contexts: unquoted, single quoted, double quoted, and
# $'' strings.  The quoted 'EOF' delimiter keeps the here doc body literal.
cat >unicode.sh << 'EOF'
echo μ 'μ' "μ" $'μ'
EOF

# Show that all lexer modes recognize unicode sequences
#
# Oh I guess we need to check here docs too?

#$SH -n unicode.sh

$SH unicode.sh

# Trim off the first byte of mu (0xce), leaving only the byte 0xbc, so the
# script is no longer valid UTF-8.
#
# LC_ALL=C forces sed to operate on raw bytes.  In a UTF-8 locale sed matches
# whole characters, so a pattern for the single byte 0xce may not match the
# first byte of a valid two-byte mu.
LC_ALL=C sed 's/\xce//g' unicode.sh > not-unicode.sh

echo --
# Dump the output as hex bytes, since it is not printable as UTF-8.
$SH not-unicode.sh | od -A n -t x1

## STDOUT:
μ μ μ μ
--
 bc 20 bc 20 bc 20 bc 0a
## END

# dash and ash don't support $''

#### Unicode escapes \u03bc \U000003bc in $'', echo -e, printf

# Not implemented in dash/ash: exit early so stdout is empty, matching the
# N-I expectation below.
case $SH in dash|ash) exit ;; esac

# The same code point (U+03BC, mu) written with the 4-digit \u and the 8-digit
# \U escape, in each of the three contexts that interpret these escapes.

# 1. $'' string
echo $'\u03bc \U000003bc'

# 2. echo -e
echo -e '\u03bc \U000003bc'

# 3. printf format string
printf '\u03bc \U000003bc\n'

## STDOUT:
μ μ
μ μ
μ μ
## END

## N-I dash/ash STDOUT:
## END

#### Max code point U+10ffff can be escaped with $'' printf echo -e

case $SH in dash|ash) exit ;; esac

# Print the Python 2 repr() of the first argument, so the exact bytes the shell
# produced are visible as \xNN escapes.
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

py-repr $'\U0010ffff'
# Quote the command substitutions so the result is always passed as exactly
# one argument, even if it is empty.
py-repr "$(echo -e '\U0010ffff')"
py-repr "$(printf '\U0010ffff')"

## STDOUT:
'\xf4\x8f\xbf\xbf'
'\xf4\x8f\xbf\xbf'
'\xf4\x8f\xbf\xbf'
## END

## N-I dash/ash STDOUT:
## END

# Unicode replacement char: in mksh the first two lines are U+FFFD (ef bf bd)

## BUG mksh STDOUT:
'\xef\xbf\xbd'
'\xef\xbf\xbd'
'\xf4\x8f\xbf\xbf'
## END

#### $'' does NOT check that 0x110000 is too big at parse time

# Print the Python 2 repr() of the first argument, so the exact bytes the shell
# produced are visible as \xNN escapes.
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# 0x110000 is one past the maximum code point U+10FFFF.  The expected output
# shows it is still encoded as 4 bytes rather than rejected.
py-repr $'\U00110000'

## STDOUT:
'\xf4\x90\x80\x80'
## END

## BUG mksh STDOUT:
'\xef\xbf\xbd'
## END

#### $'' does not check for surrogate range at parse time

# Print the Python 2 repr() of the first argument, so the exact bytes the shell
# produced are visible as \xNN escapes.
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# U+DC00 is a surrogate code point, written with the 4-digit \u escape ...
py-repr $'\udc00'

# ... and with the 8-digit \U escape.
py-repr $'\U0000dc00'

## STDOUT:
'\xed\xb0\x80'
'\xed\xb0\x80'
## END

## OK zsh status: 1
## OK zsh STDOUT:
## END


#### printf / echo -e do NOT check max code point at runtime
# mksh is skipped: exit early so stdout is empty, matching the BUG mksh
# expectation below.
case $SH in mksh) exit ;; esac

# Print the Python 2 repr() of the first argument, so the exact bytes the shell
# produced are visible as \xNN escapes.
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# 0x110000 is one past the maximum code point U+10FFFF.  Record the exit status
# of each command substitution as well as the bytes it produced.
e="$(echo -e '\U00110000')"
echo status=$?
py-repr "$e"

p="$(printf '\U00110000')"
echo status=$?
py-repr "$p"

## STDOUT:
status=0
'\xf4\x90\x80\x80'
status=0
'\xf4\x90\x80\x80'
## END

## BUG mksh STDOUT:
## END

#### printf / echo -e do NOT check surrogates at runtime
# mksh is skipped: exit early so stdout is empty, matching the BUG mksh
# expectation below.
case $SH in mksh) exit ;; esac

# Print the Python 2 repr() of the first argument, so the exact bytes the shell
# produced are visible as \xNN escapes.
py-repr() {
  python2 -c 'import sys; print repr(sys.argv[1])' "$@"
}

# U+DC00 is a surrogate code point.  Try both the \u and \U spellings with
# echo -e, recording the exit status and the bytes produced.
e="$(echo -e '\udc00')"
echo status=$?
py-repr "$e"

e="$(echo -e '\U0000dc00')"
echo status=$?
py-repr "$e"

# Same two spellings with printf.
p="$(printf '\udc00')"
echo status=$?
py-repr "$p"

p="$(printf '\U0000dc00')"
echo status=$?
py-repr "$p"

## STDOUT:
status=0
'\xed\xb0\x80'
status=0
'\xed\xb0\x80'
status=0
'\xed\xb0\x80'
status=0
'\xed\xb0\x80'
## END

## BUG zsh STDOUT:
status=0
''
status=0
''
status=0
''
status=0
''
## END

## BUG mksh STDOUT:
## END