1 | ## oils_failures_allowed: 0
|
2 | ## compare_shells: bash mksh zsh
|
3 |
|
4 | #### OSH source code doesn't have to be valid Unicode (like other shells)
|
5 |
|
6 | # Should YSH be different? It would be nice.
|
7 | # We would have to validate all Lit_Chars tokens, and the like.
|
8 | #
|
9 | # The logical place to put that would be in osh/word_parse.py where we read
|
10 | # single and double quoted strings. Although there might be a global lexer
|
11 | # hack for Id.Lit_Chars tokens. Would that catch here docs though?
|
12 |
|
13 | # Test all the lexing contexts
|
14 | cat >unicode.sh << 'EOF'
|
15 | echo μ 'μ' "μ" $'μ'
|
16 | EOF
|
17 |
|
18 | # Run the script to show that every lexing context used above (bare word, '', "", $'') accepts UTF-8 sequences
|
19 | #
|
20 | # TODO: here docs are another lexing context that should probably be checked too.
|
21 |
|
22 | #$SH -n unicode.sh
|
23 |
|
24 | $SH unicode.sh  # expected to echo the four mu characters unchanged
|
25 |
|
26 | # Delete every 0xce byte, i.e. the first byte of each UTF-8 mu (ce bc), leaving only the lone 0xbc bytes
|
27 | sed 's/\xce//g' unicode.sh > not-unicode.sh
|
28 |
|
29 | echo --
|
30 | $SH not-unicode.sh | od -A n -t x1  # hex-dump the output bytes, without the offset column
|
31 |
|
32 | ## STDOUT:
|
33 | μ μ μ μ
|
34 | --
|
35 | bc 20 bc 20 bc 20 bc 0a
|
36 | ## END
|
37 |
|
38 |
|
39 | # dash and ash don't support $''
|
40 |
|
41 | #### Unicode escapes \u03bc \U000003bc in $'', echo -e, printf
|
42 |
|
43 | case $SH in dash|ash) exit ;; esac  # dash and ash don't support $'', so bail out with no output
|
44 |
|
45 | echo $'\u03bc \U000003bc'  # 4-digit \u and 8-digit \U escapes inside $'' quoting
|
46 |
|
47 | echo -e '\u03bc \U000003bc'  # the same escapes, interpreted by echo -e
|
48 |
|
49 | printf '\u03bc \U000003bc\n'  # the same escapes, interpreted in printf's format string
|
50 |
|
51 | ## STDOUT:
|
52 | μ μ
|
53 | μ μ
|
54 | μ μ
|
55 | ## END
|
56 |
|
57 | ## N-I dash/ash STDOUT:
|
58 | ## END
|
59 |
|
60 | #### Max code point U+10ffff can be escaped with $'' printf echo -e
|
61 |
|
62 | case $SH in dash|ash) exit ;; esac  # dash and ash are skipped; they produce no output for this case
|
63 |
|
64 | py-repr() {  # print the Python 2 repr() of the first argument, so its bytes show up as \xNN escapes
|
65 | python2 -c 'import sys; print repr(sys.argv[1])' "$@"
|
66 | }
|
67 |
|
68 | py-repr $'\U0010ffff'  # max code point via $'' quoting
|
69 | py-repr $(echo -e '\U0010ffff')  # max code point via echo -e
|
70 | py-repr $(printf '\U0010ffff')  # max code point via printf
|
71 |
|
72 | ## STDOUT:
|
73 | '\xf4\x8f\xbf\xbf'
|
74 | '\xf4\x8f\xbf\xbf'
|
75 | '\xf4\x8f\xbf\xbf'
|
76 | ## END
|
77 |
|
78 | ## N-I dash/ash STDOUT:
|
79 | ## END
|
80 |
|
81 | # mksh emits U+FFFD, the Unicode replacement char (ef bf bd), for $'' and echo -e, but not for printf
|
82 |
|
83 | ## BUG mksh STDOUT:
|
84 | '\xef\xbf\xbd'
|
85 | '\xef\xbf\xbd'
|
86 | '\xf4\x8f\xbf\xbf'
|
87 | ## END
|
88 |
|
89 | #### $'' does NOT check that 0x110000 is too big at parse time
|
90 |
|
91 | py-repr() {  # print the Python 2 repr() of the first argument, so its bytes show up as \xNN escapes
|
92 | python2 -c 'import sys; print repr(sys.argv[1])' "$@"
|
93 | }
|
94 |
|
95 | py-repr $'\U00110000'  # 0x110000 is one past the max code point U+10FFFF
|
96 |
|
97 | ## STDOUT:
|
98 | '\xf4\x90\x80\x80'
|
99 | ## END
|
100 |
|
101 | ## BUG mksh STDOUT:
|
102 | '\xef\xbf\xbd'
|
103 | ## END
|
104 |
|
105 | #### $'' does not check for surrogate range at parse time
|
106 |
|
107 | py-repr() {  # print the Python 2 repr() of the first argument, so its bytes show up as \xNN escapes
|
108 | python2 -c 'import sys; print repr(sys.argv[1])' "$@"
|
109 | }
|
110 |
|
111 | py-repr $'\udc00'  # surrogate code point written with the 4-digit \u form
|
112 |
|
113 | py-repr $'\U0000dc00'  # same surrogate written with the 8-digit \U form
|
114 |
|
115 | ## STDOUT:
|
116 | '\xed\xb0\x80'
|
117 | '\xed\xb0\x80'
|
118 | ## END
|
119 |
|
120 | ## OK zsh status: 1
|
121 | ## OK zsh STDOUT:
|
122 | ## END
|
123 |
|
124 |
|
125 | #### printf / echo -e do NOT check max code point at runtime
|
126 | case $SH in mksh) exit ;; esac  # mksh is skipped; it produces no output for this case
|
127 |
|
128 | py-repr() {  # print the Python 2 repr() of the first argument, so its bytes show up as \xNN escapes
|
129 | python2 -c 'import sys; print repr(sys.argv[1])' "$@"
|
130 | }
|
131 |
|
132 | e="$(echo -e '\U00110000')"  # 0x110000 is one past the max code point U+10FFFF
|
133 | echo status=$?  # status of the assignment, i.e. of the command substitution above
|
134 | py-repr "$e"
|
135 |
|
136 | p="$(printf '\U00110000')"  # same out-of-range value through printf
|
137 | echo status=$?  # status of the assignment, i.e. of the command substitution above
|
138 | py-repr "$p"
|
139 |
|
140 | ## STDOUT:
|
141 | status=0
|
142 | '\xf4\x90\x80\x80'
|
143 | status=0
|
144 | '\xf4\x90\x80\x80'
|
145 | ## END
|
146 |
|
147 | ## BUG mksh STDOUT:
|
148 | ## END
|
149 |
|
150 | #### printf / echo -e do NOT check surrogates at runtime
|
151 | case $SH in mksh) exit ;; esac  # mksh is skipped; it produces no output for this case
|
152 |
|
153 | py-repr() {  # print the Python 2 repr() of the first argument, so its bytes show up as \xNN escapes
|
154 | python2 -c 'import sys; print repr(sys.argv[1])' "$@"
|
155 | }
|
156 |
|
157 | e="$(echo -e '\udc00')"  # surrogate code point, 4-digit \u form, via echo -e
|
158 | echo status=$?  # status of the assignment, i.e. of the command substitution above
|
159 | py-repr "$e"
|
160 |
|
161 | e="$(echo -e '\U0000dc00')"  # same surrogate, 8-digit \U form, via echo -e
|
162 | echo status=$?
|
163 | py-repr "$e"
|
164 |
|
165 | p="$(printf '\udc00')"  # surrogate code point, 4-digit \u form, via printf
|
166 | echo status=$?
|
167 | py-repr "$p"
|
168 |
|
169 | p="$(printf '\U0000dc00')"  # same surrogate, 8-digit \U form, via printf
|
170 | echo status=$?
|
171 | py-repr "$p"
|
172 |
|
173 | ## STDOUT:
|
174 | status=0
|
175 | '\xed\xb0\x80'
|
176 | status=0
|
177 | '\xed\xb0\x80'
|
178 | status=0
|
179 | '\xed\xb0\x80'
|
180 | status=0
|
181 | '\xed\xb0\x80'
|
182 | ## END
|
183 |
|
184 | ## BUG zsh STDOUT:
|
185 | status=0
|
186 | ''
|
187 | status=0
|
188 | ''
|
189 | status=0
|
190 | ''
|
191 | status=0
|
192 | ''
|
193 | ## END
|
194 |
|
195 | ## BUG mksh STDOUT:
|
196 | ## END
|