1 | #!bin/ysh
|
2 | #
|
3 | # Usage:
|
4 | # demo/url-search-params.ysh <function name>
|
5 | #
|
6 | # Tested against JavaScript's URLSearchParams. Differences:
|
7 | #
|
8 | # - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
|
9 | # - YSH turns this into the 0xff byte, denoted as b'\yff'
|
10 | # - JS accepts '==' as key="" value="="
|
11 | # - In YSH, this is a syntax error.
|
12 | # - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
|
13 | # [["", ""],
|
14 | # ["", ""],
|
15 | # ["", ""]]
|
16 | #
|
17 | # Evaluation of "the YSH experience":
|
18 | #
|
19 | # GOOD:
|
20 | #
|
21 | # - Eggex is elegant
|
22 | # - This code is structured better than the Python stdlib urlparse.py!
|
23 | # - This problem is also hard/ugly in JavaScript. They use an extra
|
24 | # s=>replace() on top of decodeURIComponent()!
|
25 | # - Task files in YSH basically work!
|
26 | # - I think this file has a nice structure
|
27 | # - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js
|
28 | # - Triple quoted multiline strings are nice!
|
29 | #
|
30 | # NEEDS WORK:
|
31 | #
|
32 | # - need Vim syntax highlighting!
|
33 | # - e.g. multiline '' strings aren't highlighted
|
34 | # - need pp [x] for debugging
|
35 | # - need assert [x] for testing
|
36 | # - task files need completion
|
37 | #
|
38 | # - Eggex can use multiline /// syntax, though you can use \ for line continuation
|
39 | # - Eggex could use "which" match
|
40 | # - m=>group('lit') sorta bothers me, it should be
|
41 | # - m.group('lit')
|
42 | # - $lit - probably!
|
43 | # - with vars(m.groupDict()) { ... }
|
44 | # - Alternative to printf -v probably needed, or at least wrap it in the YSH
|
45 | # stdlib
|
46 | #
|
47 | # - ERROR messages for URL parsing should bubble up to the user!
|
48 | # - USER code should be able to point to location info for bad escapes
|
49 | # like %f or %0z
|
50 | # - I guess we just need an idiom for this? A "class"?
|
51 |
|
52 | source $LIB_OSH/task-five.sh
|
53 | #source $LIB_YSH/yblocks.ysh
|
54 |
|
func strFromTwoHex(two_hex) {
  ### Return the string that printf produces for the escape \xNN, where NN is two_hex

  # TODO: provide alternative to old OSH style!
  #
  # Python style would include something like this
  #   var i = int(two_hex, 16)

  # printf -v stores the formatted output in the named variable rather than
  # printing it.  The format is a backslash, 'x', and the two hex digits.
  var byte_str
  printf -v byte_str "\\x$two_hex"
  return (byte_str)
}
|
65 |
|
# A single hexadecimal digit, upper or lower case.
const Hex = / [0-9 a-f A-F] /

# One unit of percent-encoded text.  Each alternative has its own named group:
#   lit     - a run of one or more chars other than '%' and '+'
#   plus    - a single '+'
#   two_hex - the two hex digits that follow a '%'
# A '%' that is not followed by two hex digits matches none of the alternatives.
const Quoted = / \
    <capture !['%+']+ as lit> \
  | <capture '+' as plus> \
  | '%' <capture Hex Hex as two_hex> \
/
|
73 |
|
func unquote (s) {
  ### Turn strings with %20 into space, etc.
  #
  # Decoding rules, one Quoted match at a time:
  #   - a run of chars other than '%' and '+' is copied as-is
  #   - '+' becomes a space
  #   - '%' followed by two hex digits becomes strFromTwoHex() of those digits
  #
  # Fails with an error when part of s can't be matched, e.g. a '%' that is
  # not followed by two hex digits (%f, %0z).  The message reports the position
  # where decoding stopped, so callers can point at the bad escape.

  var pos = 0
  var parts = []
  while (true) {
    # Anchored match: the next unit must start exactly at pos
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      break
    }

    # Each group belongs to a different alternative of Quoted
    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    var part
    if (lit) {
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    # Resume right after the text that was just consumed
    setvar pos = m => end(0)
  }

  # The loop stops at the first position that Quoted can't match.  Anything
  # left over is malformed input.
  if (pos !== len(s)) {
    error "Unexpected trailing input in unquote at position $pos of $[len(s)]"
  }

  return (join(parts))
}
|
116 |
|
proc js-decode-part(s) {
  ### Decode s with node.js; write the result to stdout as a JSON string

  # The JS program reads s from process.argv[1]
  nodejs -e '''

  // decodeURIComponent() does not turn + into a space, presumably because +
  // only means space in query params, not in URI components in general.
  // So replace + before decoding.
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
  const encoded = process.argv[1].replace(/\+/g, " ");

  const j = JSON.stringify(decodeURIComponent(encoded));
  process.stdout.write(j);

  ''' $s
}
|
131 |
|
# Encoded strings that test-part decodes with both node.js and unquote()
const PART_CASES = [
  'foo+bar',
  'foo%23%40',
  # TODO: empty key, empty value, invalid % , etc.
]
|
137 |
|
proc test-part() {
  ### Assert that unquote() agrees with node.js on every entry of PART_CASES

  echo hi

  #_check ('foo bar' === unquote('foo+bar'))

  for encoded in (PART_CASES) {
    # Reference answer: node.js prints JSON, which is parsed into from_js
    js-decode-part $encoded | json read (&from_js)
    echo 'JS'
    pp line (from_js)

    # Answer from the YSH implementation under test
    echo 'YSH'
    var from_ysh = unquote(encoded)
    pp line (from_ysh)

    assert [from_ysh === from_js]

    echo
  }
}
|
158 |
|
159 | #
|
160 | # Query
|
161 | #
|
162 |
|
# JavaScript allows either side of k=v to be empty, so we match that
# Tok: zero or more chars other than '&', '=' and space.
const Tok = / !['&= ']* /

# One key=value pair; the key and value are captured still encoded.
const Pair = / <capture Tok as key> '=' <capture Tok as value> /

# A pair plus an optional '&' separator.  The 'sep' group is set only when the
# '&' is present, which is how URLSearchParams() decides whether to continue.
const Pairs = / Pair <capture '&' as sep>? /
|
169 |
|
func URLSearchParams(s) {
  ### Turn k=v&foo=spam+eggs&k=v into a list of pairs
  #
  # Returns a list of [key, value] lists, where key and value have each been
  # passed through unquote().  Fails with an error when the Pairs pattern
  # can't consume all of s.

  var result = []
  var pos = 0

  # Each iteration consumes one 'key=value' and, if present, the '&' after it
  while (true) {
    var found = s => leftMatch(Pairs, pos=pos)
    if (not found) {
      break
    }

    var raw_key = found => group('key')
    var raw_value = found => group('value')
    var pair = [unquote(raw_key), unquote(raw_value)]
    call result->append(pair)

    setvar pos = found => end(0)

    # Without a '&' after this pair, stop looking for more pairs
    var has_more = found => group('sep')
    if (not has_more) {
      break
    }
  }

  # Whatever the loop did not consume is malformed input
  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (result)
}
|
207 |
|
proc js-decode-query(s) {
  ### Parse query string s with node.js URLSearchParams; write a JSON list of [key, value] pairs to stdout

  # The JS program reads s from process.argv[1]
  nodejs -e '''

  const u = new URLSearchParams(process.argv[1]);

  // Iterating a URLSearchParams yields [key, value] pairs; collect them all
  const pairs = [...u];

  process.stdout.write(JSON.stringify(pairs));
  ''' $s
}
|
225 |
|
# Query strings that test-query parses with both node.js and URLSearchParams(),
# asserting that the results are equal
const QUERY_CASES = [
  'k=foo+bar',
  'key=foo%23%40',
  'k=v&foo%23=bar+baz+%24%25&k=v',
  'foo+bar=z',

  # Empty values
  'missing_val=&k=',

  # Empty keys
  '=missing_key&=m2',

  # This is valid
  '=&=',
  '=&=&',

]
|
241 |
|
# Inputs on which YSH and JavaScript are known to differ.  test-query does not
# run these by default; its loop over OTHER_CASES is commented out.
const OTHER_CASES = [

  # JavaScript converts %ff to the Unicode replacement char - its strings can't represent bytes
  'foo%ffbar=z',

  # JavaScript treats = as literal - that seems wrong
  # YSH treating this as an error seems right
  '==',
]
|
251 |
|
252 |
|
proc test-query() {
  ### Assert that URLSearchParams() agrees with node.js on every entry of QUERY_CASES

  # To look at the inputs where the two are known to differ, loop over
  # OTHER_CASES instead:
  #for s in (OTHER_CASES) {
  for s in (QUERY_CASES) {
    echo 'INPUT'
    echo " $s"

    # Reference answer: node.js prints JSON, which is parsed into from_js
    js-decode-query $s | json read (&from_js)
    echo 'JS'
    pp line (from_js)

    # Answer from the YSH implementation under test
    echo 'YSH'
    var from_ysh = URLSearchParams(s)
    pp line (from_ysh)

    assert [from_ysh === from_js]

    echo
  }
}
|
272 |
|
proc run-tests() {
  ### Run devtools/byo.sh in 'test' mode on this script ($0)
  devtools/byo.sh test $0
}
|
276 |
|
# Entry point: pass the command-line arguments to task-five.  Presumably it is
# defined by the sourced $LIB_OSH/task-five.sh and runs the proc named by the
# first argument, per the Usage note at the top of this file -- confirm there.
task-five "$@"
|