OILS / demo / url-search-params.ysh View on Github | oilshell.org

277 lines, 121 significant
1#!bin/ysh
2#
3# Usage:
4# demo/url-search-params.ysh <function name>
5#
6# Tested against JavaScript's URLSearchParams. Differences:
7#
8# - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
9# - YSH turns this into the 0xff byte, denoted as b'\yff'
10# - JS accepts '==' as key="" value="="
11# - In YSH, this is a syntax error.
12# - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
13# [["", ""],
14# ["", ""],
15# ["", ""]]
16#
17# Evaluation of "the YSH experience":
18#
19# GOOD:
20#
21# - Eggex is elegant
22# - This code is structured better than the Python stdlib urlparse.py!
23# - This problem is also hard/ugly in JavaScript. They use an extra
24# s=>replace() on top of decodeURIComponent()!
25# - Task files in YSH basically work!
26# - I think this file has a nice structure
27# - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js
28# - Triple quoted multiline strings are nice!
29#
30# NEEDS WORK:
31#
32# - need Vim syntax highlighting!
33# - e.g. multiline '' strings aren't highlighted
34# - need pp [x] for debugging
35# - need assert [x] for testing
36# - task files need completion
37#
38# - Eggex can use multiline /// syntax, though you can use \ for line continuation
39# - Eggex could use "which" match
40# - m=>group('lit') sorta bothers me, it should be
41# - m.group('lit')
42# - $lit - probably!
43# - with vars(m.groupDict()) { ... }
44# - Alternative to printf -v probably needed, or at least wrap it in the YSH
45# stdlib
46#
47# - ERROR messages for URL parsing should bubble up to the user!
48# - USER code should be able to point to location info for bad escapes
49# like %f or %0z
50# - I guess we just need an idiom for this? A "class"?
51
52source $LIB_OSH/task-five.sh
53#source $LIB_YSH/yblocks.ysh
54
func strFromTwoHex(two_hex) {
  ### Turn two hex digits, e.g. '41', into the single byte they denote.

  # TODO: provide alternative to old OSH style!
  # Python style would include something like int(two_hex, 16).  Until then,
  # let printf expand the \xHH escape and store it with -v.
  var byte
  printf -v byte "\\x$two_hex"
  return (byte)
}
65
# One hexadecimal digit, in either case.
const Hex = / [a-f A-F 0-9] /

# One unit of a percent-encoded string.  The three alternatives start with
# different characters, so at most one named group is set per match:
#   two_hex - the two hex digits that follow a '%'
#   plus    - a single '+'
#   lit     - a run of characters that are neither '%' nor '+'
const Quoted = / \
    '%' <capture Hex Hex as two_hex> \
  | <capture '+' as plus> \
  | <capture !['%+']+ as lit> \
  /
73
func unquote (s) {
  ### Turn strings with %20 into space, etc.

  # s is consumed left to right, one Quoted match at a time:
  #   - a literal run is copied through unchanged
  #   - '+' becomes a space
  #   - '%' followed by two hex digits becomes the byte with that value
  #
  # Raises an error when s can't be consumed entirely, e.g. because of a bad
  # escape like %f or %0z.  The message says how far decoding got.

  var pos = 0
  var parts = []
  while (true) {
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      # Either the end of s, or something Quoted doesn't accept.  The check
      # after the loop tells the two apart.
      break
    }

    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    # The alternatives in Quoted are mutually exclusive, so only one of these
    # is truthy.
    var part
    if (lit) {
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    setvar pos = m => end(0)
  }
  if (pos !== len(s)) {
    # Report the stopping position, in the same format as URLSearchParams
    # does, so the caller can locate the bad escape.
    error "Unexpected trailing input in unquote $pos != $[len(s)]"
  }

  return (join(parts))
}
116
proc js-decode-part(s) {
  ### Decode s with node.js and write the result to stdout as a JSON string.

  # The script replaces '+' with a space itself, before calling
  # decodeURIComponent().
  var script = '''

  var encoded = process.argv[1];

  // It does not handle +, because is only for query params, not components?
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
  var encoded = encoded.replace(/\+/g, " ")

  var j = JSON.stringify(decodeURIComponent(encoded))
  process.stdout.write(j);

  '''

  nodejs -e $script $s
}
131
# Inputs for test-part, which decodes each one with both node.js and unquote().
# Not listed yet: empty key, empty value, invalid % , etc.
const PART_CASES = ['foo+bar', 'foo%23%40']
137
proc test-part() {
  ### Assert that unquote() and node.js decode every PART_CASES entry alike.

  echo hi

  #_check ('foo bar' === unquote('foo+bar'))

  for encoded in (PART_CASES) {
    # EXTERIOR: the answer from JavaScript, read back from its JSON output
    js-decode-part $encoded | json read (&expected)
    echo JS
    pp line (expected)

    # INTERIOR: the answer from this file
    echo YSH
    var actual = unquote(encoded)
    pp line (actual)

    assert [actual === expected]

    echo
  }
}
158
159#
160# Query
161#
162
# A key or a value: a run of characters other than '&', '=' and space.  The
# run may be empty, because JavaScript allows either side of k=v to be empty
# and we match that.
const Tok = / ![' =&']* /

# key=value, with each side captured under its own name.
const Pair = / <capture Tok as key> '=' <capture Tok as value> /

# A Pair, plus the '&' that follows it when there is one.
const Pairs = / Pair <capture '&' as sep>? /
169
func URLSearchParams(s) {
  ### Parse a query string like k=v&foo=spam+eggs&k=v into [key, value] pairs.

  # Each key and value is decoded with unquote().  An error is raised when
  # part of s is left over after the last pair that could be matched.

  var decoded = []
  var pos = 0

  while (true) {
    var hit = s => leftMatch(Pairs, pos=pos)
    if (not hit) {
      break
    }

    var key = unquote(hit => group('key'))
    var value = unquote(hit => group('value'))
    call decoded->append([key, value])

    setvar pos = hit => end(0)

    # A pair with no '&' after it has to be the last one.
    var sep = hit => group('sep')
    if (not sep) {
      break
    }
  }

  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (decoded)
}
207
proc js-decode-query(s) {
  ### Parse s with JavaScript's URLSearchParams; write the pairs to stdout as JSON.

  var script = '''

  const u = new URLSearchParams(process.argv[1]);
  //console.log(JSON.stringify(u));

  var pairs = []
  for (pair of u) {
    pairs.push(pair)
  }

  var j = JSON.stringify(pairs);

  //console.log(j):
  process.stdout.write(j);
  '''

  nodejs -e $script $s
}
225
# Inputs for test-query, which asserts that URLSearchParams() here and in
# node.js produce the same pairs.
const QUERY_CASES = [
  # '+' and %XX escapes, in values and in keys
  'k=foo+bar',
  'key=foo%23%40',
  'k=v&foo%23=bar+baz+%24%25&k=v',
  'foo+bar=z',

  # an empty value, then an empty key
  'missing_val=&k=',
  '=missing_key&=m2',

  # This is valid: both sides empty, with or without a trailing '&'
  '=&=',
  '=&=&',
]

# Inputs on which YSH and JavaScript are known to differ.  test-query only
# loops over these when its alternate 'for' line is enabled by hand.
const OTHER_CASES = [
  # JavaScript converts %ff to the Unicode replacement char - its strings
  # can't represent bytes
  'foo%ffbar=z',

  # JavaScript treats = as literal - that seems wrong
  # YSH treating this as an error seems right
  '==',
]
251
252
proc test-query() {
  ### Assert that URLSearchParams() and node.js parse every QUERY_CASES entry alike.

  # To look at the known differences instead, loop over OTHER_CASES:
  #for query in (OTHER_CASES) {
  for query in (QUERY_CASES) {
    echo INPUT
    echo " $query"

    # EXTERIOR: the answer from JavaScript, read back from its JSON output
    js-decode-query $query | json read (&expected)
    echo JS
    pp line (expected)

    # INTERIOR: the answer from this file
    echo YSH
    var actual = URLSearchParams(query)
    pp line (actual)

    assert [actual === expected]

    echo
  }
}
272
proc run-tests() {
  ### Hand this script to devtools/byo.sh in 'test' mode.

  # NOTE(review): presumably byo.sh then runs the test-* procs above - confirm
  # against devtools/byo.sh.
  devtools/byo.sh test "$0"
}
276
# Entry point: pass the command line on to task-five (see Usage at the top,
# where the first argument is a function name).
task-five @ARGV