OILS / demo / url-search-params.ysh View on Github | oilshell.org

271 lines, 117 significant
1#!bin/ysh
2#
3# Usage:
4# demo/url-search-params.ysh <function name>
5#
6# Tested against JavaScript's URLSearchParams. Differences:
7#
8# - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
9# - YSH turns this into the 0xff byte, denoted as b'\yff'
10# - JS accepts '==' as key="" value="="
11# - In YSH, this is a syntax error.
12# - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
13# [["", ""],
14# ["", ""],
15# ["", ""]]
16#
17# Evaluation of "the YSH experience":
18#
19# GOOD:
20#
21# - Eggex is elegant
22# - This code is structured better than the Python stdlib urlparse.py!
23# - This problem is also hard/ugly in JavaScript. They use an extra
24# s=>replace() on top of decodeURIComponent()!
25# - Task files in YSH basically work!
26# - I think this file has a nice structure
27# - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js
28# - Triple quoted multiline strings are nice!
29#
30# NEEDS WORK:
31#
32# - need Vim syntax highlighting!
33# - e.g. multiline '' strings aren't highlighted
34# - need pp [x] for debugging
35# - need assert [x] for testing
36# - task files need completion
37#
38# - Eggex can use multiline /// syntax
39# - Eggex could use "which" match
40# - m=>group('lit') sorta bothers me, it should be
41# - m.group('lit')
42# - $lit - probably!
43# - with vars(m.groupDict()) { ... }
44# - Alternative to printf -v probably needed, or at least wrap it in the YSH
45# stdlib
46#
47# - ERROR messages for URL parsing should bubble up to the user!
48# - USER code should be able to point out to location info for bad escapes
49# like %f or %0z
50# - I guess we just need an idiom for this? A "class"?
51
52source $LIB_OSH/task-five.sh
53#source $LIB_YSH/yblocks.ysh
54
proc _check (; val) {  # TODO: assert
  ### Do nothing when val is truthy; otherwise print val and fail with an error.

  if (val) {
    return 0
  }

  # Show the offending value before failing, so the cause is visible.
  pp line (val)
  error "Failed: $val"
}
61
func strFromTwoHex(two_hex) {
  ### Return the string that printf produces for the escape \xHH, where HH is two_hex.

  # TODO: provide alternative to old OSH style!
  #
  # Python style would include something like this
  #   var i = int(two_hex, 16)

  # printf -v stores its output in the named variable instead of printing it.
  var decoded
  printf -v decoded "\\x$two_hex"
  return (decoded)
}
72
# One hexadecimal digit, in either case.
const Hex = / [0-9 a-f A-F] /

# One decodable unit of a percent-encoded string.  Each alternative sets a
# different named group:
#   lit     - a non-empty run of characters other than '%' and '+'
#   plus    - a single '+'
#   two_hex - the two hex digits that follow a '%'
const Quoted = / <capture !['%+']+ as lit> | <capture '+' as plus> | '%' <capture Hex Hex as two_hex> /

func unquote (s) {
  ### Turn strings with %20 into space, etc.
  #
  # Decoding rules:
  #   - literal runs are copied unchanged
  #   - '+' becomes a space
  #   - '%HH' becomes the result of strFromTwoHex('HH')
  #
  # Fails with an error when part of s cannot be decoded, e.g. a bad escape
  # like %f or %0z.  The message reports where matching stopped and the
  # length of the input, so callers can point at the offending text.

  var pos = 0
  var parts = []
  while (true) {
    # Match exactly one unit, anchored at the current position.
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      break
    }

    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    var part
    if (lit) {
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    # Continue right after the text that was just consumed.
    setvar pos = m => end(0)
  }

  # The loop stops at the first position where nothing matches.  If that is
  # not the end of the string, the rest is malformed.
  if (pos !== len(s)) {
    error "Unexpected trailing input in unquote at position $pos of $[len(s)]"
  }

  return (join(parts))
}
119
proc js-decode-part(s) {
  ### Decode s with node.js and write the result to stdout as a JSON string.

  # The reference implementation: '+' is replaced by a space first, then
  # decodeURIComponent() handles the % escapes.
  var script = '''

  var encoded = process.argv[1];

  // It does not handle +, because is only for query params, not components?
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
  var encoded = encoded.replace(/\+/g, " ")

  var j = JSON.stringify(decodeURIComponent(encoded))
  process.stdout.write(j);

  '''

  nodejs -e $script $s
}
134
# Encoded strings that test-part decodes with both implementations.
const PART_CASES = [
  'foo+bar',
  'foo%23%40',
  # empty key, empty value, invalid % , etc.
]

proc test-part() {
  ### Print the node.js and the YSH decoding of every entry in PART_CASES.
  #
  # The two results are only printed, one after the other; nothing is
  # compared automatically.

  echo hi

  for encoded in (PART_CASES) {
    # Reference result: node.js emits JSON, which 'json read' puts in _reply.
    js-decode-part $encoded | json read
    echo 'JS'
    pp line (_reply)

    # Result from the YSH implementation
    echo 'YSH'
    = unquote(encoded)
    echo
  }
}
157
158#
159# Query
160#
161
# JavaScript allows either side of k=v to be empty, so we match that:
# Tok may be zero characters long.  It stops at '&', '=' or a space.
const Tok = / !['&= ']* /

# key=value, where both sides are still encoded
const Pair = / <capture Tok as key> '=' <capture Tok as value> /

# One pair, optionally followed by the '&' that introduces the next one
const Pairs = / Pair <capture '&' as sep>? /

func URLSearchParams(s) {
  ### Turn k=v&foo=spam+eggs&k=v into a list of [key, value] pairs, both decoded
  #
  # Fails with an error when the whole string cannot be consumed.  Errors
  # raised by unquote() on a key or value are not caught here.

  var result = []
  var pos = 0

  while (true) {
    var found = s => leftMatch(Pairs, pos=pos)
    if (not found) {
      break
    }

    var raw_key = found => group('key')
    var raw_value = found => group('value')
    call result->append([unquote(raw_key), unquote(raw_value)])

    # Resume after this pair, including its '&' if there was one.
    setvar pos = found => end(0)

    # Without a separator there cannot be another pair.
    var sep = found => group('sep')
    if (not sep) {
      break
    }
  }

  # Anything left over at this point did not parse as a pair.
  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (result)
}
206
proc js-decode-query(s) {
  ### Parse s with node.js URLSearchParams; write the pairs to stdout as JSON.

  # The reference implementation: collect every [key, value] pair that
  # URLSearchParams yields and serialize the list.
  var script = '''

  const u = new URLSearchParams(process.argv[1]);
  //console.log(JSON.stringify(u));

  var pairs = []
  for (pair of u) {
    pairs.push(pair)
  }

  var j = JSON.stringify(pairs);

  //console.log(j):
  process.stdout.write(j);
  '''

  nodejs -e $script $s
}
224
# Query strings that test-query parses with both implementations.
const QUERY_CASES = [
  'k=foo+bar',
  'key=foo%23%40',
  'k=v&foo%23=bar+baz+%24%25&k=v',
  'foo+bar=z',

  # JavaScript converts %ff to the Unicode replacement char - its strings can't represent bytes
  'foo%ffbar=z',

  'missing_val=&k=',

  '=missing_key&=m2',

  # This is valid
  '=&=',
  '=&=&',

  # JavaScript treats = as literal - that seems wrong
  # YSH treating this as an error seems right
  #'==',
]

proc test-query() {
  ### Print the node.js and the YSH parse of every entry in QUERY_CASES.
  #
  # The results are only printed next to each other, not compared: some
  # cases (like %ff) are known to differ between the two implementations.

  for query in (QUERY_CASES) {
    echo 'INPUT'
    echo " $query"

    # Reference result: node.js emits JSON, which 'json read' puts in _reply.
    js-decode-query $query | json read
    echo 'JS'
    pp line (_reply)

    # Result from the YSH implementation
    echo 'YSH'
    pp line (URLSearchParams(query))

    echo
  }
}
266
proc run-tests() {
  ### Run this file's tests by handing the script itself to devtools/byo.sh.

  devtools/byo.sh test $0
}

# Entry point: pass the command line to task-five, which comes from the
# task-five.sh library sourced at the top of this file.  Per the usage note
# in the header, the first argument is the name of the function to run.
task-five @ARGV