| 1 | #!bin/ysh
 | 
| 2 | #
 | 
| 3 | # Usage:
 | 
| 4 | #   demo/url-search-params.ysh <function name>
 | 
| 5 | #
 | 
| 6 | # Tested against JavaScript's URLSearchParams.  Differences:
 | 
| 7 | #
 | 
| 8 | # - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
 | 
| 9 | #   - YSH turns this into the 0xff byte, denoted as b'\yff'
 | 
| 10 | # - JS accepts '==' as key="" value="="
 | 
| 11 | #   - In YSH, this is a syntax error.
 | 
| 12 | # - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
 | 
| 13 | #   [["", ""],
 | 
| 14 | #    ["", ""],
 | 
| 15 | #    ["", ""]]
 | 
| 16 | #
 | 
| 17 | # Evaluation of "the YSH experience":
 | 
| 18 | #
 | 
| 19 | # GOOD:
 | 
| 20 | #
 | 
| 21 | # - Eggex is elegant
 | 
| 22 | #   - This code is structured better than the Python stdlib urlparse.py!
 | 
| 23 | #   - This problem is also hard/ugly in JavaScript.  They use an extra
 | 
| 24 | #     s=>replace() on top of decodeURIComponent()!
 | 
| 25 | # - Task files in YSH basically work!
 | 
| 26 | #   - I think this file has a nice structure
 | 
| 27 | # - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js 
 | 
| 28 | # - Triple quoted multiline strings are nice!
 | 
| 29 | #
 | 
| 30 | # NEEDS WORK:
 | 
| 31 | #
 | 
| 32 | # - need Vim syntax highlighting!
 | 
| 33 | #   - e.g. multiline '' strings aren't highlighted
 | 
| 34 | # - need pp [x] for debugging
 | 
| 35 | # - need assert [x] for testing
 | 
| 36 | # - task files need completion
 | 
| 37 | #
 | 
| 38 | # - Eggex could use multiline /// syntax, though you can use \ for line continuation
 | 
| 39 | # - Eggex could use "which" match
 | 
| 40 | # - m=>group('lit') sorta bothers me, it should be 
 | 
| 41 | #   - m.group('lit')
 | 
| 42 | #   - $lit - probably!
 | 
| 43 | #   - with vars(m.groupDict()) { ... }
 | 
| 44 | # - Alternative to printf -v probably needed, or at least wrap it in the YSH
 | 
| 45 | #   stdlib
 | 
| 46 | #
 | 
| 47 | # - ERROR messages for URL parsing should bubble up to the user!
 | 
| 48 | #   - USER code should be able to point to location info for bad escapes
 | 
| 49 | #   like %f or %0z
 | 
| 50 | #   - I guess we just need an idiom for this?  A "class"?
 | 
| 51 | 
 | 
| 52 | source $LIB_OSH/task-five.sh
 | 
| 53 | #source $LIB_YSH/yblocks.ysh
 | 
| 54 | 
 | 
func strFromTwoHex(two_hex) {
  ### Return the string that printf produces for the escape \xHH, where HH is two_hex.

  # TODO: provide an alternative to the old OSH style!
  # A Python-style version would start from something like
  #   var i = int(two_hex, 16)

  # printf -v interprets the \x escape and stores the result in 'byte'
  var byte
  printf -v byte "\\x$two_hex"
  return (byte)
}
 | 
| 65 | 
 | 
# A single hexadecimal digit, in either case
const Hex = / [0-9 a-f A-F] /

# One unit of a percent-encoded string.  The named group that is set tells the
# caller which alternative matched:
#   lit     - a run of characters other than % and +
#   plus    - a literal +
#   two_hex - the two hex digits that follow a %
const Quoted = / \
    <capture !['%+']+ as lit> | \
    <capture '+' as plus> | \
    '%' <capture Hex Hex as two_hex> \
  /
 | 
| 73 | 
 | 
func unquote (s) {
  ### Turn strings with %20 into space, etc.
  #
  # Scans s left to right with the Quoted pattern:
  #   - a run of ordinary characters is copied through
  #   - '+' becomes a space
  #   - %HH becomes the result of strFromTwoHex('HH')
  # Returns the decoded parts joined together.  Fails with an error that
  # reports the stopping position if some input can't be matched, e.g. a '%'
  # that isn't followed by 2 hex digits.

  var pos = 0
  var parts = []
  while (true) {
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      break  # end of input or malformed input; told apart after the loop
    }

    # Which of these is set depends on the alternative of Quoted that matched
    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    var part
    if (lit) {
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    # Continue right after the text that was just consumed
    setvar pos = m => end(0)
  }

  # Report where matching stopped, in the same form URLSearchParams() uses,
  # so the bad escape can be located
  if (pos !== len(s)) {
    error "Unexpected trailing input in unquote $pos != $[len(s)]"
  }

  return (join(parts))
}
 | 
| 116 | 
 | 
proc js-decode-part(s) {
  ### Print, as JSON, how node.js decodes the percent-encoded string s.
  #
  # This is the reference that unquote() is compared against in test-part.

  nodejs -e '''

  // decodeURIComponent() does not turn + into a space, so do that first.
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
  const encoded = process.argv[1].replace(/\+/g, " ");

  const j = JSON.stringify(decodeURIComponent(encoded));
  process.stdout.write(j);

  ''' $s
}
 | 
| 131 | 
 | 
# Inputs for test-part; each one is decoded by both node.js and unquote().
# TODO: add empty key, empty value, invalid %, etc.
const PART_CASES = [
  'foo+bar',
  'foo%23%40',
]
 | 
| 137 | 
 | 
proc test-part() {
  ### Check that unquote() agrees with node.js on every string in PART_CASES.

  for s in (PART_CASES) {
    # Same report layout as test-query: input, JS result, YSH result
    echo 'INPUT'
    echo "  $s"

    # js-decode-part prints JSON, which is read into the variable 'js'
    js-decode-part $s | json read (&js)
    echo 'JS'
    pp line (js)

    echo 'YSH'
    var y = unquote(s)
    pp line (y)

    assert [y === js]

    echo
  }
}
 | 
| 158 | 
 | 
| 159 | #
 | 
| 160 | # Query
 | 
| 161 | #
 | 
| 162 | 
 | 
# Either side of k=v may be empty, because JavaScript allows that too.
# A token is any run of characters other than & = and space.
const Tok = / !['&= ']* /

# key=value, with each side captured by name
const Pair = / \
    <capture Tok as key> \
    '=' \
    <capture Tok as value> \
  /

# One pair plus the & that may follow it; 'sep' is set only when the & is there
const Pairs = / Pair <capture '&' as sep>? /
 | 
| 169 | 
 | 
func URLSearchParams(s) {
  ### Turn k=v&foo=spam+eggs&k=v into a list of pairs
  #
  # Each element of the result is [key, value], with both sides passed
  # through unquote().  Fails with an error if part of s can't be consumed.

  var decoded = []
  var pos = 0
  var more = true

  # Consume one key=value pair per iteration, starting where the last one ended
  while (more) {
    var hit = s => leftMatch(Pairs, pos=pos)
    if (hit) {
      var key = hit => group('key')
      var value = hit => group('value')
      call decoded->append([unquote(key), unquote(value)])

      setvar pos = hit => end(0)

      # Without a trailing & there is nothing more to look for
      var amp = hit => group('sep')
      if (not amp) {
        setvar more = false
      }
    } else {
      setvar more = false
    }
  }

  # Whatever the loop could not consume is reported as an error
  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (decoded)
}
 | 
| 207 | 
 | 
proc js-decode-query(s) {
  ### Print, as JSON, the list of pairs that node.js's URLSearchParams yields for s.
  #
  # This is the reference that the YSH URLSearchParams() is compared against
  # in test-query.

  nodejs -e '''

  const u = new URLSearchParams(process.argv[1]);

  // Spreading the iterable collects the same items a for-of loop would push
  const pairs = [...u];

  process.stdout.write(JSON.stringify(pairs));
  ''' $s
}
 | 
| 225 | 
 | 
# Inputs for test-query, which asserts that YSH and JavaScript agree on them
const QUERY_CASES = [
  'k=foo+bar',
  'key=foo%23%40',
  'k=v&foo%23=bar+baz+%24%25&k=v',
  'foo+bar=z',

  # Empty values
  'missing_val=&k=',

  # Empty keys
  '=missing_key&=m2',

  # This is valid
  '=&=',
  '=&=&',
]

# Inputs on which the two implementations differ
const OTHER_CASES = [
  # JavaScript converts %ff to the Unicode replacement char - its strings
  # can't represent bytes
  'foo%ffbar=z',

  # JavaScript treats = as literal - that seems wrong
  # YSH treating this as an error seems right
  '==',
]
 | 
| 251 | 
 | 
| 252 | 
 | 
proc test-query() {
  ### Check that URLSearchParams() agrees with node.js on every string in QUERY_CASES.

  # To look at the known differences instead, loop over OTHER_CASES:
  #for s in (OTHER_CASES) {
  for s in (QUERY_CASES) {
    echo 'INPUT'
    echo "  $s"

    # js-decode-query prints JSON, which is read into 'expected'
    js-decode-query $s | json read (&expected)
    echo 'JS'
    pp line (expected)

    echo 'YSH'
    var actual = URLSearchParams(s)
    pp line (actual)

    assert [actual === expected]

    echo
  }
}
 | 
| 272 | 
 | 
proc run-tests() {
  ### Hand this script ($0) to the 'test' action of devtools/byo.sh.

  devtools/byo.sh test $0
}

# Entry point: pass the command line (<function name>, per the usage note at
# the top of this file) to task-five, which comes from $LIB_OSH/task-five.sh
task-five "$@"
 |