Skip to content

Commit

Permalink
[demo] URLSearchParams function in JavaScript
Browse files Browse the repository at this point in the history
YSH has a good core!  I was able to do it in pure YSH, and Eggex has a
good structure.

But it needs polish!  See comments at the top of the file.
  • Loading branch information
Andy C committed Jul 18, 2024
1 parent 97565d0 commit e80f630
Showing 1 changed file with 271 additions and 0 deletions.
271 changes: 271 additions & 0 deletions demo/url-search-params.ysh
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
#!bin/ysh
#
# Usage:
# demo/url-search-params.ysh <function name>
#
# Tested against JavaScript's URLSearchParams. Differences:
#
# - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
# - YSH turns this into the 0xff byte, denoted as b'\yff'
# - JS accepts '==' as key="" value="="
# - In YSH, this is a syntax error.
# - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
# [["", ""],
# ["", ""],
# ["", ""]]
#
# Evaluation of "the YSH experience":
#
# GOOD:
#
# - Eggex is elegant
# - This code is structured better than the Python stdlib urlparse.py!
# - This problem is also hard/ugly in JavaScript. They use an extra
# s=>replace() on top of decodeURIComponent()!
# - Task files in YSH basically work!
# - I think this file has a nice structure
# - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js
# - Triple quoted multiline strings are nice!
#
# NEEDS WORK:
#
# - need Vim syntax highlighting!
# - e.g. multiline '' strings aren't highlighted
# - need pp [x] for debugging
# - need assert [x] for testing
# - task files need completion
#
# - Eggex can use multiline /// syntax
# - Eggex could use "which" match
# - m=>group('lit') sorta bothers me, it should be
# - m.group('lit')
# - $lit - probably!
# - with vars(m.groupDict()) { ... }
# - Alternative to printf -v probably needed, or at least wrap it in the YSH
# stdlib
#
# - ERROR messages for URL parsing should bubble up to the user!
# - USER code should be able to point to location info for bad escapes
# like %f or %0z
# - I guess we just need an idiom for this? A "class"?

source $LIB_OSH/task-five.sh
#source $LIB_YSH/yblocks.ysh

proc _check (; val) {  # TODO: assert
  ### Do nothing if val is truthy; otherwise print it and fail with an error.

  if (val) {
    return 0
  }

  # Show the offending value before raising
  pp line (val)
  error "Failed: $val"
}

func strFromTwoHex(two_hex) {
  ### Return the string for a pair of hex digits, e.g. '41' -> 'A'.

  # TODO: provide an alternative to this old OSH style!  A Python-style API
  # would include something like:
  #   var i = int(two_hex, 16)

  # printf expands the \xHH escape, and -v stores the result in a variable
  var byte_str
  printf -v byte_str "\\x$two_hex"
  return (byte_str)
}

# A single hexadecimal digit, in either case.
const Hex = / [0-9 a-f A-F] /

# One unit of a URL-encoded string.  There are three alternatives, each with
# its own named capture:
#   lit     - one or more characters other than '%' and '+'
#   plus    - a literal '+'
#   two_hex - the two hex digits that follow a '%'
const Quoted = / <capture !['%+']+ as lit> | <capture '+' as plus> | '%' <capture Hex Hex as two_hex> /

func unquote (s) {
  ### Turn strings with %20 into space, etc.  A '+' also becomes a space.
  #
  # The input is consumed left to right, one Quoted match at a time.  If some
  # input can't be matched, e.g. a '%' that isn't followed by two hex digits,
  # an error is raised that reports the offset where decoding stopped.

  var pos = 0
  var parts = []
  while (true) {
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      break
    }

    # The alternatives in Quoted each have their own capture, so these tell us
    # which one matched
    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    var part
    if (lit) {
      # Literal text is copied through unchanged
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    # Resume matching right after this match
    setvar pos = m => end(0)
  }

  # Include the location so the caller can find the bad escape
  if (pos !== len(s)) {
    error "Unexpected trailing input in unquote at position $pos (length $[len(s)])"
  }

  return (join(parts))
}

proc js-decode-part(s) {
### Decode s with node.js, and write the result to stdout as a JSON string.
#
# The embedded script replaces each '+' with a space and then applies
# decodeURIComponent().  A 'nodejs' executable is required.
nodejs -e '''

var encoded = process.argv[1];

// It does not handle +, because is only for query params, not components?
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
var encoded = encoded.replace(/\+/g, " ")

var j = JSON.stringify(decodeURIComponent(encoded))
process.stdout.write(j);

''' $s
}

# URL-encoded strings that test-part decodes with both node.js and unquote().
const PART_CASES = [
'foo+bar',
'foo%23%40',
# TODO: add cases for empty key, empty value, invalid % , etc.
]

proc test-part() {
  ### Print the node.js decoding and the YSH decoding of each PART_CASES entry.
  #
  # Nothing is asserted; the two results are printed for comparison.

  echo hi

  for encoded in (PART_CASES) {
    # EXTERIOR: node.js prints JSON, which 'json read' stores in _reply
    js-decode-part $encoded | json read
    echo 'JS'
    pp line (_reply)

    # INTERIOR: the pure YSH implementation
    echo 'YSH'
    = unquote(encoded)
    echo
  }
}

#
# Query
#

# JavaScript allows either side of k=v to be empty, so we match that
# A key or a value: zero or more characters other than '&', '=' and space.
const Tok = / !['&= ']* /

# key '=' value.  The '=' is required, but either side may be empty.
const Pair = / <capture Tok as key> '=' <capture Tok as value> /

# A Pair followed by an optional '&' separator, captured as 'sep'.
const Pairs = / Pair <capture '&' as sep>? /

func URLSearchParams(s) {
  ### Turn k=v&foo=spam+eggs&k=v into a list of [key, value] pairs
  #
  # Each key and value is decoded with unquote().  It's an error if the whole
  # string can't be consumed.

  var pairs = []
  var pos = 0

  # Each iteration consumes one Pair, plus the '&' after it if there is one
  while (true) {
    var m = s => leftMatch(Pairs, pos=pos)
    if (not m) {
      break
    }

    var key = unquote(m => group('key'))
    var val = unquote(m => group('value'))
    call pairs->append([key, val])

    setvar pos = m => end(0)

    # Without a separator, this was the last pair
    var more = m => group('sep')
    if (not more) {
      break
    }
  }

  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (pairs)
}

proc js-decode-query(s) {
  ### Parse s with URLSearchParams in node.js, and write the pairs to stdout as JSON.
  #
  # The output is a JSON array of [key, value] arrays.  A 'nodejs' executable
  # is required.

  nodejs -e '''
  const u = new URLSearchParams(process.argv[1]);

  // The loop variable is declared, so no implicit global is created
  const pairs = [];
  for (const pair of u) {
    pairs.push(pair);
  }

  process.stdout.write(JSON.stringify(pairs));
  ''' $s
}

# Query strings that test-query parses with both node.js and URLSearchParams().
const QUERY_CASES = [
'k=foo+bar',
'key=foo%23%40',
'k=v&foo%23=bar+baz+%24%25&k=v',
'foo+bar=z',

# JavaScript converts %ff to the Unicode replacement char - its strings can't represent bytes
'foo%ffbar=z',

'missing_val=&k=',

'=missing_key&=m2',

# This is valid
'=&=',
'=&=&',

# JavaScript treats = as literal - that seems wrong
# YSH treating this as an error seems right
#'==',
]

proc test-query() {
  ### Print each QUERY_CASES input, then the node.js result and the YSH result.
  #
  # Nothing is asserted; the two results are printed for comparison.

  for s in (QUERY_CASES) {
    echo 'INPUT'
    echo " $s"

    # EXTERIOR: node.js prints JSON, which 'json read' stores in _reply
    js-decode-query $s | json read
    echo 'JS'
    pp line (_reply)

    # INTERIOR: the pure YSH implementation
    echo 'YSH'
    pp line (URLSearchParams(s))

    echo
  }
}

proc run-tests() {
# Pass this script ($0) to the 'test' command of devtools/byo.sh
devtools/byo.sh test $0
}

# Entry point.  task-five comes from the task-five.sh library sourced near the
# top; presumably it runs the proc named by the first argument (see Usage) --
# TODO confirm against task-five.sh.
task-five "$@"

0 comments on commit e80f630

Please sign in to comment.