-
-
Notifications
You must be signed in to change notification settings - Fork 163
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[demo] URLSearchParams function in JavaScript
YSH has a good core! I was able to do it in pure YSH, and Eggex has a good structure. But it needs polish! See comments at the top of the file.
- Loading branch information
Andy C
committed
Jul 18, 2024
1 parent
97565d0
commit e80f630
Showing
1 changed file
with
271 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,271 @@ | ||
#!bin/ysh | ||
# | ||
# Usage: | ||
# demo/url-search-params.ysh <function name> | ||
# | ||
# Tested against JavaScript's URLSearchParams. Differences: | ||
# | ||
# - JS strings can't represent bytes, so %ff turns into the Unicode replacement char. | ||
# - YSH turns this into the 0xff byte, denoted as b'\yff' | ||
# - JS accepts '==' as key="" value="=" | ||
# - In YSH, this is a syntax error. | ||
# - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs: | ||
# [["", ""], | ||
# ["", ""], | ||
# ["", ""]] | ||
# | ||
# Evaluation of "the YSH experience": | ||
# | ||
# GOOD: | ||
# | ||
# - Eggex is elegant | ||
# - This code is structured better than the Python stdlib urlparse.py! | ||
# - This problem is also hard/ugly in JavaScript. They use an extra | ||
# s=>replace() on top of decodeURIComponent()! | ||
# - Task files in YSH basically work! | ||
# - I think this file has a nice structure | ||
# - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js | ||
# - Triple quoted multiline strings are nice! | ||
# | ||
# NEEDS WORK: | ||
# | ||
# - need Vim syntax highlighting! | ||
# - e.g. multiline '' strings aren't highlighted | ||
# - need pp [x] for debugging | ||
# - need assert [x] for testing | ||
# - task files need completion | ||
# | ||
# - Eggex can use multiline /// syntax | ||
# - Eggex could use "which" match | ||
# - m=>group('lit') sorta bothers me, it should be | ||
# - m.group('lit') | ||
# - $lit - probably! | ||
# - with vars(m.groupDict()) { ... } | ||
# - Alternative to printf -v probably needed, or at least wrap it in the YSH | ||
# stdlib | ||
# | ||
# - ERROR messages for URL parsing should bubble up to the user! | ||
# - USER code should be able to point to location info for bad escapes | ||
# like %f or %0z | ||
# - I guess we just need an idiom for this? A "class"? | ||
|
||
source $LIB_OSH/task-five.sh | ||
#source $LIB_YSH/yblocks.ysh | ||
|
||
# Minimal assertion helper: raise an error when val is falsy. | ||
# val is passed as a typed argument, e.g.  _check (x === y) | ||
proc _check (; val) { # TODO: assert | ||
if (not val) { | ||
# Show the offending value before failing | ||
pp line (val) | ||
error "Failed: $val" | ||
} | ||
} | ||
|
||
# Turn two hex digits (the XX of a %XX escape) into the string that printf | ||
# produces for the escape \xXX, and return it. | ||
func strFromTwoHex(two_hex) { | ||
var result | ||
# TODO: provide alternative to old OSH style! | ||
|
||
# Python style would include something like this | ||
# var i = int(two_hex, 16) | ||
|
||
# "\\x" is a literal backslash followed by x, so printf sees \xXX as its | ||
# format; -v stores the output in result instead of printing it. | ||
printf -v result "\\x$two_hex" | ||
return (result) | ||
} | ||
|
||
# A single hexadecimal digit, either case | ||
const Hex = / [0-9 a-f A-F] / | ||
|
||
# One token of an encoded string; exactly one of the named groups is set: | ||
#   lit     - a non-empty run of characters other than % and + | ||
#   plus    - a single + | ||
#   two_hex - the two hex digits that follow a % | ||
const Quoted = / <capture !['%+']+ as lit> | <capture '+' as plus> | '%' <capture Hex Hex as two_hex> / | ||
|
||
func unquote (s) {
  ### Turn strings with %20 into space, etc.

  # Scans s left to right, one Quoted token at a time:
  #   lit     - run of chars other than % and +, copied unchanged
  #   plus    - a single +, decoded as a space
  #   two_hex - the two hex digits after %, decoded by strFromTwoHex()
  #
  # Returns the decoded parts joined into one string.
  # Fails with an error that reports the stopping position when part of s is
  # not a valid token, e.g. a truncated escape like %f or %0z.

  var pos = 0
  var parts = []
  while (true) {
    # Anchored match at pos; null means no valid token starts here
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      break
    }

    # Groups that did not participate in the match are falsy
    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    var part
    if (lit) {
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    # Continue right after the token we just consumed
    setvar pos = m => end(0)
  }
  if (pos !== len(s)) {
    # Include the position, in the same style as the URLSearchParams() error,
    # so the caller can locate the bad escape.
    error "Unexpected trailing input in unquote $pos != $[len(s)]"
  }

  return (join(parts))
}
|
||
# Reference decoder: run node.js on one encoded string $s. | ||
# The embedded script replaces each + with a space, applies | ||
# decodeURIComponent(), and writes the result to stdout as JSON. | ||
proc js-decode-part(s) { | ||
nodejs -e ''' | ||
|
||
var encoded = process.argv[1]; | ||
|
||
// It does not handle +, because is only for query params, not components? | ||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent | ||
var encoded = encoded.replace(/\+/g, " ") | ||
|
||
var j = JSON.stringify(decodeURIComponent(encoded)) | ||
process.stdout.write(j); | ||
|
||
''' $s | ||
} | ||
|
||
# Encoded strings that test-part decodes with both node.js and unquote() | ||
const PART_CASES = [ | ||
'foo+bar', | ||
'foo%23%40', | ||
# empty key, empty value, invalid % , etc. | ||
] | ||
|
||
# For each of PART_CASES, print the input, the node.js decoding, and the
# unquote() decoding, so the two results can be compared by eye.
proc test-part() {
  for s in (PART_CASES) {
    echo 'INPUT'
    echo "  $s"

    # json read with no argument leaves the decoded value in _reply
    js-decode-part $s | json read
    echo 'JS'
    pp line (_reply)

    # Print with pp line as well, so both results use the same format
    echo 'YSH'
    var decoded = unquote(s)
    pp line (decoded)

    echo
  }
}
|
||
# | ||
# Query | ||
# | ||
|
||
# JavaScript allows either side of k=v to be empty, so we match that | ||
# Tok: zero or more characters other than & = and space | ||
const Tok = / !['&= ']* / | ||
|
||
# Pair: key and value tokens separated by a literal = | ||
const Pair = / <capture Tok as key> '=' <capture Tok as value> / | ||
|
||
# Pairs: one Pair, optionally followed by & which is captured as sep | ||
const Pairs = / Pair <capture '&' as sep>? / | ||
|
||
func URLSearchParams(s) { | ||
### Turn k=v&foo=spam+eggs&k=v into a list of pairs | ||
|
||
# Returns a list of [key, value] lists, with both sides passed through | ||
# unquote().  Raises an error if s is not consumed completely. | ||
|
||
# Loop over matches | ||
var pos = 0 | ||
#echo Pairs=$Pairs | ||
|
||
var pairs = [] | ||
while (true) { | ||
# Anchored match of one key=value (plus optional &) starting at pos | ||
var m = s => leftMatch(Pairs, pos=pos) | ||
if (not m) { | ||
break | ||
} | ||
#pp line (m) | ||
#pp line (m => group(0)) | ||
var k = m => group('key') | ||
var v = m => group('value') | ||
|
||
#pp line (k) | ||
#pp line (v) | ||
|
||
call pairs->append([unquote(k), unquote(v)]) | ||
|
||
# Advance past the pair and its separator, if any | ||
setvar pos = m => end(0) | ||
#pp line (pos) | ||
|
||
# No & after this pair means it must be the last one | ||
var sep = m => group('sep') | ||
if (not sep) { | ||
break | ||
} | ||
} | ||
# Anything left over was not a valid key=value pair | ||
if (pos !== len(s)) { | ||
error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]" | ||
} | ||
|
||
return (pairs) | ||
} | ||
|
||
# Reference parser: run node.js on one query string $s. | ||
# The embedded script builds a JavaScript URLSearchParams from the argument, | ||
# collects its entries into an array, and writes that array to stdout as JSON. | ||
proc js-decode-query(s) { | ||
nodejs -e ''' | ||
|
||
const u = new URLSearchParams(process.argv[1]); | ||
//console.log(JSON.stringify(u)); | ||
|
||
var pairs = [] | ||
for (pair of u) { | ||
pairs.push(pair) | ||
} | ||
|
||
var j = JSON.stringify(pairs); | ||
|
||
//console.log(j): | ||
process.stdout.write(j); | ||
''' $s | ||
} | ||
|
||
# Query strings that test-query parses with both node.js and the YSH | ||
# URLSearchParams() function | ||
const QUERY_CASES = [ | ||
'k=foo+bar', | ||
'key=foo%23%40', | ||
'k=v&foo%23=bar+baz+%24%25&k=v', | ||
'foo+bar=z', | ||
|
||
# JavaScript converts %ff to the Unicode replacement char - its strings can't represent bytes | ||
'foo%ffbar=z', | ||
|
||
'missing_val=&k=', | ||
|
||
'=missing_key&=m2', | ||
|
||
# This is valid | ||
'=&=', | ||
'=&=&', | ||
|
||
# JavaScript treats = as literal - that seems wrong | ||
# YSH treating this as an error seems right | ||
#'==', | ||
] | ||
|
||
# For each of QUERY_CASES, print the input, the node.js result, and the YSH | ||
# URLSearchParams() result, so they can be compared by eye. | ||
proc test-query() { | ||
#_check ('foo bar' === unquote('foo+bar')) | ||
|
||
for s in (QUERY_CASES) { | ||
echo 'INPUT' | ||
echo " $s" | ||
|
||
# The parsed JSON is read back from _reply below | ||
js-decode-query $s | json read | ||
echo 'JS' | ||
pp line (_reply) | ||
|
||
echo 'YSH' | ||
var pairs = URLSearchParams(s) | ||
pp line (pairs) | ||
|
||
echo | ||
#break | ||
} | ||
} | ||
|
||
# Hand this script ($0) to devtools/byo.sh with the "test" action | ||
proc run-tests() { | ||
devtools/byo.sh test $0 | ||
} | ||
|
||
# Entry point: pass the command line to task-five (from task-five.sh, | ||
# sourced above); per the Usage note, the first argument is a function name. | ||
task-five "$@" |