From 4b6f2762a23f4959c47048e75d041e9b306f0f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Pie=C5=84kowski?= <8525083+Jakski@users.noreply.github.com> Date: Sat, 22 Jun 2024 00:26:58 +0200 Subject: [PATCH 1/3] Improve Base64 performance --- spork/base64.janet | 177 ++++++++++++++++++++++--------------------- test/suite0021.janet | 2 + 2 files changed, 92 insertions(+), 87 deletions(-) diff --git a/spork/base64.janet b/spork/base64.janet index a52853c..2b6caa6 100644 --- a/spork/base64.janet +++ b/spork/base64.janet @@ -8,98 +8,101 @@ (def- base64/table "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") -(defn- array-pad-right - [xs size padder] - (let [l (length xs)] - (if (< l size) - (do (for i l size - (put xs i padder)) - xs) - xs))) - -(defn- array-pad-left - [xs size padder] - (let [l (length xs)] - (if (< l size) - (do (for i 0 (- size l) - (array/insert xs i padder)) - xs) - xs))) - -(defn- decimal->binary - [x &opt bin] - (default bin @[]) - (if (< x 1) - (reverse bin) - (let [rem (% x 2) - new-x (math/floor (/ x 2))] - (decimal->binary new-x (array/push bin rem))))) - -(defn- binary->decimal - [xs] - (var num 0) - (for i 0 (length xs) - (if (= 1 (get (reverse xs) i)) - (set num (+ num (math/pow 2 i))))) - num) - -(defn- octets->sextets - [octets] - (->> octets - flatten - (partition 6) - (map |(array ;$0)))) - -(defn- sextets->octets - [sextets] - (->> sextets - flatten - (partition 8))) - -(defn- quadruples->bytes [xs] - (let [sextets (map (fn [x] - (-> (string/find (string/from-bytes x) base64/table) - (decimal->binary) - (array-pad-left 6 0))) xs) - octets (sextets->octets sextets)] - (apply string/from-bytes (map binary->decimal octets)))) - -(defn- pad-last-sextet [xs] - (let [last-index (dec (length xs))] - (update xs last-index array-pad-right 6 0))) - -(defn- add-padding [s] - (if (zero? 
(% (length s) 4)) - s - (let [pad-count (- 4 (% (length s) 4))] - (string s (string/repeat "=" pad-count))))) - (defn encode "Converts a string of any format (UTF-8, binary, ..) to base64 encoding." - [s] - (if (> (length s) 0) - (let [octets (map |(-> $0 - decimal->binary - (array-pad-left 8 0)) - (string/bytes s)) - sextets (pad-last-sextet (octets->sextets octets)) - bytes (map binary->decimal sextets) - base64-bytes (map (fn [i] (get base64/table i)) bytes) - base64 (add-padding (apply string/from-bytes base64-bytes))] - base64) - "")) + [input] + (var cursor 0) + (def rem (% (length input) 3)) + (def + output + (buffer/new-filled + (-> + (length input) + (+ rem) + (div 3) + (* 4)) + 0)) + (each + triplet + (partition + 3 + (case rem + 0 input + 1 (buffer input @"\0\0") + 2 (buffer input @"\0"))) + (set + (output cursor) + (in base64/table (brshift (triplet 0) 2))) + (set + (output (+ cursor 1)) + (in + base64/table + (bor + (-> (triplet 0) (band 2r11) (blshift 4)) + (brshift (triplet 1) 4)))) + (set + (output (+ cursor 2)) + (in + base64/table + (bor + (-> (triplet 1) (band 2r1111) (blshift 2)) + (brshift (triplet 2) 6)))) + (set + (output (+ cursor 3)) + (in base64/table (band (triplet 2) 2r111111))) + (set cursor (+ cursor 4))) + (case rem + 1 + (do + (set (output (- cursor 1)) 61) + (set (output (- cursor 2)) 61)) + 2 (set (output (- cursor 1)) 61)) + (string output)) (defn decode ``` Converts a base64 encoded string to its binary representation of any format (UTF-8, binary, ..). ``` - [s] - (if-not (empty? s) - (let [without-padding (string/replace-all "=" "" s) - padded? (not (zero? (% (length without-padding) 4))) - quadruples (partition 4 without-padding) - bytes (map quadruples->bytes quadruples) - base64 (apply string bytes)] - (if padded? 
(slice base64 0 (dec (length base64))) base64)) - "")) + [input] + (def padded-input + (case (% (length input) 4) + 0 input + 3 (string input "=") + 2 (string input "==") + 1 (error "Wrong length"))) + (def output (buffer/new-filled (* 3 (/ (length padded-input) 4)) 0)) + (var cursor 0) + (each quadruple (partition 4 padded-input) + (def values + (map + |(or + (string/find (string/from-bytes $) base64/table) + (if (= 61 $) + 0 + (errorf "Wrong character: %s" (string/from-bytes $)))) + quadruple)) + (set + (output cursor) + (bor + (blshift (values 0) 2) + (brshift (values 1) 4))) + (set + (output (+ cursor 1)) + (bor + (blshift (values 1) 4) + (brshift (values 2) 2))) + (set + (output (+ cursor 2)) + (bor + (blshift (values 2) 6) + (values 3))) + (set cursor (+ cursor 3))) + (slice + output + 0 + (- + (length output) + (- + (length padded-input) + (length (string/trimr padded-input "=")))))) diff --git a/test/suite0021.janet b/test/suite0021.janet index d2ec916..ee2846c 100644 --- a/test/suite0021.janet +++ b/test/suite0021.janet @@ -98,6 +98,8 @@ (assert (= (base64/encode "foobar") "Zm9vYmFy")) #base64/decode +(assert (= "Wrong length" (last (protect (base64/decode "A"))))) +(assert (= "Wrong character: %" (last (protect (base64/decode "A%"))))) (assert (= (base64/decode "dGhpcyBpcyBhIHRlc3Q=") "this is a test")) (assert (= (base64/decode "") "")) (do (def some-string "\x1Cdawdawdadwdaw\xB0") From bff9c5e2560c5866b11e03164d679117537495f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Pie=C5=84kowski?= <8525083+Jakski@users.noreply.github.com> Date: Sun, 23 Jun 2024 18:20:06 +0200 Subject: [PATCH 2/3] Implement Base64 in C --- project.janet | 4 ++ spork/base64.janet | 108 ---------------------------- src/base64.c | 156 ++++++++++++++++++++++++++++++++++++++++ test/suite-base64.janet | 24 +++++++ test/suite0020.janet | 1 + test/suite0021.janet | 20 ------ 6 files changed, 185 insertions(+), 128 deletions(-) delete mode 100644 spork/base64.janet create mode 100644 
src/base64.c create mode 100644 test/suite-base64.janet diff --git a/project.janet b/project.janet index 8283813..675f936 100644 --- a/project.janet +++ b/project.janet @@ -56,3 +56,7 @@ (declare-native :name "spork/cmath" :source @["src/cmath.c"]) + +(declare-native + :name "spork/base64" + :source @["src/base64.c"]) diff --git a/spork/base64.janet b/spork/base64.janet deleted file mode 100644 index 2b6caa6..0000000 --- a/spork/base64.janet +++ /dev/null @@ -1,108 +0,0 @@ -### -### base64.janet -### -### base64 encoder/decoder -### - - -(def- base64/table - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") - -(defn encode - "Converts a string of any format (UTF-8, binary, ..) to base64 encoding." - [input] - (var cursor 0) - (def rem (% (length input) 3)) - (def - output - (buffer/new-filled - (-> - (length input) - (+ rem) - (div 3) - (* 4)) - 0)) - (each - triplet - (partition - 3 - (case rem - 0 input - 1 (buffer input @"\0\0") - 2 (buffer input @"\0"))) - (set - (output cursor) - (in base64/table (brshift (triplet 0) 2))) - (set - (output (+ cursor 1)) - (in - base64/table - (bor - (-> (triplet 0) (band 2r11) (blshift 4)) - (brshift (triplet 1) 4)))) - (set - (output (+ cursor 2)) - (in - base64/table - (bor - (-> (triplet 1) (band 2r1111) (blshift 2)) - (brshift (triplet 2) 6)))) - (set - (output (+ cursor 3)) - (in base64/table (band (triplet 2) 2r111111))) - (set cursor (+ cursor 4))) - (case rem - 1 - (do - (set (output (- cursor 1)) 61) - (set (output (- cursor 2)) 61)) - 2 (set (output (- cursor 1)) 61)) - (string output)) - -(defn decode - ``` - Converts a base64 encoded string to its binary representation of any format - (UTF-8, binary, ..). 
- ``` - [input] - (def padded-input - (case (% (length input) 4) - 0 input - 3 (string input "=") - 2 (string input "==") - 1 (error "Wrong length"))) - (def output (buffer/new-filled (* 3 (/ (length padded-input) 4)) 0)) - (var cursor 0) - (each quadruple (partition 4 padded-input) - (def values - (map - |(or - (string/find (string/from-bytes $) base64/table) - (if (= 61 $) - 0 - (errorf "Wrong character: %s" (string/from-bytes $)))) - quadruple)) - (set - (output cursor) - (bor - (blshift (values 0) 2) - (brshift (values 1) 4))) - (set - (output (+ cursor 1)) - (bor - (blshift (values 1) 4) - (brshift (values 2) 2))) - (set - (output (+ cursor 2)) - (bor - (blshift (values 2) 6) - (values 3))) - (set cursor (+ cursor 3))) - (slice - output - 0 - (- - (length output) - (- - (length padded-input) - (length (string/trimr padded-input "=")))))) diff --git a/src/base64.c b/src/base64.c new file mode 100644 index 0000000..b98f9e5 --- /dev/null +++ b/src/base64.c @@ -0,0 +1,156 @@ +/* +* Copyright (c) 2024 Janet contributors +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +*/
+
+#include <janet.h>
+
+/* Standard Base64 alphabet (RFC 4648, section 4), indexed by sextet value. */
+static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* Encode the 24-bit group b1:b2:b3 as four alphabet characters at out[0..3]. */
+static void encode_group(uint8_t *out, uint8_t b1, uint8_t b2, uint8_t b3) {
+    out[0] = (uint8_t) table[b1 >> 2];
+    out[1] = (uint8_t) table[((b1 & 3) << 4) | (b2 >> 4)];
+    out[2] = (uint8_t) table[((b2 & 15) << 2) | (b3 >> 6)];
+    out[3] = (uint8_t) table[b3 & 63];
+}
+
+/*
+ * (base64/encode x)
+ *
+ * Encode the bytes of x (anything janet_getbytes accepts, e.g. a string or a
+ * buffer) as padded Base64 and return the result as a new string.
+ */
+static Janet base64_encode(int32_t argc, Janet *argv) {
+    janet_fixarity(argc, 1);
+    JanetByteView in = janet_getbytes(argv, 0);
+    /* Every started 3-byte group yields 4 output bytes; reject inputs whose
+     * encoded length would not fit in the int32_t used for string lengths. */
+    if (in.len > (INT32_MAX / 4) * 3) {
+        janet_panicf("Input too long: %d", in.len);
+    }
+    int32_t rem = in.len % 3;
+    int32_t full = in.len - rem; /* bytes covered by complete groups */
+    uint8_t *out = janet_string_begin(((in.len + 2) / 3) * 4);
+    uint8_t *p = out;
+    for (int32_t i = 0; i < full; i += 3, p += 4) {
+        encode_group(p, in.bytes[i], in.bytes[i + 1], in.bytes[i + 2]);
+    }
+    if (rem != 0) {
+        /* Partial last group: absent bytes count as zero (they are never read)
+         * and the characters they would have produced become '=' padding. */
+        uint8_t b2 = (rem == 2) ? in.bytes[full + 1] : 0;
+        encode_group(p, in.bytes[full], b2, 0);
+        if (rem == 1) {
+            p[2] = '=';
+        }
+        p[3] = '=';
+    }
+    return janet_wrap_string(janet_string_end(out));
+}
+
+/*
+ * Map one Base64 alphabet character to its 6-bit value, panicking on any
+ * other byte. Each range is bounded on both sides so that bytes such as '[',
+ * '{' or anything >= 0x80 are rejected instead of aliasing a valid sextet.
+ */
+static uint8_t decode_character(uint8_t c) {
+    if (c >= 'A' && c <= 'Z') {
+        return c - 'A';
+    } else if (c >= 'a' && c <= 'z') {
+        return c - 'a' + 26;
+    } else if (c >= '0' && c <= '9') {
+        return c - '0' + 52;
+    } else if (c == '+') {
+        return 62;
+    } else if (c == '/') {
+        return 63;
+    } else {
+        janet_panicf("Wrong character: %c", c);
+    }
+}
+
+/*
+ * (base64/decode x)
+ *
+ * Decode padded Base64 text x (anything janet_getbytes accepts) and return
+ * the raw bytes as a new string. Panics with "Wrong length: N" when the
+ * length is not a multiple of 4 and with "Wrong character: C" on any byte
+ * outside the alphabet, including any '=' that is not trailing padding.
+ */
+static Janet base64_decode(int32_t argc, Janet *argv) {
+    janet_fixarity(argc, 1);
+    JanetByteView in = janet_getbytes(argv, 0);
+    if (in.len % 4 != 0) {
+        janet_panicf("Wrong length: %d", in.len);
+    }
+    /* Empty input: nothing to decode and no trailing bytes to probe for '='. */
+    if (in.len == 0) {
+        return janet_cstringv("");
+    }
+    int32_t padding = 0;
+    if (in.bytes[in.len - 1] == '=') {
+        padding = (in.bytes[in.len - 2] == '=') ? 2 : 1;
+    }
+    /* A padded final quadruple is decoded separately after the main loop. */
+    int32_t full = padding ? in.len - 4 : in.len;
+    uint8_t *out = janet_string_begin((in.len / 4) * 3 - padding);
+    uint8_t *p = out;
+    uint8_t s1, s2, s3, s4;
+    for (int32_t i = 0; i < full; i += 4) {
+        s1 = decode_character(in.bytes[i]);
+        s2 = decode_character(in.bytes[i + 1]);
+        s3 = decode_character(in.bytes[i + 2]);
+        s4 = decode_character(in.bytes[i + 3]);
+        *p++ = (uint8_t)((s1 << 2) | (s2 >> 4));
+        *p++ = (uint8_t)((s2 << 4) | (s3 >> 2));
+        *p++ = (uint8_t)((s3 << 6) | s4);
+    }
+    if (padding) {
+        /* padding == 2 leaves one data byte, padding == 1 leaves two. */
+        s1 = decode_character(in.bytes[full]);
+        s2 = decode_character(in.bytes[full + 1]);
+        *p++ = (uint8_t)((s1 << 2) | (s2 >> 4));
+        if (padding == 1) {
+            s3 = decode_character(in.bytes[full + 2]);
+            *p++ = (uint8_t)((s2 << 4) | (s3 >> 2));
+        }
+    }
+    return janet_wrap_string(janet_string_end(out));
+}
+
+static const JanetReg cfuns[] = {
+    {
+        "encode",
+        base64_encode,
+        "(base64/encode x)\n\nEncodes a string in Base64. Returns encoded string."
+    },
+    {
+        "decode",
+        base64_decode,
+        "(base64/decode x)\n\nDecodes a string from Base64. Returns decoded string."
+    },
+    {NULL, NULL, NULL}
+};
+
+JANET_MODULE_ENTRY(JanetTable *env) {
+    janet_cfuns(env, "base64", cfuns);
+}
diff --git a/test/suite-base64.janet b/test/suite-base64.janet new file mode 100644 index 0000000..cea14b5 --- /dev/null +++ b/test/suite-base64.janet @@ -0,0 +1,24 @@ +(use ../spork/test) +(import spork/base64) + +(start-suite) + +(assert-docs "spork/base64") + +(eachp + [decoded encoded] + {"this is a test" "dGhpcyBpcyBhIHRlc3Q=" + "" "" + "f" "Zg==" + "fo" "Zm8=" + "foo" "Zm9v" + "foob" "Zm9vYg==" + "fooba" "Zm9vYmE=" + "foobar" "Zm9vYmFy" + "\x1Cdawdawdadwdaw\xB0" "HGRhd2Rhd2RhZHdkYXew"} + (assert (= (base64/decode encoded) decoded)) + (assert (= (base64/encode decoded) encoded))) +(assert (= "Wrong length: 1" (last (protect (base64/decode "A"))))) +(assert (= "Wrong character: %" (last (protect (base64/decode "A%=="))))) + +(end-suite) diff --git a/test/suite0020.janet b/test/suite0020.janet index ec74e1f..d7588cd 100644 --- a/test/suite0020.janet +++ b/test/suite0020.janet @@ -37,6 +37,7 @@ (assert-docs "spork/tarray") (assert-docs "spork/rawterm") (assert-docs "spork/utf8") +(assert-docs "spork/base64") (assert (deep= (misc/map-keys string {1 2 3 4}) @{"1" 2 "3" 4}) diff --git a/test/suite0021.janet b/test/suite0021.janet index ee2846c..e58c03c 100644 --- a/test/suite0021.janet +++ b/test/suite0021.janet @@ -85,24 +85,4 @@ (map |(diff-assert ;$) cases) -(import ../spork/base64) - -#base64/encode -(assert (= (base64/encode "this is a test") "dGhpcyBpcyBhIHRlc3Q=")) -(assert (= (base64/encode "") ""))
-(assert (= (base64/encode "f") "Zg==")) -(assert (= (base64/encode "fo") "Zm8=")) -(assert (= (base64/encode "foo") "Zm9v")) -(assert (= (base64/encode "foob") "Zm9vYg==")) -(assert (= (base64/encode "fooba") "Zm9vYmE=")) -(assert (= (base64/encode "foobar") "Zm9vYmFy")) - -#base64/decode -(assert (= "Wrong length" (last (protect (base64/decode "A"))))) -(assert (= "Wrong character: %" (last (protect (base64/decode "A%"))))) -(assert (= (base64/decode "dGhpcyBpcyBhIHRlc3Q=") "this is a test")) -(assert (= (base64/decode "") "")) -(do (def some-string "\x1Cdawdawdadwdaw\xB0") - (assert (= (base64/decode (base64/encode some-string)) some-string))) - (end-suite) From dc1cc19413390129b3ec0b07ab15fe5cb041a83c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Pie=C5=84kowski?= <8525083+Jakski@users.noreply.github.com> Date: Sun, 23 Jun 2024 19:52:09 +0200 Subject: [PATCH 3/3] Remove redundant documentation check --- test/suite0020.janet | 1 - 1 file changed, 1 deletion(-) diff --git a/test/suite0020.janet b/test/suite0020.janet index d7588cd..ec74e1f 100644 --- a/test/suite0020.janet +++ b/test/suite0020.janet @@ -37,7 +37,6 @@ (assert-docs "spork/tarray") (assert-docs "spork/rawterm") (assert-docs "spork/utf8") -(assert-docs "spork/base64") (assert (deep= (misc/map-keys string {1 2 3 4}) @{"1" 2 "3" 4})