diff --git a/.github/workflows/sha2.yml b/.github/workflows/sha2.yml
index ccbc5a51..2d8130b1 100644
--- a/.github/workflows/sha2.yml
+++ b/.github/workflows/sha2.yml
@@ -21,7 +21,7 @@ jobs:
   set-msrv:
     uses: RustCrypto/actions/.github/workflows/set-msrv.yml@master
     with:
-      msrv: 1.79.0
+      msrv: 1.81.0

   # Builds for no_std platforms
   build:
@@ -196,6 +196,38 @@ jobs:
         env:
           RUSTFLAGS: -Dwarnings --cfg sha2_backend="riscv-zknh-compact" -C target-feature=+zknh,+zbkb

+  # wasmtime tests
+  wasm:
+    needs: set-msrv
+    strategy:
+      matrix:
+        include:
+          # without simd
+          - rust: ${{needs.set-msrv.outputs.msrv}}
+            flags: "-C target-feature=-simd128"
+          - rust: stable
+            flags: "-C target-feature=-simd128"
+
+          # with simd
+          - rust: ${{needs.set-msrv.outputs.msrv}}
+            flags: "-C target-feature=+simd128"
+          - rust: stable
+            flags: "-C target-feature=+simd128"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: RustCrypto/actions/cargo-cache@master
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ matrix.rust }}
+          targets: wasm32-wasip1
+      - uses: RustCrypto/actions/cargo-hack-install@master
+      - uses: jcbhmr/setup-wasmtime@v2
+      - run: cargo hack test --feature-powerset --target wasm32-wasip1
+        env:
+          RUSTFLAGS: ${{ matrix.flags }}
+          CARGO_TARGET_WASM32_WASIP1_RUNNER: wasmtime
+
   minimal-versions:
     uses: RustCrypto/actions/.github/workflows/minimal-versions.yml@master
     with:
diff --git a/README.md b/README.md
index 101c3fca..f8abd290 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ Additionally all crates do not require the standard library (i.e. `no_std` capab
 | [RIPEMD] | [`ripemd`] | [![crates.io](https://img.shields.io/crates/v/ripemd.svg)](https://crates.io/crates/ripemd) | [![Documentation](https://docs.rs/ripemd/badge.svg)](https://docs.rs/ripemd) | ![MSRV 1.71][msrv-1.71] | :green_heart: |
 | [SHA-1] | [`sha1`] | [![crates.io](https://img.shields.io/crates/v/sha1.svg)](https://crates.io/crates/sha1) | [![Documentation](https://docs.rs/sha1/badge.svg)](https://docs.rs/sha1) | ![MSRV 1.72][msrv-1.72] | :broken_heart: |
 | [SHA-1 Checked] | [`sha1-checked`] | [![crates.io](https://img.shields.io/crates/v/sha1-checked.svg)](https://crates.io/crates/sha1-checked) | [![Documentation](https://docs.rs/sha1-checked/badge.svg)](https://docs.rs/sha1-checked) | ![MSRV 1.72][msrv-1.72] | :yellow_heart: |
-| [SHA-2] | [`sha2`] | [![crates.io](https://img.shields.io/crates/v/sha2.svg)](https://crates.io/crates/sha2) | [![Documentation](https://docs.rs/sha2/badge.svg)](https://docs.rs/sha2) | ![MSRV 1.72][msrv-1.72] | :green_heart: |
+| [SHA-2] | [`sha2`] | [![crates.io](https://img.shields.io/crates/v/sha2.svg)](https://crates.io/crates/sha2) | [![Documentation](https://docs.rs/sha2/badge.svg)](https://docs.rs/sha2) | ![MSRV 1.81][msrv-1.81] | :green_heart: |
 | [SHA-3] (Keccak) | [`sha3`] | [![crates.io](https://img.shields.io/crates/v/sha3.svg)](https://crates.io/crates/sha3) | [![Documentation](https://docs.rs/sha3/badge.svg)](https://docs.rs/sha3) | ![MSRV 1.71][msrv-1.71] | :green_heart: |
 | [SHABAL] | [`shabal`] | [![crates.io](https://img.shields.io/crates/v/shabal.svg)](https://crates.io/crates/shabal) | [![Documentation](https://docs.rs/shabal/badge.svg)](https://docs.rs/shabal) | ![MSRV 1.71][msrv-1.71] | :green_heart: |
 | [Skein] | [`skein`] | [![crates.io](https://img.shields.io/crates/v/skein.svg)](https://crates.io/crates/skein) | [![Documentation](https://docs.rs/skein/badge.svg)](https://docs.rs/skein) | ![MSRV 1.71][msrv-1.71] | :green_heart: |
@@ -237,6 +237,7 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
 [msrv-1.71]: https://img.shields.io/badge/rustc-1.71.0+-blue.svg
 [msrv-1.72]: https://img.shields.io/badge/rustc-1.72.0+-blue.svg
 [msrv-1.74]: https://img.shields.io/badge/rustc-1.74.0+-blue.svg
+[msrv-1.81]: https://img.shields.io/badge/rustc-1.81.0+-blue.svg

 [//]: # (crates)

diff --git a/sha2/src/sha256.rs b/sha2/src/sha256.rs
index 6d5896f1..75736e91 100644
--- a/sha2/src/sha256.rs
+++ b/sha2/src/sha256.rs
@@ -27,6 +27,9 @@ cfg_if::cfg_if! {
     } else if #[cfg(target_arch = "loongarch64")] {
         mod loongarch64_asm;
         use loongarch64_asm::compress;
+    } else if #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] {
+        mod wasm32;
+        use wasm32::compress;
     } else {
         mod soft;
         use soft::compress;
diff --git a/sha2/src/sha256/wasm32.rs b/sha2/src/sha256/wasm32.rs
new file mode 100644
index 00000000..31659bb4
--- /dev/null
+++ b/sha2/src/sha256/wasm32.rs
@@ -0,0 +1,190 @@
+#![allow(clippy::many_single_char_names)]
+use core::arch::wasm32::*;
+use core::mem::size_of;
+
+use crate::consts::K32;
+
+pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+    assert_eq!(SHA256_HASH_WORDS_NUM, 8);
+    assert_eq!(SHA256_BLOCK_WORDS_NUM, 16);
+    let mut ms = [u64x2(0, 0); 4];
+    let mut x = [u64x2(0, 0); 4];
+
+    for block in blocks {
+        unsafe {
+            let mut current_state = *state;
+            load_data(&mut x, &mut ms, block.as_ptr().cast());
+            rounds_0_47(&mut current_state, &mut x, &mut ms);
+            rounds_48_63(&mut current_state, &ms);
+            accumulate_state(state, &current_state);
+        }
+    }
+}
+
+#[inline(always)]
+unsafe fn load_data(x: &mut [v128; 4], ms: &mut MsgSchedule, data: *const v128) {
+    macro_rules! unrolled_iterations {
+        ($($i:literal),*) => {$(
+            x[$i] = v128_load(data.add($i).cast());
+            x[$i] = i8x16_shuffle::<3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12>(x[$i], x[$i]);
+
+            let y = i32x4_add(
+                x[$i],
+                v128_load(K32.as_ptr().add(4 * $i).cast()),
+            );
+
+            ms[$i] = y;
+        )*};
+    }
+
+    unrolled_iterations!(0, 1, 2, 3);
+}
+
+#[inline(always)]
+unsafe fn rounds_0_47(current_state: &mut State, x: &mut [v128; 4], ms: &mut MsgSchedule) {
+    let mut k32_idx: usize = SHA256_BLOCK_WORDS_NUM;
+
+    for _ in 0..3 {
+        for j in 0..4 {
+            let k32 = v128_load(K32.as_ptr().add(k32_idx).cast());
+            let y = sha256_update_x(x, k32);
+
+            {
+                let ms = ms[j];
+                sha_round(current_state, u32x4_extract_lane::<0>(ms));
+                sha_round(current_state, u32x4_extract_lane::<1>(ms));
+                sha_round(current_state, u32x4_extract_lane::<2>(ms));
+                sha_round(current_state, u32x4_extract_lane::<3>(ms));
+            }
+
+            ms[j] = y;
+            k32_idx += 4;
+        }
+    }
+}
+
+#[inline(always)]
+fn rounds_48_63(current_state: &mut State, ms: &MsgSchedule) {
+    for j in 0..4 {
+        let ms = ms[j];
+        sha_round(current_state, u32x4_extract_lane::<0>(ms));
+        sha_round(current_state, u32x4_extract_lane::<1>(ms));
+        sha_round(current_state, u32x4_extract_lane::<2>(ms));
+        sha_round(current_state, u32x4_extract_lane::<3>(ms));
+    }
+}
+
+#[inline(always)]
+fn sha_round(s: &mut State, x: u32) {
+    macro_rules! big_sigma0 {
+        ($a:expr) => {
+            $a.rotate_right(2) ^ $a.rotate_right(13) ^ $a.rotate_right(22)
+        };
+    }
+    macro_rules! big_sigma1 {
+        ($a:expr) => {
+            $a.rotate_right(6) ^ $a.rotate_right(11) ^ $a.rotate_right(25)
+        };
+    }
+    macro_rules! bool3ary_202 {
+        ($a:expr, $b:expr, $c:expr) => {
+            $c ^ ($a & ($b ^ $c))
+        };
+    } // Choose, MD5F, SHA1C
+    macro_rules! bool3ary_232 {
+        ($a:expr, $b:expr, $c:expr) => {
+            ($a & $b) ^ ($a & $c) ^ ($b & $c)
+        };
+    } // Majority, SHA1M
+
+    macro_rules! rotate_state {
+        ($s:ident) => {{
+            let tmp = $s[7];
+            $s[7] = $s[6];
+            $s[6] = $s[5];
+            $s[5] = $s[4];
+            $s[4] = $s[3];
+            $s[3] = $s[2];
+            $s[2] = $s[1];
+            $s[1] = $s[0];
+            $s[0] = tmp;
+        }};
+    }
+
+    let t = x
+        .wrapping_add(s[7])
+        .wrapping_add(big_sigma1!(s[4]))
+        .wrapping_add(bool3ary_202!(s[4], s[5], s[6]));
+
+    s[7] = t
+        .wrapping_add(big_sigma0!(s[0]))
+        .wrapping_add(bool3ary_232!(s[0], s[1], s[2]));
+    s[3] = s[3].wrapping_add(t);
+
+    rotate_state!(s);
+}
+
+#[inline(always)]
+fn accumulate_state(dst: &mut State, src: &State) {
+    for i in 0..SHA256_HASH_WORDS_NUM {
+        dst[i] = dst[i].wrapping_add(src[i]);
+    }
+}
+
+#[inline(always)]
+unsafe fn sha256_update_x(x: &mut [v128; 4], k32: v128) -> v128 {
+    const SIGMA0_0: u32 = 7;
+    const SIGMA0_1: u32 = 18;
+    const SIGMA0_2: u32 = 3;
+    const SIGMA1_0: u32 = 17;
+    const SIGMA1_1: u32 = 19;
+    const SIGMA1_2: u32 = 10;
+    const SHA256_WORD_BIT_LEN: u32 = 8 * size_of::<u32>() as u32;
+    const ZERO: v128 = u64x2(0, 0);
+
+    let mut t0 = u32x4_shuffle::<1, 2, 3, 4>(x[0], x[1]);
+    let mut t3 = u32x4_shuffle::<1, 2, 3, 4>(x[2], x[3]);
+    let mut t2 = u32x4_shr(t0, SIGMA0_0);
+    x[0] = u32x4_add(x[0], t3);
+    t3 = u32x4_shr(t0, SIGMA0_2);
+    let mut t1 = u32x4_shl(t0, SHA256_WORD_BIT_LEN - SIGMA0_1);
+    t0 = v128_xor(t3, t2);
+    t3 = u32x4_shuffle::<2, 2, 3, 3>(x[3], x[3]);
+    t2 = u32x4_shr(t2, SIGMA0_1 - SIGMA0_0);
+    t0 = v128_xor(t0, t1);
+    t0 = v128_xor(t0, t2);
+    t1 = u32x4_shl(t1, SIGMA0_1 - SIGMA0_0);
+    t2 = u32x4_shr(t3, SIGMA1_2);
+    t3 = u64x2_shr(t3, SIGMA1_0);
+    t1 = v128_xor(t0, t1);
+    x[0] = u32x4_add(x[0], t1);
+    t2 = v128_xor(t2, t3);
+    t3 = u64x2_shr(t3, SIGMA1_1 - SIGMA1_0);
+    t2 = v128_xor(t2, t3);
+    t2 = u32x4_shuffle::<0, 2, 7, 7>(t2, ZERO);
+    x[0] = u32x4_add(x[0], t2);
+    t3 = u32x4_shuffle::<0, 0, 1, 1>(x[0], x[0]);
+    t2 = u32x4_shr(t3, SIGMA1_2);
+    t3 = u64x2_shr(t3, SIGMA1_0);
+    t2 = v128_xor(t2, t3);
+    t3 = u64x2_shr(t3, SIGMA1_1 - SIGMA1_0);
+    t2 = v128_xor(t2, t3);
+    t2 = u32x4_shuffle::<7, 7, 0, 2>(t2, ZERO);
+    x[0] = u32x4_add(x[0], t2);
+
+    let tmp = x[0];
+    x[0] = x[1];
+    x[1] = x[2];
+    x[2] = x[3];
+    x[3] = tmp;
+
+    u32x4_add(x[3], k32)
+}
+
+type State = [u32; SHA256_HASH_WORDS_NUM];
+type MsgSchedule = [v128; SHA256_BLOCK_WORDS_NUM / 4];
+
+const SHA256_BLOCK_BYTE_LEN: usize = 64;
+const SHA256_HASH_BYTE_LEN: usize = 32;
+const SHA256_HASH_WORDS_NUM: usize = SHA256_HASH_BYTE_LEN / size_of::<u32>();
+const SHA256_BLOCK_WORDS_NUM: usize = SHA256_BLOCK_BYTE_LEN / size_of::<u32>();
diff --git a/sha2/src/sha512.rs b/sha2/src/sha512.rs
index 20679266..1970f5e6 100644
--- a/sha2/src/sha512.rs
+++ b/sha2/src/sha512.rs
@@ -27,6 +27,9 @@ cfg_if::cfg_if! {
     } else if #[cfg(target_arch = "loongarch64")] {
         mod loongarch64_asm;
         use loongarch64_asm::compress;
+    } else if #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] {
+        mod wasm32;
+        use wasm32::compress;
     } else {
         mod soft;
         use soft::compress;
diff --git a/sha2/src/sha512/wasm32.rs b/sha2/src/sha512/wasm32.rs
new file mode 100644
index 00000000..6ff60bf2
--- /dev/null
+++ b/sha2/src/sha512/wasm32.rs
@@ -0,0 +1,172 @@
+#![allow(clippy::many_single_char_names)]
+
+use core::arch::wasm32::*;
+use core::mem::size_of;
+
+use crate::consts::K64;
+
+pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
+    let mut ms = [u64x2(0, 0); 8];
+    let mut x = [u64x2(0, 0); 8];
+
+    for block in blocks {
+        unsafe {
+            let mut current_state = *state;
+            load_data(&mut x, &mut ms, block.as_ptr().cast());
+            rounds_0_63(&mut current_state, &mut x, &mut ms);
+            rounds_64_79(&mut current_state, &ms);
+            accumulate_state(state, &current_state);
+        }
+    }
+}
+
+#[inline(always)]
+unsafe fn load_data(x: &mut [v128; 8], ms: &mut MsgSchedule, data: *const v128) {
+    macro_rules! unrolled_iterations {
+        ($($i:literal),*) => {$(
+            x[$i] = v128_load(data.add($i).cast());
+            x[$i] = i8x16_shuffle::<7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8>(x[$i], x[$i]);
+
+            let y = i64x2_add(
+                x[$i],
+                v128_load(K64.as_ptr().add(2 * $i).cast()),
+            );
+
+            ms[$i] = y;
+        )*};
+    }
+
+    unrolled_iterations!(0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+#[inline(always)]
+unsafe fn rounds_0_63(current_state: &mut State, x: &mut [v128; 8], ms: &mut MsgSchedule) {
+    let mut k64_idx: usize = SHA512_BLOCK_WORDS_NUM;
+
+    for _ in 0..4 {
+        for j in 0..8 {
+            let k64 = v128_load(K64.as_ptr().add(k64_idx).cast());
+            let y = sha512_update_x(x, k64);
+
+            {
+                let ms = ms[j];
+                sha_round(current_state, u64x2_extract_lane::<0>(ms));
+                sha_round(current_state, u64x2_extract_lane::<1>(ms));
+            }
+
+            ms[j] = y;
+            k64_idx += 2;
+        }
+    }
+}
+
+#[inline(always)]
+fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) {
+    for j in 0..8 {
+        let ms = ms[j];
+        sha_round(current_state, u64x2_extract_lane::<0>(ms));
+        sha_round(current_state, u64x2_extract_lane::<1>(ms));
+    }
+}
+
+#[inline(always)]
+fn sha_round(s: &mut State, x: u64) {
+    macro_rules! big_sigma0 {
+        ($a:expr) => {
+            $a.rotate_right(28) ^ $a.rotate_right(34) ^ $a.rotate_right(39)
+        };
+    }
+    macro_rules! big_sigma1 {
+        ($a:expr) => {
+            $a.rotate_right(14) ^ $a.rotate_right(18) ^ $a.rotate_right(41)
+        };
+    }
+    macro_rules! bool3ary_202 {
+        ($a:expr, $b:expr, $c:expr) => {
+            $c ^ ($a & ($b ^ $c))
+        };
+    } // Choose, MD5F, SHA1C
+    macro_rules! bool3ary_232 {
+        ($a:expr, $b:expr, $c:expr) => {
+            ($a & $b) ^ ($a & $c) ^ ($b & $c)
+        };
+    } // Majority, SHA1M
+
+    macro_rules! rotate_state {
+        ($s:ident) => {{
+            let tmp = $s[7];
+            $s[7] = $s[6];
+            $s[6] = $s[5];
+            $s[5] = $s[4];
+            $s[4] = $s[3];
+            $s[3] = $s[2];
+            $s[2] = $s[1];
+            $s[1] = $s[0];
+            $s[0] = tmp;
+        }};
+    }
+
+    let t = x
+        .wrapping_add(s[7])
+        .wrapping_add(big_sigma1!(s[4]))
+        .wrapping_add(bool3ary_202!(s[4], s[5], s[6]));
+
+    s[7] = t
+        .wrapping_add(big_sigma0!(s[0]))
+        .wrapping_add(bool3ary_232!(s[0], s[1], s[2]));
+    s[3] = s[3].wrapping_add(t);
+
+    rotate_state!(s);
+}
+
+#[inline(always)]
+fn accumulate_state(dst: &mut State, src: &State) {
+    for i in 0..SHA512_HASH_WORDS_NUM {
+        dst[i] = dst[i].wrapping_add(src[i]);
+    }
+}
+
+#[inline(always)]
+unsafe fn sha512_update_x(x: &mut [v128; 8], k64: v128) -> v128 {
+    let mut t0 = u64x2_shuffle::<1, 2>(x[0], x[1]);
+    let mut t3 = u64x2_shuffle::<1, 2>(x[4], x[5]);
+    let mut t2 = u64x2_shr(t0, 1);
+    x[0] = i64x2_add(x[0], t3);
+    t3 = u64x2_shr(t0, 7);
+    let mut t1 = u64x2_shl(t0, 64 - 8);
+    t0 = v128_xor(t3, t2);
+    t2 = u64x2_shr(t2, 8 - 1);
+    t0 = v128_xor(t0, t1);
+    t1 = u64x2_shl(t1, 8 - 1);
+    t0 = v128_xor(t0, t2);
+    t0 = v128_xor(t0, t1);
+    t3 = u64x2_shr(x[7], 6);
+    t2 = u64x2_shl(x[7], 64 - 61);
+    x[0] = i64x2_add(x[0], t0);
+    t1 = u64x2_shr(x[7], 19);
+    t3 = v128_xor(t3, t2);
+    t2 = u64x2_shl(t2, 61 - 19);
+    t3 = v128_xor(t3, t1);
+    t1 = u64x2_shr(t1, 61 - 19);
+    t3 = v128_xor(t3, t2);
+    t3 = v128_xor(t3, t1);
+    x[0] = i64x2_add(x[0], t3);
+    let temp = x[0];
+    x[0] = x[1];
+    x[1] = x[2];
+    x[2] = x[3];
+    x[3] = x[4];
+    x[4] = x[5];
+    x[5] = x[6];
+    x[6] = x[7];
+    x[7] = temp;
+    i64x2_add(x[7], k64)
+}
+
+type State = [u64; SHA512_HASH_WORDS_NUM];
+type MsgSchedule = [v128; SHA512_BLOCK_WORDS_NUM / 2];
+
+const SHA512_BLOCK_BYTE_LEN: usize = 128;
+const SHA512_HASH_BYTE_LEN: usize = 64;
+const SHA512_HASH_WORDS_NUM: usize = SHA512_HASH_BYTE_LEN / size_of::<u64>();
+const SHA512_BLOCK_WORDS_NUM: usize = SHA512_BLOCK_BYTE_LEN / size_of::<u64>();
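Note on usage: backend selection is entirely compile-time via the `cfg` gates added in `sha256.rs` and `sha512.rs`, so the public `Digest` API is unchanged. Code built for `wasm32` with `+simd128` routes `compress` through the new `wasm32` modules; every other configuration keeps its existing backend. Below is a minimal sketch of a hypothetical consumer crate exercising the new path, using only the public `sha2` API, with flags and runner mirroring the CI job above; it is an illustration, not part of this patch.

```rust
// Build and run under wasmtime with the SIMD backend enabled
// (mirrors the CI job added above):
//
//   RUSTFLAGS="-C target-feature=+simd128" \
//   CARGO_TARGET_WASM32_WASIP1_RUNNER=wasmtime \
//   cargo run --target wasm32-wasip1
use sha2::{Digest, Sha256, Sha512};

fn main() {
    // SHA-256: routed through sha256/wasm32.rs when `simd128` is enabled.
    let mut hasher = Sha256::new();
    hasher.update(b"hello world");
    let digest256 = hasher.finalize();
    assert_eq!(digest256.len(), 32);

    // SHA-512: routed through sha512/wasm32.rs when `simd128` is enabled.
    let digest512 = Sha512::digest(b"hello world");
    assert_eq!(digest512.len(), 64);
}
```

Building the same code without `+simd128` falls back to the other backends and must produce identical digests, which is what the `-simd128`/`+simd128` matrix in the wasmtime CI job checks.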