From 03e1d29d1fdd7bb56e5cdec0d4a1331d0db24903 Mon Sep 17 00:00:00 2001 From: Artyom Pavlov Date: Fri, 23 Aug 2024 16:02:10 +0300 Subject: [PATCH] sha2: RISC-V scalar crypto extension support (#614) The support is Nightly-only and requires enabling the `sha2_backend` configuration flag with a value equal to `riscv-zknh` or `riscv-zknh-compact`. The resulting assembly and binary size of the `compress` function (not counting the `K32` and `K64` statics): - SHA-256, unrolled: https://rust.godbolt.org/z/177bqKd3h (5280 bytes) - SHA-256, compact: https://rust.godbolt.org/z/Kzx59bsdP (1308 bytes) - SHA-512, unrolled: https://rust.godbolt.org/z/ExqqrfE1r (7964 bytes) - SHA-512, compact: https://rust.godbolt.org/z/z41v6d4do (2852 bytes) --- .github/workflows/sha2.yml | 36 ++++++ sha2/Cargo.toml | 8 +- sha2/src/lib.rs | 10 ++ sha2/src/sha256.rs | 22 ++++ sha2/src/sha256/riscv_zknh.rs | 133 +++++++++++++++++++ sha2/src/sha256/riscv_zknh_compact.rs | 76 +++++++++++ sha2/src/sha256/soft.rs | 10 +- sha2/src/sha512.rs | 22 ++++ sha2/src/sha512/riscv_zknh.rs | 176 ++++++++++++++++++++++++++ sha2/src/sha512/riscv_zknh_compact.rs | 104 +++++++++++++++ sha2/src/sha512/soft.rs | 14 +- 11 files changed, 593 insertions(+), 18 deletions(-) create mode 100644 sha2/src/sha256/riscv_zknh.rs create mode 100644 sha2/src/sha256/riscv_zknh_compact.rs create mode 100644 sha2/src/sha512/riscv_zknh.rs create mode 100644 sha2/src/sha512/riscv_zknh_compact.rs diff --git a/.github/workflows/sha2.yml b/.github/workflows/sha2.yml index 1787b988..981228ab 100644 --- a/.github/workflows/sha2.yml +++ b/.github/workflows/sha2.yml @@ -146,6 +146,42 @@ jobs: target: ${{ matrix.target }} features: ${{ matrix.features }} + riscv64-zknh: + runs-on: ubuntu-latest + defaults: + run: + # Cross mounts only current package, i.e. by default it ignores workspace's Cargo.toml + working-directory: .
+ steps: + - uses: actions/checkout@v4 + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + - run: cargo install cross --git https://github.com/cross-rs/cross + - run: cross test --package sha2 --target riscv64gc-unknown-linux-gnu + env: + RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh" -C target-feature=+zknh' + - run: cross test --package sha2 --target riscv64gc-unknown-linux-gnu + env: + RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh-compact" -C target-feature=+zknh' + + riscv32-zknh: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + components: rust-src + - run: cargo build --target riscv32gc-unknown-linux-gnu -Z build-std + env: + RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh" -C target-feature=+zknh' + - run: cargo build --target riscv32gc-unknown-linux-gnu -Z build-std + env: + RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh-compact" -C target-feature=+zknh' + minimal-versions: uses: RustCrypto/actions/.github/workflows/minimal-versions.yml@master with: diff --git a/sha2/Cargo.toml b/sha2/Cargo.toml index 77817abc..3bc42abd 100644 --- a/sha2/Cargo.toml +++ b/sha2/Cargo.toml @@ -30,9 +30,13 @@ base16ct = { version = "0.2", features = ["alloc"] } [features] default = ["oid", "std"] std = ["digest/std"] -oid = ["digest/oid"] # Enable OID support +oid = ["digest/oid"] # Enable OID support zeroize = ["digest/zeroize"] -force-soft = [] # Force software implementation +force-soft = [] # Force software implementation + +[lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = ['cfg(sha2_backend, values("riscv-zknh", "riscv-zknh-compact"))'] [package.metadata.docs.rs] all-features = true diff --git a/sha2/src/lib.rs b/sha2/src/lib.rs index f6eae82a..1efc78b6 100644 --- a/sha2/src/lib.rs +++ b/sha2/src/lib.rs @@ -6,6 +6,16 @@ )] 
#![cfg_attr(docsrs, feature(doc_auto_cfg))] #![warn(missing_docs, rust_2018_idioms)] +#![cfg_attr( + any(sha2_backend = "riscv-zknh", sha2_backend = "riscv-zknh-compact"), + feature(riscv_ext_intrinsics) +)] + +#[cfg(all( + any(sha2_backend = "riscv-zknh", sha2_backend = "riscv-zknh-compact"), + not(any(any(target_arch = "riscv32", target_arch = "riscv64"))) +))] +compile_error!("The Zknh backends can be enabled only for RISC-V targets"); pub use digest::{self, Digest}; diff --git a/sha2/src/sha256.rs b/sha2/src/sha256.rs index 417a51b6..016fabaf 100644 --- a/sha2/src/sha256.rs +++ b/sha2/src/sha256.rs @@ -6,6 +6,18 @@ cfg_if::cfg_if! { mod soft; mod x86; use x86::compress; + } else if #[cfg(all( + any(target_arch = "riscv32", target_arch = "riscv64"), + sha2_backend = "riscv-zknh" + ))] { + mod riscv_zknh; + use riscv_zknh::compress; + } else if #[cfg(all( + any(target_arch = "riscv32", target_arch = "riscv64"), + sha2_backend = "riscv-zknh-compact" + ))] { + mod riscv_zknh_compact; + use riscv_zknh_compact::compress; } else if #[cfg(target_arch = "aarch64")] { mod soft; mod aarch64; @@ -19,6 +31,16 @@ cfg_if::cfg_if! { } } +#[inline(always)] +#[allow(dead_code)] +fn to_u32s(block: &[u8; 64]) -> [u32; 16] { + let mut res = [0u32; 16]; + for (src, dst) in block.chunks_exact(4).zip(res.iter_mut()) { + *dst = u32::from_be_bytes(src.try_into().unwrap()); + } + res +} + /// Raw SHA-256 compression function. 
/// /// This is a low-level "hazmat" API which provides direct access to the core diff --git a/sha2/src/sha256/riscv_zknh.rs b/sha2/src/sha256/riscv_zknh.rs new file mode 100644 index 00000000..fe950bdc --- /dev/null +++ b/sha2/src/sha256/riscv_zknh.rs @@ -0,0 +1,133 @@ +use crate::consts::K32; + +#[cfg(target_arch = "riscv32")] +use core::arch::riscv32::*; +#[cfg(target_arch = "riscv64")] +use core::arch::riscv64::*; + +#[cfg(not(target_feature = "zknh"))] +compile_error!("riscv-zknh backend requires enabled zknh target feature"); + +#[inline(always)] +fn ch(x: u32, y: u32, z: u32) -> u32 { + (x & y) ^ (!x & z) +} + +#[inline(always)] +fn maj(x: u32, y: u32, z: u32) -> u32 { + (x & y) ^ (x & z) ^ (y & z) +} + +#[allow(clippy::identity_op)] +fn round<const R: usize>(state: &mut [u32; 8], block: &[u32; 16]) { + let n = K32.len() - R; + #[allow(clippy::identity_op)] + let a = (n + 0) % 8; + let b = (n + 1) % 8; + let c = (n + 2) % 8; + let d = (n + 3) % 8; + let e = (n + 4) % 8; + let f = (n + 5) % 8; + let g = (n + 6) % 8; + let h = (n + 7) % 8; + + state[h] = state[h] + .wrapping_add(unsafe { sha256sum1(state[e]) }) + .wrapping_add(ch(state[e], state[f], state[g])) + // Force reading of constants from the static to prevent bad codegen + .wrapping_add(unsafe { core::ptr::read_volatile(&K32[R]) }) + .wrapping_add(block[R % 16]); + state[d] = state[d].wrapping_add(state[h]); + state[h] = state[h] + .wrapping_add(unsafe { sha256sum0(state[a]) }) + .wrapping_add(maj(state[a], state[b], state[c])) +} + +fn round_schedule<const R: usize>(state: &mut [u32; 8], block: &mut [u32; 16]) { + round::<R>(state, block); + + block[R % 16] = block[R % 16] + .wrapping_add(unsafe { sha256sig1(block[(R + 14) % 16]) }) + .wrapping_add(block[(R + 9) % 16]) + .wrapping_add(unsafe { sha256sig0(block[(R + 1) % 16]) }); +} + +fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) { + let s = &mut state.clone(); + let b = &mut block; + + round_schedule::<0>(s, b); + round_schedule::<1>(s, b); + round_schedule::<2>(s,
b); + round_schedule::<3>(s, b); + round_schedule::<4>(s, b); + round_schedule::<5>(s, b); + round_schedule::<6>(s, b); + round_schedule::<7>(s, b); + round_schedule::<8>(s, b); + round_schedule::<9>(s, b); + round_schedule::<10>(s, b); + round_schedule::<11>(s, b); + round_schedule::<12>(s, b); + round_schedule::<13>(s, b); + round_schedule::<14>(s, b); + round_schedule::<15>(s, b); + round_schedule::<16>(s, b); + round_schedule::<17>(s, b); + round_schedule::<18>(s, b); + round_schedule::<19>(s, b); + round_schedule::<20>(s, b); + round_schedule::<21>(s, b); + round_schedule::<22>(s, b); + round_schedule::<23>(s, b); + round_schedule::<24>(s, b); + round_schedule::<25>(s, b); + round_schedule::<26>(s, b); + round_schedule::<27>(s, b); + round_schedule::<28>(s, b); + round_schedule::<29>(s, b); + round_schedule::<30>(s, b); + round_schedule::<31>(s, b); + round_schedule::<32>(s, b); + round_schedule::<33>(s, b); + round_schedule::<34>(s, b); + round_schedule::<35>(s, b); + round_schedule::<36>(s, b); + round_schedule::<37>(s, b); + round_schedule::<38>(s, b); + round_schedule::<39>(s, b); + round_schedule::<40>(s, b); + round_schedule::<41>(s, b); + round_schedule::<42>(s, b); + round_schedule::<43>(s, b); + round_schedule::<44>(s, b); + round_schedule::<45>(s, b); + round_schedule::<46>(s, b); + round_schedule::<47>(s, b); + round::<48>(s, b); + round::<49>(s, b); + round::<50>(s, b); + round::<51>(s, b); + round::<52>(s, b); + round::<53>(s, b); + round::<54>(s, b); + round::<55>(s, b); + round::<56>(s, b); + round::<57>(s, b); + round::<58>(s, b); + round::<59>(s, b); + round::<60>(s, b); + round::<61>(s, b); + round::<62>(s, b); + round::<63>(s, b); + + for i in 0..8 { + state[i] = state[i].wrapping_add(s[i]); + } +} + +pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + for block in blocks.iter().map(super::to_u32s) { + compress_block(state, block); + } +} diff --git a/sha2/src/sha256/riscv_zknh_compact.rs b/sha2/src/sha256/riscv_zknh_compact.rs 
new file mode 100644 index 00000000..98375cce --- /dev/null +++ b/sha2/src/sha256/riscv_zknh_compact.rs @@ -0,0 +1,76 @@ +use crate::consts::K32; + +#[cfg(target_arch = "riscv32")] +use core::arch::riscv32::*; +#[cfg(target_arch = "riscv64")] +use core::arch::riscv64::*; + +#[cfg(not(target_feature = "zknh"))] +compile_error!("riscv-zknh backend requires enabled zknh target feature"); + +#[inline(always)] +fn ch(x: u32, y: u32, z: u32) -> u32 { + (x & y) ^ (!x & z) +} + +#[inline(always)] +fn maj(x: u32, y: u32, z: u32) -> u32 { + (x & y) ^ (x & z) ^ (y & z) +} + +#[inline(always)] +fn round(state: &mut [u32; 8], block: &[u32; 16], r: usize) { + let n = K32.len() - r; + #[allow(clippy::identity_op)] + let a = (n + 0) % 8; + let b = (n + 1) % 8; + let c = (n + 2) % 8; + let d = (n + 3) % 8; + let e = (n + 4) % 8; + let f = (n + 5) % 8; + let g = (n + 6) % 8; + let h = (n + 7) % 8; + + state[h] = state[h] + .wrapping_add(unsafe { sha256sum1(state[e]) }) + .wrapping_add(ch(state[e], state[f], state[g])) + .wrapping_add(K32[r]) + .wrapping_add(block[r % 16]); + state[d] = state[d].wrapping_add(state[h]); + state[h] = state[h] + .wrapping_add(unsafe { sha256sum0(state[a]) }) + .wrapping_add(maj(state[a], state[b], state[c])) +} + +#[inline(always)] +fn round_schedule(state: &mut [u32; 8], block: &mut [u32; 16], r: usize) { + round(state, block, r); + + block[r % 16] = block[r % 16] + .wrapping_add(unsafe { sha256sig1(block[(r + 14) % 16]) }) + .wrapping_add(block[(r + 9) % 16]) + .wrapping_add(unsafe { sha256sig0(block[(r + 1) % 16]) }); +} + +#[inline(always)] +fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) { + let s = &mut state.clone(); + let b = &mut block; + + for i in 0..48 { + round_schedule(s, b, i); + } + for i in 48..64 { + round(s, b, i); + } + + for i in 0..8 { + state[i] = state[i].wrapping_add(s[i]); + } +} + +pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + for block in blocks.iter().map(super::to_u32s) { + 
compress_block(state, block); + } +} diff --git a/sha2/src/sha256/soft.rs b/sha2/src/sha256/soft.rs index 44f6d6bf..b385ef6a 100644 --- a/sha2/src/sha256/soft.rs +++ b/sha2/src/sha256/soft.rs @@ -186,7 +186,7 @@ macro_rules! schedule_rounds4 { } /// Process a block with the SHA-256 algorithm. -fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) { +fn sha256_digest_block_u32(state: &mut [u32; 8], block: [u32; 16]) { let mut abef = [state[0], state[1], state[4], state[5]]; let mut cdgh = [state[2], state[3], state[6], state[7]]; @@ -228,11 +228,7 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) { } pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { - for block in blocks { - let mut block_u32 = [0u32; 16]; - for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(4)) { - *o = u32::from_be_bytes(chunk.try_into().unwrap()); - } - sha256_digest_block_u32(state, &block_u32); + for block in blocks.iter().map(super::to_u32s) { + sha256_digest_block_u32(state, block); } } diff --git a/sha2/src/sha512.rs b/sha2/src/sha512.rs index 7d938277..8d0fdbb3 100644 --- a/sha2/src/sha512.rs +++ b/sha2/src/sha512.rs @@ -6,6 +6,18 @@ cfg_if::cfg_if! { mod soft; mod x86; use x86::compress; + } else if #[cfg(all( + any(target_arch = "riscv32", target_arch = "riscv64"), + sha2_backend = "riscv-zknh" + ))] { + mod riscv_zknh; + use riscv_zknh::compress; + } else if #[cfg(all( + any(target_arch = "riscv32", target_arch = "riscv64"), + sha2_backend = "riscv-zknh-compact" + ))] { + mod riscv_zknh_compact; + use riscv_zknh_compact::compress; } else if #[cfg(target_arch = "aarch64")] { mod soft; mod aarch64; @@ -19,6 +31,16 @@ cfg_if::cfg_if! { } } +#[inline(always)] +#[allow(dead_code)] +fn to_u64s(block: &[u8; 128]) -> [u64; 16] { + let mut res = [0u64; 16]; + for (src, dst) in block.chunks_exact(8).zip(res.iter_mut()) { + *dst = u64::from_be_bytes(src.try_into().unwrap()); + } + res +} + /// Raw SHA-512 compression function. 
/// /// This is a low-level "hazmat" API which provides direct access to the core diff --git a/sha2/src/sha512/riscv_zknh.rs b/sha2/src/sha512/riscv_zknh.rs new file mode 100644 index 00000000..31a327eb --- /dev/null +++ b/sha2/src/sha512/riscv_zknh.rs @@ -0,0 +1,176 @@ +use crate::consts::K64; + +#[cfg(target_arch = "riscv32")] +use core::arch::riscv32::*; +#[cfg(target_arch = "riscv64")] +use core::arch::riscv64::*; + +#[cfg(not(target_feature = "zknh"))] +compile_error!("riscv-zknh backend requires enabled zknh target feature"); + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sum0(x: u64) -> u64 { + let a = sha512sum0r((x >> 32) as u32, x as u32); + let b = sha512sum0r(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sum1(x: u64) -> u64 { + let a = sha512sum1r((x >> 32) as u32, x as u32); + let b = sha512sum1r(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sig0(x: u64) -> u64 { + let a = sha512sig0h((x >> 32) as u32, x as u32); + let b = sha512sig0l(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sig1(x: u64) -> u64 { + let a = sha512sig1h((x >> 32) as u32, x as u32); + let b = sha512sig1l(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[inline(always)] +fn ch(x: u64, y: u64, z: u64) -> u64 { + (x & y) ^ (!x & z) +} + +#[inline(always)] +fn maj(x: u64, y: u64, z: u64) -> u64 { + (x & y) ^ (x & z) ^ (y & z) +} + +fn round<const R: usize>(state: &mut [u64; 8], block: &[u64; 16]) { + let n = K64.len() - R; + #[allow(clippy::identity_op)] + let a = (n + 0) % 8; + let b = (n + 1) % 8; + let c = (n + 2) % 8; + let d = (n + 3) % 8; + let e = (n + 4) % 8; + let f = (n + 5) % 8; + let g = (n + 6) % 8; + let h = (n + 7) % 8; + + state[h] = state[h] + .wrapping_add(unsafe { sha512sum1(state[e]) }) + .wrapping_add(ch(state[e], state[f], state[g])) + // Force
reading of constants from the static to prevent bad codegen + .wrapping_add(unsafe { core::ptr::read_volatile(&K64[R]) }) + .wrapping_add(block[R % 16]); + state[d] = state[d].wrapping_add(state[h]); + state[h] = state[h] + .wrapping_add(unsafe { sha512sum0(state[a]) }) + .wrapping_add(maj(state[a], state[b], state[c])) +} + +fn round_schedule<const R: usize>(state: &mut [u64; 8], block: &mut [u64; 16]) { + round::<R>(state, block); + + block[R % 16] = block[R % 16] + .wrapping_add(unsafe { sha512sig1(block[(R + 14) % 16]) }) + .wrapping_add(block[(R + 9) % 16]) + .wrapping_add(unsafe { sha512sig0(block[(R + 1) % 16]) }); +} + +fn compress_block(state: &mut [u64; 8], mut block: [u64; 16]) { + let s = &mut state.clone(); + let b = &mut block; + + round_schedule::<0>(s, b); + round_schedule::<1>(s, b); + round_schedule::<2>(s, b); + round_schedule::<3>(s, b); + round_schedule::<4>(s, b); + round_schedule::<5>(s, b); + round_schedule::<6>(s, b); + round_schedule::<7>(s, b); + round_schedule::<8>(s, b); + round_schedule::<9>(s, b); + round_schedule::<10>(s, b); + round_schedule::<11>(s, b); + round_schedule::<12>(s, b); + round_schedule::<13>(s, b); + round_schedule::<14>(s, b); + round_schedule::<15>(s, b); + round_schedule::<16>(s, b); + round_schedule::<17>(s, b); + round_schedule::<18>(s, b); + round_schedule::<19>(s, b); + round_schedule::<20>(s, b); + round_schedule::<21>(s, b); + round_schedule::<22>(s, b); + round_schedule::<23>(s, b); + round_schedule::<24>(s, b); + round_schedule::<25>(s, b); + round_schedule::<26>(s, b); + round_schedule::<27>(s, b); + round_schedule::<28>(s, b); + round_schedule::<29>(s, b); + round_schedule::<30>(s, b); + round_schedule::<31>(s, b); + round_schedule::<32>(s, b); + round_schedule::<33>(s, b); + round_schedule::<34>(s, b); + round_schedule::<35>(s, b); + round_schedule::<36>(s, b); + round_schedule::<37>(s, b); + round_schedule::<38>(s, b); + round_schedule::<39>(s, b); + round_schedule::<40>(s, b); + round_schedule::<41>(s, b); +
round_schedule::<42>(s, b); + round_schedule::<43>(s, b); + round_schedule::<44>(s, b); + round_schedule::<45>(s, b); + round_schedule::<46>(s, b); + round_schedule::<47>(s, b); + round_schedule::<48>(s, b); + round_schedule::<49>(s, b); + round_schedule::<50>(s, b); + round_schedule::<51>(s, b); + round_schedule::<52>(s, b); + round_schedule::<53>(s, b); + round_schedule::<54>(s, b); + round_schedule::<55>(s, b); + round_schedule::<56>(s, b); + round_schedule::<57>(s, b); + round_schedule::<58>(s, b); + round_schedule::<59>(s, b); + round_schedule::<60>(s, b); + round_schedule::<61>(s, b); + round_schedule::<62>(s, b); + round_schedule::<63>(s, b); + round::<64>(s, b); + round::<65>(s, b); + round::<66>(s, b); + round::<67>(s, b); + round::<68>(s, b); + round::<69>(s, b); + round::<70>(s, b); + round::<71>(s, b); + round::<72>(s, b); + round::<73>(s, b); + round::<74>(s, b); + round::<75>(s, b); + round::<76>(s, b); + round::<77>(s, b); + round::<78>(s, b); + round::<79>(s, b); + + for i in 0..8 { + state[i] = state[i].wrapping_add(s[i]); + } +} + +pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + for block in blocks.iter().map(super::to_u64s) { + compress_block(state, block); + } +} diff --git a/sha2/src/sha512/riscv_zknh_compact.rs b/sha2/src/sha512/riscv_zknh_compact.rs new file mode 100644 index 00000000..92e984c5 --- /dev/null +++ b/sha2/src/sha512/riscv_zknh_compact.rs @@ -0,0 +1,104 @@ +use crate::consts::K64; + +#[cfg(target_arch = "riscv32")] +use core::arch::riscv32::*; +#[cfg(target_arch = "riscv64")] +use core::arch::riscv64::*; + +#[cfg(not(target_feature = "zknh"))] +compile_error!("riscv-zknh backend requires enabled zknh target feature"); + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sum0(x: u64) -> u64 { + let a = sha512sum0r((x >> 32) as u32, x as u32); + let b = sha512sum0r(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sum1(x: u64) -> u64 { + let a = 
sha512sum1r((x >> 32) as u32, x as u32); + let b = sha512sum1r(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sig0(x: u64) -> u64 { + let a = sha512sig0h((x >> 32) as u32, x as u32); + let b = sha512sig0l(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[cfg(target_arch = "riscv32")] +unsafe fn sha512sig1(x: u64) -> u64 { + let a = sha512sig1h((x >> 32) as u32, x as u32); + let b = sha512sig1l(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) +} + +#[inline(always)] +fn ch(x: u64, y: u64, z: u64) -> u64 { + (x & y) ^ (!x & z) +} + +#[inline(always)] +fn maj(x: u64, y: u64, z: u64) -> u64 { + (x & y) ^ (x & z) ^ (y & z) +} + +#[inline(always)] +fn round(state: &mut [u64; 8], block: &[u64; 16], r: usize) { + let n = K64.len() - r; + #[allow(clippy::identity_op)] + let a = (n + 0) % 8; + let b = (n + 1) % 8; + let c = (n + 2) % 8; + let d = (n + 3) % 8; + let e = (n + 4) % 8; + let f = (n + 5) % 8; + let g = (n + 6) % 8; + let h = (n + 7) % 8; + + state[h] = state[h] + .wrapping_add(unsafe { sha512sum1(state[e]) }) + .wrapping_add(ch(state[e], state[f], state[g])) + .wrapping_add(K64[r]) + .wrapping_add(block[r % 16]); + state[d] = state[d].wrapping_add(state[h]); + state[h] = state[h] + .wrapping_add(unsafe { sha512sum0(state[a]) }) + .wrapping_add(maj(state[a], state[b], state[c])) +} + +#[inline(always)] +fn round_schedule(state: &mut [u64; 8], block: &mut [u64; 16], r: usize) { + round(state, block, r); + + block[r % 16] = block[r % 16] + .wrapping_add(unsafe { sha512sig1(block[(r + 14) % 16]) }) + .wrapping_add(block[(r + 9) % 16]) + .wrapping_add(unsafe { sha512sig0(block[(r + 1) % 16]) }); +} + +#[inline(always)] +fn compress_block(state: &mut [u64; 8], mut block: [u64; 16]) { + let s = &mut state.clone(); + let b = &mut block; + + for i in 0..64 { + round_schedule(s, b, i); + } + for i in 64..80 { + round(s, b, i); + } + + for i in 0..8 { + state[i] = 
state[i].wrapping_add(s[i]); + } +} + +pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + for block in blocks.iter().map(super::to_u64s) { + compress_block(state, block); + } +} diff --git a/sha2/src/sha512/soft.rs b/sha2/src/sha512/soft.rs index 675f1614..c5d9f1cc 100644 --- a/sha2/src/sha512/soft.rs +++ b/sha2/src/sha512/soft.rs @@ -10,12 +10,12 @@ fn sha512load(v0: [u64; 2], v1: [u64; 2]) -> [u64; 2] { pub fn sha512_schedule_x2(v0: [u64; 2], v1: [u64; 2], v4to5: [u64; 2], v7: [u64; 2]) -> [u64; 2] { // sigma 0 fn sigma0(x: u64) -> u64 { - ((x << 63) | (x >> 1)) ^ ((x << 56) | (x >> 8)) ^ (x >> 7) + (x.rotate_right(1)) ^ (x.rotate_right(8)) ^ (x >> 7) } // sigma 1 fn sigma1(x: u64) -> u64 { - ((x << 45) | (x >> 19)) ^ ((x << 3) | (x >> 61)) ^ (x >> 6) + (x.rotate_right(19)) ^ (x.rotate_left(3)) ^ (x >> 6) } let [w1, w0] = v0; @@ -105,7 +105,7 @@ fn add_rk(mut w: [u64; 2], i: usize) -> [u64; 2] { } /// Process a block with the SHA-512 algorithm. -pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) { +pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: [u64; 16]) { macro_rules! schedule { ($v0:expr, $v1:expr, $v4:expr, $v5:expr, $v7:expr) => { sha512_schedule_x2($v0, $v1, sha512load($v4, $v5), $v7) @@ -209,11 +209,7 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) { } pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { - for block in blocks { - let mut block_u32 = [0u64; 16]; - for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(8)) { - *o = u64::from_be_bytes(chunk.try_into().unwrap()); - } - sha512_digest_block_u64(state, &block_u32); + for block in blocks.iter().map(super::to_u64s) { + sha512_digest_block_u64(state, block); } }