Skip to content

Commit

Permalink
sha2: Add inline-asm backend for LoongArch64 targets (#507)
Browse files Browse the repository at this point in the history
  • Loading branch information
heiher authored Sep 26, 2023
1 parent 026b0e8 commit c6decdf
Show file tree
Hide file tree
Showing 8 changed files with 498 additions and 2 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/sha2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,18 @@ jobs:
- uses: msys2/setup-msys2@v2
- run: cargo test --target ${{ matrix.target }}

# Build-only test of the LoongArch64 assembly backend
loongarch64_asm:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: RustCrypto/actions/cargo-cache@master
- uses: dtolnay/rust-toolchain@master
with:
toolchain: 1.72
targets: loongarch64-unknown-linux-gnu
- run: cargo build --target loongarch64-unknown-linux-gnu --features loongarch64_asm

# Cross-compiled tests
cross:
needs: set-msrv
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions sha2/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.10.8 (2023-09-26)
### Added
- `asm!`-based backend for LoongArch64 targets gated behind `loongarch64_asm` feature ([#507])

[#507]: https://github.com/RustCrypto/hashes/pull/507

## 0.10.7 (2023-06-15)
### Added
- AArch64 Neon-based backend ([#490])
Expand Down
5 changes: 4 additions & 1 deletion sha2/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sha2"
version = "0.10.7"
version = "0.10.8"
description = """
Pure Rust implementation of the SHA-2 hash function family
including SHA-224, SHA-256, SHA-384, and SHA-512.
Expand Down Expand Up @@ -31,6 +31,9 @@ default = ["std"]
std = ["digest/std"]
oid = ["digest/oid"] # Enable OID support. WARNING: Bumps MSRV to 1.57
asm = ["sha2-asm"] # WARNING: this feature SHOULD NOT be enabled by library crates
# Use assembly backend for LoongArch64 targets
# WARNING: Bumps MSRV to 1.72. This feature SHOULD NOT be enabled by library crates
loongarch64_asm = []
compress = [] # Expose compress functions
force-soft = [] # Force software implementation
asm-aarch64 = ["asm"] # DEPRECATED: use `asm` instead
Expand Down
3 changes: 3 additions & 0 deletions sha2/src/sha256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ cfg_if::cfg_if! {
mod soft;
mod aarch64;
use aarch64::compress;
} else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
mod loongarch64_asm;
use loongarch64_asm::compress;
} else {
mod soft;
use soft::compress;
Expand Down
227 changes: 227 additions & 0 deletions sha2/src/sha256/loongarch64_asm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
//! LoongArch64 assembly backend

// Glue a whitespace-separated run of expressions (string literals, numeric
// literals, nested macro calls) into one `&'static str` by forwarding them,
// comma-separated, to `concat!`. An empty invocation yields "".
macro_rules! c {
    ($($piece:expr)*) => (
        concat!($($piece),*)
    );
}

// Assembly text for one of the first 16 rounds (callers pass $i = 0..=15).
//
// The message word for round $i is read directly from the input block
// pointed to by $a1 and left in $a5, then the shared round body
// (`roundtail!`) is appended.
//
// $i       - round index, spliced into the assembly as an immediate.
// $a..$h   - register names (string literals) currently holding the eight
//            working variables a..h.
// Scratch registers used: $a4-$a7.
macro_rules! rounda {
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
c!(
// Load 32-bit word $i of the current block.
// NOTE(review): the block pointer comes from a `[u8; 64]`, so it is only
// byte-aligned; this `ld.w` presumably relies on the target tolerating
// unaligned word loads - confirm.
"ld.w $a5, $a1, (" $i " * 4);"
// Convert to big-endian: swap the bytes inside each halfword, then swap the
// two halfwords with a 16-bit rotate.
"revb.2h $a5, $a5;"
"rotri.w $a5, $a5, 16;"
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
)
};
}

// Assembly text for one of the later rounds (callers pass $i = 16..=63).
//
// The message word is not read from the input block; it is derived from four
// earlier schedule words kept in a 16-word ring buffer at $sp (slot index is
// the word index masked with 0xF):
//
//   W[i] = W[i-16] + W[i-7] + s0(W[i-15]) + s1(W[i-2])
//   s0(x) = rotr(x, 7)  ^ rotr(x, 18) ^ (x >> 3)
//   s1(x) = rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10)
//
// The result is left in $a5 and the shared round body (`roundtail!`) is
// appended; that body also stores $a5 back into slot ($i & 0xF).
//
// $i       - round index, spliced into the assembly as an immediate.
// $a..$h   - register names (string literals) currently holding the eight
//            working variables a..h.
// Scratch registers used: $a4-$a7.
macro_rules! roundb {
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
c!(
// $a4 = W[i-15], $a5 = W[i-16], $a6 = W[i-7]
"ld.w $a4, $sp, (((" $i " - 15) & 0xF) * 4);"
"ld.w $a5, $sp, (((" $i " - 16) & 0xF) * 4);"
"ld.w $a6, $sp, (((" $i " - 7) & 0xF) * 4);"
// $a5 = W[i-16] + W[i-7]
"add.w $a5, $a5, $a6;"
// $a4 = s0(W[i-15])
"rotri.w $a6, $a4, 18;"
"srli.w $a7, $a4, 3;"
"rotri.w $a4, $a4, 7;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
"add.w $a5, $a5, $a4;"
// $a4 = s1(W[i-2])
"ld.w $a4, $sp, (((" $i " - 2) & 0xF) * 4);"
"rotri.w $a6, $a4, 19;"
"srli.w $a7, $a4, 10;"
"rotri.w $a4, $a4, 17;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
// $a5 = W[i]
"add.w $a5, $a5, $a4;"
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
)
};
}

// Assembly text for the part of a round shared by `rounda!` and `roundb!`.
//
// Expects the message word W[i] in $a5 and the round-constant table base in
// $a3. Only the registers named by $d and $h are updated:
//
//   h += W[i] + K[i] + S1(e) + Ch(e, f, g)      (T1)
//   d += h
//   h += S0(a) + Maj(a, b, c)                   (T1 + T2)
//
// so afterwards the $h register holds the next `a` and the $d register holds
// the next `e`. The other six registers are untouched, which is why callers
// rotate the register-name arguments by one position each round instead of
// moving values between registers.
//
// Finally W[i] is written to slot ($i & 0xF) of the 16-word buffer at $sp so
// that later `roundb!` rounds can read it back.
//
// Scratch registers used: $a4, $a6, $a7 ($a5 is read only).
macro_rules! roundtail {
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
c!(
// Part 0: h += W[i] + K[i] + S1(e) + Ch(e, f, g)
// S1(e) = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25), built in $a4
"rotri.w $a6, " $e ", 11;"
"rotri.w $a7, " $e ", 25;"
"rotri.w $a4, " $e ", 6;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
// Ch(e, f, g) computed as ((g ^ f) & e) ^ g in $a6; the K[i] load into $a7
// is interleaved with it.
"xor $a6, " $g ", " $f ";"
"ld.w $a7, $a3, " $i " * 4;"
"and $a6, $a6, " $e ";"
"xor $a6, $a6, " $g ";"
"add.w $a4, $a4, $a6;"
"add.w $a4, $a4, $a7;"
"add.w " $h ", " $h ", $a5;"
"add.w " $h ", " $h ", $a4;"
// Part 1: d += T1 (becomes the next `e`)
"add.w " $d ", " $d ", " $h ";"
// Part 2: h += S0(a) + Maj(a, b, c) (becomes the next `a`)
// S0(a) = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22), built in $a4
"rotri.w $a6, " $a ", 13;"
"rotri.w $a7, " $a ", 22;"
"rotri.w $a4, " $a ", 2;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
"add.w " $h ", " $h ", $a4;"
// Maj(a, b, c) computed as ((c | b) & a) | (c & b) in $a4
"or $a4, " $c ", " $b ";"
"and $a6, " $c ", " $b ";"
"and $a4, $a4, " $a ";"
"or $a4, $a4, $a6;"
"add.w " $h ", " $h ", $a4;"
// Remember W[i] in the ring buffer for the message schedule.
"st.w $a5, $sp, ((" $i " & 0xF) * 4);"
)
};
}

/// SHA-256 compression function implemented with LoongArch64 inline assembly.
///
/// Feeds every 64-byte block of `blocks`, in order, through the 64-round
/// compression loop and updates the eight 32-bit words of `state` in place.
/// Returns immediately without touching `state` when `blocks` is empty.
///
/// Register usage inside the assembly:
/// - `$a0`: pointer to `state`
/// - `$a1`: pointer to the current block (advanced by 64 per iteration)
/// - `$a2`: number of blocks left
/// - `$a3`: pointer to the round-constant table `crate::consts::K32`
/// - `$a4`-`$a7`: scratch
/// - `$t0`-`$t7`: working variables `a`..`h`
/// - 64 bytes below the incoming `$sp`: 16-word message-schedule buffer
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
// The assembly loop below tests its counter only after a block has been
// processed, so it must never be entered with a count of zero.
if blocks.is_empty() {
return;
}

// SAFETY:
// - `$a0` comes from `state: &mut [u32; 8]`, so the eight word loads and
//   stores at offsets 0..=28 stay inside a valid, exclusively borrowed
//   object.
// - `$a1`/`$a2` come from the same slice, so exactly `blocks.len()` blocks
//   of 64 bytes are read, and the check above guarantees the count is
//   non-zero.
// - `$a3` is indexed at word offsets 0..=63; this assumes
//   `crate::consts::K32` has at least 64 `u32` entries - TODO confirm
//   against its definition.
// - `$sp` is lowered by 64 bytes on entry and raised by 64 bytes on exit, and
//   only offsets 0..=60 from the lowered `$sp` are accessed in between.
// - Every register written by the assembly is declared as an output or a
//   clobber below.
unsafe {
core::arch::asm!(
// Allocate scratch stack space
// (16 x 4 bytes, used as the message-schedule ring buffer)
"addi.d $sp, $sp, -64;",

// Load state
"ld.w $t0, $a0, 0",
"ld.w $t1, $a0, 4",
"ld.w $t2, $a0, 8",
"ld.w $t3, $a0, 12",
"ld.w $t4, $a0, 16",
"ld.w $t5, $a0, 20",
"ld.w $t6, $a0, 24",
"ld.w $t7, $a0, 28",

// Top of the per-block loop. The state is not reloaded here: at the end of
// an iteration $t0-$t7 already hold the values just stored to `state`.
"42:",

// Do 64 rounds of hashing
// The register-name arguments rotate by one position per round, so after
// every 8 rounds (and therefore after all 64) a..h are back in $t0..$t7.
rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),

// Update state registers
// (add the state as it was before this block, still in memory, to the
// working variables, four words at a time through $a4-$a7)
"ld.w $a4, $a0, 0", // a
"ld.w $a5, $a0, 4", // b
"ld.w $a6, $a0, 8", // c
"ld.w $a7, $a0, 12", // d
"add.w $t0, $t0, $a4",
"add.w $t1, $t1, $a5",
"add.w $t2, $t2, $a6",
"add.w $t3, $t3, $a7",
"ld.w $a4, $a0, 16", // e
"ld.w $a5, $a0, 20", // f
"ld.w $a6, $a0, 24", // g
"ld.w $a7, $a0, 28", // h
"add.w $t4, $t4, $a4",
"add.w $t5, $t5, $a5",
"add.w $t6, $t6, $a6",
"add.w $t7, $t7, $a7",

// Save updated state
"st.w $t0, $a0, 0",
"st.w $t1, $a0, 4",
"st.w $t2, $a0, 8",
"st.w $t3, $a0, 12",
"st.w $t4, $a0, 16",
"st.w $t5, $a0, 20",
"st.w $t6, $a0, 24",
"st.w $t7, $a0, 28",

// Looping over blocks
// (advance to the next 64-byte block, decrement the remaining count and
// branch back while it is non-zero)
"addi.d $a1, $a1, 64",
"addi.d $a2, $a2, -1",
"bnez $a2, 42b",

// Restore stack register
"addi.d $sp, $sp, 64",

// $a0 is only read; $a1 and $a2 are modified by the loop, so they are
// declared `inout` with their final values discarded.
in("$a0") state,
inout("$a1") blocks.as_ptr() => _,
inout("$a2") blocks.len() => _,
in("$a3") crate::consts::K32.as_ptr(),

// Clobbers
out("$a4") _,
out("$a5") _,
out("$a6") _,
out("$a7") _,
out("$t0") _,
out("$t1") _,
out("$t2") _,
out("$t3") _,
out("$t4") _,
out("$t5") _,
out("$t6") _,
out("$t7") _,

options(preserves_flags),
);
}
}
3 changes: 3 additions & 0 deletions sha2/src/sha512.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ cfg_if::cfg_if! {
mod soft;
mod aarch64;
use aarch64::compress;
} else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
mod loongarch64_asm;
use loongarch64_asm::compress;
} else {
mod soft;
use soft::compress;
Expand Down
Loading

0 comments on commit c6decdf

Please sign in to comment.