Skip to content

Commit

Permalink
md4: Optimize compress to improve hash performance (#519)
Browse files Browse the repository at this point in the history
  • Loading branch information
CausingBrick authored Nov 18, 2023
1 parent 70a2b62 commit 9f66cb8
Showing 1 changed file with 52 additions and 55 deletions.
107 changes: 52 additions & 55 deletions md4/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@
)]
#![forbid(unsafe_code)]
#![warn(rust_2018_idioms)]
#![allow(clippy::many_single_char_names)]

pub use digest::{self, Digest};

use core::{convert::TryInto, fmt};
use core::{convert::TryInto, fmt, num::Wrapping as W};
#[cfg(feature = "oid")]
use digest::const_oid::{AssociatedOid, ObjectIdentifier};
use digest::{
Expand All @@ -47,10 +46,20 @@ use digest::{
HashMarker, Output,
};

/// 32-bit word wrapped in `Wrapping` (imported as `W`), so `+`, `+=` and `*` wrap on overflow.
type Wu32 = W<u32>;
/// Initial value of the four-word hash state; `Md4Core::default()` starts from it.
const S0: [Wu32; 4] = [
W(0x6745_2301),
W(0xEFCD_AB89),
W(0x98BA_DCFE),
W(0x1032_5476),
];
/// Constant added to every message word in round 2 of `compress`.
const K1: Wu32 = W(0x5A82_7999);
/// Constant added to every message word in round 3 of `compress`.
const K2: Wu32 = W(0x6ED9_EBA1);

// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so
// each field shows up twice — presumably the plain-integer pair is the removed version and
// the `W<…>` pair is the added one. Confirm against the repository before editing.
/// Core MD4 hasher state: a running block counter plus the four-word hash state.
#[derive(Clone)]
pub struct Md4Core {
// Number of blocks seen by `update_blocks` so far (incremented with wrapping addition).
block_len: u64,
// Four 32-bit words that `compress` updates in place.
state: [u32; 4],
// Same counter, stored as a `Wrapping<u64>`.
block_len: W<u64>,
// Same four words, stored as `Wrapping<u32>` values.
state: [Wu32; 4],
}

impl HashMarker for Md4Core {}
Expand All @@ -70,7 +79,7 @@ impl OutputSizeUser for Md4Core {
impl UpdateCore for Md4Core {
#[inline]
fn update_blocks(&mut self, blocks: &[Block<Self>]) {
self.block_len = self.block_len.wrapping_add(blocks.len() as u64);
self.block_len += W(blocks.len() as u64);
for block in blocks {
compress(&mut self.state, block);
}
Expand All @@ -80,27 +89,25 @@ impl UpdateCore for Md4Core {
// Produces the final digest from the accumulated state and whatever is left in `buffer`.
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped; where
// two near-identical statements follow each other, the first is presumably the removed line
// and the second the added one. Confirm against the repository before editing.
impl FixedOutputCore for Md4Core {
#[inline]
fn finalize_fixed_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) {
// Total message length in bits: (full blocks * block size + bytes still buffered) * 8,
// computed with wrapping arithmetic. Method-call form:
let bit_len = self
.block_len
.wrapping_mul(Self::BlockSize::U64)
.wrapping_add(buffer.get_pos() as u64)
.wrapping_mul(8);
// The same quantity expressed with `Wrapping` operators.
let tail_len = W(buffer.get_pos() as u64);
let bytes_len = W(Self::BlockSize::U64) * self.block_len + tail_len;
let bits_len = W(8) * bytes_len;

// Work on a copy of the state so `self.state` is left as it was.
let mut state = self.state;
// Hand the bit length to the buffer's padding routine, which feeds each resulting block
// to `compress`. Going by the method name this appends a 64-bit little-endian length
// after padding — confirm against the `block-buffer` documentation.
buffer.len64_padding_le(bit_len, |block| compress(&mut state, block));
buffer.len64_padding_le(bits_len.0, |block| compress(&mut state, block));

// Serialize each 32-bit state word into `out` as four little-endian bytes.
for (chunk, v) in out.chunks_exact_mut(4).zip(state.iter()) {
chunk.copy_from_slice(&v.to_le_bytes());
chunk.copy_from_slice(&v.0.to_le_bytes());
}
}
}

// Fresh hasher: the fixed initial state words and a zero block counter.
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so the
// initializer appears twice — presumably the inline literal array and `block_len: 0` are the
// removed lines, and `S0` / `W(0)` the added ones. Confirm against the repository.
impl Default for Md4Core {
#[inline]
fn default() -> Self {
// Initial state words written inline as plain integers.
let state = [0x6745_2301, 0xEFCD_AB89, 0x98BA_DCFE, 0x1032_5476];
Self {
state,
block_len: 0,
// Same initial words taken from the shared `S0` constant; counter starts at zero.
state: S0,
block_len: W(0),
}
}
}
Expand Down Expand Up @@ -133,35 +140,25 @@ impl AssociatedOid for Md4Core {
/// MD4 hasher: the block-level [`Md4Core`] wrapped in [`CoreWrapper`].
pub type Md4 = CoreWrapper<Md4Core>;

fn compress(state: &mut [u32; 4], input: &Block<Md4Core>) {
fn f(x: u32, y: u32, z: u32) -> u32 {
(x & y) | (!x & z)
fn compress(state: &mut [Wu32; 4], input: &Block<Md4Core>) {
fn f(x: Wu32, y: Wu32, z: Wu32) -> Wu32 {
z ^ (x & (y ^ z))
}

// Bitwise majority: an output bit is set when at least two of the corresponding bits of
// `x`, `y`, `z` are set. Passed to the round-2 steps of `compress`.
// NOTE(review): two signature lines appear because this listing is a diff without +/- markers;
// presumably the `u32` one is removed and the `Wu32` one added — confirm against the repository.
fn g(x: u32, y: u32, z: u32) -> u32 {
fn g(x: Wu32, y: Wu32, z: Wu32) -> Wu32 {
(x & y) | (x & z) | (y & z)
}

// Bitwise XOR of the three words. Passed to the round-3 steps of `compress`.
// NOTE(review): two signature lines appear because this listing is a diff without +/- markers;
// presumably the `u32` one is removed and the `Wu32` one added — confirm against the repository.
fn h(x: u32, y: u32, z: u32) -> u32 {
fn h(x: Wu32, y: Wu32, z: Wu32) -> Wu32 {
x ^ y ^ z
}

// Round-1 step: `a + f(b, c, d) + k` with wrapping addition, then rotated left by `s` bits.
// NOTE(review): looks like the pre-change helper that the generic `op` replaces — confirm
// against the repository before relying on it.
fn op1(a: u32, b: u32, c: u32, d: u32, k: u32, s: u32) -> u32 {
a.wrapping_add(f(b, c, d)).wrapping_add(k).rotate_left(s)
}

// Round-2 step: `a + g(b, c, d) + k + 0x5A82_7999` with wrapping addition, then rotated left
// by `s` bits.
// NOTE(review): looks like the pre-change helper that the generic `op` replaces — confirm
// against the repository before relying on it.
fn op2(a: u32, b: u32, c: u32, d: u32, k: u32, s: u32) -> u32 {
a.wrapping_add(g(b, c, d))
.wrapping_add(k)
.wrapping_add(0x5A82_7999)
.rotate_left(s)
}

// Round-3 step: `a + h(b, c, d) + k + 0x6ED9_EBA1` with wrapping addition, then rotated left
// by `s` bits.
// NOTE(review): looks like the pre-change helper that the generic `op` replaces — confirm
// against the repository before relying on it.
fn op3(a: u32, b: u32, c: u32, d: u32, k: u32, s: u32) -> u32 {
a.wrapping_add(h(b, c, d))
.wrapping_add(k)
.wrapping_add(0x6ED9_EBA1)
.rotate_left(s)
}
// Generic round step shared by all three rounds.
//
// Mixes `b`, `c`, `d` through the round function `f`, adds the result to `a` together with
// `k` (wrapping on overflow), and returns that sum rotated left by `s` bits. Callers fold any
// per-round constant into `k` before calling.
fn op<F>(f: F, a: Wu32, b: Wu32, c: Wu32, d: Wu32, k: Wu32, s: u32) -> Wu32
where
    F: Fn(Wu32, Wu32, Wu32) -> Wu32,
{
    let mixed = f(b, c, d);
    // Destructure the `Wrapping` sum to reach the raw word for the rotation.
    let W(sum) = a + mixed + k;
    W(sum.rotate_left(s))
}

let mut a = state[0];
Expand All @@ -170,37 +167,37 @@ fn compress(state: &mut [u32; 4], input: &Block<Md4Core>) {
let mut d = state[3];

// load block to data
let mut data = [0u32; 16];
let mut data = [W(0u32); 16];
for (o, chunk) in data.iter_mut().zip(input.chunks_exact(4)) {
*o = u32::from_le_bytes(chunk.try_into().unwrap());
*o = W(u32::from_le_bytes(chunk.try_into().unwrap()));
}

// round 1
for &i in &[0, 4, 8, 12] {
a = op1(a, b, c, d, data[i], 3);
d = op1(d, a, b, c, data[i + 1], 7);
c = op1(c, d, a, b, data[i + 2], 11);
b = op1(b, c, d, a, data[i + 3], 19);
a = op(f, a, b, c, d, data[i], 3);
d = op(f, d, a, b, c, data[i + 1], 7);
c = op(f, c, d, a, b, data[i + 2], 11);
b = op(f, b, c, d, a, data[i + 3], 19);
}

// round 2
for i in 0..4 {
a = op2(a, b, c, d, data[i], 3);
d = op2(d, a, b, c, data[i + 4], 5);
c = op2(c, d, a, b, data[i + 8], 9);
b = op2(b, c, d, a, data[i + 12], 13);
for &i in &[0, 1, 2, 3] {
a = op(g, a, b, c, d, data[i] + K1, 3);
d = op(g, d, a, b, c, data[i + 4] + K1, 5);
c = op(g, c, d, a, b, data[i + 8] + K1, 9);
b = op(g, b, c, d, a, data[i + 12] + K1, 13);
}

// round 3
for &i in &[0, 2, 1, 3] {
a = op3(a, b, c, d, data[i], 3);
d = op3(d, a, b, c, data[i + 8], 9);
c = op3(c, d, a, b, data[i + 4], 11);
b = op3(b, c, d, a, data[i + 12], 15);
a = op(h, a, b, c, d, data[i] + K2, 3);
d = op(h, d, a, b, c, data[i + 8] + K2, 9);
c = op(h, c, d, a, b, data[i + 4] + K2, 11);
b = op(h, b, c, d, a, data[i + 12] + K2, 15);
}

state[0] = state[0].wrapping_add(a);
state[1] = state[1].wrapping_add(b);
state[2] = state[2].wrapping_add(c);
state[3] = state[3].wrapping_add(d);
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
}

0 comments on commit 9f66cb8

Please sign in to comment.