From 27656e5c3bfd0120ded410136861ff9607a76452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 18:48:30 +0200 Subject: [PATCH 1/8] Extend _mm_madd_epi16 test to check cases with large values. --- crates/core_arch/src/x86/sse2.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index 7831ea7435..bc91339138 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -3139,6 +3139,32 @@ mod tests { let r = _mm_madd_epi16(a, b); let e = _mm_setr_epi32(29, 81, 149, 233); assert_eq_m128i(r, e); + + // Test large values. + // MIN*MIN+MIN*MIN will overflow into i32::MIN. + let a = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MAX, + 0, + 0, + ); + let b = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MAX, + i16::MIN, + 0, + 0, + ); + let r = _mm_madd_epi16(a, b); + let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0); + assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] From 7f6f0eaba1d782aefd836a6170b017fa7682cd98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 18:49:55 +0200 Subject: [PATCH 2/8] Extend _mm_shuffle_epi8 test to check index wrapping --- crates/core_arch/src/x86/ssse3.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index bdc6836ac8..3bd39b6fb1 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -372,6 +372,11 @@ mod tests { let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1); let r = _mm_shuffle_epi8(a, b); assert_eq_m128i(r, expected); + + // Test indices greater than 15 wrapping around + let b = _mm_add_epi8(b, _mm_set1_epi8(32)); + let r = _mm_shuffle_epi8(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] From 0c4c76dd4a3c0a9da5302c351d560414834ceeb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 18:52:17 +0200 Subject: [PATCH 3/8] Extend SSSE3 hadd/hsub tests to check overflow behavior (wrapping or saturating) --- crates/core_arch/src/x86/ssse3.rs | 78 +++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index 3bd39b6fb1..bd6944a375 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -426,6 +426,22 @@ mod tests { let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25); let r = _mm_hadd_epi16(a, b); assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4); + let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4); + let expected = _mm_setr_epi16( + i16::MIN, + i16::MIN + 1, + i16::MIN + 2, + i16::MIN + 3, + i16::MAX, + i16::MAX - 1, + i16::MAX - 2, + i16::MAX - 3, + ); + let r = _mm_hadd_epi16(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] @@ -435,6 +451,22 @@ mod tests { let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768); let r = _mm_hadds_epi16(a, b); assert_eq_m128i(r, expected); + + // Test saturating on overflow + let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4); + let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4); + let expected = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let r = _mm_hadds_epi16(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] @@ -444,6 +476,13 @@ mod tests { let expected = _mm_setr_epi32(3, 7, 132, 7); let r = _mm_hadd_epi32(a, b); assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2); + let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2); + let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1); + let r = _mm_hadd_epi32(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] @@ -453,6 +492,22 @@ mod tests { let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13); let r = _mm_hsub_epi16(a, b); assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4); + let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4); + let expected = _mm_setr_epi16( + i16::MIN, + i16::MIN + 1, + i16::MIN + 2, + i16::MIN + 3, + i16::MAX, + i16::MAX - 1, + i16::MAX - 2, + i16::MAX - 3, + ); + let r = _mm_hsub_epi16(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] @@ -462,6 +517,22 @@ mod tests { let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768); let r = _mm_hsubs_epi16(a, b); assert_eq_m128i(r, expected); + + // Test saturating on overflow + let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4); + let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4); + let expected = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let r = _mm_hsubs_epi16(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] @@ -471,6 +542,13 @@ mod tests { let expected = _mm_setr_epi32(-1, -1, -124, 1); let r = _mm_hsub_epi32(a, b); assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2); + let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2); + let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1); + let r = _mm_hsub_epi32(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] From dec68d605fb1743ecc3f2c59e6ace719852c5453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 18:54:57 +0200 Subject: [PATCH 4/8] Extend _mm_maddubs_epi16 test to check widening and saturating behavior --- crates/core_arch/src/x86/ssse3.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index bd6944a375..8fe2390aa3 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -568,6 +568,27 @@ mod tests { let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120); let r = _mm_maddubs_epi16(a, b); assert_eq_m128i(r, expected); + + // Test widening and saturation + #[rustfmt::skip] + let a = _mm_setr_epi8( + u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, + 100, 100, 0, 0, + 0, 0, 0, 0, 0, 0, + ); + #[rustfmt::skip] + let b = _mm_setr_epi8( + i8::MAX, i8::MAX, + i8::MAX, i8::MIN, + i8::MIN, i8::MIN, + 50, 15, 0, 0, 0, + 0, 0, 0, 0, 0, + ); + let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0); + let r = _mm_maddubs_epi16(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] From e73ef1f1a19c37d0d895a35d96c89ee5519ff8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 18:55:47 +0200 Subject: [PATCH 5/8] Extend _mm_mulhrs_epi16 test to check large values --- crates/core_arch/src/x86/ssse3.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index 8fe2390aa3..4957c2b1ea 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -598,6 +598,13 @@ mod tests { let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0); let r = _mm_mulhrs_epi16(a, b); assert_eq_m128i(r, expected); + + // Test extreme values + let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0); + let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0); + let r = _mm_mulhrs_epi16(a, b); + assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] From 8bc0dff97a8d98f38ea6e34c0962ad137bb5ebbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 19:16:37 +0200 Subject: [PATCH 6/8] Extend _mm_insert_ps test to check zeroing priority over copying --- crates/core_arch/src/x86/sse41.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs index 6d33238b08..6f5a4fc2ec 100644 --- a/crates/core_arch/src/x86/sse41.rs +++ b/crates/core_arch/src/x86/sse41.rs @@ -1294,6 +1294,13 @@ mod tests { let r = _mm_insert_ps::<0b11_00_1100>(a, b); let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0); assert_eq_m128(r, e); + + // Zeroing takes precedence over copied value + let a = _mm_set1_ps(1.0); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_insert_ps::<0b11_00_0001>(a, b); + let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0); + assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] From 0bfc8261fa69bd0d67a37bcb9ff1a01cc8d5c3d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 19:18:19 +0200 Subject: [PATCH 7/8] Extend _mm_minpos_epu16 test to check case where minimum value is repeated --- crates/core_arch/src/x86/sse41.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs index 6f5a4fc2ec..6e16dfa285 100644 --- a/crates/core_arch/src/x86/sse41.rs +++ b/crates/core_arch/src/x86/sse41.rs @@ -1731,6 +1731,15 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "sse4.1")] + unsafe fn test_mm_minpos_epu16_3() { + // Case where the minimum value is repeated + let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13); + let r = _mm_minpos_epu16(a); + let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "sse4.1")] unsafe fn test_mm_mul_epi32() { { From ee7273793e6e0884110a86abc4fa9866ff430606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Mon, 16 Oct 2023 19:31:51 +0200 Subject: [PATCH 8/8] Improve _mm_round_ss/_mm_round_sd tests * Do not use deprecated CSR access functions * Test different rounding modes --- crates/core_arch/src/x86/sse41.rs | 48 ++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs index 6e16dfa285..af51a53feb 100644 --- a/crates/core_arch/src/x86/sse41.rs +++ b/crates/core_arch/src/x86/sse41.rs @@ -1689,30 +1689,58 @@ mod tests { assert_eq_m128(r, e); } - #[allow(deprecated)] // FIXME: This test uses deprecated CSR access functions #[simd_test(enable = "sse4.1")] unsafe fn test_mm_round_sd() { let a = _mm_setr_pd(1.5, 3.5); let b = _mm_setr_pd(-2.5, -4.5); - let old_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); - let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); - _MM_SET_ROUNDING_MODE(old_mode); + let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b); + let e = _mm_setr_pd(-2.0, 3.5); + assert_eq_m128d(r, e); + + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b); + let e = _mm_setr_pd(-3.0, 3.5); + assert_eq_m128d(r, e); + + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b); + let e = _mm_setr_pd(-2.0, 3.5); + assert_eq_m128d(r, e); + + let a = _mm_setr_pd(1.5, 3.5); + let b = _mm_setr_pd(-2.5, -4.5); + let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b); let e = _mm_setr_pd(-2.0, 3.5); assert_eq_m128d(r, e); } - #[allow(deprecated)] // FIXME: This test uses deprecated CSR access functions #[simd_test(enable = "sse4.1")] unsafe fn test_mm_round_ss() { let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); - let old_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); - _MM_SET_ROUNDING_MODE(old_mode); + let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b); let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); assert_eq_m128(r, e); + + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); + let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b); + let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); + + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); + let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b); + let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); + + let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); + let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); + let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b); + let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); + assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")]