From 0bf4f0f4f351233106c76b9e871acbc7bbdd54e0 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Wed, 29 May 2024 02:56:59 -0500
Subject: [PATCH 1/4] Add an apfloat fallback for int to float tests

---
 testcrate/tests/conv.rs | 129 ++++++++++++++++++++++++----------------
 1 file changed, 77 insertions(+), 52 deletions(-)

diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs
index 24f3a04a..60968038 100644
--- a/testcrate/tests/conv.rs
+++ b/testcrate/tests/conv.rs
@@ -12,60 +12,82 @@ mod int_to_float {
     use super::*;

     macro_rules! i_to_f {
-        ($($from:ty, $into:ty, $fn:ident);*;) => {
+        ($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => {
             $(
                 #[test]
                 fn $fn() {
                     use compiler_builtins::float::conv::$fn;
                     use compiler_builtins::int::Int;

-                    fuzz(N, |x: $from| {
-                        let f0 = x as $into;
-                        let f1: $into = $fn(x);
-                        // This makes sure that the conversion produced the best rounding possible, and does
-                        // this independent of `x as $into` rounding correctly.
-                        // This assumes that float to integer conversion is correct.
-                        let y_minus_ulp = <$into>::from_bits(f1.to_bits().wrapping_sub(1)) as $from;
-                        let y = f1 as $from;
-                        let y_plus_ulp = <$into>::from_bits(f1.to_bits().wrapping_add(1)) as $from;
-                        let error_minus = <$from as Int>::abs_diff(y_minus_ulp, x);
-                        let error = <$from as Int>::abs_diff(y, x);
-                        let error_plus = <$from as Int>::abs_diff(y_plus_ulp, x);
-                        // The first two conditions check that none of the two closest float values are
-                        // strictly closer in representation to `x`. The second makes sure that rounding is
-                        // towards even significand if two float values are equally close to the integer.
-                        if error_minus < error
-                            || error_plus < error
-                            || ((error_minus == error || error_plus == error)
-                                && ((f0.to_bits() & 1) != 0))
-                        {
-                            if !cfg!(any(
-                                target_arch = "powerpc",
-                                target_arch = "powerpc64"
-                            )) {
-                                panic!(
-                                    "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
-                                    stringify!($fn),
-                                    x,
-                                    f1.to_bits(),
-                                    y_minus_ulp,
-                                    y,
-                                    y_plus_ulp,
-                                    error_minus,
-                                    error,
-                                    error_plus,
-                                );
+                    fuzz(N, |x: $i_ty| {
+                        let f0 = apfloat_fallback!(
+                            $f_ty, $apfloat_ty, $sys_available,
+                            |x| x as $f_ty;
+                            // When the builtin is not available, we need to use a different conversion
+                            // method (since apfloat doesn't support `as` casting).
+                            |x: $i_ty| {
+                                use compiler_builtins::int::MinInt;
+
+                                let apf = if <$i_ty>::SIGNED {
+                                    FloatTy::from_i128(x.try_into().unwrap()).value
+                                } else {
+                                    FloatTy::from_u128(x.try_into().unwrap()).value
+                                };
+
+                                <$f_ty>::from_bits(apf.to_bits())
+                            },
+                            x
+                        );
+                        let f1: $f_ty = $fn(x);
+
+                        #[cfg($sys_available)] {
+                            // This makes sure that the conversion produced the best rounding possible, and does
+                            // this independently of `x as $f_ty` rounding correctly.
+                            // This assumes that float to integer conversion is correct.
+                            let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty;
+                            let y = f1 as $i_ty;
+                            let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty;
+                            let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x);
+                            let error = <$i_ty as Int>::abs_diff(y, x);
+                            let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x);
+
+                            // The first two conditions check that neither of the two closest float values is
+                            // strictly closer in representation to `x`. The third makes sure that rounding is
+                            // towards even significand if two float values are equally close to the integer.
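+                            // (Editorial example: when `x` lies exactly halfway between `y` and
+                            // `y_plus_ulp`, then `error == error_plus`, and only the candidate
+                            // whose significand is even is an acceptable result.)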
+                            if error_minus < error
+                                || error_plus < error
+                                || ((error_minus == error || error_plus == error)
+                                    && ((f0.to_bits() & 1) != 0))
+                            {
+                                if !cfg!(any(
+                                    target_arch = "powerpc",
+                                    target_arch = "powerpc64"
+                                )) {
+                                    panic!(
+                                        "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
+                                        stringify!($fn),
+                                        x,
+                                        f1.to_bits(),
+                                        y_minus_ulp,
+                                        y,
+                                        y_plus_ulp,
+                                        error_minus,
+                                        error,
+                                        error_plus,
+                                    );
+                                }
+                            }
+                        }
+
                         // Test against native conversion. We disable testing on all `x86` because of
                         // rounding bugs with `i686`. `powerpc` also has the same rounding bug.
-                        if f0 != f1 && !cfg!(any(
+                        if !Float::eq_repr(f0, f1) && !cfg!(any(
                             target_arch = "x86",
                             target_arch = "powerpc",
                             target_arch = "powerpc64"
                         )) {
                             panic!(
-                                "{}({}): std: {}, builtins: {}",
+                                "{}({}): std: {:?}, builtins: {:?}",
                                 stringify!($fn),
                                 x,
                                 f0,
@@ -78,19 +100,22 @@ mod int_to_float {
         };
     }

-    i_to_f! {
-        u32, f32, __floatunsisf;
-        u32, f64, __floatunsidf;
-        i32, f32, __floatsisf;
-        i32, f64, __floatsidf;
-        u64, f32, __floatundisf;
-        u64, f64, __floatundidf;
-        i64, f32, __floatdisf;
-        i64, f64, __floatdidf;
-        u128, f32, __floatuntisf;
-        u128, f64, __floatuntidf;
-        i128, f32, __floattisf;
-        i128, f64, __floattidf;
+    i_to_f! { f32, Single, all(),
+        u32, __floatunsisf;
+        i32, __floatsisf;
+        u64, __floatundisf;
+        i64, __floatdisf;
+        u128, __floatuntisf;
+        i128, __floattisf;
+    }
+
+    i_to_f! { f64, Double, all(),
+        u32, __floatunsidf;
+        i32, __floatsidf;
+        u64, __floatundidf;
+        i64, __floatdidf;
+        u128, __floatuntidf;
+        i128, __floattidf;
     }
 }

From 84021e790b884e5eecc6f1b470b3e116ef33ff5c Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Thu, 13 Jun 2024 06:50:44 -0500
Subject: [PATCH 2/4] Refactor integer to float conversion

Extract some common routines to separate functions in order to
deduplicate code and remove some of the magic.

---
 src/float/conv.rs       | 174 +++++++++++++++++++++++++++++-----------
 src/int/mod.rs          |  10 ++-
 testcrate/tests/conv.rs |   2 +-
 3 files changed, 138 insertions(+), 48 deletions(-)

diff --git a/src/float/conv.rs b/src/float/conv.rs
index e86fee6d..da87b3ca 100644
--- a/src/float/conv.rs
+++ b/src/float/conv.rs
@@ -6,21 +6,91 @@ use super::Float;

 /// Conversions from integers to floats.
 ///
-/// These are hand-optimized bit twiddling code,
-/// which unfortunately isn't the easiest kind of code to read.
+/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
+/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
+///   mantissa _with the implicit bit set_!
+/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some
+///   patterns are used to simplify this. Adjust the mantissa with the result if needed.
+/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one.
+/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
+///   from the exponent (above) accounts for the explicit bit being set in the mantissa.
 ///
-/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
+/// # Terminology
+///
+/// - `i`: the original integer
+/// - `i_m`: the integer, shifted fully left (no leading zeros)
+/// - `n`: number of leading zeroes
+/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
+/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
+/// - `adj`: the bits that will be truncated, possibly compressed in some way.
+/// - `m`: the resulting mantissa. Implicit bit is usually set.
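+///
+/// # Worked example
+///
+/// (Editorial illustration, appended for clarity.) Converting `3_u32` to `f32` with the steps
+/// above:
+///
+/// ```text
+/// i      = 3                 ->  n = 30 leading zeros
+/// i << n = 0xC000_0000           (the integer, left-aligned)
+/// m_base = 0xC000_0000 >> 8  = 0x00C0_0000  (implicit bit 23 set)
+/// adj    = 0xC000_0000 << 24 = 0            (no truncated bits, no rounding)
+/// e      = (127 - 1 + 32 - 30) - 1 = 127
+/// repr   = (127 << 23) + 0x00C0_0000 = 0x4040_0000 = 3.0_f32
+/// ```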
 mod int_to_float {
+    use super::*;
+
+    /// Calculate the exponent from the number of leading zeros.
+    ///
+    /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
+    /// bit set can be added back later.
+    fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
+        F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
+    }
+
+    /// Adjust a mantissa with dropped bits to perform correct rounding.
+    ///
+    /// The dropped bits should be exactly the bits that get truncated (left-aligned), but they
+    /// can be combined or compressed in some way that simplifies operations.
+    fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
+        // Branchlessly extract a `1` if rounding up should happen, 0 otherwise
+        // This accounts for rounding to even.
+        let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
+
+        // Add one when we need to round up. Break ties to even.
+        m_base + adj
+    }
+
+    /// Shift the exponent to its position and add the mantissa.
+    ///
+    /// If the mantissa has the implicit bit set, the exponent should be one less than its actual
+    /// value to cancel it out.
+    fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
+        // + rather than | so the mantissa can overflow into the exponent
+        (e << F::SIGNIFICAND_BITS) + m
+    }
+
+    /// Shift distance from a left-aligned integer to a smaller float.
+    fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
+        (I::BITS - F::BITS) + F::EXPONENT_BITS
+    }
+
+    /// Shift distance from an integer with `n` leading zeros to a smaller float.
+    fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
+        F::SIGNIFICAND_BITS - I::BITS + 1 + n
+    }
+
+    /// Perform a signed operation as unsigned, then add the sign back.
+    pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
+    where
+        F: Float,
+        I: Int,
+        F::Int: CastFrom<I>,
+        Conv: Fn(I::UnsignedInt) -> F::Int,
+    {
+        let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
+        F::from_bits(conv(i.unsigned_abs()) | sign_bit)
+    }
+
     pub fn u32_to_f32_bits(i: u32) -> u32 {
         if i == 0 {
             return 0;
         }
         let n = i.leading_zeros();
-        let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
-        let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-        let e = 157 - n; // Exponent plus 127, minus one.
-        (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+        // Mantissa with implicit bit set (significant bits)
+        let m_base = (i << n) >> f32::EXPONENT_BITS;
+        // Bits that will be dropped (insignificant bits)
+        let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
+        let m = m_adj::<f32>(m_base, adj);
+        let e = exp::<u32, f32>(n) - 1;
+        repr::<f32>(e, m)
     }

     pub fn u32_to_f64_bits(i: u32) -> u64 {
@@ -28,19 +98,23 @@ mod int_to_float {
             return 0;
         }
         let n = i.leading_zeros();
-        let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
-        let e = 1053 - n as u64; // Exponent plus 1023, minus one.
-        (e << 52) + m // Bit 53 of m will overflow into e.
+        // Mantissa with implicit bit set
+        let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
+        let e = exp::<u32, f64>(n) - 1;
+        repr::<f64>(e, m)
     }

     pub fn u64_to_f32_bits(i: u64) -> u32 {
         let n = i.leading_zeros();
-        let y = i.wrapping_shl(n);
-        let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
-        let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-        let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
-        (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+        let i_m = i.wrapping_shl(n);
+        // Mantissa with implicit bit set
+        let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
+        // The entire lower half of `i` will be truncated (masked portion), plus the
+        // next `EXPONENT_BITS` bits.
+        let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
+        let m = m_adj::<f32>(m_base, adj);
+        let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
+        repr::<f32>(e, m)
     }

     pub fn u64_to_f64_bits(i: u64) -> u64 {
@@ -48,31 +122,45 @@ mod int_to_float {
             return 0;
         }
         let n = i.leading_zeros();
-        let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
-        let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
-        let e = 1085 - n as u64; // Exponent plus 1023, minus one.
-        (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+        // Mantissa with implicit bit set
+        let m_base = (i << n) >> f64::EXPONENT_BITS;
+        let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
+        let m = m_adj::<f64>(m_base, adj);
+        let e = exp::<u64, f64>(n) - 1;
+        repr::<f64>(e, m)
     }

     pub fn u128_to_f32_bits(i: u128) -> u32 {
         let n = i.leading_zeros();
-        let y = i.wrapping_shl(n);
-        let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
-        let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-        let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
-        (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+        let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
+        let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
+
+        // Within the upper `F::BITS`, everything except for the significand
+        // gets truncated
+        let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
+
+        // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
+        // check if it is nonzero.
+        let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
+        let adj = d1 | d2;
+
+        // Mantissa with implicit bit set
+        let m = m_adj::<f32>(m_base, adj);
+        let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
+        repr::<f32>(e, m)
     }

     pub fn u128_to_f64_bits(i: u128) -> u64 {
         let n = i.leading_zeros();
-        let y = i.wrapping_shl(n);
-        let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
-        let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
-        let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
-        (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+        let i_m = i.wrapping_shl(n);
+        // Mantissa with implicit bit set
+        let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
+        // The entire lower half of `i` will be truncated (masked portion), plus the
+        // next `EXPONENT_BITS` bits.
+        let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
+        let m = m_adj::<f64>(m_base, adj);
+        let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
+        repr::<f64>(e, m)
     }
 }

@@ -113,38 +201,32 @@ intrinsics! {
 intrinsics!
{
     #[arm_aeabi_alias = __aeabi_i2f]
     pub extern "C" fn __floatsisf(i: i32) -> f32 {
-        let sign_bit = ((i >> 31) as u32) << 31;
-        f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u32_to_f32_bits)
     }

     #[arm_aeabi_alias = __aeabi_i2d]
     pub extern "C" fn __floatsidf(i: i32) -> f64 {
-        let sign_bit = ((i >> 31) as u64) << 63;
-        f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u32_to_f64_bits)
     }

     #[arm_aeabi_alias = __aeabi_l2f]
     pub extern "C" fn __floatdisf(i: i64) -> f32 {
-        let sign_bit = ((i >> 63) as u32) << 31;
-        f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u64_to_f32_bits)
    }

     #[arm_aeabi_alias = __aeabi_l2d]
     pub extern "C" fn __floatdidf(i: i64) -> f64 {
-        let sign_bit = ((i >> 63) as u64) << 63;
-        f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u64_to_f64_bits)
     }

     #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
     pub extern "C" fn __floattisf(i: i128) -> f32 {
-        let sign_bit = ((i >> 127) as u32) << 31;
-        f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u128_to_f32_bits)
     }

     #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
     pub extern "C" fn __floattidf(i: i128) -> f64 {
-        let sign_bit = ((i >> 127) as u64) << 63;
-        f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u128_to_f64_bits)
     }
 }
diff --git a/src/int/mod.rs b/src/int/mod.rs
index e6f31c53..0d3b0ce4 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt
     fn unsigned(self) -> Self::UnsignedInt;
     fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
+    fn unsigned_abs(self) -> Self::UnsignedInt;

     fn from_bool(b: bool) -> Self;
@@ -178,7 +179,6 @@ macro_rules! int_impl_common {
         fn wrapping_mul(self, other: Self) -> Self {
             <Self>::wrapping_mul(self, other)
         }
-
         fn wrapping_sub(self, other: Self) -> Self {
             <Self>::wrapping_sub(self, other)
         }
@@ -235,6 +235,10 @@ macro_rules! int_impl {
                 me
             }

+            fn unsigned_abs(self) -> Self {
+                self
+            }
+
             fn abs_diff(self, other: Self) -> Self {
                 if self < other {
                     other.wrapping_sub(self)
@@ -268,6 +272,10 @@ macro_rules! int_impl {
                 me as $ity
             }

+            fn unsigned_abs(self) -> Self::UnsignedInt {
+                self.unsigned_abs()
+            }
+
            fn abs_diff(self, other: Self) -> $uty {
                 self.wrapping_sub(other).wrapping_abs() as $uty
             }
diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs
index 60968038..01cc588c 100644
--- a/testcrate/tests/conv.rs
+++ b/testcrate/tests/conv.rs
@@ -8,7 +8,7 @@ use compiler_builtins::float::Float;
 use rustc_apfloat::{Float as _, FloatConvert as _};
 use testcrate::*;

-mod int_to_float {
+mod i_to_f {
     use super::*;

     macro_rules! i_to_f {

From b639224c49a6cfd2bc092732e6d173870a5a9f30 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Thu, 13 Jun 2024 06:52:46 -0500
Subject: [PATCH 3/4] Add integer to `f128` conversions

---
 README.md                       |  12 +-
 build.rs                        |  18 +--
 examples/intrinsics.rs          |  62 ++++++++-
 src/float/conv.rs               |  80 ++++++++++++
 testcrate/benches/float_conv.rs | 222 ++++++++++++++++++++++++++------
 testcrate/tests/conv.rs         |  22 ++++
 6 files changed, 348 insertions(+), 68 deletions(-)

diff --git a/README.md b/README.md
index f792d188..a2b38cce 100644
--- a/README.md
+++ b/README.md
@@ -233,12 +233,12 @@ of being added to Rust.
 - [x] fixunstfdi.c
 - [x] fixunstfsi.c
 - [x] fixunstfti.c
-- [ ] floatditf.c
-- [ ] floatsitf.c
-- [ ] floattitf.c
-- [ ] floatunditf.c
-- [ ] floatunsitf.c
-- [ ] floatuntitf.c
+- [x] floatditf.c
+- [x] floatsitf.c
+- [x] floattitf.c
+- [x] floatunditf.c
+- [x] floatunsitf.c
+- [x] floatuntitf.c
 - [x] multf3.c
 - [x] powitf2.c
 - [x] subtf3.c
diff --git a/build.rs b/build.rs
index 2863c979..22ec9e4d 100644
--- a/build.rs
+++ b/build.rs
@@ -532,10 +532,6 @@ mod c {
         if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics {
             sources.extend(&[
                 ("__comparetf2", "comparetf2.c"),
-                ("__floatditf", "floatditf.c"),
-                ("__floatsitf", "floatsitf.c"),
-                ("__floatunditf", "floatunditf.c"),
-                ("__floatunsitf", "floatunsitf.c"),
                 ("__fe_getround", "fp_mode.c"),
                 ("__fe_raise_inexact", "fp_mode.c"),
             ]);
@@ -550,21 +546,11 @@ mod c {
         }

         if target.arch == "mips64" {
-            sources.extend(&[
-                ("__netf2", "comparetf2.c"),
-                ("__floatsitf", "floatsitf.c"),
-                ("__floatunsitf", "floatunsitf.c"),
-                ("__fe_getround", "fp_mode.c"),
-            ]);
+            sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
         }

         if target.arch == "loongarch64" {
-            sources.extend(&[
-                ("__netf2", "comparetf2.c"),
-                ("__floatsitf", "floatsitf.c"),
-                ("__floatunsitf", "floatunsitf.c"),
-                ("__fe_getround", "fp_mode.c"),
-            ]);
+            sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
         }

         // Remove the assembly implementations that won't compile for the target
diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs
index 06d77233..368da6af 100644
--- a/examples/intrinsics.rs
+++ b/examples/intrinsics.rs
@@ -264,14 +264,18 @@ mod intrinsics {

     /* i32 operations */

+    // floatsisf
+    pub fn aeabi_i2f(x: i32) -> f32 {
+        x as f32
+    }
+
     // floatsidf
     pub fn aeabi_i2d(x: i32) -> f64 {
         x as f64
     }

-    // floatsisf
-    pub fn aeabi_i2f(x: i32) -> f32 {
-        x as f32
+    pub fn floatsitf(x: i32) -> f128 {
+        x as f128
     }

     pub fn aeabi_idiv(a: i32, b: i32) -> i32 {
@@ -294,6 +298,10 @@ mod intrinsics {
         x as f64
     }

+    pub fn floatditf(x: i64) -> f128 {
+        x as f128
+    }
+
     pub fn mulodi4(a: i64, b: i64) -> i64 {
         a * b
     }
@@ -314,6 +322,18 @@ mod intrinsics {

     /* i128 operations */

+    pub fn floattisf(x: i128) -> f32 {
+        x as f32
+    }
+
+    pub fn floattidf(x: i128) -> f64 {
+        x as f64
+    }
+
+    pub fn floattitf(x: i128) -> f128 {
+        x as f128
+    }
+
     pub fn lshrti3(a: i128, b: usize) -> i128 {
         a >> b
     }
@@ -328,14 +348,18 @@ mod intrinsics {

     /* u32 operations */

+    // floatunsisf
+    pub fn aeabi_ui2f(x: u32) -> f32 {
+        x as f32
+    }
+
     // floatunsidf
     pub fn aeabi_ui2d(x: u32) -> f64 {
         x as f64
     }

-    // floatunsisf
-    pub fn aeabi_ui2f(x: u32) -> f32 {
-        x as f32
+    pub fn floatunsitf(x: u32) -> f128 {
+        x as f128
     }

     pub fn aeabi_uidiv(a: u32, b: u32) -> u32 {
@@ -358,6 +382,10 @@ mod intrinsics {
         x as f64
     }

+    pub fn floatunditf(x: u64) -> f128 {
+        x as f128
+    }
+
     // udivdi3
     pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 {
         a * b
@@ -369,6 +397,18 @@ mod intrinsics {

     /* u128 operations */

+    pub fn floatuntisf(x: u128) -> f32 {
+        x as f32
+    }
+
+    pub fn floatuntidf(x: u128) -> f64 {
+        x as f64
+    }
+
+    pub fn floatuntitf(x: u128) -> f128 {
+        x as f128
+    }
+
     pub fn muloti4(a: u128, b: u128) -> Option<u128> {
         a.checked_mul(b)
     }
@@ -466,6 +506,16 @@ fn run() {
     bb(fixunstfsi(bb(2.)));
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     bb(fixunstfti(bb(2.)));
+    bb(floatditf(bb(2)));
+    bb(floatsitf(bb(2)));
+    bb(floattidf(bb(2)));
+    bb(floattisf(bb(2)));
+    bb(floattitf(bb(2)));
+    bb(floatunditf(bb(2)));
+    bb(floatunsitf(bb(2)));
+
bb(floatuntidf(bb(2)));
+    bb(floatuntisf(bb(2)));
+    bb(floatuntitf(bb(2)));
     bb(gttf(bb(2.), bb(2.)));
     bb(lshrti3(bb(2), bb(2)));
     bb(lttf(bb(2.), bb(2.)));
diff --git a/src/float/conv.rs b/src/float/conv.rs
index da87b3ca..4aea67c9 100644
--- a/src/float/conv.rs
+++ b/src/float/conv.rs
@@ -104,6 +104,24 @@ mod int_to_float {
         repr::<f64>(e, m)
     }

+    #[cfg(f128_enabled)]
+    pub fn u32_to_f128_bits(i: u32) -> u128 {
+        if i == 0 {
+            return 0;
+        }
+        let n = i.leading_zeros();
+
+        // Shift into mantissa position that is correct for the type, but shifted into the lower
+        // 64 bits so we can avoid 128-bit math.
+        let m = (i as u64) << (shift_f_gt_i::<u32, f128>(n) - 64);
+        let e = exp::<u32, f128>(n) as u64 - 1;
+        // High 64 bits of f128 representation.
+        let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m;
+
+        // Shift back to the high bits, the rest of the mantissa will always be 0.
+        (h as u128) << 64
+    }
+
     pub fn u64_to_f32_bits(i: u64) -> u32 {
         let n = i.leading_zeros();
         let i_m = i.wrapping_shl(n);
@@ -130,6 +148,18 @@ mod int_to_float {
         repr::<f64>(e, m)
     }

+    #[cfg(f128_enabled)]
+    pub fn u64_to_f128_bits(i: u64) -> u128 {
+        if i == 0 {
+            return 0;
+        }
+        let n = i.leading_zeros();
+        // Mantissa with implicit bit set
+        let m = (i as u128) << shift_f_gt_i::<u64, f128>(n);
+        let e = exp::<u64, f128>(n) - 1;
+        repr::<f128>(e, m)
+    }
+
     pub fn u128_to_f32_bits(i: u128) -> u32 {
         let n = i.leading_zeros();
         let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
         let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
@@ -162,6 +192,20 @@ mod int_to_float {
         let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
         repr::<f64>(e, m)
     }
+
+    #[cfg(f128_enabled)]
+    pub fn u128_to_f128_bits(i: u128) -> u128 {
+        if i == 0 {
+            return 0;
+        }
+        let n = i.leading_zeros();
+        // Mantissa with implicit bit set
+        let m_base = (i << n) >> f128::EXPONENT_BITS;
+        let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1);
+        let m = m_adj::<f128>(m_base, adj);
+        let e = exp::<u128, f128>(n) - 1;
+        repr::<f128>(e, m)
+    }
 }

 // Conversions from unsigned integers to floats.
@@ -195,6 +239,24 @@ intrinsics! {
     pub extern "C" fn __floatuntidf(i: u128) -> f64 {
         f64::from_bits(int_to_float::u128_to_f64_bits(i))
     }
+
+    #[ppc_alias = __floatunsikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatunsitf(i: u32) -> f128 {
+        f128::from_bits(int_to_float::u32_to_f128_bits(i))
+    }
+
+    #[ppc_alias = __floatundikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatunditf(i: u64) -> f128 {
+        f128::from_bits(int_to_float::u64_to_f128_bits(i))
+    }
+
+    #[ppc_alias = __floatuntikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatuntitf(i: u128) -> f128 {
+        f128::from_bits(int_to_float::u128_to_f128_bits(i))
+    }
 }

 // Conversions from signed integers to floats.
@@ -228,6 +290,24 @@ intrinsics! {
     pub extern "C" fn __floattidf(i: i128) -> f64 {
         int_to_float::signed(i, int_to_float::u128_to_f64_bits)
     }
+
+    #[ppc_alias = __floatsikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatsitf(i: i32) -> f128 {
+        int_to_float::signed(i, int_to_float::u32_to_f128_bits)
+    }
+
+    #[ppc_alias = __floatdikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatditf(i: i64) -> f128 {
+        int_to_float::signed(i, int_to_float::u64_to_f128_bits)
+    }
+
+    #[ppc_alias = __floattikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floattitf(i: i128) -> f128 {
+        int_to_float::signed(i, int_to_float::u128_to_f128_bits)
+    }
 }

 /// Generic float to unsigned int conversions.
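A quick sanity sketch of the new `f128` entry points (editorial sketch, not part of the patch;
assumes a nightly toolchain with `feature(f128)` and a build where the `f128_enabled` cfg is set,
as the benches below also require):

    #![feature(f128)]
    use compiler_builtins::float::conv;

    fn main() {
        // Unsigned core path, plus the sign handling added by `int_to_float::signed`.
        assert_eq!(conv::__floatunsitf(7u32), 7.0f128);
        assert_eq!(conv::__floatsitf(-7i32), -7.0f128);
        // 2^64 fits exactly in f128's 113-bit significand.
        assert_eq!(conv::__floatuntitf(1u128 << 64), 18446744073709551616.0f128);
    }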
diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs index de2043b0..0625a1ae 100644 --- a/testcrate/benches/float_conv.rs +++ b/testcrate/benches/float_conv.rs @@ -1,7 +1,8 @@ #![allow(improper_ctypes)] +#![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::conv; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; /* unsigned int -> float */ @@ -76,6 +77,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u32_f128, + sig: (a: u32) -> f128, + crate_fn: conv::__floatunsitf, + crate_fn_ppc: conv::__floatunsikf, + sys_fn: __floatunsitf, + sys_fn_ppc: __floatunsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u64_f32, sig: (a: u64) -> f32, @@ -118,6 +131,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u64_f128, + sig: (a: u64) -> f128, + crate_fn: conv::__floatunditf, + crate_fn_ppc: conv::__floatundikf, + sys_fn: __floatunditf, + sys_fn_ppc: __floatundikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u128_f32, sig: (a: u128) -> f32, @@ -136,6 +161,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u128_f128, + sig: (a: u128) -> f128, + crate_fn: conv::__floatuntitf, + crate_fn_ppc: conv::__floatuntikf, + sys_fn: __floatuntitf, + sys_fn_ppc: __floatuntikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* signed int -> float */ float_bench! { @@ -205,6 +242,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i32_f128, + sig: (a: i32) -> f128, + crate_fn: conv::__floatsitf, + crate_fn_ppc: conv::__floatsikf, + sys_fn: __floatsitf, + sys_fn_ppc: __floatsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i64_f32, sig: (a: i64) -> f32, @@ -272,6 +321,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i64_f128, + sig: (a: i64) -> f128, + crate_fn: conv::__floatditf, + crate_fn_ppc: conv::__floatdikf, + sys_fn: __floatditf, + sys_fn_ppc: __floatdikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i128_f32, sig: (a: i128) -> f32, @@ -290,6 +351,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i128_f128, + sig: (a: i128) -> f128, + crate_fn: conv::__floattitf, + crate_fn_ppc: conv::__floattikf, + sys_fn: __floattitf, + sys_fn_ppc: __floattikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> unsigned int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -397,6 +470,39 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u32, + sig: (a: f128) -> u32, + crate_fn: conv::__fixunstfsi, + crate_fn_ppc: conv::__fixunskfsi, + sys_fn: __fixunstfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u64, + sig: (a: f128) -> u64, + crate_fn: conv::__fixunstfdi, + crate_fn_ppc: conv::__fixunskfdi, + sys_fn: __fixunstfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_f128_u128, + sig: (a: f128) -> u128, + crate_fn: conv::__fixunstfti, + crate_fn_ppc: conv::__fixunskfti, + sys_fn: __fixunstfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> signed int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -504,43 +610,79 @@ float_bench! { asm: [] } -criterion_group!( - float_conv, - conv_u32_f32, - conv_u32_f64, - conv_u64_f32, - conv_u64_f64, - conv_u128_f32, - conv_u128_f64, - conv_i32_f32, - conv_i32_f64, - conv_i64_f32, - conv_i64_f64, - conv_i128_f32, - conv_i128_f64, - conv_f64_u32, - conv_f64_u64, - conv_f64_u128, - conv_f64_i32, - conv_f64_i64, - conv_f64_i128, -); - -// FIXME: ppc64le has a sporadic overflow panic in the crate functions -// -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_group!( - float_conv_not_ppc64le, - conv_f32_u32, - conv_f32_u64, - conv_f32_u128, - conv_f32_i32, - conv_f32_i64, - conv_f32_i128, -); - -#[cfg(all(target_arch = "powerpc64", target_endian = "little"))] -criterion_main!(float_conv); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i32, + sig: (a: f128) -> i32, + crate_fn: conv::__fixtfsi, + crate_fn_ppc: conv::__fixkfsi, + sys_fn: __fixtfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_main!(float_conv, float_conv_not_ppc64le); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i64, + sig: (a: f128) -> i64, + crate_fn: conv::__fixtfdi, + crate_fn_ppc: conv::__fixkfdi, + sys_fn: __fixtfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i128, + sig: (a: f128) -> i128, + crate_fn: conv::__fixtfti, + crate_fn_ppc: conv::__fixkfti, + sys_fn: __fixtfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +pub fn float_conv() { + let mut criterion = Criterion::default().configure_from_args(); + + conv_u32_f32(&mut criterion); + conv_u32_f64(&mut criterion); + conv_u64_f32(&mut criterion); + conv_u64_f64(&mut criterion); + conv_u128_f32(&mut criterion); + conv_u128_f64(&mut criterion); + conv_i32_f32(&mut criterion); + conv_i32_f64(&mut criterion); + conv_i64_f32(&mut criterion); + conv_i64_f64(&mut criterion); + conv_i128_f32(&mut criterion); + conv_i128_f64(&mut criterion); + conv_f64_u32(&mut criterion); + conv_f64_u64(&mut criterion); + conv_f64_u128(&mut criterion); + conv_f64_i32(&mut criterion); + conv_f64_i64(&mut criterion); + conv_f64_i128(&mut criterion); + + #[cfg(all(f128_enabled))] + // FIXME: ppc64le has a sporadic overflow panic in the crate functions + // + #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] + { + conv_u32_f128(&mut criterion); + conv_u64_f128(&mut criterion); + conv_u128_f128(&mut criterion); + conv_i32_f128(&mut criterion); + conv_i64_f128(&mut criterion); + conv_i128_f128(&mut criterion); + conv_f128_u32(&mut criterion); + conv_f128_u64(&mut criterion); + conv_f128_u128(&mut criterion); + conv_f128_i32(&mut criterion); + conv_f128_i64(&mut criterion); + conv_f128_i128(&mut criterion); + } +} + +criterion_main!(float_conv); diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 01cc588c..a08748af 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -117,6 +117,28 @@ mod i_to_f { u128, __floatuntidf; i128, __floattidf; } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", 
target_arch = "powerpc64")))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsitf; + i32, __floatsitf; + u64, __floatunditf; + i64, __floatditf; + u128, __floatuntitf; + i128, __floattitf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsikf; + i32, __floatsikf; + u64, __floatundikf; + i64, __floatdikf; + u128, __floatuntikf; + i128, __floattikf; + } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 From 02e939b0c94977090d1302f25eb95dd5e4f119cc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Oct 2024 12:27:59 -0500 Subject: [PATCH 4/4] Remove the unneeded `isqrt` feature gate [1] has been stabilized so we no longer need to enable it. [1]: https://github.com/rust-lang/rust/issues/116226 --- testcrate/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 58419bf1..4154e0fb 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -15,7 +15,6 @@ #![no_std] #![cfg_attr(f128_enabled, feature(f128))] #![cfg_attr(f16_enabled, feature(f16))] -#![feature(isqrt)] pub mod bench; extern crate alloc;
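Appendix (editorial, not part of the patch series): the refactor in patch 2 is easier to audit
against a standalone model of the same `exp`/`m_adj`/`repr` decomposition, specialized here to
the `u32 -> f32` path (bias 127, 8 exponent bits, 23 significand bits) and cross-checked against
`as` casts:

    // Standalone sketch mirroring src/float/conv.rs after patch 2.
    fn u32_to_f32_bits(i: u32) -> u32 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit 23 set.
        let m_base = (i << n) >> 8;
        // Truncated bits, left-aligned.
        let dropped = (i << n) << 24;
        // Branchless round-to-nearest, ties-to-even: becomes 1 exactly when the dropped
        // bits exceed half an ULP, or equal half an ULP while the mantissa is odd.
        let adj = (dropped - (dropped >> 31 & !m_base)) >> 31;
        let m = m_base + adj;
        // Biased exponent 127 + (31 - n), minus one for the implicit bit.
        let e = 157 - n;
        (e << 23) + m // `+` lets a mantissa overflow carry into the exponent
    }

    fn main() {
        for i in [1u32, 3, (1 << 24) + 1, 0x8000_0001, u32::MAX] {
            assert_eq!(u32_to_f32_bits(i), (i as f32).to_bits());
        }
        println!("all conversions match `as` casts");
    }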