From 0bf4f0f4f351233106c76b9e871acbc7bbdd54e0 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Wed, 29 May 2024 02:56:59 -0500
Subject: [PATCH 1/4] Add an apfloat fallback for int to float tests

---
 testcrate/tests/conv.rs | 129 ++++++++++++++++++++++++----------------
 1 file changed, 77 insertions(+), 52 deletions(-)

diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs
index 24f3a04a..60968038 100644
--- a/testcrate/tests/conv.rs
+++ b/testcrate/tests/conv.rs
@@ -12,60 +12,82 @@ mod int_to_float {
     use super::*;

     macro_rules! i_to_f {
-        ($($from:ty, $into:ty, $fn:ident);*;) => {
+        ($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => {
             $(
                 #[test]
                 fn $fn() {
                     use compiler_builtins::float::conv::$fn;
                     use compiler_builtins::int::Int;

-                    fuzz(N, |x: $from| {
-                        let f0 = x as $into;
-                        let f1: $into = $fn(x);
-                        // This makes sure that the conversion produced the best rounding possible, and does
-                        // this independent of `x as $into` rounding correctly.
-                        // This assumes that float to integer conversion is correct.
-                        let y_minus_ulp = <$into>::from_bits(f1.to_bits().wrapping_sub(1)) as $from;
-                        let y = f1 as $from;
-                        let y_plus_ulp = <$into>::from_bits(f1.to_bits().wrapping_add(1)) as $from;
-                        let error_minus = <$from as Int>::abs_diff(y_minus_ulp, x);
-                        let error = <$from as Int>::abs_diff(y, x);
-                        let error_plus = <$from as Int>::abs_diff(y_plus_ulp, x);
-                        // The first two conditions check that none of the two closest float values are
-                        // strictly closer in representation to `x`. The second makes sure that rounding is
-                        // towards even significand if two float values are equally close to the integer.
-                        if error_minus < error
-                            || error_plus < error
-                            || ((error_minus == error || error_plus == error)
-                                && ((f0.to_bits() & 1) != 0))
-                        {
-                            if !cfg!(any(
-                                target_arch = "powerpc",
-                                target_arch = "powerpc64"
-                            )) {
-                                panic!(
-                                    "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
-                                    stringify!($fn),
-                                    x,
-                                    f1.to_bits(),
-                                    y_minus_ulp,
-                                    y,
-                                    y_plus_ulp,
-                                    error_minus,
-                                    error,
-                                    error_plus,
-                                );
+                    fuzz(N, |x: $i_ty| {
+                        let f0 = apfloat_fallback!(
+                            $f_ty, $apfloat_ty, $sys_available,
+                            |x| x as $f_ty;
+                            // When the builtin is not available, we need to use a different conversion
+                            // method (since apfloat doesn't support `as` casting).
+                            |x: $i_ty| {
+                                use compiler_builtins::int::MinInt;
+
+                                let apf = if <$i_ty>::SIGNED {
+                                    FloatTy::from_i128(x.try_into().unwrap()).value
+                                } else {
+                                    FloatTy::from_u128(x.try_into().unwrap()).value
+                                };
+
+                                <$f_ty>::from_bits(apf.to_bits())
+                            },
+                            x
+                        );
+                        let f1: $f_ty = $fn(x);
+
+                        #[cfg($sys_available)] {
+                            // This makes sure that the conversion produced the best rounding possible, and does
+                            // this independently of `x as $f_ty` rounding correctly.
+                            // This assumes that float to integer conversion is correct.
+                            let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty;
+                            let y = f1 as $i_ty;
+                            let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty;
+                            let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x);
+                            let error = <$i_ty as Int>::abs_diff(y, x);
+                            let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x);
+
+                            // The first two conditions check that neither of the two closest float values is
+                            // strictly closer in representation to `x`. The third makes sure that rounding is
+                            // towards even significand if two float values are equally close to the integer.
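+                            // (Editorial example: when `x` lies exactly halfway between `y` and
+                            // `y_plus_ulp`, then `error == error_plus`, and only the candidate
+                            // whose significand is even is an acceptable result.)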
+                            if error_minus < error
+                                || error_plus < error
+                                || ((error_minus == error || error_plus == error)
+                                    && ((f0.to_bits() & 1) != 0))
+                            {
+                                if !cfg!(any(
+                                    target_arch = "powerpc",
+                                    target_arch = "powerpc64"
+                                )) {
+                                    panic!(
+                                        "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
+                                        stringify!($fn),
+                                        x,
+                                        f1.to_bits(),
+                                        y_minus_ulp,
+                                        y,
+                                        y_plus_ulp,
+                                        error_minus,
+                                        error,
+                                        error_plus,
+                                    );
+                                }
+                            }
+                        }
+
                         // Test against native conversion. We disable testing on all `x86` because of
                         // rounding bugs with `i686`. `powerpc` also has the same rounding bug.
-                        if f0 != f1 && !cfg!(any(
+                        if !Float::eq_repr(f0, f1) && !cfg!(any(
                             target_arch = "x86",
                             target_arch = "powerpc",
                             target_arch = "powerpc64"
                         )) {
                             panic!(
-                                "{}({}): std: {}, builtins: {}",
+                                "{}({}): std: {:?}, builtins: {:?}",
                                 stringify!($fn),
                                 x,
                                 f0,
@@ -78,19 +100,22 @@ mod int_to_float {
         };
     }

-    i_to_f! {
-        u32, f32, __floatunsisf;
-        u32, f64, __floatunsidf;
-        i32, f32, __floatsisf;
-        i32, f64, __floatsidf;
-        u64, f32, __floatundisf;
-        u64, f64, __floatundidf;
-        i64, f32, __floatdisf;
-        i64, f64, __floatdidf;
-        u128, f32, __floatuntisf;
-        u128, f64, __floatuntidf;
-        i128, f32, __floattisf;
-        i128, f64, __floattidf;
+    i_to_f! { f32, Single, all(),
+        u32, __floatunsisf;
+        i32, __floatsisf;
+        u64, __floatundisf;
+        i64, __floatdisf;
+        u128, __floatuntisf;
+        i128, __floattisf;
+    }
+
+    i_to_f! { f64, Double, all(),
+        u32, __floatunsidf;
+        i32, __floatsidf;
+        u64, __floatundidf;
+        i64, __floatdidf;
+        u128, __floatuntidf;
+        i128, __floattidf;
     }
 }

From 84021e790b884e5eecc6f1b470b3e116ef33ff5c Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Thu, 13 Jun 2024 06:50:44 -0500
Subject: [PATCH 2/4] Refactor integer to float conversion

Extract some common routines to separate functions in order to
deduplicate code and remove some of the magic.

---
 src/float/conv.rs       | 174 +++++++++++++++++++++++++++++-----------
 src/int/mod.rs          |  10 ++-
 testcrate/tests/conv.rs |   2 +-
 3 files changed, 138 insertions(+), 48 deletions(-)

diff --git a/src/float/conv.rs b/src/float/conv.rs
index e86fee6d..da87b3ca 100644
--- a/src/float/conv.rs
+++ b/src/float/conv.rs
@@ -6,21 +6,91 @@ use super::Float;

 /// Conversions from integers to floats.
 ///
-/// These are hand-optimized bit twiddling code,
-/// which unfortunately isn't the easiest kind of code to read.
+/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
+/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
+///   mantissa _with the implicit bit set_!
+/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some
+///   patterns are used to simplify this. Adjust the mantissa with the result if needed.
+/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one.
+/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
+///   from the exponent (above) accounts for the explicit bit being set in the mantissa.
 ///
-/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
+/// # Terminology
+///
+/// - `i`: the original integer
+/// - `i_m`: the integer, shifted fully left (no leading zeros)
+/// - `n`: number of leading zeroes
+/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
+/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
+/// - `adj`: the bits that will be truncated, possibly compressed in some way.
+/// - `m`: the resulting mantissa. Implicit bit is usually set.
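+///
+/// # Worked example
+///
+/// (Editorial illustration, appended for clarity.) Converting `3_u32` to `f32` with the steps
+/// above:
+///
+/// ```text
+/// i      = 3                 ->  n = 30 leading zeros
+/// i << n = 0xC000_0000           (the integer, left-aligned)
+/// m_base = 0xC000_0000 >> 8  = 0x00C0_0000  (implicit bit 23 set)
+/// adj    = 0xC000_0000 << 24 = 0            (no truncated bits, no rounding)
+/// e      = (127 - 1 + 32 - 30) - 1 = 127
+/// repr   = (127 << 23) + 0x00C0_0000 = 0x4040_0000 = 3.0_f32
+/// ```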
 mod int_to_float {
+    use super::*;
+
+    /// Calculate the exponent from the number of leading zeros.
+    ///
+    /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
+    /// bit set can be added back later.
+    fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
+        F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
+    }
+
+    /// Adjust a mantissa with dropped bits to perform correct rounding.
+    ///
+    /// The dropped bits should be exactly the bits that get truncated (left-aligned), but they
+    /// can be combined or compressed in some way that simplifies operations.
+    fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
+        // Branchlessly extract a `1` if rounding up should happen, 0 otherwise
+        // This accounts for rounding to even.
+        let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
+
+        // Add one when we need to round up. Break ties to even.
+        m_base + adj
+    }
+
+    /// Shift the exponent to its position and add the mantissa.
+    ///
+    /// If the mantissa has the implicit bit set, the exponent should be one less than its actual
+    /// value to cancel it out.
+    fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
+        // + rather than | so the mantissa can overflow into the exponent
+        (e << F::SIGNIFICAND_BITS) + m
+    }
+
+    /// Shift distance from a left-aligned integer to a smaller float.
+    fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
+        (I::BITS - F::BITS) + F::EXPONENT_BITS
+    }
+
+    /// Shift distance from an integer with `n` leading zeros to a smaller float.
+    fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
+        F::SIGNIFICAND_BITS - I::BITS + 1 + n
+    }
+
+    /// Perform a signed operation as unsigned, then add the sign back.
+    pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
+    where
+        F: Float,
+        I: Int,
+        F::Int: CastFrom<I>,
+        Conv: Fn(I::UnsignedInt) -> F::Int,
+    {
+        let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
+        F::from_bits(conv(i.unsigned_abs()) | sign_bit)
+    }
+
     pub fn u32_to_f32_bits(i: u32) -> u32 {
         if i == 0 {
             return 0;
         }
         let n = i.leading_zeros();
-        let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
-        let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-        let e = 157 - n; // Exponent plus 127, minus one.
-        (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+        // Mantissa with implicit bit set (significant bits)
+        let m_base = (i << n) >> f32::EXPONENT_BITS;
+        // Bits that will be dropped (insignificant bits)
+        let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
+        let m = m_adj::<f32>(m_base, adj);
+        let e = exp::<u32, f32>(n) - 1;
+        repr::<f32>(e, m)
     }

     pub fn u32_to_f64_bits(i: u32) -> u64 {
@@ -28,19 +98,23 @@ mod int_to_float {
             return 0;
         }
         let n = i.leading_zeros();
-        let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
-        let e = 1053 - n as u64; // Exponent plus 1023, minus one.
-        (e << 52) + m // Bit 53 of m will overflow into e.
+        // Mantissa with implicit bit set
+        let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
+        let e = exp::<u32, f64>(n) - 1;
+        repr::<f64>(e, m)
     }

     pub fn u64_to_f32_bits(i: u64) -> u32 {
         let n = i.leading_zeros();
-        let y = i.wrapping_shl(n);
-        let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
-        let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-        let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
-        (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+        let i_m = i.wrapping_shl(n);
+        // Mantissa with implicit bit set
+        let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
+        // The entire lower half of `i` will be truncated (masked portion), plus the
+        // next `EXPONENT_BITS` bits.
+        let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
+        let m = m_adj::<f32>(m_base, adj);
+        let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
+        repr::<f32>(e, m)
     }

     pub fn u64_to_f64_bits(i: u64) -> u64 {
@@ -48,31 +122,45 @@ mod int_to_float {
             return 0;
         }
         let n = i.leading_zeros();
-        let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
-        let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
-        let e = 1085 - n as u64; // Exponent plus 1023, minus one.
-        (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+        // Mantissa with implicit bit set
+        let m_base = (i << n) >> f64::EXPONENT_BITS;
+        let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
+        let m = m_adj::<f64>(m_base, adj);
+        let e = exp::<u64, f64>(n) - 1;
+        repr::<f64>(e, m)
     }

     pub fn u128_to_f32_bits(i: u128) -> u32 {
         let n = i.leading_zeros();
-        let y = i.wrapping_shl(n);
-        let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
-        let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-        let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
-        (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+        let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
+        let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
+
+        // Within the upper `F::BITS`, everything except for the significand
+        // gets truncated
+        let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
+
+        // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
+        // check if it is nonzero.
+        let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
+        let adj = d1 | d2;
+
+        // Mantissa with implicit bit set
+        let m = m_adj::<f32>(m_base, adj);
+        let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
+        repr::<f32>(e, m)
     }

     pub fn u128_to_f64_bits(i: u128) -> u64 {
         let n = i.leading_zeros();
-        let y = i.wrapping_shl(n);
-        let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
-        let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
-        let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
-        let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
-        (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+        let i_m = i.wrapping_shl(n);
+        // Mantissa with implicit bit set
+        let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
+        // The entire lower half of `i` will be truncated (masked portion), plus the
+        // next `EXPONENT_BITS` bits.
+        let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
+        let m = m_adj::<f64>(m_base, adj);
+        let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
+        repr::<f64>(e, m)
     }
 }

@@ -113,38 +201,32 @@ intrinsics! {
 intrinsics!
{
     #[arm_aeabi_alias = __aeabi_i2f]
     pub extern "C" fn __floatsisf(i: i32) -> f32 {
-        let sign_bit = ((i >> 31) as u32) << 31;
-        f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u32_to_f32_bits)
     }

     #[arm_aeabi_alias = __aeabi_i2d]
     pub extern "C" fn __floatsidf(i: i32) -> f64 {
-        let sign_bit = ((i >> 31) as u64) << 63;
-        f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u32_to_f64_bits)
     }

     #[arm_aeabi_alias = __aeabi_l2f]
     pub extern "C" fn __floatdisf(i: i64) -> f32 {
-        let sign_bit = ((i >> 63) as u32) << 31;
-        f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u64_to_f32_bits)
    }

     #[arm_aeabi_alias = __aeabi_l2d]
     pub extern "C" fn __floatdidf(i: i64) -> f64 {
-        let sign_bit = ((i >> 63) as u64) << 63;
-        f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u64_to_f64_bits)
     }

     #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
     pub extern "C" fn __floattisf(i: i128) -> f32 {
-        let sign_bit = ((i >> 127) as u32) << 31;
-        f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u128_to_f32_bits)
     }

     #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
     pub extern "C" fn __floattidf(i: i128) -> f64 {
-        let sign_bit = ((i >> 127) as u64) << 63;
-        f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
+        int_to_float::signed(i, int_to_float::u128_to_f64_bits)
     }
 }
diff --git a/src/int/mod.rs b/src/int/mod.rs
index e6f31c53..0d3b0ce4 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt
     fn unsigned(self) -> Self::UnsignedInt;
     fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
+    fn unsigned_abs(self) -> Self::UnsignedInt;

     fn from_bool(b: bool) -> Self;
@@ -178,7 +179,6 @@ macro_rules! int_impl_common {
         fn wrapping_mul(self, other: Self) -> Self {
             <Self>::wrapping_mul(self, other)
         }
-
         fn wrapping_sub(self, other: Self) -> Self {
             <Self>::wrapping_sub(self, other)
         }
@@ -235,6 +235,10 @@ macro_rules! int_impl {
                 me
             }

+            fn unsigned_abs(self) -> Self {
+                self
+            }
+
             fn abs_diff(self, other: Self) -> Self {
                 if self < other {
                     other.wrapping_sub(self)
@@ -268,6 +272,10 @@ macro_rules! int_impl {
                 me as $ity
             }

+            fn unsigned_abs(self) -> Self::UnsignedInt {
+                self.unsigned_abs()
+            }
+
            fn abs_diff(self, other: Self) -> $uty {
                 self.wrapping_sub(other).wrapping_abs() as $uty
             }
diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs
index 60968038..01cc588c 100644
--- a/testcrate/tests/conv.rs
+++ b/testcrate/tests/conv.rs
@@ -8,7 +8,7 @@ use compiler_builtins::float::Float;
 use rustc_apfloat::{Float as _, FloatConvert as _};
 use testcrate::*;

-mod int_to_float {
+mod i_to_f {
     use super::*;

     macro_rules! i_to_f {

From b639224c49a6cfd2bc092732e6d173870a5a9f30 Mon Sep 17 00:00:00 2001
From: Trevor Gross
Date: Thu, 13 Jun 2024 06:52:46 -0500
Subject: [PATCH 3/4] Add integer to `f128` conversions

---
 README.md                       |  12 +-
 build.rs                        |  18 +--
 examples/intrinsics.rs          |  62 ++++++++-
 src/float/conv.rs               |  80 ++++++++++++
 testcrate/benches/float_conv.rs | 222 ++++++++++++++++++++++++++------
 testcrate/tests/conv.rs         |  22 ++++
 6 files changed, 348 insertions(+), 68 deletions(-)

diff --git a/README.md b/README.md
index f792d188..a2b38cce 100644
--- a/README.md
+++ b/README.md
@@ -233,12 +233,12 @@ of being added to Rust.
 - [x] fixunstfdi.c
 - [x] fixunstfsi.c
 - [x] fixunstfti.c
-- [ ] floatditf.c
-- [ ] floatsitf.c
-- [ ] floattitf.c
-- [ ] floatunditf.c
-- [ ] floatunsitf.c
-- [ ] floatuntitf.c
+- [x] floatditf.c
+- [x] floatsitf.c
+- [x] floattitf.c
+- [x] floatunditf.c
+- [x] floatunsitf.c
+- [x] floatuntitf.c
 - [x] multf3.c
 - [x] powitf2.c
 - [x] subtf3.c
diff --git a/build.rs b/build.rs
index 2863c979..22ec9e4d 100644
--- a/build.rs
+++ b/build.rs
@@ -532,10 +532,6 @@ mod c {
         if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics {
             sources.extend(&[
                 ("__comparetf2", "comparetf2.c"),
-                ("__floatditf", "floatditf.c"),
-                ("__floatsitf", "floatsitf.c"),
-                ("__floatunditf", "floatunditf.c"),
-                ("__floatunsitf", "floatunsitf.c"),
                 ("__fe_getround", "fp_mode.c"),
                 ("__fe_raise_inexact", "fp_mode.c"),
             ]);
@@ -550,21 +546,11 @@ mod c {
         }

         if target.arch == "mips64" {
-            sources.extend(&[
-                ("__netf2", "comparetf2.c"),
-                ("__floatsitf", "floatsitf.c"),
-                ("__floatunsitf", "floatunsitf.c"),
-                ("__fe_getround", "fp_mode.c"),
-            ]);
+            sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
         }

         if target.arch == "loongarch64" {
-            sources.extend(&[
-                ("__netf2", "comparetf2.c"),
-                ("__floatsitf", "floatsitf.c"),
-                ("__floatunsitf", "floatunsitf.c"),
-                ("__fe_getround", "fp_mode.c"),
-            ]);
+            sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
         }

         // Remove the assembly implementations that won't compile for the target
diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs
index 06d77233..368da6af 100644
--- a/examples/intrinsics.rs
+++ b/examples/intrinsics.rs
@@ -264,14 +264,18 @@ mod intrinsics {

     /* i32 operations */

+    // floatsisf
+    pub fn aeabi_i2f(x: i32) -> f32 {
+        x as f32
+    }
+
     // floatsidf
     pub fn aeabi_i2d(x: i32) -> f64 {
         x as f64
     }

-    // floatsisf
-    pub fn aeabi_i2f(x: i32) -> f32 {
-        x as f32
+    pub fn floatsitf(x: i32) -> f128 {
+        x as f128
     }

     pub fn aeabi_idiv(a: i32, b: i32) -> i32 {
@@ -294,6 +298,10 @@ mod intrinsics {
         x as f64
     }

+    pub fn floatditf(x: i64) -> f128 {
+        x as f128
+    }
+
     pub fn mulodi4(a: i64, b: i64) -> i64 {
         a * b
     }
@@ -314,6 +322,18 @@ mod intrinsics {

     /* i128 operations */

+    pub fn floattisf(x: i128) -> f32 {
+        x as f32
+    }
+
+    pub fn floattidf(x: i128) -> f64 {
+        x as f64
+    }
+
+    pub fn floattitf(x: i128) -> f128 {
+        x as f128
+    }
+
     pub fn lshrti3(a: i128, b: usize) -> i128 {
         a >> b
     }
@@ -328,14 +348,18 @@ mod intrinsics {

     /* u32 operations */

+    // floatunsisf
+    pub fn aeabi_ui2f(x: u32) -> f32 {
+        x as f32
+    }
+
     // floatunsidf
     pub fn aeabi_ui2d(x: u32) -> f64 {
         x as f64
     }

-    // floatunsisf
-    pub fn aeabi_ui2f(x: u32) -> f32 {
-        x as f32
+    pub fn floatunsitf(x: u32) -> f128 {
+        x as f128
     }

     pub fn aeabi_uidiv(a: u32, b: u32) -> u32 {
@@ -358,6 +382,10 @@ mod intrinsics {
         x as f64
     }

+    pub fn floatunditf(x: u64) -> f128 {
+        x as f128
+    }
+
     // udivdi3
     pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 {
         a * b
@@ -369,6 +397,18 @@ mod intrinsics {

     /* u128 operations */

+    pub fn floatuntisf(x: u128) -> f32 {
+        x as f32
+    }
+
+    pub fn floatuntidf(x: u128) -> f64 {
+        x as f64
+    }
+
+    pub fn floatuntitf(x: u128) -> f128 {
+        x as f128
+    }
+
     pub fn muloti4(a: u128, b: u128) -> Option<u128> {
         a.checked_mul(b)
     }
@@ -466,6 +506,16 @@ fn run() {
     bb(fixunstfsi(bb(2.)));
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     bb(fixunstfti(bb(2.)));
+    bb(floatditf(bb(2)));
+    bb(floatsitf(bb(2)));
+    bb(floattidf(bb(2)));
+    bb(floattisf(bb(2)));
+    bb(floattitf(bb(2)));
+    bb(floatunditf(bb(2)));
+    bb(floatunsitf(bb(2)));
+
bb(floatuntidf(bb(2)));
+    bb(floatuntisf(bb(2)));
+    bb(floatuntitf(bb(2)));
     bb(gttf(bb(2.), bb(2.)));
     bb(lshrti3(bb(2), bb(2)));
     bb(lttf(bb(2.), bb(2.)));
diff --git a/src/float/conv.rs b/src/float/conv.rs
index da87b3ca..4aea67c9 100644
--- a/src/float/conv.rs
+++ b/src/float/conv.rs
@@ -104,6 +104,24 @@ mod int_to_float {
         repr::<f64>(e, m)
     }

+    #[cfg(f128_enabled)]
+    pub fn u32_to_f128_bits(i: u32) -> u128 {
+        if i == 0 {
+            return 0;
+        }
+        let n = i.leading_zeros();
+
+        // Shift into mantissa position that is correct for the type, but shifted into the lower
+        // 64 bits so we can avoid 128-bit math.
+        let m = (i as u64) << (shift_f_gt_i::<u32, f128>(n) - 64);
+        let e = exp::<u32, f128>(n) as u64 - 1;
+        // High 64 bits of f128 representation.
+        let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m;
+
+        // Shift back to the high bits, the rest of the mantissa will always be 0.
+        (h as u128) << 64
+    }
+
     pub fn u64_to_f32_bits(i: u64) -> u32 {
         let n = i.leading_zeros();
         let i_m = i.wrapping_shl(n);
@@ -130,6 +148,18 @@ mod int_to_float {
         repr::<f64>(e, m)
     }

+    #[cfg(f128_enabled)]
+    pub fn u64_to_f128_bits(i: u64) -> u128 {
+        if i == 0 {
+            return 0;
+        }
+        let n = i.leading_zeros();
+        // Mantissa with implicit bit set
+        let m = (i as u128) << shift_f_gt_i::<u64, f128>(n);
+        let e = exp::<u64, f128>(n) - 1;
+        repr::<f128>(e, m)
+    }
+
     pub fn u128_to_f32_bits(i: u128) -> u32 {
         let n = i.leading_zeros();
         let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
         let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
@@ -162,6 +192,20 @@ mod int_to_float {
         let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
         repr::<f64>(e, m)
     }
+
+    #[cfg(f128_enabled)]
+    pub fn u128_to_f128_bits(i: u128) -> u128 {
+        if i == 0 {
+            return 0;
+        }
+        let n = i.leading_zeros();
+        // Mantissa with implicit bit set
+        let m_base = (i << n) >> f128::EXPONENT_BITS;
+        let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1);
+        let m = m_adj::<f128>(m_base, adj);
+        let e = exp::<u128, f128>(n) - 1;
+        repr::<f128>(e, m)
+    }
 }

 // Conversions from unsigned integers to floats.
@@ -195,6 +239,24 @@ intrinsics! {
     pub extern "C" fn __floatuntidf(i: u128) -> f64 {
         f64::from_bits(int_to_float::u128_to_f64_bits(i))
     }
+
+    #[ppc_alias = __floatunsikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatunsitf(i: u32) -> f128 {
+        f128::from_bits(int_to_float::u32_to_f128_bits(i))
+    }
+
+    #[ppc_alias = __floatundikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatunditf(i: u64) -> f128 {
+        f128::from_bits(int_to_float::u64_to_f128_bits(i))
+    }
+
+    #[ppc_alias = __floatuntikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatuntitf(i: u128) -> f128 {
+        f128::from_bits(int_to_float::u128_to_f128_bits(i))
+    }
 }

 // Conversions from signed integers to floats.
@@ -228,6 +290,24 @@ intrinsics! {
     pub extern "C" fn __floattidf(i: i128) -> f64 {
         int_to_float::signed(i, int_to_float::u128_to_f64_bits)
     }
+
+    #[ppc_alias = __floatsikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatsitf(i: i32) -> f128 {
+        int_to_float::signed(i, int_to_float::u32_to_f128_bits)
+    }
+
+    #[ppc_alias = __floatdikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floatditf(i: i64) -> f128 {
+        int_to_float::signed(i, int_to_float::u64_to_f128_bits)
+    }
+
+    #[ppc_alias = __floattikf]
+    #[cfg(f128_enabled)]
+    pub extern "C" fn __floattitf(i: i128) -> f128 {
+        int_to_float::signed(i, int_to_float::u128_to_f128_bits)
+    }
 }

 /// Generic float to unsigned int conversions.
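A quick sanity sketch of the new `f128` entry points (editorial sketch, not part of the patch;
assumes a nightly toolchain with `feature(f128)` and a build where the `f128_enabled` cfg is set,
as the benches below also require):

    #![feature(f128)]
    use compiler_builtins::float::conv;

    fn main() {
        // Unsigned core path, plus the sign handling added by `int_to_float::signed`.
        assert_eq!(conv::__floatunsitf(7u32), 7.0f128);
        assert_eq!(conv::__floatsitf(-7i32), -7.0f128);
        // 2^64 fits exactly in f128's 113-bit significand.
        assert_eq!(conv::__floatuntitf(1u128 << 64), 18446744073709551616.0f128);
    }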
diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs index de2043b0..0625a1ae 100644 --- a/testcrate/benches/float_conv.rs +++ b/testcrate/benches/float_conv.rs @@ -1,7 +1,8 @@ #![allow(improper_ctypes)] +#![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::conv; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; /* unsigned int -> float */ @@ -76,6 +77,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u32_f128, + sig: (a: u32) -> f128, + crate_fn: conv::__floatunsitf, + crate_fn_ppc: conv::__floatunsikf, + sys_fn: __floatunsitf, + sys_fn_ppc: __floatunsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u64_f32, sig: (a: u64) -> f32, @@ -118,6 +131,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u64_f128, + sig: (a: u64) -> f128, + crate_fn: conv::__floatunditf, + crate_fn_ppc: conv::__floatundikf, + sys_fn: __floatunditf, + sys_fn_ppc: __floatundikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u128_f32, sig: (a: u128) -> f32, @@ -136,6 +161,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u128_f128, + sig: (a: u128) -> f128, + crate_fn: conv::__floatuntitf, + crate_fn_ppc: conv::__floatuntikf, + sys_fn: __floatuntitf, + sys_fn_ppc: __floatuntikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* signed int -> float */ float_bench! { @@ -205,6 +242,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i32_f128, + sig: (a: i32) -> f128, + crate_fn: conv::__floatsitf, + crate_fn_ppc: conv::__floatsikf, + sys_fn: __floatsitf, + sys_fn_ppc: __floatsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i64_f32, sig: (a: i64) -> f32, @@ -272,6 +321,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i64_f128, + sig: (a: i64) -> f128, + crate_fn: conv::__floatditf, + crate_fn_ppc: conv::__floatdikf, + sys_fn: __floatditf, + sys_fn_ppc: __floatdikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i128_f32, sig: (a: i128) -> f32, @@ -290,6 +351,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i128_f128, + sig: (a: i128) -> f128, + crate_fn: conv::__floattitf, + crate_fn_ppc: conv::__floattikf, + sys_fn: __floattitf, + sys_fn_ppc: __floattikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> unsigned int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -397,6 +470,39 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u32, + sig: (a: f128) -> u32, + crate_fn: conv::__fixunstfsi, + crate_fn_ppc: conv::__fixunskfsi, + sys_fn: __fixunstfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u64, + sig: (a: f128) -> u64, + crate_fn: conv::__fixunstfdi, + crate_fn_ppc: conv::__fixunskfdi, + sys_fn: __fixunstfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_f128_u128, + sig: (a: f128) -> u128, + crate_fn: conv::__fixunstfti, + crate_fn_ppc: conv::__fixunskfti, + sys_fn: __fixunstfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> signed int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -504,43 +610,79 @@ float_bench! { asm: [] } -criterion_group!( - float_conv, - conv_u32_f32, - conv_u32_f64, - conv_u64_f32, - conv_u64_f64, - conv_u128_f32, - conv_u128_f64, - conv_i32_f32, - conv_i32_f64, - conv_i64_f32, - conv_i64_f64, - conv_i128_f32, - conv_i128_f64, - conv_f64_u32, - conv_f64_u64, - conv_f64_u128, - conv_f64_i32, - conv_f64_i64, - conv_f64_i128, -); - -// FIXME: ppc64le has a sporadic overflow panic in the crate functions -// -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_group!( - float_conv_not_ppc64le, - conv_f32_u32, - conv_f32_u64, - conv_f32_u128, - conv_f32_i32, - conv_f32_i64, - conv_f32_i128, -); - -#[cfg(all(target_arch = "powerpc64", target_endian = "little"))] -criterion_main!(float_conv); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i32, + sig: (a: f128) -> i32, + crate_fn: conv::__fixtfsi, + crate_fn_ppc: conv::__fixkfsi, + sys_fn: __fixtfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_main!(float_conv, float_conv_not_ppc64le); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i64, + sig: (a: f128) -> i64, + crate_fn: conv::__fixtfdi, + crate_fn_ppc: conv::__fixkfdi, + sys_fn: __fixtfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i128, + sig: (a: f128) -> i128, + crate_fn: conv::__fixtfti, + crate_fn_ppc: conv::__fixkfti, + sys_fn: __fixtfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +pub fn float_conv() { + let mut criterion = Criterion::default().configure_from_args(); + + conv_u32_f32(&mut criterion); + conv_u32_f64(&mut criterion); + conv_u64_f32(&mut criterion); + conv_u64_f64(&mut criterion); + conv_u128_f32(&mut criterion); + conv_u128_f64(&mut criterion); + conv_i32_f32(&mut criterion); + conv_i32_f64(&mut criterion); + conv_i64_f32(&mut criterion); + conv_i64_f64(&mut criterion); + conv_i128_f32(&mut criterion); + conv_i128_f64(&mut criterion); + conv_f64_u32(&mut criterion); + conv_f64_u64(&mut criterion); + conv_f64_u128(&mut criterion); + conv_f64_i32(&mut criterion); + conv_f64_i64(&mut criterion); + conv_f64_i128(&mut criterion); + + #[cfg(all(f128_enabled))] + // FIXME: ppc64le has a sporadic overflow panic in the crate functions + // + #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] + { + conv_u32_f128(&mut criterion); + conv_u64_f128(&mut criterion); + conv_u128_f128(&mut criterion); + conv_i32_f128(&mut criterion); + conv_i64_f128(&mut criterion); + conv_i128_f128(&mut criterion); + conv_f128_u32(&mut criterion); + conv_f128_u64(&mut criterion); + conv_f128_u128(&mut criterion); + conv_f128_i32(&mut criterion); + conv_f128_i64(&mut criterion); + conv_f128_i128(&mut criterion); + } +} + +criterion_main!(float_conv); diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 01cc588c..a08748af 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -117,6 +117,28 @@ mod i_to_f { u128, __floatuntidf; i128, __floattidf; } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", 
target_arch = "powerpc64")))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsitf; + i32, __floatsitf; + u64, __floatunditf; + i64, __floatditf; + u128, __floatuntitf; + i128, __floattitf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsikf; + i32, __floatsikf; + u64, __floatundikf; + i64, __floatdikf; + u128, __floatuntikf; + i128, __floattikf; + } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 From 02e939b0c94977090d1302f25eb95dd5e4f119cc Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 30 Oct 2024 12:27:59 -0500 Subject: [PATCH 4/4] Remove the unneeded `isqrt` feature gate [1] has been stabilized so we no longer need to enable it. [1]: https://github.com/rust-lang/rust/issues/116226 --- testcrate/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 58419bf1..4154e0fb 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -15,7 +15,6 @@ #![no_std] #![cfg_attr(f128_enabled, feature(f128))] #![cfg_attr(f16_enabled, feature(f16))] -#![feature(isqrt)] pub mod bench; extern crate alloc;
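Appendix (editorial, not part of the patch series): the refactor in patch 2 is easier to audit
against a standalone model of the same `exp`/`m_adj`/`repr` decomposition, specialized here to
the `u32 -> f32` path (bias 127, 8 exponent bits, 23 significand bits) and cross-checked against
`as` casts:

    // Standalone sketch mirroring src/float/conv.rs after patch 2.
    fn u32_to_f32_bits(i: u32) -> u32 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit 23 set.
        let m_base = (i << n) >> 8;
        // Truncated bits, left-aligned.
        let dropped = (i << n) << 24;
        // Branchless round-to-nearest, ties-to-even: becomes 1 exactly when the dropped
        // bits exceed half an ULP, or equal half an ULP while the mantissa is odd.
        let adj = (dropped - (dropped >> 31 & !m_base)) >> 31;
        let m = m_base + adj;
        // Biased exponent 127 + (31 - n), minus one for the implicit bit.
        let e = 157 - n;
        (e << 23) + m // `+` lets a mantissa overflow carry into the exponent
    }

    fn main() {
        for i in [1u32, 3, (1 << 24) + 1, 0x8000_0001, u32::MAX] {
            assert_eq!(u32_to_f32_bits(i), (i as f32).to_bits());
        }
        println!("all conversions match `as` casts");
    }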