diff --git a/include/xsimd/arch/generic/xsimd_generic_math.hpp b/include/xsimd/arch/generic/xsimd_generic_math.hpp
index 101a4f783..56e4d98bb 100644
--- a/include/xsimd/arch/generic/xsimd_generic_math.hpp
+++ b/include/xsimd/arch/generic/xsimd_generic_math.hpp
@@ -1718,8 +1718,17 @@ namespace xsimd
             {
                 batch_type s = bitofsign(self);
                 batch_type v = self ^ s;
                 batch_type t2n = constants::twotonmb<batch_type>();
+                // Under fast-math, reordering is possible and the compiler optimizes d
+                // to v. That's not what we want, so prevent compiler optimization here.
+                // FIXME: it may be better to emit a memory barrier here (?).
+#ifdef __FAST_MATH__
+                volatile batch_type d0 = v + t2n;
+                batch_type d = *(batch_type*)(void*)(&d0) - t2n;
+#else
                 batch_type d0 = v + t2n;
-                return s ^ select(v < t2n, d0 - t2n, v);
+                batch_type d = d0 - t2n;
+#endif
+                return s ^ select(v < t2n, d, v);
             }
         }
         template <class A, class T>
         batch<T, A> nearbyint(batch<T, A> const& self, requires_arch<generic>)
         {
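For context, the trick being protected works as follows: twotonmb is two to the number of mantissa bits (2^52 for double), so computing (v + t2n) - t2n forces the FPU to round v to the nearest integer, because adding t2n pushes v's fractional bits out of the mantissa. Under -ffast-math the compiler is allowed to reassociate and fold the whole expression to plain v, which deletes the rounding; the volatile round-trip above blocks that folding. A minimal scalar sketch of the same idea, assuming double precision (the function name is illustrative and not part of xsimd):

#include <cmath>
#include <cstdio>

// Illustrative scalar version of the rounding trick; xsimd operates on
// batch<T, A> values instead of plain doubles.
double nearbyint_sketch(double x)
{
    const double t2n = 4503599627370496.0; // 2^52, "two to the number of mantissa bits"
    double s = std::copysign(1.0, x);
    double v = std::fabs(x);
#ifdef __FAST_MATH__
    // Force the intermediate to materialize so the compiler cannot fold
    // (v + t2n) - t2n into v. A plain volatile read suffices for a scalar;
    // the patch casts the volatile away through a pointer, presumably
    // because batch types lack volatile-qualified operators.
    volatile double d0 = v + t2n;
    double d = d0 - t2n;
#else
    double d0 = v + t2n;        // rounds v to an integer (round-to-nearest-even)
    double d = d0 - t2n;        // recover the rounded value
#endif
    return v < t2n ? s * d : x; // |x| >= 2^52 is already an integer
}

int main()
{
    std::printf("%g %g\n", nearbyint_sketch(2.5), nearbyint_sketch(-1.5)); // 2 -2
}

Built without -ffast-math this matches std::nearbyint in the default rounding mode; built with -ffast-math but without the volatile guard, the compiler is free to return x unrounded. The volatile store is a portable way to force the intermediate to exist; the FIXME in the patch notes a compiler barrier as a possible alternative.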