From 45cad81a305b09c4b1c9db55c23fcbb0f4e01ee4 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <serge.guelton@telecom-bretagne.eu>
Date: Sat, 28 Aug 2021 23:12:24 +0200
Subject: [PATCH] Make nearbyint generic implementation compatible with
 -ffast-math

Fix #515
---
 include/xsimd/arch/generic/xsimd_generic_math.hpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/xsimd/arch/generic/xsimd_generic_math.hpp b/include/xsimd/arch/generic/xsimd_generic_math.hpp
index 101a4f783..56e4d98bb 100644
--- a/include/xsimd/arch/generic/xsimd_generic_math.hpp
+++ b/include/xsimd/arch/generic/xsimd_generic_math.hpp
@@ -1718,8 +1718,17 @@ namespace xsimd {
         batch_type s = bitofsign(self);
         batch_type v = self ^ s;
         batch_type t2n = constants::twotonmb<batch_type>();
+        // Under fast-math, reassociation is allowed, so the compiler may fold
+        // (v + t2n) - t2n down to v. That's not what we want, so prevent it here.
+        // FIXME: it may be better to emit a memory barrier here (?).
+#ifdef __FAST_MATH__
+        volatile batch_type d0 = v + t2n;
+        batch_type d = *(batch_type*)(void*)(&d0) - t2n;
+#else
         batch_type d0 = v + t2n;
-        return s ^ select(v < t2n, d0 - t2n, v);
+        batch_type d = d0 - t2n;
+#endif
+        return s ^ select(v < t2n, d, v);
       }
     }
     template<class A> batch<float, A> nearbyint(batch<float, A> const& self, requires_arch<generic>) {