From a706eca6d188c3dbf0f92b699c577666a82334d0 Mon Sep 17 00:00:00 2001 From: Peter Sobot Date: Mon, 29 Jul 2024 14:27:38 -0400 Subject: [PATCH] Enable some (but not all) fast-math optimizations for better vectorization. (#78) * Enable some (but not all) fast-math optimizations for better vectorization. * Remove -ffinite-math-only. --- python/setup.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/python/setup.py b/python/setup.py index 1d1a8549..272d28f0 100644 --- a/python/setup.py +++ b/python/setup.py @@ -14,8 +14,8 @@ import os -import sys import platform +import sys from pathlib import Path import numpy as np @@ -24,7 +24,6 @@ from setuptools import Extension, setup from setuptools.command.build_ext import build_ext - # Find the "cpp" folder depending on where this script is run from: for search_path in ["./cpp/", "../cpp/", "../../cpp/"]: path = os.path.abspath(os.path.join(os.path.dirname(__file__), search_path)) @@ -100,6 +99,20 @@ def build_extensions(self): if ct == "unix": opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) opts.append("-std=c++17") + # Allow reordering floating-point operations for + # better automatic vectorization, even without -ffast-math. + # See: https://simonbyrne.github.io/notes/fastmath/#flushing_subnormals_to_zero + # for why -ffast-math is not included: + opts.extend( + [ + "-fassociative-math", + "-fno-signaling-nans", + "-fno-trapping-math", + "-fno-signed-zeros", + "-freciprocal-math", + "-fno-math-errno", + ] + ) if has_flag(self.compiler, "-fvisibility=hidden"): opts.append("-fvisibility=hidden") elif ct == "msvc":