From 83935b11429c81f1b715bbda012a7f28c21359f1 Mon Sep 17 00:00:00 2001
From: jatin <jatinchowdhury18@gmail.com>
Date: Mon, 27 Nov 2023 23:27:23 -0800
Subject: [PATCH] Add benchmarks and some more testing for wright-omega

---
 .gitignore                                    |  2 +
 README.md                                     |  3 +-
 .../math_approx/src/wright_omega_approx.hpp   | 43 ++++++++++--
 tools/bench/CMakeLists.txt                    |  3 +
 tools/bench/wright_omega_bench.cpp            | 67 +++++++++++++++++++
 tools/plotter/plotter.cpp                     |  9 ++-
 6 files changed, 119 insertions(+), 8 deletions(-)
 create mode 100644 tools/bench/wright_omega_bench.cpp
diff --git a/.gitignore b/.gitignore
index 98b57a2..48854f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 .vscode/
 
 build*/
+
+.DS_Store
diff --git a/README.md b/README.md
index 0a1db15..228783a 100644
--- a/README.md
+++ b/README.md
@@ -8,4 +8,5 @@ Currently supported:
 - exp/exp2/exp10
 - log/log2/log10
 - tanh
-- sigmoid
\ No newline at end of file
+- sigmoid
+- Wright-Omega function
diff --git a/include/math_approx/src/wright_omega_approx.hpp b/include/math_approx/src/wright_omega_approx.hpp
index bc633fe..287e17e 100644
--- a/include/math_approx/src/wright_omega_approx.hpp
+++ b/include/math_approx/src/wright_omega_approx.hpp
@@ -12,7 +12,7 @@ T wright_omega (T x)
     using S = scalar_of_t<T>;
     static constexpr auto E = (S) 2.7182818284590452354;
 
-    const auto x1 = [](T _x)
+    const auto x1 = [] (T _x)
     {
         const auto x_sq = _x * _x;
         if constexpr (poly_order == 3)
@@ -31,14 +31,14 @@ T wright_omega (T x)
         }
         else
         {
-            return T{};
+            return T {};
         }
-    } (x);
+    }(x);
     const auto x2 = x - log<log_order> (x) + (S) 0.32352057096397160124 * exp<exp_order> ((S) -0.029614177658043381316 * x);
 
-    auto y = select (x < (S) -3, T{}, select (x < (S) E, x1, x2));
+    auto y = select (x < (S) -3, T {}, select (x < (S) E, x1, x2));
 
-    const auto nr_update = [](T _x, T _y)
+    const auto nr_update = [] (T _x, T _y)
     {
         return _y - (_y - exp<exp_order> (_x - _y)) / (_y + (S) 1);
     };
@@ -48,4 +48,37 @@ T wright_omega (T x)
 
     return y;
 }
+
+/**
+ * Wright-Omega function using Stephano D'Angelo's derivation (https://www.dafx.de/paper-archive/2019/DAFx2019_paper_5.pdf)
+ * With `num_nr_iters == 0`, this is the fastest implementation, but the least accurate.
+ * With `num_nr_iters == 1`, this is faster than the other implementation with 0 iterations, and little bit more accurate.
+ * For more accuracy, use the other implementation with at least 1 NR iteration.
+ */
+template <int num_nr_iters, int log_order = 3, int exp_order = log_order, typename T>
+T wright_omega_dangelo (T x)
+{
+    using S = scalar_of_t<T>;
+
+    const auto x1 = [] (T _x)
+    {
+        const auto x_sq = _x * _x;
+        const auto y_2_3 = (S) 4.775931364975583e-2 + (S) -1.314293149877800e-3 * _x;
+        const auto y_0_1 = (S) 6.313183464296682e-1 + (S) 3.631952663804445e-1 * _x;
+        return y_0_1 + y_2_3 * x_sq;
+    }(x);
+    const auto x2 = x - log<log_order> (x);
+
+    auto y = select (x < (S) -3.341459552768620, T {}, select (x < (S) 8, x1, x2));
+
+    const auto nr_update = [] (T _x, T _y)
+    {
+        return _y - (_y - exp<exp_order> (_x - _y)) / (_y + (S) 1);
+    };
+
+    for (int i = 0; i < num_nr_iters; ++i)
+        y = nr_update (x, y);
+
+    return y;
 }
+} // namespace math_approx
diff --git a/tools/bench/CMakeLists.txt b/tools/bench/CMakeLists.txt
index 773d33c..c96dff6 100644
--- a/tools/bench/CMakeLists.txt
+++ b/tools/bench/CMakeLists.txt
@@ -19,3 +19,6 @@ target_link_libraries(pow_approx_bench PRIVATE benchmark::benchmark math_approx)
 
 add_executable(log_approx_bench log_bench.cpp)
 target_link_libraries(log_approx_bench PRIVATE benchmark::benchmark math_approx)
+
+add_executable(wright_omega_approx_bench wright_omega_bench.cpp)
+target_link_libraries(wright_omega_approx_bench PRIVATE benchmark::benchmark math_approx)
diff --git a/tools/bench/wright_omega_bench.cpp b/tools/bench/wright_omega_bench.cpp
new file mode 100644
index 0000000..9279252
--- /dev/null
+++ b/tools/bench/wright_omega_bench.cpp
@@ -0,0 +1,67 @@
+#include <math_approx/math_approx.hpp>
+#include <benchmark/benchmark.h>
+
+#include "../../test/src/reference/toms917.hpp"
+
+static constexpr size_t N = 2000;
+const auto data = []
+{
+    std::vector<float> x;
+    x.resize (N, 0.0f);
+    for (size_t i = 0; i < N; ++i)
+        x[i] = -10.0f + 40.0f * (float) i / (float) N;
+    return x;
+}();
+
+#define WO_BENCH(name, func) \
+void name (benchmark::State& state) \
+{ \
+for (auto _ : state) \
+{ \
+for (auto& x : data) \
+{ \
+auto y = func (x); \
+benchmark::DoNotOptimize (y); \
+} \
+} \
+} \
+BENCHMARK (name);
+WO_BENCH (wright_omega_toms917, toms917::wrightomega)
+WO_BENCH (wright_omega_iter3_poly3_logexp5, (math_approx::wright_omega<3, 3, 5>))
+WO_BENCH (wright_omega_iter3_poly3, (math_approx::wright_omega<3, 3>))
+WO_BENCH (wright_omega_iter2_poly5, (math_approx::wright_omega<2, 5>))
+WO_BENCH (wright_omega_iter2_poly3, (math_approx::wright_omega<2, 3>))
+WO_BENCH (wright_omega_iter2_poly3_logexp3, (math_approx::wright_omega<2, 3, 3>))
+WO_BENCH (wright_omega_iter1_poly5, (math_approx::wright_omega<1, 5>))
+WO_BENCH (wright_omega_iter1_poly3, (math_approx::wright_omega<1, 3>))
+WO_BENCH (wright_omega_iter0_poly5, (math_approx::wright_omega<0, 5>))
+WO_BENCH (wright_omega_iter0_poly3, (math_approx::wright_omega<0, 3>))
+WO_BENCH (wright_omega_dangelo2, (math_approx::wright_omega_dangelo<2>))
+WO_BENCH (wright_omega_dangelo1, (math_approx::wright_omega_dangelo<1>))
+WO_BENCH (wright_omega_dangelo0, (math_approx::wright_omega_dangelo<0>))
+
+#define WO_SIMD_BENCH(name, func) \
+void name (benchmark::State& state) \
+{ \
+for (auto _ : state) \
+{ \
+for (auto& x : data) \
+{ \
+auto y = func (xsimd::broadcast (x)); \
+static_assert (std::is_same_v<xsimd::batch<float>, decltype(y)>); \
+benchmark::DoNotOptimize (y); \
+} \
+} \
+} \
+BENCHMARK (name);
+WO_SIMD_BENCH (wright_omega_simd_iter3_poly3_logexp5, (math_approx::wright_omega<3, 3, 5>))
+WO_SIMD_BENCH (wright_omega_simd_iter3_poly3, (math_approx::wright_omega<3, 3>))
+WO_SIMD_BENCH (wright_omega_simd_iter2_poly5, (math_approx::wright_omega<2, 5>))
+WO_SIMD_BENCH (wright_omega_simd_iter2_poly3, (math_approx::wright_omega<2, 3>))
+WO_SIMD_BENCH (wright_omega_simd_iter2_poly3_logexp3, (math_approx::wright_omega<2, 3, 3>))
+WO_SIMD_BENCH (wright_omega_simd_iter1_poly5, (math_approx::wright_omega<1, 5>))
+WO_SIMD_BENCH (wright_omega_simd_iter1_poly3, (math_approx::wright_omega<1, 3>))
+WO_SIMD_BENCH (wright_omega_simd_iter0_poly5, (math_approx::wright_omega<0, 5>))
+WO_SIMD_BENCH (wright_omega_simd_iter0_poly3, (math_approx::wright_omega<0, 3>))
+
+BENCHMARK_MAIN();
diff --git a/tools/plotter/plotter.cpp b/tools/plotter/plotter.cpp
index 85d61ee..9d25d44 100644
--- a/tools/plotter/plotter.cpp
+++ b/tools/plotter/plotter.cpp
@@ -8,6 +8,7 @@ namespace plt = matplotlibcpp;
 
 #include "../../test/src/test_helpers.hpp"
 #include "../../test/src/reference/toms917.hpp"
+#include "../../test/src/reference/dangelo_omega.hpp"
 #include <math_approx/math_approx.hpp>
 
 template <typename F_Approx>
@@ -66,8 +67,12 @@ int main()
     const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol);
     const auto y_exact = test_helpers::compute_all<float> (all_floats, FLOAT_FUNC(toms917::wrightomega));
 
-    plot_ulp_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega<3, 3, 4>)), "W-O 3-3-4");
-    plot_ulp_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega<3, 3, 5>)), "W-O 3-3-5");
+    // plot_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega<0>)), "W-O 0-3");
+    // plot_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega<0, 5>)), "W-O 0-5");
+    plot_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega<1>)), "W-O 1-3");
+    // plot_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega_dangelo<0>)), "W-O D'Angelo 0");
+    plot_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega_dangelo<1>)), "W-O D'Angelo 1");
+    plot_error (all_floats, y_exact, FLOAT_FUNC((math_approx::wright_omega_dangelo<2>)), "W-O D'Angelo 2");
 
     plt::legend ({ { "loc", "upper right" } });
     plt::xlim (range.first, range.second);