From d8944c1cdf410f4d7d7bb6b31244eddb806adbc3 Mon Sep 17 00:00:00 2001
From: shastry <malladi.sastry@intel.com>
Date: Mon, 4 Dec 2023 22:56:34 +0530
Subject: [PATCH] DRC: math: Use HiFi intrinsic to convert a C macro to a
 function for fast performance.

Convert the C macros Q_SHIFT_RND, Q_SHIFT_LEFT, Q_MULTSR_32X32 and
Q_CONVERT_FLOAT to static inline functions implemented with HiFi
intrinsics for higher performance.

Signed-off-by: shastry <malladi.sastry@intel.com>
---
 src/math/exp_fcn_hifi.c | 84 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 76 insertions(+), 8 deletions(-)

diff --git a/src/math/exp_fcn_hifi.c b/src/math/exp_fcn_hifi.c
index 814ab2675151..a14133f477a5 100644
--- a/src/math/exp_fcn_hifi.c
+++ b/src/math/exp_fcn_hifi.c
@@ -27,6 +27,9 @@
 #include <xtensa/tie/xt_hifi3.h>
 #endif
 
+#include <xtensa/tie/xt_hifi2.h>
+#include <xtensa/tie/xt_FP.h>
+
 #define SOFM_CONVERG_ERROR 28823037624320LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */
 
 /*
@@ -277,10 +280,72 @@ int32_t sofm_exp_int32(int32_t x)
 	return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
 }
 
-#define ONE_Q20         Q_CONVERT_FLOAT(1.0, 20)	  /* Use Q12.20 */
-#define TWO_Q27         Q_CONVERT_FLOAT(2.0, 27)	  /* Use Q5.27 */
-#define MINUS_TWO_Q27   Q_CONVERT_FLOAT(-2.0, 27)	  /* Use Q5.27 */
-#define q_mult(a, b, qa, qb, qy) ((int32_t)Q_MULTSR_32X32((int64_t)(a), b, qa, qb, qy))
+/* Replacements for the Q-format C macros, implemented with HiFi intrinsics */
+/* Convert float a to fractional Qnx.ny format with b (= ny) fractional bits.
+ * There is no check that nx + ny bits fit the word length of int. The parameter
+ * b must be 31 or less. NOTE(review): confirm XT_TRUNC_S rounding suits callers.
+ */
+static inline int exp_hifi_q_convert_float(float a, int b)
+{
+	int val;
+	float x, y;
+
+	val = AE_SLAA32(1, b); /* 1 shifted left by b, i.e. the scale 2^b */
+	x = XT_FLOAT_S(val, 0); /* scale factor as a float */
+	y = XT_MUL_S(a, x); /* a * 2^b in single precision */
+
+	return XT_TRUNC_S(y, 0);
+}
+
+/* Fractional multiplication with shift and round: a has c fractional bits,
+ * b has d fractional bits and the result has e. Operands are plain int here.
+ */
+static inline int exp_hifi_q_multsr_32x32(int a, int b, int c, int d, int e)
+{
+	ae_int64 res;
+	int xt_o;
+	int shift;
+
+	res = AE_MUL32_LL(a, b); /* product held in a 64-bit accumulator */
+	shift = XT_SUB(XT_ADD(c, d), XT_ADD(e, 1)); /* c + d - (e + 1) */
+	res = AE_SRAA64(res, shift); /* keep one extra bit for rounding */
+	res = AE_ADD64(res, 1); /* add half an LSB of the final result */
+	res = AE_SRAI64(res, 1); /* drop the extra rounding bit */
+	xt_o = AE_MOVINT32_FROMINT64(res); /* NOTE(review): confirm low 32 bits */
+
+	return xt_o;
+}
+
+/* Rounding Q-shift: rescale a from b fractional bits to c fractional bits */
+static inline int exp_hifi_q_shift_rnd(int a, int b, int c)
+{
+	ae_int32 res;
+	int shift;
+
+	shift = XT_SUB(b, XT_ADD(c, 1)); /* b - (c + 1) */
+	res = AE_SRAA32(a, shift); /* keep one extra bit for rounding */
+	res = AE_ADD32(res, 1); /* add half an LSB of the final result */
+	res = AE_SRAI32(res, 1); /* drop the extra rounding bit */
+
+	return res;
+}
+
+/* Left Q-shift of a from b to c fractional bits; compiler rejects (x >> -1) */
+static inline int exp_hifi_q_shift_left(int a, int b, int c)
+{
+	ae_int32 xt_o;
+	int shift;
+
+	shift = XT_SUB(c, b); /* number of fractional bits to add: c - b */
+	xt_o = AE_SLAA32(a, shift);
+
+	return xt_o;
+}
+
+#define ONE_Q20         exp_hifi_q_convert_float(1.0, 20)	  /* 1.0 in Q12.20 */
+#define TWO_Q27         exp_hifi_q_convert_float(2.0, 27)	  /* 2.0 in Q5.27 */
+#define MINUS_TWO_Q27   exp_hifi_q_convert_float(-2.0, 27)	  /* -2.0 in Q5.27 */
+#define q_mult(a, b, qa, qb, qy) ((int32_t)exp_hifi_q_multsr_32x32((int64_t)(a), b, qa, qb, qy))
 /* Fixed point exponent function for approximate range -11.5 .. 7.6
  * that corresponds to decibels range -100 .. +66 dB.
  *
@@ -302,10 +367,10 @@ int32_t exp_fixed(int32_t x)
 	int i;
 	int n = 0;
 
-	if (x < Q_CONVERT_FLOAT(-11.5, 27))
+	if (x < exp_hifi_q_convert_float(-11.5, 27))
 		return 0;
 
-	if (x > Q_CONVERT_FLOAT(7.6245, 27))
+	if (x > exp_hifi_q_convert_float(7.6245, 27))
 		return INT32_MAX;
 
 	/* x is Q5.27 */
@@ -318,10 +383,13 @@ int32_t exp_fixed(int32_t x)
 	/* sofm_exp_int32() input is Q4.28, while x1 is Q5.27
 	 * sofm_exp_int32() output is Q9.23, while y0 is Q12.20
 	 */
-	y0 = Q_SHIFT_RND(sofm_exp_int32(Q_SHIFT_LEFT(xs, 27, 28)), 23, 20);
+	y0 = exp_hifi_q_shift_rnd(
+							  sofm_exp_int32(
+							  exp_hifi_q_shift_left(xs, 27, 28)),
+							  23, 20);
 	y = ONE_Q20;
 	for (i = 0; i < (1 << n); i++)
-		y = (int32_t)Q_MULTSR_32X32((int64_t)y, y0, 20, 20, 20);
+		y = (int32_t)exp_hifi_q_multsr_32x32((int64_t)y, y0, 20, 20, 20);
 
 	return y;
 }