From d8944c1cdf410f4d7d7bb6b31244eddb806adbc3 Mon Sep 17 00:00:00 2001 From: shastry Date: Mon, 4 Dec 2023 22:56:34 +0530 Subject: [PATCH] DRC: math: Use HiFi intrinsic to convert a C macro to a function for fast performance. Using HiFi intrinsics, convert a C macro to a function for higher performance. Q_SHIFT_RND, Q_SHIFT_LEFT, Q_MULTSR_32X32, and Q_CONVERT_FLOAT are implemented. Signed-off-by: shastry --- src/math/exp_fcn_hifi.c | 84 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 8 deletions(-) diff --git a/src/math/exp_fcn_hifi.c b/src/math/exp_fcn_hifi.c index 814ab2675151..a14133f477a5 100644 --- a/src/math/exp_fcn_hifi.c +++ b/src/math/exp_fcn_hifi.c @@ -27,6 +27,9 @@ #include #endif +#include +#include + #define SOFM_CONVERG_ERROR 28823037624320LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */ /* @@ -277,10 +280,72 @@ int32_t sofm_exp_int32(int32_t x) return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts)); } -#define ONE_Q20 Q_CONVERT_FLOAT(1.0, 20) /* Use Q12.20 */ -#define TWO_Q27 Q_CONVERT_FLOAT(2.0, 27) /* Use Q5.27 */ -#define MINUS_TWO_Q27 Q_CONVERT_FLOAT(-2.0, 27) /* Use Q5.27 */ -#define q_mult(a, b, qa, qb, qy) ((int32_t)Q_MULTSR_32X32((int64_t)(a), b, qa, qb, qy)) +/* Function versions of the Q-format C macros, implemented with Xtensa HiFi intrinsics */ +/* Convert float a to fractional Qnx.ny format with b fractional bits. There is no + * check that nx+ny bits fit the word length of int. The scaled value is converted to int + * with XT_TRUNC_S. NOTE(review): the scale is a 32-bit AE_SLAA32(1, b); confirm b = 31 is safe. + */ +static inline int exp_hifi_q_convert_float(float a, int b) +{ + int val; + float x, y; + + val = AE_SLAA32(1, b); + x = XT_FLOAT_S(val, 0); + y = XT_MUL_S(a, x); + + return XT_TRUNC_S(y, 0); +} + +/* Fractional multiplication with shift and round: a is Qc, b is Qd, the result is Qe. + * Parameters a and b are int, so an (int64_t) cast by the caller is converted back to int.
+ */ +static inline int exp_hifi_q_multsr_32x32(int a, int b, int c, int d, int e) +{ + ae_int64 res; + int xt_o; + int shift; + + res = AE_MUL32_LL(a, b); + shift = XT_SUB(XT_ADD(c, d), XT_ADD(e, 1)); + res = AE_SRAA64(res, shift); + res = AE_ADD64(res, 1); + res = AE_SRAI64(res, 1); + xt_o = AE_MOVINT32_FROMINT64(res); + + return xt_o; +} + +/* Rounding shift of a from Qb to Qc (b - c bits in total); function form of Q_SHIFT_RND */ +static inline int exp_hifi_q_shift_rnd(int a, int b, int c) +{ + ae_int32 res; + int shift; + + shift = XT_SUB(b, XT_ADD(c, 1)); + res = AE_SRAA32(a, shift); + res = AE_ADD32(res, 1); + res = AE_SRAI32(res, 1); + + return res; +} + +/* Shift a left from Qb to Qc, i.e. by (c - b) bits; a separate helper since (x >> -1) is not allowed */ +static inline int exp_hifi_q_shift_left(int a, int b, int c) +{ + ae_int32 xt_o; + int shift; + + shift = XT_SUB(c, b); + xt_o = AE_SLAA32(a, shift); + + return xt_o; +} + +#define ONE_Q20 exp_hifi_q_convert_float(1.0, 20) /* Use Q12.20. NOTE(review): now a function call at each use, not a constant */ +#define TWO_Q27 exp_hifi_q_convert_float(2.0, 27) /* Use Q5.27 */ +#define MINUS_TWO_Q27 exp_hifi_q_convert_float(-2.0, 27) /* Use Q5.27 */ +#define q_mult(a, b, qa, qb, qy) ((int32_t)exp_hifi_q_multsr_32x32((int64_t)(a), b, qa, qb, qy)) /* Fixed point exponent function for approximate range -11.5 .. 7.6 * that corresponds to decibels range -100 .. +66 dB. 
* @@ -302,10 +367,10 @@ int32_t exp_fixed(int32_t x) int i; int n = 0; - if (x < Q_CONVERT_FLOAT(-11.5, 27)) + if (x < exp_hifi_q_convert_float(-11.5, 27)) return 0; - if (x > Q_CONVERT_FLOAT(7.6245, 27)) + if (x > exp_hifi_q_convert_float(7.6245, 27)) return INT32_MAX; /* x is Q5.27 */ @@ -318,10 +383,13 @@ int32_t exp_fixed(int32_t x) /* sofm_exp_int32() input is Q4.28, while x1 is Q5.27 * sofm_exp_int32() output is Q9.23, while y0 is Q12.20 */ - y0 = Q_SHIFT_RND(sofm_exp_int32(Q_SHIFT_LEFT(xs, 27, 28)), 23, 20); + y0 = exp_hifi_q_shift_rnd( + sofm_exp_int32( + exp_hifi_q_shift_left(xs, 27, 28)), + 23, 20); y = ONE_Q20; for (i = 0; i < (1 << n); i++) - y = (int32_t)Q_MULTSR_32X32((int64_t)y, y0, 20, 20, 20); + y = (int32_t)exp_hifi_q_multsr_32x32((int64_t)y, y0, 20, 20, 20); return y; }