1 // SPDX-License-Identifier: BSD-3-Clause
2 //
3 // Copyright(c) 2022 Intel Corporation. All rights reserved.
4 //
5 // Author: Andrula Song <andrula.song@intel.com>
6
7 #include <stdint.h>
8 #include <stddef.h>
9 #include <errno.h>
10 #include <sof/audio/format.h>
11 #include <sof/math/iir_df1.h>
12 #include <user/eq.h>
13
14 #if IIR_DF1_HIFI3
15
16 /*
17 * Direct form I second order filter block (biquad)
18 *
19 * +----+ +---+ +-------+
20 * X(z) ---o--->| b0 |---> + --+-------------o--->| g |--->| shift |---> Y(z)
21 * | +----+ ^ ^ | +---+ +-------+
22 * | | | |
23 * +------+ | | +------+
24 * | z^-1 | | | | z^-1 |
25 * +------+ | | +------+
26 * | +----+ | | +----+ |
27 * o--->| b1 |---> + + <---| a1 |---o
28 * | +----+ ^ ^ +----+ |
29 * | | | |
30 * +------+ | | +------+
31 * | z^-1 | | | | z^-1 |
32 * +------+ | | +------+
33 * | ^ ^ |
34 * | +----+ | | +----+ |
35 * o--->| b2 |---> + +<--- | a2 |---o
36 * +----+ +----+
37 *
38 * y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
39 * the a1 a2 has been negated during calculation
40 */
41
42 /* Series DF1 IIR */
43
44 /* 32 bit data, 32 bit coefficients and 32 bit state variables */
45
iir_df1(struct iir_state_df1 * iir,int32_t x)46 int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
47 {
48 ae_int64 acc;
49 ae_valign coef_align = AE_ZALIGN64();
50 ae_int32x2 coef_a2a1;
51 ae_int32x2 coef_b2b1;
52 ae_int32x2 coef_b0;
53 ae_int32x2 gain;
54 ae_int32x2 shift;
55 ae_int32x2 delay_y2y1;
56 ae_int32x2 delay_x2x1;
57 ae_int32 in;
58 ae_int32 tmp;
59 ae_int32x2 *coefp;
60 ae_int32x2 *delayp;
61 ae_int32 out = 0;
62 int32_t *delay_update;
63 int i;
64 int j;
65 int nseries = iir->biquads_in_series;
66
67 /* Bypass is set with number of biquads set to zero. */
68 if (!iir->biquads)
69 return x;
70
71 /* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
72 coefp = (ae_int32x2 *)&iir->coef[0];
73 delayp = (ae_int32x2 *)&iir->delay[0];
74 for (j = 0; j < iir->biquads; j += nseries) {
75 /* the first for loop is for parallel EQs, and they have the same input */
76 in = x;
77 for (i = 0; i < nseries; i++) {
78 /* Compute output: Delay is kept Q17.47 while multiply
79 * instruction gives Q2.30 x Q1.31 -> Q18.46. Need to
80 * shift delay line values right by one for same align
81 * as MAC. Store to delay line need to be shifted left
82 * by one similarly.
83 */
84 coef_align = AE_LA64_PP(coefp);
85 AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
86 AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
87 AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
88 AE_L32_IP(shift, (ae_int32 *)coefp, 4);
89 AE_L32_IP(gain, (ae_int32 *)coefp, 4);
90
91 AE_L32X2_IP(delay_y2y1, delayp, 8);
92 AE_L32X2_IP(delay_x2x1, delayp, 8);
93
94 acc = AE_MULF32R_HH(coef_a2a1, delay_y2y1); /* a2 * y(n - 2) */
95 AE_MULAF32R_LL(acc, coef_a2a1, delay_y2y1); /* a1 * y(n - 1) */
96 AE_MULAF32R_HH(acc, coef_b2b1, delay_x2x1); /* b2 * x(n - 2) */
97 AE_MULAF32R_LL(acc, coef_b2b1, delay_x2x1); /* b1 * x(n - 1) */
98 AE_MULAF32R_HH(acc, coef_b0, in); /* b0 * x */
99 acc = AE_SLAI64S(acc, 1); /* Convert to Q17.47 */
100 tmp = AE_ROUND32F48SSYM(acc); /* Round to Q1.31 */
101
102 /* update the state value */
103 delay_update = (int32_t *)delayp - 4;
104 delay_update[0] = delay_update[1];
105 delay_update[1] = tmp;
106 delay_update[2] = delay_update[3];
107 delay_update[3] = in;
108
109 /* Apply gain Q18.14 x Q1.31 -> Q34.30 */
110 acc = AE_MULF32R_HH(gain, tmp); /* Gain */
111 acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
112
113 /* Apply biquad output shift right parameter and then
114 * round and saturate to 32 bits Q1.31.
115 */
116 acc = AE_SRAA64(acc, shift);
117 in = AE_ROUND32F48SSYM(acc);
118 }
119 /* Output of previous section is in variable in */
120 out = AE_F32_ADDS_F32(out, in);
121 }
122 return out;
123 }
124
125 #endif
126