1 /*
2  * Copyright 2018-2022 NXP
3  * All rights reserved.
4  *
5  * SPDX-License-Identifier: BSD-3-Clause
6  */
7 
8 #ifndef _FSL_POWERQUAD_H_
9 #define _FSL_POWERQUAD_H_
10 
11 #if defined(__CC_ARM)
12 
13 #elif defined(__ICCARM__)
14 #include <intrinsics.h>
15 #elif defined(__GNUC__)
16 #include <arm_acle.h>
17 #endif /* defined(__CC_ARM) */
18 
19 #include "fsl_common.h"
20 #include "fsl_powerquad_data.h"
21 
22 /*!
23  * @addtogroup powerquad
24  * @{
25  */
26 
27 /*******************************************************************************
28  * Definitions
29  ******************************************************************************/
30 
/*! @name Driver version */
/*@{*/
/*! @brief PowerQuad driver version, built with MAKE_VERSION(2, 1, 0). */
#define FSL_POWERQUAD_DRIVER_VERSION (MAKE_VERSION(2, 1, 0))
/*@}*/
35 
/*
 * Backward compatibility: the misspelled "Biqaud" identifiers remain
 * available as aliases of the correctly spelled "Biquad" identifiers.
 */
#define PQ_VectorBiqaudDf2F32 PQ_VectorBiquadDf2F32
#define PQ_VectorBiqaudDf2Fixed32 PQ_VectorBiquadDf2Fixed32
#define PQ_VectorBiqaudDf2Fixed16 PQ_VectorBiquadDf2Fixed16

#define PQ_VectorBiqaudCascadeDf2F32 PQ_VectorBiquadCascadeDf2F32
#define PQ_VectorBiqaudCascadeDf2Fixed32 PQ_VectorBiquadCascadeDf2Fixed32
#define PQ_VectorBiqaudCascadeDf2Fixed16 PQ_VectorBiquadCascadeDf2Fixed16

#define PQ_Vector8BiqaudDf2CascadeF32 PQ_Vector8BiquadDf2CascadeF32
#define PQ_Vector8BiqaudDf2CascadeFixed32 PQ_Vector8BiquadDf2CascadeFixed32
#define PQ_Vector8BiqaudDf2CascadeFixed16 PQ_Vector8BiquadDf2CascadeFixed16
46 
/*
 * Number format selector. The _pq_xxx macros in this file OR it with
 * PQ_COMP0_ONLY / PQ_COMP1_ONLY to form the CRn operand of the coprocessor
 * access intrinsics.
 */
#define PQ_FLOAT32 0U
#define PQ_FIXEDPT 1U

/*
 * Coprocessor identifiers. CP_PQ is the coprocessor number passed to
 * __arm_mcr / __arm_mcrr / __arm_mrc by the _pq_xxx macros; the remaining
 * CP_xxx values are not referenced in this part of the header.
 */
#define CP_PQ     0U
#define CP_MTX    1U
#define CP_FFT    2U
#define CP_FIR    3U
#define CP_CORDIC 5U

/* Machine selector for floating-point input (opc2 operand of the MCR macros). */
#define PQ_TRANS  0U
#define PQ_TRIG   1U
#define PQ_BIQUAD 2U

/* Machine selector for fixed-point input: the floating-point value plus 4. */
#define PQ_TRANS_FIXED  4U
#define PQ_TRIG_FIXED   5U
#define PQ_BIQUAD_FIXED 6U
63 
/*
 * Operation codes used together with PQ_TRANS / PQ_TRANS_FIXED. The MCR
 * based macros pass them as opc1; PQ_DIV is passed as CRm of __arm_mcrr by
 * _pq_div0 / _pq_div1.
 */
#define PQ_INV     0U
#define PQ_LN      1U
#define PQ_SQRT    2U
#define PQ_INVSQRT 3U
#define PQ_ETOX    4U
#define PQ_ETONX   5U
#define PQ_DIV     6U

/* Operation codes used together with PQ_TRIG / PQ_TRIG_FIXED. */
#define PQ_SIN 0U
#define PQ_COS 1U

/* Operation code used together with PQ_BIQUAD / PQ_BIQUAD_FIXED (same value for both). */
#define PQ_BIQ0_CALC 1U
#define PQ_BIQ1_CALC 1U

/* Unit select, placed in bit 1 of the CRn operand. */
#define PQ_COMP0_ONLY (0U << 1U)
#define PQ_COMP1_ONLY (1U << 1U)
80 
/*
 * CORDIC control word helpers. Each field macro converts its argument to
 * uint32_t and shifts it to the field position (ITER: bit 2, MIU: bit 1,
 * T: bit 0).
 */
#define CORDIC_ITER(x) ((uint32_t)(x) << 2U)
#define CORDIC_MIU(x)  ((uint32_t)(x) << 1U)
#define CORDIC_T(x)    ((uint32_t)(x) << 0U)

/*
 * Preset field combinations. The expansions are parenthesized so that they
 * stay a single operand when combined with operators that bind tighter than
 * '|' (for example '&', '==' or '+').
 */
#define CORDIC_ARCTAN  (CORDIC_T(1U) | CORDIC_MIU(0U))
#define CORDIC_ARCTANH (CORDIC_T(1U) | CORDIC_MIU(1U))
86 
/* Mask with only bit 31 set. NOTE(review): presumably an "instruction busy" flag; the register it applies to is not visible here. */
#define INST_BUSY 0x80000000U

/*
 * Error status identifiers.
 * NOTE(review): whether these are bit positions or plain codes cannot be
 * told from this part of the header - confirm against the users.
 */
#define PQ_ERRSTAT_OVERFLOW      0U
#define PQ_ERRSTAT_NAN           1U
#define PQ_ERRSTAT_FIXEDOVERFLOW 2U
#define PQ_ERRSTAT_UNDERFLOW     3U
93 
/* Transform operation types (the values 5 is unused in this list). */
#define PQ_TRANS_CFFT 0U
#define PQ_TRANS_IFFT 1U
#define PQ_TRANS_CDCT 2U
#define PQ_TRANS_IDCT 3U
#define PQ_TRANS_RFFT 4U
#define PQ_TRANS_RDCT 6U

/* Matrix / vector operation types (the values 0, 6 and 8 are unused in this list). */
#define PQ_MTX_SCALE 1U
#define PQ_MTX_MULT  2U
#define PQ_MTX_ADD   3U
#define PQ_MTX_INV   4U
#define PQ_MTX_PROD  5U
#define PQ_MTX_SUB   7U
#define PQ_VEC_DOTP  9U
#define PQ_MTX_TRAN  10U

/* FIR engine operation types. */
#define PQ_FIR_FIR         0U
#define PQ_FIR_CONVOLUTION 1U
#define PQ_FIR_CORRELATION 2U
#define PQ_FIR_INCREMENTAL 4U
115 
/*
 * Coprocessor access macros, built on the Arm ACLE intrinsics
 * __arm_mcr(coproc, opc1, value, CRn, CRm, opc2),
 * __arm_mcrr(coproc, opc1, value, CRm) and
 * __arm_mrc(coproc, opc1, CRn, CRm, opc2).
 *
 * Private building blocks (not meant to be used outside this header):
 *  - _pq_mcr_f32 / _pq_mcr_fx: write a 32-bit operand with opc1 = operation,
 *    CRn = number format | unit select, CRm = 0 and opc2 = machine.
 *  - _pq_mcrr_div: write a 64-bit operand with opc1 = PQ_FLOAT32 | unit
 *    select and CRm = PQ_DIV.
 *  - _pq_mrc: read a 32-bit value with opc1 = reg, CRn = number format |
 *    unit select, CRm = 0 and opc2 = 0.
 */
#define _pq_mcr_f32(op, machine, unit, x) __arm_mcr(CP_PQ, op, (uint32_t)(x), PQ_FLOAT32 | unit, 0, machine)
#define _pq_mcr_fx(op, machine, unit, x)  __arm_mcr(CP_PQ, op, (uint32_t)(x), PQ_FIXEDPT | unit, 0, machine)
#define _pq_mcrr_div(unit, x)             __arm_mcrr(CP_PQ, PQ_FLOAT32 | unit, (uint64_t)(x), PQ_DIV)
#define _pq_mrc(reg, fmt, unit)           __arm_mrc(CP_PQ, reg, fmt | unit, 0, 0)

/* Floating-point operations, unit selected by PQ_COMP0_ONLY. */
#define _pq_ln0(x)      _pq_mcr_f32(PQ_LN, PQ_TRANS, PQ_COMP0_ONLY, (x))
#define _pq_inv0(x)     _pq_mcr_f32(PQ_INV, PQ_TRANS, PQ_COMP0_ONLY, (x))
#define _pq_sqrt0(x)    _pq_mcr_f32(PQ_SQRT, PQ_TRANS, PQ_COMP0_ONLY, (x))
#define _pq_invsqrt0(x) _pq_mcr_f32(PQ_INVSQRT, PQ_TRANS, PQ_COMP0_ONLY, (x))
#define _pq_etox0(x)    _pq_mcr_f32(PQ_ETOX, PQ_TRANS, PQ_COMP0_ONLY, (x))
#define _pq_etonx0(x)   _pq_mcr_f32(PQ_ETONX, PQ_TRANS, PQ_COMP0_ONLY, (x))
#define _pq_sin0(x)     _pq_mcr_f32(PQ_SIN, PQ_TRIG, PQ_COMP0_ONLY, (x))
#define _pq_cos0(x)     _pq_mcr_f32(PQ_COS, PQ_TRIG, PQ_COMP0_ONLY, (x))
#define _pq_biquad0(x)  _pq_mcr_f32(PQ_BIQ0_CALC, PQ_BIQUAD, PQ_COMP0_ONLY, (x))

/* Fixed-point operations, unit selected by PQ_COMP0_ONLY. */
#define _pq_ln_fx0(x)      _pq_mcr_fx(PQ_LN, PQ_TRANS_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_inv_fx0(x)     _pq_mcr_fx(PQ_INV, PQ_TRANS_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_sqrt_fx0(x)    _pq_mcr_fx(PQ_SQRT, PQ_TRANS_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_invsqrt_fx0(x) _pq_mcr_fx(PQ_INVSQRT, PQ_TRANS_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_etox_fx0(x)    _pq_mcr_fx(PQ_ETOX, PQ_TRANS_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_etonx_fx0(x)   _pq_mcr_fx(PQ_ETONX, PQ_TRANS_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_sin_fx0(x)     _pq_mcr_fx(PQ_SIN, PQ_TRIG_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_cos_fx0(x)     _pq_mcr_fx(PQ_COS, PQ_TRIG_FIXED, PQ_COMP0_ONLY, (x))
#define _pq_biquad0_fx(x)  _pq_mcr_fx(PQ_BIQ0_CALC, PQ_BIQUAD_FIXED, PQ_COMP0_ONLY, (x))

/* Floating-point division; the operand is a single 64-bit value. */
#define _pq_div0(x) _pq_mcrr_div(PQ_COMP0_ONLY, (x))
#define _pq_div1(x) _pq_mcrr_div(PQ_COMP1_ONLY, (x))

/* Floating-point operations, unit selected by PQ_COMP1_ONLY. */
#define _pq_ln1(x)      _pq_mcr_f32(PQ_LN, PQ_TRANS, PQ_COMP1_ONLY, (x))
#define _pq_inv1(x)     _pq_mcr_f32(PQ_INV, PQ_TRANS, PQ_COMP1_ONLY, (x))
#define _pq_sqrt1(x)    _pq_mcr_f32(PQ_SQRT, PQ_TRANS, PQ_COMP1_ONLY, (x))
#define _pq_invsqrt1(x) _pq_mcr_f32(PQ_INVSQRT, PQ_TRANS, PQ_COMP1_ONLY, (x))
#define _pq_etox1(x)    _pq_mcr_f32(PQ_ETOX, PQ_TRANS, PQ_COMP1_ONLY, (x))
#define _pq_etonx1(x)   _pq_mcr_f32(PQ_ETONX, PQ_TRANS, PQ_COMP1_ONLY, (x))
#define _pq_sin1(x)     _pq_mcr_f32(PQ_SIN, PQ_TRIG, PQ_COMP1_ONLY, (x))
#define _pq_cos1(x)     _pq_mcr_f32(PQ_COS, PQ_TRIG, PQ_COMP1_ONLY, (x))
#define _pq_biquad1(x)  _pq_mcr_f32(PQ_BIQ1_CALC, PQ_BIQUAD, PQ_COMP1_ONLY, (x))

/* Fixed-point operations, unit selected by PQ_COMP1_ONLY. */
#define _pq_ln_fx1(x)      _pq_mcr_fx(PQ_LN, PQ_TRANS_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_inv_fx1(x)     _pq_mcr_fx(PQ_INV, PQ_TRANS_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_sqrt_fx1(x)    _pq_mcr_fx(PQ_SQRT, PQ_TRANS_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_invsqrt_fx1(x) _pq_mcr_fx(PQ_INVSQRT, PQ_TRANS_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_etox_fx1(x)    _pq_mcr_fx(PQ_ETOX, PQ_TRANS_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_etonx_fx1(x)   _pq_mcr_fx(PQ_ETONX, PQ_TRANS_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_sin_fx1(x)     _pq_mcr_fx(PQ_SIN, PQ_TRIG_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_cos_fx1(x)     _pq_mcr_fx(PQ_COS, PQ_TRIG_FIXED, PQ_COMP1_ONLY, (x))
#define _pq_biquad1_fx(x)  _pq_mcr_fx(PQ_BIQ1_CALC, PQ_BIQUAD_FIXED, PQ_COMP1_ONLY, (x))

/* Result reads: opc1 is 0 for the readMult macros and 1 for the readAdd macros. */
#define _pq_readMult0()    _pq_mrc(0, PQ_FLOAT32, PQ_COMP0_ONLY)
#define _pq_readAdd0()     _pq_mrc(1, PQ_FLOAT32, PQ_COMP0_ONLY)
#define _pq_readMult1()    _pq_mrc(0, PQ_FLOAT32, PQ_COMP1_ONLY)
#define _pq_readAdd1()     _pq_mrc(1, PQ_FLOAT32, PQ_COMP1_ONLY)
#define _pq_readMult0_fx() _pq_mrc(0, PQ_FIXEDPT, PQ_COMP0_ONLY)
#define _pq_readAdd0_fx()  _pq_mrc(1, PQ_FIXEDPT, PQ_COMP0_ONLY)
#define _pq_readMult1_fx() _pq_mrc(0, PQ_FIXEDPT, PQ_COMP1_ONLY)
#define _pq_readAdd1_fx()  _pq_mrc(1, PQ_FIXEDPT, PQ_COMP1_ONLY)
167 
/*
 * Argument bundles for the vector macros. Every PQ_xxx_INF expands to three
 * comma-separated values: operation code, a 0/1 flag, machine selector -
 * the order of the (BATCH_OPCODE, DOUBLE_READ_ADDERS, BATCH_MACHINE)
 * parameters taken by the vector macros in this file.
 * NOTE(review): a macro invocation is split into arguments before those
 * arguments are expanded, so a bundle counts as ONE argument when written
 * directly in a call; it needs a forwarding macro to be spread over three
 * parameters - verify how callers use these.
 */

/*! Parameter used for vector ln(x) */
#define PQ_LN_INF    PQ_LN, 1, PQ_TRANS
/*! Parameter used for vector 1/x */
#define PQ_INV_INF   PQ_INV, 0, PQ_TRANS
/*! Parameter used for vector sqrt(x) */
#define PQ_SQRT_INF  PQ_SQRT, 0, PQ_TRANS
/*! Parameter used for vector 1/sqrt(x) */
#define PQ_ISQRT_INF PQ_INVSQRT, 0, PQ_TRANS
/*! Parameter used for vector e^x */
#define PQ_ETOX_INF  PQ_ETOX, 0, PQ_TRANS
/*! Parameter used for vector e^(-x) */
#define PQ_ETONX_INF PQ_ETONX, 0, PQ_TRANS
/*! Parameter used for vector sin(x) */
#define PQ_SIN_INF   PQ_SIN, 1, PQ_TRIG
/*! Parameter used for vector cos(x) */
#define PQ_COS_INF   PQ_COS, 1, PQ_TRIG
184 
185 /*
186  * Workaround used in vector functions:
187  *
188  * 1. In floating sin/cos case, there must be at least 5 core clock cycles
189  *    between MCR and following MRRC
190  * 2. In fixed sin/cos case, there must be one NOP between two MCR
191  */
192 
193 /*
194  * Register assignment for the vector calculation assembly.
195  * r0: pSrc, r1: pDest, r2-r7: Data
196  */
197 
/*
 * Issue one operation on two operands held in fixed core registers
 * (floating-point register pairing).
 *
 * The first MCR writes the higher-numbered register with CRn = c2, the
 * second writes the lower-numbered register with CRn = c0 (numerically
 * PQ_FLOAT32 | PQ_COMP1_ONLY and PQ_FLOAT32 | PQ_COMP0_ONLY).
 * BATCH_OPCODE becomes opc1 and BATCH_MACHINE becomes opc2; both use the
 * "i" constraint and therefore must be compile-time constants.
 * The three macros differ only in the registers they send.
 */

/* Operands in r3 (c2) and r2 (c0). */
#define PQ_RUN_OPCODE_R3_R2(BATCH_OPCODE, BATCH_MACHINE) \
    __asm volatile(                                      \
        "    MCR  p0,%[opcode],r3,c2,c0,%[machine] \n"   \
        "    MCR  p0,%[opcode],r2,c0,c0,%[machine] \n"   \
        : /* no outputs */                               \
        : [opcode] "i"(BATCH_OPCODE),                    \
          [machine] "i"(BATCH_MACHINE))

/* Operands in r5 (c2) and r4 (c0). */
#define PQ_RUN_OPCODE_R5_R4(BATCH_OPCODE, BATCH_MACHINE) \
    __asm volatile(                                      \
        "    MCR  p0,%[opcode],r5,c2,c0,%[machine] \n"   \
        "    MCR  p0,%[opcode],r4,c0,c0,%[machine] \n"   \
        : /* no outputs */                               \
        : [opcode] "i"(BATCH_OPCODE),                    \
          [machine] "i"(BATCH_MACHINE))

/* Operands in r7 (c2) and r6 (c0). */
#define PQ_RUN_OPCODE_R7_R6(BATCH_OPCODE, BATCH_MACHINE) \
    __asm volatile(                                      \
        "    MCR  p0,%[opcode],r7,c2,c0,%[machine] \n"   \
        "    MCR  p0,%[opcode],r6,c0,c0,%[machine] \n"   \
        : /* no outputs */                               \
        : [opcode] "i"(BATCH_OPCODE),                    \
          [machine] "i"(BATCH_MACHINE))
215 
/*
 * Process one batch of 8 floating-point elements, two at a time.
 *
 * Register contract (set up by PQ_Initiate_Vector_Func): r0 = source
 * pointer, r1 = destination pointer, r2-r7 = data. On entry r2/r3 hold the
 * first two inputs of the batch.
 *
 * middle              Non-zero when a batch was processed just before this
 *                     one: r4/r5 then hold its last two results, which are
 *                     stored first.
 * last                Non-zero for the final batch: the first two inputs of
 *                     a following batch are not preloaded and the last two
 *                     results are stored before leaving.
 * BATCH_OPCODE        Operation code (opc1 of the MCR), compile-time constant.
 * DOUBLE_READ_ADDERS  Non-zero reads results with "MRRC ... c1", zero with
 *                     "MRRC ... c0".
 * BATCH_MACHINE       Machine selector (opc2 of the MCR), compile-time constant.
 *
 * The NOPs implement the timing workaround described above for the
 * floating sin/cos case (at least 5 core clock cycles between an MCR and the
 * following MRRC); the three NOPs in the "last" branch stand in for the
 * skipped preload.
 *
 * The expansion is a single compound statement, so that
 * "if (c) PQ_Vector8_FP(...);" guards the whole batch. Plain braces are used
 * instead of do/while(0) so that call sites written without a trailing
 * semicolon keep compiling. Every load/store statement carries a "memory"
 * clobber so the compiler does not cache or reorder accesses to the source
 * and destination buffers across it.
 *
 * NOTE(review): the data registers are shared between separate asm
 * statements without being declared to the compiler; this relies on the
 * arguments being constants and on the compiler emitting nothing in between.
 */
#define PQ_Vector8_FP(middle, last, BATCH_OPCODE, DOUBLE_READ_ADDERS, BATCH_MACHINE)                \
    {                                                                                               \
        PQ_RUN_OPCODE_R3_R2(BATCH_OPCODE, BATCH_MACHINE);                                           \
        if (middle)                                                                                 \
        {                                                                                           \
            __asm volatile("STRD r4,r5,[r1],#8" : : : "memory"); /* store fourth two results */     \
        }                                                                                           \
        __asm volatile("LDMIA  r0!,{r4-r5}" : : : "memory"); /* load next 2 datas */                \
        __asm volatile("NOP");                                                                      \
        __asm volatile("NOP");                                                                      \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r2,r3,c1");                                                  \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r2,r3,c0");                                                  \
        }                                                                                           \
        PQ_RUN_OPCODE_R5_R4(BATCH_OPCODE, BATCH_MACHINE);                                           \
        __asm volatile("STRD r2,r3,[r1],#8" : : : "memory"); /* store first two results */          \
        __asm volatile("LDMIA  r0!,{r6-r7}" : : : "memory"); /* load next 2 datas */                \
        __asm volatile("NOP");                                                                      \
        __asm volatile("NOP");                                                                      \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r4,r5,c1");                                                  \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r4,r5,c0");                                                  \
        }                                                                                           \
        PQ_RUN_OPCODE_R7_R6(BATCH_OPCODE, BATCH_MACHINE);                                           \
        __asm volatile("STRD r4,r5,[r1],#8" : : : "memory"); /* store second two results */         \
        __asm volatile("LDRD r4,r5,[r0],#8" : : : "memory"); /* load last 2 of the 8 */             \
        __asm volatile("NOP");                                                                      \
        __asm volatile("NOP");                                                                      \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r6,r7,c1");                                                  \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r6,r7,c0");                                                  \
        }                                                                                           \
        PQ_RUN_OPCODE_R5_R4(BATCH_OPCODE, BATCH_MACHINE);                                           \
        __asm volatile("STRD r6,r7,[r1],#8" : : : "memory"); /* store third two results */          \
        if (!last)                                                                                  \
        {                                                                                           \
            __asm volatile("LDRD r2,r3,[r0],#8" : : : "memory"); /* load first two of next 8 */     \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("NOP");                                                                  \
            __asm volatile("NOP");                                                                  \
            __asm volatile("NOP");                                                                  \
        }                                                                                           \
        __asm volatile("NOP");                                                                      \
        __asm volatile("NOP");                                                                      \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r4,r5,c1");                                                  \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRRC p0,#0,r4,r5,c0");                                                  \
        }                                                                                           \
        if (last)                                                                                   \
        {                                                                                           \
            __asm volatile("STRD r4,r5,[r1],#8" : : : "memory"); /* store fourth two results */     \
        }                                                                                           \
    }
285 
/*
 * Issue one operation on two operands held in fixed core registers
 * (fixed-point register pairing).
 *
 * The first MCR writes the lower-numbered register with CRn = c1, the second
 * writes the higher-numbered register with CRn = c3 (numerically
 * PQ_FIXEDPT | PQ_COMP0_ONLY and PQ_FIXEDPT | PQ_COMP1_ONLY). The NOP
 * between them is the fixed sin/cos workaround described above ("one NOP
 * between two MCR"). BATCH_OPCODE becomes opc1 and BATCH_MACHINE becomes
 * opc2; both use the "i" constraint and must be compile-time constants.
 * The three macros differ only in the registers they send.
 */

/* Operands in r2 (c1) and r3 (c3). */
#define PQ_RUN_OPCODE_R2_R3(BATCH_OPCODE, BATCH_MACHINE) \
    __asm volatile(                                      \
        "    MCR  p0,%[opcode],r2,c1,c0,%[machine] \n"   \
        "    NOP                                   \n"   \
        "    MCR  p0,%[opcode],r3,c3,c0,%[machine] \n"   \
        : /* no outputs */                               \
        : [opcode] "i"(BATCH_OPCODE),                    \
          [machine] "i"(BATCH_MACHINE))

/* Operands in r4 (c1) and r5 (c3). */
#define PQ_RUN_OPCODE_R4_R5(BATCH_OPCODE, BATCH_MACHINE) \
    __asm volatile(                                      \
        "    MCR  p0,%[opcode],r4,c1,c0,%[machine] \n"   \
        "    NOP                                   \n"   \
        "    MCR  p0,%[opcode],r5,c3,c0,%[machine] \n"   \
        : /* no outputs */                               \
        : [opcode] "i"(BATCH_OPCODE),                    \
          [machine] "i"(BATCH_MACHINE))

/* Operands in r6 (c1) and r7 (c3). */
#define PQ_RUN_OPCODE_R6_R7(BATCH_OPCODE, BATCH_MACHINE) \
    __asm volatile(                                      \
        "    MCR  p0,%[opcode],r6,c1,c0,%[machine] \n"   \
        "    NOP                                   \n"   \
        "    MCR  p0,%[opcode],r7,c3,c0,%[machine] \n"   \
        : /* no outputs */                               \
        : [opcode] "i"(BATCH_OPCODE),                    \
          [machine] "i"(BATCH_MACHINE))
306 
/*
 * Process one batch of 8 fixed-point elements, two at a time.
 *
 * Register contract (set up by PQ_Initiate_Vector_Func): r0 = source
 * pointer, r1 = destination pointer, r2-r7 = data. On entry r2/r3 hold the
 * first two inputs of the batch.
 *
 * middle              Non-zero when a batch was processed just before this
 *                     one: r4/r5 then hold its last two results, which are
 *                     stored first.
 * last                Non-zero for the final batch: the first two inputs of
 *                     a following batch are not preloaded and the last two
 *                     results are stored before leaving.
 * BATCH_OPCODE        Operation code (opc1 of the MCR), compile-time constant.
 * DOUBLE_READ_ADDERS  Non-zero reads each result with MRC opc1 = 1, zero
 *                     with MRC opc1 = 0 (CRn c1 for the first register of a
 *                     pair, c3 for the second).
 * BATCH_MACHINE       Machine selector (opc2 of the MCR), compile-time constant.
 *
 * The expansion is a single compound statement, so that
 * "if (c) PQ_Vector8_FX(...);" guards the whole batch. Plain braces are used
 * instead of do/while(0) so that call sites written without a trailing
 * semicolon keep compiling. Every load/store statement carries a "memory"
 * clobber so the compiler does not cache or reorder accesses to the source
 * and destination buffers across it.
 *
 * NOTE(review): the data registers are shared between separate asm
 * statements without being declared to the compiler; this relies on the
 * arguments being constants and on the compiler emitting nothing in between.
 */
#define PQ_Vector8_FX(middle, last, BATCH_OPCODE, DOUBLE_READ_ADDERS, BATCH_MACHINE)                \
    {                                                                                               \
        PQ_RUN_OPCODE_R2_R3(BATCH_OPCODE, BATCH_MACHINE);                                           \
        if (middle)                                                                                 \
        {                                                                                           \
            __asm volatile("STRD r4,r5,[r1],#8" : : : "memory"); /* store fourth two results */     \
        }                                                                                           \
        __asm volatile("LDMIA  r0!,{r4-r7}" : : : "memory"); /* load next 4 datas */                \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRC  p0,#0x1,r2,c1,c0,#0");                                             \
            __asm volatile("MRC  p0,#0x1,r3,c3,c0,#0");                                             \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRC  p0,#0,r2,c1,c0,#0");                                               \
            __asm volatile("MRC  p0,#0,r3,c3,c0,#0");                                               \
        }                                                                                           \
        PQ_RUN_OPCODE_R4_R5(BATCH_OPCODE, BATCH_MACHINE);                                           \
        __asm volatile("STRD r2,r3,[r1],#8" : : : "memory"); /* store first two results */          \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRC  p0,#0x1,r4,c1,c0,#0");                                             \
            __asm volatile("MRC  p0,#0x1,r5,c3,c0,#0");                                             \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRC  p0,#0,r4,c1,c0,#0");                                               \
            __asm volatile("MRC  p0,#0,r5,c3,c0,#0");                                               \
        }                                                                                           \
        PQ_RUN_OPCODE_R6_R7(BATCH_OPCODE, BATCH_MACHINE);                                           \
        __asm volatile("STRD r4,r5,[r1],#8" : : : "memory"); /* store second two results */         \
        __asm volatile("LDRD r4,r5,[r0],#8" : : : "memory"); /* load last 2 of the 8 */             \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRC  p0,#0x1,r6,c1,c0,#0");                                             \
            __asm volatile("MRC  p0,#0x1,r7,c3,c0,#0");                                             \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRC  p0,#0,r6,c1,c0,#0");                                               \
            __asm volatile("MRC  p0,#0,r7,c3,c0,#0");                                               \
        }                                                                                           \
        PQ_RUN_OPCODE_R4_R5(BATCH_OPCODE, BATCH_MACHINE);                                           \
        __asm volatile("STRD r6,r7,[r1],#8" : : : "memory"); /* store third two results */          \
        if (!last)                                                                                  \
        {                                                                                           \
            __asm volatile("LDRD r2,r3,[r0],#8" : : : "memory"); /* load first two of next 8 */     \
        }                                                                                           \
        if (DOUBLE_READ_ADDERS)                                                                     \
        {                                                                                           \
            __asm volatile("MRC  p0,#0x1,r4,c1,c0,#0");                                             \
            __asm volatile("MRC  p0,#0x1,r5,c3,c0,#0");                                             \
        }                                                                                           \
        else                                                                                        \
        {                                                                                           \
            __asm volatile("MRC  p0,#0,r4,c1,c0,#0");                                               \
            __asm volatile("MRC  p0,#0,r5,c3,c0,#0");                                               \
        }                                                                                           \
        if (last)                                                                                   \
        {                                                                                           \
            __asm volatile("STRD r4,r5,[r1],#8" : : : "memory"); /* store fourth two results */     \
        }                                                                                           \
    }
367 
/*!
 * @brief Start 32-bit data vector calculation.
 *
 * Start the vector calculation, the input data could be float, int32_t or Q31.
 *
 * The macro copies pSrc to r0 and pDst to r1, pushes r2-r7 on the stack and
 * loads the first two source words into r2/r3 (advancing r0 by 8 bytes).
 * The pushed registers are restored by PQ_End_Vector_Func(), which must be
 * called when the calculation is finished.
 *
 * The asm reads the source buffer and writes the stack, so it carries a
 * "memory" clobber: the compiler must have written the source data to memory
 * before this point and must not keep memory values cached across it.
 *
 * @param pSrc  Pointer to the source data.
 * @param pDst  Pointer to the destination data.
 */
#define PQ_Initiate_Vector_Func(pSrc, pDst)      \
    __asm volatile(                              \
        "MOV r0, %[psrc]         \n"             \
        "MOV r1, %[pdst]         \n"             \
        "PUSH {r2-r7}            \n"             \
        "LDRD r2,r3,[r0],#8      \n"             \
        : /* no outputs */                       \
        : [psrc] "r"(pSrc), [pdst] "r"(pDst)     \
        : "r0", "r1", "memory")
384 
/*!
 * @brief End vector calculation.
 *
 * Pops r2-r7, the registers that PQ_Initiate_Vector_Func() pushed. It has
 * to be called once the vector calculation started by that macro is done.
 */
#define PQ_End_Vector_Func() __asm volatile("POP {r2-r7}")
391 
392 /*
393  * Register assignment for the vector calculation assembly.
394  * r0: pSrc, r1: pDest, r2: length, r3: middle, r4-r9: Data, r10:dra
395  */
396 
/*!
 * @brief Start 32-bit data vector calculation.
 *
 * Start the vector calculation, the input data could be float, int32_t or Q31.
 *
 * The macro copies PSRC to r0, PDST to r1 and LENGTH to r2, pushes r3-r10 on
 * the stack, clears r3 and r10 and loads the first two source words into
 * r4/r5 (advancing r0 by 8 bytes). The pushed registers are restored by
 * PQ_EndVector().
 *
 * The asm reads the source buffer and writes the stack, so it carries a
 * "memory" clobber: the compiler must have written the source data to memory
 * before this point and must not keep memory values cached across it.
 *
 * @param PSRC  Pointer to the source data.
 * @param PDST  Pointer to the destination data.
 * @param LENGTH Number of the data, must be multiple of 8.
 */
#define PQ_StartVector(PSRC, PDST, LENGTH)                             \
    __asm volatile(                                                    \
        "MOV r0, %[psrc]         \n"                                   \
        "MOV r1, %[pdst]         \n"                                   \
        "MOV r2, %[length]       \n"                                   \
        "PUSH {r3-r10}           \n"                                   \
        "MOV r3, #0              \n"                                   \
        "MOV r10, #0             \n"                                   \
        "LDRD r4,r5,[r0],#8      \n"                                   \
        : /* no outputs */                                             \
        : [psrc] "r"(PSRC), [pdst] "r"(PDST), [length] "r"(LENGTH)     \
        : "r0", "r1", "r2", "memory")
417 
/*!
 * @brief Start 16-bit data vector calculation.
 *
 * Start the vector calculation, the input data could be int16_t. This macro
 * should be used with @ref PQ_Vector8Fixed16.
 *
 * The macro copies PSRC to r0, PDST to r1 and LENGTH to r2, pushes r3-r10 on
 * the stack, clears r3 and loads the first two source halfwords,
 * sign-extended, into r4 and r5 (advancing r0 by 2 bytes each). The pushed
 * registers are restored by PQ_EndVector().
 *
 * The asm reads the source buffer and writes the stack, so it carries a
 * "memory" clobber: the compiler must have written the source data to memory
 * before this point and must not keep memory values cached across it.
 *
 * @param PSRC  Pointer to the source data.
 * @param PDST  Pointer to the destination data.
 * @param LENGTH Number of the data, must be multiple of 8.
 */
#define PQ_StartVectorFixed16(PSRC, PDST, LENGTH)                      \
    __asm volatile(                                                    \
        "MOV r0, %[psrc]          \n"                                  \
        "MOV r1, %[pdst]          \n"                                  \
        "MOV r2, %[length]        \n"                                  \
        "PUSH {r3-r10}            \n"                                  \
        "MOV r3, #0               \n"                                  \
        "LDRSH r4,[r0],#2         \n"                                  \
        "LDRSH r5,[r0],#2         \n"                                  \
        : /* no outputs */                                             \
        : [psrc] "r"(PSRC), [pdst] "r"(PDST), [length] "r"(LENGTH)     \
        : "r0", "r1", "r2", "memory")
439 
/*!
 * @brief Start Q15-bit data vector calculation.
 *
 * Start the vector calculation, the input data could be Q15. This macro
 * should be used with @ref PQ_Vector8Q15. It is dedicated to the
 * SinQ15/CosQ15 vector calculation. Because PowerQuad only supports the Q31
 * Sin/Cos fixed function, the input Q15 data is shifted left by 16 bits
 * first and, after the Q31 calculation, the output data is shifted right by
 * 16 bits.
 *
 * The macro copies PSRC to r0, PDST to r1 and LENGTH to r2, pushes r3-r10 on
 * the stack, clears r3 and loads one 32-bit word (two Q15 values) into r5.
 * r4 receives the low halfword moved to bits 31:16; r5 keeps the high
 * halfword with its low 16 bits cleared. The pushed registers are restored
 * by PQ_EndVector().
 *
 * The asm reads the source buffer and writes the stack, so it carries a
 * "memory" clobber: the compiler must have written the source data to memory
 * before this point and must not keep memory values cached across it.
 *
 * @param PSRC  Pointer to the source data.
 * @param PDST  Pointer to the destination data.
 * @param LENGTH Number of the data, must be multiple of 8.
 */
#define PQ_StartVectorQ15(PSRC, PDST, LENGTH)                          \
    __asm volatile(                                                    \
        "MOV r0, %[psrc]          \n"                                  \
        "MOV r1, %[pdst]          \n"                                  \
        "MOV r2, %[length]        \n"                                  \
        "PUSH {r3-r10}            \n"                                  \
        "MOV r3, #0               \n"                                  \
        "LDR r5,[r0],#4           \n"                                  \
        "LSL r4,r5,#16            \n"                                  \
        "BFC r5,#0,#16            \n"                                  \
        : /* no outputs */                                             \
        : [psrc] "r"(PSRC), [pdst] "r"(PDST), [length] "r"(LENGTH)     \
        : "r0", "r1", "r2", "memory")
465 
/*!
 * @brief End vector calculation.
 *
 * Pops r3-r10, the registers pushed by PQ_StartVector(),
 * PQ_StartVectorFixed16() and PQ_StartVectorQ15(). It has to be called once
 * the vector calculation started by one of those macros is done.
 */
#define PQ_EndVector() \
    __asm volatile("POP {r3-r10}            \n")
472 
/*!
 * @brief Float data vector calculation.
 *
 * Float data vector calculation, the input data should be float. The three
 * arguments are the values bundled by PQ_LN_INF, PQ_INV_INF, PQ_SQRT_INF,
 * PQ_ISQRT_INF, PQ_ETOX_INF and PQ_ETONX_INF. A bundle cannot be written
 * directly in the call, because it would count as a single macro argument;
 * pass its three values instead.
 * For example, to calculate sqrt of a vector, use like this:
 * @code
   #define VECTOR_LEN 8
   float input[VECTOR_LEN] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
   float output[VECTOR_LEN];

   PQ_StartVector(input, output, VECTOR_LEN);
   PQ_Vector8F32(PQ_SQRT, 0, PQ_TRANS);
   PQ_EndVector();
   @endcode
 *
 * Register contract (set up by PQ_StartVector): r0 = source pointer,
 * r1 = destination pointer, r2 = remaining length, r3 = "middle" flag,
 * r4-r9 = data, r10 = DOUBLE_READ_ADDERS.
 *
 * Each pass of the loop at label 1 handles 8 elements and subtracts 8 from
 * r2; the loop ends when r2 reaches zero, so the length given to
 * PQ_StartVector must be a non-zero multiple of 8.
 *
 * The asm changes the condition flags (CMP/SUBS) and reads and writes the
 * buffers, so "cc" and "memory" are listed as clobbers.
 *
 * @param BATCH_OPCODE        Operation code (opc1 of the MCR), compile-time constant.
 * @param DOUBLE_READ_ADDERS  Non-zero reads results with "MRRC ... c1", zero
 *                            with "MRRC ... c0"; compile-time constant.
 * @param BATCH_MACHINE       Machine selector (opc2 of the MCR), compile-time constant.
 */
#define PQ_Vector8F32(BATCH_OPCODE, DOUBLE_READ_ADDERS, BATCH_MACHINE)                     \
    __asm volatile(                                                                        \
        "1:                                        \n"                                     \
        "    MCR  p0,%[opcode],r5,c2,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r4,c0,c0,%[machine] \n"                                     \
        "    CMP  r3, #0                           \n"                                     \
        "    ITE  NE                               \n"                                     \
        "    STRDNE r6,r7,[r1],#8                  \n" /* store fourth two results */      \
        "    MOVEQ r3, #1                          \n" /* middle = 1 */                    \
        "    LDMIA  r0!,{r6-r9}                    \n" /* load next 4 datas */             \
        "    MOV  r10,%[dra]                       \n"                                     \
        "    CMP  r10, #0                          \n"                                     \
        "    ITE  NE                               \n"                                     \
        "    MRRCNE  p0,#0,r4,r5,c1                \n"                                     \
        "    MRRCEQ  p0,#0,r4,r5,c0                \n"                                     \
        "    MCR  p0,%[opcode],r7,c2,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r6,c0,c0,%[machine] \n"                                     \
        "    STRD r4,r5,[r1],#8                    \n" /* store first two results */       \
        "    MOV  r10,%[dra]                       \n"                                     \
        "    CMP  r10, #0                          \n"                                     \
        "    ITE  NE                               \n"                                     \
        "    MRRCNE  p0,#0,r6,r7,c1                \n"                                     \
        "    MRRCEQ  p0,#0,r6,r7,c0                \n"                                     \
        "    MCR  p0,%[opcode],r9,c2,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r8,c0,c0,%[machine] \n"                                     \
        "    STRD r6,r7,[r1],#8                    \n" /* store second two results */      \
        "    LDRD r6,r7,[r0],#8                    \n" /* load last 2 of the 8 */          \
        "    CMP  r10, #0                          \n"                                     \
        "    ITE  NE                               \n"                                     \
        "    MRRCNE  p0,#0,r8,r9,c1                \n"                                     \
        "    MRRCEQ  p0,#0,r8,r9,c0                \n"                                     \
        "    MCR  p0,%[opcode],r7,c2,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r6,c0,c0,%[machine] \n"                                     \
        "    STRD r8,r9,[r1],#8                    \n" /* store third two results */       \
        "    SUBS r2, r2, #8                       \n" /* length -= 8; if (length != 0) */ \
        "    IT   NE                               \n"                                     \
        "    LDRDNE r4,r5,[r0],#8                  \n" /* load first two of next 8 */      \
        "    CMP  r10, #0                          \n"                                     \
        "    ITE  NE                               \n"                                     \
        "    MRRCNE  p0,#0,r6,r7,c1                \n"                                     \
        "    MRRCEQ  p0,#0,r6,r7,c0                \n"                                     \
        "    CMP  r2, #0                           \n" /* if (length == 0) */              \
        "    BNE  1b                               \n"                                     \
        "    STRD r6,r7,[r1],#8                    \n" /* store fourth two results */      \
        : /* no outputs */                                                                 \
        : [opcode] "i"(BATCH_OPCODE), [dra] "i"(DOUBLE_READ_ADDERS),                       \
          [machine] "i"(BATCH_MACHINE)                                                     \
        : "cc", "memory")
536 
/*!
 * @brief Fixed-point 32-bit data vector calculation.
 *
 * Fixed-point data vector calculation; input and output elements are 32-bit integers.
 * The parameter could be PQ_LN_INF, PQ_INV_INF, PQ_SQRT_INF, PQ_ISQRT_INF, PQ_ETOX_INF,
 * PQ_ETONX_INF, PQ_SIN_INF, PQ_COS_INF. When this macro is used for sin/cos calculation,
 * the input data should be in the format Q1.31.
 *
 * The three arguments are handed to the assembler as immediate ("i") operands, so each
 * of them must be an integer constant expression.
 *
 * @param BATCH_OPCODE       opc1 field of every MCR that writes an input to coprocessor p0.
 * @param DOUBLE_READ_ADDERS opc1 field of every MRC that reads a result back.
 * @param BATCH_MACHINE      opc2 field of every MCR that writes an input.
 *
 * The loop handles 8 elements per pass and works on fixed core registers; the asm
 * statement declares no outputs and no clobbers:
 * - r0: source pointer, post-incremented by every load.
 * - r1: destination pointer, post-incremented by every store.
 * - r2: remaining element count. 8 is subtracted per pass and the loop only exits when
 *       the count reaches exactly zero, so the length must be a non-zero multiple of 8.
 * - r3: "middle" flag. It must be zero on entry (nothing is pending yet); the first pass
 *       sets it to 1 so that later passes begin by storing the fourth result pair that
 *       the previous pass left in r6/r7.
 * - r4, r5: must already hold the first two input elements on entry.
 * - r6-r9: scratch. The condition flags are modified as well.
 *
 * NOTE(review): r0-r5 are presumably prepared by PQ_StartVector() and the saved registers
 * restored by PQ_EndVector(), as the example suggests - confirm against their definitions.
 * NOTE(review): PQ_SQRT_INF presumably stands for the opcode/read/machine triple; confirm
 * how it is passed to this three-argument macro.
 *
 * For example, to calculate sqrt of a vector, use like this:
 * @code
   #define VECTOR_LEN 8
   int32_t input[VECTOR_LEN] = {1, 4, 9, 16, 25, 36, 49, 64};
   int32_t output[VECTOR_LEN];

   PQ_StartVector(input, output, VECTOR_LEN);
   PQ_Vector8Fixed32(PQ_SQRT_INF);
   PQ_EndVector();
   @endcode
 *
 */
#define PQ_Vector8Fixed32(BATCH_OPCODE, DOUBLE_READ_ADDERS, BATCH_MACHINE)                 \
    __asm volatile(                                                                        \
        "1:                                        \n"                                     \
        "    MCR  p0,%[opcode],r4,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r5,c3,c0,%[machine] \n"                                     \
        "    CMP  r3, #0                           \n"                                     \
        "    ITE  NE                               \n"                                     \
        "    STRDNE r6,r7,[r1],#8                  \n" /* store fourth two results */      \
        "    MOVEQ r3, #1                          \n" /* middle = 1 */                    \
        "    LDMIA  r0!,{r6-r9}                    \n" /* load next 4 datas */             \
        "    MRC  p0,%[dra],r4,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r5,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r6,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r7,c3,c0,%[machine] \n"                                     \
        "    STRD r4,r5,[r1],#8                    \n" /* store first two results */       \
        "    MRC  p0,%[dra],r6,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r7,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r8,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r9,c3,c0,%[machine] \n"                                     \
        "    STRD r6,r7,[r1],#8                    \n" /* store second two results */      \
        "    LDRD r6,r7,[r0],#8                    \n" /* load last 2 of the 8 */          \
        "    MRC  p0,%[dra],r8,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r9,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r6,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r7,c3,c0,%[machine] \n"                                     \
        "    STRD r8,r9,[r1],#8                    \n" /* store third two results */       \
        "    SUBS r2, r2, #8                       \n" /* length -= 8; if (length != 0) */ \
        "    IT   NE                               \n"                                     \
        "    LDRDNE r4,r5,[r0],#8                  \n" /* load first two of next 8 */      \
        "    MRC  p0,%[dra],r6,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r7,c3,c0,#0            \n"                                     \
        "    CMP  r2, #0                           \n" /* if (length == 0) */              \
        "    BNE  1b                               \n"                                     \
        "    STRD r6,r7,[r1],#8                    \n" /* store fourth two results */      \
        ::[opcode] "i"(BATCH_OPCODE),                                                      \
        [dra] "i"(DOUBLE_READ_ADDERS), [machine] "i"(BATCH_MACHINE))
596 
/*!
 * @brief Fixed-point 16-bit data vector calculation.
 *
 * Fixed-point data vector calculation; input and output elements are 16-bit integers.
 * The parameter could be PQ_LN_INF, PQ_INV_INF, PQ_SQRT_INF, PQ_ISQRT_INF, PQ_ETOX_INF,
 * PQ_ETONX_INF.
 *
 * Inputs are fetched with LDRSH (sign-extended halfword loads) and the low halfword of
 * every result is written with STRH, so both pointers advance by 2 bytes per element.
 *
 * The three arguments are handed to the assembler as immediate ("i") operands, so each
 * of them must be an integer constant expression.
 *
 * @param BATCH_OPCODE       opc1 field of every MCR that writes an input to coprocessor p0.
 * @param DOUBLE_READ_ADDERS opc1 field of every MRC that reads a result back.
 * @param BATCH_MACHINE      opc2 field of every MCR that writes an input.
 *
 * The loop handles 8 elements per pass and works on fixed core registers; the asm
 * statement declares no outputs and no clobbers:
 * - r0: source pointer, post-incremented by every load.
 * - r1: destination pointer, post-incremented by every store.
 * - r2: remaining element count. 8 is subtracted per pass and the loop only exits when
 *       the count reaches exactly zero, so the length must be a non-zero multiple of 8.
 * - r3: "middle" flag. It must be zero on entry (nothing is pending yet); the first pass
 *       sets it to 1 so that later passes begin by storing the fourth result pair that
 *       the previous pass left in r6/r7.
 * - r4, r5: must already hold the first two input elements on entry.
 * - r6-r9: scratch. The condition flags are modified as well.
 *
 * NOTE(review): r0-r5 are presumably prepared by PQ_StartVector() and the saved registers
 * restored by PQ_EndVector(), as the example suggests - confirm against their definitions.
 * NOTE(review): PQ_SQRT_INF presumably stands for the opcode/read/machine triple; confirm
 * how it is passed to this three-argument macro.
 *
 * For example, to calculate sqrt of a vector, use like this:
 * @code
   #define VECTOR_LEN 8
   int16_t input[VECTOR_LEN] = {1, 4, 9, 16, 25, 36, 49, 64};
   int16_t output[VECTOR_LEN];

   PQ_StartVector(input, output, VECTOR_LEN);
   PQ_Vector8Fixed16(PQ_SQRT_INF);
   PQ_EndVector();
   @endcode
 *
 */
#define PQ_Vector8Fixed16(BATCH_OPCODE, DOUBLE_READ_ADDERS, BATCH_MACHINE)                 \
    __asm volatile(                                                                        \
        "1:                                        \n"                                     \
        "    MCR  p0,%[opcode],r4,c1,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r5,c3,c0,%[machine] \n"                                     \
        "    CMP  r3, #0                           \n"                                     \
        "    ITTE NE                               \n"                                     \
        "    STRHNE r6,[r1],#2                     \n" /* store fourth two results */      \
        "    STRHNE r7,[r1],#2                     \n" /* store fourth two results */      \
        "    MOVEQ r3, #1                          \n" /* middle = 1 */                    \
        "    LDRSH r6,[r0],#2                      \n" /* load next 2 of the 8 */          \
        "    LDRSH r7,[r0],#2                      \n" /* load next 2 of the 8 */          \
        "    MRC  p0,%[dra],r4,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r5,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r6,c1,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r7,c3,c0,%[machine] \n"                                     \
        "    STRH r4,[r1],#2                       \n" /* store first two results */       \
        "    STRH r5,[r1],#2                       \n" /* store first two results */       \
        "    LDRSH r8,[r0],#2                      \n" /* load next 2 of the 8 */          \
        "    LDRSH r9,[r0],#2                      \n" /* load next 2 of the 8 */          \
        "    MRC  p0,%[dra],r6,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r7,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r8,c1,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r9,c3,c0,%[machine] \n"                                     \
        "    STRH r6,[r1],#2                       \n"  /* store second two results */     \
        "    STRH r7,[r1],#2                       \n"  /* store second two results */     \
        "    LDRSH r6,[r0],#2                       \n" /* load last 2 of the 8 */         \
        "    LDRSH r7,[r0],#2                       \n" /* load last 2 of the 8 */         \
        "    MRC  p0,%[dra],r8,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r9,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r6,c1,c0,%[machine] \n"                                     \
        "    MCR  p0,%[opcode],r7,c3,c0,%[machine] \n"                                     \
        "    STRH r8,[r1],#2                       \n" /* store third two results */       \
        "    STRH r9,[r1],#2                       \n" /* store third two results */       \
        "    SUBS r2, r2, #8                       \n" /* length -= 8; if (length != 0) */ \
        "    ITT  NE                               \n"                                     \
        "    LDRSHNE r4,[r0],#2                    \n" /* load first two of next 8 */      \
        "    LDRSHNE r5,[r0],#2                    \n" /* load first two of next 8 */      \
        "    MRC  p0,%[dra],r6,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r7,c3,c0,#0            \n"                                     \
        "    CMP  r2, #0                           \n" /* if (length == 0) */              \
        "    BNE  1b                               \n"                                     \
        "    STRH r6,[r1],#2                       \n" /* store fourth two results */      \
        "    STRH r7,[r1],#2                       \n" /* store fourth two results */      \
        ::[opcode] "i"(BATCH_OPCODE),                                                      \
        [dra] "i"(DOUBLE_READ_ADDERS), [machine] "i"(BATCH_MACHINE))
660 
/*!
 * @brief Q15 data vector calculation.
 *
 * Q15 data vector calculation. This macro should only be used for sin/cos Q15 calculation,
 * and the coprocessor output prescaler must be set to 31 before it is used. The input
 * range -1 to 1 means -pi to pi.
 *
 * Samples are moved two at a time as 32-bit words. Each word loaded from the source is
 * split into two registers so that either 16-bit half sits in the upper halfword (the low
 * half is shifted left by 16, the high half is kept and its low 16 bits are cleared)
 * before being written to the coprocessor. The upper halfwords of the two results read
 * back are packed into one word, in the same low/high order, and stored.
 *
 * The three arguments are handed to the assembler as immediate ("i") operands, so each
 * of them must be an integer constant expression.
 *
 * @param BATCH_OPCODE       opc1 field of every MCR that writes an input to coprocessor p0.
 * @param DOUBLE_READ_ADDERS opc1 field of every MRC that reads a result back.
 * @param BATCH_MACHINE      opc2 field of every MCR that writes an input.
 *
 * The loop handles 8 samples per pass and works on fixed core registers; the asm
 * statement declares no outputs and no clobbers:
 * - r0: source pointer, advanced by 4 bytes per word load.
 * - r1: destination pointer, advanced by 4 bytes per word store.
 * - r2: remaining sample count. 8 is subtracted per pass and the loop only exits when
 *       the count reaches exactly zero, so the length must be a non-zero multiple of 8.
 * - r3: "middle" flag. It must be zero on entry (nothing is pending yet); the first pass
 *       sets it to 1 so that later passes begin by packing and storing the fourth result
 *       pair that the previous pass left in r6/r7.
 * - r4, r5: must already hold the first two samples on entry, each in the upper halfword.
 * - r6-r9: scratch. The condition flags are modified as well.
 *
 * NOTE(review): r0-r5 are presumably prepared by PQ_StartVectorQ15() and the saved
 * registers restored by PQ_EndVector(), as the example suggests - confirm against their
 * definitions.
 * NOTE(review): PQ_SIN_INF presumably stands for the opcode/read/machine triple; confirm
 * how it is passed to this three-argument macro.
 *
 * For example, to calculate sin of a vector, use like this:
 * @code
   #define VECTOR_LEN 8
   int16_t input[VECTOR_LEN] = {...};
   int16_t output[VECTOR_LEN];
   const pq_prescale_t prescale =
   {
       .inputPrescale = 0,
       .outputPrescale = 31,
       .outputSaturate = 0
   };

   PQ_SetCoprocessorScaler(POWERQUAD, &prescale);

   PQ_StartVectorQ15(input, output, VECTOR_LEN);
   PQ_Vector8Q15(PQ_SIN_INF);
   PQ_EndVector();
   @endcode
 *
 */
#define PQ_Vector8Q15(BATCH_OPCODE, DOUBLE_READ_ADDERS, BATCH_MACHINE)                     \
    __asm volatile(                                                                        \
        "1:                                        \n"                                     \
        "    MCR  p0,%[opcode],r4,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r5,c3,c0,%[machine] \n"                                     \
        "    CMP  r3, #0                           \n"                                     \
        "    ITTTE NE                              \n"                                     \
        "    LSRNE r6,r6,#16                       \n" /* store fourth two results */      \
        "    BFINE r7,r6,#0,#16                    \n" /* store fourth two results */      \
        "    STRNE r7,[r1],#4                      \n" /* store fourth two results */      \
        "    MOVEQ r3, #1                          \n" /* middle = 1 */                    \
        "    LDR r7,[r0],#4                        \n" /* load next 2 of the 8 */          \
        "    LSL r6,r7,#16                         \n" /* load next 2 of the 8 */          \
        "    BFC r7,#0,#16                         \n" /* load next 2 of the 8 */          \
        "    MRC  p0,%[dra],r4,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r5,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r6,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r7,c3,c0,%[machine] \n"                                     \
        "    LSR r4,r4,#16                         \n" /* store first two results */       \
        "    BFI r5,r4,#0,#16                      \n" /* store first two results */       \
        "    STR r5,[r1],#4                        \n" /* store first two results */       \
        "    LDR r9,[r0],#4                        \n" /* load next 2 of the 8 */          \
        "    LSL r8,r9,#16                         \n" /* load next 2 of the 8 */          \
        "    BFC r9,#0,#16                         \n" /* load next 2 of the 8 */          \
        "    MRC  p0,%[dra],r6,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r7,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r8,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r9,c3,c0,%[machine] \n"                                     \
        "    LSR r6,r6,#16                         \n" /* store second two results */      \
        "    BFI r7,r6,#0,#16                      \n" /* store second two results */      \
        "    STR r7,[r1],#4                        \n" /* store second two results */      \
        "    LDR r7,[r0],#4                        \n" /* load next 2 of the 8 */          \
        "    LSL r6,r7,#16                         \n" /* load next 2 of the 8 */          \
        "    BFC r7,#0,#16                         \n" /* load next 2 of the 8 */          \
        "    MRC  p0,%[dra],r8,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r9,c3,c0,#0            \n"                                     \
        "    MCR  p0,%[opcode],r6,c1,c0,%[machine] \n"                                     \
        "    NOP                                   \n"                                     \
        "    MCR  p0,%[opcode],r7,c3,c0,%[machine] \n"                                     \
        "    LSR r8,r8,#16                         \n" /* store third two results */       \
        "    BFI r9,r8,#0,#16                      \n" /* store third two results */       \
        "    STR r9,[r1],#4                        \n" /* store third two results */       \
        "    SUBS r2, r2, #8                       \n" /* length -= 8; if (length != 0) */ \
        "    ITTT  NE                              \n"                                     \
        "    LDRNE r5,[r0],#4                      \n" /* load next 2 of the 8 */          \
        "    LSLNE r4,r5,#16                       \n" /* load next 2 of the 8 */          \
        "    BFCNE r5,#0,#16                       \n" /* load next 2 of the 8 */          \
        "    MRC  p0,%[dra],r6,c1,c0,#0            \n"                                     \
        "    MRC  p0,%[dra],r7,c3,c0,#0            \n"                                     \
        "    CMP  r2, #0                           \n" /* if (length == 0) */              \
        "    BNE  1b                               \n"                                     \
        "    LSR r6,r6,#16                         \n" /* store fourth two results */      \
        "    BFI r7,r6,#0,#16                      \n" /* store fourth two results */      \
        "    STR r7,[r1],#4                        \n" /* store fourth two results */      \
        ::[opcode] "i"(BATCH_OPCODE),                                                      \
        [dra] "i"(DOUBLE_READ_ADDERS), [machine] "i"(BATCH_MACHINE))
747 
/*!
 * @brief Float data biquad direct form II calculation for one block of 8 samples.
 *
 * Biquad filter, the input and output data are float data. Biquad side 0 is used.
 * Each sample is written to the coprocessor (MCR, opc2 #6) and its result is read back
 * into the same register (MRC, opc2 #0); the loads of later samples and the stores of
 * earlier results are interleaved with these accesses.
 *
 * @param middle Non-zero when a previous block has already been processed: the eighth
 *               result of that block is still held in r5 and is stored first.
 * @param last   Non-zero for the final block: the eighth result is stored right away.
 *               When zero, r2/r3 are loaded with the first two samples of the next block
 *               and the eighth result stays in r5 for that next invocation to store.
 *
 * Register use (fixed registers, not asm operands): r0 is the source pointer and r1 the
 * destination pointer, both post-incremented; r2 and r3 must hold the first two samples
 * on entry; r4-r7 are scratch.
 * NOTE(review): r0-r3 are presumably set up by PQ_Initiate_Vector_Func() as in the
 * example - confirm against its definition. The sequence consists of separate asm
 * statements, so it relies on the compiler leaving r0-r7 untouched between them.
 *
 * The macro expands to a single do { } while (0) statement: terminate the invocation
 * with a semicolon. It can be used as the body of an unbraced if/else. @p last is
 * evaluated twice, so pass expressions without side effects (normally constants).
 *
 * Example:
 * @code
   #define VECTOR_LEN 16
   float input[VECTOR_LEN] = {1024.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   float output[VECTOR_LEN];
   pq_biquad_state_t state =
   {
        .param =
        {
            .a_1 = xxx,
            .a_2 = xxx,
            .b_0 = xxx,
            .b_1 = xxx,
            .b_2 = xxx,
        },
   };

   PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state);

   PQ_Initiate_Vector_Func(input, output);
   PQ_DF2_Vector8_FP(false, false);
   PQ_DF2_Vector8_FP(true, true);
   PQ_End_Vector_Func();
   @endcode
 *
 */
#define PQ_DF2_Vector8_FP(middle, last)                                                   \
    do                                                                                    \
    {                                                                                     \
        __asm volatile("MCR      p0,#0x1,r2,c0,c0,#6"); /* write biquad0 */               \
        if (middle)                                                                       \
        {                                                                                 \
            __asm volatile("STR      r5,[r1],#4"); /* store last result of previous 8 */  \
        }                                                                                 \
        __asm volatile("LDRD     r4,r5,[r0],#8"); /* load next 2 inputs */                \
        __asm volatile("MRC      p0,#0x1,r2,c0,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r3,c0,c0,#6"); /* write biquad0 */               \
        __asm volatile("MRC      p0,#0x1,r3,c0,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r4,c0,c0,#6"); /* write biquad0 */               \
        __asm volatile("STRD     r2,r3,[r1],#8"); /* store first 2 results */             \
        __asm volatile("MRC      p0,#0x1,r4,c0,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r5,c0,c0,#6"); /* write biquad0 */               \
        __asm volatile("LDRD     r6,r7,[r0],#8"); /* load next 2 inputs */                \
        __asm volatile("MRC      p0,#0x1,r5,c0,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r6,c0,c0,#6"); /* write biquad0 */               \
        __asm volatile("STRD     r4,r5,[r1],#8"); /* store next 2 results */              \
        __asm volatile("MRC      p0,#0x1,r6,c0,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r7,c0,c0,#6"); /* write biquad0 */               \
        __asm volatile("LDRD     r4,r5,[r0],#8"); /* load next 2 inputs */                \
        __asm volatile("MRC      p0,#0x1,r7,c0,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r4,c0,c0,#6"); /* write biquad0 */               \
        __asm volatile("STRD     r6,r7,[r1],#8"); /* store next 2 results */              \
        __asm volatile("MRC      p0,#0x1,r4,c0,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r5,c0,c0,#6"); /* write biquad0 */               \
        if (!(last))                                                                      \
        {                                                                                 \
            __asm volatile("LDRD     r2,r3,[r0],#8"); /* load first two of next 8 */      \
        }                                                                                 \
        __asm volatile("STR      r4,[r1],#4"); /* store 7th result */                     \
        __asm volatile("MRC      p0,#0x1,r5,c0,c0,#0"); /* read  biquad0 */               \
        if (last)                                                                         \
        {                                                                                 \
            __asm volatile("STR      r5,[r1],#4"); /* store last result */                \
        }                                                                                 \
    } while (0)
813 
/*!
 * @brief Fixed data biquad direct form II calculation for one block of 8 samples.
 *
 * Biquad filter, the input and output data are fixed data. Biquad side 0 is used.
 * Each sample is written to the coprocessor (MCR, CRn c1, opc2 #6) and its result is read
 * back into the same register (MRC, CRn c1, opc2 #0); the loads of later samples and the
 * stores of earlier results are interleaved with these accesses.
 *
 * @param middle Non-zero when a previous block has already been processed: the eighth
 *               result of that block is still held in r5 and is stored first.
 * @param last   Non-zero for the final block: the eighth result is stored right away.
 *               When zero, r2/r3 are loaded with the first two samples of the next block
 *               and the eighth result stays in r5 for that next invocation to store.
 *
 * Register use (fixed registers, not asm operands): r0 is the source pointer and r1 the
 * destination pointer, both post-incremented; r2 and r3 must hold the first two samples
 * on entry; r4-r7 are scratch.
 * NOTE(review): r0-r3 are presumably set up by PQ_Initiate_Vector_Func() as in the
 * example - confirm against its definition. The sequence consists of separate asm
 * statements, so it relies on the compiler leaving r0-r7 untouched between them.
 *
 * The macro expands to a single do { } while (0) statement: terminate the invocation
 * with a semicolon. It can be used as the body of an unbraced if/else. @p last is
 * evaluated twice, so pass expressions without side effects (normally constants).
 *
 * Example:
 * @code
   #define VECTOR_LEN 16
   int32_t input[VECTOR_LEN] = {1024, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   int32_t output[VECTOR_LEN];
   pq_biquad_state_t state =
   {
        .param =
        {
            .a_1 = xxx,
            .a_2 = xxx,
            .b_0 = xxx,
            .b_1 = xxx,
            .b_2 = xxx,
        },
   };

   PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state);

   PQ_Initiate_Vector_Func(input, output);
   PQ_DF2_Vector8_FX(false, false);
   PQ_DF2_Vector8_FX(true, true);
   PQ_End_Vector_Func();
   @endcode
 *
 */
#define PQ_DF2_Vector8_FX(middle, last)                                                   \
    do                                                                                    \
    {                                                                                     \
        __asm volatile("MCR      p0,#0x1,r2,c1,c0,#6"); /* write biquad0 */               \
        if (middle)                                                                       \
        {                                                                                 \
            __asm volatile("STR      r5,[r1],#4"); /* store last result of previous 8 */  \
        }                                                                                 \
        __asm volatile("LDRD     r4,r5,[r0],#8"); /* load next 2 inputs */                \
        __asm volatile("MRC      p0,#0x1,r2,c1,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r3,c1,c0,#6"); /* write biquad0 */               \
        __asm volatile("MRC      p0,#0x1,r3,c1,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r4,c1,c0,#6"); /* write biquad0 */               \
        __asm volatile("STRD     r2,r3,[r1],#8"); /* store first 2 results */             \
        __asm volatile("MRC      p0,#0x1,r4,c1,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r5,c1,c0,#6"); /* write biquad0 */               \
        __asm volatile("LDRD     r6,r7,[r0],#8"); /* load next 2 inputs */                \
        __asm volatile("MRC      p0,#0x1,r5,c1,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r6,c1,c0,#6"); /* write biquad0 */               \
        __asm volatile("STRD     r4,r5,[r1],#8"); /* store next 2 results */              \
        __asm volatile("MRC      p0,#0x1,r6,c1,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r7,c1,c0,#6"); /* write biquad0 */               \
        __asm volatile("LDRD     r4,r5,[r0],#8"); /* load next 2 inputs */                \
        __asm volatile("MRC      p0,#0x1,r7,c1,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r4,c1,c0,#6"); /* write biquad0 */               \
        __asm volatile("STRD     r6,r7,[r1],#8"); /* store next 2 results */              \
        __asm volatile("MRC      p0,#0x1,r4,c1,c0,#0"); /* read  biquad0 */               \
        __asm volatile("MCR      p0,#0x1,r5,c1,c0,#6"); /* write biquad0 */               \
        if (!(last))                                                                      \
        {                                                                                 \
            __asm volatile("LDRD     r2,r3,[r0],#8"); /* load first two of next 8 */      \
        }                                                                                 \
        __asm volatile("STR      r4,[r1],#4"); /* store 7th result */                     \
        __asm volatile("MRC      p0,#0x1,r5,c1,c0,#0"); /* read  biquad0 */               \
        if (last)                                                                         \
        {                                                                                 \
            __asm volatile("STR      r5,[r1],#4"); /* store last result */                \
        }                                                                                 \
    } while (0)
879 
/*!
 * @brief Float data vector biquad direct form II calculation.
 *
 * Biquad filter, the input and output data are float data. Biquad side 0 is used.
 * Every sample is written to the coprocessor with MCR (CRn c0, opc2 #6) and its result is
 * read back into the same register with MRC (CRn c0, opc2 #0), one sample after another.
 *
 * The loop handles 8 samples per pass and works on fixed core registers; the asm
 * statement has no operands and declares no clobbers:
 * - r0: source pointer, post-incremented by every load.
 * - r1: destination pointer, post-incremented by every store.
 * - r2: remaining sample count. 8 is subtracted per pass and the loop only exits when
 *       the count reaches exactly zero, so the length must be a non-zero multiple of 8.
 * - r3: "middle" flag. It must be zero on entry (nothing is pending yet); the first pass
 *       sets it to 1 so that later passes begin by storing the eighth result that the
 *       previous pass left in r7. After the last pass that result is stored directly.
 * - r4, r5: must already hold the first two samples on entry.
 * - r6-r9: scratch. The condition flags are modified as well.
 *
 * NOTE(review): r0-r5 are presumably prepared by PQ_StartVector() and the saved registers
 * restored by PQ_EndVector(), as the example suggests - confirm against their definitions.
 *
 * Example:
 * @code
   #define VECTOR_LEN 8
   float input[VECTOR_LEN] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
   float output[VECTOR_LEN];
   pq_biquad_state_t state =
   {
        .param =
        {
            .a_1 = xxx,
            .a_2 = xxx,
            .b_0 = xxx,
            .b_1 = xxx,
            .b_2 = xxx,
        },
   };

   PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state);

   PQ_StartVector(input, output, VECTOR_LEN);
   PQ_Vector8BiquadDf2F32();
   PQ_EndVector();
   @endcode
 *
 */
#define PQ_Vector8BiquadDf2F32()                                                         \
    __asm volatile(                                                                      \
        "1:                                      \n"                                     \
        "    MCR  p0,#0x1,r4,c0,c0,#6            \n" /* write biquad0*/                  \
        "    CMP  r3, #0                         \n"                                     \
        "    ITE  NE                             \n"                                     \
        "    STRNE r7,[r1],#4                    \n" /* store last result*/              \
        "    MOVEQ r3, #1                        \n" /* middle = 1 */                    \
        "    LDMIA  r0!,{r6-r9}                  \n" /* load next 4 datas */             \
        "    MRC  p0,#0x1,r4,c0,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r5,c0,c0,#6            \n" /* write biquad0 */                 \
        "    MRC  p0,#0x1,r5,c0,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r6,c0,c0,#6            \n" /* write biquad0 */                 \
        "    MRC  p0,#0x1,r6,c0,c0,#0            \n" /* read  biquad0 */                 \
        "    MCR  p0,#0x1,r7,c0,c0,#6            \n" /* write biquad0 */                 \
        "    MRC  p0,#0x1,r7,c0,c0,#0            \n" /* read  biquad0 */                 \
        "    MCR  p0,#0x1,r8,c0,c0,#6            \n" /* write biquad0*/                  \
        "    STMIA    r1!,{r4-r7}                \n" /* store first four results */      \
        "    MRC  p0,#0x1,r8,c0,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r9,c0,c0,#6            \n" /* write biquad0*/                  \
        "    LDRD r6,r7,[r0],#8                  \n" /* load next 2 items*/              \
        "    MRC  p0,#0x1,r9,c0,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r6,c0,c0,#6            \n" /* write biquad0*/                  \
        "    STRD r8,r9,[r1],#8                  \n" /* store third two results */       \
        "    MRC  p0,#0x1,r6,c0,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r7,c0,c0,#6            \n" /* write biquad0*/                  \
        "    SUBS r2, r2, #8                     \n" /* length -= 8; if (length != 0) */ \
        "    IT   NE                             \n"                                     \
        "    LDRDNE r4,r5,[r0],#8                \n" /* load first two of next 8 */      \
        "    STR r6,[r1],#4                      \n" /* store 7th results */             \
        "    MRC  p0,#0x1,r7,c0,c0,#0            \n" /* read  biquad0*/                  \
        "    CMP  r2, #0                         \n" /* if (length == 0) */              \
        "    BNE  1b                             \n"                                     \
        "    STR r7,[r1],#4                      \n" /* store last result */             \
    )
943 
/*!
 * @brief Fixed 32-bit data vector biquad direct form II calculation.
 *
 * Biquad filter, the input and output data are Q31 or 32-bit integer. Biquad side 0 is
 * used. Every sample is written to the coprocessor with MCR (CRn c1, opc2 #6) and its
 * result is read back into the same register with MRC (CRn c1, opc2 #0), one sample
 * after another.
 *
 * The loop handles 8 samples per pass and works on fixed core registers; the asm
 * statement has no operands and declares no clobbers:
 * - r0: source pointer, post-incremented by every load.
 * - r1: destination pointer, post-incremented by every store.
 * - r2: remaining sample count. 8 is subtracted per pass and the loop only exits when
 *       the count reaches exactly zero, so the length must be a non-zero multiple of 8.
 * - r3: "middle" flag. It must be zero on entry (nothing is pending yet); the first pass
 *       sets it to 1 so that later passes begin by storing the eighth result that the
 *       previous pass left in r7. After the last pass that result is stored directly.
 * - r4, r5: must already hold the first two samples on entry.
 * - r6-r9: scratch. The condition flags are modified as well.
 *
 * NOTE(review): r0-r5 are presumably prepared by PQ_StartVector() and the saved registers
 * restored by PQ_EndVector(), as the example suggests - confirm against their definitions.
 *
 * Example:
 * @code
   #define VECTOR_LEN 8
   int32_t input[VECTOR_LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
   int32_t output[VECTOR_LEN];
   pq_biquad_state_t state =
   {
        .param =
        {
            .a_1 = xxx,
            .a_2 = xxx,
            .b_0 = xxx,
            .b_1 = xxx,
            .b_2 = xxx,
        },
   };

   PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state);

   PQ_StartVector(input, output, VECTOR_LEN);
   PQ_Vector8BiquadDf2Fixed32();
   PQ_EndVector();
   @endcode
 *
 */
#define PQ_Vector8BiquadDf2Fixed32()                                                     \
    __asm volatile(                                                                      \
        "1:                                      \n"                                     \
        "    MCR  p0,#0x1,r4,c1,c0,#6            \n" /* write biquad0*/                  \
        "    CMP  r3, #0                         \n"                                     \
        "    ITE  NE                             \n"                                     \
        "    STRNE r7,[r1],#4                    \n" /* store last result*/              \
        "    MOVEQ r3, #1                        \n" /* middle = 1 */                    \
        "    LDMIA  r0!,{r6-r9}                  \n" /* load next 4 datas */             \
        "    MRC  p0,#0x1,r4,c1,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r5,c1,c0,#6            \n" /* write biquad0 */                 \
        "    MRC  p0,#0x1,r5,c1,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0 */                 \
        "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0 */                 \
        "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0 */                 \
        "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0 */                 \
        "    MCR  p0,#0x1,r8,c1,c0,#6            \n" /* write biquad0*/                  \
        "    STMIA    r1!,{r4-r7}                \n" /* store first four results */      \
        "    MRC  p0,#0x1,r8,c1,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r9,c1,c0,#6            \n" /* write biquad0*/                  \
        "    LDRD r6,r7,[r0],#8                  \n" /* load next 2 items*/              \
        "    MRC  p0,#0x1,r9,c1,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0*/                  \
        "    STRD r8,r9,[r1],#8                  \n" /* store third two results */       \
        "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0*/                  \
        "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0*/                  \
        "    SUBS r2, r2, #8                     \n" /* length -= 8; if (length != 0) */ \
        "    IT   NE                             \n"                                     \
        "    LDRDNE r4,r5,[r0],#8                \n" /* load first two of next 8 */      \
        "    STR r6,[r1],#4                      \n" /* store 7th results */             \
        "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0*/                  \
        "    CMP  r2, #0                         \n" /* if (length == 0) */              \
        "    BNE  1b                             \n"                                     \
        "    STR r7,[r1],#4                      \n" /* store last result */             \
    )
1007 
1008 /*!
1009  * @brief Fixed 16-bit data vector biquad direct form II calculation.
1010  *
1011  * Biquad filter, the input and output data are Q15 or 16-bit integer. Biquad side 0 is used. Example:
1012  * @code
1013    #define VECTOR_LEN 8
1014    int16_t input[VECTOR_LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
1015    int16_t output[VECTOR_LEN];
1016    pq_biquad_state_t state =
1017    {
1018         .param =
1019         {
1020             .a_1 = xxx,
1021             .a_2 = xxx,
1022             .b_0 = xxx,
1023             .b_1 = xxx,
1024             .b_2 = xxx,
1025         },
1026    };
1027 
1028    PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state);
1029 
1030    PQ_StartVector(input, output, VECTOR_LEN);
1031    PQ_Vector8BiquadDf2Fixed16();
1032    PQ_EndVector();
1033    @endcode
1034  *
1035  */
1036 #define PQ_Vector8BiquadDf2Fixed16()                                                     \
1037     __asm volatile(                                                                      \
1038         "1:                                      \n"                                     \
1039         "    MCR  p0,#0x1,r4,c1,c0,#6            \n" /* write biquad0*/                  \
1040         "    CMP  r3, #0                         \n"                                     \
1041         "    ITE  NE                             \n"                                     \
1042         "    STRHNE r7,[r1],#2                   \n" /* store last result*/              \
1043         "    MOVEQ r3, #1                        \n" /* middle = 1 */                    \
1044         "    LDRSH r6,[r0],#2                    \n" /* load next 2 of the 8*/           \
1045         "    LDRSH r7,[r0],#2                    \n" /* load next 2 of the 8*/           \
1046         "    MRC  p0,#0x1,r4,c1,c0,#0            \n" /* read  biquad0*/                  \
1047         "    MCR  p0,#0x1,r5,c1,c0,#6            \n" /* write biquad0 */                 \
1048         "    MRC  p0,#0x1,r5,c1,c0,#0            \n" /* read  biquad0*/                  \
1049         "    LDRSH r8,[r0],#2                    \n" /* load next 2 of the 8*/           \
1050         "    LDRSH r9,[r0],#2                    \n" /* load next 2 of the 8*/           \
1051         "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0 */                 \
1052         "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0 */                 \
1053         "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0 */                 \
1054         "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0 */                 \
1055         "    STRH r4,[r1],#2                     \n" /* store first 4 results */         \
1056         "    STRH r5,[r1],#2                     \n" /* store first 4 results */         \
1057         "    MCR  p0,#0x1,r8,c1,c0,#6            \n" /* write biquad0*/                  \
1058         "    STRH r6,[r1],#2                     \n" /* store first 4 results */         \
1059         "    STRH r7,[r1],#2                     \n" /* store first 4 results */         \
1060         "    MRC  p0,#0x1,r8,c1,c0,#0            \n" /* read  biquad0*/                  \
1061         "    MCR  p0,#0x1,r9,c1,c0,#6            \n" /* write biquad0*/                  \
1062         "    LDRSH r6,[r0],#2                    \n" /* load next 1 of the 8*/           \
1063         "    LDRSH r7,[r0],#2                    \n" /* load next 1 of the 8*/           \
1064         "    MRC  p0,#0x1,r9,c1,c0,#0            \n" /* read  biquad0*/                  \
1065         "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0*/                  \
1066         "    STRH r8,[r1],#2                     \n" /* store next two results */        \
1067         "    STRH r9,[r1],#2                     \n" /* store next two results */        \
1068         "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0*/                  \
1069         "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0*/                  \
1070         "    SUBS r2, r2, #8                     \n" /* length -= 8; if (length != 0) */ \
1071         "    ITT   NE                            \n"                                     \
1072         "    LDRSHNE r4,[r0],#2                  \n" /* load first two of next 8*/       \
1073         "    LDRSHNE r5,[r0],#2                  \n" /* load first two of next 8*/       \
1074         "    STRH r6,[r1],#2                     \n" /* store 7th results */             \
1075         "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0*/                  \
1076         "    CMP  r2, #0                         \n" /* if (length == 0) */              \
1077         "    BNE  1b                             \n"                                     \
1078         "    STRH r7,[r1],#2                     \n" /* store last result */             \
1079     )
1080 
1081 /*!
1082  * @brief Float data vector direct form II biquad cascade filter.
1083  *
1084  * The input and output data are float data. The data flow is
1085  * input  -> biquad side 1 -> biquad side 0 -> output.
1086  *
1087  * @code
1088    #define VECTOR_LEN 16
1089    float input[VECTOR_LEN] = {1024.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1090    float output[VECTOR_LEN];
1091    pq_biquad_state_t state0 =
1092    {
1093         .param =
1094         {
1095             .a_1 = xxx,
1096             .a_2 = xxx,
1097             .b_0 = xxx,
1098             .b_1 = xxx,
1099             .b_2 = xxx,
1100         },
1101    };
1102 
1103    pq_biquad_state_t state1 =
1104    {
1105         .param =
1106         {
1107             .a_1 = xxx,
1108             .a_2 = xxx,
1109             .b_0 = xxx,
1110             .b_1 = xxx,
1111             .b_2 = xxx,
1112         },
1113    };
1114 
1115    PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state0);
1116    PQ_BiquadRestoreInternalState(POWERQUAD, 1, &state1);
1117 
1118    PQ_Initiate_Vector_Func(pSrc, pDst);
1119    PQ_DF2_Cascade_Vector8_FP(false, false);
1120    PQ_DF2_Cascade_Vector8_FP(true, true);
1121    PQ_End_Vector_Func();
1122    @endcode
1123  *
1124  */
1125 #define PQ_DF2_Cascade_Vector8_FP(middle, last)                                    \
1126     __asm volatile("MCR  p0,#0x1,r2,c2,c0,#6"); /* write biquad1*/                 \
1127     if (middle)                                                                    \
1128     {                                                                              \
1129         __asm volatile("MCR  p0,#0x1,r5,c0,c0,#6"); /* write biquad0*/             \
1130         __asm volatile("MRRC p0,#0,r5,r2,c1");      /* read both biquad*/          \
1131     }                                                                              \
1132     else                                                                           \
1133     {                                                                              \
1134         __asm volatile("MRC  p0,#0x1,r2,c2,c0,#0"); /* read  biquad1*/             \
1135     }                                                                              \
1136     __asm volatile("MCR  p0,#0x1,r3,c2,c0,#6"); /* write biquad1*/                 \
1137     __asm volatile("MCR  p0,#0x1,r2,c0,c0,#6"); /* write biquad0*/                 \
1138     if (middle)                                                                    \
1139     {                                                                              \
1140         __asm volatile("STRD r4,r5,[r1],#8"); /* store last two results*/          \
1141     }                                                                              \
1142     __asm volatile("LDRD r4,r5,[r0],#8");       /* load next 2 datas */            \
1143     __asm volatile("MRRC p0,#0,r2,r3,c1");      /* read both biquad*/              \
1144     __asm volatile("MCR  p0,#0x1,r4,c2,c0,#6"); /* write biquad1*/                 \
1145     __asm volatile("MCR  p0,#0x1,r3,c0,c0,#6"); /* write biquad0*/                 \
1146     __asm volatile("LDRD r6,r7,[r0],#8");                                          \
1147     __asm volatile("MRRC p0,#0,r3,r4,c1");                                         \
1148     __asm volatile("MCR  p0,#0x1,r5,c2,c0,#6");                                    \
1149     __asm volatile("MCR  p0,#0x1,r4,c0,c0,#6");                                    \
1150     __asm volatile("STRD r2,r3,[r1],#8"); /* store first two results */            \
1151     __asm volatile("MRRC p0,#0,r4,r5,c1");                                         \
1152     __asm volatile("MCR  p0,#0x1,r6,c2,c0,#6");                                    \
1153     __asm volatile("MCR  p0,#0x1,r5,c0,c0,#6");                                    \
1154     __asm volatile("STR  r4,[r1],#4");                                             \
1155     __asm volatile("MRRC p0,#0,r5,r6,c1");                                         \
1156     __asm volatile("MCR  p0,#0x1,r7,c2,c0,#6");                                    \
1157     __asm volatile("MCR  p0,#0x1,r6,c0,c0,#6");                                    \
1158     __asm volatile("STR  r5,[r1],#4");                                             \
1159     __asm volatile("LDRD r4,r5,[r0],#8");                                          \
1160     __asm volatile("MRRC p0,#0,r6,r7,c1");                                         \
1161     __asm volatile("MCR  p0,#0x1,r4,c2,c0,#6");                                    \
1162     __asm volatile("MCR  p0,#0x1,r7,c0,c0,#6");                                    \
1163     if (!last)                                                                     \
1164     {                                                                              \
1165         __asm volatile("LDRD r2,r3,[r0],#8"); /* load first two of next 8 */       \
1166     }                                                                              \
1167     __asm volatile("MRRC p0,#0,r7,r4,c1");                                         \
1168     __asm volatile("MCR  p0,#0x1,r5,c2,c0,#6");                                    \
1169     __asm volatile("MCR  p0,#0x1,r4,c0,c0,#6");                                    \
1170     __asm volatile("STRD r6,r7,[r1],#8"); /* store third two results */            \
1171     __asm volatile("MRRC p0,#0,r4,r5,c1");                                         \
1172     if (last)                                                                      \
1173     {                                                                              \
1174         __asm volatile("MCR  p0,#0x1,r5,c0,c0,#6"); /* write biquad0*/             \
1175         __asm volatile("MRC  p0,#0x1,r5,c0,c0,#0"); /* read  biquad0*/             \
1176         __asm volatile("STRD r4,r5,[r1],#8");       /* store fourth two results */ \
1177     }
1178 
1179 /*!
1180  * @brief Fixed data vector direct form II biquad cascade filter.
1181  *
1182  * The input and output data are fixed data. The data flow is
1183  * input  -> biquad side 1 -> biquad side 0 -> output.
1184  *
1185  * @code
1186    #define VECTOR_LEN 16
1187    int32_t input[VECTOR_LEN] = {1024.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1188    int32_t output[VECTOR_LEN];
1189    pq_biquad_state_t state0 =
1190    {
1191         .param =
1192         {
1193             .a_1 = xxx,
1194             .a_2 = xxx,
1195             .b_0 = xxx,
1196             .b_1 = xxx,
1197             .b_2 = xxx,
1198         },
1199    };
1200 
1201    pq_biquad_state_t state1 =
1202    {
1203         .param =
1204         {
1205             .a_1 = xxx,
1206             .a_2 = xxx,
1207             .b_0 = xxx,
1208             .b_1 = xxx,
1209             .b_2 = xxx,
1210         },
1211    };
1212 
1213    PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state0);
1214    PQ_BiquadRestoreInternalState(POWERQUAD, 1, &state1);
1215 
1216    PQ_Initiate_Vector_Func(pSrc, pDst);
1217    PQ_DF2_Cascade_Vector8_FX(false, false);
1218    PQ_DF2_Cascade_Vector8_FX(true, true);
1219    PQ_End_Vector_Func();
1220    @endcode
1221  *
1222  */
1223 #define PQ_DF2_Cascade_Vector8_FX(middle, last)                                    \
1224     __asm volatile("MCR  p0,#0x1,r2,c3,c0,#6"); /* write biquad1*/                 \
1225     if (middle)                                                                    \
1226     {                                                                              \
1227         __asm volatile("MCR  p0,#0x1,r5,c1,c0,#6"); /* write biquad0*/             \
1228         __asm volatile("MRC  p0,#0x1,r5,c1,c0,#0"); /* read  biquad0*/             \
1229         __asm volatile("MRC  p0,#0x1,r2,c3,c0,#0"); /* read  biquad1*/             \
1230     }                                                                              \
1231     else                                                                           \
1232     {                                                                              \
1233         __asm volatile("MRC  p0,#0x1,r2,c3,c0,#0"); /* read  biquad1*/             \
1234     }                                                                              \
1235     __asm volatile("MCR  p0,#0x1,r3,c3,c0,#6"); /* write biquad1*/                 \
1236     __asm volatile("MCR  p0,#0x1,r2,c1,c0,#6"); /* write biquad0*/                 \
1237     if (middle)                                                                    \
1238     {                                                                              \
1239         __asm volatile("STRD r4,r5,[r1],#8"); /* store last two results*/          \
1240     }                                                                              \
1241     __asm volatile("LDRD r4,r5,[r0],#8");       /* load next 2 datas */            \
1242     __asm volatile("MRC  p0,#0x1,r2,c1,c0,#0"); /* read  biquad0*/                 \
1243     __asm volatile("MRC  p0,#0x1,r3,c3,c0,#0"); /* read  biquad1*/                 \
1244     __asm volatile("MCR  p0,#0x1,r4,c3,c0,#6"); /* write biquad1*/                 \
1245     __asm volatile("MCR  p0,#0x1,r3,c1,c0,#6"); /* write biquad0*/                 \
1246     __asm volatile("LDRD r6,r7,[r0],#8");                                          \
1247     __asm volatile("MRC  p0,#0x1,r3,c1,c0,#0");                                    \
1248     __asm volatile("MRC  p0,#0x1,r4,c3,c0,#0");                                    \
1249     __asm volatile("MCR  p0,#0x1,r5,c3,c0,#6");                                    \
1250     __asm volatile("MCR  p0,#0x1,r4,c1,c0,#6");                                    \
1251     __asm volatile("STRD r2,r3,[r1],#8");                                          \
1252     __asm volatile("MRC  p0,#0x1,r4,c1,c0,#0");                                    \
1253     __asm volatile("MRC  p0,#0x1,r5,c3,c0,#0");                                    \
1254     __asm volatile("MCR  p0,#0x1,r6,c3,c0,#6");                                    \
1255     __asm volatile("MCR  p0,#0x1,r5,c1,c0,#6");                                    \
1256     __asm volatile("STR  r4,[r1],#4");                                             \
1257     __asm volatile("MRC  p0,#0x1,r5,c1,c0,#0");                                    \
1258     __asm volatile("MRC  p0,#0x1,r6,c3,c0,#0");                                    \
1259     __asm volatile("MCR  p0,#0x1,r7,c3,c0,#6");                                    \
1260     __asm volatile("MCR  p0,#0x1,r6,c1,c0,#6");                                    \
1261     __asm volatile("STR  r5,[r1],#4");                                             \
1262     __asm volatile("LDRD r4,r5,[r0],#8");                                          \
1263     __asm volatile("MRC  p0,#0x1,r6,c1,c0,#0");                                    \
1264     __asm volatile("MRC  p0,#0x1,r7,c3,c0,#0");                                    \
1265     __asm volatile("MCR  p0,#0x1,r4,c3,c0,#6");                                    \
1266     __asm volatile("MCR  p0,#0x1,r7,c1,c0,#6");                                    \
1267     if (!last)                                                                     \
1268     {                                                                              \
1269         __asm volatile("LDRD r2,r3,[r0],#8"); /* load first two of next 8 */       \
1270     }                                                                              \
1271     __asm volatile("MRC  p0,#0x1,r7,c1,c0,#0");                                    \
1272     __asm volatile("MRC  p0,#0x1,r4,c3,c0,#0");                                    \
1273     __asm volatile("MCR  p0,#0x1,r5,c3,c0,#6");                                    \
1274     __asm volatile("MCR  p0,#0x1,r4,c1,c0,#6");                                    \
1275     __asm volatile("STRD r6,r7,[r1],#8");       /* store third two results */      \
1276     __asm volatile("MRC  p0,#0x1,r4,c1,c0,#0"); /* read  biquad0*/                 \
1277     __asm volatile("MRC  p0,#0x1,r5,c3,c0,#0"); /* read  biquad1*/                 \
1278     if (last)                                                                      \
1279     {                                                                              \
1280         __asm volatile("MCR  p0,#0x1,r5,c1,c0,#6"); /* write biquad0*/             \
1281         __asm volatile("MRC  p0,#0x1,r5,c1,c0,#0"); /* read  biquad0*/             \
1282         __asm volatile("STRD r4,r5,[r1],#8");       /* store fourth two results */ \
1283     }
1284 
1285 /*!
1286  * @brief Float data vector direct form II biquad cascade filter.
1287  *
1288  * The input and output data are float data. The data flow is
1289  * input  -> biquad side 1 -> biquad side 0 -> output.
1290  *
1291  * @code
1292    #define VECTOR_LEN 8
1293    float input[VECTOR_LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
1294    float output[VECTOR_LEN];
1295    pq_biquad_state_t state0 =
1296    {
1297         .param =
1298         {
1299             .a_1 = xxx,
1300             .a_2 = xxx,
1301             .b_0 = xxx,
1302             .b_1 = xxx,
1303             .b_2 = xxx,
1304         },
1305    };
1306 
1307    pq_biquad_state_t state1 =
1308    {
1309         .param =
1310         {
1311             .a_1 = xxx,
1312             .a_2 = xxx,
1313             .b_0 = xxx,
1314             .b_1 = xxx,
1315             .b_2 = xxx,
1316         },
1317    };
1318 
1319    PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state0);
1320    PQ_BiquadRestoreInternalState(POWERQUAD, 1, &state1);
1321 
1322    PQ_StartVector(input, output, VECTOR_LEN);
1323    PQ_Vector8BiquadDf2CascadeF32();
1324    PQ_EndVector();
1325    @endcode
1326  *
1327  */
1328 #define PQ_Vector8BiquadDf2CascadeF32()                                                       \
1329     __asm volatile(                                                                           \
1330         "1:                                      \n"                                          \
1331         "    MCR  p0,#0x1,r4,c2,c0,#2            \n" /* write biquad1*/                       \
1332         "    CMP  r3, #0                         \n"                                          \
1333         "    ITTE  NE                            \n"                                          \
1334         "    MCRNE  p0,#0x1,r7,c0,c0,#2          \n" /* write biquad0*/                       \
1335         "    MRRCNE p0,#0,r7,r4,c1               \n" /* read both biquad*/                    \
1336         "    MRCEQ  p0,#0x1,r4,c2,c0,#0          \n" /* read  biquad1*/                       \
1337         "    MCR  p0,#0x1,r5,c2,c0,#2            \n" /* write biquad1*/                       \
1338         "    MCR  p0,#0x1,r4,c0,c0,#2            \n" /* write biquad0*/                       \
1339         "    CMP  r3, #0                         \n"                                          \
1340         "    ITE  NE                             \n"                                          \
1341         "    STRDNE r6,r7,[r1],#8                \n" /* store last two results*/              \
1342         "    MOVEQ r3, #1                        \n" /* middle = 1 */                         \
1343         "    LDMIA r0!,{r6-r9}                   \n" /* load next 4 datas */                  \
1344         "    MRRC p0,#0,r4,r5,c1                 \n" /* read both biquad*/                    \
1345         "    MCR  p0,#0x1,r6,c2,c0,#2            \n" /* write biquad1*/                       \
1346         "    MCR  p0,#0x1,r5,c0,c0,#2            \n" /* write biquad0*/                       \
1347         "    MRRC p0,#0,r5,r6,c1                 \n" /* read both biquad*/                    \
1348         "    MCR  p0,#0x1,r7,c2,c0,#2            \n" /* write biquad1*/                       \
1349         "    MCR  p0,#0x1,r6,c0,c0,#2            \n" /* write biquad0*/                       \
1350         "    MRRC p0,#0,r6,r7,c1                 \n" /* read both biquad*/                    \
1351         "    MCR  p0,#0x1,r8,c2,c0,#2            \n" /* write biquad1*/                       \
1352         "    MCR  p0,#0x1,r7,c0,c0,#2            \n" /* write biquad0*/                       \
1353         "    MRRC p0,#0,r7,r8,c1                 \n" /* read both biquad*/                    \
1354         "    MCR  p0,#0x1,r9,c2,c0,#2            \n" /* write biquad1*/                       \
1355         "    MCR  p0,#0x1,r8,c0,c0,#2            \n" /* write biquad0*/                       \
1356         "    STMIA r1!,{R4-R7}                   \n" /* store first and second two results */ \
1357         "    LDRD r6,r7,[r0],#8                  \n" /* load last 2 of the 8 */               \
1358         "    MRRC p0,#0,r8,r9,c1                 \n" /* read both biquad*/                    \
1359         "    MCR  p0,#0x1,r6,c2,c0,#2            \n" /* write biquad1*/                       \
1360         "    MCR  p0,#0x1,r9,c0,c0,#2            \n" /* write biquad0*/                       \
1361         "    SUBS r2, r2, #8                     \n" /* length -= 8; if (length != 0) */      \
1362         "    IT   NE                             \n"                                          \
1363         "    LDRDNE r4,r5,[r0],#8                \n" /* load first two of next 8 */           \
1364         "    MRRC p0,#0,r9,r6,c1                 \n" /* read both biquad*/                    \
1365         "    MCR  p0,#0x1,r7,c2,c0,#2            \n" /* write biquad1*/                       \
1366         "    MCR  p0,#0x1,r6,c0,c0,#2            \n" /* write biquad0*/                       \
1367         "    STRD r8,r9,[r1],#8                  \n" /* store third two results */            \
1368         "    MRRC p0,#0,r6,r7,c1                 \n" /* read both biquad*/                    \
1369         "    CMP  r2, #0                         \n" /* if (length == 0) */                   \
1370         "    BNE  1b                             \n"                                          \
1371         "    MCR  p0,#0x1,r7,c0,c0,#2            \n" /* write biquad0*/                       \
1372         "    MRC  p0,#0x1,r7,c0,c0,#0            \n" /* read  biquad0*/                       \
1373         "    STRD r6,r7,[r1],#8                  \n" /* store fourth two results */           \
1374     )
1375 
1376 /*!
1377  * @brief Fixed 32-bit data vector direct form II biquad cascade filter.
1378  *
1379  * The input and output data are fixed 32-bit data. The data flow is
1380  * input  -> biquad side 1 -> biquad side 0 -> output.
1381  *
1382  * @code
1383    #define VECTOR_LEN 8
1384    int32_t input[VECTOR_LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
1385    int32_t output[VECTOR_LEN];
1386    pq_biquad_state_t state0 =
1387    {
1388         .param =
1389         {
1390             .a_1 = xxx,
1391             .a_2 = xxx,
1392             .b_0 = xxx,
1393             .b_1 = xxx,
1394             .b_2 = xxx,
1395         },
1396    };
1397 
1398    pq_biquad_state_t state1 =
1399    {
1400         .param =
1401         {
1402             .a_1 = xxx,
1403             .a_2 = xxx,
1404             .b_0 = xxx,
1405             .b_1 = xxx,
1406             .b_2 = xxx,
1407         },
1408    };
1409 
1410    PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state0);
1411    PQ_BiquadRestoreInternalState(POWERQUAD, 1, &state1);
1412 
1413    PQ_StartVector(input, output, VECTOR_LEN);
1414    PQ_Vector8BiquadDf2CascadeFixed32();
1415    PQ_EndVector();
1416    @endcode
1417  *
1418  */
1419 #define PQ_Vector8BiquadDf2CascadeFixed32()                                                   \
1420     __asm volatile(                                                                           \
1421         "1:                                      \n"                                          \
1422         "    MCR  p0,#0x1,r4,c3,c0,#6            \n" /* write biquad1*/                       \
1423         "    CMP  r3, #0                         \n"                                          \
1424         "    ITTTE  NE                           \n"                                          \
1425         "    MCRNE  p0,#0x1,r7,c1,c0,#6          \n" /* write biquad0*/                       \
1426         "    MRCNE  p0,#0x1,r7,c1,c0,#0          \n" /* read  biquad0*/                       \
1427         "    MRCNE  p0,#0x1,r4,c3,c0,#0          \n" /* read  biquad1*/                       \
1428         "    MRCEQ  p0,#0x1,r4,c3,c0,#0          \n" /* read  biquad1*/                       \
1429         "    MCR  p0,#0x1,r5,c3,c0,#6            \n" /* write biquad1*/                       \
1430         "    MCR  p0,#0x1,r4,c1,c0,#6            \n" /* write biquad0*/                       \
1431         "    CMP  r3, #0                         \n"                                          \
1432         "    ITE  NE                             \n"                                          \
1433         "    STRDNE r6,r7,[r1],#8                \n" /* store last two results*/              \
1434         "    MOVEQ r3, #1                        \n" /* middle = 1 */                         \
1435         "    LDMIA r0!,{r6-r9}                   \n" /* load next 4 datas */                  \
1436         "    MRC  p0,#0x1,r4,c1,c0,#0            \n" /* read  biquad0*/                       \
1437         "    MRC  p0,#0x1,r5,c3,c0,#0            \n" /* read  biquad1*/                       \
1438         "    MCR  p0,#0x1,r6,c3,c0,#6            \n" /* write biquad1*/                       \
1439         "    MCR  p0,#0x1,r5,c1,c0,#6            \n" /* write biquad0*/                       \
1440         "    MRC  p0,#0x1,r5,c1,c0,#0            \n" /* read  biquad0*/                       \
1441         "    MRC  p0,#0x1,r6,c3,c0,#0            \n" /* read  biquad1*/                       \
1442         "    MCR  p0,#0x1,r7,c3,c0,#6            \n" /* write biquad1*/                       \
1443         "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0*/                       \
1444         "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0*/                       \
1445         "    MRC  p0,#0x1,r7,c3,c0,#0            \n" /* read  biquad1*/                       \
1446         "    MCR  p0,#0x1,r8,c3,c0,#6            \n" /* write biquad1*/                       \
1447         "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0*/                       \
1448         "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0*/                       \
1449         "    MRC  p0,#0x1,r8,c3,c0,#0            \n" /* read  biquad1*/                       \
1450         "    MCR  p0,#0x1,r9,c3,c0,#6            \n" /* write biquad1*/                       \
1451         "    MCR  p0,#0x1,r8,c1,c0,#6            \n" /* write biquad0*/                       \
1452         "    STMIA r1!,{R4-R7}                   \n" /* store first and second two results */ \
1453         "    LDRD r6,r7,[r0],#8                  \n" /* load last 2 of the 8 */               \
1454         "    MRC  p0,#0x1,r8,c1,c0,#0            \n" /* read  biquad0*/                       \
1455         "    MRC  p0,#0x1,r9,c3,c0,#0            \n" /* read  biquad1*/                       \
1456         "    MCR  p0,#0x1,r6,c3,c0,#6            \n" /* write biquad1*/                       \
1457         "    MCR  p0,#0x1,r9,c1,c0,#6            \n" /* write biquad0*/                       \
1458         "    SUBS r2, r2, #8                     \n" /* length -= 8; if (length != 0) */      \
1459         "    IT   NE                             \n"                                          \
1460         "    LDRDNE r4,r5,[r0],#8                \n" /* load first two of next 8 */           \
1461         "    MRC  p0,#0x1,r9,c1,c0,#0            \n" /* read  biquad0*/                       \
1462         "    MRC  p0,#0x1,r6,c3,c0,#0            \n" /* read  biquad1*/                       \
1463         "    MCR  p0,#0x1,r7,c3,c0,#6            \n" /* write biquad1*/                       \
1464         "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0*/                       \
1465         "    STRD r8,r9,[r1],#8                  \n" /* store third two results */            \
1466         "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0*/                       \
1467         "    MRC  p0,#0x1,r7,c3,c0,#0            \n" /* read  biquad1*/                       \
1468         "    CMP  r2, #0                         \n" /* if (length == 0) */                   \
1469         "    BNE  1b                             \n"                                          \
1470         "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0*/                       \
1471         "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0*/                       \
1472         "    STRD r6,r7,[r1],#8                  \n" /* store fourth two results */           \
1473     )
1474 
1475 /*!
1476  * @brief Fixed 16-bit data vector direct form II biquad cascade filter.
1477  *
1478  * The input and output data are fixed 16-bit data. The data flow is
1479  * input  -> biquad side 1 -> biquad side 0 -> output.
1480  *
1481  * @code
1482    #define VECTOR_LEN 8
   int16_t input[VECTOR_LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
   int16_t output[VECTOR_LEN];
1485    pq_biquad_state_t state0 =
1486    {
1487         .param =
1488         {
1489             .a_1 = xxx,
1490             .a_2 = xxx,
1491             .b_0 = xxx,
1492             .b_1 = xxx,
1493             .b_2 = xxx,
1494         },
1495    };
1496 
1497    pq_biquad_state_t state1 =
1498    {
1499         .param =
1500         {
1501             .a_1 = xxx,
1502             .a_2 = xxx,
1503             .b_0 = xxx,
1504             .b_1 = xxx,
1505             .b_2 = xxx,
1506         },
1507    };
1508 
1509    PQ_BiquadRestoreInternalState(POWERQUAD, 0, &state0);
1510    PQ_BiquadRestoreInternalState(POWERQUAD, 1, &state1);
1511 
1512    PQ_StartVector(input, output, VECTOR_LEN);
1513    PQ_Vector8BiquadDf2CascadeFixed16();
1514    PQ_EndVector();
1515    @endcode
1516  *
1517  */
1518 #define PQ_Vector8BiquadDf2CascadeFixed16()                                              \
1519     __asm volatile(                                                                      \
1520         "1:                                      \n"                                     \
1521         "    MCR  p0,#0x1,r4,c3,c0,#6            \n" /* write biquad1*/                  \
1522         "    CMP  r3, #0                         \n"                                     \
1523         "    ITTTE  NE                           \n"                                     \
1524         "    MCRNE  p0,#0x1,r7,c1,c0,#6          \n" /* write biquad0*/                  \
1525         "    MRCNE  p0,#0x1,r7,c1,c0,#0          \n" /* read  biquad0*/                  \
1526         "    MRCNE  p0,#0x1,r4,c3,c0,#0          \n" /* read  biquad1*/                  \
1527         "    MRCEQ  p0,#0x1,r4,c3,c0,#0          \n" /* read  biquad1*/                  \
1528         "    MCR  p0,#0x1,r5,c3,c0,#6            \n" /* write biquad1*/                  \
1529         "    MCR  p0,#0x1,r4,c1,c0,#6            \n" /* write biquad0*/                  \
1530         "    CMP  r3, #0                         \n"                                     \
1531         "    ITTE  NE                            \n"                                     \
1532         "    STRHNE r6,[r1],#2                   \n" /* store last two results*/         \
1533         "    STRHNE r7,[r1],#2                   \n" /* store last two results*/         \
1534         "    MOVEQ r3, #1                        \n" /* middle = 1 */                    \
1535         "    LDRSH r6,[r0],#2                    \n" /* load next 2 of the 8*/           \
1536         "    LDRSH r7,[r0],#2                    \n" /* load next 2 of the 8*/           \
1537         "    MRC  p0,#0x1,r4,c1,c0,#0            \n" /* read  biquad0*/                  \
1538         "    MRC  p0,#0x1,r5,c3,c0,#0            \n" /* read  biquad1*/                  \
1539         "    MCR  p0,#0x1,r6,c3,c0,#6            \n" /* write biquad1*/                  \
1540         "    MCR  p0,#0x1,r5,c1,c0,#6            \n" /* write biquad0*/                  \
1541         "    MRC  p0,#0x1,r5,c1,c0,#0            \n" /* read  biquad0*/                  \
1542         "    MRC  p0,#0x1,r6,c3,c0,#0            \n" /* read  biquad1*/                  \
1543         "    LDRSH r8,[r0],#2                    \n" /* load next 2 of the 8*/           \
1544         "    LDRSH r9,[r0],#2                    \n" /* load next 2 of the 8*/           \
1545         "    MCR  p0,#0x1,r7,c3,c0,#6            \n" /* write biquad1*/                  \
1546         "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0*/                  \
1547         "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0*/                  \
1548         "    MRC  p0,#0x1,r7,c3,c0,#0            \n" /* read  biquad1*/                  \
1549         "    STRH r4,[r1],#2                     \n" /* store first 4 results */         \
1550         "    STRH r5,[r1],#2                     \n" /* store first 4 results */         \
1551         "    MCR  p0,#0x1,r8,c3,c0,#6            \n" /* write biquad1*/                  \
1552         "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0*/                  \
1553         "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0*/                  \
1554         "    MRC  p0,#0x1,r8,c3,c0,#0            \n" /* read  biquad1*/                  \
1555         "    MCR  p0,#0x1,r9,c3,c0,#6            \n" /* write biquad1*/                  \
1556         "    MCR  p0,#0x1,r8,c1,c0,#6            \n" /* write biquad0*/                  \
1557         "    STRH r6,[r1],#2                     \n" /* store first 4 results */         \
1558         "    STRH r7,[r1],#2                     \n" /* store first 4 results */         \
1559         "    LDRSH r6,[r0],#2                    \n" /* load last 2 of the 8*/           \
1560         "    LDRSH r7,[r0],#2                    \n" /* load last 2 of the 8*/           \
1561         "    MRC  p0,#0x1,r8,c1,c0,#0            \n" /* read  biquad0*/                  \
1562         "    MRC  p0,#0x1,r9,c3,c0,#0            \n" /* read  biquad1*/                  \
1563         "    MCR  p0,#0x1,r6,c3,c0,#6            \n" /* write biquad1*/                  \
1564         "    MCR  p0,#0x1,r9,c1,c0,#6            \n" /* write biquad0*/                  \
1565         "    SUBS r2, r2, #8                     \n" /* length -= 8; if (length != 0) */ \
1566         "    ITT   NE                            \n"                                     \
1567         "    LDRSHNE r4,[r0],#2                  \n" /* load first two of next 8*/       \
1568         "    LDRSHNE r5,[r0],#2                  \n" /* load first two of next 8*/       \
1569         "    MRC  p0,#0x1,r9,c1,c0,#0            \n" /* read  biquad0*/                  \
1570         "    MRC  p0,#0x1,r6,c3,c0,#0            \n" /* read  biquad1*/                  \
1571         "    MCR  p0,#0x1,r7,c3,c0,#6            \n" /* write biquad1*/                  \
1572         "    MCR  p0,#0x1,r6,c1,c0,#6            \n" /* write biquad0*/                  \
1573         "    STRH r8,[r1],#2                     \n" /* store third two results */       \
1574         "    STRH r9,[r1],#2                     \n" /* store third two results */       \
1575         "    MRC  p0,#0x1,r6,c1,c0,#0            \n" /* read  biquad0*/                  \
1576         "    MRC  p0,#0x1,r7,c3,c0,#0            \n" /* read  biquad1*/                  \
1577         "    CMP  r2, #0                         \n" /* if (length == 0) */              \
1578         "    BNE  1b                             \n"                                     \
1579         "    MCR  p0,#0x1,r7,c1,c0,#6            \n" /* write biquad0*/                  \
1580         "    MRC  p0,#0x1,r7,c1,c0,#0            \n" /* read  biquad0*/                  \
1581         "    STRH r6,[r1],#2                     \n" /* store fourth two results */      \
1582         "    STRH r7,[r1],#2                     \n" /* store fourth two results */      \
1583     )
1584 
/*!
 * @brief Pack three matrix dimensions into the length word used by the matrix functions.
 *
 * mat1Row is placed at bit 0, mat1Col at bit 8 and mat2Col at bit 16. The
 * arguments are not masked, so each one has to fit into its own 8-bit field.
 */
#define POWERQUAD_MAKE_MATRIX_LEN(mat1Row, mat1Col, mat2Col) \
    ((((uint32_t)(mat2Col)) << 16U) | (((uint32_t)(mat1Col)) << 8U) | ((uint32_t)(mat1Row)))
1588 
/*! @brief Convert a Q31 value to float by dividing it by 2^31. */
#define PQ_Q31_2_FLOAT(x) ((float)(x) / 2147483648.0f)
1591 
/*! @brief Convert a Q15 value to float by dividing it by 2^15. */
#define PQ_Q15_2_FLOAT(x) ((float)(x) / 32768.0f)
1594 
/*!
 * @brief POWERQUAD computation engine selector.
 *
 * The enumerator values are fixed encodings; note that the value 4 is not used.
 */
typedef enum
{
    kPQ_CP_PQ     = 0, /*!< Math engine. */
    kPQ_CP_MTX    = 1, /*!< Matrix engine. */
    kPQ_CP_FFT    = 2, /*!< FFT engine. */
    kPQ_CP_FIR    = 3, /*!< FIR engine. */
    kPQ_CP_CORDIC = 5  /*!< CORDIC engine. */
} pq_computationengine_t;
1604 
/*! @brief POWERQUAD data format type. */
typedef enum
{
    kPQ_16Bit = 0, /*!< 16-bit fixed point (int16). */
    kPQ_32Bit = 1, /*!< 32-bit fixed point (int32). */
    kPQ_Float = 2  /*!< Floating point. */
} pq_format_t;
1612 
/*! @brief Prescale and saturation settings for the coprocessor instructions. */
typedef struct
{
    int8_t inputPrescale;  /*!< Input prescale. */
    int8_t outputPrescale; /*!< Output prescale. */
    int8_t outputSaturate; /*!< Output saturation at n bits; for example 0x11 selects an 8-bit space,
                                  so the value is clipped at +127 or -128. */
} pq_prescale_t;
1621 
/*! @brief POWERQUAD format and prescale configuration for inputs, output and temporary data. */
typedef struct
{
    pq_format_t inputAFormat;  /*!< Format of input A. */
    int8_t inputAPrescale;     /*!< Prescale of input A. The prescale is a power-of-two scaling applied by
                                   'shifting': with a prescale of n, a value such as 1.5 is handled as 1.5*2^n. */
    pq_format_t inputBFormat;  /*!< Format of input B. */
    int8_t inputBPrescale;     /*!< Prescale of input B. */
    pq_format_t outputFormat;  /*!< Format of the output. */
    int8_t outputPrescale;     /*!< Prescale of the output. */
    pq_format_t tmpFormat;     /*!< Format of the temporary data. */
    int8_t tmpPrescale;        /*!< Prescale of the temporary data. */
    pq_format_t machineFormat; /*!< Machine (internal) format. */
    uint32_t *tmpBase;         /*!< Base address of the temporary data area. */
} pq_config_t;
1637 
/*!
 * @brief Biquad filter parameters: the two state values followed by the five coefficients.
 */
typedef struct _pq_biquad_param
{
    float v_n_1; /*!< State v[n-1]; set to 0 at initialization. */
    float v_n;   /*!< State v[n]; set to 0 at initialization. */
    float a_1;   /*!< Coefficient a[1]. */
    float a_2;   /*!< Coefficient a[2]. */
    float b_0;   /*!< Coefficient b[0]. */
    float b_1;   /*!< Coefficient b[1]. */
    float b_2;   /*!< Coefficient b[2]. */
} pq_biquad_param_t;
1649 
/*! @brief Saved biquad state: the filter parameters plus one internal register value. */
typedef struct _pq_biquad_state
{
    pq_biquad_param_t param; /*!< Filter parameters. */
    uint32_t compreg;        /*!< Internal register; set to 0 at initialization. */
} pq_biquad_state_t;
1656 
/*! @brief Instance structure for the direct form II biquad cascade filter. */
typedef struct
{
    uint8_t numStages;         /**< Number of 2nd order stages in the filter. */
    pq_biquad_state_t *pState; /**< Points to the array of per-stage states (see pq_biquad_state_t). */
} pq_biquad_cascade_df2_instance;
1663 
/*! @brief Number of iterations performed by the CORDIC engine. */
typedef enum
{
    kPQ_Iteration_8  = 0, /*!< Iterate 8 times. */
    kPQ_Iteration_16 = 1, /*!< Iterate 16 times. */
    kPQ_Iteration_24 = 2  /*!< Iterate 24 times. */
} pq_cordic_iter_t;
1671 
/*!
 * @brief Union that exposes the same 32 bits either as a float or as an unsigned integer.
 *
 * The driver uses it to move the raw bit pattern of a float to and from the coprocessor.
 */
typedef union _pq_float
{
    float floatX;      /*!< Float view. */
    uint32_t integerX; /*!< Unsigned integer view. */
} pq_float_t;
1678 
1679 /*******************************************************************************
1680  * API
1681  ******************************************************************************/
1682 
1683 #if defined(__cplusplus)
1684 extern "C" {
1685 #endif /* __cplusplus */
1686 
1687 /*!
1688  * @name POWERQUAD functional Operation
1689  * @{
1690  */
1691 
/*!
 * @brief Get default configuration.
 *
 * This function initializes the POWERQUAD configuration structure to a default value.
 *
 * FORMAT register field definitions:
 *   Bits[15:8] scaler (for scaled 'q31' formats)
 *   Bits[5:4] external format. 00b=q15, 01b=q31, 10b=float
 *   Bits[1:0] internal format. 00b=q15, 01b=q31, 10b=float
 * For example:
 *   POWERQUAD->INAFORMAT = (config->inputAPrescale << 8U) | (config->inputAFormat << 4U) | config->machineFormat
 *
 * For all POWERQUAD operations the internal format must be float. The only exceptions are
 * the FFT related functions (FFT/IFFT/DCT/IDCT), for which it must be set to q31.
 *
 * The default values are:
 *   config->inputAFormat = kPQ_Float;
 *   config->inputAPrescale = 0;
 *   config->inputBFormat = kPQ_Float;
 *   config->inputBPrescale = 0;
 *   config->outputFormat = kPQ_Float;
 *   config->outputPrescale = 0;
 *   config->tmpFormat = kPQ_Float;
 *   config->tmpPrescale = 0;
 *   config->machineFormat = kPQ_Float;
 *   config->tmpBase = 0xE0000000;
 *
 * @param config Pointer to the "pq_config_t" structure to be filled in.
 */
void PQ_GetDefaultConfig(pq_config_t *config);

/*!
 * @brief Set the format/prescale configuration.
 *
 * @param base   POWERQUAD peripheral base address.
 * @param config Pointer to the "pq_config_t" structure holding the configuration.
 */
void PQ_SetConfig(POWERQUAD_Type *base, const pq_config_t *config);
1727 
1728 /*!
1729  * @brief set coprocessor scaler for coprocessor instructions, this function is used to
1730  * set output saturation and scaleing for input/output.
1731  *
1732  * @param base  POWERQUAD peripheral base address
1733  * @param prescale Pointer to "pq_prescale_t" structure.
1734  */
PQ_SetCoprocessorScaler(POWERQUAD_Type * base,const pq_prescale_t * prescale)1735 static inline void PQ_SetCoprocessorScaler(POWERQUAD_Type *base, const pq_prescale_t *prescale)
1736 {
1737     assert(NULL != prescale);
1738 
1739     base->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(prescale->inputPrescale) |
1740                   POWERQUAD_CPPRE_CPPRE_OUT(prescale->outputPrescale) |
1741                   ((uint32_t)prescale->outputSaturate << POWERQUAD_CPPRE_CPPRE_SAT_SHIFT);
1742 }
1743 
/*!
 * @brief Initializes the POWERQUAD module.
 *
 * @param base POWERQUAD peripheral base address.
 */
void PQ_Init(POWERQUAD_Type *base);

/*!
 * @brief De-initializes the POWERQUAD module.
 *
 * @param base POWERQUAD peripheral base address.
 */
void PQ_Deinit(POWERQUAD_Type *base);

/*!
 * @brief Set the data format for non-coprocessor instructions.
 *
 * @param base   POWERQUAD peripheral base address.
 * @param engine Computation engine.
 * @param format Data format.
 */
void PQ_SetFormat(POWERQUAD_Type *base, pq_computationengine_t engine, pq_format_t format);
1766 
1767 /*!
1768  * @brief Wait for the completion.
1769  *
1770  * @param base  POWERQUAD peripheral base address
1771  */
PQ_WaitDone(POWERQUAD_Type * base)1772 static inline void PQ_WaitDone(POWERQUAD_Type *base)
1773 {
1774     /* wait for the completion */
1775     while ((base->CONTROL & INST_BUSY) == INST_BUSY)
1776     {
1777         __WFE();
1778     }
1779 }
1780 
1781 /*!
1782  * @brief Processing function for the floating-point natural log.
1783  *
1784  * @param  *pSrc      points to the block of input data. The range of the input value is (0 +INFINITY).
1785  * @param  *pDst      points to the block of output data
1786  */
PQ_LnF32(float * pSrc,float * pDst)1787 static inline void PQ_LnF32(float *pSrc, float *pDst)
1788 {
1789     pq_float_t val;
1790 
1791     val.floatX = *pSrc;
1792     _pq_ln0(val.integerX);
1793     val.integerX = _pq_readAdd0();
1794     *pDst        = val.floatX;
1795 }
1796 
1797 /*!
1798  * @brief Processing function for the floating-point reciprocal.
1799  *
1800  * @param  *pSrc      points to the block of input data. The range of the input value is non-zero.
1801  * @param  *pDst      points to the block of output data
1802  */
PQ_InvF32(float * pSrc,float * pDst)1803 static inline void PQ_InvF32(float *pSrc, float *pDst)
1804 {
1805     pq_float_t val;
1806 
1807     val.floatX = *pSrc;
1808     _pq_inv0(val.integerX);
1809     val.integerX = _pq_readMult0();
1810     *pDst        = val.floatX;
1811 }
1812 
1813 /*!
1814  * @brief Processing function for the floating-point square-root.
1815  *
1816  * @param  *pSrc      points to the block of input data. The range of the input value is [0 +INFINITY).
1817  * @param  *pDst      points to the block of output data
1818  */
PQ_SqrtF32(float * pSrc,float * pDst)1819 static inline void PQ_SqrtF32(float *pSrc, float *pDst)
1820 {
1821     pq_float_t val;
1822 
1823     val.floatX = *pSrc;
1824     _pq_sqrt0(val.integerX);
1825     val.integerX = _pq_readMult0();
1826     *pDst        = val.floatX;
1827 }
1828 
1829 /*!
1830  * @brief Processing function for the floating-point inverse square-root.
1831  *
1832  * @param  *pSrc      points to the block of input data. The range of the input value is (0 +INFINITY).
1833  * @param  *pDst      points to the block of output data
1834  */
PQ_InvSqrtF32(float * pSrc,float * pDst)1835 static inline void PQ_InvSqrtF32(float *pSrc, float *pDst)
1836 {
1837     pq_float_t val;
1838 
1839     val.floatX = *pSrc;
1840     _pq_invsqrt0(val.integerX);
1841     val.integerX = _pq_readMult0();
1842     *pDst        = val.floatX;
1843 }
1844 
1845 /*!
1846  * @brief Processing function for the floating-point natural exponent.
1847  *
1848  * @param  *pSrc      points to the block of input data. The range of the input value is (-INFINITY +INFINITY).
1849  * @param  *pDst      points to the block of output data
1850  */
PQ_EtoxF32(float * pSrc,float * pDst)1851 static inline void PQ_EtoxF32(float *pSrc, float *pDst)
1852 {
1853     pq_float_t val;
1854 
1855     val.floatX = *pSrc;
1856     _pq_etox0(val.integerX);
1857     val.integerX = _pq_readMult0();
1858     *pDst        = val.floatX;
1859 }
1860 
1861 /*!
1862  * @brief Processing function for the floating-point natural exponent with negative parameter.
1863  *
1864  * @param  *pSrc      points to the block of input data. The range of the input value is (-INFINITY +INFINITY).
1865  * @param  *pDst      points to the block of output data
1866  */
PQ_EtonxF32(float * pSrc,float * pDst)1867 static inline void PQ_EtonxF32(float *pSrc, float *pDst)
1868 {
1869     pq_float_t val;
1870 
1871     val.floatX = *pSrc;
1872     _pq_etonx0(val.integerX);
1873     val.integerX = _pq_readMult0();
1874     *pDst        = val.floatX;
1875 }
1876 
1877 /*!
1878  * @brief Processing function for the floating-point sine.
1879  *
1880  * @param  *pSrc      points to the block of input data. The input value is in radians, the range is (-INFINITY
1881  * +INFINITY).
1882  * @param  *pDst      points to the block of output data
1883  */
PQ_SinF32(float * pSrc,float * pDst)1884 static inline void PQ_SinF32(float *pSrc, float *pDst)
1885 {
1886     pq_float_t val;
1887 
1888     val.floatX = *pSrc;
1889     _pq_sin0(val.integerX);
1890     val.integerX = _pq_readAdd0();
1891     *pDst        = val.floatX;
1892 }
1893 
1894 /*!
1895  * @brief Processing function for the floating-point cosine.
1896  *
1897  * @param  *pSrc      points to the block of input data. The input value is in radians, the range is (-INFINITY
1898  * +INFINITY).
1899  * @param  *pDst      points to the block of output data
1900  */
PQ_CosF32(float * pSrc,float * pDst)1901 static inline void PQ_CosF32(float *pSrc, float *pDst)
1902 {
1903     pq_float_t val;
1904 
1905     val.floatX = *pSrc;
1906     _pq_cos0(val.integerX);
1907     val.integerX = _pq_readAdd0();
1908     *pDst        = val.floatX;
1909 }
1910 
1911 /*!
1912  * @brief Processing function for the floating-point biquad.
1913  *
1914  * @param  *pSrc      points to the block of input data
1915  * @param  *pDst      points to the block of output data
1916  */
PQ_BiquadF32(float * pSrc,float * pDst)1917 static inline void PQ_BiquadF32(float *pSrc, float *pDst)
1918 {
1919     pq_float_t val;
1920 
1921     val.floatX = *pSrc;
1922     _pq_biquad0(val.integerX);
1923     val.integerX = _pq_readAdd0();
1924     *pDst        = val.floatX;
1925 }
1926 
1927 /*!
1928  * @brief Processing function for the floating-point division.
1929  *
1930  * Get x1 / x2.
1931  *
1932  * @param  x1 x1
1933  * @param  x2 x2
1934  * @param  *pDst      points to the block of output data
1935  */
PQ_DivF32(float * x1,float * x2,float * pDst)1936 static inline void PQ_DivF32(float *x1, float *x2, float *pDst)
1937 {
1938     pq_float_t X1;
1939     pq_float_t X2;
1940 
1941     X1.floatX      = *x1;
1942     X2.floatX      = *x2;
1943     uint64_t input = (uint64_t)(X2.integerX) | ((uint64_t)(X1.integerX) << 32U);
1944 
1945     _pq_div0(input);
1946     X1.integerX = _pq_readMult0();
1947     *pDst       = X1.floatX;
1948 }
1949 
1950 /*!
1951  * @brief Processing function for the floating-point biquad.
1952  *
1953  * @param  *pSrc      points to the block of input data
1954  * @param  *pDst      points to the block of output data
1955  */
PQ_Biquad1F32(float * pSrc,float * pDst)1956 static inline void PQ_Biquad1F32(float *pSrc, float *pDst)
1957 {
1958     pq_float_t val;
1959 
1960     val.floatX = *pSrc;
1961     _pq_biquad1(val.integerX);
1962     val.integerX = _pq_readAdd1();
1963     *pDst        = val.floatX;
1964 }
1965 
/*!
 * @brief Fixed-point natural log.
 *
 * Passes the value to _pq_ln_fx0 and fetches the result with _pq_readAdd0_fx.
 *
 * @param val Value to be calculated. The range of the input value is (0 +INFINITY).
 * @return ln(val).
 */
static inline int32_t PQ_LnFixed(int32_t val)
{
    uint32_t rawResult;

    _pq_ln_fx0(val);
    rawResult = _pq_readAdd0_fx();

    return (int32_t)rawResult;
}
1977 
/*!
 * @brief Fixed-point reciprocal.
 *
 * Passes the value to _pq_inv_fx0 and fetches the result with _pq_readMult0_fx.
 *
 * @param val Value to be calculated. The input value must be non-zero.
 * @return inv(val).
 */
static inline int32_t PQ_InvFixed(int32_t val)
{
    uint32_t rawResult;

    _pq_inv_fx0(val);
    rawResult = _pq_readMult0_fx();

    return (int32_t)rawResult;
}
1989 
/*!
 * @brief Fixed-point square root.
 *
 * Passes the value to _pq_sqrt_fx0 and fetches the result with _pq_readMult0_fx.
 *
 * @param val Value to be calculated. The range of the input value is [0 +INFINITY).
 * @return sqrt(val).
 */
static inline uint32_t PQ_SqrtFixed(uint32_t val)
{
    uint32_t root;

    _pq_sqrt_fx0(val);
    root = _pq_readMult0_fx();

    return root;
}
2001 
/*!
 * @brief Fixed-point inverse square root.
 *
 * Passes the value to _pq_invsqrt_fx0 and fetches the result with _pq_readMult0_fx.
 *
 * @param val Value to be calculated. The range of the input value is (0 +INFINITY).
 * @return 1/sqrt(val).
 */
static inline int32_t PQ_InvSqrtFixed(int32_t val)
{
    uint32_t rawResult;

    _pq_invsqrt_fx0(val);
    rawResult = _pq_readMult0_fx();

    return (int32_t)rawResult;
}
2013 
/*!
 * @brief Fixed-point natural exponent.
 *
 * Passes the value to _pq_etox_fx0 and fetches the result with _pq_readMult0_fx.
 *
 * @param val Value to be calculated. The range of the input value is (-INFINITY +INFINITY).
 * @return e^(val).
 */
static inline int32_t PQ_EtoxFixed(int32_t val)
{
    uint32_t rawResult;

    _pq_etox_fx0(val);
    rawResult = _pq_readMult0_fx();

    return (int32_t)rawResult;
}
2025 
/*!
 * @brief Fixed-point natural exponent with negative parameter.
 *
 * Passes the value to _pq_etonx_fx0 and fetches the result with _pq_readMult0_fx.
 *
 * @param val Value to be calculated. The range of the input value is (-INFINITY +INFINITY).
 * @return e^(-val).
 */
static inline int32_t PQ_EtonxFixed(int32_t val)
{
    uint32_t rawResult;

    _pq_etonx_fx0(val);
    rawResult = _pq_readMult0_fx();

    return (int32_t)rawResult;
}
2037 
2038 /*!
2039  * @brief Processing function for the fixed sine.
2040  *
2041  * @param val value to be calculated. The input value is [-1, 1] in Q31 format, which means [-pi, pi].
2042  * @return returns sin(val).
2043  */
PQ_SinQ31(int32_t val)2044 static inline int32_t PQ_SinQ31(int32_t val)
2045 {
2046     int32_t ret;
2047     uint32_t cppre;
2048 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2049     pq_float_t magic;
2050     pq_float_t valFloat;
2051 
2052     magic.integerX  = 0x30c90fdb;
2053     valFloat.floatX = magic.floatX * (float)val;
2054 #endif
2055 
2056     cppre            = POWERQUAD->CPPRE;
2057     POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_OUT(31);
2058 
2059 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2060     _pq_sin0(valFloat.integerX);
2061 
2062     (void)_pq_readAdd0();
2063     ret = (int32_t)_pq_readAdd0_fx();
2064 #else
2065     _pq_sin_fx0(val);
2066     ret = (int32_t)_pq_readAdd0_fx();
2067 #endif
2068 
2069     POWERQUAD->CPPRE = cppre;
2070 
2071     return ret;
2072 }
2073 
2074 /*!
2075  * @brief Processing function for the fixed sine.
2076  *
2077  * @param val value to be calculated. The input value is [-1, 1] in Q15 format, which means [-pi, pi].
2078  * @return returns sin(val).
2079  */
PQ_SinQ15(int16_t val)2080 static inline int16_t PQ_SinQ15(int16_t val)
2081 {
2082     uint32_t ret;
2083     uint32_t cppre;
2084 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2085     pq_float_t magic;
2086     pq_float_t valFloat;
2087 
2088     magic.integerX  = 0x30c90fdbU;
2089     valFloat.floatX = magic.floatX * (float)(uint32_t)((uint32_t)val << 16U);
2090 #endif
2091 
2092     cppre = POWERQUAD->CPPRE;
2093     /* Don't use 15 here, it is wrong then val is 0x4000 */
2094     POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_OUT(31);
2095 
2096 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2097     _pq_sin0(valFloat.integerX);
2098 
2099     (void)_pq_readAdd0();
2100     ret = (_pq_readAdd0_fx() >> 16U);
2101 #else
2102     _pq_sin_fx0((uint32_t)val << 16U);
2103     ret = (_pq_readAdd0_fx() >> 16U);
2104 #endif
2105 
2106     POWERQUAD->CPPRE = cppre;
2107 
2108     return (int16_t)ret;
2109 }
2110 
2111 /*!
2112  * @brief Processing function for the fixed cosine.
2113  *
2114  * @param val value to be calculated. The input value is [-1, 1] in Q31 format, which means [-pi, pi].
2115  * @return returns cos(val).
2116  */
PQ_CosQ31(int32_t val)2117 static inline int32_t PQ_CosQ31(int32_t val)
2118 {
2119     int32_t ret;
2120     uint32_t cppre;
2121 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2122     pq_float_t magic;
2123     pq_float_t valFloat;
2124 
2125     magic.integerX  = 0x30c90fdb;
2126     valFloat.floatX = magic.floatX * (float)val;
2127 #endif
2128 
2129     cppre            = POWERQUAD->CPPRE;
2130     POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_OUT(31);
2131 
2132 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2133     _pq_cos0(valFloat.integerX);
2134 
2135     (void)_pq_readAdd0();
2136     ret = (int32_t)_pq_readAdd0_fx();
2137 #else
2138     _pq_cos_fx0(val);
2139     ret = (int32_t)_pq_readAdd0_fx();
2140 #endif
2141 
2142     POWERQUAD->CPPRE = cppre;
2143 
2144     return ret;
2145 }
2146 
2147 /*!
2148  * @brief Processing function for the fixed sine.
2149  *
2150  * @param val value to be calculated. The input value is [-1, 1] in Q15 format, which means [-pi, pi].
2151  * @return returns sin(val).
2152  */
PQ_CosQ15(int16_t val)2153 static inline int16_t PQ_CosQ15(int16_t val)
2154 {
2155     uint32_t ret;
2156     uint32_t cppre;
2157 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2158     pq_float_t magic;
2159     pq_float_t valFloat;
2160 
2161     magic.integerX  = 0x30c90fdbU;
2162     valFloat.floatX = magic.floatX * (float)(uint32_t)((uint32_t)val << 16U);
2163 #endif
2164 
2165     cppre            = POWERQUAD->CPPRE;
2166     POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_OUT(31);
2167 
2168 #if defined(FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA) && FSL_FEATURE_POWERQUAD_SIN_COS_FIX_ERRATA
2169     _pq_cos0(valFloat.integerX);
2170 
2171     (void)_pq_readAdd0();
2172     ret = _pq_readAdd0_fx() >> 16U;
2173 #else
2174     _pq_cos_fx0((uint32_t)val << 16U);
2175     ret = _pq_readAdd0_fx() >> 16U;
2176 #endif
2177 
2178     POWERQUAD->CPPRE = cppre;
2179 
2180     return (int16_t)ret;
2181 }
2182 
/*!
 * @brief Fixed-point biquad.
 *
 * Passes the value to _pq_biquad0_fx and fetches the result with _pq_readAdd0_fx.
 *
 * @param val Value to be calculated.
 * @return biquad(val).
 */
static inline int32_t PQ_BiquadFixed(int32_t val)
{
    uint32_t rawResult;

    _pq_biquad0_fx(val);
    rawResult = _pq_readAdd0_fx();

    return (int32_t)rawResult;
}
2194 
2195 /*!
2196  * @brief Processing function for the floating-point vectorised natural log.
2197  *
2198  * @param  *pSrc      points to the block of input data
2199  * @param  *pDst      points to the block of output data
2200  * @param  length     the block of input data.
2201  */
2202 void PQ_VectorLnF32(float *pSrc, float *pDst, int32_t length);
2203 
2204 /*!
2205  * @brief Processing function for the floating-point vectorised reciprocal.
2206  *
2207  * @param  *pSrc      points to the block of input data
2208  * @param  *pDst      points to the block of output data
2209  * @param  length     the block of input data.
2210  */
2211 void PQ_VectorInvF32(float *pSrc, float *pDst, int32_t length);
2212 
2213 /*!
2214  * @brief Processing function for the floating-point vectorised square-root.
2215  *
2216  * @param  *pSrc      points to the block of input data
2217  * @param  *pDst      points to the block of output data
2218  * @param  length     the block of input data.
2219  */
2220 void PQ_VectorSqrtF32(float *pSrc, float *pDst, int32_t length);
2221 
2222 /*!
2223  * @brief Processing function for the floating-point vectorised inverse square-root.
2224  *
2225  * @param  *pSrc      points to the block of input data
2226  * @param  *pDst      points to the block of output data
2227  * @param  length     the block of input data.
2228  */
2229 void PQ_VectorInvSqrtF32(float *pSrc, float *pDst, int32_t length);
2230 
2231 /*!
2232  * @brief Processing function for the floating-point vectorised natural exponent.
2233  *
2234  * @param  *pSrc      points to the block of input data
2235  * @param  *pDst      points to the block of output data
2236  * @param  length     the block of input data.
2237  */
2238 void PQ_VectorEtoxF32(float *pSrc, float *pDst, int32_t length);
2239 
2240 /*!
2241  * @brief Processing function for the floating-point vectorised natural exponent with negative parameter.
2242  *
2243  * @param  *pSrc      points to the block of input data
2244  * @param  *pDst      points to the block of output data
2245  * @param  length     the block of input data.
2246  */
2247 void PQ_VectorEtonxF32(float *pSrc, float *pDst, int32_t length);
2248 
2249 /*!
2250  * @brief Processing function for the floating-point vectorised sine
2251  *
2252  * @param  *pSrc      points to the block of input data
2253  * @param  *pDst      points to the block of output data
2254  * @param  length     the block of input data.
2255  */
2256 void PQ_VectorSinF32(float *pSrc, float *pDst, int32_t length);
2257 
2258 /*!
2259  * @brief Processing function for the floating-point vectorised cosine.
2260  *
2261  * @param  *pSrc      points to the block of input data
2262  * @param  *pDst      points to the block of output data
2263  * @param  length     the block of input data.
2264  */
2265 void PQ_VectorCosF32(float *pSrc, float *pDst, int32_t length);
2266 
2267 /*!
2268  * @brief Processing function for the Q31 vectorised natural log.
2269  *
2270  * @param  *pSrc      points to the block of input data
2271  * @param  *pDst      points to the block of output data
2272  * @param  length     the block of input data.
2273  */
2274 void PQ_VectorLnFixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2275 
2276 /*!
2277  * @brief Processing function for the Q31 vectorised reciprocal.
2278  *
2279  * @param  *pSrc      points to the block of input data
2280  * @param  *pDst      points to the block of output data
2281  * @param  length     the block of input data.
2282  */
2283 void PQ_VectorInvFixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2284 
2285 /*!
2286  * @brief Processing function for the 32-bit integer vectorised square-root.
2287  *
2288  * @param  *pSrc      points to the block of input data
2289  * @param  *pDst      points to the block of output data
2290  * @param  length     the block of input data.
2291  */
2292 void PQ_VectorSqrtFixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2293 
2294 /*!
2295  * @brief Processing function for the 32-bit integer vectorised inverse square-root.
2296  *
2297  * @param  *pSrc      points to the block of input data
2298  * @param  *pDst      points to the block of output data
2299  * @param  length     the block size of input data.
2300  */
2301 void PQ_VectorInvSqrtFixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2302 
2303 /*!
2304  * @brief Processing function for the 32-bit integer vectorised natural exponent.
2305  *
2306  * @param  *pSrc      points to the block of input data
2307  * @param  *pDst      points to the block of output data
2308  * @param  length     the block size of input data.
2309  */
2310 void PQ_VectorEtoxFixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2311 
2312 /*!
2313  * @brief Processing function for the 32-bit integer vectorised natural exponent with negative parameter.
2314  *
2315  * @param  *pSrc      points to the block of input data
2316  * @param  *pDst      points to the block of output data
2317  * @param  length     the block size of input data.
2318  */
2319 void PQ_VectorEtonxFixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2320 
2321 /*!
2322  * @brief Processing function for the Q15 vectorised sine.
2323  *
2324  * @param  *pSrc      points to the block of input data
2325  * @param  *pDst      points to the block of output data
2326  * @param  length     the block size of input data.
2327  */
2328 void PQ_VectorSinQ15(int16_t *pSrc, int16_t *pDst, int32_t length);
2329 
2330 /*!
2331  * @brief Processing function for the Q15 vectorised cosine.
2332  *
2333  * @param  *pSrc      points to the block of input data
2334  * @param  *pDst      points to the block of output data
2335  * @param  length     the block size of input data.
2336  */
2337 void PQ_VectorCosQ15(int16_t *pSrc, int16_t *pDst, int32_t length);
2338 
2339 /*!
2340  * @brief Processing function for the Q31 vectorised sine.
2341  *
2342  * @param  *pSrc      points to the block of input data
2343  * @param  *pDst      points to the block of output data
2344  * @param  length     the block size of input data.
2345  */
2346 void PQ_VectorSinQ31(int32_t *pSrc, int32_t *pDst, int32_t length);
2347 
2348 /*!
2349  * @brief Processing function for the Q31 vectorised cosine.
2350  *
2351  * @param  *pSrc      points to the block of input data
2352  * @param  *pDst      points to the block of output data
2353  * @param  length     the block size of input data.
2354  */
2355 void PQ_VectorCosQ31(int32_t *pSrc, int32_t *pDst, int32_t length);
2356 
2357 /*!
2358  * @brief Processing function for the 16-bit integer vectorised natural log.
2359  *
2360  * @param  *pSrc      points to the block of input data
2361  * @param  *pDst      points to the block of output data
2362  * @param  length     the block size of input data.
2363  */
2364 void PQ_VectorLnFixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2365 
2366 /*!
2367  * @brief Processing function for the 16-bit integer vectorised reciprocal.
2368  *
2369  * @param  *pSrc      points to the block of input data
2370  * @param  *pDst      points to the block of output data
2371  * @param  length     the block size of input data.
2372  */
2373 void PQ_VectorInvFixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2374 
2375 /*!
2376  * @brief Processing function for the 16-bit integer vectorised square-root.
2377  *
2378  * @param  *pSrc      points to the block of input data
2379  * @param  *pDst      points to the block of output data
2380  * @param  length     the block size of input data.
2381  */
2382 void PQ_VectorSqrtFixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2383 
2384 /*!
2385  * @brief Processing function for the 16-bit integer vectorised inverse square-root.
2386  *
2387  * @param  *pSrc      points to the block of input data
2388  * @param  *pDst      points to the block of output data
2389  * @param  length     the block size of input data.
2390  */
2391 void PQ_VectorInvSqrtFixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2392 
2393 /*!
2394  * @brief Processing function for the 16-bit integer vectorised natural exponent.
2395  *
2396  * @param  *pSrc      points to the block of input data
2397  * @param  *pDst      points to the block of output data
2398  * @param  length     the block size of input data.
2399  */
2400 void PQ_VectorEtoxFixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2401 
2402 /*!
2403  * @brief Processing function for the 16-bit integer vectorised natural exponent with negative parameter.
2404  *
2405  * @param  *pSrc      points to the block of input data
2406  * @param  *pDst      points to the block of output data
2407  * @param  length     the block size of input data.
2408  */
2409 void PQ_VectorEtonxFixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2410 
2411 /*!
2412  * @brief Processing function for the floating-point vectorised biquad direct form II.
2413  *
2414  * @param  *pSrc      points to the block of input data
2415  * @param  *pDst      points to the block of output data
2416  * @param  length the block size of input data.
2417  */
2418 void PQ_VectorBiquadDf2F32(float *pSrc, float *pDst, int32_t length);
2419 
2420 /*!
2421  * @brief Processing function for the 32-bit integer vectorised biquad direct form II.
2422  *
2423  * @param  *pSrc      points to the block of input data
2424  * @param  *pDst      points to the block of output data
2425  * @param  length the block size of input data
2426  */
2427 void PQ_VectorBiquadDf2Fixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2428 
2429 /*!
2430  * @brief Processing function for the 16-bit integer vectorised biquad direct form II.
2431  *
2432  * @param  *pSrc      points to the block of input data
2433  * @param  *pDst      points to the block of output data
2434  * @param  length the block size of input data
2435  */
2436 void PQ_VectorBiquadDf2Fixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2437 
2438 /*!
2439  * @brief Processing function for the floating-point vectorised biquad cascade direct form II.
2440  *
2441  * @param  *pSrc      points to the block of input data
2442  * @param  *pDst      points to the block of output data
2443  * @param  length the block size of input data.
2444  */
2445 void PQ_VectorBiquadCascadeDf2F32(float *pSrc, float *pDst, int32_t length);
2446 
2447 /*!
2448  * @brief Processing function for the 32-bit integer vectorised biquad cascade direct form II.
2449  *
2450  * @param  *pSrc      points to the block of input data
2451  * @param  *pDst      points to the block of output data
2452  * @param  length the block size of input data.
2453  */
2454 void PQ_VectorBiquadCascadeDf2Fixed32(int32_t *pSrc, int32_t *pDst, int32_t length);
2455 
2456 /*!
2457  * @brief Processing function for the 16-bit integer vectorised biquad cascade direct form II.
2458  *
2459  * @param  *pSrc      points to the block of input data
2460  * @param  *pDst      points to the block of output data
2461  * @param  length the block size of input data.
2462  */
2463 void PQ_VectorBiquadCascadeDf2Fixed16(int16_t *pSrc, int16_t *pDst, int32_t length);
2464 
2465 /*!
2466  * @brief Processing function for the fixed-point arctangent (inverse tangent).
2467  *
2468  * @param base  POWERQUAD peripheral base address
2469  * @param x value of opposite
2470  * @param y value of adjacent
2471  * @param iteration iteration times
2472  * @return The return value is in the range of -2^27 to 2^27, which means -pi to pi.
2473  * @note The sum of x and y should not exceed the range of int32_t.
2474  * @note Larger input number gets higher output accuracy, for example the arctan(0.5),
2475  * the result of PQ_ArctanFixed(POWERQUAD, 100000, 200000, kPQ_Iteration_24) is more
2476  * accurate than PQ_ArctanFixed(POWERQUAD, 1, 2, kPQ_Iteration_24).
2477  */
2478 int32_t PQ_ArctanFixed(POWERQUAD_Type *base, int32_t x, int32_t y, pq_cordic_iter_t iteration);
2479 
2480 /*!
2481  * @brief Processing function for the fixed-point inverse hyperbolic tangent (arctanh).
2482  *
2483  * @param base  POWERQUAD peripheral base address
2484  * @param x value of opposite
2485  * @param y value of adjacent
2486  * @param iteration iteration times
2487  * @return The return value is in the range of -2^27 to 2^27, which means -1 to 1.
2488  * @note The sum of x and y should not exceed the range of int32_t.
2489  * @note Larger input number gets higher output accuracy, for example the arctanh(0.5),
2490  * the result of PQ_ArctanhFixed(POWERQUAD, 100000, 200000, kPQ_Iteration_24) is more
2491  * accurate than PQ_ArctanhFixed(POWERQUAD, 1, 2, kPQ_Iteration_24).
2492  */
2493 int32_t PQ_ArctanhFixed(POWERQUAD_Type *base, int32_t x, int32_t y, pq_cordic_iter_t iteration);
2494 
/*!
 * @brief Processing function for the fixed biquad.
 *
 * Submits @p val through _pq_biquad1_fx() and then returns the value read
 * back through _pq_readAdd1_fx(), converted to int32_t.
 *
 * @param val value to be calculated
 * @return returns biquad(val).
 */
static inline int32_t PQ_Biquad1Fixed(int32_t val)
{
    /* Issue the input sample first; the result is fetched by the read below. */
    _pq_biquad1_fx(val);
    return (int32_t)_pq_readAdd1_fx();
}
2506 
2507 /*!
2508  * @brief Processing function for the complex FFT.
2509  *
2510  * @param base  POWERQUAD peripheral base address
2511  * @param length number of input samples
2512  * @param pData input data
2513  * @param pResult output data.
2514  */
2515 void PQ_TransformCFFT(POWERQUAD_Type *base, uint32_t length, void *pData, void *pResult);
2516 
2517 /*!
2518  * @brief Processing function for the real FFT.
2519  *
2520  * @param base  POWERQUAD peripheral base address
2521  * @param length number of input samples
2522  * @param pData input data
2523  * @param pResult output data.
2524  */
2525 void PQ_TransformRFFT(POWERQUAD_Type *base, uint32_t length, void *pData, void *pResult);
2526 
2527 /*!
2528  * @brief Processing function for the inverse complex FFT.
2529  *
2530  * @param base  POWERQUAD peripheral base address
2531  * @param length number of input samples
2532  * @param pData input data
2533  * @param pResult output data.
2534  */
2535 void PQ_TransformIFFT(POWERQUAD_Type *base, uint32_t length, void *pData, void *pResult);
2536 
2537 /*!
2538  * @brief Processing function for the complex DCT.
2539  *
2540  * @param base  POWERQUAD peripheral base address
2541  * @param length number of input samples
2542  * @param pData input data
2543  * @param pResult output data.
2544  */
2545 void PQ_TransformCDCT(POWERQUAD_Type *base, uint32_t length, void *pData, void *pResult);
2546 
2547 /*!
2548  * @brief Processing function for the real DCT.
2549  *
2550  * @param base  POWERQUAD peripheral base address
2551  * @param length number of input samples
2552  * @param pData input data
2553  * @param pResult output data.
2554  */
2555 void PQ_TransformRDCT(POWERQUAD_Type *base, uint32_t length, void *pData, void *pResult);
2556 
2557 /*!
2558  * @brief Processing function for the inverse complex DCT.
2559  *
2560  * @param base  POWERQUAD peripheral base address
2561  * @param length number of input samples
2562  * @param pData input data
2563  * @param pResult output data.
2564  */
2565 void PQ_TransformIDCT(POWERQUAD_Type *base, uint32_t length, void *pData, void *pResult);
2566 
2567 /*!
2568  * @brief Processing function for backup biquad context.
2569  *
2570  * @param base  POWERQUAD peripheral base address
2571  * @param biquad_num biquad side
2572  * @param state point to states.
2573  */
2574 void PQ_BiquadBackUpInternalState(POWERQUAD_Type *base, int32_t biquad_num, pq_biquad_state_t *state);
2575 
2576 /*!
2577  * @brief Processing function for restore biquad context.
2578  *
2579  * @param base  POWERQUAD peripheral base address
2580  * @param biquad_num biquad side
2581  * @param state point to states.
2582  */
2583 void PQ_BiquadRestoreInternalState(POWERQUAD_Type *base, int32_t biquad_num, pq_biquad_state_t *state);
2584 
2585 /*!
2586  * @brief  Initialization function for the direct form II Biquad cascade filter.
2587  *
2588  * @param[in,out] *S           points to an instance of the filter data structure.
2589  * @param[in]     numStages    number of 2nd order stages in the filter.
2590  * @param[in]     *pState      points to the state buffer.
2591  */
2592 void PQ_BiquadCascadeDf2Init(pq_biquad_cascade_df2_instance *S, uint8_t numStages, pq_biquad_state_t *pState);
2593 
2594 /*!
2595  * @brief Processing function for the floating-point direct form II Biquad cascade filter.
2596  *
2597  * @param[in]  *S        points to an instance of the filter data structure.
2598  * @param[in]  *pSrc     points to the block of input data.
2599  * @param[out] *pDst     points to the block of output data
2600  * @param[in]  blockSize number of samples to process.
2601  */
2602 void PQ_BiquadCascadeDf2F32(const pq_biquad_cascade_df2_instance *S, float *pSrc, float *pDst, uint32_t blockSize);
2603 
2604 /*!
2605  * @brief Processing function for the Q31 direct form II Biquad cascade filter.
2606  *
2607  * @param[in]  *S        points to an instance of the filter data structure.
2608  * @param[in]  *pSrc     points to the block of input data.
2609  * @param[out] *pDst     points to the block of output data
2610  * @param[in]  blockSize number of samples to process.
2611  */
2612 void PQ_BiquadCascadeDf2Fixed32(const pq_biquad_cascade_df2_instance *S,
2613                                 int32_t *pSrc,
2614                                 int32_t *pDst,
2615                                 uint32_t blockSize);
2616 
2617 /*!
2618  * @brief Processing function for the Q15 direct form II Biquad cascade filter.
2619  *
2620  * @param[in]  *S        points to an instance of the filter data structure.
2621  * @param[in]  *pSrc     points to the block of input data.
2622  * @param[out] *pDst     points to the block of output data
2623  * @param[in]  blockSize number of samples to process.
2624  */
2625 void PQ_BiquadCascadeDf2Fixed16(const pq_biquad_cascade_df2_instance *S,
2626                                 int16_t *pSrc,
2627                                 int16_t *pDst,
2628                                 uint32_t blockSize);
2629 
2630 /*!
2631  * @brief Processing function for the FIR.
2632  *
2633  * @param base  POWERQUAD peripheral base address
2634  * @param pAData the first input sequence
2635  * @param ALength number of the first input sequence
2636  * @param pBData the second input sequence
2637  * @param BLength number of the second input sequence
2638  * @param pResult array for the output data
2639  * @param opType operation type, could be PQ_FIR_FIR, PQ_FIR_CONVOLUTION, PQ_FIR_CORRELATION.
2640  */
2641 void PQ_FIR(POWERQUAD_Type *base,
2642             const void *pAData,
2643             int32_t ALength,
2644             const void *pBData,
2645             int32_t BLength,
2646             void *pResult,
2647             uint32_t opType);
2648 
2649 /*!
2650  * @brief Processing function for the incremental FIR.
2651  *        This function can be used after PQ_FIR() for incremental FIR
2652  *        operation when new x data are available.
2653  *
2654  * @param base  POWERQUAD peripheral base address
2655  * @param ALength number of input samples
2656  * @param BLength number of taps
2657  * @param xOffset offset for number of input samples
2658  */
2659 void PQ_FIRIncrement(POWERQUAD_Type *base, int32_t ALength, int32_t BLength, int32_t xOffset);
2660 
2661 /*!
2662  * @brief Processing function for the matrix addition.
2663  *
2664  * @param base  POWERQUAD peripheral base address
2665  * @param length rows and cols for matrix. LENGTH register configuration:
2666  *        LENGTH[23:16] = M2 cols
2667  *        LENGTH[15:8]  = M1 cols
2668  *        LENGTH[7:0]   = M1 rows
2669  *        This could be constructed using macro @ref POWERQUAD_MAKE_MATRIX_LEN.
2670  * @param pAData input matrix A
2671  * @param pBData input matrix B
2672  * @param pResult array for the output data.
2673  */
2674 void PQ_MatrixAddition(POWERQUAD_Type *base, uint32_t length, void *pAData, void *pBData, void *pResult);
2675 
2676 /*!
2677  * @brief Processing function for the matrix subtraction.
2678  *
2679  * @param base  POWERQUAD peripheral base address
2680  * @param length rows and cols for matrix. LENGTH register configuration:
2681  *        LENGTH[23:16] = M2 cols
2682  *        LENGTH[15:8]  = M1 cols
2683  *        LENGTH[7:0]   = M1 rows
2684  *        This could be constructed using macro @ref POWERQUAD_MAKE_MATRIX_LEN.
2685  * @param pAData input matrix A
2686  * @param pBData input matrix B
2687  * @param pResult array for the output data.
2688  */
2689 void PQ_MatrixSubtraction(POWERQUAD_Type *base, uint32_t length, void *pAData, void *pBData, void *pResult);
2690 
2691 /*!
2692  * @brief Processing function for the matrix multiplication.
2693  *
2694  * @param base  POWERQUAD peripheral base address
2695  * @param length rows and cols for matrix. LENGTH register configuration:
2696  *        LENGTH[23:16] = M2 cols
2697  *        LENGTH[15:8]  = M1 cols
2698  *        LENGTH[7:0]   = M1 rows
2699  *        This could be constructed using macro @ref POWERQUAD_MAKE_MATRIX_LEN.
2700  * @param pAData input matrix A
2701  * @param pBData input matrix B
2702  * @param pResult array for the output data.
2703  */
2704 void PQ_MatrixMultiplication(POWERQUAD_Type *base, uint32_t length, void *pAData, void *pBData, void *pResult);
2705 
2706 /*!
2707  * @brief Processing function for the matrix product.
2708  *
2709  * @param base  POWERQUAD peripheral base address
2710  * @param length rows and cols for matrix. LENGTH register configuration:
2711  *        LENGTH[23:16] = M2 cols
2712  *        LENGTH[15:8]  = M1 cols
2713  *        LENGTH[7:0]   = M1 rows
2714  *        This could be constructed using macro @ref POWERQUAD_MAKE_MATRIX_LEN.
2715  * @param pAData input matrix A
2716  * @param pBData input matrix B
2717  * @param pResult array for the output data.
2718  */
2719 void PQ_MatrixProduct(POWERQUAD_Type *base, uint32_t length, void *pAData, void *pBData, void *pResult);
2720 
2721 /*!
2722  * @brief Processing function for the vector dot product.
2723  *
2724  * @param base  POWERQUAD peripheral base address
2725  * @param length length of vector
2726  * @param pAData input vector A
2727  * @param pBData input vector B
2728  * @param pResult array for the output data.
2729  */
2730 void PQ_VectorDotProduct(POWERQUAD_Type *base, uint32_t length, void *pAData, void *pBData, void *pResult);
2731 
2732 /*!
2733  * @brief Processing function for the matrix inverse.
2734  *
2735  * @param base  POWERQUAD peripheral base address
2736  * @param length rows and cols for matrix. LENGTH register configuration:
2737  *        LENGTH[23:16] = M2 cols
2738  *        LENGTH[15:8]  = M1 cols
2739  *        LENGTH[7:0]   = M1 rows
2740  *        This could be constructed using macro @ref POWERQUAD_MAKE_MATRIX_LEN.
2741  * @param pData input matrix
2742  * @param pTmpData input temporary matrix; its length must be no less than the length of pData. 1024 words is sufficient for
2743  * the largest supported matrix.
2744  * @param pResult array for the output data, round down for fixed point.
2745  */
2746 void PQ_MatrixInversion(POWERQUAD_Type *base, uint32_t length, void *pData, void *pTmpData, void *pResult);
2747 
2748 /*!
2749  * @brief Processing function for the matrix transpose.
2750  *
2751  * @param base  POWERQUAD peripheral base address
2752  * @param length rows and cols for matrix. LENGTH register configuration:
2753  *        LENGTH[23:16] = M2 cols
2754  *        LENGTH[15:8]  = M1 cols
2755  *        LENGTH[7:0]   = M1 rows
2756  *        This could be constructed using macro @ref POWERQUAD_MAKE_MATRIX_LEN.
2757  * @param pData input matrix
2758  * @param pResult array for the output data.
2759  */
2760 void PQ_MatrixTranspose(POWERQUAD_Type *base, uint32_t length, void *pData, void *pResult);
2761 
2762 /*!
2763  * @brief Processing function for the matrix scale.
2764  *
2765  * @param base  POWERQUAD peripheral base address
2766  * @param length rows and cols for matrix. LENGTH register configuration:
2767  *        LENGTH[23:16] = M2 cols
2768  *        LENGTH[15:8]  = M1 cols
2769  *        LENGTH[7:0]   = M1 rows
2770  *        This could be constructed using macro @ref POWERQUAD_MAKE_MATRIX_LEN.
2771  * @param misc scaling parameters
2772  * @param pData input matrix
2773  * @param pResult array for the output data.
2774  */
2775 void PQ_MatrixScale(POWERQUAD_Type *base, uint32_t length, float misc, const void *pData, void *pResult);
2776 
2777 /* @} */
2778 
2779 #if defined(__cplusplus)
2780 }
2781 #endif /* __cplusplus */
2782 
2783 /*! @}*/
2784 
2785 #endif /* _FSL_POWERQUAD_H_ */
2786