1 /*
2  * Copyright 2018-2022 NXP
3  * All rights reserved.
4  *
5  * SPDX-License-Identifier: BSD-3-Clause
6  */
7 
8 #include "fsl_powerquad.h"
9 #include "fsl_powerquad_data.h"
10 #include "arm_math.h"
11 
12 /*******************************************************************************
13  * Definitions
14  ******************************************************************************/
15 
16 /* Component ID definition, used by tools. */
17 #ifndef FSL_COMPONENT_ID
18 #define FSL_COMPONENT_ID "platform.drivers.powerquad_cmsis"
19 #endif
20 
21 #define PQ_SET_FIX32_CONFIG                                                                           \
22     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
23     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
24     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
25     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
26     POWERQUAD->TMPBASE   = 0xE0000000U
27 
28 #define PQ_SET_FIX16_CONFIG                                                                           \
29     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
30     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
31     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
32     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
33     POWERQUAD->TMPBASE   = 0xE0000000U
34 
35 #define PQ_SET_Q31_CONFIG                                                                               \
36     POWERQUAD->OUTFORMAT = ((uint32_t)(-31) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
37     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float;   \
38     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float;   \
39     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;   \
40     POWERQUAD->TMPBASE   = 0xE0000000U
41 
42 #define PQ_SET_Q15_CONFIG                                                                               \
43     POWERQUAD->OUTFORMAT = ((uint32_t)(-15) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
44     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;   \
45     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;   \
46     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;   \
47     POWERQUAD->TMPBASE   = 0xE0000000U
48 
49 #define PQ_SET_F32_CONFIG                                                                             \
50     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
51     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
52     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
53     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
54     POWERQUAD->TMPBASE   = 0xE0000000U
55 
56 #define PQ_SET_FFT_Q31_CONFIG                                                                         \
57     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
58     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
59     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
60     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
61     POWERQUAD->TMPBASE   = 0xE0000000U
62 
63 #define PQ_SET_FFT_Q15_CONFIG                                                                         \
64     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
65     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
66     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
67     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
68     POWERQUAD->TMPBASE   = 0xE0000000U
69 
70 #define PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG                                                      \
71     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
72     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
73     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
74     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
75     POWERQUAD->TMPBASE   = 0xE0000000U
76 
77 #define PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG                                                       \
78     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
79     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
80     POWERQUAD->TMPBASE   = 0xE0000000U
81 
82 #define PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG                                                      \
83     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
84     POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
85     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
86     POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
87     POWERQUAD->TMPBASE   = 0xE0000000U
88 
89 #define PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG                                                       \
90     POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
91     POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
92     POWERQUAD->TMPBASE   = 0xE0000000U
93 
94 /*******************************************************************************
95  * Code
96  ******************************************************************************/
_arm_fir_increment(const void * pSrc,uint32_t srcLen,const void * pTap,uint16_t tapLen,void * pDst,uint32_t offset,uint32_t elemSize)97 static void _arm_fir_increment(const void *pSrc,
98                                uint32_t srcLen,
99                                const void *pTap,
100                                uint16_t tapLen,
101                                void *pDst,
102                                uint32_t offset,
103                                uint32_t elemSize)
104 {
105     POWERQUAD->INABASE = ((uint32_t)(const uint32_t *)pSrc) - (offset * elemSize);
106     POWERQUAD->INBBASE = (uint32_t)(const uint32_t *)pTap;
107     POWERQUAD->LENGTH  = (((uint32_t)tapLen & 0xFFFFUL) << 16U) + (srcLen & 0xFFFFUL);
108     POWERQUAD->OUTBASE = ((uint32_t)(uint32_t *)pDst) - (offset * elemSize);
109     POWERQUAD->MISC    = offset;
110     POWERQUAD->CONTROL = (CP_FIR << 4U) | PQ_FIR_INCREMENTAL;
111 }
112 
arm_cos_f32(float32_t x)113 float32_t arm_cos_f32(float32_t x)
114 {
115     float tmp;
116 
117     PQ_CosF32(&x, &tmp);
118     return tmp;
119 }
120 
arm_cos_q31(q31_t x)121 q31_t arm_cos_q31(q31_t x)
122 {
123     /* For PQ: input -1 to 1 means -pi to pi
124      * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
125     x *= 2;
126     return PQ_CosQ31(x);
127 }
128 
arm_cos_q15(q15_t x)129 q15_t arm_cos_q15(q15_t x)
130 {
131     /* For PQ: input -1 to 1 means -pi to pi
132      * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
133     x *= 2;
134     return PQ_CosQ15(x);
135 }
136 
arm_sin_f32(float32_t x)137 float32_t arm_sin_f32(float32_t x)
138 {
139     float tmp;
140 
141     PQ_SinF32(&x, &tmp);
142     return tmp;
143 }
144 
arm_sin_q31(q31_t x)145 q31_t arm_sin_q31(q31_t x)
146 {
147     /* For PQ: input -1 to 1 means -pi to pi
148      * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
149     x *= 2;
150     return PQ_SinQ31(x);
151 }
152 
arm_sin_q15(q15_t x)153 q15_t arm_sin_q15(q15_t x)
154 {
155     /* For PQ: input -1 to 1 means -pi to pi
156      * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
157     x *= 2;
158     return PQ_SinQ15(x);
159 }
160 
arm_sqrt_q31(q31_t in,q31_t * pOut)161 arm_status arm_sqrt_q31(q31_t in, q31_t *pOut)
162 {
163     uint32_t cppre;
164     arm_status status;
165 
166     /* If the input is a positive number then compute the signBits. */
167     if (in > 0)
168     {
169         cppre            = POWERQUAD->CPPRE;
170         POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-31) | POWERQUAD_CPPRE_CPPRE_OUT(31);
171         *pOut            = (q31_t)PQ_SqrtFixed((uint32_t)in);
172         POWERQUAD->CPPRE = cppre;
173 
174         status = (ARM_MATH_SUCCESS);
175     }
176     /* If the number is a negative number then store zero as its square root value */
177     else
178     {
179         *pOut = 0;
180 
181         status = (ARM_MATH_ARGUMENT_ERROR);
182     }
183 
184     return status;
185 }
186 
arm_sqrt_q15(q15_t in,q15_t * pOut)187 arm_status arm_sqrt_q15(q15_t in, q15_t *pOut)
188 {
189     uint32_t cppre;
190     arm_status status;
191 
192     /* If the input is a positive number then compute the signBits. */
193     if (in > 0)
194     {
195         cppre            = POWERQUAD->CPPRE;
196         POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-15) | POWERQUAD_CPPRE_CPPRE_OUT(15);
197         *pOut            = (q15_t)PQ_SqrtFixed((uint32_t)in);
198         POWERQUAD->CPPRE = cppre;
199 
200         status = (ARM_MATH_SUCCESS);
201     }
202     /* If the number is a negative number then store zero as its square root value */
203     else
204     {
205         *pOut = 0;
206 
207         status = (ARM_MATH_ARGUMENT_ERROR);
208     }
209 
210     return status;
211 }
212 
arm_cfft_q31(const arm_cfft_instance_q31 * S,q31_t * p1,uint8_t ifftFlag,uint8_t bitReverseFlag)213 void arm_cfft_q31(const arm_cfft_instance_q31 *S, q31_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
214 {
215     assert(bitReverseFlag == 1U);
216 
217     q31_t *pIn      = p1;
218     q31_t *pOut     = p1;
219     uint32_t length = S->fftLen;
220 
221     PQ_SET_FFT_Q31_CONFIG;
222 
223     if (ifftFlag == 1U)
224     {
225         PQ_TransformIFFT(POWERQUAD, length, pIn, pOut);
226     }
227     else
228     {
229         PQ_TransformCFFT(POWERQUAD, length, pIn, pOut);
230     }
231 
232     PQ_WaitDone(POWERQUAD);
233 }
234 
arm_cfft_q15(const arm_cfft_instance_q15 * S,q15_t * p1,uint8_t ifftFlag,uint8_t bitReverseFlag)235 void arm_cfft_q15(const arm_cfft_instance_q15 *S, q15_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
236 {
237     assert(bitReverseFlag == 1U);
238 
239     q15_t *pIn      = p1;
240     q15_t *pOut     = p1;
241     uint32_t length = S->fftLen;
242 
243     PQ_SET_FFT_Q15_CONFIG;
244 
245     if (ifftFlag == 1U)
246     {
247         PQ_TransformIFFT(POWERQUAD, length, pIn, pOut);
248     }
249     else
250     {
251         PQ_TransformCFFT(POWERQUAD, length, pIn, pOut);
252     }
253 
254     PQ_WaitDone(POWERQUAD);
255 }
256 
arm_rfft_init_q31(arm_rfft_instance_q31 * S,uint32_t fftLenReal,uint32_t ifftFlagR,uint32_t bitReverseFlag)257 arm_status arm_rfft_init_q31(arm_rfft_instance_q31 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag)
258 {
259     /* Only supprt such mode. */
260     assert(ifftFlagR == 0U);
261     assert(bitReverseFlag == 1U);
262 
263     /*  Initialise the default arm status */
264     arm_status status = ARM_MATH_SUCCESS;
265 
266     /*  Initialize the Real FFT length */
267     S->fftLenReal = (uint16_t)fftLenReal;
268 
269     /*  Initialize the Flag for selection of RFFT or RIFFT */
270     S->ifftFlagR = (uint8_t)ifftFlagR;
271 
272     /*  Initialize the Flag for calculation Bit reversal or not */
273     S->bitReverseFlagR = (uint8_t)bitReverseFlag;
274 
275     /* return the status of RFFT Init function */
276     return (status);
277 }
278 
arm_rfft_q31(const arm_rfft_instance_q31 * S,q31_t * pSrc,q31_t * pDst)279 void arm_rfft_q31(const arm_rfft_instance_q31 *S, q31_t *pSrc, q31_t *pDst)
280 {
281     uint32_t length = S->fftLenReal;
282     PQ_SET_FFT_Q31_CONFIG;
283 
284     /* Calculation of RFFT of input */
285     PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst);
286 
287     PQ_WaitDone(POWERQUAD);
288 }
289 
arm_rfft_init_q15(arm_rfft_instance_q15 * S,uint32_t fftLenReal,uint32_t ifftFlagR,uint32_t bitReverseFlag)290 arm_status arm_rfft_init_q15(arm_rfft_instance_q15 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag)
291 {
292     /* Only supprt such mode. */
293     assert(ifftFlagR == 0U);
294     assert(bitReverseFlag == 1U);
295 
296     /*  Initialise the default arm status */
297     arm_status status = ARM_MATH_SUCCESS;
298 
299     /*  Initialize the Real FFT length */
300     S->fftLenReal = (uint16_t)fftLenReal;
301 
302     /*  Initialize the Flag for selection of RFFT or RIFFT */
303     S->ifftFlagR = (uint8_t)ifftFlagR;
304 
305     /*  Initialize the Flag for calculation Bit reversal or not */
306     S->bitReverseFlagR = (uint8_t)bitReverseFlag;
307 
308     /* return the status of RFFT Init function */
309     return (status);
310 }
311 
arm_rfft_q15(const arm_rfft_instance_q15 * S,q15_t * pSrc,q15_t * pDst)312 void arm_rfft_q15(const arm_rfft_instance_q15 *S, q15_t *pSrc, q15_t *pDst)
313 {
314     uint32_t length = S->fftLenReal;
315     PQ_SET_FFT_Q15_CONFIG;
316 
317     /* Calculation of RFFT of input */
318     PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst);
319 
320     PQ_WaitDone(POWERQUAD);
321 }
322 
arm_dct4_init_q31(arm_dct4_instance_q31 * S,arm_rfft_instance_q31 * S_RFFT,arm_cfft_radix4_instance_q31 * S_CFFT,uint16_t N,uint16_t Nby2,q31_t normalize)323 arm_status arm_dct4_init_q31(arm_dct4_instance_q31 *S,
324                              arm_rfft_instance_q31 *S_RFFT,
325                              arm_cfft_radix4_instance_q31 *S_CFFT,
326                              uint16_t N,
327                              uint16_t Nby2,
328                              q31_t normalize)
329 {
330     arm_status status = ARM_MATH_SUCCESS;
331 
332     /* Initialize the DCT4 length */
333     S->N = N;
334 
335     /* Initialize Real FFT Instance */
336     S->pRfft = S_RFFT;
337 
338     switch (N)
339     {
340         /* Initialize the table modifier values */
341         case 512U:
342             S->pTwiddle   = dct512_twiddle;
343             S->pCosFactor = dct512_cosFactor;
344             break;
345 
346         case 256U:
347             S->pTwiddle   = dct256_twiddle;
348             S->pCosFactor = dct256_cosFactor;
349             break;
350 
351         case 128U:
352             S->pTwiddle   = dct128_twiddle;
353             S->pCosFactor = dct128_cosFactor;
354             break;
355 
356         case 64U:
357             S->pTwiddle   = dct64_twiddle;
358             S->pCosFactor = dct64_cosFactor;
359             break;
360 
361         case 32U:
362             S->pTwiddle   = dct32_twiddle;
363             S->pCosFactor = dct32_cosFactor;
364             break;
365 
366         case 16U:
367             S->pTwiddle   = dct16_twiddle;
368             S->pCosFactor = dct16_cosFactor;
369             break;
370 
371         default:
372             status = ARM_MATH_ARGUMENT_ERROR;
373             break;
374     }
375 
376     if (ARM_MATH_SUCCESS == status)
377     {
378         /* Initialize the RFFT/RIFFT Function */
379         status = arm_rfft_init_q31(S->pRfft, S->N, 0, 1);
380     }
381 
382     return status;
383 }
384 
arm_dct4_q31(const arm_dct4_instance_q31 * S,q31_t * pState,q31_t * pInlineBuffer)385 void arm_dct4_q31(const arm_dct4_instance_q31 *S, q31_t *pState, q31_t *pInlineBuffer)
386 {
387     /* Calculate DCT-II for N-point input */
388     uint16_t i;           /* Loop counter */
389     const q31_t *weights; /* Pointer to the Weights table */
390     q31_t *pOut;          /* Temporary pointers for output buffer */
391     q31_t *pS1, *pbuff;   /* Temporary pointers for input buffer and pState buffer */
392     q31_t in;             /* Temporary variable */
393     const q31_t *cosFact;
394     uint32_t length;
395     uint8_t matRow;
396     uint8_t matCol;
397     uint8_t matLoop;
398     uint16_t lenPerMatLoop;
399 
400     /*
401      * Pre-processing involves multiplying input with cos factor,
402      *     r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))
403      *              where,
404      *                 r(n) -- output of preprocessing
405      *                 u(n) -- input to preprocessing(actual Source buffer)
406      */
407 
408     /*
409      * Use matrix production function for preprocessing. Matrix production
410      * supports 16x16 at the most, so the matrix row is set to 16.
411      */
412     matRow        = 16U;
413     lenPerMatLoop = S->N >= 256U ? 256U : S->N;
414     matCol        = (uint8_t)(lenPerMatLoop / 16U);
415     matLoop       = (uint8_t)(((S->N - 1U) >> 8U) + 1U);
416     cosFact       = S->pCosFactor;
417     pbuff         = pInlineBuffer;
418 
419     length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
420 
421     while ((matLoop--) != 0U)
422     {
423         PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
424 
425         /* cos factor is Q31, convert to float */
426         PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U);
427         cosFact = &cosFact[lenPerMatLoop];
428 
429         PQ_WaitDone(POWERQUAD);
430 
431         /* Product. */
432         PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
433 
434         PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff);
435 
436         pbuff = &pbuff[lenPerMatLoop];
437 
438         PQ_WaitDone(POWERQUAD);
439     }
440 
441     PQ_SET_FFT_Q31_CONFIG;
442 
443     PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState);
444 
445     /* ARM calculation while PQ is running. */
446     /*
447      * Use matrix production function for twiddle multiplication.
448      * Matrix production supports 16x16 at the most. The total elements are 2*N;
449      */
450     lenPerMatLoop = S->N >= 128U ? 128U : S->N;
451     matCol        = (uint8_t)(lenPerMatLoop / 8U);
452     matLoop       = (uint8_t)(((S->N - 1U) >> 7U) + 1U);
453     weights       = S->pTwiddle;
454     pOut          = pState;
455 
456     length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
457 
458     PQ_WaitDone(POWERQUAD);
459 
460     while ((matLoop--) != 0U)
461     {
462         PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
463 
464         /* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */
465         PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U);
466         weights = &weights[lenPerMatLoop * 2U];
467 
468         PQ_WaitDone(POWERQUAD);
469 
470         /* Product. */
471         PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
472 
473         PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut);
474 
475         PQ_WaitDone(POWERQUAD);
476 
477         for (i = 0; i < lenPerMatLoop / 4U; i++)
478         {
479             pOut[0] -= pOut[1];
480             pOut = &pOut[2];
481             pOut[0] -= pOut[1];
482             pOut = &pOut[2];
483             pOut[0] -= pOut[1];
484             pOut = &pOut[2];
485             pOut[0] -= pOut[1];
486             pOut = &pOut[2];
487         }
488     }
489 
490     /* ----------- Post-processing ---------- */
491     /* DCT-IV can be obtained from DCT-II by the equation,
492      *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
493      *       Hence, Y4(0) = Y2(0)/2  */
494     /* Getting only real part from the output and Converting to DCT-IV */
495 
496     /* pbuff initialized to input buffer. */
497     pbuff = pInlineBuffer;
498 
499     /* pS1 initialized to pState */
500     pS1 = pState;
501 
502     /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/
503     in       = (q31_t)(float)((float)*pS1 / 1.41421356237f);
504     *pbuff++ = in;
505     pS1      = &pS1[2];
506 
507     in       = *pS1 - in;
508     *pbuff++ = in;
509     pS1      = &pS1[2];
510 
511     in       = *pS1 - in;
512     *pbuff++ = in;
513     pS1      = &pS1[2];
514 
515     in       = *pS1 - in;
516     *pbuff++ = in;
517     pS1      = &pS1[2];
518 
519     i = S->N / 4U - 1U;
520 
521     while (i > 0U)
522     {
523         in       = *pS1 - in;
524         *pbuff++ = in;
525         pS1      = &pS1[2];
526 
527         in       = *pS1 - in;
528         *pbuff++ = in;
529         pS1      = &pS1[2];
530 
531         in       = *pS1 - in;
532         *pbuff++ = in;
533         pS1      = &pS1[2];
534 
535         in       = *pS1 - in;
536         *pbuff++ = in;
537         pS1      = &pS1[2];
538 
539         i--;
540     }
541 }
542 
arm_dct4_init_q15(arm_dct4_instance_q15 * S,arm_rfft_instance_q15 * S_RFFT,arm_cfft_radix4_instance_q15 * S_CFFT,uint16_t N,uint16_t Nby2,q15_t normalize)543 arm_status arm_dct4_init_q15(arm_dct4_instance_q15 *S,
544                              arm_rfft_instance_q15 *S_RFFT,
545                              arm_cfft_radix4_instance_q15 *S_CFFT,
546                              uint16_t N,
547                              uint16_t Nby2,
548                              q15_t normalize)
549 {
550     arm_status status = ARM_MATH_SUCCESS;
551 
552     /* Initialize the DCT4 length */
553     S->N = N;
554 
555     /* Initialize Real FFT Instance */
556     S->pRfft = S_RFFT;
557 
558     switch (N)
559     {
560         /* Initialize the table modifier values */
561         case 512U:
562             S->pTwiddle   = (void *)dct512_twiddle;
563             S->pCosFactor = (void *)dct512_cosFactor;
564             break;
565 
566         case 256U:
567             S->pTwiddle   = (void *)dct256_twiddle;
568             S->pCosFactor = (void *)dct256_cosFactor;
569             break;
570 
571         case 128U:
572             S->pTwiddle   = (void *)dct128_twiddle;
573             S->pCosFactor = (void *)dct128_cosFactor;
574             break;
575 
576         case 64U:
577             S->pTwiddle   = (void *)dct64_twiddle;
578             S->pCosFactor = (void *)dct64_cosFactor;
579             break;
580 
581         case 32U:
582             S->pTwiddle   = (void *)dct32_twiddle;
583             S->pCosFactor = (void *)dct32_cosFactor;
584             break;
585 
586         case 16U:
587             S->pTwiddle   = (void *)dct16_twiddle;
588             S->pCosFactor = (void *)dct16_cosFactor;
589             break;
590 
591         default:
592             status = ARM_MATH_ARGUMENT_ERROR;
593             break;
594     }
595 
596     if (ARM_MATH_SUCCESS == status)
597     {
598         /* Initialize the RFFT/RIFFT Function */
599         status = arm_rfft_init_q15(S->pRfft, S->N, 0, 1);
600     }
601 
602     /* return the status of DCT4 Init function */
603     return status;
604 }
605 
arm_dct4_q15(const arm_dct4_instance_q15 * S,q15_t * pState,q15_t * pInlineBuffer)606 void arm_dct4_q15(const arm_dct4_instance_q15 *S, q15_t *pState, q15_t *pInlineBuffer)
607 {
608     /* Calculate DCT-II for N-point input */
609     uint16_t i;           /* Loop counter */
610     const q15_t *weights; /* Pointer to the Weights table */
611     q15_t *pOut;          /* Temporary pointers for output buffer */
612     q15_t *pS1, *pbuff;   /* Temporary pointers for input buffer and pState buffer */
613     q15_t in;             /* Temporary variable */
614     const q15_t *cosFact;
615     uint32_t length;
616     uint8_t matRow;
617     uint8_t matCol;
618     uint8_t matLoop;
619     uint16_t lenPerMatLoop;
620 
621     /*
622      * Pre-processing involves multiplying input with cos factor,
623      *     r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))
624      *              where,
625      *                 r(n) -- output of preprocessing
626      *                 u(n) -- input to preprocessing(actual Source buffer)
627      */
628 
629     /*
630      * Use matrix production function for preprocessing. Matrix production
631      * supports 16x16 at the most, so the matrix row is set to 16.
632      */
633     matRow        = 16U;
634     lenPerMatLoop = S->N >= 256U ? 256U : S->N;
635     matCol        = (uint8_t)(lenPerMatLoop / 16U);
636     matLoop       = (uint8_t)(((S->N - 1U) >> 8U) + 1U);
637     cosFact       = S->pCosFactor;
638     pbuff         = pInlineBuffer;
639 
640     length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, 0);
641 
642     while ((matLoop--) != 0U)
643     {
644         PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
645 
646         /* cos factor is Q31, convert to float */
647         PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U);
648         cosFact = &cosFact[2U * lenPerMatLoop];
649 
650         PQ_WaitDone(POWERQUAD);
651 
652         /* Product. */
653         POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
654         POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
655         POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
656         POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
657         POWERQUAD->TMPBASE   = 0xE0000000U;
658 
659         PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff);
660 
661         PQ_WaitDone(POWERQUAD);
662 
663         pbuff = &pbuff[lenPerMatLoop];
664     }
665 
666     PQ_SET_FFT_Q15_CONFIG;
667 
668     PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState);
669 
670     /* ARM calculation while PQ is running. */
671     /*
672      * Use matrix production function for twiddle multiplication.
673      * Matrix production supports 16x16 at the most. The total elements are 2*N;
674      */
675     lenPerMatLoop = S->N >= 128U ? 128U : S->N;
676     matCol        = (uint8_t)(lenPerMatLoop / 8U);
677     matLoop       = (uint8_t)(((S->N - 1U) >> 7U) + 1U);
678     weights       = S->pTwiddle;
679     pOut          = pState;
680 
681     length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
682 
683     PQ_WaitDone(POWERQUAD);
684 
685     while ((matLoop--) != 0U)
686     {
687         PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
688 
689         /* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */
690         PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U);
691         weights = &weights[lenPerMatLoop * 2U];
692 
693         PQ_WaitDone(POWERQUAD);
694 
695         /* Product. */
696         POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
697         POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
698         POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
699         POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
700         POWERQUAD->TMPBASE   = 0xE0000000U;
701 
702         PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut);
703 
704         PQ_WaitDone(POWERQUAD);
705 
706         for (i = 0; i < lenPerMatLoop / 4U; i++)
707         {
708             pOut[0] -= pOut[1];
709             pOut = &pOut[2];
710             pOut[0] -= pOut[1];
711             pOut = &pOut[2];
712             pOut[0] -= pOut[1];
713             pOut = &pOut[2];
714             pOut[0] -= pOut[1];
715             pOut = &pOut[2];
716         }
717     }
718 
719     /* ----------- Post-processing ---------- */
720     /* DCT-IV can be obtained from DCT-II by the equation,
721      *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
722      *       Hence, Y4(0) = Y2(0)/2  */
723     /* Getting only real part from the output and Converting to DCT-IV */
724 
725     /* pbuff initialized to input buffer. */
726     pbuff = pInlineBuffer;
727 
728     /* pS1 initialized to pState */
729     pS1 = pState;
730 
731     /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/
732     in       = (q15_t)(float)((float)*pS1 / 1.41421356237f);
733     *pbuff++ = in;
734     pS1      = &pS1[2];
735 
736     in       = *pS1 - in;
737     *pbuff++ = in;
738     pS1      = &pS1[2];
739 
740     in       = *pS1 - in;
741     *pbuff++ = in;
742     pS1      = &pS1[2];
743 
744     in       = *pS1 - in;
745     *pbuff++ = in;
746     pS1      = &pS1[2];
747 
748     i = S->N / 4U - 1U;
749 
750     while (i > 0U)
751     {
752         in       = *pS1 - in;
753         *pbuff++ = in;
754         pS1      = &pS1[2];
755 
756         in       = *pS1 - in;
757         *pbuff++ = in;
758         pS1      = &pS1[2];
759 
760         in       = *pS1 - in;
761         *pbuff++ = in;
762         pS1      = &pS1[2];
763 
764         in       = *pS1 - in;
765         *pbuff++ = in;
766         pS1      = &pS1[2];
767 
768         i--;
769     }
770 }
771 
arm_fir_init_f32(arm_fir_instance_f32 * S,uint16_t numTaps,const float32_t * pCoeffs,float32_t * pState,uint32_t blockSize)772 void arm_fir_init_f32(
773     arm_fir_instance_f32 *S, uint16_t numTaps, const float32_t *pCoeffs, float32_t *pState, uint32_t blockSize)
774 {
775     uint32_t i;
776 
777     /*
778      * CMSIS DSP API filter coefficients stored in time reversed order, but PQ
779      * uses the positive order. PQ does not use pState, so pState pState[1:numTaps]
780      * is used here to save the coefficients in positive order. At the same time,
781      * pState[0] is used to save the offset used for incremetal calculation.
782      * Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
783      * the blockSize should be larger than 1.
784      */
785     assert(blockSize > 1U);
786 
787     S->numTaps = numTaps;
788     S->pCoeffs = pCoeffs;
789     S->pState  = pState;
790 
791     for (i = 0U; i < numTaps; i++)
792     {
793         pState[numTaps - i] = pCoeffs[i];
794     }
795 
796     *(uint32_t *)(void *)pState = 0U;
797 }
798 
arm_fir_init_q31(arm_fir_instance_q31 * S,uint16_t numTaps,const q31_t * pCoeffs,q31_t * pState,uint32_t blockSize)799 void arm_fir_init_q31(
800     arm_fir_instance_q31 *S, uint16_t numTaps, const q31_t *pCoeffs, q31_t *pState, uint32_t blockSize)
801 {
802     uint32_t i;
803 
804     /*
805      * CMSIS DSP API filter coefficients stored in time reversed order, but PQ
806      * uses the positive order. PQ does not use pState, so pState pState[1:numTaps]
807      * is used here to save the coefficients in positive order. At the same time,
808      * pState[0] is used to save the offset used for incremetal calculation.
809      * Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
810      * the blockSize should be larger than 1.
811      */
812     assert(blockSize > 1U);
813 
814     S->numTaps = numTaps;
815     S->pCoeffs = pCoeffs;
816     S->pState  = pState;
817 
818     for (i = 0U; i < numTaps; i++)
819     {
820         pState[numTaps - i] = pCoeffs[i];
821     }
822 
823     pState[0] = 0;
824 }
825 
arm_fir_init_q15(arm_fir_instance_q15 * S,uint16_t numTaps,const q15_t * pCoeffs,q15_t * pState,uint32_t blockSize)826 arm_status arm_fir_init_q15(
827     arm_fir_instance_q15 *S, uint16_t numTaps, const q15_t *pCoeffs, q15_t *pState, uint32_t blockSize)
828 {
829     uint16_t i;
830 
831     /*
832      * CMSIS DSP API filter coefficients stored in time reversed order, but PQ
833      * uses the positive order. PQ does not use pState, so pState pState[2:numTaps]
834      * is used here to save the coefficients in positive order. At the same time,
835      * pState[0:1] is used to save the offset used for incremetal calculation.
836      * Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
837      * the blockSize should be larger than 2.
838      */
839     assert(blockSize > 2U);
840 
841     S->numTaps = numTaps;
842     S->pCoeffs = pCoeffs;
843     S->pState  = pState;
844 
845     for (i = 0U; i < numTaps; i++)
846     {
847         pState[numTaps + 1U - i] = pCoeffs[i];
848     }
849 
850     *(uint32_t *)(void *)pState = 0U;
851 
852     return ARM_MATH_SUCCESS;
853 }
854 
855 /**
856  * brief Processing function for the floating-point FIR filter.
857  * param[in]  S          points to an instance of the floating-point FIR structure.
858  * param[in]  pSrc       points to the block of input data.
859  * param[out] pDst       points to the block of output data.
860  * param[in]  blockSize  number of samples to process.
861  *
862  * Note: Powerquad has a hardware limitation, when using it for FIR increment calculation, the address of pSrc needs to
863  * be a continuous address.
864  */
arm_fir_f32(const arm_fir_instance_f32 * S,const float32_t * pSrc,float32_t * pDst,uint32_t blockSize)865 void arm_fir_f32(const arm_fir_instance_f32 *S, const float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
866 {
867     assert(NULL != S);
868     assert(NULL != pSrc);
869     assert(NULL != pDst);
870 
871     uint32_t curOffset;
872     PQ_SET_F32_CONFIG;
873 
874     curOffset = *(uint32_t *)(void *)(S->pState);
875 
876     if (curOffset == 0U)
877     {
878         PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
879     }
880     else
881     {
882         _arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc));
883     }
884 
885     *(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
886 
887     PQ_WaitDone(POWERQUAD);
888 }
889 
arm_fir_q31(const arm_fir_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)890 void arm_fir_q31(const arm_fir_instance_q31 *S, const q31_t *pSrc, q31_t *pDst, uint32_t blockSize)
891 {
892     assert(NULL != S);
893     assert(NULL != pSrc);
894     assert(NULL != pDst);
895 
896     uint32_t curOffset;
897     PQ_SET_Q31_CONFIG;
898 
899     curOffset = *(uint32_t *)(void *)(S->pState);
900 
901     if (curOffset == 0U)
902     {
903         PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
904     }
905     else
906     {
907         _arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc));
908     }
909 
910     *(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
911 
912     PQ_WaitDone(POWERQUAD);
913 }
914 
arm_fir_q15(const arm_fir_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)915 void arm_fir_q15(const arm_fir_instance_q15 *S, const q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
916 {
917     assert(NULL != S);
918     assert(NULL != pSrc);
919     assert(NULL != pDst);
920 
921     uint32_t curOffset;
922 
923     PQ_SET_Q15_CONFIG;
924 
925     curOffset = *(uint32_t *)(void *)(S->pState);
926 
927     if (curOffset == 0U)
928     {
929         PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[2]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
930     }
931     else
932     {
933         _arm_fir_increment(pSrc, blockSize, &S->pState[2], S->numTaps, pDst, curOffset, sizeof(*pSrc));
934     }
935 
936     *(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
937 
938     PQ_WaitDone(POWERQUAD);
939 }
940 
arm_conv_f32(const float32_t * pSrcA,uint32_t srcALen,const float32_t * pSrcB,uint32_t srcBLen,float32_t * pDst)941 void arm_conv_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst)
942 {
943     assert(NULL != pSrcA);
944     assert(NULL != pSrcB);
945     assert(NULL != pDst);
946 
947     PQ_SET_F32_CONFIG;
948 
949     PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
950     PQ_WaitDone(POWERQUAD);
951 }
952 
arm_conv_q31(const q31_t * pSrcA,uint32_t srcALen,const q31_t * pSrcB,uint32_t srcBLen,q31_t * pDst)953 void arm_conv_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst)
954 {
955     assert(NULL != pSrcA);
956     assert(NULL != pSrcB);
957     assert(NULL != pDst);
958 
959     PQ_SET_Q31_CONFIG;
960 
961     PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
962     PQ_WaitDone(POWERQUAD);
963 }
964 
arm_conv_q15(const q15_t * pSrcA,uint32_t srcALen,const q15_t * pSrcB,uint32_t srcBLen,q15_t * pDst)965 void arm_conv_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
966 {
967     assert(NULL != pSrcA);
968     assert(NULL != pSrcB);
969     assert(NULL != pDst);
970 
971     PQ_SET_Q15_CONFIG;
972 
973     PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
974     PQ_WaitDone(POWERQUAD);
975 }
976 
arm_correlate_f32(const float32_t * pSrcA,uint32_t srcALen,const float32_t * pSrcB,uint32_t srcBLen,float32_t * pDst)977 void arm_correlate_f32(
978     const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst)
979 {
980     assert(NULL != pSrcA);
981     assert(NULL != pSrcB);
982     assert(NULL != pDst);
983 
984     PQ_SET_F32_CONFIG;
985 
986     PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
987     PQ_WaitDone(POWERQUAD);
988 }
989 
arm_correlate_q31(const q31_t * pSrcA,uint32_t srcALen,const q31_t * pSrcB,uint32_t srcBLen,q31_t * pDst)990 void arm_correlate_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst)
991 {
992     assert(NULL != pSrcA);
993     assert(NULL != pSrcB);
994     assert(NULL != pDst);
995 
996     PQ_SET_Q31_CONFIG;
997 
998     PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
999     PQ_WaitDone(POWERQUAD);
1000 }
1001 
arm_correlate_q15(const q15_t * pSrcA,uint32_t srcALen,const q15_t * pSrcB,uint32_t srcBLen,q15_t * pDst)1002 void arm_correlate_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
1003 {
1004     assert(NULL != pSrcA);
1005     assert(NULL != pSrcB);
1006     assert(NULL != pDst);
1007 
1008     PQ_SET_Q15_CONFIG;
1009 
1010     PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
1011     PQ_WaitDone(POWERQUAD);
1012 }
1013 
arm_mat_init_f32(arm_matrix_instance_f32 * S,uint16_t nRows,uint16_t nColumns,float32_t * pData)1014 void arm_mat_init_f32(arm_matrix_instance_f32 *S, uint16_t nRows, uint16_t nColumns, float32_t *pData)
1015 {
1016     /* Assign Number of Rows */
1017     S->numRows = nRows;
1018 
1019     /* Assign Number of Columns */
1020     S->numCols = nColumns;
1021 
1022     /* Assign Data pointer */
1023     S->pData = pData;
1024 }
1025 
arm_mat_init_q31(arm_matrix_instance_q31 * S,uint16_t nRows,uint16_t nColumns,q31_t * pData)1026 void arm_mat_init_q31(arm_matrix_instance_q31 *S, uint16_t nRows, uint16_t nColumns, q31_t *pData)
1027 {
1028     /* Assign Number of Rows */
1029     S->numRows = nRows;
1030 
1031     /* Assign Number of Columns */
1032     S->numCols = nColumns;
1033 
1034     /* Assign Data pointer */
1035     S->pData = pData;
1036 }
1037 
arm_mat_init_q15(arm_matrix_instance_q15 * S,uint16_t nRows,uint16_t nColumns,q15_t * pData)1038 void arm_mat_init_q15(arm_matrix_instance_q15 *S, uint16_t nRows, uint16_t nColumns, q15_t *pData)
1039 {
1040     /* Assign Number of Rows */
1041     S->numRows = nRows;
1042 
1043     /* Assign Number of Columns */
1044     S->numCols = nColumns;
1045 
1046     /* Assign Data pointer */
1047     S->pData = pData;
1048 }
1049 
arm_mat_add_f32(const arm_matrix_instance_f32 * pSrcA,const arm_matrix_instance_f32 * pSrcB,arm_matrix_instance_f32 * pDst)1050 arm_status arm_mat_add_f32(const arm_matrix_instance_f32 *pSrcA,
1051                            const arm_matrix_instance_f32 *pSrcB,
1052                            arm_matrix_instance_f32 *pDst)
1053 {
1054     assert(NULL != pSrcA);
1055     assert(NULL != pSrcB);
1056     assert(NULL != pDst);
1057 
1058     arm_status status;
1059     uint32_t length;
1060 
1061 #ifdef ARM_MATH_MATRIX_CHECK
1062     /* Check for matrix mismatch condition */
1063     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1064         (pSrcA->numCols != pDst->numCols))
1065     {
1066         /* Set status as ARM_MATH_SIZE_MISMATCH */
1067         status = ARM_MATH_SIZE_MISMATCH;
1068     }
1069     else
1070 #endif
1071     {
1072         PQ_SET_F32_CONFIG;
1073 
1074         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1075 
1076         PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1077 
1078         /* Wait for the completion */
1079         PQ_WaitDone(POWERQUAD);
1080 
1081         status = ARM_MATH_SUCCESS;
1082     }
1083 
1084     /* Return to application */
1085     return status;
1086 }
1087 
arm_mat_add_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)1088 arm_status arm_mat_add_q31(const arm_matrix_instance_q31 *pSrcA,
1089                            const arm_matrix_instance_q31 *pSrcB,
1090                            arm_matrix_instance_q31 *pDst)
1091 {
1092     assert(NULL != pSrcA);
1093     assert(NULL != pSrcB);
1094     assert(NULL != pDst);
1095 
1096     arm_status status;
1097     uint32_t length;
1098 
1099 #ifdef ARM_MATH_MATRIX_CHECK
1100     /* Check for matrix mismatch condition */
1101     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1102         (pSrcA->numCols != pDst->numCols))
1103     {
1104         /* Set status as ARM_MATH_SIZE_MISMATCH */
1105         status = ARM_MATH_SIZE_MISMATCH;
1106     }
1107     else
1108 #endif
1109     {
1110         PQ_SET_FIX32_CONFIG;
1111 
1112         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1113 
1114         PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1115 
1116         /* Wait for the completion */
1117         PQ_WaitDone(POWERQUAD);
1118 
1119         status = ARM_MATH_SUCCESS;
1120     }
1121 
1122     /* Return to application */
1123     return status;
1124 }
1125 
arm_mat_add_q15(const arm_matrix_instance_q15 * pSrcA,const arm_matrix_instance_q15 * pSrcB,arm_matrix_instance_q15 * pDst)1126 arm_status arm_mat_add_q15(const arm_matrix_instance_q15 *pSrcA,
1127                            const arm_matrix_instance_q15 *pSrcB,
1128                            arm_matrix_instance_q15 *pDst)
1129 {
1130     assert(NULL != pSrcA);
1131     assert(NULL != pSrcB);
1132     assert(NULL != pDst);
1133 
1134     arm_status status;
1135     uint32_t length;
1136 
1137 #ifdef ARM_MATH_MATRIX_CHECK
1138     /* Check for matrix mismatch condition */
1139     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1140         (pSrcA->numCols != pDst->numCols))
1141     {
1142         /* Set status as ARM_MATH_SIZE_MISMATCH */
1143         status = ARM_MATH_SIZE_MISMATCH;
1144     }
1145     else
1146 #endif
1147     {
1148         PQ_SET_FIX16_CONFIG;
1149 
1150         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1151 
1152         PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1153 
1154         /* Wait for the completion */
1155         PQ_WaitDone(POWERQUAD);
1156 
1157         status = ARM_MATH_SUCCESS;
1158     }
1159 
1160     /* Return to application */
1161     return status;
1162 }
1163 
arm_mat_sub_f32(const arm_matrix_instance_f32 * pSrcA,const arm_matrix_instance_f32 * pSrcB,arm_matrix_instance_f32 * pDst)1164 arm_status arm_mat_sub_f32(const arm_matrix_instance_f32 *pSrcA,
1165                            const arm_matrix_instance_f32 *pSrcB,
1166                            arm_matrix_instance_f32 *pDst)
1167 {
1168     assert(NULL != pSrcA);
1169     assert(NULL != pSrcB);
1170     assert(NULL != pDst);
1171 
1172     arm_status status;
1173     uint32_t length;
1174 
1175 #ifdef ARM_MATH_MATRIX_CHECK
1176     /* Check for matrix mismatch condition */
1177     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1178         (pSrcA->numCols != pDst->numCols))
1179     {
1180         /* Set status as ARM_MATH_SIZE_MISMATCH */
1181         status = ARM_MATH_SIZE_MISMATCH;
1182     }
1183     else
1184 #endif
1185     {
1186         PQ_SET_F32_CONFIG;
1187 
1188         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1189 
1190         PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1191 
1192         /* Wait for the completion */
1193         PQ_WaitDone(POWERQUAD);
1194 
1195         status = ARM_MATH_SUCCESS;
1196     }
1197 
1198     /* Return to application */
1199     return status;
1200 }
1201 
arm_mat_sub_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)1202 arm_status arm_mat_sub_q31(const arm_matrix_instance_q31 *pSrcA,
1203                            const arm_matrix_instance_q31 *pSrcB,
1204                            arm_matrix_instance_q31 *pDst)
1205 {
1206     assert(NULL != pSrcA);
1207     assert(NULL != pSrcB);
1208     assert(NULL != pDst);
1209 
1210     arm_status status;
1211     uint32_t length;
1212 
1213 #ifdef ARM_MATH_MATRIX_CHECK
1214     /* Check for matrix mismatch condition */
1215     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1216         (pSrcA->numCols != pDst->numCols))
1217     {
1218         /* Set status as ARM_MATH_SIZE_MISMATCH */
1219         status = ARM_MATH_SIZE_MISMATCH;
1220     }
1221     else
1222 #endif
1223     {
1224         PQ_SET_FIX32_CONFIG;
1225 
1226         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1227 
1228         PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1229 
1230         /* Wait for the completion */
1231         PQ_WaitDone(POWERQUAD);
1232 
1233         status = ARM_MATH_SUCCESS;
1234     }
1235 
1236     /* Return to application */
1237     return status;
1238 }
1239 
arm_mat_sub_q15(const arm_matrix_instance_q15 * pSrcA,const arm_matrix_instance_q15 * pSrcB,arm_matrix_instance_q15 * pDst)1240 arm_status arm_mat_sub_q15(const arm_matrix_instance_q15 *pSrcA,
1241                            const arm_matrix_instance_q15 *pSrcB,
1242                            arm_matrix_instance_q15 *pDst)
1243 {
1244     assert(NULL != pSrcA);
1245     assert(NULL != pSrcB);
1246     assert(NULL != pDst);
1247 
1248     arm_status status;
1249     uint32_t length;
1250 
1251 #ifdef ARM_MATH_MATRIX_CHECK
1252     /* Check for matrix mismatch condition */
1253     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1254         (pSrcA->numCols != pDst->numCols))
1255     {
1256         /* Set status as ARM_MATH_SIZE_MISMATCH */
1257         status = ARM_MATH_SIZE_MISMATCH;
1258     }
1259     else
1260 #endif
1261     {
1262         PQ_SET_FIX16_CONFIG;
1263 
1264         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1265 
1266         PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1267 
1268         /* Wait for the completion */
1269         PQ_WaitDone(POWERQUAD);
1270 
1271         status = ARM_MATH_SUCCESS;
1272     }
1273 
1274     /* Return to application */
1275     return status;
1276 }
1277 
arm_mat_mult_f32(const arm_matrix_instance_f32 * pSrcA,const arm_matrix_instance_f32 * pSrcB,arm_matrix_instance_f32 * pDst)1278 arm_status arm_mat_mult_f32(const arm_matrix_instance_f32 *pSrcA,
1279                             const arm_matrix_instance_f32 *pSrcB,
1280                             arm_matrix_instance_f32 *pDst)
1281 {
1282     assert(NULL != pSrcA);
1283     assert(NULL != pSrcB);
1284     assert(NULL != pDst);
1285 
1286     arm_status status;
1287     uint32_t length;
1288 
1289 #ifdef ARM_MATH_MATRIX_CHECK
1290     /* Check for matrix mismatch condition */
1291     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1292         (pSrcA->numCols != pDst->numCols))
1293     {
1294         /* Set status as ARM_MATH_SIZE_MISMATCH */
1295         status = ARM_MATH_SIZE_MISMATCH;
1296     }
1297     else
1298 #endif
1299     {
1300         PQ_SET_F32_CONFIG;
1301 
1302         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1303 
1304         PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1305 
1306         /* Wait for the completion */
1307         PQ_WaitDone(POWERQUAD);
1308 
1309         status = ARM_MATH_SUCCESS;
1310     }
1311 
1312     /* Return to application */
1313     return status;
1314 }
1315 
arm_mat_mult_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)1316 arm_status arm_mat_mult_q31(const arm_matrix_instance_q31 *pSrcA,
1317                             const arm_matrix_instance_q31 *pSrcB,
1318                             arm_matrix_instance_q31 *pDst)
1319 {
1320     assert(NULL != pSrcA);
1321     assert(NULL != pSrcB);
1322     assert(NULL != pDst);
1323 
1324     arm_status status;
1325     uint32_t length;
1326 
1327     /*
1328      * The output prescale does not supprt negative value due to hardware issue,
1329      * workaround:
1330      * 1. Downscale the matrix B and save the float output value to private memory.
1331      * 2. Multiply the float matrix B in private memory with matrix A, output as Q31.
1332      * Note: Put matrix B in private memory is faster.
1333      */
1334 
1335 #ifdef ARM_MATH_MATRIX_CHECK
1336     /* Check for matrix mismatch condition */
1337     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1338         (pSrcA->numCols != pDst->numCols))
1339     {
1340         /* Set status as ARM_MATH_SIZE_MISMATCH */
1341         status = ARM_MATH_SIZE_MISMATCH;
1342     }
1343     else
1344 #endif
1345     {
1346         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U);
1347 
1348         PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
1349 
1350         /* Downscale. */
1351         PQ_MatrixScale(POWERQUAD, length, 1.0f / 2147483648.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U);
1352 
1353         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1354 
1355         PQ_WaitDone(POWERQUAD);
1356 
1357         PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
1358 
1359         PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData);
1360 
1361         /* Wait for the completion */
1362         PQ_WaitDone(POWERQUAD);
1363 
1364         status = ARM_MATH_SUCCESS;
1365     }
1366 
1367     /* Return to application */
1368     return status;
1369 }
1370 
arm_mat_mult_q15(const arm_matrix_instance_q15 * pSrcA,const arm_matrix_instance_q15 * pSrcB,arm_matrix_instance_q15 * pDst,q15_t * pState)1371 arm_status arm_mat_mult_q15(const arm_matrix_instance_q15 *pSrcA,
1372                             const arm_matrix_instance_q15 *pSrcB,
1373                             arm_matrix_instance_q15 *pDst,
1374                             q15_t *pState)
1375 {
1376     assert(NULL != pSrcA);
1377     assert(NULL != pSrcB);
1378     assert(NULL != pDst);
1379 
1380     arm_status status;
1381     uint32_t length;
1382 
1383 #ifdef ARM_MATH_MATRIX_CHECK
1384     /* Check for matrix mismatch condition */
1385     if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1386         (pSrcA->numCols != pDst->numCols))
1387     {
1388         /* Set status as ARM_MATH_SIZE_MISMATCH */
1389         status = ARM_MATH_SIZE_MISMATCH;
1390     }
1391     else
1392 #endif
1393     {
1394         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U);
1395 
1396         PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG;
1397 
1398         /* Downscale. */
1399         PQ_MatrixScale(POWERQUAD, length, 1.0f / 32768.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U);
1400 
1401         length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1402 
1403         PQ_WaitDone(POWERQUAD);
1404 
1405         PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG;
1406 
1407         PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData);
1408 
1409         /* Wait for the completion */
1410         PQ_WaitDone(POWERQUAD);
1411 
1412         status = ARM_MATH_SUCCESS;
1413     }
1414 
1415     /* Return to application */
1416     return status;
1417 }
1418 
arm_mat_inverse_f32(const arm_matrix_instance_f32 * src,arm_matrix_instance_f32 * dst)1419 arm_status arm_mat_inverse_f32(const arm_matrix_instance_f32 *src, arm_matrix_instance_f32 *dst)
1420 {
1421     assert(NULL != src);
1422     assert(NULL != dst);
1423 
1424     arm_status status;
1425     uint32_t length;
1426     float tmp[1024];
1427 
1428 #ifdef ARM_MATH_MATRIX_CHECK
1429     /* Check for matrix mismatch condition */
1430     if ((src->numRows != src->numCols) || (dst->numRows != dst->numCols) || (src->numRows != dst->numRows))
1431     {
1432         /* Set status as ARM_MATH_SIZE_MISMATCH */
1433         status = ARM_MATH_SIZE_MISMATCH;
1434     }
1435     else
1436 #endif
1437     {
1438         PQ_SET_F32_CONFIG;
1439 
1440         length = POWERQUAD_MAKE_MATRIX_LEN(src->numRows, src->numCols, src->numRows);
1441 
1442         PQ_MatrixInversion(POWERQUAD, length, src->pData, tmp, dst->pData);
1443 
1444         /* Wait for the completion */
1445         PQ_WaitDone(POWERQUAD);
1446 
1447         status = ARM_MATH_SUCCESS;
1448     }
1449 
1450     /* Return to application */
1451     return status;
1452 }
1453 
arm_mat_trans_f32(const arm_matrix_instance_f32 * pSrc,arm_matrix_instance_f32 * pDst)1454 arm_status arm_mat_trans_f32(const arm_matrix_instance_f32 *pSrc, arm_matrix_instance_f32 *pDst)
1455 {
1456     assert(NULL != pSrc);
1457     assert(NULL != pDst);
1458 
1459     arm_status status;
1460     uint32_t length;
1461 
1462 #ifdef ARM_MATH_MATRIX_CHECK
1463     /* Check for matrix mismatch condition */
1464     if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1465     {
1466         /* Set status as ARM_MATH_SIZE_MISMATCH */
1467         status = ARM_MATH_SIZE_MISMATCH;
1468     }
1469     else
1470 #endif
1471     {
1472         length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1473 
1474         PQ_SetFormat(POWERQUAD, kPQ_CP_MTX, kPQ_Float);
1475 
1476         PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
1477 
1478         /* Wait for the completion */
1479         PQ_WaitDone(POWERQUAD);
1480         status = ARM_MATH_SUCCESS;
1481     }
1482 
1483     /* Return to application */
1484     return status;
1485 }
1486 
arm_mat_trans_q31(const arm_matrix_instance_q31 * pSrc,arm_matrix_instance_q31 * pDst)1487 arm_status arm_mat_trans_q31(const arm_matrix_instance_q31 *pSrc, arm_matrix_instance_q31 *pDst)
1488 {
1489     assert(NULL != pSrc);
1490     assert(NULL != pDst);
1491 
1492     arm_status status;
1493     uint32_t length;
1494 
1495 #ifdef ARM_MATH_MATRIX_CHECK
1496     /* Check for matrix mismatch condition */
1497     if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1498     {
1499         /* Set status as ARM_MATH_SIZE_MISMATCH */
1500         status = ARM_MATH_SIZE_MISMATCH;
1501     }
1502     else
1503 #endif
1504     {
1505         PQ_SET_FIX32_CONFIG;
1506 
1507         length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1508 
1509         PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
1510 
1511         /* Wait for the completion */
1512         PQ_WaitDone(POWERQUAD);
1513 
1514         status = ARM_MATH_SUCCESS;
1515     }
1516 
1517     /* Return to application */
1518     return status;
1519 }
1520 
arm_mat_trans_q15(const arm_matrix_instance_q15 * pSrc,arm_matrix_instance_q15 * pDst)1521 arm_status arm_mat_trans_q15(const arm_matrix_instance_q15 *pSrc, arm_matrix_instance_q15 *pDst)
1522 {
1523     assert(NULL != pSrc);
1524     assert(NULL != pDst);
1525 
1526     arm_status status;
1527     uint32_t length;
1528 
1529 #ifdef ARM_MATH_MATRIX_CHECK
1530     /* Check for matrix mismatch condition */
1531     if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1532     {
1533         /* Set status as ARM_MATH_SIZE_MISMATCH */
1534         status = ARM_MATH_SIZE_MISMATCH;
1535     }
1536     else
1537 #endif
1538     {
1539         PQ_SET_FIX16_CONFIG;
1540 
1541         length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1542 
1543         PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
1544 
1545         /* Wait for the completion */
1546         PQ_WaitDone(POWERQUAD);
1547 
1548         status = ARM_MATH_SUCCESS;
1549     }
1550 
1551     /* Return to application */
1552     return status;
1553 }
1554 
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)1555 arm_status arm_mat_scale_f32(const arm_matrix_instance_f32 *pSrc, float32_t scale, arm_matrix_instance_f32 *pDst)
1556 {
1557     assert(NULL != pSrc);
1558     assert(NULL != pDst);
1559 
1560     arm_status status;
1561     uint32_t length;
1562 
1563 #ifdef ARM_MATH_MATRIX_CHECK
1564     /* Check for matrix mismatch condition */
1565     if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1566     {
1567         /* Set status as ARM_MATH_SIZE_MISMATCH */
1568         status = ARM_MATH_SIZE_MISMATCH;
1569     }
1570     else
1571 #endif
1572     {
1573         PQ_SET_F32_CONFIG;
1574 
1575         length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1576 
1577         PQ_MatrixScale(POWERQUAD, length, scale, pSrc->pData, pDst->pData);
1578 
1579         /* Wait for the completion */
1580         PQ_WaitDone(POWERQUAD);
1581 
1582         status = ARM_MATH_SUCCESS;
1583     }
1584 
1585     /* Return to application */
1586     return status;
1587 }
1588 
arm_mat_scale_q31(const arm_matrix_instance_q31 * pSrc,q31_t scaleFract,int32_t shift,arm_matrix_instance_q31 * pDst)1589 arm_status arm_mat_scale_q31(const arm_matrix_instance_q31 *pSrc,
1590                              q31_t scaleFract,
1591                              int32_t shift,
1592                              arm_matrix_instance_q31 *pDst)
1593 {
1594     assert(NULL != pSrc);
1595     assert(NULL != pDst);
1596 
1597     arm_status status;
1598     uint32_t length;
1599     float scaleFloat;
1600 
1601     pq_config_t config = {
1602         kPQ_32Bit,               /* inputAFormat   */
1603         0,                       /* inputAPrescale */
1604         kPQ_32Bit,               /* inputBFormat   */
1605         0,                       /* inputBPrescale */
1606         kPQ_32Bit,               /* outputFormat   */
1607         (int8_t)shift,           /* outputPrescale */
1608         kPQ_Float,               /* tmpFormat      */
1609         0,                       /* tmpPrescale    */
1610         kPQ_Float,               /* machineFormat  */
1611         (uint32_t *)0xe0000000U, /* tmpBase        */
1612     };
1613 
1614 #ifdef ARM_MATH_MATRIX_CHECK
1615     /* Check for matrix mismatch condition */
1616     if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1617     {
1618         /* Set status as ARM_MATH_SIZE_MISMATCH */
1619         status = ARM_MATH_SIZE_MISMATCH;
1620     }
1621     else
1622 #endif
1623     {
1624         length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1625 
1626         scaleFloat = PQ_Q31_2_FLOAT(scaleFract);
1627 
1628         PQ_SetConfig(POWERQUAD, &config);
1629 
1630         PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData);
1631 
1632         /* Wait for the completion */
1633         PQ_WaitDone(POWERQUAD);
1634 
1635         status = ARM_MATH_SUCCESS;
1636     }
1637 
1638     /* Return to application */
1639     return status;
1640 }
1641 
arm_mat_scale_q15(const arm_matrix_instance_q15 * pSrc,q15_t scaleFract,int32_t shift,arm_matrix_instance_q15 * pDst)1642 arm_status arm_mat_scale_q15(const arm_matrix_instance_q15 *pSrc,
1643                              q15_t scaleFract,
1644                              int32_t shift,
1645                              arm_matrix_instance_q15 *pDst)
1646 {
1647     assert(NULL != pSrc);
1648     assert(NULL != pDst);
1649 
1650     arm_status status;
1651     uint32_t length;
1652     float scaleFloat;
1653 
1654     pq_config_t config = {
1655         kPQ_16Bit,               /* inputAFormat   */
1656         0,                       /* inputAPrescale */
1657         kPQ_16Bit,               /* inputBFormat   */
1658         0,                       /* inputBPrescale */
1659         kPQ_16Bit,               /* outputFormat   */
1660         (int8_t)shift,           /* outputPrescale */
1661         kPQ_Float,               /* tmpFormat      */
1662         0,                       /* tmpPrescale    */
1663         kPQ_Float,               /* machineFormat  */
1664         (uint32_t *)0xe0000000U, /* tmpBase        */
1665     };
1666 
1667 #ifdef ARM_MATH_MATRIX_CHECK
1668     /* Check for matrix mismatch condition */
1669     if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1670     {
1671         /* Set status as ARM_MATH_SIZE_MISMATCH */
1672         status = ARM_MATH_SIZE_MISMATCH;
1673     }
1674     else
1675 #endif
1676     {
1677         length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1678 
1679         scaleFloat = PQ_Q15_2_FLOAT(scaleFract);
1680 
1681         PQ_SetConfig(POWERQUAD, &config);
1682 
1683         PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData);
1684 
1685         /* Wait for the completion */
1686         PQ_WaitDone(POWERQUAD);
1687 
1688         status = ARM_MATH_SUCCESS;
1689     }
1690 
1691     /* Return to application */
1692     return status;
1693 }
1694