1 /*
2 * Copyright 2018-2022 NXP
3 * All rights reserved.
4 *
5 * SPDX-License-Identifier: BSD-3-Clause
6 */
7
8 #include "fsl_powerquad.h"
9 #include "fsl_powerquad_data.h"
10 #include "arm_math.h"
11
12 /*******************************************************************************
13 * Definitions
14 ******************************************************************************/
15
16 /* Component ID definition, used by tools. */
17 #ifndef FSL_COMPONENT_ID
18 #define FSL_COMPONENT_ID "platform.drivers.powerquad_cmsis"
19 #endif
20
21 #define PQ_SET_FIX32_CONFIG \
22 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
23 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
24 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
25 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
26 POWERQUAD->TMPBASE = 0xE0000000U
27
28 #define PQ_SET_FIX16_CONFIG \
29 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
30 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
31 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
32 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
33 POWERQUAD->TMPBASE = 0xE0000000U
34
35 #define PQ_SET_Q31_CONFIG \
36 POWERQUAD->OUTFORMAT = ((uint32_t)(-31) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
37 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
38 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
39 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
40 POWERQUAD->TMPBASE = 0xE0000000U
41
42 #define PQ_SET_Q15_CONFIG \
43 POWERQUAD->OUTFORMAT = ((uint32_t)(-15) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
44 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
45 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
46 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
47 POWERQUAD->TMPBASE = 0xE0000000U
48
49 #define PQ_SET_F32_CONFIG \
50 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
51 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
52 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
53 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
54 POWERQUAD->TMPBASE = 0xE0000000U
55
56 #define PQ_SET_FFT_Q31_CONFIG \
57 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
58 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
59 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
60 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
61 POWERQUAD->TMPBASE = 0xE0000000U
62
63 #define PQ_SET_FFT_Q15_CONFIG \
64 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
65 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
66 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
67 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
68 POWERQUAD->TMPBASE = 0xE0000000U
69
70 #define PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG \
71 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
72 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
73 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
74 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
75 POWERQUAD->TMPBASE = 0xE0000000U
76
77 #define PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG \
78 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
79 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
80 POWERQUAD->TMPBASE = 0xE0000000U
81
82 #define PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG \
83 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
84 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
85 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
86 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
87 POWERQUAD->TMPBASE = 0xE0000000U
88
89 #define PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG \
90 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
91 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
92 POWERQUAD->TMPBASE = 0xE0000000U
93
94 /*******************************************************************************
95 * Code
96 ******************************************************************************/
_arm_fir_increment(const void * pSrc,uint32_t srcLen,const void * pTap,uint16_t tapLen,void * pDst,uint32_t offset,uint32_t elemSize)97 static void _arm_fir_increment(const void *pSrc,
98 uint32_t srcLen,
99 const void *pTap,
100 uint16_t tapLen,
101 void *pDst,
102 uint32_t offset,
103 uint32_t elemSize)
104 {
105 POWERQUAD->INABASE = ((uint32_t)(const uint32_t *)pSrc) - (offset * elemSize);
106 POWERQUAD->INBBASE = (uint32_t)(const uint32_t *)pTap;
107 POWERQUAD->LENGTH = (((uint32_t)tapLen & 0xFFFFUL) << 16U) + (srcLen & 0xFFFFUL);
108 POWERQUAD->OUTBASE = ((uint32_t)(uint32_t *)pDst) - (offset * elemSize);
109 POWERQUAD->MISC = offset;
110 POWERQUAD->CONTROL = (CP_FIR << 4U) | PQ_FIR_INCREMENTAL;
111 }
112
arm_cos_f32(float32_t x)113 float32_t arm_cos_f32(float32_t x)
114 {
115 float tmp;
116
117 PQ_CosF32(&x, &tmp);
118 return tmp;
119 }
120
arm_cos_q31(q31_t x)121 q31_t arm_cos_q31(q31_t x)
122 {
123 /* For PQ: input -1 to 1 means -pi to pi
124 * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
125 x *= 2;
126 return PQ_CosQ31(x);
127 }
128
arm_cos_q15(q15_t x)129 q15_t arm_cos_q15(q15_t x)
130 {
131 /* For PQ: input -1 to 1 means -pi to pi
132 * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
133 x *= 2;
134 return PQ_CosQ15(x);
135 }
136
arm_sin_f32(float32_t x)137 float32_t arm_sin_f32(float32_t x)
138 {
139 float tmp;
140
141 PQ_SinF32(&x, &tmp);
142 return tmp;
143 }
144
arm_sin_q31(q31_t x)145 q31_t arm_sin_q31(q31_t x)
146 {
147 /* For PQ: input -1 to 1 means -pi to pi
148 * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
149 x *= 2;
150 return PQ_SinQ31(x);
151 }
152
arm_sin_q15(q15_t x)153 q15_t arm_sin_q15(q15_t x)
154 {
155 /* For PQ: input -1 to 1 means -pi to pi
156 * For CMSIS DSP: input 0 to 1 means pi to 2*pi */
157 x *= 2;
158 return PQ_SinQ15(x);
159 }
160
arm_sqrt_q31(q31_t in,q31_t * pOut)161 arm_status arm_sqrt_q31(q31_t in, q31_t *pOut)
162 {
163 uint32_t cppre;
164 arm_status status;
165
166 /* If the input is a positive number then compute the signBits. */
167 if (in > 0)
168 {
169 cppre = POWERQUAD->CPPRE;
170 POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-31) | POWERQUAD_CPPRE_CPPRE_OUT(31);
171 *pOut = (q31_t)PQ_SqrtFixed((uint32_t)in);
172 POWERQUAD->CPPRE = cppre;
173
174 status = (ARM_MATH_SUCCESS);
175 }
176 /* If the number is a negative number then store zero as its square root value */
177 else
178 {
179 *pOut = 0;
180
181 status = (ARM_MATH_ARGUMENT_ERROR);
182 }
183
184 return status;
185 }
186
arm_sqrt_q15(q15_t in,q15_t * pOut)187 arm_status arm_sqrt_q15(q15_t in, q15_t *pOut)
188 {
189 uint32_t cppre;
190 arm_status status;
191
192 /* If the input is a positive number then compute the signBits. */
193 if (in > 0)
194 {
195 cppre = POWERQUAD->CPPRE;
196 POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-15) | POWERQUAD_CPPRE_CPPRE_OUT(15);
197 *pOut = (q15_t)PQ_SqrtFixed((uint32_t)in);
198 POWERQUAD->CPPRE = cppre;
199
200 status = (ARM_MATH_SUCCESS);
201 }
202 /* If the number is a negative number then store zero as its square root value */
203 else
204 {
205 *pOut = 0;
206
207 status = (ARM_MATH_ARGUMENT_ERROR);
208 }
209
210 return status;
211 }
212
arm_cfft_q31(const arm_cfft_instance_q31 * S,q31_t * p1,uint8_t ifftFlag,uint8_t bitReverseFlag)213 void arm_cfft_q31(const arm_cfft_instance_q31 *S, q31_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
214 {
215 assert(bitReverseFlag == 1U);
216
217 q31_t *pIn = p1;
218 q31_t *pOut = p1;
219 uint32_t length = S->fftLen;
220
221 PQ_SET_FFT_Q31_CONFIG;
222
223 if (ifftFlag == 1U)
224 {
225 PQ_TransformIFFT(POWERQUAD, length, pIn, pOut);
226 }
227 else
228 {
229 PQ_TransformCFFT(POWERQUAD, length, pIn, pOut);
230 }
231
232 PQ_WaitDone(POWERQUAD);
233 }
234
arm_cfft_q15(const arm_cfft_instance_q15 * S,q15_t * p1,uint8_t ifftFlag,uint8_t bitReverseFlag)235 void arm_cfft_q15(const arm_cfft_instance_q15 *S, q15_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
236 {
237 assert(bitReverseFlag == 1U);
238
239 q15_t *pIn = p1;
240 q15_t *pOut = p1;
241 uint32_t length = S->fftLen;
242
243 PQ_SET_FFT_Q15_CONFIG;
244
245 if (ifftFlag == 1U)
246 {
247 PQ_TransformIFFT(POWERQUAD, length, pIn, pOut);
248 }
249 else
250 {
251 PQ_TransformCFFT(POWERQUAD, length, pIn, pOut);
252 }
253
254 PQ_WaitDone(POWERQUAD);
255 }
256
arm_rfft_init_q31(arm_rfft_instance_q31 * S,uint32_t fftLenReal,uint32_t ifftFlagR,uint32_t bitReverseFlag)257 arm_status arm_rfft_init_q31(arm_rfft_instance_q31 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag)
258 {
259 /* Only supprt such mode. */
260 assert(ifftFlagR == 0U);
261 assert(bitReverseFlag == 1U);
262
263 /* Initialise the default arm status */
264 arm_status status = ARM_MATH_SUCCESS;
265
266 /* Initialize the Real FFT length */
267 S->fftLenReal = (uint16_t)fftLenReal;
268
269 /* Initialize the Flag for selection of RFFT or RIFFT */
270 S->ifftFlagR = (uint8_t)ifftFlagR;
271
272 /* Initialize the Flag for calculation Bit reversal or not */
273 S->bitReverseFlagR = (uint8_t)bitReverseFlag;
274
275 /* return the status of RFFT Init function */
276 return (status);
277 }
278
arm_rfft_q31(const arm_rfft_instance_q31 * S,q31_t * pSrc,q31_t * pDst)279 void arm_rfft_q31(const arm_rfft_instance_q31 *S, q31_t *pSrc, q31_t *pDst)
280 {
281 uint32_t length = S->fftLenReal;
282 PQ_SET_FFT_Q31_CONFIG;
283
284 /* Calculation of RFFT of input */
285 PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst);
286
287 PQ_WaitDone(POWERQUAD);
288 }
289
arm_rfft_init_q15(arm_rfft_instance_q15 * S,uint32_t fftLenReal,uint32_t ifftFlagR,uint32_t bitReverseFlag)290 arm_status arm_rfft_init_q15(arm_rfft_instance_q15 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag)
291 {
292 /* Only supprt such mode. */
293 assert(ifftFlagR == 0U);
294 assert(bitReverseFlag == 1U);
295
296 /* Initialise the default arm status */
297 arm_status status = ARM_MATH_SUCCESS;
298
299 /* Initialize the Real FFT length */
300 S->fftLenReal = (uint16_t)fftLenReal;
301
302 /* Initialize the Flag for selection of RFFT or RIFFT */
303 S->ifftFlagR = (uint8_t)ifftFlagR;
304
305 /* Initialize the Flag for calculation Bit reversal or not */
306 S->bitReverseFlagR = (uint8_t)bitReverseFlag;
307
308 /* return the status of RFFT Init function */
309 return (status);
310 }
311
arm_rfft_q15(const arm_rfft_instance_q15 * S,q15_t * pSrc,q15_t * pDst)312 void arm_rfft_q15(const arm_rfft_instance_q15 *S, q15_t *pSrc, q15_t *pDst)
313 {
314 uint32_t length = S->fftLenReal;
315 PQ_SET_FFT_Q15_CONFIG;
316
317 /* Calculation of RFFT of input */
318 PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst);
319
320 PQ_WaitDone(POWERQUAD);
321 }
322
arm_dct4_init_q31(arm_dct4_instance_q31 * S,arm_rfft_instance_q31 * S_RFFT,arm_cfft_radix4_instance_q31 * S_CFFT,uint16_t N,uint16_t Nby2,q31_t normalize)323 arm_status arm_dct4_init_q31(arm_dct4_instance_q31 *S,
324 arm_rfft_instance_q31 *S_RFFT,
325 arm_cfft_radix4_instance_q31 *S_CFFT,
326 uint16_t N,
327 uint16_t Nby2,
328 q31_t normalize)
329 {
330 arm_status status = ARM_MATH_SUCCESS;
331
332 /* Initialize the DCT4 length */
333 S->N = N;
334
335 /* Initialize Real FFT Instance */
336 S->pRfft = S_RFFT;
337
338 switch (N)
339 {
340 /* Initialize the table modifier values */
341 case 512U:
342 S->pTwiddle = dct512_twiddle;
343 S->pCosFactor = dct512_cosFactor;
344 break;
345
346 case 256U:
347 S->pTwiddle = dct256_twiddle;
348 S->pCosFactor = dct256_cosFactor;
349 break;
350
351 case 128U:
352 S->pTwiddle = dct128_twiddle;
353 S->pCosFactor = dct128_cosFactor;
354 break;
355
356 case 64U:
357 S->pTwiddle = dct64_twiddle;
358 S->pCosFactor = dct64_cosFactor;
359 break;
360
361 case 32U:
362 S->pTwiddle = dct32_twiddle;
363 S->pCosFactor = dct32_cosFactor;
364 break;
365
366 case 16U:
367 S->pTwiddle = dct16_twiddle;
368 S->pCosFactor = dct16_cosFactor;
369 break;
370
371 default:
372 status = ARM_MATH_ARGUMENT_ERROR;
373 break;
374 }
375
376 if (ARM_MATH_SUCCESS == status)
377 {
378 /* Initialize the RFFT/RIFFT Function */
379 status = arm_rfft_init_q31(S->pRfft, S->N, 0, 1);
380 }
381
382 return status;
383 }
384
arm_dct4_q31(const arm_dct4_instance_q31 * S,q31_t * pState,q31_t * pInlineBuffer)385 void arm_dct4_q31(const arm_dct4_instance_q31 *S, q31_t *pState, q31_t *pInlineBuffer)
386 {
387 /* Calculate DCT-II for N-point input */
388 uint16_t i; /* Loop counter */
389 const q31_t *weights; /* Pointer to the Weights table */
390 q31_t *pOut; /* Temporary pointers for output buffer */
391 q31_t *pS1, *pbuff; /* Temporary pointers for input buffer and pState buffer */
392 q31_t in; /* Temporary variable */
393 const q31_t *cosFact;
394 uint32_t length;
395 uint8_t matRow;
396 uint8_t matCol;
397 uint8_t matLoop;
398 uint16_t lenPerMatLoop;
399
400 /*
401 * Pre-processing involves multiplying input with cos factor,
402 * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))
403 * where,
404 * r(n) -- output of preprocessing
405 * u(n) -- input to preprocessing(actual Source buffer)
406 */
407
408 /*
409 * Use matrix production function for preprocessing. Matrix production
410 * supports 16x16 at the most, so the matrix row is set to 16.
411 */
412 matRow = 16U;
413 lenPerMatLoop = S->N >= 256U ? 256U : S->N;
414 matCol = (uint8_t)(lenPerMatLoop / 16U);
415 matLoop = (uint8_t)(((S->N - 1U) >> 8U) + 1U);
416 cosFact = S->pCosFactor;
417 pbuff = pInlineBuffer;
418
419 length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
420
421 while ((matLoop--) != 0U)
422 {
423 PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
424
425 /* cos factor is Q31, convert to float */
426 PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U);
427 cosFact = &cosFact[lenPerMatLoop];
428
429 PQ_WaitDone(POWERQUAD);
430
431 /* Product. */
432 PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
433
434 PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff);
435
436 pbuff = &pbuff[lenPerMatLoop];
437
438 PQ_WaitDone(POWERQUAD);
439 }
440
441 PQ_SET_FFT_Q31_CONFIG;
442
443 PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState);
444
445 /* ARM calculation while PQ is running. */
446 /*
447 * Use matrix production function for twiddle multiplication.
448 * Matrix production supports 16x16 at the most. The total elements are 2*N;
449 */
450 lenPerMatLoop = S->N >= 128U ? 128U : S->N;
451 matCol = (uint8_t)(lenPerMatLoop / 8U);
452 matLoop = (uint8_t)(((S->N - 1U) >> 7U) + 1U);
453 weights = S->pTwiddle;
454 pOut = pState;
455
456 length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
457
458 PQ_WaitDone(POWERQUAD);
459
460 while ((matLoop--) != 0U)
461 {
462 PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
463
464 /* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */
465 PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U);
466 weights = &weights[lenPerMatLoop * 2U];
467
468 PQ_WaitDone(POWERQUAD);
469
470 /* Product. */
471 PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
472
473 PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut);
474
475 PQ_WaitDone(POWERQUAD);
476
477 for (i = 0; i < lenPerMatLoop / 4U; i++)
478 {
479 pOut[0] -= pOut[1];
480 pOut = &pOut[2];
481 pOut[0] -= pOut[1];
482 pOut = &pOut[2];
483 pOut[0] -= pOut[1];
484 pOut = &pOut[2];
485 pOut[0] -= pOut[1];
486 pOut = &pOut[2];
487 }
488 }
489
490 /* ----------- Post-processing ---------- */
491 /* DCT-IV can be obtained from DCT-II by the equation,
492 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
493 * Hence, Y4(0) = Y2(0)/2 */
494 /* Getting only real part from the output and Converting to DCT-IV */
495
496 /* pbuff initialized to input buffer. */
497 pbuff = pInlineBuffer;
498
499 /* pS1 initialized to pState */
500 pS1 = pState;
501
502 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/
503 in = (q31_t)(float)((float)*pS1 / 1.41421356237f);
504 *pbuff++ = in;
505 pS1 = &pS1[2];
506
507 in = *pS1 - in;
508 *pbuff++ = in;
509 pS1 = &pS1[2];
510
511 in = *pS1 - in;
512 *pbuff++ = in;
513 pS1 = &pS1[2];
514
515 in = *pS1 - in;
516 *pbuff++ = in;
517 pS1 = &pS1[2];
518
519 i = S->N / 4U - 1U;
520
521 while (i > 0U)
522 {
523 in = *pS1 - in;
524 *pbuff++ = in;
525 pS1 = &pS1[2];
526
527 in = *pS1 - in;
528 *pbuff++ = in;
529 pS1 = &pS1[2];
530
531 in = *pS1 - in;
532 *pbuff++ = in;
533 pS1 = &pS1[2];
534
535 in = *pS1 - in;
536 *pbuff++ = in;
537 pS1 = &pS1[2];
538
539 i--;
540 }
541 }
542
arm_dct4_init_q15(arm_dct4_instance_q15 * S,arm_rfft_instance_q15 * S_RFFT,arm_cfft_radix4_instance_q15 * S_CFFT,uint16_t N,uint16_t Nby2,q15_t normalize)543 arm_status arm_dct4_init_q15(arm_dct4_instance_q15 *S,
544 arm_rfft_instance_q15 *S_RFFT,
545 arm_cfft_radix4_instance_q15 *S_CFFT,
546 uint16_t N,
547 uint16_t Nby2,
548 q15_t normalize)
549 {
550 arm_status status = ARM_MATH_SUCCESS;
551
552 /* Initialize the DCT4 length */
553 S->N = N;
554
555 /* Initialize Real FFT Instance */
556 S->pRfft = S_RFFT;
557
558 switch (N)
559 {
560 /* Initialize the table modifier values */
561 case 512U:
562 S->pTwiddle = (void *)dct512_twiddle;
563 S->pCosFactor = (void *)dct512_cosFactor;
564 break;
565
566 case 256U:
567 S->pTwiddle = (void *)dct256_twiddle;
568 S->pCosFactor = (void *)dct256_cosFactor;
569 break;
570
571 case 128U:
572 S->pTwiddle = (void *)dct128_twiddle;
573 S->pCosFactor = (void *)dct128_cosFactor;
574 break;
575
576 case 64U:
577 S->pTwiddle = (void *)dct64_twiddle;
578 S->pCosFactor = (void *)dct64_cosFactor;
579 break;
580
581 case 32U:
582 S->pTwiddle = (void *)dct32_twiddle;
583 S->pCosFactor = (void *)dct32_cosFactor;
584 break;
585
586 case 16U:
587 S->pTwiddle = (void *)dct16_twiddle;
588 S->pCosFactor = (void *)dct16_cosFactor;
589 break;
590
591 default:
592 status = ARM_MATH_ARGUMENT_ERROR;
593 break;
594 }
595
596 if (ARM_MATH_SUCCESS == status)
597 {
598 /* Initialize the RFFT/RIFFT Function */
599 status = arm_rfft_init_q15(S->pRfft, S->N, 0, 1);
600 }
601
602 /* return the status of DCT4 Init function */
603 return status;
604 }
605
arm_dct4_q15(const arm_dct4_instance_q15 * S,q15_t * pState,q15_t * pInlineBuffer)606 void arm_dct4_q15(const arm_dct4_instance_q15 *S, q15_t *pState, q15_t *pInlineBuffer)
607 {
608 /* Calculate DCT-II for N-point input */
609 uint16_t i; /* Loop counter */
610 const q15_t *weights; /* Pointer to the Weights table */
611 q15_t *pOut; /* Temporary pointers for output buffer */
612 q15_t *pS1, *pbuff; /* Temporary pointers for input buffer and pState buffer */
613 q15_t in; /* Temporary variable */
614 const q15_t *cosFact;
615 uint32_t length;
616 uint8_t matRow;
617 uint8_t matCol;
618 uint8_t matLoop;
619 uint16_t lenPerMatLoop;
620
621 /*
622 * Pre-processing involves multiplying input with cos factor,
623 * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))
624 * where,
625 * r(n) -- output of preprocessing
626 * u(n) -- input to preprocessing(actual Source buffer)
627 */
628
629 /*
630 * Use matrix production function for preprocessing. Matrix production
631 * supports 16x16 at the most, so the matrix row is set to 16.
632 */
633 matRow = 16U;
634 lenPerMatLoop = S->N >= 256U ? 256U : S->N;
635 matCol = (uint8_t)(lenPerMatLoop / 16U);
636 matLoop = (uint8_t)(((S->N - 1U) >> 8U) + 1U);
637 cosFact = S->pCosFactor;
638 pbuff = pInlineBuffer;
639
640 length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, 0);
641
642 while ((matLoop--) != 0U)
643 {
644 PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
645
646 /* cos factor is Q31, convert to float */
647 PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U);
648 cosFact = &cosFact[2U * lenPerMatLoop];
649
650 PQ_WaitDone(POWERQUAD);
651
652 /* Product. */
653 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
654 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
655 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
656 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
657 POWERQUAD->TMPBASE = 0xE0000000U;
658
659 PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff);
660
661 PQ_WaitDone(POWERQUAD);
662
663 pbuff = &pbuff[lenPerMatLoop];
664 }
665
666 PQ_SET_FFT_Q15_CONFIG;
667
668 PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState);
669
670 /* ARM calculation while PQ is running. */
671 /*
672 * Use matrix production function for twiddle multiplication.
673 * Matrix production supports 16x16 at the most. The total elements are 2*N;
674 */
675 lenPerMatLoop = S->N >= 128U ? 128U : S->N;
676 matCol = (uint8_t)(lenPerMatLoop / 8U);
677 matLoop = (uint8_t)(((S->N - 1U) >> 7U) + 1U);
678 weights = S->pTwiddle;
679 pOut = pState;
680
681 length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
682
683 PQ_WaitDone(POWERQUAD);
684
685 while ((matLoop--) != 0U)
686 {
687 PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
688
689 /* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */
690 PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U);
691 weights = &weights[lenPerMatLoop * 2U];
692
693 PQ_WaitDone(POWERQUAD);
694
695 /* Product. */
696 POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
697 POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
698 POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
699 POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
700 POWERQUAD->TMPBASE = 0xE0000000U;
701
702 PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut);
703
704 PQ_WaitDone(POWERQUAD);
705
706 for (i = 0; i < lenPerMatLoop / 4U; i++)
707 {
708 pOut[0] -= pOut[1];
709 pOut = &pOut[2];
710 pOut[0] -= pOut[1];
711 pOut = &pOut[2];
712 pOut[0] -= pOut[1];
713 pOut = &pOut[2];
714 pOut[0] -= pOut[1];
715 pOut = &pOut[2];
716 }
717 }
718
719 /* ----------- Post-processing ---------- */
720 /* DCT-IV can be obtained from DCT-II by the equation,
721 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
722 * Hence, Y4(0) = Y2(0)/2 */
723 /* Getting only real part from the output and Converting to DCT-IV */
724
725 /* pbuff initialized to input buffer. */
726 pbuff = pInlineBuffer;
727
728 /* pS1 initialized to pState */
729 pS1 = pState;
730
731 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/
732 in = (q15_t)(float)((float)*pS1 / 1.41421356237f);
733 *pbuff++ = in;
734 pS1 = &pS1[2];
735
736 in = *pS1 - in;
737 *pbuff++ = in;
738 pS1 = &pS1[2];
739
740 in = *pS1 - in;
741 *pbuff++ = in;
742 pS1 = &pS1[2];
743
744 in = *pS1 - in;
745 *pbuff++ = in;
746 pS1 = &pS1[2];
747
748 i = S->N / 4U - 1U;
749
750 while (i > 0U)
751 {
752 in = *pS1 - in;
753 *pbuff++ = in;
754 pS1 = &pS1[2];
755
756 in = *pS1 - in;
757 *pbuff++ = in;
758 pS1 = &pS1[2];
759
760 in = *pS1 - in;
761 *pbuff++ = in;
762 pS1 = &pS1[2];
763
764 in = *pS1 - in;
765 *pbuff++ = in;
766 pS1 = &pS1[2];
767
768 i--;
769 }
770 }
771
arm_fir_init_f32(arm_fir_instance_f32 * S,uint16_t numTaps,const float32_t * pCoeffs,float32_t * pState,uint32_t blockSize)772 void arm_fir_init_f32(
773 arm_fir_instance_f32 *S, uint16_t numTaps, const float32_t *pCoeffs, float32_t *pState, uint32_t blockSize)
774 {
775 uint32_t i;
776
777 /*
778 * CMSIS DSP API filter coefficients stored in time reversed order, but PQ
779 * uses the positive order. PQ does not use pState, so pState pState[1:numTaps]
780 * is used here to save the coefficients in positive order. At the same time,
781 * pState[0] is used to save the offset used for incremetal calculation.
782 * Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
783 * the blockSize should be larger than 1.
784 */
785 assert(blockSize > 1U);
786
787 S->numTaps = numTaps;
788 S->pCoeffs = pCoeffs;
789 S->pState = pState;
790
791 for (i = 0U; i < numTaps; i++)
792 {
793 pState[numTaps - i] = pCoeffs[i];
794 }
795
796 *(uint32_t *)(void *)pState = 0U;
797 }
798
arm_fir_init_q31(arm_fir_instance_q31 * S,uint16_t numTaps,const q31_t * pCoeffs,q31_t * pState,uint32_t blockSize)799 void arm_fir_init_q31(
800 arm_fir_instance_q31 *S, uint16_t numTaps, const q31_t *pCoeffs, q31_t *pState, uint32_t blockSize)
801 {
802 uint32_t i;
803
804 /*
805 * CMSIS DSP API filter coefficients stored in time reversed order, but PQ
806 * uses the positive order. PQ does not use pState, so pState pState[1:numTaps]
807 * is used here to save the coefficients in positive order. At the same time,
808 * pState[0] is used to save the offset used for incremetal calculation.
809 * Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
810 * the blockSize should be larger than 1.
811 */
812 assert(blockSize > 1U);
813
814 S->numTaps = numTaps;
815 S->pCoeffs = pCoeffs;
816 S->pState = pState;
817
818 for (i = 0U; i < numTaps; i++)
819 {
820 pState[numTaps - i] = pCoeffs[i];
821 }
822
823 pState[0] = 0;
824 }
825
arm_fir_init_q15(arm_fir_instance_q15 * S,uint16_t numTaps,const q15_t * pCoeffs,q15_t * pState,uint32_t blockSize)826 arm_status arm_fir_init_q15(
827 arm_fir_instance_q15 *S, uint16_t numTaps, const q15_t *pCoeffs, q15_t *pState, uint32_t blockSize)
828 {
829 uint16_t i;
830
831 /*
832 * CMSIS DSP API filter coefficients stored in time reversed order, but PQ
833 * uses the positive order. PQ does not use pState, so pState pState[2:numTaps]
834 * is used here to save the coefficients in positive order. At the same time,
835 * pState[0:1] is used to save the offset used for incremetal calculation.
836 * Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
837 * the blockSize should be larger than 2.
838 */
839 assert(blockSize > 2U);
840
841 S->numTaps = numTaps;
842 S->pCoeffs = pCoeffs;
843 S->pState = pState;
844
845 for (i = 0U; i < numTaps; i++)
846 {
847 pState[numTaps + 1U - i] = pCoeffs[i];
848 }
849
850 *(uint32_t *)(void *)pState = 0U;
851
852 return ARM_MATH_SUCCESS;
853 }
854
855 /**
856 * brief Processing function for the floating-point FIR filter.
857 * param[in] S points to an instance of the floating-point FIR structure.
858 * param[in] pSrc points to the block of input data.
859 * param[out] pDst points to the block of output data.
860 * param[in] blockSize number of samples to process.
861 *
862 * Note: Powerquad has a hardware limitation, when using it for FIR increment calculation, the address of pSrc needs to
863 * be a continuous address.
864 */
arm_fir_f32(const arm_fir_instance_f32 * S,const float32_t * pSrc,float32_t * pDst,uint32_t blockSize)865 void arm_fir_f32(const arm_fir_instance_f32 *S, const float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
866 {
867 assert(NULL != S);
868 assert(NULL != pSrc);
869 assert(NULL != pDst);
870
871 uint32_t curOffset;
872 PQ_SET_F32_CONFIG;
873
874 curOffset = *(uint32_t *)(void *)(S->pState);
875
876 if (curOffset == 0U)
877 {
878 PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
879 }
880 else
881 {
882 _arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc));
883 }
884
885 *(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
886
887 PQ_WaitDone(POWERQUAD);
888 }
889
arm_fir_q31(const arm_fir_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)890 void arm_fir_q31(const arm_fir_instance_q31 *S, const q31_t *pSrc, q31_t *pDst, uint32_t blockSize)
891 {
892 assert(NULL != S);
893 assert(NULL != pSrc);
894 assert(NULL != pDst);
895
896 uint32_t curOffset;
897 PQ_SET_Q31_CONFIG;
898
899 curOffset = *(uint32_t *)(void *)(S->pState);
900
901 if (curOffset == 0U)
902 {
903 PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
904 }
905 else
906 {
907 _arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc));
908 }
909
910 *(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
911
912 PQ_WaitDone(POWERQUAD);
913 }
914
arm_fir_q15(const arm_fir_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)915 void arm_fir_q15(const arm_fir_instance_q15 *S, const q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
916 {
917 assert(NULL != S);
918 assert(NULL != pSrc);
919 assert(NULL != pDst);
920
921 uint32_t curOffset;
922
923 PQ_SET_Q15_CONFIG;
924
925 curOffset = *(uint32_t *)(void *)(S->pState);
926
927 if (curOffset == 0U)
928 {
929 PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[2]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
930 }
931 else
932 {
933 _arm_fir_increment(pSrc, blockSize, &S->pState[2], S->numTaps, pDst, curOffset, sizeof(*pSrc));
934 }
935
936 *(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
937
938 PQ_WaitDone(POWERQUAD);
939 }
940
arm_conv_f32(const float32_t * pSrcA,uint32_t srcALen,const float32_t * pSrcB,uint32_t srcBLen,float32_t * pDst)941 void arm_conv_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst)
942 {
943 assert(NULL != pSrcA);
944 assert(NULL != pSrcB);
945 assert(NULL != pDst);
946
947 PQ_SET_F32_CONFIG;
948
949 PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
950 PQ_WaitDone(POWERQUAD);
951 }
952
arm_conv_q31(const q31_t * pSrcA,uint32_t srcALen,const q31_t * pSrcB,uint32_t srcBLen,q31_t * pDst)953 void arm_conv_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst)
954 {
955 assert(NULL != pSrcA);
956 assert(NULL != pSrcB);
957 assert(NULL != pDst);
958
959 PQ_SET_Q31_CONFIG;
960
961 PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
962 PQ_WaitDone(POWERQUAD);
963 }
964
arm_conv_q15(const q15_t * pSrcA,uint32_t srcALen,const q15_t * pSrcB,uint32_t srcBLen,q15_t * pDst)965 void arm_conv_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
966 {
967 assert(NULL != pSrcA);
968 assert(NULL != pSrcB);
969 assert(NULL != pDst);
970
971 PQ_SET_Q15_CONFIG;
972
973 PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
974 PQ_WaitDone(POWERQUAD);
975 }
976
arm_correlate_f32(const float32_t * pSrcA,uint32_t srcALen,const float32_t * pSrcB,uint32_t srcBLen,float32_t * pDst)977 void arm_correlate_f32(
978 const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst)
979 {
980 assert(NULL != pSrcA);
981 assert(NULL != pSrcB);
982 assert(NULL != pDst);
983
984 PQ_SET_F32_CONFIG;
985
986 PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
987 PQ_WaitDone(POWERQUAD);
988 }
989
arm_correlate_q31(const q31_t * pSrcA,uint32_t srcALen,const q31_t * pSrcB,uint32_t srcBLen,q31_t * pDst)990 void arm_correlate_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst)
991 {
992 assert(NULL != pSrcA);
993 assert(NULL != pSrcB);
994 assert(NULL != pDst);
995
996 PQ_SET_Q31_CONFIG;
997
998 PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
999 PQ_WaitDone(POWERQUAD);
1000 }
1001
arm_correlate_q15(const q15_t * pSrcA,uint32_t srcALen,const q15_t * pSrcB,uint32_t srcBLen,q15_t * pDst)1002 void arm_correlate_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
1003 {
1004 assert(NULL != pSrcA);
1005 assert(NULL != pSrcB);
1006 assert(NULL != pDst);
1007
1008 PQ_SET_Q15_CONFIG;
1009
1010 PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
1011 PQ_WaitDone(POWERQUAD);
1012 }
1013
arm_mat_init_f32(arm_matrix_instance_f32 * S,uint16_t nRows,uint16_t nColumns,float32_t * pData)1014 void arm_mat_init_f32(arm_matrix_instance_f32 *S, uint16_t nRows, uint16_t nColumns, float32_t *pData)
1015 {
1016 /* Assign Number of Rows */
1017 S->numRows = nRows;
1018
1019 /* Assign Number of Columns */
1020 S->numCols = nColumns;
1021
1022 /* Assign Data pointer */
1023 S->pData = pData;
1024 }
1025
arm_mat_init_q31(arm_matrix_instance_q31 * S,uint16_t nRows,uint16_t nColumns,q31_t * pData)1026 void arm_mat_init_q31(arm_matrix_instance_q31 *S, uint16_t nRows, uint16_t nColumns, q31_t *pData)
1027 {
1028 /* Assign Number of Rows */
1029 S->numRows = nRows;
1030
1031 /* Assign Number of Columns */
1032 S->numCols = nColumns;
1033
1034 /* Assign Data pointer */
1035 S->pData = pData;
1036 }
1037
arm_mat_init_q15(arm_matrix_instance_q15 * S,uint16_t nRows,uint16_t nColumns,q15_t * pData)1038 void arm_mat_init_q15(arm_matrix_instance_q15 *S, uint16_t nRows, uint16_t nColumns, q15_t *pData)
1039 {
1040 /* Assign Number of Rows */
1041 S->numRows = nRows;
1042
1043 /* Assign Number of Columns */
1044 S->numCols = nColumns;
1045
1046 /* Assign Data pointer */
1047 S->pData = pData;
1048 }
1049
arm_mat_add_f32(const arm_matrix_instance_f32 * pSrcA,const arm_matrix_instance_f32 * pSrcB,arm_matrix_instance_f32 * pDst)1050 arm_status arm_mat_add_f32(const arm_matrix_instance_f32 *pSrcA,
1051 const arm_matrix_instance_f32 *pSrcB,
1052 arm_matrix_instance_f32 *pDst)
1053 {
1054 assert(NULL != pSrcA);
1055 assert(NULL != pSrcB);
1056 assert(NULL != pDst);
1057
1058 arm_status status;
1059 uint32_t length;
1060
1061 #ifdef ARM_MATH_MATRIX_CHECK
1062 /* Check for matrix mismatch condition */
1063 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1064 (pSrcA->numCols != pDst->numCols))
1065 {
1066 /* Set status as ARM_MATH_SIZE_MISMATCH */
1067 status = ARM_MATH_SIZE_MISMATCH;
1068 }
1069 else
1070 #endif
1071 {
1072 PQ_SET_F32_CONFIG;
1073
1074 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1075
1076 PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1077
1078 /* Wait for the completion */
1079 PQ_WaitDone(POWERQUAD);
1080
1081 status = ARM_MATH_SUCCESS;
1082 }
1083
1084 /* Return to application */
1085 return status;
1086 }
1087
arm_mat_add_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)1088 arm_status arm_mat_add_q31(const arm_matrix_instance_q31 *pSrcA,
1089 const arm_matrix_instance_q31 *pSrcB,
1090 arm_matrix_instance_q31 *pDst)
1091 {
1092 assert(NULL != pSrcA);
1093 assert(NULL != pSrcB);
1094 assert(NULL != pDst);
1095
1096 arm_status status;
1097 uint32_t length;
1098
1099 #ifdef ARM_MATH_MATRIX_CHECK
1100 /* Check for matrix mismatch condition */
1101 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1102 (pSrcA->numCols != pDst->numCols))
1103 {
1104 /* Set status as ARM_MATH_SIZE_MISMATCH */
1105 status = ARM_MATH_SIZE_MISMATCH;
1106 }
1107 else
1108 #endif
1109 {
1110 PQ_SET_FIX32_CONFIG;
1111
1112 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1113
1114 PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1115
1116 /* Wait for the completion */
1117 PQ_WaitDone(POWERQUAD);
1118
1119 status = ARM_MATH_SUCCESS;
1120 }
1121
1122 /* Return to application */
1123 return status;
1124 }
1125
arm_mat_add_q15(const arm_matrix_instance_q15 * pSrcA,const arm_matrix_instance_q15 * pSrcB,arm_matrix_instance_q15 * pDst)1126 arm_status arm_mat_add_q15(const arm_matrix_instance_q15 *pSrcA,
1127 const arm_matrix_instance_q15 *pSrcB,
1128 arm_matrix_instance_q15 *pDst)
1129 {
1130 assert(NULL != pSrcA);
1131 assert(NULL != pSrcB);
1132 assert(NULL != pDst);
1133
1134 arm_status status;
1135 uint32_t length;
1136
1137 #ifdef ARM_MATH_MATRIX_CHECK
1138 /* Check for matrix mismatch condition */
1139 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1140 (pSrcA->numCols != pDst->numCols))
1141 {
1142 /* Set status as ARM_MATH_SIZE_MISMATCH */
1143 status = ARM_MATH_SIZE_MISMATCH;
1144 }
1145 else
1146 #endif
1147 {
1148 PQ_SET_FIX16_CONFIG;
1149
1150 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1151
1152 PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1153
1154 /* Wait for the completion */
1155 PQ_WaitDone(POWERQUAD);
1156
1157 status = ARM_MATH_SUCCESS;
1158 }
1159
1160 /* Return to application */
1161 return status;
1162 }
1163
arm_mat_sub_f32(const arm_matrix_instance_f32 * pSrcA,const arm_matrix_instance_f32 * pSrcB,arm_matrix_instance_f32 * pDst)1164 arm_status arm_mat_sub_f32(const arm_matrix_instance_f32 *pSrcA,
1165 const arm_matrix_instance_f32 *pSrcB,
1166 arm_matrix_instance_f32 *pDst)
1167 {
1168 assert(NULL != pSrcA);
1169 assert(NULL != pSrcB);
1170 assert(NULL != pDst);
1171
1172 arm_status status;
1173 uint32_t length;
1174
1175 #ifdef ARM_MATH_MATRIX_CHECK
1176 /* Check for matrix mismatch condition */
1177 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1178 (pSrcA->numCols != pDst->numCols))
1179 {
1180 /* Set status as ARM_MATH_SIZE_MISMATCH */
1181 status = ARM_MATH_SIZE_MISMATCH;
1182 }
1183 else
1184 #endif
1185 {
1186 PQ_SET_F32_CONFIG;
1187
1188 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1189
1190 PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1191
1192 /* Wait for the completion */
1193 PQ_WaitDone(POWERQUAD);
1194
1195 status = ARM_MATH_SUCCESS;
1196 }
1197
1198 /* Return to application */
1199 return status;
1200 }
1201
arm_mat_sub_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)1202 arm_status arm_mat_sub_q31(const arm_matrix_instance_q31 *pSrcA,
1203 const arm_matrix_instance_q31 *pSrcB,
1204 arm_matrix_instance_q31 *pDst)
1205 {
1206 assert(NULL != pSrcA);
1207 assert(NULL != pSrcB);
1208 assert(NULL != pDst);
1209
1210 arm_status status;
1211 uint32_t length;
1212
1213 #ifdef ARM_MATH_MATRIX_CHECK
1214 /* Check for matrix mismatch condition */
1215 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1216 (pSrcA->numCols != pDst->numCols))
1217 {
1218 /* Set status as ARM_MATH_SIZE_MISMATCH */
1219 status = ARM_MATH_SIZE_MISMATCH;
1220 }
1221 else
1222 #endif
1223 {
1224 PQ_SET_FIX32_CONFIG;
1225
1226 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1227
1228 PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1229
1230 /* Wait for the completion */
1231 PQ_WaitDone(POWERQUAD);
1232
1233 status = ARM_MATH_SUCCESS;
1234 }
1235
1236 /* Return to application */
1237 return status;
1238 }
1239
arm_mat_sub_q15(const arm_matrix_instance_q15 * pSrcA,const arm_matrix_instance_q15 * pSrcB,arm_matrix_instance_q15 * pDst)1240 arm_status arm_mat_sub_q15(const arm_matrix_instance_q15 *pSrcA,
1241 const arm_matrix_instance_q15 *pSrcB,
1242 arm_matrix_instance_q15 *pDst)
1243 {
1244 assert(NULL != pSrcA);
1245 assert(NULL != pSrcB);
1246 assert(NULL != pDst);
1247
1248 arm_status status;
1249 uint32_t length;
1250
1251 #ifdef ARM_MATH_MATRIX_CHECK
1252 /* Check for matrix mismatch condition */
1253 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1254 (pSrcA->numCols != pDst->numCols))
1255 {
1256 /* Set status as ARM_MATH_SIZE_MISMATCH */
1257 status = ARM_MATH_SIZE_MISMATCH;
1258 }
1259 else
1260 #endif
1261 {
1262 PQ_SET_FIX16_CONFIG;
1263
1264 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1265
1266 PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1267
1268 /* Wait for the completion */
1269 PQ_WaitDone(POWERQUAD);
1270
1271 status = ARM_MATH_SUCCESS;
1272 }
1273
1274 /* Return to application */
1275 return status;
1276 }
1277
arm_mat_mult_f32(const arm_matrix_instance_f32 * pSrcA,const arm_matrix_instance_f32 * pSrcB,arm_matrix_instance_f32 * pDst)1278 arm_status arm_mat_mult_f32(const arm_matrix_instance_f32 *pSrcA,
1279 const arm_matrix_instance_f32 *pSrcB,
1280 arm_matrix_instance_f32 *pDst)
1281 {
1282 assert(NULL != pSrcA);
1283 assert(NULL != pSrcB);
1284 assert(NULL != pDst);
1285
1286 arm_status status;
1287 uint32_t length;
1288
1289 #ifdef ARM_MATH_MATRIX_CHECK
1290 /* Check for matrix mismatch condition */
1291 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1292 (pSrcA->numCols != pDst->numCols))
1293 {
1294 /* Set status as ARM_MATH_SIZE_MISMATCH */
1295 status = ARM_MATH_SIZE_MISMATCH;
1296 }
1297 else
1298 #endif
1299 {
1300 PQ_SET_F32_CONFIG;
1301
1302 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1303
1304 PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
1305
1306 /* Wait for the completion */
1307 PQ_WaitDone(POWERQUAD);
1308
1309 status = ARM_MATH_SUCCESS;
1310 }
1311
1312 /* Return to application */
1313 return status;
1314 }
1315
arm_mat_mult_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)1316 arm_status arm_mat_mult_q31(const arm_matrix_instance_q31 *pSrcA,
1317 const arm_matrix_instance_q31 *pSrcB,
1318 arm_matrix_instance_q31 *pDst)
1319 {
1320 assert(NULL != pSrcA);
1321 assert(NULL != pSrcB);
1322 assert(NULL != pDst);
1323
1324 arm_status status;
1325 uint32_t length;
1326
1327 /*
1328 * The output prescale does not supprt negative value due to hardware issue,
1329 * workaround:
1330 * 1. Downscale the matrix B and save the float output value to private memory.
1331 * 2. Multiply the float matrix B in private memory with matrix A, output as Q31.
1332 * Note: Put matrix B in private memory is faster.
1333 */
1334
1335 #ifdef ARM_MATH_MATRIX_CHECK
1336 /* Check for matrix mismatch condition */
1337 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1338 (pSrcA->numCols != pDst->numCols))
1339 {
1340 /* Set status as ARM_MATH_SIZE_MISMATCH */
1341 status = ARM_MATH_SIZE_MISMATCH;
1342 }
1343 else
1344 #endif
1345 {
1346 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U);
1347
1348 PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
1349
1350 /* Downscale. */
1351 PQ_MatrixScale(POWERQUAD, length, 1.0f / 2147483648.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U);
1352
1353 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1354
1355 PQ_WaitDone(POWERQUAD);
1356
1357 PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
1358
1359 PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData);
1360
1361 /* Wait for the completion */
1362 PQ_WaitDone(POWERQUAD);
1363
1364 status = ARM_MATH_SUCCESS;
1365 }
1366
1367 /* Return to application */
1368 return status;
1369 }
1370
arm_mat_mult_q15(const arm_matrix_instance_q15 * pSrcA,const arm_matrix_instance_q15 * pSrcB,arm_matrix_instance_q15 * pDst,q15_t * pState)1371 arm_status arm_mat_mult_q15(const arm_matrix_instance_q15 *pSrcA,
1372 const arm_matrix_instance_q15 *pSrcB,
1373 arm_matrix_instance_q15 *pDst,
1374 q15_t *pState)
1375 {
1376 assert(NULL != pSrcA);
1377 assert(NULL != pSrcB);
1378 assert(NULL != pDst);
1379
1380 arm_status status;
1381 uint32_t length;
1382
1383 #ifdef ARM_MATH_MATRIX_CHECK
1384 /* Check for matrix mismatch condition */
1385 if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
1386 (pSrcA->numCols != pDst->numCols))
1387 {
1388 /* Set status as ARM_MATH_SIZE_MISMATCH */
1389 status = ARM_MATH_SIZE_MISMATCH;
1390 }
1391 else
1392 #endif
1393 {
1394 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U);
1395
1396 PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG;
1397
1398 /* Downscale. */
1399 PQ_MatrixScale(POWERQUAD, length, 1.0f / 32768.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U);
1400
1401 length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
1402
1403 PQ_WaitDone(POWERQUAD);
1404
1405 PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG;
1406
1407 PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData);
1408
1409 /* Wait for the completion */
1410 PQ_WaitDone(POWERQUAD);
1411
1412 status = ARM_MATH_SUCCESS;
1413 }
1414
1415 /* Return to application */
1416 return status;
1417 }
1418
arm_mat_inverse_f32(const arm_matrix_instance_f32 * src,arm_matrix_instance_f32 * dst)1419 arm_status arm_mat_inverse_f32(const arm_matrix_instance_f32 *src, arm_matrix_instance_f32 *dst)
1420 {
1421 assert(NULL != src);
1422 assert(NULL != dst);
1423
1424 arm_status status;
1425 uint32_t length;
1426 float tmp[1024];
1427
1428 #ifdef ARM_MATH_MATRIX_CHECK
1429 /* Check for matrix mismatch condition */
1430 if ((src->numRows != src->numCols) || (dst->numRows != dst->numCols) || (src->numRows != dst->numRows))
1431 {
1432 /* Set status as ARM_MATH_SIZE_MISMATCH */
1433 status = ARM_MATH_SIZE_MISMATCH;
1434 }
1435 else
1436 #endif
1437 {
1438 PQ_SET_F32_CONFIG;
1439
1440 length = POWERQUAD_MAKE_MATRIX_LEN(src->numRows, src->numCols, src->numRows);
1441
1442 PQ_MatrixInversion(POWERQUAD, length, src->pData, tmp, dst->pData);
1443
1444 /* Wait for the completion */
1445 PQ_WaitDone(POWERQUAD);
1446
1447 status = ARM_MATH_SUCCESS;
1448 }
1449
1450 /* Return to application */
1451 return status;
1452 }
1453
arm_mat_trans_f32(const arm_matrix_instance_f32 * pSrc,arm_matrix_instance_f32 * pDst)1454 arm_status arm_mat_trans_f32(const arm_matrix_instance_f32 *pSrc, arm_matrix_instance_f32 *pDst)
1455 {
1456 assert(NULL != pSrc);
1457 assert(NULL != pDst);
1458
1459 arm_status status;
1460 uint32_t length;
1461
1462 #ifdef ARM_MATH_MATRIX_CHECK
1463 /* Check for matrix mismatch condition */
1464 if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1465 {
1466 /* Set status as ARM_MATH_SIZE_MISMATCH */
1467 status = ARM_MATH_SIZE_MISMATCH;
1468 }
1469 else
1470 #endif
1471 {
1472 length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1473
1474 PQ_SetFormat(POWERQUAD, kPQ_CP_MTX, kPQ_Float);
1475
1476 PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
1477
1478 /* Wait for the completion */
1479 PQ_WaitDone(POWERQUAD);
1480 status = ARM_MATH_SUCCESS;
1481 }
1482
1483 /* Return to application */
1484 return status;
1485 }
1486
arm_mat_trans_q31(const arm_matrix_instance_q31 * pSrc,arm_matrix_instance_q31 * pDst)1487 arm_status arm_mat_trans_q31(const arm_matrix_instance_q31 *pSrc, arm_matrix_instance_q31 *pDst)
1488 {
1489 assert(NULL != pSrc);
1490 assert(NULL != pDst);
1491
1492 arm_status status;
1493 uint32_t length;
1494
1495 #ifdef ARM_MATH_MATRIX_CHECK
1496 /* Check for matrix mismatch condition */
1497 if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1498 {
1499 /* Set status as ARM_MATH_SIZE_MISMATCH */
1500 status = ARM_MATH_SIZE_MISMATCH;
1501 }
1502 else
1503 #endif
1504 {
1505 PQ_SET_FIX32_CONFIG;
1506
1507 length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1508
1509 PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
1510
1511 /* Wait for the completion */
1512 PQ_WaitDone(POWERQUAD);
1513
1514 status = ARM_MATH_SUCCESS;
1515 }
1516
1517 /* Return to application */
1518 return status;
1519 }
1520
arm_mat_trans_q15(const arm_matrix_instance_q15 * pSrc,arm_matrix_instance_q15 * pDst)1521 arm_status arm_mat_trans_q15(const arm_matrix_instance_q15 *pSrc, arm_matrix_instance_q15 *pDst)
1522 {
1523 assert(NULL != pSrc);
1524 assert(NULL != pDst);
1525
1526 arm_status status;
1527 uint32_t length;
1528
1529 #ifdef ARM_MATH_MATRIX_CHECK
1530 /* Check for matrix mismatch condition */
1531 if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1532 {
1533 /* Set status as ARM_MATH_SIZE_MISMATCH */
1534 status = ARM_MATH_SIZE_MISMATCH;
1535 }
1536 else
1537 #endif
1538 {
1539 PQ_SET_FIX16_CONFIG;
1540
1541 length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1542
1543 PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
1544
1545 /* Wait for the completion */
1546 PQ_WaitDone(POWERQUAD);
1547
1548 status = ARM_MATH_SUCCESS;
1549 }
1550
1551 /* Return to application */
1552 return status;
1553 }
1554
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)1555 arm_status arm_mat_scale_f32(const arm_matrix_instance_f32 *pSrc, float32_t scale, arm_matrix_instance_f32 *pDst)
1556 {
1557 assert(NULL != pSrc);
1558 assert(NULL != pDst);
1559
1560 arm_status status;
1561 uint32_t length;
1562
1563 #ifdef ARM_MATH_MATRIX_CHECK
1564 /* Check for matrix mismatch condition */
1565 if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1566 {
1567 /* Set status as ARM_MATH_SIZE_MISMATCH */
1568 status = ARM_MATH_SIZE_MISMATCH;
1569 }
1570 else
1571 #endif
1572 {
1573 PQ_SET_F32_CONFIG;
1574
1575 length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1576
1577 PQ_MatrixScale(POWERQUAD, length, scale, pSrc->pData, pDst->pData);
1578
1579 /* Wait for the completion */
1580 PQ_WaitDone(POWERQUAD);
1581
1582 status = ARM_MATH_SUCCESS;
1583 }
1584
1585 /* Return to application */
1586 return status;
1587 }
1588
arm_mat_scale_q31(const arm_matrix_instance_q31 * pSrc,q31_t scaleFract,int32_t shift,arm_matrix_instance_q31 * pDst)1589 arm_status arm_mat_scale_q31(const arm_matrix_instance_q31 *pSrc,
1590 q31_t scaleFract,
1591 int32_t shift,
1592 arm_matrix_instance_q31 *pDst)
1593 {
1594 assert(NULL != pSrc);
1595 assert(NULL != pDst);
1596
1597 arm_status status;
1598 uint32_t length;
1599 float scaleFloat;
1600
1601 pq_config_t config = {
1602 kPQ_32Bit, /* inputAFormat */
1603 0, /* inputAPrescale */
1604 kPQ_32Bit, /* inputBFormat */
1605 0, /* inputBPrescale */
1606 kPQ_32Bit, /* outputFormat */
1607 (int8_t)shift, /* outputPrescale */
1608 kPQ_Float, /* tmpFormat */
1609 0, /* tmpPrescale */
1610 kPQ_Float, /* machineFormat */
1611 (uint32_t *)0xe0000000U, /* tmpBase */
1612 };
1613
1614 #ifdef ARM_MATH_MATRIX_CHECK
1615 /* Check for matrix mismatch condition */
1616 if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1617 {
1618 /* Set status as ARM_MATH_SIZE_MISMATCH */
1619 status = ARM_MATH_SIZE_MISMATCH;
1620 }
1621 else
1622 #endif
1623 {
1624 length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1625
1626 scaleFloat = PQ_Q31_2_FLOAT(scaleFract);
1627
1628 PQ_SetConfig(POWERQUAD, &config);
1629
1630 PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData);
1631
1632 /* Wait for the completion */
1633 PQ_WaitDone(POWERQUAD);
1634
1635 status = ARM_MATH_SUCCESS;
1636 }
1637
1638 /* Return to application */
1639 return status;
1640 }
1641
arm_mat_scale_q15(const arm_matrix_instance_q15 * pSrc,q15_t scaleFract,int32_t shift,arm_matrix_instance_q15 * pDst)1642 arm_status arm_mat_scale_q15(const arm_matrix_instance_q15 *pSrc,
1643 q15_t scaleFract,
1644 int32_t shift,
1645 arm_matrix_instance_q15 *pDst)
1646 {
1647 assert(NULL != pSrc);
1648 assert(NULL != pDst);
1649
1650 arm_status status;
1651 uint32_t length;
1652 float scaleFloat;
1653
1654 pq_config_t config = {
1655 kPQ_16Bit, /* inputAFormat */
1656 0, /* inputAPrescale */
1657 kPQ_16Bit, /* inputBFormat */
1658 0, /* inputBPrescale */
1659 kPQ_16Bit, /* outputFormat */
1660 (int8_t)shift, /* outputPrescale */
1661 kPQ_Float, /* tmpFormat */
1662 0, /* tmpPrescale */
1663 kPQ_Float, /* machineFormat */
1664 (uint32_t *)0xe0000000U, /* tmpBase */
1665 };
1666
1667 #ifdef ARM_MATH_MATRIX_CHECK
1668 /* Check for matrix mismatch condition */
1669 if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
1670 {
1671 /* Set status as ARM_MATH_SIZE_MISMATCH */
1672 status = ARM_MATH_SIZE_MISMATCH;
1673 }
1674 else
1675 #endif
1676 {
1677 length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
1678
1679 scaleFloat = PQ_Q15_2_FLOAT(scaleFract);
1680
1681 PQ_SetConfig(POWERQUAD, &config);
1682
1683 PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData);
1684
1685 /* Wait for the completion */
1686 PQ_WaitDone(POWERQUAD);
1687
1688 status = ARM_MATH_SUCCESS;
1689 }
1690
1691 /* Return to application */
1692 return status;
1693 }
1694