1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_rfft_q31.c
4 * Description: FFT & RIFFT Q31 process function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/transform_functions.h"
30
31 /* ----------------------------------------------------------------------
32 * Internal functions prototypes
33 * -------------------------------------------------------------------- */
34
35 ARM_DSP_ATTRIBUTE void arm_split_rfft_q31(
36 q31_t * pSrc,
37 uint32_t fftLen,
38 const q31_t * pATable,
39 const q31_t * pBTable,
40 q31_t * pDst,
41 uint32_t modifier);
42
43 ARM_DSP_ATTRIBUTE void arm_split_rifft_q31(
44 q31_t * pSrc,
45 uint32_t fftLen,
46 const q31_t * pATable,
47 const q31_t * pBTable,
48 q31_t * pDst,
49 uint32_t modifier);
50
51 /**
52 @addtogroup RealFFTQ31
53 @{
54 */
55
56 /**
57 @brief Processing function for the Q31 RFFT/RIFFT.
58 @param[in] S points to an instance of the Q31 RFFT/RIFFT structure
59 @param[in] pSrc points to input buffer (Source buffer is modified by this function)
60 @param[out] pDst points to output buffer
61
62 @par Input an output formats
63 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
64 Hence the output format is different for different RFFT sizes.
65 The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
66 @par Input and Output formats for RFFT Q31
67
68 | RFFT Size | Input Format | Output Format | Number of bits to upscale |
69 | ---------: | ------------: | -------------: | ------------------------: |
70 | 32 | 1.31 | 6.26 | 5 |
71 | 64 | 1.31 | 7.25 | 6 |
72 | 128 | 1.31 | 8.24 | 7 |
73 | 256 | 1.31 | 9.23 | 8 |
74 | 512 | 1.31 | 10.22 | 9 |
75 | 1024 | 1.31 | 11.21 | 10 |
76 | 2048 | 1.31 | 12.20 | 11 |
77 | 4096 | 1.31 | 13.19 | 12 |
78 | 8192 | 1.31 | 14.18 | 13 |
79
80 @par Input and Output formats for RIFFT Q31
81
82 | RIFFT Size | Input Format | Output Format | Number of bits to upscale |
83 | ----------: | ------------: | -------------: | ------------------------: |
84 | 32 | 1.31 | 6.26 | 0 |
85 | 64 | 1.31 | 7.25 | 0 |
86 | 128 | 1.31 | 8.24 | 0 |
87 | 256 | 1.31 | 9.23 | 0 |
88 | 512 | 1.31 | 10.22 | 0 |
89 | 1024 | 1.31 | 11.21 | 0 |
90 | 2048 | 1.31 | 12.20 | 0 |
91 | 4096 | 1.31 | 13.19 | 0 |
92 | 8192 | 1.31 | 14.18 | 0 |
93
94 @par
95 If the input buffer is of length N (fftLenReal), the output buffer must have length 2N
96 since it is containing the conjugate part (except for MVE version where N+2 is enough).
97 The input buffer is modified by this function.
98 @par
99 For the RIFFT, the source buffer must have length N+2 since the Nyquist frequency value
100 is needed but conjugate part is ignored.
101 It is not using the packing trick of the float version.
102
103 */
104
arm_rfft_q31(const arm_rfft_instance_q31 * S,q31_t * pSrc,q31_t * pDst)105 ARM_DSP_ATTRIBUTE void arm_rfft_q31(
106 const arm_rfft_instance_q31 * S,
107 q31_t * pSrc,
108 q31_t * pDst)
109 {
110 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
111 const arm_cfft_instance_q31 *S_CFFT = &(S->cfftInst);
112 #else
113 const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
114 #endif
115 uint32_t L2 = S->fftLenReal >> 1U;
116
117 /* Calculation of RIFFT of input */
118 if (S->ifftFlagR == 1U)
119 {
120 /* Real IFFT core process */
121 arm_split_rifft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
122
123 /* Complex IFFT process */
124 arm_cfft_q31 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
125
126 arm_shift_q31(pDst, 1, pDst, S->fftLenReal);
127 }
128 else
129 {
130 /* Calculation of RFFT of input */
131
132 /* Complex FFT process */
133 arm_cfft_q31 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
134
135 /* Real FFT core process */
136 arm_split_rfft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
137 }
138
139 }
140
141 /**
142 @} end of RealFFTQ31 group
143 */
144
145 /**
146 @brief Core Real FFT process
147 @param[in] pSrc points to input buffer
148 @param[in] fftLen length of FFT
149 @param[in] pATable points to twiddle Coef A buffer
150 @param[in] pBTable points to twiddle Coef B buffer
151 @param[out] pDst points to output buffer
152 @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
153 */
154
155 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
156
157 #include "arm_helium_utils.h"
158 #include "arm_vec_fft.h"
159
160 #if defined(ARM_DSP_BUILT_WITH_GCC)
161
162 #define MVE_CMPLX_MULT_FX_AxB_S32(A,B) vqdmladhxq_s32(vqdmlsdhq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
163 #define MVE_CMPLX_MULT_FX_AxConjB_S32(A,B) vqdmladhq_s32(vqdmlsdhxq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
164
165 #endif
166
arm_split_rfft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)167 ARM_DSP_ATTRIBUTE void arm_split_rfft_q31(
168 q31_t *pSrc,
169 uint32_t fftLen,
170 const q31_t *pATable,
171 const q31_t *pBTable,
172 q31_t *pDst,
173 uint32_t modifier)
174 {
175 uint32_t i; /* Loop Counter */
176 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
177 q31_t *pOut1 = &pDst[2];
178 q31_t *pIn1 = &pSrc[2];
179 uint32x4_t offset = { 2, 3, 0, 1 };
180 uint32x4_t offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 };
181
182 offset = offset + (2 * fftLen - 4);
183
184
185 /* Init coefficient pointers */
186 pCoefA = &pATable[modifier * 2];
187 pCoefB = &pBTable[modifier * 2];
188
189 const q31_t *pCoefAb, *pCoefBb;
190 pCoefAb = pCoefA;
191 pCoefBb = pCoefB;
192
193 pIn1 = &pSrc[2];
194
195 i = fftLen - 1U;
196 i = i / 2 + 1;
197 while (i > 0U) {
198 q31x4_t in1 = vld1q_s32(pIn1);
199 q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
200 q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
201 q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
202 #if defined(ARM_DSP_BUILT_WITH_GCC)
203 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
204 #else
205 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q31x4_t),
206 MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q31x4_t));
207 #endif
208 vst1q(pOut1, out);
209 pOut1 += 4;
210
211 offsetCoef += modifier * 4;
212 offset -= 4;
213
214 pIn1 += 4;
215 i -= 1;
216 }
217
218 pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
219 pDst[2 * fftLen + 1] = 0;
220
221 pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
222 pDst[1] = 0;
223 }
224 #else
arm_split_rfft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)225 ARM_DSP_ATTRIBUTE void arm_split_rfft_q31(
226 q31_t * pSrc,
227 uint32_t fftLen,
228 const q31_t * pATable,
229 const q31_t * pBTable,
230 q31_t * pDst,
231 uint32_t modifier)
232 {
233 uint32_t i; /* Loop Counter */
234 q31_t outR, outI; /* Temporary variables for output */
235 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
236 q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
237 q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[4 * fftLen - 1];
238 q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[2 * fftLen - 1];
239
240 /* Init coefficient pointers */
241 pCoefA = &pATable[modifier * 2];
242 pCoefB = &pBTable[modifier * 2];
243
244 i = fftLen - 1U;
245
246 while (i > 0U)
247 {
248 /*
249 outR = ( pSrc[2 * i] * pATable[2 * i]
250 - pSrc[2 * i + 1] * pATable[2 * i + 1]
251 + pSrc[2 * n - 2 * i] * pBTable[2 * i]
252 + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
253
254 outI = ( pIn[2 * i + 1] * pATable[2 * i]
255 + pIn[2 * i] * pATable[2 * i + 1]
256 + pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
257 - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
258 */
259
260 CoefA1 = *pCoefA++;
261 CoefA2 = *pCoefA;
262
263 /* outR = (pSrc[2 * i] * pATable[2 * i] */
264 mult_32x32_keep32_R (outR, *pIn1, CoefA1);
265
266 /* outI = pIn[2 * i] * pATable[2 * i + 1] */
267 mult_32x32_keep32_R (outI, *pIn1++, CoefA2);
268
269 /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
270 multSub_32x32_keep32_R (outR, *pIn1, CoefA2);
271
272 /* (pIn[2 * i + 1] * pATable[2 * i] */
273 multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
274
275 /* pSrc[2 * n - 2 * i] * pBTable[2 * i] */
276 multSub_32x32_keep32_R (outR, *pIn2, CoefA2);
277 CoefB1 = *pCoefB;
278
279 /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
280 multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
281
282 /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
283 multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
284
285 /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
286 multSub_32x32_keep32_R (outI, *pIn2--, CoefA2);
287
288 /* write output */
289 *pOut1++ = outR;
290 *pOut1++ = outI;
291
292 /* write complex conjugate output */
293 *pOut2-- = -outI;
294 *pOut2-- = outR;
295
296 /* update coefficient pointer */
297 pCoefB = pCoefB + (2 * modifier);
298 pCoefA = pCoefA + (2 * modifier - 1);
299
300 /* Decrement loop count */
301 i--;
302 }
303
304 pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
305 pDst[2 * fftLen + 1] = 0;
306
307 pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
308 pDst[1] = 0;
309 }
310 #endif /* defined(ARM_MATH_MVEI) */
311
312 /**
313 @brief Core Real IFFT process
314 @param[in] pSrc points to input buffer
315 @param[in] fftLen length of FFT
316 @param[in] pATable points to twiddle Coef A buffer
317 @param[in] pBTable points to twiddle Coef B buffer
318 @param[out] pDst points to output buffer
319 @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
320 */
321
322 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
323
arm_split_rifft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)324 ARM_DSP_ATTRIBUTE void arm_split_rifft_q31(
325 q31_t * pSrc,
326 uint32_t fftLen,
327 const q31_t * pATable,
328 const q31_t * pBTable,
329 q31_t * pDst,
330 uint32_t modifier)
331 {
332 uint32_t i; /* Loop Counter */
333 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
334 q31_t *pIn1;
335 uint32x4_t offset = { 2, 3, 0, 1 };
336 uint32x4_t offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 };
337 int32x4_t conj = { 1, -1, 1, -1 };
338
339 offset = offset + (2 * fftLen - 2);
340
341 /* Init coefficient pointers */
342 pCoefA = &pATable[0];
343 pCoefB = &pBTable[0];
344
345 const q31_t *pCoefAb, *pCoefBb;
346 pCoefAb = pCoefA;
347 pCoefBb = pCoefB;
348
349 pIn1 = &pSrc[0];
350
351 i = fftLen;
352 i = i >> 1;
353 while (i > 0U) {
354 q31x4_t in1 = vld1q_s32(pIn1);
355 q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
356 q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
357 q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
358
359 /* can we avoid the conjugate here ? */
360 #if defined(ARM_DSP_BUILT_WITH_GCC)
361 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
362 vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
363 #else
364 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q31x4_t),
365 vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q31x4_t)));
366 #endif
367 vst1q_s32(pDst, out);
368 pDst += 4;
369
370 offsetCoef += modifier * 4;
371 offset -= 4;
372
373 pIn1 += 4;
374 i -= 1;
375 }
376 }
377 #else
arm_split_rifft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)378 ARM_DSP_ATTRIBUTE void arm_split_rifft_q31(
379 q31_t * pSrc,
380 uint32_t fftLen,
381 const q31_t * pATable,
382 const q31_t * pBTable,
383 q31_t * pDst,
384 uint32_t modifier)
385 {
386 q31_t outR, outI; /* Temporary variables for output */
387 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
388 q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
389 q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[2 * fftLen + 1];
390
391 pCoefA = &pATable[0];
392 pCoefB = &pBTable[0];
393
394 while (fftLen > 0U)
395 {
396 /*
397 outR = ( pIn[2 * i] * pATable[2 * i]
398 + pIn[2 * i + 1] * pATable[2 * i + 1]
399 + pIn[2 * n - 2 * i] * pBTable[2 * i]
400 - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
401
402 outI = ( pIn[2 * i + 1] * pATable[2 * i]
403 - pIn[2 * i] * pATable[2 * i + 1]
404 - pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
405 - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
406 */
407
408 CoefA1 = *pCoefA++;
409 CoefA2 = *pCoefA;
410
411 /* outR = (pIn[2 * i] * pATable[2 * i] */
412 mult_32x32_keep32_R (outR, *pIn1, CoefA1);
413
414 /* - pIn[2 * i] * pATable[2 * i + 1] */
415 mult_32x32_keep32_R (outI, *pIn1++, -CoefA2);
416
417 /* pIn[2 * i + 1] * pATable[2 * i + 1] */
418 multAcc_32x32_keep32_R (outR, *pIn1, CoefA2);
419
420 /* pIn[2 * i + 1] * pATable[2 * i] */
421 multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
422
423 /* pIn[2 * n - 2 * i] * pBTable[2 * i] */
424 multAcc_32x32_keep32_R (outR, *pIn2, CoefA2);
425 CoefB1 = *pCoefB;
426
427 /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
428 multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
429
430 /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
431 multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
432
433 /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
434 multAcc_32x32_keep32_R (outI, *pIn2--, CoefA2);
435
436 /* write output */
437 *pDst++ = outR;
438 *pDst++ = outI;
439
440 /* update coefficient pointer */
441 pCoefB = pCoefB + (modifier * 2);
442 pCoefA = pCoefA + (modifier * 2 - 1);
443
444 /* Decrement loop count */
445 fftLen--;
446 }
447
448 }
449
450 #endif /* defined(ARM_MATH_MVEI) */
451