1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q31.c
4 * Description: This file has function definition of Radix-4 FFT & IFFT function and
5 * In-place bit reversal using bit reversal table
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/transform_functions.h"
31
32 void arm_radix4_butterfly_inverse_q31(
33 q31_t * pSrc,
34 uint32_t fftLen,
35 const q31_t * pCoef,
36 uint32_t twidCoefModifier);
37
38 void arm_radix4_butterfly_q31(
39 q31_t * pSrc,
40 uint32_t fftLen,
41 const q31_t * pCoef,
42 uint32_t twidCoefModifier);
43
44 void arm_bitreversal_q31(
45 q31_t * pSrc,
46 uint32_t fftLen,
47 uint16_t bitRevFactor,
48 const uint16_t * pBitRevTab);
49
50
51 /**
52 @addtogroup ComplexFFTDeprecated
53 @{
54 */
55
56 /**
57 @brief Processing function for the Q31 CFFT/CIFFT.
58 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
59 @param[in] S points to an instance of the Q31 CFFT/CIFFT structure
60 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
61 @return none
62
63 @par Input and output formats:
64 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
65 Hence the output format is different for different FFT sizes.
66 The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
67 @par
68
69 | CFFT Size | Input format | Output format | Number of bits to upscale |
70 | --------: | ------------: | ------------: | ------------------------: |
71 | 16 | 1.31 | 5.27 | 4 |
72 | 64 | 1.31 | 7.25 | 6 |
73 | 256 | 1.31 | 9.23 | 8 |
74 | 1024 | 1.31 | 11.21 | 10 |
75
76 | CIFFT Size | Input format | Output format | Number of bits to upscale |
77 | ---------: | ------------: | ------------: | ------------------------: |
78 | 16 | 1.31 | 5.27 | 0 |
79 | 64 | 1.31 | 7.25 | 0 |
80 | 256 | 1.31 | 9.23 | 0 |
81 | 1024 | 1.31 | 11.21 | 0 |
82
83 */
84
arm_cfft_radix4_q31(const arm_cfft_radix4_instance_q31 * S,q31_t * pSrc)85 void arm_cfft_radix4_q31(
86 const arm_cfft_radix4_instance_q31 * S,
87 q31_t * pSrc)
88 {
89 if (S->ifftFlag == 1U)
90 {
91 /* Complex IFFT radix-4 */
92 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
93 }
94 else
95 {
96 /* Complex FFT radix-4 */
97 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
98 }
99
100 if (S->bitReverseFlag == 1U)
101 {
102 /* Bit Reversal */
103 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
104 }
105
106 }
107
108 /**
109 @} end of ComplexFFTDeprecated group
110 */
111
112 /*
113 * Radix-4 FFT algorithm used is :
114 *
115 * Input real and imaginary data:
116 * x(n) = xa + j * ya
117 * x(n+N/4 ) = xb + j * yb
118 * x(n+N/2 ) = xc + j * yc
119 * x(n+3N 4) = xd + j * yd
120 *
121 *
122 * Output real and imaginary data:
123 * x(4r) = xa'+ j * ya'
124 * x(4r+1) = xb'+ j * yb'
125 * x(4r+2) = xc'+ j * yc'
126 * x(4r+3) = xd'+ j * yd'
127 *
128 *
129 * Twiddle factors for radix-4 FFT:
130 * Wn = co1 + j * (- si1)
131 * W2n = co2 + j * (- si2)
132 * W3n = co3 + j * (- si3)
133 *
134 * Butterfly implementation:
135 * xa' = xa + xb + xc + xd
136 * ya' = ya + yb + yc + yd
137 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
138 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
139 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
140 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
141 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
142 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
143 *
144 */
145
146 /**
147 @brief Core function for the Q31 CFFT butterfly process.
148 @param[in,out] pSrc points to the in-place buffer of Q31 data type.
149 @param[in] fftLen length of the FFT.
150 @param[in] pCoef points to twiddle coefficient buffer.
151 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
152 @return none
153 */
154
arm_radix4_butterfly_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pCoef,uint32_t twidCoefModifier)155 void arm_radix4_butterfly_q31(
156 q31_t * pSrc,
157 uint32_t fftLen,
158 const q31_t * pCoef,
159 uint32_t twidCoefModifier)
160 {
161 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
162 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
163
164 q31_t xa, xb, xc, xd;
165 q31_t ya, yb, yc, yd;
166 q31_t xa_out, xb_out, xc_out, xd_out;
167 q31_t ya_out, yb_out, yc_out, yd_out;
168
169 q31_t *ptr1;
170
171 /* Total process is divided into three stages */
172
173 /* process first stage, middle stages, & last stage */
174
175
176 /* start of first stage process */
177
178 /* Initializations for the first stage */
179 n2 = fftLen;
180 n1 = n2;
181 /* n2 = fftLen/4 */
182 n2 >>= 2U;
183 i0 = 0U;
184 ia1 = 0U;
185
186 j = n2;
187
188 /* Calculation of first stage */
189 do
190 {
191 /* index calculation for the input as, */
192 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
193 i1 = i0 + n2;
194 i2 = i1 + n2;
195 i3 = i2 + n2;
196
197 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
198
199 /* Butterfly implementation */
200 /* xa + xc */
201 r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U);
202 /* xa - xc */
203 r2 = (pSrc[(2U * i0)] >> 4U) - (pSrc[(2U * i2)] >> 4U);
204
205 /* xb + xd */
206 t1 = (pSrc[(2U * i1)] >> 4U) + (pSrc[(2U * i3)] >> 4U);
207
208 /* ya + yc */
209 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
210 /* ya - yc */
211 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
212
213 /* xa' = xa + xb + xc + xd */
214 pSrc[2U * i0] = (r1 + t1);
215 /* (xa + xc) - (xb + xd) */
216 r1 = r1 - t1;
217 /* yb + yd */
218 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
219
220 /* ya' = ya + yb + yc + yd */
221 pSrc[(2U * i0) + 1U] = (s1 + t2);
222
223 /* (ya + yc) - (yb + yd) */
224 s1 = s1 - t2;
225
226 /* yb - yd */
227 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
228 /* xb - xd */
229 t2 = (pSrc[(2U * i1)] >> 4U) - (pSrc[(2U * i3)] >> 4U);
230
231 /* index calculation for the coefficients */
232 ia2 = 2U * ia1;
233 co2 = pCoef[(ia2 * 2U)];
234 si2 = pCoef[(ia2 * 2U) + 1U];
235
236 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
237 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
238 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
239
240 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
241 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
242 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
243
244 /* (xa - xc) + (yb - yd) */
245 r1 = r2 + t1;
246 /* (xa - xc) - (yb - yd) */
247 r2 = r2 - t1;
248
249 /* (ya - yc) - (xb - xd) */
250 s1 = s2 - t2;
251 /* (ya - yc) + (xb - xd) */
252 s2 = s2 + t2;
253
254 co1 = pCoef[(ia1 * 2U)];
255 si1 = pCoef[(ia1 * 2U) + 1U];
256
257 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
258 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
259 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
260
261 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
262 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
263 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
264
265 /* index calculation for the coefficients */
266 ia3 = 3U * ia1;
267 co3 = pCoef[(ia3 * 2U)];
268 si3 = pCoef[(ia3 * 2U) + 1U];
269
270 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
271 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
272 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
273
274 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
275 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
276 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
277
278 /* Twiddle coefficients index modifier */
279 ia1 = ia1 + twidCoefModifier;
280
281 /* Updating input index */
282 i0 = i0 + 1U;
283
284 } while (--j);
285
286 /* end of first stage process */
287
288 /* data is in 5.27(q27) format */
289
290
291 /* start of Middle stages process */
292
293
294 /* each stage in middle stages provides two down scaling of the input */
295
296 twidCoefModifier <<= 2U;
297
298
299 for (k = fftLen / 4U; k > 4U; k >>= 2U)
300 {
301 /* Initializations for the first stage */
302 n1 = n2;
303 n2 >>= 2U;
304 ia1 = 0U;
305
306 /* Calculation of first stage */
307 for (j = 0U; j <= (n2 - 1U); j++)
308 {
309 /* index calculation for the coefficients */
310 ia2 = ia1 + ia1;
311 ia3 = ia2 + ia1;
312 co1 = pCoef[(ia1 * 2U)];
313 si1 = pCoef[(ia1 * 2U) + 1U];
314 co2 = pCoef[(ia2 * 2U)];
315 si2 = pCoef[(ia2 * 2U) + 1U];
316 co3 = pCoef[(ia3 * 2U)];
317 si3 = pCoef[(ia3 * 2U) + 1U];
318 /* Twiddle coefficients index modifier */
319 ia1 = ia1 + twidCoefModifier;
320
321 for (i0 = j; i0 < fftLen; i0 += n1)
322 {
323 /* index calculation for the input as, */
324 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
325 i1 = i0 + n2;
326 i2 = i1 + n2;
327 i3 = i2 + n2;
328
329 /* Butterfly implementation */
330 /* xa + xc */
331 r1 = pSrc[2U * i0] + pSrc[2U * i2];
332 /* xa - xc */
333 r2 = pSrc[2U * i0] - pSrc[2U * i2];
334
335 /* ya + yc */
336 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
337 /* ya - yc */
338 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
339
340 /* xb + xd */
341 t1 = pSrc[2U * i1] + pSrc[2U * i3];
342
343 /* xa' = xa + xb + xc + xd */
344 pSrc[2U * i0] = (r1 + t1) >> 2U;
345 /* xa + xc -(xb + xd) */
346 r1 = r1 - t1;
347
348 /* yb + yd */
349 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
350 /* ya' = ya + yb + yc + yd */
351 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
352
353 /* (ya + yc) - (yb + yd) */
354 s1 = s1 - t2;
355
356 /* (yb - yd) */
357 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
358 /* (xb - xd) */
359 t2 = pSrc[2U * i1] - pSrc[2U * i3];
360
361 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
362 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
363 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
364
365 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
366 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
367 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
368
369 /* (xa - xc) + (yb - yd) */
370 r1 = r2 + t1;
371 /* (xa - xc) - (yb - yd) */
372 r2 = r2 - t1;
373
374 /* (ya - yc) - (xb - xd) */
375 s1 = s2 - t2;
376 /* (ya - yc) + (xb - xd) */
377 s2 = s2 + t2;
378
379 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
380 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
381 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
382
383 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
384 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
385 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
386
387 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
388 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
389 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
390
391 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
392 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
393 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
394 }
395 }
396 twidCoefModifier <<= 2U;
397 }
398
399 /* End of Middle stages process */
400
401 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
402 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
403 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
404 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
405
406
407 /* start of Last stage process */
408 /* Initializations for the last stage */
409 j = fftLen >> 2;
410 ptr1 = &pSrc[0];
411
412 /* Calculations of last stage */
413 do
414 {
415 /* Read xa (real), ya(imag) input */
416 xa = *ptr1++;
417 ya = *ptr1++;
418
419 /* Read xb (real), yb(imag) input */
420 xb = *ptr1++;
421 yb = *ptr1++;
422
423 /* Read xc (real), yc(imag) input */
424 xc = *ptr1++;
425 yc = *ptr1++;
426
427 /* Read xc (real), yc(imag) input */
428 xd = *ptr1++;
429 yd = *ptr1++;
430
431 /* xa' = xa + xb + xc + xd */
432 xa_out = xa + xb + xc + xd;
433
434 /* ya' = ya + yb + yc + yd */
435 ya_out = ya + yb + yc + yd;
436
437 /* pointer updation for writing */
438 ptr1 = ptr1 - 8U;
439
440 /* writing xa' and ya' */
441 *ptr1++ = xa_out;
442 *ptr1++ = ya_out;
443
444 xc_out = (xa - xb + xc - xd);
445 yc_out = (ya - yb + yc - yd);
446
447 /* writing xc' and yc' */
448 *ptr1++ = xc_out;
449 *ptr1++ = yc_out;
450
451 xb_out = (xa + yb - xc - yd);
452 yb_out = (ya - xb - yc + xd);
453
454 /* writing xb' and yb' */
455 *ptr1++ = xb_out;
456 *ptr1++ = yb_out;
457
458 xd_out = (xa - yb - xc + yd);
459 yd_out = (ya + xb - yc - xd);
460
461 /* writing xd' and yd' */
462 *ptr1++ = xd_out;
463 *ptr1++ = yd_out;
464
465
466 } while (--j);
467
468 /* output is in 11.21(q21) format for the 1024 point */
469 /* output is in 9.23(q23) format for the 256 point */
470 /* output is in 7.25(q25) format for the 64 point */
471 /* output is in 5.27(q27) format for the 16 point */
472
473 /* End of last stage process */
474
475 }
476
477
478 /**
479 @brief Core function for the Q31 CIFFT butterfly process.
480 @param[in,out] pSrc points to the in-place buffer of Q31 data type.
481 @param[in] fftLen length of the FFT.
482 @param[in] pCoef points to twiddle coefficient buffer.
483 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
484 @return none
485 */
486
487 /*
488 * Radix-4 IFFT algorithm used is :
489 *
490 * CIFFT uses same twiddle coefficients as CFFT Function
491 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
492 *
493 *
494 * IFFT is implemented with following changes in equations from FFT
495 *
496 * Input real and imaginary data:
497 * x(n) = xa + j * ya
498 * x(n+N/4 ) = xb + j * yb
499 * x(n+N/2 ) = xc + j * yc
500 * x(n+3N 4) = xd + j * yd
501 *
502 *
503 * Output real and imaginary data:
504 * x(4r) = xa'+ j * ya'
505 * x(4r+1) = xb'+ j * yb'
506 * x(4r+2) = xc'+ j * yc'
507 * x(4r+3) = xd'+ j * yd'
508 *
509 *
510 * Twiddle factors for radix-4 IFFT:
511 * Wn = co1 + j * (si1)
512 * W2n = co2 + j * (si2)
513 * W3n = co3 + j * (si3)
514
515 * The real and imaginary output values for the radix-4 butterfly are
516 * xa' = xa + xb + xc + xd
517 * ya' = ya + yb + yc + yd
518 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
519 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
520 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
521 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
522 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
523 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
524 *
525 */
526
arm_radix4_butterfly_inverse_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pCoef,uint32_t twidCoefModifier)527 void arm_radix4_butterfly_inverse_q31(
528 q31_t * pSrc,
529 uint32_t fftLen,
530 const q31_t * pCoef,
531 uint32_t twidCoefModifier)
532 {
533 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
534 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
535 q31_t xa, xb, xc, xd;
536 q31_t ya, yb, yc, yd;
537 q31_t xa_out, xb_out, xc_out, xd_out;
538 q31_t ya_out, yb_out, yc_out, yd_out;
539
540 q31_t *ptr1;
541
542 /* input is be 1.31(q31) format for all FFT sizes */
543 /* Total process is divided into three stages */
544 /* process first stage, middle stages, & last stage */
545
546 /* Start of first stage process */
547
548 /* Initializations for the first stage */
549 n2 = fftLen;
550 n1 = n2;
551 /* n2 = fftLen/4 */
552 n2 >>= 2U;
553 i0 = 0U;
554 ia1 = 0U;
555
556 j = n2;
557
558 do
559 {
560 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
561
562 /* index calculation for the input as, */
563 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
564 i1 = i0 + n2;
565 i2 = i1 + n2;
566 i3 = i2 + n2;
567
568 /* Butterfly implementation */
569 /* xa + xc */
570 r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U);
571 /* xa - xc */
572 r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
573
574 /* xb + xd */
575 t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
576
577 /* ya + yc */
578 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
579 /* ya - yc */
580 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
581
582 /* xa' = xa + xb + xc + xd */
583 pSrc[2U * i0] = (r1 + t1);
584 /* (xa + xc) - (xb + xd) */
585 r1 = r1 - t1;
586 /* yb + yd */
587 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
588 /* ya' = ya + yb + yc + yd */
589 pSrc[(2U * i0) + 1U] = (s1 + t2);
590
591 /* (ya + yc) - (yb + yd) */
592 s1 = s1 - t2;
593
594 /* yb - yd */
595 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
596 /* xb - xd */
597 t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
598
599 /* index calculation for the coefficients */
600 ia2 = 2U * ia1;
601 co2 = pCoef[ia2 * 2U];
602 si2 = pCoef[(ia2 * 2U) + 1U];
603
604 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
605 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
606 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
607
608 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
609 pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
610 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
611
612 /* (xa - xc) - (yb - yd) */
613 r1 = r2 - t1;
614 /* (xa - xc) + (yb - yd) */
615 r2 = r2 + t1;
616
617 /* (ya - yc) + (xb - xd) */
618 s1 = s2 + t2;
619 /* (ya - yc) - (xb - xd) */
620 s2 = s2 - t2;
621
622 co1 = pCoef[ia1 * 2U];
623 si1 = pCoef[(ia1 * 2U) + 1U];
624
625 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
626 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
627 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
628
629 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
630 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
631 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
632
633 /* index calculation for the coefficients */
634 ia3 = 3U * ia1;
635 co3 = pCoef[ia3 * 2U];
636 si3 = pCoef[(ia3 * 2U) + 1U];
637
638 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
639 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
640 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
641
642 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
643 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
644 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
645
646 /* Twiddle coefficients index modifier */
647 ia1 = ia1 + twidCoefModifier;
648
649 /* Updating input index */
650 i0 = i0 + 1U;
651
652 } while (--j);
653
654 /* data is in 5.27(q27) format */
655 /* each stage provides two down scaling of the input */
656
657
658 /* Start of Middle stages process */
659
660 twidCoefModifier <<= 2U;
661
662 /* Calculation of second stage to excluding last stage */
663 for (k = fftLen / 4U; k > 4U; k >>= 2U)
664 {
665 /* Initializations for the first stage */
666 n1 = n2;
667 n2 >>= 2U;
668 ia1 = 0U;
669
670 for (j = 0; j <= (n2 - 1U); j++)
671 {
672 /* index calculation for the coefficients */
673 ia2 = ia1 + ia1;
674 ia3 = ia2 + ia1;
675 co1 = pCoef[(ia1 * 2U)];
676 si1 = pCoef[(ia1 * 2U) + 1U];
677 co2 = pCoef[(ia2 * 2U)];
678 si2 = pCoef[(ia2 * 2U) + 1U];
679 co3 = pCoef[(ia3 * 2U)];
680 si3 = pCoef[(ia3 * 2U) + 1U];
681 /* Twiddle coefficients index modifier */
682 ia1 = ia1 + twidCoefModifier;
683
684 for (i0 = j; i0 < fftLen; i0 += n1)
685 {
686 /* index calculation for the input as, */
687 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
688 i1 = i0 + n2;
689 i2 = i1 + n2;
690 i3 = i2 + n2;
691
692 /* Butterfly implementation */
693 /* xa + xc */
694 r1 = pSrc[2U * i0] + pSrc[2U * i2];
695 /* xa - xc */
696 r2 = pSrc[2U * i0] - pSrc[2U * i2];
697
698 /* ya + yc */
699 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
700 /* ya - yc */
701 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
702
703 /* xb + xd */
704 t1 = pSrc[2U * i1] + pSrc[2U * i3];
705
706 /* xa' = xa + xb + xc + xd */
707 pSrc[2U * i0] = (r1 + t1) >> 2U;
708 /* xa + xc -(xb + xd) */
709 r1 = r1 - t1;
710 /* yb + yd */
711 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
712 /* ya' = ya + yb + yc + yd */
713 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
714
715 /* (ya + yc) - (yb + yd) */
716 s1 = s1 - t2;
717
718 /* (yb - yd) */
719 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
720 /* (xb - xd) */
721 t2 = pSrc[2U * i1] - pSrc[2U * i3];
722
723 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
724 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
725 ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
726
727 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
728 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
729 ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
730
731 /* (xa - xc) - (yb - yd) */
732 r1 = r2 - t1;
733 /* (xa - xc) + (yb - yd) */
734 r2 = r2 + t1;
735
736 /* (ya - yc) + (xb - xd) */
737 s1 = s2 + t2;
738 /* (ya - yc) - (xb - xd) */
739 s2 = s2 - t2;
740
741 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
742 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
743 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
744
745 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
746 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
747 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
748
749 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
750 pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
751 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
752
753 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
754 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
755 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
756 }
757 }
758 twidCoefModifier <<= 2U;
759 }
760
761 /* End of Middle stages process */
762
763 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
764 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
765 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
766 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
767
768
769 /* Start of last stage process */
770
771
772 /* Initializations for the last stage */
773 j = fftLen >> 2;
774 ptr1 = &pSrc[0];
775
776 /* Calculations of last stage */
777 do
778 {
779 /* Read xa (real), ya(imag) input */
780 xa = *ptr1++;
781 ya = *ptr1++;
782
783 /* Read xb (real), yb(imag) input */
784 xb = *ptr1++;
785 yb = *ptr1++;
786
787 /* Read xc (real), yc(imag) input */
788 xc = *ptr1++;
789 yc = *ptr1++;
790
791 /* Read xc (real), yc(imag) input */
792 xd = *ptr1++;
793 yd = *ptr1++;
794
795 /* xa' = xa + xb + xc + xd */
796 xa_out = xa + xb + xc + xd;
797
798 /* ya' = ya + yb + yc + yd */
799 ya_out = ya + yb + yc + yd;
800
801 /* pointer updation for writing */
802 ptr1 = ptr1 - 8U;
803
804 /* writing xa' and ya' */
805 *ptr1++ = xa_out;
806 *ptr1++ = ya_out;
807
808 xc_out = (xa - xb + xc - xd);
809 yc_out = (ya - yb + yc - yd);
810
811 /* writing xc' and yc' */
812 *ptr1++ = xc_out;
813 *ptr1++ = yc_out;
814
815 xb_out = (xa - yb - xc + yd);
816 yb_out = (ya + xb - yc - xd);
817
818 /* writing xb' and yb' */
819 *ptr1++ = xb_out;
820 *ptr1++ = yb_out;
821
822 xd_out = (xa + yb - xc - yd);
823 yd_out = (ya - xb - yc + xd);
824
825 /* writing xd' and yd' */
826 *ptr1++ = xd_out;
827 *ptr1++ = yd_out;
828
829 } while (--j);
830
831 /* output is in 11.21(q21) format for the 1024 point */
832 /* output is in 9.23(q23) format for the 256 point */
833 /* output is in 7.25(q25) format for the 64 point */
834 /* output is in 5.27(q27) format for the 16 point */
835
836 /* End of last stage process */
837 }
838