1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q31.c
4 * Description: This file has function definition of Radix-4 FFT & IFFT function and
5 * In-place bit reversal using bit reversal table
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/transform_functions.h"
31
32 void arm_radix4_butterfly_inverse_q31(
33 q31_t * pSrc,
34 uint32_t fftLen,
35 const q31_t * pCoef,
36 uint32_t twidCoefModifier);
37
38 void arm_radix4_butterfly_q31(
39 q31_t * pSrc,
40 uint32_t fftLen,
41 const q31_t * pCoef,
42 uint32_t twidCoefModifier);
43
44 void arm_bitreversal_q31(
45 q31_t * pSrc,
46 uint32_t fftLen,
47 uint16_t bitRevFactor,
48 const uint16_t * pBitRevTab);
49
50 /**
51 @ingroup groupTransforms
52 */
53
54 /**
55 @addtogroup ComplexFFT
56 @{
57 */
58
59 /**
60 @brief Processing function for the Q31 CFFT/CIFFT.
61 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
62 @param[in] S points to an instance of the Q31 CFFT/CIFFT structure
63 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
64 @return none
65
66 @par Input and output formats:
67 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
68 Hence the output format is different for different FFT sizes.
69 The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
70 @par
71 \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
72 \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
73 */
74
arm_cfft_radix4_q31(const arm_cfft_radix4_instance_q31 * S,q31_t * pSrc)75 void arm_cfft_radix4_q31(
76 const arm_cfft_radix4_instance_q31 * S,
77 q31_t * pSrc)
78 {
79 if (S->ifftFlag == 1U)
80 {
81 /* Complex IFFT radix-4 */
82 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
83 }
84 else
85 {
86 /* Complex FFT radix-4 */
87 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
88 }
89
90 if (S->bitReverseFlag == 1U)
91 {
92 /* Bit Reversal */
93 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
94 }
95
96 }
97
98 /**
99 @} end of ComplexFFT group
100 */
101
102 /*
103 * Radix-4 FFT algorithm used is :
104 *
105 * Input real and imaginary data:
106 * x(n) = xa + j * ya
107 * x(n+N/4 ) = xb + j * yb
108 * x(n+N/2 ) = xc + j * yc
109 * x(n+3N 4) = xd + j * yd
110 *
111 *
112 * Output real and imaginary data:
113 * x(4r) = xa'+ j * ya'
114 * x(4r+1) = xb'+ j * yb'
115 * x(4r+2) = xc'+ j * yc'
116 * x(4r+3) = xd'+ j * yd'
117 *
118 *
119 * Twiddle factors for radix-4 FFT:
120 * Wn = co1 + j * (- si1)
121 * W2n = co2 + j * (- si2)
122 * W3n = co3 + j * (- si3)
123 *
124 * Butterfly implementation:
125 * xa' = xa + xb + xc + xd
126 * ya' = ya + yb + yc + yd
127 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
128 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
129 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
130 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
131 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
132 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
133 *
134 */
135
136 /**
137 @brief Core function for the Q31 CFFT butterfly process.
138 @param[in,out] pSrc points to the in-place buffer of Q31 data type.
139 @param[in] fftLen length of the FFT.
140 @param[in] pCoef points to twiddle coefficient buffer.
141 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
142 @return none
143 */
144
arm_radix4_butterfly_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pCoef,uint32_t twidCoefModifier)145 void arm_radix4_butterfly_q31(
146 q31_t * pSrc,
147 uint32_t fftLen,
148 const q31_t * pCoef,
149 uint32_t twidCoefModifier)
150 {
151 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
152 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
153
154 q31_t xa, xb, xc, xd;
155 q31_t ya, yb, yc, yd;
156 q31_t xa_out, xb_out, xc_out, xd_out;
157 q31_t ya_out, yb_out, yc_out, yd_out;
158
159 q31_t *ptr1;
160
161 /* Total process is divided into three stages */
162
163 /* process first stage, middle stages, & last stage */
164
165
166 /* start of first stage process */
167
168 /* Initializations for the first stage */
169 n2 = fftLen;
170 n1 = n2;
171 /* n2 = fftLen/4 */
172 n2 >>= 2U;
173 i0 = 0U;
174 ia1 = 0U;
175
176 j = n2;
177
178 /* Calculation of first stage */
179 do
180 {
181 /* index calculation for the input as, */
182 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
183 i1 = i0 + n2;
184 i2 = i1 + n2;
185 i3 = i2 + n2;
186
187 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
188
189 /* Butterfly implementation */
190 /* xa + xc */
191 r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U);
192 /* xa - xc */
193 r2 = (pSrc[(2U * i0)] >> 4U) - (pSrc[(2U * i2)] >> 4U);
194
195 /* xb + xd */
196 t1 = (pSrc[(2U * i1)] >> 4U) + (pSrc[(2U * i3)] >> 4U);
197
198 /* ya + yc */
199 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
200 /* ya - yc */
201 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
202
203 /* xa' = xa + xb + xc + xd */
204 pSrc[2U * i0] = (r1 + t1);
205 /* (xa + xc) - (xb + xd) */
206 r1 = r1 - t1;
207 /* yb + yd */
208 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
209
210 /* ya' = ya + yb + yc + yd */
211 pSrc[(2U * i0) + 1U] = (s1 + t2);
212
213 /* (ya + yc) - (yb + yd) */
214 s1 = s1 - t2;
215
216 /* yb - yd */
217 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
218 /* xb - xd */
219 t2 = (pSrc[(2U * i1)] >> 4U) - (pSrc[(2U * i3)] >> 4U);
220
221 /* index calculation for the coefficients */
222 ia2 = 2U * ia1;
223 co2 = pCoef[(ia2 * 2U)];
224 si2 = pCoef[(ia2 * 2U) + 1U];
225
226 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
227 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
228 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
229
230 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
231 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
232 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
233
234 /* (xa - xc) + (yb - yd) */
235 r1 = r2 + t1;
236 /* (xa - xc) - (yb - yd) */
237 r2 = r2 - t1;
238
239 /* (ya - yc) - (xb - xd) */
240 s1 = s2 - t2;
241 /* (ya - yc) + (xb - xd) */
242 s2 = s2 + t2;
243
244 co1 = pCoef[(ia1 * 2U)];
245 si1 = pCoef[(ia1 * 2U) + 1U];
246
247 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
248 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
249 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
250
251 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
252 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
253 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
254
255 /* index calculation for the coefficients */
256 ia3 = 3U * ia1;
257 co3 = pCoef[(ia3 * 2U)];
258 si3 = pCoef[(ia3 * 2U) + 1U];
259
260 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
261 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
262 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
263
264 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
265 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
266 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
267
268 /* Twiddle coefficients index modifier */
269 ia1 = ia1 + twidCoefModifier;
270
271 /* Updating input index */
272 i0 = i0 + 1U;
273
274 } while (--j);
275
276 /* end of first stage process */
277
278 /* data is in 5.27(q27) format */
279
280
281 /* start of Middle stages process */
282
283
284 /* each stage in middle stages provides two down scaling of the input */
285
286 twidCoefModifier <<= 2U;
287
288
289 for (k = fftLen / 4U; k > 4U; k >>= 2U)
290 {
291 /* Initializations for the first stage */
292 n1 = n2;
293 n2 >>= 2U;
294 ia1 = 0U;
295
296 /* Calculation of first stage */
297 for (j = 0U; j <= (n2 - 1U); j++)
298 {
299 /* index calculation for the coefficients */
300 ia2 = ia1 + ia1;
301 ia3 = ia2 + ia1;
302 co1 = pCoef[(ia1 * 2U)];
303 si1 = pCoef[(ia1 * 2U) + 1U];
304 co2 = pCoef[(ia2 * 2U)];
305 si2 = pCoef[(ia2 * 2U) + 1U];
306 co3 = pCoef[(ia3 * 2U)];
307 si3 = pCoef[(ia3 * 2U) + 1U];
308 /* Twiddle coefficients index modifier */
309 ia1 = ia1 + twidCoefModifier;
310
311 for (i0 = j; i0 < fftLen; i0 += n1)
312 {
313 /* index calculation for the input as, */
314 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
315 i1 = i0 + n2;
316 i2 = i1 + n2;
317 i3 = i2 + n2;
318
319 /* Butterfly implementation */
320 /* xa + xc */
321 r1 = pSrc[2U * i0] + pSrc[2U * i2];
322 /* xa - xc */
323 r2 = pSrc[2U * i0] - pSrc[2U * i2];
324
325 /* ya + yc */
326 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
327 /* ya - yc */
328 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
329
330 /* xb + xd */
331 t1 = pSrc[2U * i1] + pSrc[2U * i3];
332
333 /* xa' = xa + xb + xc + xd */
334 pSrc[2U * i0] = (r1 + t1) >> 2U;
335 /* xa + xc -(xb + xd) */
336 r1 = r1 - t1;
337
338 /* yb + yd */
339 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
340 /* ya' = ya + yb + yc + yd */
341 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
342
343 /* (ya + yc) - (yb + yd) */
344 s1 = s1 - t2;
345
346 /* (yb - yd) */
347 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
348 /* (xb - xd) */
349 t2 = pSrc[2U * i1] - pSrc[2U * i3];
350
351 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
352 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
353 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
354
355 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
356 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
357 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
358
359 /* (xa - xc) + (yb - yd) */
360 r1 = r2 + t1;
361 /* (xa - xc) - (yb - yd) */
362 r2 = r2 - t1;
363
364 /* (ya - yc) - (xb - xd) */
365 s1 = s2 - t2;
366 /* (ya - yc) + (xb - xd) */
367 s2 = s2 + t2;
368
369 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
370 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
371 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
372
373 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
374 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
375 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
376
377 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
378 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
379 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
380
381 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
382 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
383 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
384 }
385 }
386 twidCoefModifier <<= 2U;
387 }
388
389 /* End of Middle stages process */
390
391 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
392 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
393 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
394 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
395
396
397 /* start of Last stage process */
398 /* Initializations for the last stage */
399 j = fftLen >> 2;
400 ptr1 = &pSrc[0];
401
402 /* Calculations of last stage */
403 do
404 {
405 /* Read xa (real), ya(imag) input */
406 xa = *ptr1++;
407 ya = *ptr1++;
408
409 /* Read xb (real), yb(imag) input */
410 xb = *ptr1++;
411 yb = *ptr1++;
412
413 /* Read xc (real), yc(imag) input */
414 xc = *ptr1++;
415 yc = *ptr1++;
416
417 /* Read xc (real), yc(imag) input */
418 xd = *ptr1++;
419 yd = *ptr1++;
420
421 /* xa' = xa + xb + xc + xd */
422 xa_out = xa + xb + xc + xd;
423
424 /* ya' = ya + yb + yc + yd */
425 ya_out = ya + yb + yc + yd;
426
427 /* pointer updation for writing */
428 ptr1 = ptr1 - 8U;
429
430 /* writing xa' and ya' */
431 *ptr1++ = xa_out;
432 *ptr1++ = ya_out;
433
434 xc_out = (xa - xb + xc - xd);
435 yc_out = (ya - yb + yc - yd);
436
437 /* writing xc' and yc' */
438 *ptr1++ = xc_out;
439 *ptr1++ = yc_out;
440
441 xb_out = (xa + yb - xc - yd);
442 yb_out = (ya - xb - yc + xd);
443
444 /* writing xb' and yb' */
445 *ptr1++ = xb_out;
446 *ptr1++ = yb_out;
447
448 xd_out = (xa - yb - xc + yd);
449 yd_out = (ya + xb - yc - xd);
450
451 /* writing xd' and yd' */
452 *ptr1++ = xd_out;
453 *ptr1++ = yd_out;
454
455
456 } while (--j);
457
458 /* output is in 11.21(q21) format for the 1024 point */
459 /* output is in 9.23(q23) format for the 256 point */
460 /* output is in 7.25(q25) format for the 64 point */
461 /* output is in 5.27(q27) format for the 16 point */
462
463 /* End of last stage process */
464
465 }
466
467
468 /**
469 @brief Core function for the Q31 CIFFT butterfly process.
470 @param[in,out] pSrc points to the in-place buffer of Q31 data type.
471 @param[in] fftLen length of the FFT.
472 @param[in] pCoef points to twiddle coefficient buffer.
473 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
474 @return none
475 */
476
477 /*
478 * Radix-4 IFFT algorithm used is :
479 *
480 * CIFFT uses same twiddle coefficients as CFFT Function
481 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
482 *
483 *
484 * IFFT is implemented with following changes in equations from FFT
485 *
486 * Input real and imaginary data:
487 * x(n) = xa + j * ya
488 * x(n+N/4 ) = xb + j * yb
489 * x(n+N/2 ) = xc + j * yc
490 * x(n+3N 4) = xd + j * yd
491 *
492 *
493 * Output real and imaginary data:
494 * x(4r) = xa'+ j * ya'
495 * x(4r+1) = xb'+ j * yb'
496 * x(4r+2) = xc'+ j * yc'
497 * x(4r+3) = xd'+ j * yd'
498 *
499 *
500 * Twiddle factors for radix-4 IFFT:
501 * Wn = co1 + j * (si1)
502 * W2n = co2 + j * (si2)
503 * W3n = co3 + j * (si3)
504
505 * The real and imaginary output values for the radix-4 butterfly are
506 * xa' = xa + xb + xc + xd
507 * ya' = ya + yb + yc + yd
508 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
509 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
510 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
511 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
512 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
513 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
514 *
515 */
516
arm_radix4_butterfly_inverse_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pCoef,uint32_t twidCoefModifier)517 void arm_radix4_butterfly_inverse_q31(
518 q31_t * pSrc,
519 uint32_t fftLen,
520 const q31_t * pCoef,
521 uint32_t twidCoefModifier)
522 {
523 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
524 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
525 q31_t xa, xb, xc, xd;
526 q31_t ya, yb, yc, yd;
527 q31_t xa_out, xb_out, xc_out, xd_out;
528 q31_t ya_out, yb_out, yc_out, yd_out;
529
530 q31_t *ptr1;
531
532 /* input is be 1.31(q31) format for all FFT sizes */
533 /* Total process is divided into three stages */
534 /* process first stage, middle stages, & last stage */
535
536 /* Start of first stage process */
537
538 /* Initializations for the first stage */
539 n2 = fftLen;
540 n1 = n2;
541 /* n2 = fftLen/4 */
542 n2 >>= 2U;
543 i0 = 0U;
544 ia1 = 0U;
545
546 j = n2;
547
548 do
549 {
550 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
551
552 /* index calculation for the input as, */
553 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
554 i1 = i0 + n2;
555 i2 = i1 + n2;
556 i3 = i2 + n2;
557
558 /* Butterfly implementation */
559 /* xa + xc */
560 r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U);
561 /* xa - xc */
562 r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
563
564 /* xb + xd */
565 t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
566
567 /* ya + yc */
568 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
569 /* ya - yc */
570 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
571
572 /* xa' = xa + xb + xc + xd */
573 pSrc[2U * i0] = (r1 + t1);
574 /* (xa + xc) - (xb + xd) */
575 r1 = r1 - t1;
576 /* yb + yd */
577 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
578 /* ya' = ya + yb + yc + yd */
579 pSrc[(2U * i0) + 1U] = (s1 + t2);
580
581 /* (ya + yc) - (yb + yd) */
582 s1 = s1 - t2;
583
584 /* yb - yd */
585 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
586 /* xb - xd */
587 t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
588
589 /* index calculation for the coefficients */
590 ia2 = 2U * ia1;
591 co2 = pCoef[ia2 * 2U];
592 si2 = pCoef[(ia2 * 2U) + 1U];
593
594 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
595 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
596 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
597
598 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
599 pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
600 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
601
602 /* (xa - xc) - (yb - yd) */
603 r1 = r2 - t1;
604 /* (xa - xc) + (yb - yd) */
605 r2 = r2 + t1;
606
607 /* (ya - yc) + (xb - xd) */
608 s1 = s2 + t2;
609 /* (ya - yc) - (xb - xd) */
610 s2 = s2 - t2;
611
612 co1 = pCoef[ia1 * 2U];
613 si1 = pCoef[(ia1 * 2U) + 1U];
614
615 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
616 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
617 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
618
619 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
620 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
621 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
622
623 /* index calculation for the coefficients */
624 ia3 = 3U * ia1;
625 co3 = pCoef[ia3 * 2U];
626 si3 = pCoef[(ia3 * 2U) + 1U];
627
628 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
629 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
630 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
631
632 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
633 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
634 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
635
636 /* Twiddle coefficients index modifier */
637 ia1 = ia1 + twidCoefModifier;
638
639 /* Updating input index */
640 i0 = i0 + 1U;
641
642 } while (--j);
643
644 /* data is in 5.27(q27) format */
645 /* each stage provides two down scaling of the input */
646
647
648 /* Start of Middle stages process */
649
650 twidCoefModifier <<= 2U;
651
652 /* Calculation of second stage to excluding last stage */
653 for (k = fftLen / 4U; k > 4U; k >>= 2U)
654 {
655 /* Initializations for the first stage */
656 n1 = n2;
657 n2 >>= 2U;
658 ia1 = 0U;
659
660 for (j = 0; j <= (n2 - 1U); j++)
661 {
662 /* index calculation for the coefficients */
663 ia2 = ia1 + ia1;
664 ia3 = ia2 + ia1;
665 co1 = pCoef[(ia1 * 2U)];
666 si1 = pCoef[(ia1 * 2U) + 1U];
667 co2 = pCoef[(ia2 * 2U)];
668 si2 = pCoef[(ia2 * 2U) + 1U];
669 co3 = pCoef[(ia3 * 2U)];
670 si3 = pCoef[(ia3 * 2U) + 1U];
671 /* Twiddle coefficients index modifier */
672 ia1 = ia1 + twidCoefModifier;
673
674 for (i0 = j; i0 < fftLen; i0 += n1)
675 {
676 /* index calculation for the input as, */
677 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
678 i1 = i0 + n2;
679 i2 = i1 + n2;
680 i3 = i2 + n2;
681
682 /* Butterfly implementation */
683 /* xa + xc */
684 r1 = pSrc[2U * i0] + pSrc[2U * i2];
685 /* xa - xc */
686 r2 = pSrc[2U * i0] - pSrc[2U * i2];
687
688 /* ya + yc */
689 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
690 /* ya - yc */
691 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
692
693 /* xb + xd */
694 t1 = pSrc[2U * i1] + pSrc[2U * i3];
695
696 /* xa' = xa + xb + xc + xd */
697 pSrc[2U * i0] = (r1 + t1) >> 2U;
698 /* xa + xc -(xb + xd) */
699 r1 = r1 - t1;
700 /* yb + yd */
701 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
702 /* ya' = ya + yb + yc + yd */
703 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
704
705 /* (ya + yc) - (yb + yd) */
706 s1 = s1 - t2;
707
708 /* (yb - yd) */
709 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
710 /* (xb - xd) */
711 t2 = pSrc[2U * i1] - pSrc[2U * i3];
712
713 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
714 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
715 ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
716
717 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
718 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
719 ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
720
721 /* (xa - xc) - (yb - yd) */
722 r1 = r2 - t1;
723 /* (xa - xc) + (yb - yd) */
724 r2 = r2 + t1;
725
726 /* (ya - yc) + (xb - xd) */
727 s1 = s2 + t2;
728 /* (ya - yc) - (xb - xd) */
729 s2 = s2 - t2;
730
731 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
732 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
733 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
734
735 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
736 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
737 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
738
739 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
740 pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
741 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
742
743 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
744 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
745 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
746 }
747 }
748 twidCoefModifier <<= 2U;
749 }
750
751 /* End of Middle stages process */
752
753 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
754 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
755 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
756 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
757
758
759 /* Start of last stage process */
760
761
762 /* Initializations for the last stage */
763 j = fftLen >> 2;
764 ptr1 = &pSrc[0];
765
766 /* Calculations of last stage */
767 do
768 {
769 /* Read xa (real), ya(imag) input */
770 xa = *ptr1++;
771 ya = *ptr1++;
772
773 /* Read xb (real), yb(imag) input */
774 xb = *ptr1++;
775 yb = *ptr1++;
776
777 /* Read xc (real), yc(imag) input */
778 xc = *ptr1++;
779 yc = *ptr1++;
780
781 /* Read xc (real), yc(imag) input */
782 xd = *ptr1++;
783 yd = *ptr1++;
784
785 /* xa' = xa + xb + xc + xd */
786 xa_out = xa + xb + xc + xd;
787
788 /* ya' = ya + yb + yc + yd */
789 ya_out = ya + yb + yc + yd;
790
791 /* pointer updation for writing */
792 ptr1 = ptr1 - 8U;
793
794 /* writing xa' and ya' */
795 *ptr1++ = xa_out;
796 *ptr1++ = ya_out;
797
798 xc_out = (xa - xb + xc - xd);
799 yc_out = (ya - yb + yc - yd);
800
801 /* writing xc' and yc' */
802 *ptr1++ = xc_out;
803 *ptr1++ = yc_out;
804
805 xb_out = (xa - yb - xc + yd);
806 yb_out = (ya + xb - yc - xd);
807
808 /* writing xb' and yb' */
809 *ptr1++ = xb_out;
810 *ptr1++ = yb_out;
811
812 xd_out = (xa + yb - xc - yd);
813 yd_out = (ya - xb - yc + xd);
814
815 /* writing xd' and yd' */
816 *ptr1++ = xd_out;
817 *ptr1++ = yd_out;
818
819 } while (--j);
820
821 /* output is in 11.21(q21) format for the 1024 point */
822 /* output is in 9.23(q23) format for the 256 point */
823 /* output is in 7.25(q25) format for the 64 point */
824 /* output is in 5.27(q27) format for the 16 point */
825
826 /* End of last stage process */
827 }
828