1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q31.c
4 * Description: This file has function definition of Radix-4 FFT & IFFT function and
5 * In-place bit reversal using bit reversal table
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/transform_functions.h"
31
32 void arm_radix4_butterfly_inverse_q31(
33 q31_t * pSrc,
34 uint32_t fftLen,
35 const q31_t * pCoef,
36 uint32_t twidCoefModifier);
37
38 void arm_radix4_butterfly_q31(
39 q31_t * pSrc,
40 uint32_t fftLen,
41 const q31_t * pCoef,
42 uint32_t twidCoefModifier);
43
44 void arm_bitreversal_q31(
45 q31_t * pSrc,
46 uint32_t fftLen,
47 uint16_t bitRevFactor,
48 const uint16_t * pBitRevTab);
49
50
51 /**
52 @addtogroup ComplexFFTDeprecated
53 @{
54 */
55
56 /**
57 @brief Processing function for the Q31 CFFT/CIFFT.
58 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
59 @param[in] S points to an instance of the Q31 CFFT/CIFFT structure
60 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
61
62 @par Input and output formats:
63 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
64 Hence the output format is different for different FFT sizes.
65 The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
66 @par
67
68 | CFFT Size | Input format | Output format | Number of bits to upscale |
69 | --------: | ------------: | ------------: | ------------------------: |
70 | 16 | 1.31 | 5.27 | 4 |
71 | 64 | 1.31 | 7.25 | 6 |
72 | 256 | 1.31 | 9.23 | 8 |
73 | 1024 | 1.31 | 11.21 | 10 |
74
75 | CIFFT Size | Input format | Output format | Number of bits to upscale |
76 | ---------: | ------------: | ------------: | ------------------------: |
77 | 16 | 1.31 | 5.27 | 0 |
78 | 64 | 1.31 | 7.25 | 0 |
79 | 256 | 1.31 | 9.23 | 0 |
80 | 1024 | 1.31 | 11.21 | 0 |
81
82 */
83
arm_cfft_radix4_q31(const arm_cfft_radix4_instance_q31 * S,q31_t * pSrc)84 void arm_cfft_radix4_q31(
85 const arm_cfft_radix4_instance_q31 * S,
86 q31_t * pSrc)
87 {
88 if (S->ifftFlag == 1U)
89 {
90 /* Complex IFFT radix-4 */
91 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
92 }
93 else
94 {
95 /* Complex FFT radix-4 */
96 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
97 }
98
99 if (S->bitReverseFlag == 1U)
100 {
101 /* Bit Reversal */
102 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
103 }
104
105 }
106
107 /**
108 @} end of ComplexFFTDeprecated group
109 */
110
111 /*
112 * Radix-4 FFT algorithm used is :
113 *
114 * Input real and imaginary data:
115 * x(n) = xa + j * ya
116 * x(n+N/4 ) = xb + j * yb
117 * x(n+N/2 ) = xc + j * yc
118 * x(n+3N 4) = xd + j * yd
119 *
120 *
121 * Output real and imaginary data:
122 * x(4r) = xa'+ j * ya'
123 * x(4r+1) = xb'+ j * yb'
124 * x(4r+2) = xc'+ j * yc'
125 * x(4r+3) = xd'+ j * yd'
126 *
127 *
128 * Twiddle factors for radix-4 FFT:
129 * Wn = co1 + j * (- si1)
130 * W2n = co2 + j * (- si2)
131 * W3n = co3 + j * (- si3)
132 *
133 * Butterfly implementation:
134 * xa' = xa + xb + xc + xd
135 * ya' = ya + yb + yc + yd
136 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
137 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
138 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
139 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
140 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
141 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
142 *
143 */
144
145 /**
146 @brief Core function for the Q31 CFFT butterfly process.
147 @param[in,out] pSrc points to the in-place buffer of Q31 data type.
148 @param[in] fftLen length of the FFT.
149 @param[in] pCoef points to twiddle coefficient buffer.
150 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
151 */
152
arm_radix4_butterfly_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pCoef,uint32_t twidCoefModifier)153 void arm_radix4_butterfly_q31(
154 q31_t * pSrc,
155 uint32_t fftLen,
156 const q31_t * pCoef,
157 uint32_t twidCoefModifier)
158 {
159 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
160 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
161
162 q31_t xa, xb, xc, xd;
163 q31_t ya, yb, yc, yd;
164 q31_t xa_out, xb_out, xc_out, xd_out;
165 q31_t ya_out, yb_out, yc_out, yd_out;
166
167 q31_t *ptr1;
168
169 /* Total process is divided into three stages */
170
171 /* process first stage, middle stages, & last stage */
172
173
174 /* start of first stage process */
175
176 /* Initializations for the first stage */
177 n2 = fftLen;
178 n1 = n2;
179 /* n2 = fftLen/4 */
180 n2 >>= 2U;
181 i0 = 0U;
182 ia1 = 0U;
183
184 j = n2;
185
186 /* Calculation of first stage */
187 do
188 {
189 /* index calculation for the input as, */
190 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
191 i1 = i0 + n2;
192 i2 = i1 + n2;
193 i3 = i2 + n2;
194
195 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
196
197 /* Butterfly implementation */
198 /* xa + xc */
199 r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U);
200 /* xa - xc */
201 r2 = (pSrc[(2U * i0)] >> 4U) - (pSrc[(2U * i2)] >> 4U);
202
203 /* xb + xd */
204 t1 = (pSrc[(2U * i1)] >> 4U) + (pSrc[(2U * i3)] >> 4U);
205
206 /* ya + yc */
207 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
208 /* ya - yc */
209 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
210
211 /* xa' = xa + xb + xc + xd */
212 pSrc[2U * i0] = (r1 + t1);
213 /* (xa + xc) - (xb + xd) */
214 r1 = r1 - t1;
215 /* yb + yd */
216 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
217
218 /* ya' = ya + yb + yc + yd */
219 pSrc[(2U * i0) + 1U] = (s1 + t2);
220
221 /* (ya + yc) - (yb + yd) */
222 s1 = s1 - t2;
223
224 /* yb - yd */
225 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
226 /* xb - xd */
227 t2 = (pSrc[(2U * i1)] >> 4U) - (pSrc[(2U * i3)] >> 4U);
228
229 /* index calculation for the coefficients */
230 ia2 = 2U * ia1;
231 co2 = pCoef[(ia2 * 2U)];
232 si2 = pCoef[(ia2 * 2U) + 1U];
233
234 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
235 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
236 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
237
238 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
239 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
240 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
241
242 /* (xa - xc) + (yb - yd) */
243 r1 = r2 + t1;
244 /* (xa - xc) - (yb - yd) */
245 r2 = r2 - t1;
246
247 /* (ya - yc) - (xb - xd) */
248 s1 = s2 - t2;
249 /* (ya - yc) + (xb - xd) */
250 s2 = s2 + t2;
251
252 co1 = pCoef[(ia1 * 2U)];
253 si1 = pCoef[(ia1 * 2U) + 1U];
254
255 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
256 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
257 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
258
259 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
260 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
261 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
262
263 /* index calculation for the coefficients */
264 ia3 = 3U * ia1;
265 co3 = pCoef[(ia3 * 2U)];
266 si3 = pCoef[(ia3 * 2U) + 1U];
267
268 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
269 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
270 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
271
272 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
273 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
274 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
275
276 /* Twiddle coefficients index modifier */
277 ia1 = ia1 + twidCoefModifier;
278
279 /* Updating input index */
280 i0 = i0 + 1U;
281
282 } while (--j);
283
284 /* end of first stage process */
285
286 /* data is in 5.27(q27) format */
287
288
289 /* start of Middle stages process */
290
291
292 /* each stage in middle stages provides two down scaling of the input */
293
294 twidCoefModifier <<= 2U;
295
296
297 for (k = fftLen / 4U; k > 4U; k >>= 2U)
298 {
299 /* Initializations for the first stage */
300 n1 = n2;
301 n2 >>= 2U;
302 ia1 = 0U;
303
304 /* Calculation of first stage */
305 for (j = 0U; j <= (n2 - 1U); j++)
306 {
307 /* index calculation for the coefficients */
308 ia2 = ia1 + ia1;
309 ia3 = ia2 + ia1;
310 co1 = pCoef[(ia1 * 2U)];
311 si1 = pCoef[(ia1 * 2U) + 1U];
312 co2 = pCoef[(ia2 * 2U)];
313 si2 = pCoef[(ia2 * 2U) + 1U];
314 co3 = pCoef[(ia3 * 2U)];
315 si3 = pCoef[(ia3 * 2U) + 1U];
316 /* Twiddle coefficients index modifier */
317 ia1 = ia1 + twidCoefModifier;
318
319 for (i0 = j; i0 < fftLen; i0 += n1)
320 {
321 /* index calculation for the input as, */
322 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
323 i1 = i0 + n2;
324 i2 = i1 + n2;
325 i3 = i2 + n2;
326
327 /* Butterfly implementation */
328 /* xa + xc */
329 r1 = pSrc[2U * i0] + pSrc[2U * i2];
330 /* xa - xc */
331 r2 = pSrc[2U * i0] - pSrc[2U * i2];
332
333 /* ya + yc */
334 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
335 /* ya - yc */
336 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
337
338 /* xb + xd */
339 t1 = pSrc[2U * i1] + pSrc[2U * i3];
340
341 /* xa' = xa + xb + xc + xd */
342 pSrc[2U * i0] = (r1 + t1) >> 2U;
343 /* xa + xc -(xb + xd) */
344 r1 = r1 - t1;
345
346 /* yb + yd */
347 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
348 /* ya' = ya + yb + yc + yd */
349 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
350
351 /* (ya + yc) - (yb + yd) */
352 s1 = s1 - t2;
353
354 /* (yb - yd) */
355 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
356 /* (xb - xd) */
357 t2 = pSrc[2U * i1] - pSrc[2U * i3];
358
359 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
360 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
361 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
362
363 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
364 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
365 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
366
367 /* (xa - xc) + (yb - yd) */
368 r1 = r2 + t1;
369 /* (xa - xc) - (yb - yd) */
370 r2 = r2 - t1;
371
372 /* (ya - yc) - (xb - xd) */
373 s1 = s2 - t2;
374 /* (ya - yc) + (xb - xd) */
375 s2 = s2 + t2;
376
377 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
378 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
379 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
380
381 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
382 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
383 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
384
385 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
386 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
387 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
388
389 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
390 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
391 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
392 }
393 }
394 twidCoefModifier <<= 2U;
395 }
396
397 /* End of Middle stages process */
398
399 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
400 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
401 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
402 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
403
404
405 /* start of Last stage process */
406 /* Initializations for the last stage */
407 j = fftLen >> 2;
408 ptr1 = &pSrc[0];
409
410 /* Calculations of last stage */
411 do
412 {
413 /* Read xa (real), ya(imag) input */
414 xa = *ptr1++;
415 ya = *ptr1++;
416
417 /* Read xb (real), yb(imag) input */
418 xb = *ptr1++;
419 yb = *ptr1++;
420
421 /* Read xc (real), yc(imag) input */
422 xc = *ptr1++;
423 yc = *ptr1++;
424
425 /* Read xc (real), yc(imag) input */
426 xd = *ptr1++;
427 yd = *ptr1++;
428
429 /* xa' = xa + xb + xc + xd */
430 xa_out = xa + xb + xc + xd;
431
432 /* ya' = ya + yb + yc + yd */
433 ya_out = ya + yb + yc + yd;
434
435 /* pointer updation for writing */
436 ptr1 = ptr1 - 8U;
437
438 /* writing xa' and ya' */
439 *ptr1++ = xa_out;
440 *ptr1++ = ya_out;
441
442 xc_out = (xa - xb + xc - xd);
443 yc_out = (ya - yb + yc - yd);
444
445 /* writing xc' and yc' */
446 *ptr1++ = xc_out;
447 *ptr1++ = yc_out;
448
449 xb_out = (xa + yb - xc - yd);
450 yb_out = (ya - xb - yc + xd);
451
452 /* writing xb' and yb' */
453 *ptr1++ = xb_out;
454 *ptr1++ = yb_out;
455
456 xd_out = (xa - yb - xc + yd);
457 yd_out = (ya + xb - yc - xd);
458
459 /* writing xd' and yd' */
460 *ptr1++ = xd_out;
461 *ptr1++ = yd_out;
462
463
464 } while (--j);
465
466 /* output is in 11.21(q21) format for the 1024 point */
467 /* output is in 9.23(q23) format for the 256 point */
468 /* output is in 7.25(q25) format for the 64 point */
469 /* output is in 5.27(q27) format for the 16 point */
470
471 /* End of last stage process */
472
473 }
474
475
476 /**
477 @brief Core function for the Q31 CIFFT butterfly process.
478 @param[in,out] pSrc points to the in-place buffer of Q31 data type.
479 @param[in] fftLen length of the FFT.
480 @param[in] pCoef points to twiddle coefficient buffer.
481 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
482 */
483
484 /*
485 * Radix-4 IFFT algorithm used is :
486 *
487 * CIFFT uses same twiddle coefficients as CFFT Function
488 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
489 *
490 *
491 * IFFT is implemented with following changes in equations from FFT
492 *
493 * Input real and imaginary data:
494 * x(n) = xa + j * ya
495 * x(n+N/4 ) = xb + j * yb
496 * x(n+N/2 ) = xc + j * yc
497 * x(n+3N 4) = xd + j * yd
498 *
499 *
500 * Output real and imaginary data:
501 * x(4r) = xa'+ j * ya'
502 * x(4r+1) = xb'+ j * yb'
503 * x(4r+2) = xc'+ j * yc'
504 * x(4r+3) = xd'+ j * yd'
505 *
506 *
507 * Twiddle factors for radix-4 IFFT:
508 * Wn = co1 + j * (si1)
509 * W2n = co2 + j * (si2)
510 * W3n = co3 + j * (si3)
511
512 * The real and imaginary output values for the radix-4 butterfly are
513 * xa' = xa + xb + xc + xd
514 * ya' = ya + yb + yc + yd
515 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
516 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
517 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
518 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
519 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
520 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
521 *
522 */
523
arm_radix4_butterfly_inverse_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pCoef,uint32_t twidCoefModifier)524 void arm_radix4_butterfly_inverse_q31(
525 q31_t * pSrc,
526 uint32_t fftLen,
527 const q31_t * pCoef,
528 uint32_t twidCoefModifier)
529 {
530 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
531 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
532 q31_t xa, xb, xc, xd;
533 q31_t ya, yb, yc, yd;
534 q31_t xa_out, xb_out, xc_out, xd_out;
535 q31_t ya_out, yb_out, yc_out, yd_out;
536
537 q31_t *ptr1;
538
539 /* input is be 1.31(q31) format for all FFT sizes */
540 /* Total process is divided into three stages */
541 /* process first stage, middle stages, & last stage */
542
543 /* Start of first stage process */
544
545 /* Initializations for the first stage */
546 n2 = fftLen;
547 n1 = n2;
548 /* n2 = fftLen/4 */
549 n2 >>= 2U;
550 i0 = 0U;
551 ia1 = 0U;
552
553 j = n2;
554
555 do
556 {
557 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
558
559 /* index calculation for the input as, */
560 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
561 i1 = i0 + n2;
562 i2 = i1 + n2;
563 i3 = i2 + n2;
564
565 /* Butterfly implementation */
566 /* xa + xc */
567 r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U);
568 /* xa - xc */
569 r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
570
571 /* xb + xd */
572 t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
573
574 /* ya + yc */
575 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
576 /* ya - yc */
577 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
578
579 /* xa' = xa + xb + xc + xd */
580 pSrc[2U * i0] = (r1 + t1);
581 /* (xa + xc) - (xb + xd) */
582 r1 = r1 - t1;
583 /* yb + yd */
584 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
585 /* ya' = ya + yb + yc + yd */
586 pSrc[(2U * i0) + 1U] = (s1 + t2);
587
588 /* (ya + yc) - (yb + yd) */
589 s1 = s1 - t2;
590
591 /* yb - yd */
592 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
593 /* xb - xd */
594 t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
595
596 /* index calculation for the coefficients */
597 ia2 = 2U * ia1;
598 co2 = pCoef[ia2 * 2U];
599 si2 = pCoef[(ia2 * 2U) + 1U];
600
601 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
602 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
603 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
604
605 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
606 pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
607 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
608
609 /* (xa - xc) - (yb - yd) */
610 r1 = r2 - t1;
611 /* (xa - xc) + (yb - yd) */
612 r2 = r2 + t1;
613
614 /* (ya - yc) + (xb - xd) */
615 s1 = s2 + t2;
616 /* (ya - yc) - (xb - xd) */
617 s2 = s2 - t2;
618
619 co1 = pCoef[ia1 * 2U];
620 si1 = pCoef[(ia1 * 2U) + 1U];
621
622 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
623 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
624 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
625
626 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
627 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
628 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
629
630 /* index calculation for the coefficients */
631 ia3 = 3U * ia1;
632 co3 = pCoef[ia3 * 2U];
633 si3 = pCoef[(ia3 * 2U) + 1U];
634
635 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
636 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
637 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
638
639 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
640 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
641 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
642
643 /* Twiddle coefficients index modifier */
644 ia1 = ia1 + twidCoefModifier;
645
646 /* Updating input index */
647 i0 = i0 + 1U;
648
649 } while (--j);
650
651 /* data is in 5.27(q27) format */
652 /* each stage provides two down scaling of the input */
653
654
655 /* Start of Middle stages process */
656
657 twidCoefModifier <<= 2U;
658
659 /* Calculation of second stage to excluding last stage */
660 for (k = fftLen / 4U; k > 4U; k >>= 2U)
661 {
662 /* Initializations for the first stage */
663 n1 = n2;
664 n2 >>= 2U;
665 ia1 = 0U;
666
667 for (j = 0; j <= (n2 - 1U); j++)
668 {
669 /* index calculation for the coefficients */
670 ia2 = ia1 + ia1;
671 ia3 = ia2 + ia1;
672 co1 = pCoef[(ia1 * 2U)];
673 si1 = pCoef[(ia1 * 2U) + 1U];
674 co2 = pCoef[(ia2 * 2U)];
675 si2 = pCoef[(ia2 * 2U) + 1U];
676 co3 = pCoef[(ia3 * 2U)];
677 si3 = pCoef[(ia3 * 2U) + 1U];
678 /* Twiddle coefficients index modifier */
679 ia1 = ia1 + twidCoefModifier;
680
681 for (i0 = j; i0 < fftLen; i0 += n1)
682 {
683 /* index calculation for the input as, */
684 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
685 i1 = i0 + n2;
686 i2 = i1 + n2;
687 i3 = i2 + n2;
688
689 /* Butterfly implementation */
690 /* xa + xc */
691 r1 = pSrc[2U * i0] + pSrc[2U * i2];
692 /* xa - xc */
693 r2 = pSrc[2U * i0] - pSrc[2U * i2];
694
695 /* ya + yc */
696 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
697 /* ya - yc */
698 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
699
700 /* xb + xd */
701 t1 = pSrc[2U * i1] + pSrc[2U * i3];
702
703 /* xa' = xa + xb + xc + xd */
704 pSrc[2U * i0] = (r1 + t1) >> 2U;
705 /* xa + xc -(xb + xd) */
706 r1 = r1 - t1;
707 /* yb + yd */
708 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
709 /* ya' = ya + yb + yc + yd */
710 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
711
712 /* (ya + yc) - (yb + yd) */
713 s1 = s1 - t2;
714
715 /* (yb - yd) */
716 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
717 /* (xb - xd) */
718 t2 = pSrc[2U * i1] - pSrc[2U * i3];
719
720 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
721 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
722 ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
723
724 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
725 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
726 ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
727
728 /* (xa - xc) - (yb - yd) */
729 r1 = r2 - t1;
730 /* (xa - xc) + (yb - yd) */
731 r2 = r2 + t1;
732
733 /* (ya - yc) + (xb - xd) */
734 s1 = s2 + t2;
735 /* (ya - yc) - (xb - xd) */
736 s2 = s2 - t2;
737
738 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
739 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
740 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
741
742 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
743 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
744 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
745
746 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
747 pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
748 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
749
750 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
751 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
752 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
753 }
754 }
755 twidCoefModifier <<= 2U;
756 }
757
758 /* End of Middle stages process */
759
760 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
761 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
762 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
763 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
764
765
766 /* Start of last stage process */
767
768
769 /* Initializations for the last stage */
770 j = fftLen >> 2;
771 ptr1 = &pSrc[0];
772
773 /* Calculations of last stage */
774 do
775 {
776 /* Read xa (real), ya(imag) input */
777 xa = *ptr1++;
778 ya = *ptr1++;
779
780 /* Read xb (real), yb(imag) input */
781 xb = *ptr1++;
782 yb = *ptr1++;
783
784 /* Read xc (real), yc(imag) input */
785 xc = *ptr1++;
786 yc = *ptr1++;
787
788 /* Read xc (real), yc(imag) input */
789 xd = *ptr1++;
790 yd = *ptr1++;
791
792 /* xa' = xa + xb + xc + xd */
793 xa_out = xa + xb + xc + xd;
794
795 /* ya' = ya + yb + yc + yd */
796 ya_out = ya + yb + yc + yd;
797
798 /* pointer updation for writing */
799 ptr1 = ptr1 - 8U;
800
801 /* writing xa' and ya' */
802 *ptr1++ = xa_out;
803 *ptr1++ = ya_out;
804
805 xc_out = (xa - xb + xc - xd);
806 yc_out = (ya - yb + yc - yd);
807
808 /* writing xc' and yc' */
809 *ptr1++ = xc_out;
810 *ptr1++ = yc_out;
811
812 xb_out = (xa - yb - xc + yd);
813 yb_out = (ya + xb - yc - xd);
814
815 /* writing xb' and yb' */
816 *ptr1++ = xb_out;
817 *ptr1++ = yb_out;
818
819 xd_out = (xa + yb - xc - yd);
820 yd_out = (ya - xb - yc + xd);
821
822 /* writing xd' and yd' */
823 *ptr1++ = xd_out;
824 *ptr1++ = yd_out;
825
826 } while (--j);
827
828 /* output is in 11.21(q21) format for the 1024 point */
829 /* output is in 9.23(q23) format for the 256 point */
830 /* output is in 7.25(q25) format for the 64 point */
831 /* output is in 5.27(q27) format for the 16 point */
832
833 /* End of last stage process */
834 }
835