1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q15.c
4 * Description: This file has function definition of Radix-4 FFT & IFFT function and
5 * In-place bit reversal using bit reversal table
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/transform_functions.h"
31
32
33 void arm_radix4_butterfly_q15(
34 q15_t * pSrc16,
35 uint32_t fftLen,
36 const q15_t * pCoef16,
37 uint32_t twidCoefModifier);
38
39 void arm_radix4_butterfly_inverse_q15(
40 q15_t * pSrc16,
41 uint32_t fftLen,
42 const q15_t * pCoef16,
43 uint32_t twidCoefModifier);
44
45 void arm_bitreversal_q15(
46 q15_t * pSrc,
47 uint32_t fftLen,
48 uint16_t bitRevFactor,
49 const uint16_t * pBitRevTab);
50
51 /**
52 @addtogroup ComplexFFTDeprecated
53 @{
54 */
55
56
57 /**
58 @brief Processing function for the Q15 CFFT/CIFFT.
59 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
60 @param[in] S points to an instance of the Q15 CFFT/CIFFT structure.
61 @param[in,out] pSrc points to the complex data buffer. Processing occurs in-place.
62
63 @par Input and output formats:
64 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
65 Hence the output format is different for different FFT sizes.
66 The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
67 @par
68
69 | CFFT Size | Input format | Output format | Number of bits to upscale |
70 | --------: | ------------: | ------------: | ------------------------: |
71 | 16 | 1.15 | 5.11 | 4 |
72 | 64 | 1.15 | 7.9 | 6 |
73 | 256 | 1.15 | 9.7 | 8 |
74 | 1024 | 1.15 | 11.5 | 10 |
75
76 | CIFFT Size | Input format | Output format | Number of bits to upscale |
77 | ---------: | ------------: | ------------: | ------------------------: |
78 | 16 | 1.15 | 5.11 | 0 |
79 | 64 | 1.15 | 7.9 | 0 |
80 | 256 | 1.15 | 9.7 | 0 |
81 | 1024 | 1.15 | 11.5 | 0 |
82
83 */
84
void arm_cfft_radix4_q15(
  const arm_cfft_radix4_instance_q15 * S,
        q15_t * pSrc)
{
  /* Transform stage: the instance flag selects inverse or forward butterflies. */
  if (S->ifftFlag == 1U)
  {
    /* Complex IFFT radix-4 */
    arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }
  else
  {
    /* Complex FFT radix-4 */
    arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }

  /* Optional reordering stage: only performed when the instance requests it. */
  if (S->bitReverseFlag == 1U)
  {
    /* Bit Reversal */
    arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  }
}
107
108 /**
109 @} end of ComplexFFTDeprecated group
110 */
111
112 /*
113 * Radix-4 FFT algorithm used is :
114 *
115 * Input real and imaginary data:
116 * x(n) = xa + j * ya
117 * x(n+N/4 ) = xb + j * yb
118 * x(n+N/2 ) = xc + j * yc
119 * x(n+3N/4) = xd + j * yd
120 *
121 *
122 * Output real and imaginary data:
123 * x(4r) = xa'+ j * ya'
124 * x(4r+1) = xb'+ j * yb'
125 * x(4r+2) = xc'+ j * yc'
126 * x(4r+3) = xd'+ j * yd'
127 *
128 *
129 * Twiddle factors for radix-4 FFT:
130 * Wn = co1 + j * (- si1)
131 * W2n = co2 + j * (- si2)
132 * W3n = co3 + j * (- si3)
133
134 * The real and imaginary output values for the radix-4 butterfly are
135 * xa' = xa + xb + xc + xd
136 * ya' = ya + yb + yc + yd
137 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
138 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
139 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
140 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
141 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
142 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
143 *
144 */
145
146 /**
147 @brief Core function for the Q15 CFFT butterfly process.
148 @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
149 @param[in] fftLen length of the FFT
150 @param[in] pCoef16 points to twiddle coefficient buffer
151 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
152 */
153
arm_radix4_butterfly_q15(q15_t * pSrc16,uint32_t fftLen,const q15_t * pCoef16,uint32_t twidCoefModifier)154 void arm_radix4_butterfly_q15(
155 q15_t * pSrc16,
156 uint32_t fftLen,
157 const q15_t * pCoef16,
158 uint32_t twidCoefModifier)
159 {
160
161 #if defined (ARM_MATH_DSP)
162
163 q31_t R, S, T, U;
164 q31_t C1, C2, C3, out1, out2;
165 uint32_t n1, n2, ic, i0, j, k;
166
167 q15_t *ptr1;
168 q15_t *pSi0;
169 q15_t *pSi1;
170 q15_t *pSi2;
171 q15_t *pSi3;
172
173 q31_t xaya, xbyb, xcyc, xdyd;
174
175 /* Total process is divided into three stages */
176
177 /* process first stage, middle stages, & last stage */
178
179 /* Initializations for the first stage */
180 n2 = fftLen;
181 n1 = n2;
182
183 /* n2 = fftLen/4 */
184 n2 >>= 2U;
185
186 /* Index for twiddle coefficient */
187 ic = 0U;
188
189 /* Index for input read and output write */
190 j = n2;
191
192 pSi0 = pSrc16;
193 pSi1 = pSi0 + 2 * n2;
194 pSi2 = pSi1 + 2 * n2;
195 pSi3 = pSi2 + 2 * n2;
196
197 /* Input is in 1.15(q15) format */
198
199 /* start of first stage process */
200 do
201 {
202 /* Butterfly implementation */
203
204 /* Reading i0, i0+fftLen/2 inputs */
205 /* Read ya (real), xa(imag) input */
206 T = read_q15x2 (pSi0);
207 T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
208 T = __SHADD16(T, 0); /* it turns out doing this twice is 2 cycles, the alternative takes 3 cycles */
209 /*
210 in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles
211 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
212 */
213
214 /* Read yc (real), xc(imag) input */
215 S = read_q15x2 (pSi2);
216 S = __SHADD16(S, 0);
217 S = __SHADD16(S, 0);
218
219 /* R = packed((ya + yc), (xa + xc) ) */
220 R = __QADD16(T, S);
221
222 /* S = packed((ya - yc), (xa - xc) ) */
223 S = __QSUB16(T, S);
224
225 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
226 /* Read yb (real), xb(imag) input */
227 T = read_q15x2 (pSi1);
228 T = __SHADD16(T, 0);
229 T = __SHADD16(T, 0);
230
231 /* Read yd (real), xd(imag) input */
232 U = read_q15x2 (pSi3);
233 U = __SHADD16(U, 0);
234 U = __SHADD16(U, 0);
235
236 /* T = packed((yb + yd), (xb + xd) ) */
237 T = __QADD16(T, U);
238
239 /* writing the butterfly processed i0 sample */
240 /* xa' = xa + xb + xc + xd */
241 /* ya' = ya + yb + yc + yd */
242 write_q15x2_ia (&pSi0, __SHADD16(R, T));
243
244 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
245 R = __QSUB16(R, T);
246
247 /* co2 & si2 are read from SIMD Coefficient pointer */
248 C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
249
250 #ifndef ARM_MATH_BIG_ENDIAN
251 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
252 out1 = __SMUAD(C2, R) >> 16U;
253 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
254 out2 = __SMUSDX(C2, R);
255 #else
256 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
257 out1 = __SMUSDX(R, C2) >> 16U;
258 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
259 out2 = __SMUAD(C2, R);
260 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
261
262 /* Reading i0+fftLen/4 */
263 /* T = packed(yb, xb) */
264 T = read_q15x2 (pSi1);
265 T = __SHADD16(T, 0);
266 T = __SHADD16(T, 0);
267
268 /* writing the butterfly processed i0 + fftLen/4 sample */
269 /* writing output(xc', yc') in little endian format */
270 write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));
271
272 /* Butterfly calculations */
273 /* U = packed(yd, xd) */
274 U = read_q15x2 (pSi3);
275 U = __SHADD16(U, 0);
276 U = __SHADD16(U, 0);
277
278 /* T = packed(yb-yd, xb-xd) */
279 T = __QSUB16(T, U);
280
281 #ifndef ARM_MATH_BIG_ENDIAN
282 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
283 R = __QASX(S, T);
284 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
285 S = __QSAX(S, T);
286 #else
287 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
288 R = __QSAX(S, T);
289 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
290 S = __QASX(S, T);
291 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
292
293 /* co1 & si1 are read from SIMD Coefficient pointer */
294 C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
295 /* Butterfly process for the i0+fftLen/2 sample */
296
297 #ifndef ARM_MATH_BIG_ENDIAN
298 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
299 out1 = __SMUAD(C1, S) >> 16U;
300 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
301 out2 = __SMUSDX(C1, S);
302 #else
303 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
304 out1 = __SMUSDX(S, C1) >> 16U;
305 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
306 out2 = __SMUAD(C1, S);
307 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
308
309 /* writing output(xb', yb') in little endian format */
310 write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));
311
312 /* co3 & si3 are read from SIMD Coefficient pointer */
313 C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
314 /* Butterfly process for the i0+3fftLen/4 sample */
315
316 #ifndef ARM_MATH_BIG_ENDIAN
317 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
318 out1 = __SMUAD(C3, R) >> 16U;
319 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
320 out2 = __SMUSDX(C3, R);
321 #else
322 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
323 out1 = __SMUSDX(R, C3) >> 16U;
324 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
325 out2 = __SMUAD(C3, R);
326 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
327
328 /* writing output(xd', yd') in little endian format */
329 write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));
330
331 /* Twiddle coefficients index modifier */
332 ic = ic + twidCoefModifier;
333
334 } while (--j);
335 /* data is in 4.11(q11) format */
336
337 /* end of first stage process */
338
339
340 /* start of middle stage process */
341
342 /* Twiddle coefficients index modifier */
343 twidCoefModifier <<= 2U;
344
345 /* Calculation of Middle stage */
346 for (k = fftLen / 4U; k > 4U; k >>= 2U)
347 {
348 /* Initializations for the middle stage */
349 n1 = n2;
350 n2 >>= 2U;
351 ic = 0U;
352
353 for (j = 0U; j <= (n2 - 1U); j++)
354 {
355 /* index calculation for the coefficients */
356 C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
357 C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
358 C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
359
360 /* Twiddle coefficients index modifier */
361 ic = ic + twidCoefModifier;
362
363 pSi0 = pSrc16 + 2 * j;
364 pSi1 = pSi0 + 2 * n2;
365 pSi2 = pSi1 + 2 * n2;
366 pSi3 = pSi2 + 2 * n2;
367
368 /* Butterfly implementation */
369 for (i0 = j; i0 < fftLen; i0 += n1)
370 {
371 /* Reading i0, i0+fftLen/2 inputs */
372 /* Read ya (real), xa(imag) input */
373 T = read_q15x2 (pSi0);
374
375 /* Read yc (real), xc(imag) input */
376 S = read_q15x2 (pSi2);
377
378 /* R = packed( (ya + yc), (xa + xc)) */
379 R = __QADD16(T, S);
380
381 /* S = packed((ya - yc), (xa - xc)) */
382 S = __QSUB16(T, S);
383
384 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
385 /* Read yb (real), xb(imag) input */
386 T = read_q15x2 (pSi1);
387
388 /* Read yd (real), xd(imag) input */
389 U = read_q15x2 (pSi3);
390
391 /* T = packed( (yb + yd), (xb + xd)) */
392 T = __QADD16(T, U);
393
394 /* writing the butterfly processed i0 sample */
395
396 /* xa' = xa + xb + xc + xd */
397 /* ya' = ya + yb + yc + yd */
398 out1 = __SHADD16(R, T);
399 out1 = __SHADD16(out1, 0);
400 write_q15x2 (pSi0, out1);
401 pSi0 += 2 * n1;
402
403 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
404 R = __SHSUB16(R, T);
405
406 #ifndef ARM_MATH_BIG_ENDIAN
407 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
408 out1 = __SMUAD(C2, R) >> 16U;
409
410 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
411 out2 = __SMUSDX(C2, R);
412 #else
413 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
414 out1 = __SMUSDX(R, C2) >> 16U;
415
416 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
417 out2 = __SMUAD(C2, R);
418 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
419
420 /* Reading i0+3fftLen/4 */
421 /* Read yb (real), xb(imag) input */
422 T = read_q15x2 (pSi1);
423
424 /* writing the butterfly processed i0 + fftLen/4 sample */
425 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
426 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
427 write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
428 pSi1 += 2 * n1;
429
430 /* Butterfly calculations */
431
432 /* Read yd (real), xd(imag) input */
433 U = read_q15x2 (pSi3);
434
435 /* T = packed(yb-yd, xb-xd) */
436 T = __QSUB16(T, U);
437
438 #ifndef ARM_MATH_BIG_ENDIAN
439 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
440 R = __SHASX(S, T);
441
442 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
443 S = __SHSAX(S, T);
444
445
446 /* Butterfly process for the i0+fftLen/2 sample */
447 out1 = __SMUAD(C1, S) >> 16U;
448 out2 = __SMUSDX(C1, S);
449 #else
450 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
451 R = __SHSAX(S, T);
452
453 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
454 S = __SHASX(S, T);
455
456
457 /* Butterfly process for the i0+fftLen/2 sample */
458 out1 = __SMUSDX(S, C1) >> 16U;
459 out2 = __SMUAD(C1, S);
460 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
461
462 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
463 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
464 write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
465 pSi2 += 2 * n1;
466
467 /* Butterfly process for the i0+3fftLen/4 sample */
468
469 #ifndef ARM_MATH_BIG_ENDIAN
470 out1 = __SMUAD(C3, R) >> 16U;
471 out2 = __SMUSDX(C3, R);
472 #else
473 out1 = __SMUSDX(R, C3) >> 16U;
474 out2 = __SMUAD(C3, R);
475 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
476
477 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
478 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
479 write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
480 pSi3 += 2 * n1;
481 }
482 }
483 /* Twiddle coefficients index modifier */
484 twidCoefModifier <<= 2U;
485 }
486 /* end of middle stage process */
487
488
489 /* data is in 10.6(q6) format for the 1024 point */
490 /* data is in 8.8(q8) format for the 256 point */
491 /* data is in 6.10(q10) format for the 64 point */
492 /* data is in 4.12(q12) format for the 16 point */
493
494 /* Initializations for the last stage */
495 j = fftLen >> 2;
496
497 ptr1 = &pSrc16[0];
498
499 /* start of last stage process */
500
501 /* Butterfly implementation */
502 do
503 {
504 /* Read xa (real), ya(imag) input */
505 xaya = read_q15x2_ia (&ptr1);
506
507 /* Read xb (real), yb(imag) input */
508 xbyb = read_q15x2_ia (&ptr1);
509
510 /* Read xc (real), yc(imag) input */
511 xcyc = read_q15x2_ia (&ptr1);
512
513 /* Read xd (real), yd(imag) input */
514 xdyd = read_q15x2_ia (&ptr1);
515
516 /* R = packed((ya + yc), (xa + xc)) */
517 R = __QADD16(xaya, xcyc);
518
519 /* T = packed((yb + yd), (xb + xd)) */
520 T = __QADD16(xbyb, xdyd);
521
522 /* pointer updation for writing */
523 ptr1 = ptr1 - 8U;
524
525
526 /* xa' = xa + xb + xc + xd */
527 /* ya' = ya + yb + yc + yd */
528 write_q15x2_ia (&ptr1, __SHADD16(R, T));
529
530 /* T = packed((yb + yd), (xb + xd)) */
531 T = __QADD16(xbyb, xdyd);
532
533 /* xc' = (xa-xb+xc-xd) */
534 /* yc' = (ya-yb+yc-yd) */
535 write_q15x2_ia (&ptr1, __SHSUB16(R, T));
536
537 /* S = packed((ya - yc), (xa - xc)) */
538 S = __QSUB16(xaya, xcyc);
539
540 /* Read yd (real), xd(imag) input */
541 /* T = packed( (yb - yd), (xb - xd)) */
542 U = __QSUB16(xbyb, xdyd);
543
544 #ifndef ARM_MATH_BIG_ENDIAN
545 /* xb' = (xa+yb-xc-yd) */
546 /* yb' = (ya-xb-yc+xd) */
547 write_q15x2_ia (&ptr1, __SHSAX(S, U));
548
549 /* xd' = (xa-yb-xc+yd) */
550 /* yd' = (ya+xb-yc-xd) */
551 write_q15x2_ia (&ptr1, __SHASX(S, U));
552 #else
553 /* xb' = (xa+yb-xc-yd) */
554 /* yb' = (ya-xb-yc+xd) */
555 write_q15x2_ia (&ptr1, __SHASX(S, U));
556
557 /* xd' = (xa-yb-xc+yd) */
558 /* yd' = (ya+xb-yc-xd) */
559 write_q15x2_ia (&ptr1, __SHSAX(S, U));
560 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
561
562 } while (--j);
563
564 /* end of last stage process */
565
566 /* output is in 11.5(q5) format for the 1024 point */
567 /* output is in 9.7(q7) format for the 256 point */
568 /* output is in 7.9(q9) format for the 64 point */
569 /* output is in 5.11(q11) format for the 16 point */
570
571
572 #else /* #if defined (ARM_MATH_DSP) */
573
574 q15_t R0, R1, S0, S1, T0, T1, U0, U1;
575 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
576 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
577
578 /* Total process is divided into three stages */
579
580 /* process first stage, middle stages, & last stage */
581
582 /* Initializations for the first stage */
583 n2 = fftLen;
584 n1 = n2;
585
586 /* n2 = fftLen/4 */
587 n2 >>= 2U;
588
589 /* Index for twiddle coefficient */
590 ic = 0U;
591
592 /* Index for input read and output write */
593 i0 = 0U;
594 j = n2;
595
596 /* Input is in 1.15(q15) format */
597
598 /* start of first stage process */
599 do
600 {
601 /* Butterfly implementation */
602
603 /* index calculation for the input as, */
604 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
605 i1 = i0 + n2;
606 i2 = i1 + n2;
607 i3 = i2 + n2;
608
609 /* Reading i0, i0+fftLen/2 inputs */
610
611 /* input is down scale by 4 to avoid overflow */
612 /* Read ya (real), xa(imag) input */
613 T0 = pSrc16[i0 * 2U] >> 2U;
614 T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
615
616 /* input is down scale by 4 to avoid overflow */
617 /* Read yc (real), xc(imag) input */
618 S0 = pSrc16[i2 * 2U] >> 2U;
619 S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
620
621 /* R0 = (ya + yc) */
622 R0 = __SSAT(T0 + S0, 16U);
623 /* R1 = (xa + xc) */
624 R1 = __SSAT(T1 + S1, 16U);
625
626 /* S0 = (ya - yc) */
627 S0 = __SSAT(T0 - S0, 16);
628 /* S1 = (xa - xc) */
629 S1 = __SSAT(T1 - S1, 16);
630
631 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
632 /* input is down scale by 4 to avoid overflow */
633 /* Read yb (real), xb(imag) input */
634 T0 = pSrc16[i1 * 2U] >> 2U;
635 T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
636
637 /* input is down scale by 4 to avoid overflow */
638 /* Read yd (real), xd(imag) input */
639 U0 = pSrc16[i3 * 2U] >> 2U;
640 U1 = pSrc16[(i3 * 2U) + 1] >> 2U;
641
642 /* T0 = (yb + yd) */
643 T0 = __SSAT(T0 + U0, 16U);
644 /* T1 = (xb + xd) */
645 T1 = __SSAT(T1 + U1, 16U);
646
647 /* writing the butterfly processed i0 sample */
648 /* ya' = ya + yb + yc + yd */
649 /* xa' = xa + xb + xc + xd */
650 pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
651 pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
652
653 /* R0 = (ya + yc) - (yb + yd) */
654 /* R1 = (xa + xc) - (xb + xd) */
655 R0 = __SSAT(R0 - T0, 16U);
656 R1 = __SSAT(R1 - T1, 16U);
657
658 /* co2 & si2 are read from Coefficient pointer */
659 Co2 = pCoef16[2U * ic * 2U];
660 Si2 = pCoef16[(2U * ic * 2U) + 1];
661
662 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
663 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
664 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
665 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
666
667 /* Reading i0+fftLen/4 */
668 /* input is down scale by 4 to avoid overflow */
669 /* T0 = yb, T1 = xb */
670 T0 = pSrc16[i1 * 2U] >> 2;
671 T1 = pSrc16[(i1 * 2U) + 1] >> 2;
672
673 /* writing the butterfly processed i0 + fftLen/4 sample */
674 /* writing output(xc', yc') in little endian format */
675 pSrc16[i1 * 2U] = out1;
676 pSrc16[(i1 * 2U) + 1] = out2;
677
678 /* Butterfly calculations */
679 /* input is down scale by 4 to avoid overflow */
680 /* U0 = yd, U1 = xd */
681 U0 = pSrc16[i3 * 2U] >> 2;
682 U1 = pSrc16[(i3 * 2U) + 1] >> 2;
683 /* T0 = yb-yd */
684 T0 = __SSAT(T0 - U0, 16);
685 /* T1 = xb-xd */
686 T1 = __SSAT(T1 - U1, 16);
687
688 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
689 R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
690 R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
691
692 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
693 S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U);
694 S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U);
695
696 /* co1 & si1 are read from Coefficient pointer */
697 Co1 = pCoef16[ic * 2U];
698 Si1 = pCoef16[(ic * 2U) + 1];
699 /* Butterfly process for the i0+fftLen/2 sample */
700 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
701 out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
702 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
703 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);
704
705 /* writing output(xb', yb') in little endian format */
706 pSrc16[i2 * 2U] = out1;
707 pSrc16[(i2 * 2U) + 1] = out2;
708
709 /* Co3 & si3 are read from Coefficient pointer */
710 Co3 = pCoef16[3U * (ic * 2U)];
711 Si3 = pCoef16[(3U * (ic * 2U)) + 1];
712 /* Butterfly process for the i0+3fftLen/4 sample */
713 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
714 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
715 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
716 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
717 /* writing output(xd', yd') in little endian format */
718 pSrc16[i3 * 2U] = out1;
719 pSrc16[(i3 * 2U) + 1] = out2;
720
721 /* Twiddle coefficients index modifier */
722 ic = ic + twidCoefModifier;
723
724 /* Updating input index */
725 i0 = i0 + 1U;
726
727 } while (--j);
728 /* data is in 4.11(q11) format */
729
730 /* end of first stage process */
731
732
733 /* start of middle stage process */
734
735 /* Twiddle coefficients index modifier */
736 twidCoefModifier <<= 2U;
737
738 /* Calculation of Middle stage */
739 for (k = fftLen / 4U; k > 4U; k >>= 2U)
740 {
741 /* Initializations for the middle stage */
742 n1 = n2;
743 n2 >>= 2U;
744 ic = 0U;
745
746 for (j = 0U; j <= (n2 - 1U); j++)
747 {
748 /* index calculation for the coefficients */
749 Co1 = pCoef16[ic * 2U];
750 Si1 = pCoef16[(ic * 2U) + 1U];
751 Co2 = pCoef16[2U * (ic * 2U)];
752 Si2 = pCoef16[(2U * (ic * 2U)) + 1U];
753 Co3 = pCoef16[3U * (ic * 2U)];
754 Si3 = pCoef16[(3U * (ic * 2U)) + 1U];
755
756 /* Twiddle coefficients index modifier */
757 ic = ic + twidCoefModifier;
758
759 /* Butterfly implementation */
760 for (i0 = j; i0 < fftLen; i0 += n1)
761 {
762 /* index calculation for the input as, */
763 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
764 i1 = i0 + n2;
765 i2 = i1 + n2;
766 i3 = i2 + n2;
767
768 /* Reading i0, i0+fftLen/2 inputs */
769 /* Read ya (real), xa(imag) input */
770 T0 = pSrc16[i0 * 2U];
771 T1 = pSrc16[(i0 * 2U) + 1U];
772
773 /* Read yc (real), xc(imag) input */
774 S0 = pSrc16[i2 * 2U];
775 S1 = pSrc16[(i2 * 2U) + 1U];
776
777 /* R0 = (ya + yc), R1 = (xa + xc) */
778 R0 = __SSAT(T0 + S0, 16);
779 R1 = __SSAT(T1 + S1, 16);
780
781 /* S0 = (ya - yc), S1 =(xa - xc) */
782 S0 = __SSAT(T0 - S0, 16);
783 S1 = __SSAT(T1 - S1, 16);
784
785 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
786 /* Read yb (real), xb(imag) input */
787 T0 = pSrc16[i1 * 2U];
788 T1 = pSrc16[(i1 * 2U) + 1U];
789
790 /* Read yd (real), xd(imag) input */
791 U0 = pSrc16[i3 * 2U];
792 U1 = pSrc16[(i3 * 2U) + 1U];
793
794
795 /* T0 = (yb + yd), T1 = (xb + xd) */
796 T0 = __SSAT(T0 + U0, 16);
797 T1 = __SSAT(T1 + U1, 16);
798
799 /* writing the butterfly processed i0 sample */
800
801 /* xa' = xa + xb + xc + xd */
802 /* ya' = ya + yb + yc + yd */
803 out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
804 out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
805
806 pSrc16[i0 * 2U] = out1;
807 pSrc16[(2U * i0) + 1U] = out2;
808
809 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
810 R0 = (R0 >> 1U) - (T0 >> 1U);
811 R1 = (R1 >> 1U) - (T1 >> 1U);
812
813 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
814 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
815
816 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
817 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
818
819 /* Reading i0+3fftLen/4 */
820 /* Read yb (real), xb(imag) input */
821 T0 = pSrc16[i1 * 2U];
822 T1 = pSrc16[(i1 * 2U) + 1U];
823
824 /* writing the butterfly processed i0 + fftLen/4 sample */
825 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
826 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
827 pSrc16[i1 * 2U] = out1;
828 pSrc16[(i1 * 2U) + 1U] = out2;
829
830 /* Butterfly calculations */
831
832 /* Read yd (real), xd(imag) input */
833 U0 = pSrc16[i3 * 2U];
834 U1 = pSrc16[(i3 * 2U) + 1U];
835
836 /* T0 = yb-yd, T1 = xb-xd */
837 T0 = __SSAT(T0 - U0, 16);
838 T1 = __SSAT(T1 - U1, 16);
839
840 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
841 R0 = (S0 >> 1U) - (T1 >> 1U);
842 R1 = (S1 >> 1U) + (T0 >> 1U);
843
844 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
845 S0 = (S0 >> 1U) + (T1 >> 1U);
846 S1 = (S1 >> 1U) - (T0 >> 1U);
847
848 /* Butterfly process for the i0+fftLen/2 sample */
849 out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U);
850
851 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U);
852
853 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
854 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
855 pSrc16[i2 * 2U] = out1;
856 pSrc16[(i2 * 2U) + 1U] = out2;
857
858 /* Butterfly process for the i0+3fftLen/4 sample */
859 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
860
861 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
862 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
863 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
864 pSrc16[i3 * 2U] = out1;
865 pSrc16[(i3 * 2U) + 1U] = out2;
866 }
867 }
868 /* Twiddle coefficients index modifier */
869 twidCoefModifier <<= 2U;
870 }
871 /* end of middle stage process */
872
873
874 /* data is in 10.6(q6) format for the 1024 point */
875 /* data is in 8.8(q8) format for the 256 point */
876 /* data is in 6.10(q10) format for the 64 point */
877 /* data is in 4.12(q12) format for the 16 point */
878
879 /* Initializations for the last stage */
880 n1 = n2;
881 n2 >>= 2U;
882
883 /* start of last stage process */
884
885 /* Butterfly implementation */
886 for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
887 {
888 /* index calculation for the input as, */
889 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
890 i1 = i0 + n2;
891 i2 = i1 + n2;
892 i3 = i2 + n2;
893
894 /* Reading i0, i0+fftLen/2 inputs */
895 /* Read ya (real), xa(imag) input */
896 T0 = pSrc16[i0 * 2U];
897 T1 = pSrc16[(i0 * 2U) + 1U];
898
899 /* Read yc (real), xc(imag) input */
900 S0 = pSrc16[i2 * 2U];
901 S1 = pSrc16[(i2 * 2U) + 1U];
902
903 /* R0 = (ya + yc), R1 = (xa + xc) */
904 R0 = __SSAT(T0 + S0, 16U);
905 R1 = __SSAT(T1 + S1, 16U);
906
907 /* S0 = (ya - yc), S1 = (xa - xc) */
908 S0 = __SSAT(T0 - S0, 16U);
909 S1 = __SSAT(T1 - S1, 16U);
910
911 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
912 /* Read yb (real), xb(imag) input */
913 T0 = pSrc16[i1 * 2U];
914 T1 = pSrc16[(i1 * 2U) + 1U];
915 /* Read yd (real), xd(imag) input */
916 U0 = pSrc16[i3 * 2U];
917 U1 = pSrc16[(i3 * 2U) + 1U];
918
919 /* T0 = (yb + yd), T1 = (xb + xd)) */
920 T0 = __SSAT(T0 + U0, 16U);
921 T1 = __SSAT(T1 + U1, 16U);
922
923 /* writing the butterfly processed i0 sample */
924 /* xa' = xa + xb + xc + xd */
925 /* ya' = ya + yb + yc + yd */
926 pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
927 pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
928
929 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
930 R0 = (R0 >> 1U) - (T0 >> 1U);
931 R1 = (R1 >> 1U) - (T1 >> 1U);
932 /* Read yb (real), xb(imag) input */
933 T0 = pSrc16[i1 * 2U];
934 T1 = pSrc16[(i1 * 2U) + 1U];
935
936 /* writing the butterfly processed i0 + fftLen/4 sample */
937 /* xc' = (xa-xb+xc-xd) */
938 /* yc' = (ya-yb+yc-yd) */
939 pSrc16[i1 * 2U] = R0;
940 pSrc16[(i1 * 2U) + 1U] = R1;
941
942 /* Read yd (real), xd(imag) input */
943 U0 = pSrc16[i3 * 2U];
944 U1 = pSrc16[(i3 * 2U) + 1U];
945 /* T0 = (yb - yd), T1 = (xb - xd) */
946 T0 = __SSAT(T0 - U0, 16U);
947 T1 = __SSAT(T1 - U1, 16U);
948
949 /* writing the butterfly processed i0 + fftLen/2 sample */
950 /* xb' = (xa+yb-xc-yd) */
951 /* yb' = (ya-xb-yc+xd) */
952 pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U);
953 pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
954
955 /* writing the butterfly processed i0 + 3fftLen/4 sample */
956 /* xd' = (xa-yb-xc+yd) */
957 /* yd' = (ya+xb-yc-xd) */
958 pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U);
959 pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
960
961 }
962
963 /* end of last stage process */
964
965 /* output is in 11.5(q5) format for the 1024 point */
966 /* output is in 9.7(q7) format for the 256 point */
967 /* output is in 7.9(q9) format for the 64 point */
968 /* output is in 5.11(q11) format for the 16 point */
969
970 #endif /* #if defined (ARM_MATH_DSP) */
971
972 }
973
974
975 /**
976 @brief Core function for the Q15 CIFFT butterfly process.
977 @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
978 @param[in] fftLen length of the FFT
979 @param[in] pCoef16 points to twiddle coefficient buffer
980 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
981 */
982
983 /*
984 * Radix-4 IFFT algorithm used is :
985 *
986 * CIFFT uses same twiddle coefficients as CFFT function
987 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
988 *
989 *
990 * IFFT is implemented with following changes in equations from FFT
991 *
992 * Input real and imaginary data:
993 * x(n) = xa + j * ya
994 * x(n+N/4 ) = xb + j * yb
995 * x(n+N/2 ) = xc + j * yc
996 * x(n+3N/4) = xd + j * yd
997 *
998 *
999 * Output real and imaginary data:
1000 * x(4r) = xa'+ j * ya'
1001 * x(4r+1) = xb'+ j * yb'
1002 * x(4r+2) = xc'+ j * yc'
1003 * x(4r+3) = xd'+ j * yd'
1004 *
1005 *
1006 * Twiddle factors for radix-4 IFFT:
1007 * Wn = co1 + j * (si1)
1008 * W2n = co2 + j * (si2)
1009 * W3n = co3 + j * (si3)
1010
1011 * The real and imaginary output values for the radix-4 butterfly are
1012 * xa' = xa + xb + xc + xd
1013 * ya' = ya + yb + yc + yd
1014 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
1015 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
1016 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
1017 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
1018 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
1019 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
1020 *
1021 */
1022
void arm_radix4_butterfly_inverse_q15(
        q15_t * pSrc16,
        uint32_t fftLen,
  const q15_t * pCoef16,
        uint32_t twidCoefModifier)
{
  /*
   * In-place radix-4 inverse butterfly on interleaved (real, imag) Q15 data.
   *
   *   pSrc16           complex buffer of 2 * fftLen q15 values, overwritten
   *                    with the result.
   *   fftLen           number of complex points.  The first and last stages
   *                    are do/while loops counting down from (fftLen >> 2),
   *                    so fftLen must be at least 4; the caller is expected
   *                    to pass only supported lengths.
   *   pCoef16          twiddle table stored as (cos, sin) q15 pairs.
   *   twidCoefModifier step applied to the twiddle index after each
   *                    butterfly; multiplied by 4 after every stage.
   *
   * Notation used in the comments below: a, b, c, d are the four complex
   * inputs of one butterfly (spaced n2 complex samples apart), written as
   * a = xa + j*ya etc.  The results are stored in the order a', c', b', d'
   * (c' goes to the second slot, b' to the third).
   *
   * Processing is split into first stage (inputs pre-scaled by 1/4),
   * middle stages, and last stage (no twiddle multiplication).
   */

#if defined (ARM_MATH_DSP)

  q31_t R, S, T, U;
  q31_t C1, C2, C3, out1, out2;
  uint32_t n1, n2, ic, i0, j, k;

  q15_t *ptr1;
  q15_t *pSi0;
  q15_t *pSi1;
  q15_t *pSi2;
  q15_t *pSi3;

  q31_t xaya, xbyb, xcyc, xdyd;

  /* ---------------------------- first stage ---------------------------- */

  /* n2 = fftLen / 4: distance (in complex samples) between butterfly legs */
  n2 = fftLen >> 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* One butterfly per iteration, n2 butterflies in total */
  j = n2;

  /* Each complex sample occupies two q15 values, hence the factor 2 */
  pSi0 = pSrc16;
  pSi1 = pSi0 + 2 * n2;
  pSi2 = pSi1 + 2 * n2;
  pSi3 = pSi2 + 2 * n2;

  /* Input is in 1.15(q15) format */

  do
  {
    /* T = a, both halfwords halved twice (>> 2) to guard against overflow */
    T = read_q15x2 (pSi0);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* S = c, scaled the same way */
    S = read_q15x2 (pSi2);
    S = __SHADD16(S, 0);
    S = __SHADD16(S, 0);

    /* R = packed(xa + xc, ya + yc) */
    R = __QADD16(T, S);

    /* S = packed(xa - xc, ya - yc) */
    S = __QSUB16(T, S);

    /* T = b, scaled by 1/4 */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* U = d, scaled by 1/4 */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed(xb + xd, yb + yd) */
    T = __QADD16(T, U);

    /* a' = (a + b + c + d), stored halved; pSi0 advances to the next sample */
    write_q15x2_ia (&pSi0, __SHADD16(R, T));

    /* R = packed((xa + xc) - (xb + xd), (ya + yc) - (yb + yd)) */
    R = __QSUB16(R, T);

    /* (co2, si2): twiddle at complex index 2 * ic */
    C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xc' = (xa-xb+xc-xd) * co2 - (ya-yb+yc-yd) * si2 */
    out1 = __SMUSD(C2, R) >> 16U;
    /* yc' = (ya-yb+yc-yd) * co2 + (xa-xb+xc-xd) * si2 */
    out2 = __SMUADX(C2, R);
#else
    /* Halfword roles are mirrored on big-endian targets: out1 carries yc' */
    out1 = __SMUADX(C2, R) >> 16U;
    /* ... and out2 carries xc' (twiddle negated to obtain the difference) */
    out2 = __SMUSD(__QSUB16(0, C2), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Reload b (T was overwritten by the sum above) before its slot is reused */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Store c' in the second slot: low half from out1, high half from out2 */
    write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));

    /* U = d, scaled by 1/4 */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed(xb - xd, yb - yd) */
    T = __QSUB16(T, U);

    /*
     * Cross terms (identical values on both endiannesses):
     *   R = packed((xa-xc) + (yb-yd), (ya-yc) - (xb-xd))   -> feeds d'
     *   S = packed((xa-xc) - (yb-yd), (ya-yc) + (xb-xd))   -> feeds b'
     */
#ifndef ARM_MATH_BIG_ENDIAN
    R = __QSAX(S, T);
    S = __QASX(S, T);
#else
    R = __QASX(S, T);
    S = __QSAX(S, T);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* (co1, si1): twiddle at complex index ic */
    C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa-yb-xc+yd) * co1 - (ya+xb-yc-xd) * si1 */
    out1 = __SMUSD(C1, S) >> 16U;
    /* yb' = (ya+xb-yc-xd) * co1 + (xa-yb-xc+yd) * si1 */
    out2 = __SMUADX(C1, S);
#else
    /* Big-endian: out1 carries yb', out2 carries xb' */
    out1 = __SMUADX(C1, S) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C1), S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Store b' in the third slot */
    write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));

    /* (co3, si3): twiddle at complex index 3 * ic */
    C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xd' = (xa+yb-xc-yd) * co3 - (ya-xb-yc+xd) * si3 */
    out1 = __SMUSD(C3, R) >> 16U;
    /* yd' = (ya-xb-yc+xd) * co3 + (xa+yb-xc-yd) * si3 */
    out2 = __SMUADX(C3, R);
#else
    /* Big-endian: out1 carries yd', out2 carries xd' */
    out1 = __SMUADX(C3, R) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C3), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Store d' in the fourth slot */
    write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));

    /* Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

  } while (--j);
  /* Scaling note carried over from the original source: */
  /* data is in 4.11(q11) format */

  /* --------------------------- middle stages --------------------------- */

  /* Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /* One pass per remaining radix-4 stage except the last */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* n1 = span of one butterfly group, n2 = distance between its legs */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    /* j selects the twiddle set; it is shared by every group in this stage */
    for (j = 0U; j < n2; j++)
    {
      /* Twiddles at complex indices ic, 2 * ic and 3 * ic */
      C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
      C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
      C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      pSi0 = pSrc16 + 2 * j;
      pSi1 = pSi0 + 2 * n2;
      pSi2 = pSi1 + 2 * n2;
      pSi3 = pSi2 + 2 * n2;

      /* Same butterfly position in every group, groups are n1 samples apart */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* T = a */
        T = read_q15x2 (pSi0);

        /* S = c */
        S = read_q15x2 (pSi2);

        /* R = packed(xa + xc, ya + yc) */
        R = __QADD16(T, S);

        /* S = packed(xa - xc, ya - yc) */
        S = __QSUB16(T, S);

        /* T = b */
        T = read_q15x2 (pSi1);

        /* U = d */
        U = read_q15x2 (pSi3);

        /* T = packed(xb + xd, yb + yd) */
        T = __QADD16(T, U);

        /* a' = (a + b + c + d), halved twice before being stored */
        out1 = __SHADD16(R, T);
        out1 = __SHADD16(out1, 0);
        write_q15x2 (pSi0, out1);
        pSi0 += 2 * n1;

        /* R = packed((xa+xc) - (xb+xd), (ya+yc) - (yb+yd)), halved */
        R = __SHSUB16(R, T);

#ifndef ARM_MATH_BIG_ENDIAN
        /* xc' = (xa-xb+xc-xd) * co2 - (ya-yb+yc-yd) * si2 */
        out1 = __SMUSD(C2, R) >> 16U;

        /* yc' = (ya-yb+yc-yd) * co2 + (xa-xb+xc-xd) * si2 */
        out2 = __SMUADX(C2, R);
#else
        /* Big-endian: out1 carries yc', out2 carries xc' */
        out1 = __SMUADX(R, C2) >> 16U;

        out2 = __SMUSD(__QSUB16(0, C2), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Reload b before its slot is overwritten with c' */
        T = read_q15x2 (pSi1);

        /* Store c' in the second slot */
        write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
        pSi1 += 2 * n1;

        /* U = d */
        U = read_q15x2 (pSi3);

        /* T = packed(xb - xd, yb - yd) */
        T = __QSUB16(T, U);

        /*
         * Halved cross terms (identical values on both endiannesses):
         *   R = packed((xa-xc) + (yb-yd), (ya-yc) - (xb-xd))   -> feeds d'
         *   S = packed((xa-xc) - (yb-yd), (ya-yc) + (xb-xd))   -> feeds b'
         */
#ifndef ARM_MATH_BIG_ENDIAN
        R = __SHSAX(S, T);

        S = __SHASX(S, T);

        /* xb' = (xa-yb-xc+yd) * co1 - (ya+xb-yc-xd) * si1 */
        out1 = __SMUSD(C1, S) >> 16U;
        /* yb' = (ya+xb-yc-xd) * co1 + (xa-yb-xc+yd) * si1 */
        out2 = __SMUADX(C1, S);
#else
        R = __SHASX(S, T);

        S = __SHSAX(S, T);

        /* Big-endian: out1 carries yb', out2 carries xb' */
        out1 = __SMUADX(S, C1) >> 16U;
        out2 = __SMUSD(__QSUB16(0, C1), S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Store b' in the third slot */
        write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
        pSi2 += 2 * n1;

#ifndef ARM_MATH_BIG_ENDIAN
        /* xd' = (xa+yb-xc-yd) * co3 - (ya-xb-yc+xd) * si3 */
        out1 = __SMUSD(C3, R) >> 16U;
        /* yd' = (ya-xb-yc+xd) * co3 + (xa+yb-xc-yd) * si3 */
        out2 = __SMUADX(C3, R);
#else
        /* Big-endian: out1 carries yd', out2 carries xd' */
        out1 = __SMUADX(C3, R) >> 16U;
        out2 = __SMUSD(__QSUB16(0, C3), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Store d' in the fourth slot */
        write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
        pSi3 += 2 * n1;
      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* end of middle stage process */

  /* Scaling notes carried over from the original source: */
  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* ----------------------------- last stage ---------------------------- */

  /* One butterfly per four consecutive complex samples; no twiddles here */
  j = fftLen >> 2;

  ptr1 = &pSrc16[0];

  do
  {
    /* Load a, b, c, d from consecutive slots; ptr1 advances by 8 q15 values */
    xaya = read_q15x2_ia (&ptr1);
    xbyb = read_q15x2_ia (&ptr1);
    xcyc = read_q15x2_ia (&ptr1);
    xdyd = read_q15x2_ia (&ptr1);

    /* R = packed(xa + xc, ya + yc) */
    R = __QADD16(xaya, xcyc);

    /* T = packed(xb + xd, yb + yd) */
    T = __QADD16(xbyb, xdyd);

    /* Rewind to the first of the four samples for the in-place write-back */
    ptr1 = ptr1 - 8U;

    /* a' = (a + b + c + d), halved */
    write_q15x2_ia (&ptr1, __SHADD16(R, T));

    /* c' = (a - b + c - d), halved (second slot) */
    write_q15x2_ia (&ptr1, __SHSUB16(R, T));

    /* S = packed(xa - xc, ya - yc) */
    S = __QSUB16(xaya, xcyc);

    /* U = packed(xb - xd, yb - yd) */
    U = __QSUB16(xbyb, xdyd);

    /*
     * Third slot:  xb' = (xa-yb-xc+yd), yb' = (ya+xb-yc-xd), halved
     * Fourth slot: xd' = (xa+yb-xc-yd), yd' = (ya-xb-yc+xd), halved
     * The exchange intrinsics swap roles between endiannesses.
     */
#ifndef ARM_MATH_BIG_ENDIAN
    write_q15x2_ia (&ptr1, __SHASX(S, U));

    write_q15x2_ia (&ptr1, __SHSAX(S, U));
#else
    write_q15x2_ia (&ptr1, __SHSAX(S, U));

    write_q15x2_ia (&ptr1, __SHASX(S, U));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

  } while (--j);

  /* end of last stage process */

  /* Scaling notes carried over from the original source: */
  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point */
  /* output is in 7.9(q9) format for the 64 point */
  /* output is in 5.11(q11) format for the 16 point */


#else /* arm_radix4_butterfly_inverse_q15 */

  q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;

  /* ---------------------------- first stage ---------------------------- */

  /* n2 = fftLen / 4: distance (in complex samples) between butterfly legs */
  n2 = fftLen >> 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Index for input read and output write */
  i0 = 0U;

  /* One butterfly per iteration, n2 butterflies in total */
  j = n2;

  /* Input is in 1.15(q15) format */

  do
  {
    /* Complex indices of the four legs: i0, i0 + n2, i0 + 2*n2, i0 + 3*n2 */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* T0 = xa, T1 = ya; input is scaled down by 4 to guard against overflow */
    T0 = pSrc16[i0 * 2U] >> 2U;
    T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
    /* S0 = xc, S1 = yc; scaled down by 4 */
    S0 = pSrc16[i2 * 2U] >> 2U;
    S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;

    /* R0 = (xa + xc), R1 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);
    /* S0 = (xa - xc), S1 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /* T0 = xb, T1 = yb; scaled down by 4 */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
    /* U0 = xd, U1 = yd; scaled down by 4 */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;

    /* T0 = (xb + xd), T1 = (yb + yd) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /* a' = (a + b + c + d); each partial sum is halved before the add */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd) */
    R0 = __SSAT(R0 - T0, 16U);
    R1 = __SSAT(R1 - T1, 16U);
    /* (co2, si2): twiddle at complex index 2 * ic */
    Co2 = pCoef16[2U * ic * 2U];
    Si2 = pCoef16[(2U * ic * 2U) + 1U];
    /* xc' = (xa-xb+xc-xd) * co2 - (ya-yb+yc-yd) * si2 */
    out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U);
    /* yc' = (ya-yb+yc-yd) * co2 + (xa-xb+xc-xd) * si2 */
    out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U);

    /* Reload b (T0/T1 were overwritten by the sum) before its slot is reused */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;

    /* Store c' in the second slot */
    pSrc16[i1 * 2U] = out1;
    pSrc16[(i1 * 2U) + 1U] = out2;

    /* U0 = xd, U1 = yd; scaled down by 4 */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;

    /* T0 = (xb - xd), T1 = (yb - yd) */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);
    /* R0 = (xa-xc) + (yb-yd), R1 = (ya-yc) - (xb-xd)   -> feeds d' */
    R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
    R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
    /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb-xd)   -> feeds b' */
    S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
    S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);

    /* (co1, si1): twiddle at complex index ic */
    Co1 = pCoef16[ic * 2U];
    Si1 = pCoef16[(ic * 2U) + 1U];
    /* xb' = (xa-yb-xc+yd) * co1 - (ya+xb-yc-xd) * si1 */
    out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
    /* yb' = (ya+xb-yc-xd) * co1 + (xa-yb-xc+yd) * si1 */
    out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
    /* Store b' in the third slot */
    pSrc16[i2 * 2U] = out1;
    pSrc16[(i2 * 2U) + 1U] = out2;

    /* (co3, si3): twiddle at complex index 3 * ic */
    Co3 = pCoef16[3U * ic * 2U];
    Si3 = pCoef16[(3U * ic * 2U) + 1U];
    /* xd' = (xa+yb-xc-yd) * co3 - (ya-xb-yc+xd) * si3 */
    out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
    /* yd' = (ya-xb-yc+xd) * co3 + (xa+yb-xc-yd) * si3 */
    out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
    /* Store d' in the fourth slot */
    pSrc16[i3 * 2U] = out1;
    pSrc16[(i3 * 2U) + 1U] = out2;

    /* Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

    /* Updating input index */
    i0 = i0 + 1U;

  } while (--j);

  /* End of first stage process */

  /* Scaling note carried over from the original source: */
  /* data is in 4.11(q11) format */

  /* --------------------------- middle stages --------------------------- */

  /* Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /* One pass per remaining radix-4 stage except the last */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* n1 = span of one butterfly group, n2 = distance between its legs */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    /* j selects the twiddle set; it is shared by every group in this stage */
    for (j = 0U; j < n2; j++)
    {
      /* Twiddles at complex indices ic, 2 * ic and 3 * ic */
      Co1 = pCoef16[ic * 2U];
      Si1 = pCoef16[(ic * 2U) + 1U];
      Co2 = pCoef16[2U * ic * 2U];
      Si2 = pCoef16[2U * ic * 2U + 1U];
      Co3 = pCoef16[3U * ic * 2U];
      Si3 = pCoef16[(3U * ic * 2U) + 1U];

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      /* Same butterfly position in every group, groups are n1 samples apart */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* Complex indices of the four legs: i0, i0 + n2, i0 + 2*n2, i0 + 3*n2 */
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /* T0 = xa, T1 = ya */
        T0 = pSrc16[i0 * 2U];
        T1 = pSrc16[(i0 * 2U) + 1U];

        /* S0 = xc, S1 = yc */
        S0 = pSrc16[i2 * 2U];
        S1 = pSrc16[(i2 * 2U) + 1U];


        /* R0 = (xa + xc), R1 = (ya + yc) */
        R0 = __SSAT(T0 + S0, 16U);
        R1 = __SSAT(T1 + S1, 16U);
        /* S0 = (xa - xc), S1 = (ya - yc) */
        S0 = __SSAT(T0 - S0, 16U);
        S1 = __SSAT(T1 - S1, 16U);

        /* T0 = xb, T1 = yb */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* U0 = xd, U1 = yd */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = (xb + xd), T1 = (yb + yd) */
        T0 = __SSAT(T0 + U0, 16U);
        T1 = __SSAT(T1 + U1, 16U);

        /* a' = (a + b + c + d), scaled down by 4 overall */
        pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
        pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;

        /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), halved */
        R0 = (R0 >> 1U) - (T0 >> 1U);
        R1 = (R1 >> 1U) - (T1 >> 1U);

        /* xc' = (xa-xb+xc-xd) * co2 - (ya-yb+yc-yd) * si2 */
        out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
        /* yc' = (ya-yb+yc-yd) * co2 + (xa-xb+xc-xd) * si2 */
        out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);

        /* Reload b before its slot is overwritten with c' */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* Store c' in the second slot */
        pSrc16[i1 * 2U] = out1;
        pSrc16[(i1 * 2U) + 1U] = out2;

        /* U0 = xd, U1 = yd */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = (xb - xd), T1 = (yb - yd) */
        T0 = __SSAT(T0 - U0, 16U);
        T1 = __SSAT(T1 - U1, 16U);

        /* R0 = (xa-xc) + (yb-yd), R1 = (ya-yc) - (xb-xd), halved -> feeds d' */
        R0 = (S0 >> 1U) + (T1 >> 1U);
        R1 = (S1 >> 1U) - (T0 >> 1U);

        /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb-xd), halved -> feeds b' */
        S0 = (S0 >> 1U) - (T1 >> 1U);
        S1 = (S1 >> 1U) + (T0 >> 1U);

        /* xb' = (xa-yb-xc+yd) * co1 - (ya+xb-yc-xd) * si1 */
        out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
        /* yb' = (ya+xb-yc-xd) * co1 + (xa-yb-xc+yd) * si1 */
        out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
        /* Store b' in the third slot */
        pSrc16[i2 * 2U] = out1;
        pSrc16[(i2 * 2U) + 1U] = out2;

        /* xd' = (xa+yb-xc-yd) * co3 - (ya-xb-yc+xd) * si3 */
        out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);

        /* yd' = (ya-xb-yc+xd) * co3 + (xa+yb-xc-yd) * si3 */
        out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
        /* Store d' in the fourth slot */
        pSrc16[i3 * 2U] = out1;
        pSrc16[(i3 * 2U) + 1U] = out2;


      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* End of Middle stages process */


  /* Scaling notes carried over from the original source: */
  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* ----------------------------- last stage ---------------------------- */

  /* Shrink the butterfly span once more; no twiddles are applied here */
  n1 = n2;
  n2 >>= 2U;

  /* One butterfly per group of n1 complex samples */
  for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  {
    /* Complex indices of the four legs: i0, i0 + n2, i0 + 2*n2, i0 + 3*n2 */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* T0 = xa, T1 = ya */
    T0 = pSrc16[i0 * 2U];
    T1 = pSrc16[(i0 * 2U) + 1U];
    /* S0 = xc, S1 = yc */
    S0 = pSrc16[i2 * 2U];
    S1 = pSrc16[(i2 * 2U) + 1U];

    /* R0 = (xa + xc), R1 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);
    /* S0 = (xa - xc), S1 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /* T0 = xb, T1 = yb */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];
    /* U0 = xd, U1 = yd */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];

    /* T0 = (xb + xd), T1 = (yb + yd) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /* a' = (a + b + c + d); each partial sum is halved before the add */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), halved */
    R0 = (R0 >> 1U) - (T0 >> 1U);
    R1 = (R1 >> 1U) - (T1 >> 1U);

    /* Reload b before its slot is overwritten with c' */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];

    /* c' = (a - b + c - d), stored in the second slot */
    pSrc16[i1 * 2U] = R0;
    pSrc16[(i1 * 2U) + 1U] = R1;

    /* U0 = xd, U1 = yd */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];
    /* T0 = (xb - xd), T1 = (yb - yd) */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);

    /* Third slot: xb' = (xa-yb-xc+yd), yb' = (ya+xb-yc-xd), halved */
    pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U);
    pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);


    /* Fourth slot: xd' = (xa+yb-xc-yd), yd' = (ya-xb-yc+xd), halved */
    pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U);
    pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
  }
  /* end of last stage process */

  /* Scaling notes carried over from the original source: */
  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point */
  /* output is in 7.9(q9) format for the 64 point */
  /* output is in 5.11(q11) format for the 16 point */

#endif /* #if defined (ARM_MATH_DSP) */

}
1816