1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q15.c
4 * Description: This file has function definition of Radix-4 FFT & IFFT function and
5 * In-place bit reversal using bit reversal table
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/transform_functions.h"
31
32
33 void arm_radix4_butterfly_q15(
34 q15_t * pSrc16,
35 uint32_t fftLen,
36 const q15_t * pCoef16,
37 uint32_t twidCoefModifier);
38
39 void arm_radix4_butterfly_inverse_q15(
40 q15_t * pSrc16,
41 uint32_t fftLen,
42 const q15_t * pCoef16,
43 uint32_t twidCoefModifier);
44
45 void arm_bitreversal_q15(
46 q15_t * pSrc,
47 uint32_t fftLen,
48 uint16_t bitRevFactor,
49 const uint16_t * pBitRevTab);
50
51 /**
52 @ingroup groupTransforms
53 */
54
55 /**
56 @addtogroup ComplexFFT
57 @{
58 */
59
60
61 /**
62 @brief Processing function for the Q15 CFFT/CIFFT.
63 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
64 @param[in] S points to an instance of the Q15 CFFT/CIFFT structure.
65 @param[in,out] pSrc points to the complex data buffer. Processing occurs in-place.
66 @return none
67
68 @par Input and output formats:
69 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
70 Hence the output format is different for different FFT sizes.
71 The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
72 @par
73 \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
74 \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
75 */
76
arm_cfft_radix4_q15(const arm_cfft_radix4_instance_q15 * S,q15_t * pSrc)77 void arm_cfft_radix4_q15(
78 const arm_cfft_radix4_instance_q15 * S,
79 q15_t * pSrc)
80 {
81 if (S->ifftFlag == 1U)
82 {
83 /* Complex IFFT radix-4 */
84 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
85 }
86 else
87 {
88 /* Complex FFT radix-4 */
89 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
90 }
91
92 if (S->bitReverseFlag == 1U)
93 {
94 /* Bit Reversal */
95 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
96 }
97
98 }
99
100 /**
101 @} end of ComplexFFT group
102 */
103
104 /*
105 * Radix-4 FFT algorithm used is :
106 *
107 * Input real and imaginary data:
108 * x(n) = xa + j * ya
109 * x(n+N/4 ) = xb + j * yb
110 * x(n+N/2 ) = xc + j * yc
111 * x(n+3N/4) = xd + j * yd
112 *
113 *
114 * Output real and imaginary data:
115 * x(4r) = xa'+ j * ya'
116 * x(4r+1) = xb'+ j * yb'
117 * x(4r+2) = xc'+ j * yc'
118 * x(4r+3) = xd'+ j * yd'
119 *
120 *
121 * Twiddle factors for radix-4 FFT:
122 * Wn = co1 + j * (- si1)
123 * W2n = co2 + j * (- si2)
124 * W3n = co3 + j * (- si3)
125
126 * The real and imaginary output values for the radix-4 butterfly are
127 * xa' = xa + xb + xc + xd
128 * ya' = ya + yb + yc + yd
129 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
130 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
131 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
132 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
133 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
134 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
135 *
136 */
137
/**
  @brief         Core function for the Q15 CFFT butterfly process.
  @param[in,out] pSrc16           points to the in-place buffer of Q15 data type
  @param[in]     fftLen           length of the FFT
  @param[in]     pCoef16          points to twiddle coefficient buffer
  @param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
  @return        none

  @par Structure
                   The transform is computed in place in three phases:
                   - first stage: every input is shifted right by 2 before the butterfly,
                     and the three non-trivial outputs are multiplied by twiddle factors;
                   - middle stages: repeated while k > 4 (k starts at fftLen/4 and is
                     divided by 4 each pass), with twidCoefModifier multiplied by 4 per pass;
                   - last stage: butterflies on groups of four adjacent complex samples,
                     without any twiddle multiplication.
                   Two implementations are selected at compile time: a packed 2x16-bit one
                   when ARM_MATH_DSP is defined, and a scalar one otherwise.

  @par Data layout
                   Even buffer indices hold the x (real) part and odd indices the y (imaginary)
                   part of each complex sample, following x(n) = xa + j * ya used throughout
                   the comments of this file.

  @par Preconditions
                   NOTE(review): fftLen is not validated here. With fftLen < 4 the first-stage
                   counter j starts at 0 and the do/while (--j) loop would wrap around.
                   Presumably the caller only passes supported lengths - confirm.
                   NOTE(review): read_q15x2, write_q15x2(_ia), __SSAT and the SIMD intrinsics
                   (__SHADD16, __QADD16, __SMUAD, ...) are defined outside this file; the lane
                   descriptions in the comments below follow the existing annotations and
                   should be confirmed against their documentation.
 */

void arm_radix4_butterfly_q15(
        q15_t * pSrc16,
        uint32_t fftLen,
  const q15_t * pCoef16,
        uint32_t twidCoefModifier)
{

#if defined (ARM_MATH_DSP)

  /* Packed implementation: each q31_t holds one complex sample (two q15 values). */
  /* In the comments below packed(a, b) keeps the notation already used in this file; */
  /* NOTE(review): presumably a is the upper and b the lower halfword on little-endian targets - confirm. */

  q31_t R, S, T, U;
  q31_t C1, C2, C3, out1, out2;
  uint32_t n1, n2, ic, i0, j, k;

  q15_t *ptr1;
  q15_t *pSi0;
  q15_t *pSi1;
  q15_t *pSi2;
  q15_t *pSi3;

  q31_t xaya, xbyb, xcyc, xdyd;

  /* Total process is divided into three stages */

  /* process first stage, middle stages, & last stage */

  /* Initializations for the first stage */
  n2 = fftLen;
  /* n1 is not read before it is reassigned at the start of a middle stage */
  n1 = n2;

  /* n2 = fftLen/4 */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Number of first-stage butterflies (loop counter) */
  j = n2;

  /* Four read/write cursors, spaced fftLen/4 complex samples (2 * n2 q15 values) apart */
  pSi0 = pSrc16;
  pSi1 = pSi0 + 2 * n2;
  pSi2 = pSi1 + 2 * n2;
  pSi3 = pSi2 + 2 * n2;

  /* Input is in 1.15(q15) format */

  /* start of first stage process */
  do
  {
    /* Butterfly implementation */

    /* Reading i0, i0+fftLen/2 inputs */
    /* Read xa (real), ya (imag) input */
    T = read_q15x2 (pSi0);
    T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
    T = __SHADD16(T, 0); /* it turns out doing this twice is 2 cycles, the alternative takes 3 cycles */
    /*
       in = ((int16_t) (T & 0xFFFF)) >> 2;       // alternative code that takes 3 cycles
       T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
     */

    /* Read xc (real), yc (imag) input, scaled down by 4 like T above */
    S = read_q15x2 (pSi2);
    S = __SHADD16(S, 0);
    S = __SHADD16(S, 0);

    /* R = packed((ya + yc), (xa + xc) ) */
    R = __QADD16(T, S);

    /* S = packed((ya - yc), (xa - xc) ) */
    S = __QSUB16(T, S);

    /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
    /* Read xb (real), yb (imag) input */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Read xd (real), yd (imag) input */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed((yb + yd), (xb + xd) ) */
    T = __QADD16(T, U);

    /* writing the butterfly processed i0 sample */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    write_q15x2_ia (&pSi0, __SHADD16(R, T));

    /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
    R = __QSUB16(R, T);

    /* co2 & si2 are read from SIMD Coefficient pointer */
    C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));

    /* Only out1 is shifted down: __PKHBT below presumably takes the low halfword */
    /* of out1 and the high halfword of out2 - confirm against the intrinsic. */
#ifndef ARM_MATH_BIG_ENDIAN
    /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
    out1 = __SMUAD(C2, R) >> 16U;
    /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
    out2 = __SMUSDX(C2, R);
#else
    /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
    out1 = __SMUSDX(R, C2) >> 16U;
    /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
    out2 = __SMUAD(C2, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Reading i0+fftLen/4 */
    /* T = packed(yb, xb); re-read because T was overwritten with the sum above, */
    /* and it must be fetched before the slot is overwritten just below */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* writing the butterfly processed i0 + fftLen/4 sample */
    /* writing output(xc', yc') in little endian format */
    write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));

    /* Butterfly calculations */
    /* U = packed(yd, xd) */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed(yb-yd, xb-xd) */
    T = __QSUB16(T, U);

#ifndef ARM_MATH_BIG_ENDIAN
    /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
    R = __QASX(S, T);
    /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
    S = __QSAX(S, T);
#else
    /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
    R = __QSAX(S, T);
    /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
    S = __QASX(S, T);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* co1 & si1 are read from SIMD Coefficient pointer */
    C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
    /* Butterfly process for the i0+fftLen/2 sample */

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
    out1 = __SMUAD(C1, S) >> 16U;
    /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
    out2 = __SMUSDX(C1, S);
#else
    /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
    out1 = __SMUSDX(S, C1) >> 16U;
    /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
    out2 = __SMUAD(C1, S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* writing output(xb', yb') in little endian format */
    write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));

    /* co3 & si3 are read from SIMD Coefficient pointer */
    C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
    /* Butterfly process for the i0+3fftLen/4 sample */

#ifndef ARM_MATH_BIG_ENDIAN
    /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
    out1 = __SMUAD(C3, R) >> 16U;
    /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
    out2 = __SMUSDX(C3, R);
#else
    /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
    out1 = __SMUSDX(R, C3) >> 16U;
    /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
    out2 = __SMUAD(C3, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* writing output(xd', yd') in little endian format */
    write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));

    /*  Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

  } while (--j);
  /* data is in 4.11(q11) format */

  /* end of first stage process */


  /* start of middle stage process */

  /*  Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /*  Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /*  Initializations for the middle stage */
    /* n1 = span of one sub-transform, n2 = distance between its four butterfly inputs */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    /* One pass of the outer loop per twiddle factor set */
    for (j = 0U; j <= (n2 - 1U); j++)
    {
      /*  index calculation for the coefficients */
      C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
      C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
      C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

      /*  Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      pSi0 = pSrc16 + 2 * j;
      pSi1 = pSi0 + 2 * n2;
      pSi2 = pSi1 + 2 * n2;
      pSi3 = pSi2 + 2 * n2;

      /*  Butterfly implementation */
      /* Same twiddle factors are applied to butterfly j of every sub-transform */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /*  Reading i0, i0+fftLen/2 inputs */
        /* Read xa (real), ya (imag) input */
        T = read_q15x2 (pSi0);

        /* Read xc (real), yc (imag) input */
        S = read_q15x2 (pSi2);

        /* R = packed( (ya + yc), (xa + xc)) */
        R = __QADD16(T, S);

        /* S = packed((ya - yc), (xa - xc)) */
        S = __QSUB16(T, S);

        /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
        /* Read xb (real), yb (imag) input */
        T = read_q15x2 (pSi1);

        /* Read xd (real), yd (imag) input */
        U = read_q15x2 (pSi3);

        /* T = packed( (yb + yd), (xb + xd)) */
        T = __QADD16(T, U);

        /*  writing the butterfly processed i0 sample */

        /* xa' = xa + xb + xc + xd */
        /* ya' = ya + yb + yc + yd */
        /* two halving steps in a row: the sum is scaled down by 4 overall */
        out1 = __SHADD16(R, T);
        out1 = __SHADD16(out1, 0);
        write_q15x2 (pSi0, out1);
        pSi0 += 2 * n1;

        /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
        R = __SHSUB16(R, T);

#ifndef ARM_MATH_BIG_ENDIAN
        /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
        out1 = __SMUAD(C2, R) >> 16U;

        /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        out2 = __SMUSDX(C2, R);
#else
        /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        out1 = __SMUSDX(R, C2) >> 16U;

        /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
        out2 = __SMUAD(C2, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /*  Reading i0+fftLen/4 */
        /* Read xb (real), yb (imag) input again, before its slot is overwritten below */
        T = read_q15x2 (pSi1);

        /*  writing the butterfly processed i0 + fftLen/4 sample */
        /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
        /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
        pSi1 += 2 * n1;

        /*  Butterfly calculations */

        /* Read xd (real), yd (imag) input */
        U = read_q15x2 (pSi3);

        /* T = packed(yb-yd, xb-xd) */
        T = __QSUB16(T, U);

#ifndef ARM_MATH_BIG_ENDIAN
        /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
        R = __SHASX(S, T);

        /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
        S = __SHSAX(S, T);


        /*  Butterfly process for the i0+fftLen/2 sample */
        out1 = __SMUAD(C1, S) >> 16U;
        out2 = __SMUSDX(C1, S);
#else
        /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
        R = __SHSAX(S, T);

        /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
        S = __SHASX(S, T);


        /*  Butterfly process for the i0+fftLen/2 sample */
        out1 = __SMUSDX(S, C1) >> 16U;
        out2 = __SMUAD(C1, S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
        /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
        write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
        pSi2 += 2 * n1;

        /*  Butterfly process for the i0+3fftLen/4 sample */

#ifndef ARM_MATH_BIG_ENDIAN
        out1 = __SMUAD(C3, R) >> 16U;
        out2 = __SMUSDX(C3, R);
#else
        out1 = __SMUSDX(R, C3) >> 16U;
        out2 = __SMUAD(C3, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
        /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
        write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
        pSi3 += 2 * n1;
      }
    }
    /*  Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* end of middle stage process */


  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /*  Initializations for the last stage */
  /* one butterfly per group of four consecutive complex samples */
  j = fftLen >> 2;

  ptr1 = &pSrc16[0];

  /* start of last stage process */

  /*  Butterfly implementation */
  do
  {
    /* Read xa (real), ya(imag) input */
    xaya = read_q15x2_ia ((q15_t **) &ptr1);

    /* Read xb (real), yb(imag) input */
    xbyb = read_q15x2_ia ((q15_t **) &ptr1);

    /* Read xc (real), yc(imag) input */
    xcyc = read_q15x2_ia ((q15_t **) &ptr1);

    /* Read xd (real), yd(imag) input */
    xdyd = read_q15x2_ia ((q15_t **) &ptr1);

    /* R = packed((ya + yc), (xa + xc)) */
    R = __QADD16(xaya, xcyc);

    /* T = packed((yb + yd), (xb + xd)) */
    T = __QADD16(xbyb, xdyd);

    /* rewind by the 8 q15 values (4 complex samples) just read, so the results overwrite them */
    ptr1 = ptr1 - 8U;


    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    write_q15x2_ia (&ptr1, __SHADD16(R, T));

    /* T = packed((yb + yd), (xb + xd)) */
    /* NOTE(review): same operands as the assignment to T above, so this recomputes the same value */
    T = __QADD16(xbyb, xdyd);

    /* xc' = (xa-xb+xc-xd) */
    /* yc' = (ya-yb+yc-yd) */
    write_q15x2_ia (&ptr1, __SHSUB16(R, T));

    /* S = packed((ya - yc), (xa - xc)) */
    S = __QSUB16(xaya, xcyc);

    /* U = packed( (yb - yd), (xb - xd)) */
    U = __QSUB16(xbyb, xdyd);

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa+yb-xc-yd) */
    /* yb' = (ya-xb-yc+xd) */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));

    /* xd' = (xa-yb-xc+yd) */
    /* yd' = (ya+xb-yc-xd) */
    write_q15x2_ia (&ptr1, __SHASX(S, U));
#else
    /* xb' = (xa+yb-xc-yd) */
    /* yb' = (ya-xb-yc+xd) */
    write_q15x2_ia (&ptr1, __SHASX(S, U));

    /* xd' = (xa-yb-xc+yd) */
    /* yd' = (ya+xb-yc-xd) */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

  } while (--j);

  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point   */
  /* output is in 7.9(q9) format for the 64 point  */
  /* output is in 5.11(q11) format for the 16 point  */


#else /* #if defined (ARM_MATH_DSP) */

  /* Scalar implementation. Naming used below: suffix 0 = x (real) part read from an */
  /* even index, suffix 1 = y (imaginary) part read from the following odd index.    */

  q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;

  /* Total process is divided into three stages */

  /* process first stage, middle stages, & last stage */

  /*  Initializations for the first stage */
  n2 = fftLen;
  /* n1 is not read before it is reassigned further down */
  n1 = n2;

  /* n2 = fftLen/4 */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Index for input read and output write */
  i0 = 0U;
  j = n2;

  /* Input is in 1.15(q15) format */

  /*  start of first stage process */
  do
  {
    /*  Butterfly implementation */

    /*  index calculation for the input as, */
    /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /*  Reading i0, i0+fftLen/2 inputs */

    /* input is down scale by 4 to avoid overflow */
    /* Read xa (real), ya (imag) input */
    T0 = pSrc16[i0 * 2U] >> 2U;
    T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;

    /* input is down scale by 4 to avoid overflow */
    /* Read xc (real), yc (imag) input */
    S0 = pSrc16[i2 * 2U] >> 2U;
    S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;

    /* R0 = (xa + xc) */
    R0 = __SSAT(T0 + S0, 16U);
    /* R1 = (ya + yc) */
    R1 = __SSAT(T1 + S1, 16U);

    /* S0 = (xa - xc) */
    S0 = __SSAT(T0 - S0, 16);
    /* S1 = (ya - yc) */
    S1 = __SSAT(T1 - S1, 16);

    /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
    /* input is down scale by 4 to avoid overflow */
    /* Read xb (real), yb (imag) input */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;

    /* input is down scale by 4 to avoid overflow */
    /* Read xd (real), yd (imag) input */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1] >> 2U;

    /* T0 = (xb + xd) */
    T0 = __SSAT(T0 + U0, 16U);
    /* T1 = (yb + yd) */
    T1 = __SSAT(T1 + U1, 16U);

    /*  writing the butterfly processed i0 sample */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    /* each half-sum is shifted right by 1 before the final addition */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (xa + xc) - (xb + xd) */
    /* R1 = (ya + yc) - (yb + yd) */
    R0 = __SSAT(R0 - T0, 16U);
    R1 = __SSAT(R1 - T1, 16U);

    /* co2 & si2 are read from Coefficient pointer */
    Co2 = pCoef16[2U * ic * 2U];
    Si2 = pCoef16[(2U * ic * 2U) + 1];

    /*  xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
    out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
    /*  yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
    out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);

    /*  Reading i0+fftLen/4 */
    /* input is down scale by 4 to avoid overflow */
    /* T0 = xb, T1 = yb; re-read because T0/T1 were overwritten with the sums above, */
    /* and the slot is overwritten with (xc', yc') just below */
    T0 = pSrc16[i1 * 2U] >> 2;
    T1 = pSrc16[(i1 * 2U) + 1] >> 2;

    /* writing the butterfly processed i0 + fftLen/4 sample */
    /* writing output(xc', yc') in little endian format */
    pSrc16[i1 * 2U] = out1;
    pSrc16[(i1 * 2U) + 1] = out2;

    /*  Butterfly calculations */
    /* input is down scale by 4 to avoid overflow */
    /* U0 = xd, U1 = yd */
    U0 = pSrc16[i3 * 2U] >> 2;
    U1 = pSrc16[(i3 * 2U) + 1] >> 2;
    /* T0 = xb-xd */
    T0 = __SSAT(T0 - U0, 16);
    /* T1 = yb-yd */
    T1 = __SSAT(T1 - U1, 16);

    /* R1 = (ya-yc) + (xb- xd),  R0 = (xa-xc) - (yb-yd)) */
    R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
    R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);

    /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
    S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U);
    S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U);

    /* co1 & si1 are read from Coefficient pointer */
    Co1 = pCoef16[ic * 2U];
    Si1 = pCoef16[(ic * 2U) + 1];
    /*  Butterfly process for the i0+fftLen/2 sample */
    /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
    out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
    /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
    out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);

    /* writing output(xb', yb') in little endian format */
    pSrc16[i2 * 2U] = out1;
    pSrc16[(i2 * 2U) + 1] = out2;

    /* Co3 & si3 are read from Coefficient pointer */
    Co3 = pCoef16[3U * (ic * 2U)];
    Si3 = pCoef16[(3U * (ic * 2U)) + 1];
    /*  Butterfly process for the i0+3fftLen/4 sample */
    /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
    out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
    /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
    out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
    /* writing output(xd', yd') in little endian format */
    pSrc16[i3 * 2U] = out1;
    pSrc16[(i3 * 2U) + 1] = out2;

    /*  Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

    /*  Updating input index */
    i0 = i0 + 1U;

  } while (--j);
  /* data is in 4.11(q11) format */

  /* end of first stage process */


  /* start of middle stage process */

  /*  Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /*  Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /*  Initializations for the middle stage */
    /* n1 = span of one sub-transform, n2 = distance between its four butterfly inputs */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    /* One pass of the outer loop per twiddle factor set */
    for (j = 0U; j <= (n2 - 1U); j++)
    {
      /*  index calculation for the coefficients */
      Co1 = pCoef16[ic * 2U];
      Si1 = pCoef16[(ic * 2U) + 1U];
      Co2 = pCoef16[2U * (ic * 2U)];
      Si2 = pCoef16[(2U * (ic * 2U)) + 1U];
      Co3 = pCoef16[3U * (ic * 2U)];
      Si3 = pCoef16[(3U * (ic * 2U)) + 1U];

      /*  Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      /*  Butterfly implementation */
      /* Same twiddle factors are applied to butterfly j of every sub-transform */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /*  index calculation for the input as, */
        /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /*  Reading i0, i0+fftLen/2 inputs */
        /* Read xa (real), ya (imag) input */
        T0 = pSrc16[i0 * 2U];
        T1 = pSrc16[(i0 * 2U) + 1U];

        /* Read xc (real), yc (imag) input */
        S0 = pSrc16[i2 * 2U];
        S1 = pSrc16[(i2 * 2U) + 1U];

        /* R0 = (xa + xc), R1 = (ya + yc) */
        R0 = __SSAT(T0 + S0, 16);
        R1 = __SSAT(T1 + S1, 16);

        /* S0 = (xa - xc), S1 = (ya - yc) */
        S0 = __SSAT(T0 - S0, 16);
        S1 = __SSAT(T1 - S1, 16);

        /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
        /* Read xb (real), yb (imag) input */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* Read xd (real), yd (imag) input */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];


        /* T0 = (xb + xd), T1 = (yb + yd) */
        T0 = __SSAT(T0 + U0, 16);
        T1 = __SSAT(T1 + U1, 16);

        /*  writing the butterfly processed i0 sample */

        /* xa' = xa + xb + xc + xd */
        /* ya' = ya + yb + yc + yd */
        /* half-sums are shifted right by 1, added, then shifted right by 1 again */
        out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
        out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;

        pSrc16[i0 * 2U] = out1;
        pSrc16[(2U * i0) + 1U] = out2;

        /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), each operand halved first */
        R0 = (R0 >> 1U) - (T0 >> 1U);
        R1 = (R1 >> 1U) - (T1 >> 1U);

        /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
        out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);

        /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);

        /*  Reading i0+fftLen/4 */
        /* Read xb (real), yb (imag) input again, before its slot is overwritten below */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /*  writing the butterfly processed i0 + fftLen/4 sample */
        /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
        /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        pSrc16[i1 * 2U] = out1;
        pSrc16[(i1 * 2U) + 1U] = out2;

        /*  Butterfly calculations */

        /* Read xd (real), yd (imag) input */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = xb-xd, T1 = yb-yd */
        T0 = __SSAT(T0 - U0, 16);
        T1 = __SSAT(T1 - U1, 16);

        /* R0 = (xa-xc) - (yb-yd), R1 = (ya-yc) + (xb- xd), each operand halved first */
        R0 = (S0 >> 1U) - (T1 >> 1U);
        R1 = (S1 >> 1U) + (T0 >> 1U);

        /* S0 = (xa-xc) + (yb-yd), S1 = (ya-yc) - (xb- xd), each operand halved first */
        S0 = (S0 >> 1U) + (T1 >> 1U);
        S1 = (S1 >> 1U) - (T0 >> 1U);

        /*  Butterfly process for the i0+fftLen/2 sample */
        out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U);

        out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U);

        /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
        /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
        pSrc16[i2 * 2U] = out1;
        pSrc16[(i2 * 2U) + 1U] = out2;

        /*  Butterfly process for the i0+3fftLen/4 sample */
        out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);

        out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
        /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
        /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
        pSrc16[i3 * 2U] = out1;
        pSrc16[(i3 * 2U) + 1U] = out2;
      }
    }
    /*  Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* end of middle stage process */


  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /*  Initializations for the last stage */
  n1 = n2;
  n2 >>= 2U;

  /* start of last stage process */

  /*  Butterfly implementation */
  /* no twiddle multiplication in this stage: only additions, subtractions and halving */
  for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  {
    /*  index calculation for the input as, */
    /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /*  Reading i0, i0+fftLen/2 inputs */
    /* Read xa (real), ya (imag) input */
    T0 = pSrc16[i0 * 2U];
    T1 = pSrc16[(i0 * 2U) + 1U];

    /* Read xc (real), yc (imag) input */
    S0 = pSrc16[i2 * 2U];
    S1 = pSrc16[(i2 * 2U) + 1U];

    /* R0 = (xa + xc), R1 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);

    /* S0 = (xa - xc), S1 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
    /* Read xb (real), yb (imag) input */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];
    /* Read xd (real), yd (imag) input */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];

    /* T0 = (xb + xd), T1 = (yb + yd)) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /*  writing the butterfly processed i0 sample */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), each operand halved first */
    R0 = (R0 >> 1U) - (T0 >> 1U);
    R1 = (R1 >> 1U) - (T1 >> 1U);
    /* Read xb (real), yb (imag) input again, before its slot is overwritten below */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];

    /*  writing the butterfly processed i0 + fftLen/4 sample */
    /* xc' = (xa-xb+xc-xd) */
    /* yc' = (ya-yb+yc-yd) */
    pSrc16[i1 * 2U] = R0;
    pSrc16[(i1 * 2U) + 1U] = R1;

    /* Read xd (real), yd (imag) input */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];
    /* T0 = (xb - xd), T1 = (yb - yd) */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);

    /*  writing the butterfly processed i0 + fftLen/2 sample */
    /* xb' = (xa+yb-xc-yd) */
    /* yb' = (ya-xb-yc+xd) */
    pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U);
    pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);

    /*  writing the butterfly processed i0 + 3fftLen/4 sample */
    /* xd' = (xa-yb-xc+yd) */
    /* yd' = (ya+xb-yc-xd) */
    pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U);
    pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);

  }

  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point   */
  /* output is in 7.9(q9) format for the 64 point  */
  /* output is in 5.11(q11) format for the 16 point  */

#endif /* #if defined (ARM_MATH_DSP) */

}
966
967
968 /**
969 @brief Core function for the Q15 CIFFT butterfly process.
970 @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
971 @param[in] fftLen length of the FFT
972 @param[in] pCoef16 points to twiddle coefficient buffer
973 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
974 @return none
975 */
976
977 /*
978 * Radix-4 IFFT algorithm used is :
979 *
980 * CIFFT uses same twiddle coefficients as CFFT function
981 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
982 *
983 *
984 * IFFT is implemented with following changes in equations from FFT
985 *
986 * Input real and imaginary data:
987 * x(n) = xa + j * ya
988 * x(n+N/4 ) = xb + j * yb
989 * x(n+N/2 ) = xc + j * yc
990 * x(n+3N/4) = xd + j * yd
991 *
992 *
993 * Output real and imaginary data:
994 * x(4r) = xa'+ j * ya'
995 * x(4r+1) = xb'+ j * yb'
996 * x(4r+2) = xc'+ j * yc'
997 * x(4r+3) = xd'+ j * yd'
998 *
999 *
1000 * Twiddle factors for radix-4 IFFT:
1001 * Wn = co1 + j * (si1)
1002 * W2n = co2 + j * (si2)
1003 * W3n = co3 + j * (si3)
1004
1005 * The real and imaginary output values for the radix-4 butterfly are
1006 * xa' = xa + xb + xc + xd
1007 * ya' = ya + yb + yc + yd
1008 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
1009 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
1010 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
1011 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
1012 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
1013 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
1014 *
1015 */
1016
void arm_radix4_butterfly_inverse_q15(
  q15_t * pSrc16,
  uint32_t fftLen,
  const q15_t * pCoef16,
  uint32_t twidCoefModifier)
{
  /*
   * In-place radix-4 inverse butterfly, processed in three phases:
   *   1. first stage   - every input is scaled by 1/4 before it is used,
   *                      then the twiddle factors are applied;
   *   2. middle stages - repeated while k > 4; the twiddle stride
   *                      (twidCoefModifier) is multiplied by 4 after each pass;
   *   3. last stage    - sums and differences only, no twiddle multiplication.
   *
   * Two implementations are selected at compile time: a packed-halfword
   * version built on SIMD intrinsics (ARM_MATH_DSP) and a plain C version.
   *
   * Notation used in the comments below: a, b, c, d are the four complex
   * inputs of one butterfly (slots i0, i0+n2, i0+2*n2, i0+3*n2); x is the
   * real part (even array index), y the imaginary part (odd array index).
   *
   * Output placement: the result formed with the W2n twiddle (xc', yc') is
   * stored in slot b, the W1n result (xb', yb') in slot c and the W3n result
   * (xd', yd') in slot d.
   */

#if defined (ARM_MATH_DSP)

  q31_t R, S, T, U;
  q31_t C1, C2, C3, out1, out2;
  uint32_t n1, n2, ic, i0, j, k;

  q15_t *ptr1;
  q15_t *pSi0;
  q15_t *pSi1;
  q15_t *pSi2;
  q15_t *pSi3;

  q31_t xaya, xbyb, xcyc, xdyd;

  /* Initializations for the first stage */
  n2 = fftLen;
  n1 = n2;

  /* n2 = fftLen/4 */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Number of butterflies in the first stage */
  j = n2;

  /* One pointer per butterfly slot, each fftLen/4 complex samples apart */
  pSi0 = pSrc16;
  pSi1 = pSi0 + 2 * n2;
  pSi2 = pSi1 + 2 * n2;
  pSi3 = pSi2 + 2 * n2;

  /* Input is in 1.15(q15) format */

  /* start of first stage process */
  do
  {
    /* Read a = (xa, ya); the two halving adds with zero scale each
       halfword by 1/4 (same role as the ">> 2" in the plain C path) */
    T = read_q15x2 (pSi0);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Read c = (xc, yc), scaled by 1/4 */
    S = read_q15x2 (pSi2);
    S = __SHADD16(S, 0);
    S = __SHADD16(S, 0);

    /* R = packed((xa + xc), (ya + yc)) */
    R = __QADD16(T, S);

    /* S = packed((xa - xc), (ya - yc)) */
    S = __QSUB16(T, S);

    /* Read b = (xb, yb), scaled by 1/4 */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Read d = (xd, yd), scaled by 1/4 */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed((xb + xd), (yb + yd)) */
    T = __QADD16(T, U);

    /* Slot a: halved sum of the four inputs */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    write_q15x2_ia (&pSi0, __SHADD16(R, T));

    /* R = packed((xa + xc) - (xb + xd), (ya + yc) - (yb + yd)) */
    R = __QSUB16(R, T);

    /* co2 & si2 are read from SIMD Coefficient pointer */
    C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
    out1 = __SMUSD(C2, R) >> 16U;
    /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
    out2 = __SMUADX(C2, R);
#else
    /* Big-endian build: same complex product, formed for the opposite
       halfword order. NOTE(review): depends on the lane order produced by
       read_q15x2, which is not visible here. */
    out1 = __SMUADX(C2, R) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C2), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Re-read b (scaled by 1/4) before slot b is overwritten below */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Slot b receives (xc', yc'): bottom halfword of out1 packed with the
       top halfword of out2 */
    write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));

    /* Re-read d (scaled by 1/4) */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed((xb - xd), (yb - yd)) */
    T = __QSUB16(T, U);

    /* R = packed((xa-xc) + (yb-yd), (ya-yc) - (xb-xd)) */
    /* S = packed((xa-xc) - (yb-yd), (ya-yc) + (xb-xd)) */
    /* The exchange intrinsics are swapped between the two builds because
       the x/y halfword positions differ with endianness. */
#ifndef ARM_MATH_BIG_ENDIAN
    R = __QSAX(S, T);
    S = __QASX(S, T);
#else
    R = __QASX(S, T);
    S = __QSAX(S, T);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* co1 & si1 are read from SIMD Coefficient pointer */
    C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
    out1 = __SMUSD(C1, S) >> 16U;
    /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
    out2 = __SMUADX(C1, S);
#else
    /* Big-endian variant of the same complex product */
    out1 = __SMUADX(C1, S) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C1), S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Slot c receives (xb', yb') */
    write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));

    /* co3 & si3 are read from SIMD Coefficient pointer */
    C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
    out1 = __SMUSD(C3, R) >> 16U;
    /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
    out2 = __SMUADX(C3, R);
#else
    /* Big-endian variant of the same complex product */
    out1 = __SMUADX(C3, R) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C3), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Slot d receives (xd', yd') */
    write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));

    /* Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

  } while (--j);
  /* data is in 4.11(q11) format */

  /* end of first stage process */


  /* start of middle stage process */

  /* Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /* Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* Initializations for the middle stage */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    for (j = 0U; j <= (n2 - 1U); j++)
    {
      /* The three twiddles are shared by every butterfly visited for this j */
      C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
      C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
      C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      /* Slot pointers for the first butterfly of this j; slots are n2
         complex samples apart */
      pSi0 = pSrc16 + 2 * j;
      pSi1 = pSi0 + 2 * n2;
      pSi2 = pSi1 + 2 * n2;
      pSi3 = pSi2 + 2 * n2;

      /* Butterfly implementation: one butterfly every n1 complex samples */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* Read a = (xa, ya) */
        T = read_q15x2 (pSi0);

        /* Read c = (xc, yc) */
        S = read_q15x2 (pSi2);

        /* R = packed((xa + xc), (ya + yc)) */
        R = __QADD16(T, S);

        /* S = packed((xa - xc), (ya - yc)) */
        S = __QSUB16(T, S);

        /* Read b = (xb, yb) */
        T = read_q15x2 (pSi1);

        /* Read d = (xd, yd) */
        U = read_q15x2 (pSi3);

        /* T = packed((xb + xd), (yb + yd)) */
        T = __QADD16(T, U);

        /* Slot a: sum of the four inputs, halved twice */
        /* xa' = xa + xb + xc + xd */
        /* ya' = ya + yb + yc + yd */
        out1 = __SHADD16(R, T);
        out1 = __SHADD16(out1, 0);
        write_q15x2 (pSi0, out1);
        pSi0 += 2 * n1;

        /* R = packed((xa + xc) - (xb + xd), (ya + yc) - (yb + yd)), halved */
        R = __SHSUB16(R, T);

#ifndef ARM_MATH_BIG_ENDIAN
        /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
        out1 = __SMUSD(C2, R) >> 16U;

        /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
        out2 = __SMUADX(C2, R);
#else
        /* Big-endian variant of the same complex product */
        out1 = __SMUADX(R, C2) >> 16U;

        out2 = __SMUSD(__QSUB16(0, C2), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Re-read b before slot b is overwritten below */
        T = read_q15x2 (pSi1);

        /* Slot b receives (xc', yc') */
        write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
        pSi1 += 2 * n1;

        /* Re-read d */
        U = read_q15x2 (pSi3);

        /* T = packed((xb - xd), (yb - yd)) */
        T = __QSUB16(T, U);

        /* R = packed((xa-xc) + (yb-yd), (ya-yc) - (xb-xd)), halved */
        /* S = packed((xa-xc) - (yb-yd), (ya-yc) + (xb-xd)), halved */
#ifndef ARM_MATH_BIG_ENDIAN
        R = __SHSAX(S, T);

        S = __SHASX(S, T);

        /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
        out1 = __SMUSD(C1, S) >> 16U;
        /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
        out2 = __SMUADX(C1, S);
#else
        R = __SHASX(S, T);

        S = __SHSAX(S, T);

        /* Big-endian variant of the same complex product */
        out1 = __SMUADX(S, C1) >> 16U;
        out2 = __SMUSD(__QSUB16(0, C1), S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Slot c receives (xb', yb') */
        write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
        pSi2 += 2 * n1;

#ifndef ARM_MATH_BIG_ENDIAN
        /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
        out1 = __SMUSD(C3, R) >> 16U;
        /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
        out2 = __SMUADX(C3, R);
#else
        /* Big-endian variant of the same complex product */
        out1 = __SMUADX(C3, R) >> 16U;
        out2 = __SMUSD(__QSUB16(0, C3), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Slot d receives (xd', yd') */
        write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
        pSi3 += 2 * n1;
      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* end of middle stage process */

  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* Initializations for the last stage */
  j = fftLen >> 2;

  ptr1 = &pSrc16[0];

  /* start of last stage process */

  /* Each iteration handles four consecutive complex samples in place */
  do
  {
    /* Read xa (real), ya(imag) input */
    xaya = read_q15x2_ia ((q15_t **) &ptr1);

    /* Read xb (real), yb(imag) input */
    xbyb = read_q15x2_ia ((q15_t **) &ptr1);

    /* Read xc (real), yc(imag) input */
    xcyc = read_q15x2_ia ((q15_t **) &ptr1);

    /* Read xd (real), yd(imag) input */
    xdyd = read_q15x2_ia ((q15_t **) &ptr1);

    /* R = packed((xa + xc), (ya + yc)) */
    R = __QADD16(xaya, xcyc);

    /* T = packed((xb + xd), (yb + yd)) */
    T = __QADD16(xbyb, xdyd);

    /* Step back over the eight q15 values just read so the results
       overwrite them */
    ptr1 = ptr1 - 8U;

    /* First sample (halved) */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    write_q15x2_ia (&ptr1, __SHADD16(R, T));

    /* Second sample (halved) */
    /* xc' = (xa-xb+xc-xd) */
    /* yc' = (ya-yb+yc-yd) */
    write_q15x2_ia (&ptr1, __SHSUB16(R, T));

    /* S = packed((xa - xc), (ya - yc)) */
    S = __QSUB16(xaya, xcyc);

    /* U = packed((xb - xd), (yb - yd)) */
    U = __QSUB16(xbyb, xdyd);

    /* Third and fourth samples (halved); the exchange intrinsics are
       swapped between the two builds, as in the earlier stages */
#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa-yb-xc+yd) */
    /* yb' = (ya+xb-yc-xd) */
    write_q15x2_ia (&ptr1, __SHASX(S, U));

    /* xd' = (xa+yb-xc-yd) */
    /* yd' = (ya-xb-yc+xd) */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));
#else
    /* xb' = (xa-yb-xc+yd) */
    /* yb' = (ya+xb-yc-xd) */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));

    /* xd' = (xa+yb-xc-yd) */
    /* yd' = (ya-xb-yc+xd) */
    write_q15x2_ia (&ptr1, __SHASX(S, U));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

  } while (--j);

  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point */
  /* output is in 7.9(q9) format for the 64 point */
  /* output is in 5.11(q11) format for the 16 point */


#else /* arm_radix4_butterfly_inverse_q15 */

  q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;

  /* Initializations for the first stage */
  n2 = fftLen;
  n1 = n2;

  /* n2 = fftLen/4 */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Index for input read and output write */
  i0 = 0U;

  /* Number of butterflies in the first stage */
  j = n2;

  /* Input is in 1.15(q15) format */

  /* Start of first stage process */
  do
  {
    /* Slot indices (in complex samples): */
    /* i0, i0 + fftLen/4, i0 + fftLen/2, i0 + 3fftLen/4 */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* Every input of this stage is down scaled by 4 to avoid overflow */

    /* T0 = xa, T1 = ya */
    T0 = pSrc16[i0 * 2U] >> 2U;
    T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
    /* S0 = xc, S1 = yc */
    S0 = pSrc16[i2 * 2U] >> 2U;
    S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;

    /* R0 = (xa + xc), R1 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);
    /* S0 = (xa - xc), S1 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /* T0 = xb, T1 = yb */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
    /* U0 = xd, U1 = yd */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;

    /* T0 = (xb + xd), T1 = (yb + yd) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /* Slot a: sum of the four inputs, each half shifted right by one */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd) */
    R0 = __SSAT(R0 - T0, 16U);
    R1 = __SSAT(R1 - T1, 16U);
    /* co2 & si2 are read from Coefficient pointer */
    Co2 = pCoef16[2U * ic * 2U];
    Si2 = pCoef16[(2U * ic * 2U) + 1U];
    /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
    out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U);
    /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
    out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U);

    /* Re-read b (T0 = xb, T1 = yb) before slot b is overwritten below */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;

    /* Slot b receives (xc', yc') */
    pSrc16[i1 * 2U] = out1;
    pSrc16[(i1 * 2U) + 1U] = out2;

    /* Re-read d (U0 = xd, U1 = yd) */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;

    /* T0 = (xb - xd), T1 = (yb - yd) */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);
    /* R0 = (xa-xc) + (yb-yd), R1 = (ya-yc) - (xb-xd) */
    R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
    R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
    /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb-xd) */
    S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
    S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);

    /* co1 & si1 are read from Coefficient pointer */
    Co1 = pCoef16[ic * 2U];
    Si1 = pCoef16[(ic * 2U) + 1U];
    /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
    out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
    /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
    out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
    /* Slot c receives (xb', yb') */
    pSrc16[i2 * 2U] = out1;
    pSrc16[(i2 * 2U) + 1U] = out2;

    /* co3 & si3 are read from Coefficient pointer */
    Co3 = pCoef16[3U * ic * 2U];
    Si3 = pCoef16[(3U * ic * 2U) + 1U];
    /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
    out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
    /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
    out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
    /* Slot d receives (xd', yd') */
    pSrc16[i3 * 2U] = out1;
    pSrc16[(i3 * 2U) + 1U] = out2;

    /* Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

    /* Updating input index */
    i0 = i0 + 1U;

  } while (--j);

  /* End of first stage process */

  /* data is in 4.11(q11) format */


  /* Start of Middle stage process */

  /* Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /* Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* Initializations for the middle stage */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    for (j = 0U; j <= (n2 - 1U); j++)
    {
      /* The three twiddles are shared by every butterfly visited for this j */
      Co1 = pCoef16[ic * 2U];
      Si1 = pCoef16[(ic * 2U) + 1U];
      Co2 = pCoef16[2U * ic * 2U];
      Si2 = pCoef16[2U * ic * 2U + 1U];
      Co3 = pCoef16[3U * ic * 2U];
      Si3 = pCoef16[(3U * ic * 2U) + 1U];

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      /* Butterfly implementation: one butterfly every n1 complex samples */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* Slot indices (in complex samples), n2 apart */
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /* T0 = xa, T1 = ya */
        T0 = pSrc16[i0 * 2U];
        T1 = pSrc16[(i0 * 2U) + 1U];

        /* S0 = xc, S1 = yc */
        S0 = pSrc16[i2 * 2U];
        S1 = pSrc16[(i2 * 2U) + 1U];

        /* R0 = (xa + xc), R1 = (ya + yc) */
        R0 = __SSAT(T0 + S0, 16U);
        R1 = __SSAT(T1 + S1, 16U);
        /* S0 = (xa - xc), S1 = (ya - yc) */
        S0 = __SSAT(T0 - S0, 16U);
        S1 = __SSAT(T1 - S1, 16U);

        /* T0 = xb, T1 = yb */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* U0 = xd, U1 = yd */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = (xb + xd), T1 = (yb + yd) */
        T0 = __SSAT(T0 + U0, 16U);
        T1 = __SSAT(T1 + U1, 16U);

        /* Slot a: sum of the four inputs, scaled down by two right shifts */
        /* xa' = xa + xb + xc + xd */
        /* ya' = ya + yb + yc + yd */
        pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
        pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;

        /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), halved */
        R0 = (R0 >> 1U) - (T0 >> 1U);
        R1 = (R1 >> 1U) - (T1 >> 1U);

        /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
        out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
        /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
        out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);

        /* Re-read b (T0 = xb, T1 = yb) before slot b is overwritten below */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* Slot b receives (xc', yc') */
        pSrc16[i1 * 2U] = out1;
        pSrc16[(i1 * 2U) + 1U] = out2;

        /* Re-read d (U0 = xd, U1 = yd) */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = (xb - xd), T1 = (yb - yd) */
        T0 = __SSAT(T0 - U0, 16U);
        T1 = __SSAT(T1 - U1, 16U);

        /* R0 = (xa-xc) + (yb-yd), R1 = (ya-yc) - (xb-xd), halved */
        R0 = (S0 >> 1U) + (T1 >> 1U);
        R1 = (S1 >> 1U) - (T0 >> 1U);

        /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb-xd), halved */
        S0 = (S0 >> 1U) - (T1 >> 1U);
        S1 = (S1 >> 1U) + (T0 >> 1U);

        /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
        out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
        /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
        out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
        /* Slot c receives (xb', yb') */
        pSrc16[i2 * 2U] = out1;
        pSrc16[(i2 * 2U) + 1U] = out2;

        /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
        out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
        /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
        out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
        /* Slot d receives (xd', yd') */
        pSrc16[i3 * 2U] = out1;
        pSrc16[(i3 * 2U) + 1U] = out2;
      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* End of Middle stages process */


  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* start of last stage process */


  /* Initializations for the last stage */
  n1 = n2;
  n2 >>= 2U;

  /* Butterfly implementation: no twiddle multiplication in this stage */
  for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  {
    /* Slot indices (in complex samples), n2 apart */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* T0 = xa, T1 = ya */
    T0 = pSrc16[i0 * 2U];
    T1 = pSrc16[(i0 * 2U) + 1U];
    /* S0 = xc, S1 = yc */
    S0 = pSrc16[i2 * 2U];
    S1 = pSrc16[(i2 * 2U) + 1U];

    /* R0 = (xa + xc), R1 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);
    /* S0 = (xa - xc), S1 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /* T0 = xb, T1 = yb */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];
    /* U0 = xd, U1 = yd */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];

    /* T0 = (xb + xd), T1 = (yb + yd) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /* Slot a (halved) */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), halved */
    R0 = (R0 >> 1U) - (T0 >> 1U);
    R1 = (R1 >> 1U) - (T1 >> 1U);

    /* Re-read b (T0 = xb, T1 = yb) before slot b is overwritten below */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];

    /* Slot b receives (xc', yc') */
    /* xc' = (xa-xb+xc-xd) */
    /* yc' = (ya-yb+yc-yd) */
    pSrc16[i1 * 2U] = R0;
    pSrc16[(i1 * 2U) + 1U] = R1;

    /* Re-read d (U0 = xd, U1 = yd) */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];
    /* T0 = (xb - xd), T1 = (yb - yd) */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);

    /* Slot c receives (xb', yb'), halved */
    /* xb' = (xa-yb-xc+yd) */
    /* yb' = (ya+xb-yc-xd) */
    pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U);
    pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);


    /* Slot d receives (xd', yd'), halved */
    /* xd' = (xa+yb-xc-yd) */
    /* yd' = (ya-xb-yc+xd) */
    pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U);
    pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
  }
  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point */
  /* output is in 7.9(q9) format for the 64 point */
  /* output is in 5.11(q11) format for the 16 point */

#endif /* #if defined (ARM_MATH_DSP) */

}
1810