/* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cfft_radix4_f32.c
 * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
 *
 * $Date:        23 April 2021
 * $Revision:    V1.9.0
 *
 * Target Processor: Cortex-M and Cortex-A cores
 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
28 
29 #include "dsp/transform_functions.h"
30 
31 extern void arm_bitreversal_f32(
32         float32_t * pSrc,
33         uint16_t fftSize,
34         uint16_t bitRevFactor,
35   const uint16_t * pBitRevTab);
36 
37 void arm_radix4_butterfly_f32(
38         float32_t * pSrc,
39         uint16_t fftLen,
40   const float32_t * pCoef,
41         uint16_t twidCoefModifier);
42 
43 void arm_radix4_butterfly_inverse_f32(
44         float32_t * pSrc,
45         uint16_t fftLen,
46   const float32_t * pCoef,
47         uint16_t twidCoefModifier,
48         float32_t onebyfftLen);
49 
50 
51 
52 
/**
  @addtogroup ComplexFFTDeprecated
  @{
 */
57 
58 
59 /**
60   @brief         Processing function for the floating-point Radix-4 CFFT/CIFFT.
61   @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
62   @param[in]     S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
63   @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
64   @return        none
65  */
66 
arm_cfft_radix4_f32(const arm_cfft_radix4_instance_f32 * S,float32_t * pSrc)67 void arm_cfft_radix4_f32(
68   const arm_cfft_radix4_instance_f32 * S,
69         float32_t * pSrc)
70 {
71    if (S->ifftFlag == 1U)
72    {
73       /*  Complex IFFT radix-4  */
74       arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
75    }
76    else
77    {
78       /*  Complex FFT radix-4  */
79       arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
80    }
81 
82    if (S->bitReverseFlag == 1U)
83    {
84       /*  Bit Reversal */
85       arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
86    }
87 
88 }
89 
/**
  @} end of ComplexFFTDeprecated group
 */
93 
/* ----------------------------------------------------------------------
 * Internal helper functions used by the FFTs
 * ---------------------------------------------------------------------- */
97 
98 /**
99   brief         Core function for the floating-point CFFT butterfly process.
100   param[in,out] pSrc             points to the in-place buffer of floating-point data type
101   param[in]     fftLen           length of the FFT
102   param[in]     pCoef            points to the twiddle coefficient buffer
103   param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
104   return        none
105  */
106 
arm_radix4_butterfly_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier)107 void arm_radix4_butterfly_f32(
108         float32_t * pSrc,
109         uint16_t fftLen,
110   const float32_t * pCoef,
111         uint16_t twidCoefModifier)
112 {
113         float32_t co1, co2, co3, si1, si2, si3;
114         uint32_t ia1, ia2, ia3;
115         uint32_t i0, i1, i2, i3;
116         uint32_t n1, n2, j, k;
117 
118 #if defined (ARM_MATH_LOOPUNROLL)
119 
120         float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
121         float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
122         Ybminusd;
123         float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
124         float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
125         float32_t *ptr1;
126         float32_t p0,p1,p2,p3,p4,p5;
127         float32_t a0,a1,a2,a3,a4,a5,a6,a7;
128 
129    /*  Initializations for the first stage */
130    n2 = fftLen;
131    n1 = n2;
132 
133    /* n2 = fftLen/4 */
134    n2 >>= 2U;
135    i0 = 0U;
136    ia1 = 0U;
137 
138    j = n2;
139 
140    /*  Calculation of first stage */
141    do
142    {
143       /*  index calculation for the input as, */
144       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
145       i1 = i0 + n2;
146       i2 = i1 + n2;
147       i3 = i2 + n2;
148 
149       xaIn = pSrc[(2U * i0)];
150       yaIn = pSrc[(2U * i0) + 1U];
151 
152       xbIn = pSrc[(2U * i1)];
153       ybIn = pSrc[(2U * i1) + 1U];
154 
155       xcIn = pSrc[(2U * i2)];
156       ycIn = pSrc[(2U * i2) + 1U];
157 
158       xdIn = pSrc[(2U * i3)];
159       ydIn = pSrc[(2U * i3) + 1U];
160 
161       /* xa + xc */
162       Xaplusc = xaIn + xcIn;
163       /* xb + xd */
164       Xbplusd = xbIn + xdIn;
165       /* ya + yc */
166       Yaplusc = yaIn + ycIn;
167       /* yb + yd */
168       Ybplusd = ybIn + ydIn;
169 
170       /*  index calculation for the coefficients */
171       ia2 = ia1 + ia1;
172       co2 = pCoef[ia2 * 2U];
173       si2 = pCoef[(ia2 * 2U) + 1U];
174 
175       /* xa - xc */
176       Xaminusc = xaIn - xcIn;
177       /* xb - xd */
178       Xbminusd = xbIn - xdIn;
179       /* ya - yc */
180       Yaminusc = yaIn - ycIn;
181       /* yb - yd */
182       Ybminusd = ybIn - ydIn;
183 
184       /* xa' = xa + xb + xc + xd */
185       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
186       /* ya' = ya + yb + yc + yd */
187       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
188 
189       /* (xa - xc) + (yb - yd) */
190       Xb12C_out = (Xaminusc + Ybminusd);
191       /* (ya - yc) + (xb - xd) */
192       Yb12C_out = (Yaminusc - Xbminusd);
193       /* (xa + xc) - (xb + xd) */
194       Xc12C_out = (Xaplusc - Xbplusd);
195       /* (ya + yc) - (yb + yd) */
196       Yc12C_out = (Yaplusc - Ybplusd);
197       /* (xa - xc) - (yb - yd) */
198       Xd12C_out = (Xaminusc - Ybminusd);
199       /* (ya - yc) + (xb - xd) */
200       Yd12C_out = (Xbminusd + Yaminusc);
201 
202       co1 = pCoef[ia1 * 2U];
203       si1 = pCoef[(ia1 * 2U) + 1U];
204 
205       /*  index calculation for the coefficients */
206       ia3 = ia2 + ia1;
207       co3 = pCoef[ia3 * 2U];
208       si3 = pCoef[(ia3 * 2U) + 1U];
209 
210       Xb12_out = Xb12C_out * co1;
211       Yb12_out = Yb12C_out * co1;
212       Xc12_out = Xc12C_out * co2;
213       Yc12_out = Yc12C_out * co2;
214       Xd12_out = Xd12C_out * co3;
215       Yd12_out = Yd12C_out * co3;
216 
217       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
218       //Xb12_out -= Yb12C_out * si1;
219       p0 = Yb12C_out * si1;
220       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
221       //Yb12_out += Xb12C_out * si1;
222       p1 = Xb12C_out * si1;
223       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
224       //Xc12_out -= Yc12C_out * si2;
225       p2 = Yc12C_out * si2;
226       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
227       //Yc12_out += Xc12C_out * si2;
228       p3 = Xc12C_out * si2;
229       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
230       //Xd12_out -= Yd12C_out * si3;
231       p4 = Yd12C_out * si3;
232       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
233       //Yd12_out += Xd12C_out * si3;
234       p5 = Xd12C_out * si3;
235 
236       Xb12_out += p0;
237       Yb12_out -= p1;
238       Xc12_out += p2;
239       Yc12_out -= p3;
240       Xd12_out += p4;
241       Yd12_out -= p5;
242 
243       /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
244       pSrc[2U * i1] = Xc12_out;
245 
246       /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
247       pSrc[(2U * i1) + 1U] = Yc12_out;
248 
249       /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
250       pSrc[2U * i2] = Xb12_out;
251 
252       /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
253       pSrc[(2U * i2) + 1U] = Yb12_out;
254 
255       /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
256       pSrc[2U * i3] = Xd12_out;
257 
258       /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
259       pSrc[(2U * i3) + 1U] = Yd12_out;
260 
261       /*  Twiddle coefficients index modifier */
262       ia1 += twidCoefModifier;
263 
264       /*  Updating input index */
265       i0++;
266 
267    }
268    while (--j);
269 
270    twidCoefModifier <<= 2U;
271 
272    /*  Calculation of second stage to excluding last stage */
273    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
274    {
275       /*  Initializations for the first stage */
276       n1 = n2;
277       n2 >>= 2U;
278       ia1 = 0U;
279 
280       /*  Calculation of first stage */
281       j = 0;
282       do
283       {
284          /*  index calculation for the coefficients */
285          ia2 = ia1 + ia1;
286          ia3 = ia2 + ia1;
287          co1 = pCoef[(ia1 * 2U)];
288          si1 = pCoef[(ia1 * 2U) + 1U];
289          co2 = pCoef[(ia2 * 2U)];
290          si2 = pCoef[(ia2 * 2U) + 1U];
291          co3 = pCoef[(ia3 * 2U)];
292          si3 = pCoef[(ia3 * 2U) + 1U];
293 
294          /*  Twiddle coefficients index modifier */
295          ia1 += twidCoefModifier;
296 
297          i0 = j;
298          do
299          {
300             /*  index calculation for the input as, */
301             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
302             i1 = i0 + n2;
303             i2 = i1 + n2;
304             i3 = i2 + n2;
305 
306             xaIn = pSrc[(2U * i0)];
307             yaIn = pSrc[(2U * i0) + 1U];
308 
309             xbIn = pSrc[(2U * i1)];
310             ybIn = pSrc[(2U * i1) + 1U];
311 
312             xcIn = pSrc[(2U * i2)];
313             ycIn = pSrc[(2U * i2) + 1U];
314 
315             xdIn = pSrc[(2U * i3)];
316             ydIn = pSrc[(2U * i3) + 1U];
317 
318             /* xa - xc */
319             Xaminusc = xaIn - xcIn;
320             /* (xb - xd) */
321             Xbminusd = xbIn - xdIn;
322             /* ya - yc */
323             Yaminusc = yaIn - ycIn;
324             /* (yb - yd) */
325             Ybminusd = ybIn - ydIn;
326 
327             /* xa + xc */
328             Xaplusc = xaIn + xcIn;
329             /* xb + xd */
330             Xbplusd = xbIn + xdIn;
331             /* ya + yc */
332             Yaplusc = yaIn + ycIn;
333             /* yb + yd */
334             Ybplusd = ybIn + ydIn;
335 
336             /* (xa - xc) + (yb - yd) */
337             Xb12C_out = (Xaminusc + Ybminusd);
338             /* (ya - yc) -  (xb - xd) */
339             Yb12C_out = (Yaminusc - Xbminusd);
340             /* xa + xc -(xb + xd) */
341             Xc12C_out = (Xaplusc - Xbplusd);
342             /* (ya + yc) - (yb + yd) */
343             Yc12C_out = (Yaplusc - Ybplusd);
344             /* (xa - xc) - (yb - yd) */
345             Xd12C_out = (Xaminusc - Ybminusd);
346             /* (ya - yc) +  (xb - xd) */
347             Yd12C_out = (Xbminusd + Yaminusc);
348 
349             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
350             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
351 
352             Xb12_out = Xb12C_out * co1;
353             Yb12_out = Yb12C_out * co1;
354             Xc12_out = Xc12C_out * co2;
355             Yc12_out = Yc12C_out * co2;
356             Xd12_out = Xd12C_out * co3;
357             Yd12_out = Yd12C_out * co3;
358 
359             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
360             //Xb12_out -= Yb12C_out * si1;
361             p0 = Yb12C_out * si1;
362             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
363             //Yb12_out += Xb12C_out * si1;
364             p1 = Xb12C_out * si1;
365             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
366             //Xc12_out -= Yc12C_out * si2;
367             p2 = Yc12C_out * si2;
368             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
369             //Yc12_out += Xc12C_out * si2;
370             p3 = Xc12C_out * si2;
371             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
372             //Xd12_out -= Yd12C_out * si3;
373             p4 = Yd12C_out * si3;
374             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
375             //Yd12_out += Xd12C_out * si3;
376             p5 = Xd12C_out * si3;
377 
378             Xb12_out += p0;
379             Yb12_out -= p1;
380             Xc12_out += p2;
381             Yc12_out -= p3;
382             Xd12_out += p4;
383             Yd12_out -= p5;
384 
385             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
386             pSrc[2U * i1] = Xc12_out;
387 
388             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
389             pSrc[(2U * i1) + 1U] = Yc12_out;
390 
391             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
392             pSrc[2U * i2] = Xb12_out;
393 
394             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
395             pSrc[(2U * i2) + 1U] = Yb12_out;
396 
397             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
398             pSrc[2U * i3] = Xd12_out;
399 
400             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
401             pSrc[(2U * i3) + 1U] = Yd12_out;
402 
403             i0 += n1;
404          } while (i0 < fftLen);
405          j++;
406       } while (j <= (n2 - 1U));
407       twidCoefModifier <<= 2U;
408    }
409 
410    j = fftLen >> 2;
411    ptr1 = &pSrc[0];
412 
413    /*  Calculations of last stage */
414    do
415    {
416       xaIn = ptr1[0];
417       yaIn = ptr1[1];
418       xbIn = ptr1[2];
419       ybIn = ptr1[3];
420       xcIn = ptr1[4];
421       ycIn = ptr1[5];
422       xdIn = ptr1[6];
423       ydIn = ptr1[7];
424 
425       /* xa + xc */
426       Xaplusc = xaIn + xcIn;
427 
428       /* xa - xc */
429       Xaminusc = xaIn - xcIn;
430 
431       /* ya + yc */
432       Yaplusc = yaIn + ycIn;
433 
434       /* ya - yc */
435       Yaminusc = yaIn - ycIn;
436 
437       /* xb + xd */
438       Xbplusd = xbIn + xdIn;
439 
440       /* yb + yd */
441       Ybplusd = ybIn + ydIn;
442 
443       /* (xb-xd) */
444       Xbminusd = xbIn - xdIn;
445 
446       /* (yb-yd) */
447       Ybminusd = ybIn - ydIn;
448 
449       /* xa' = xa + xb + xc + xd */
450       a0 = (Xaplusc + Xbplusd);
451       /* ya' = ya + yb + yc + yd */
452       a1 = (Yaplusc + Ybplusd);
453       /* xc' = (xa-xb+xc-xd) */
454       a2 = (Xaplusc - Xbplusd);
455       /* yc' = (ya-yb+yc-yd) */
456       a3 = (Yaplusc - Ybplusd);
457       /* xb' = (xa+yb-xc-yd) */
458       a4 = (Xaminusc + Ybminusd);
459       /* yb' = (ya-xb-yc+xd) */
460       a5 = (Yaminusc - Xbminusd);
461       /* xd' = (xa-yb-xc+yd)) */
462       a6 = (Xaminusc - Ybminusd);
463       /* yd' = (ya+xb-yc-xd) */
464       a7 = (Xbminusd + Yaminusc);
465 
466       ptr1[0] = a0;
467       ptr1[1] = a1;
468       ptr1[2] = a2;
469       ptr1[3] = a3;
470       ptr1[4] = a4;
471       ptr1[5] = a5;
472       ptr1[6] = a6;
473       ptr1[7] = a7;
474 
475       /* increment pointer by 8 */
476       ptr1 += 8U;
477    } while (--j);
478 
479 #else
480 
481         float32_t t1, t2, r1, r2, s1, s2;
482 
483    /* Initializations for the fft calculation */
484    n2 = fftLen;
485    n1 = n2;
486    for (k = fftLen; k > 1U; k >>= 2U)
487    {
488       /*  Initializations for the fft calculation */
489       n1 = n2;
490       n2 >>= 2U;
491       ia1 = 0U;
492 
493       /*  FFT Calculation */
494       j = 0;
495       do
496       {
497          /*  index calculation for the coefficients */
498          ia2 = ia1 + ia1;
499          ia3 = ia2 + ia1;
500          co1 = pCoef[ia1 * 2U];
501          si1 = pCoef[(ia1 * 2U) + 1U];
502          co2 = pCoef[ia2 * 2U];
503          si2 = pCoef[(ia2 * 2U) + 1U];
504          co3 = pCoef[ia3 * 2U];
505          si3 = pCoef[(ia3 * 2U) + 1U];
506 
507          /*  Twiddle coefficients index modifier */
508          ia1 = ia1 + twidCoefModifier;
509 
510          i0 = j;
511          do
512          {
513             /*  index calculation for the input as, */
514             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
515             i1 = i0 + n2;
516             i2 = i1 + n2;
517             i3 = i2 + n2;
518 
519             /* xa + xc */
520             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
521 
522             /* xa - xc */
523             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
524 
525             /* ya + yc */
526             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
527 
528             /* ya - yc */
529             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
530 
531             /* xb + xd */
532             t1 = pSrc[2U * i1] + pSrc[2U * i3];
533 
534             /* xa' = xa + xb + xc + xd */
535             pSrc[2U * i0] = r1 + t1;
536 
537             /* xa + xc -(xb + xd) */
538             r1 = r1 - t1;
539 
540             /* yb + yd */
541             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
542 
543             /* ya' = ya + yb + yc + yd */
544             pSrc[(2U * i0) + 1U] = s1 + t2;
545 
546             /* (ya + yc) - (yb + yd) */
547             s1 = s1 - t2;
548 
549             /* (yb - yd) */
550             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
551 
552             /* (xb - xd) */
553             t2 = pSrc[2U * i1] - pSrc[2U * i3];
554 
555             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
556             pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
557 
558             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
559             pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
560 
561             /* (xa - xc) + (yb - yd) */
562             r1 = r2 + t1;
563 
564             /* (xa - xc) - (yb - yd) */
565             r2 = r2 - t1;
566 
567             /* (ya - yc) -  (xb - xd) */
568             s1 = s2 - t2;
569 
570             /* (ya - yc) +  (xb - xd) */
571             s2 = s2 + t2;
572 
573             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
574             pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
575 
576             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
577             pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
578 
579             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
580             pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
581 
582             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
583             pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
584 
585             i0 += n1;
586          } while ( i0 < fftLen);
587          j++;
588       } while (j <= (n2 - 1U));
589       twidCoefModifier <<= 2U;
590    }
591 
592 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
593 
594 }
595 
596 /**
597   brief         Core function for the floating-point CIFFT butterfly process.
598   param[in,out] pSrc             points to the in-place buffer of floating-point data type
599   param[in]     fftLen           length of the FFT
600   param[in]     pCoef            points to twiddle coefficient buffer
601   param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
602   param[in]     onebyfftLen      value of 1/fftLen
603   return        none
604  */
605 
arm_radix4_butterfly_inverse_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier,float32_t onebyfftLen)606 void arm_radix4_butterfly_inverse_f32(
607         float32_t * pSrc,
608         uint16_t fftLen,
609   const float32_t * pCoef,
610         uint16_t twidCoefModifier,
611         float32_t onebyfftLen)
612 {
613         float32_t co1, co2, co3, si1, si2, si3;
614         uint32_t ia1, ia2, ia3;
615         uint32_t i0, i1, i2, i3;
616         uint32_t n1, n2, j, k;
617 
618 #if defined (ARM_MATH_LOOPUNROLL)
619 
620         float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
621         float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
622         Ybminusd;
623         float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
624         float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
625         float32_t *ptr1;
626         float32_t p0,p1,p2,p3,p4,p5,p6,p7;
627         float32_t a0,a1,a2,a3,a4,a5,a6,a7;
628 
629 
630    /*  Initializations for the first stage */
631    n2 = fftLen;
632    n1 = n2;
633 
634    /* n2 = fftLen/4 */
635    n2 >>= 2U;
636    i0 = 0U;
637    ia1 = 0U;
638 
639    j = n2;
640 
641    /*  Calculation of first stage */
642    do
643    {
644       /*  index calculation for the input as, */
645       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
646       i1 = i0 + n2;
647       i2 = i1 + n2;
648       i3 = i2 + n2;
649 
650       /*  Butterfly implementation */
651       xaIn = pSrc[(2U * i0)];
652       yaIn = pSrc[(2U * i0) + 1U];
653 
654       xcIn = pSrc[(2U * i2)];
655       ycIn = pSrc[(2U * i2) + 1U];
656 
657       xbIn = pSrc[(2U * i1)];
658       ybIn = pSrc[(2U * i1) + 1U];
659 
660       xdIn = pSrc[(2U * i3)];
661       ydIn = pSrc[(2U * i3) + 1U];
662 
663       /* xa + xc */
664       Xaplusc = xaIn + xcIn;
665       /* xb + xd */
666       Xbplusd = xbIn + xdIn;
667       /* ya + yc */
668       Yaplusc = yaIn + ycIn;
669       /* yb + yd */
670       Ybplusd = ybIn + ydIn;
671 
672       /*  index calculation for the coefficients */
673       ia2 = ia1 + ia1;
674       co2 = pCoef[ia2 * 2U];
675       si2 = pCoef[(ia2 * 2U) + 1U];
676 
677       /* xa - xc */
678       Xaminusc = xaIn - xcIn;
679       /* xb - xd */
680       Xbminusd = xbIn - xdIn;
681       /* ya - yc */
682       Yaminusc = yaIn - ycIn;
683       /* yb - yd */
684       Ybminusd = ybIn - ydIn;
685 
686       /* xa' = xa + xb + xc + xd */
687       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
688 
689       /* ya' = ya + yb + yc + yd */
690       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
691 
692       /* (xa - xc) - (yb - yd) */
693       Xb12C_out = (Xaminusc - Ybminusd);
694       /* (ya - yc) + (xb - xd) */
695       Yb12C_out = (Yaminusc + Xbminusd);
696       /* (xa + xc) - (xb + xd) */
697       Xc12C_out = (Xaplusc - Xbplusd);
698       /* (ya + yc) - (yb + yd) */
699       Yc12C_out = (Yaplusc - Ybplusd);
700       /* (xa - xc) + (yb - yd) */
701       Xd12C_out = (Xaminusc + Ybminusd);
702       /* (ya - yc) - (xb - xd) */
703       Yd12C_out = (Yaminusc - Xbminusd);
704 
705       co1 = pCoef[ia1 * 2U];
706       si1 = pCoef[(ia1 * 2U) + 1U];
707 
708       /*  index calculation for the coefficients */
709       ia3 = ia2 + ia1;
710       co3 = pCoef[ia3 * 2U];
711       si3 = pCoef[(ia3 * 2U) + 1U];
712 
713       Xb12_out = Xb12C_out * co1;
714       Yb12_out = Yb12C_out * co1;
715       Xc12_out = Xc12C_out * co2;
716       Yc12_out = Yc12C_out * co2;
717       Xd12_out = Xd12C_out * co3;
718       Yd12_out = Yd12C_out * co3;
719 
720       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
721       //Xb12_out -= Yb12C_out * si1;
722       p0 = Yb12C_out * si1;
723       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
724       //Yb12_out += Xb12C_out * si1;
725       p1 = Xb12C_out * si1;
726       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
727       //Xc12_out -= Yc12C_out * si2;
728       p2 = Yc12C_out * si2;
729       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
730       //Yc12_out += Xc12C_out * si2;
731       p3 = Xc12C_out * si2;
732       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
733       //Xd12_out -= Yd12C_out * si3;
734       p4 = Yd12C_out * si3;
735       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
736       //Yd12_out += Xd12C_out * si3;
737       p5 = Xd12C_out * si3;
738 
739       Xb12_out -= p0;
740       Yb12_out += p1;
741       Xc12_out -= p2;
742       Yc12_out += p3;
743       Xd12_out -= p4;
744       Yd12_out += p5;
745 
746       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
747       pSrc[2U * i1] = Xc12_out;
748 
749       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
750       pSrc[(2U * i1) + 1U] = Yc12_out;
751 
752       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
753       pSrc[2U * i2] = Xb12_out;
754 
755       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
756       pSrc[(2U * i2) + 1U] = Yb12_out;
757 
758       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
759       pSrc[2U * i3] = Xd12_out;
760 
761       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
762       pSrc[(2U * i3) + 1U] = Yd12_out;
763 
764       /*  Twiddle coefficients index modifier */
765       ia1 = ia1 + twidCoefModifier;
766 
767       /*  Updating input index */
768       i0 = i0 + 1U;
769 
770    } while (--j);
771 
772    twidCoefModifier <<= 2U;
773 
774    /*  Calculation of second stage to excluding last stage */
775    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
776    {
777       /*  Initializations for the first stage */
778       n1 = n2;
779       n2 >>= 2U;
780       ia1 = 0U;
781 
782       /*  Calculation of first stage */
783       j = 0;
784       do
785       {
786          /*  index calculation for the coefficients */
787          ia2 = ia1 + ia1;
788          ia3 = ia2 + ia1;
789          co1 = pCoef[ia1 * 2U];
790          si1 = pCoef[(ia1 * 2U) + 1U];
791          co2 = pCoef[ia2 * 2U];
792          si2 = pCoef[(ia2 * 2U) + 1U];
793          co3 = pCoef[ia3 * 2U];
794          si3 = pCoef[(ia3 * 2U) + 1U];
795 
796          /*  Twiddle coefficients index modifier */
797          ia1 = ia1 + twidCoefModifier;
798 
799          i0 = j;
800          do
801          {
802             /*  index calculation for the input as, */
803             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
804             i1 = i0 + n2;
805             i2 = i1 + n2;
806             i3 = i2 + n2;
807 
808             xaIn = pSrc[(2U * i0)];
809             yaIn = pSrc[(2U * i0) + 1U];
810 
811             xbIn = pSrc[(2U * i1)];
812             ybIn = pSrc[(2U * i1) + 1U];
813 
814             xcIn = pSrc[(2U * i2)];
815             ycIn = pSrc[(2U * i2) + 1U];
816 
817             xdIn = pSrc[(2U * i3)];
818             ydIn = pSrc[(2U * i3) + 1U];
819 
820             /* xa - xc */
821             Xaminusc = xaIn - xcIn;
822             /* (xb - xd) */
823             Xbminusd = xbIn - xdIn;
824             /* ya - yc */
825             Yaminusc = yaIn - ycIn;
826             /* (yb - yd) */
827             Ybminusd = ybIn - ydIn;
828 
829             /* xa + xc */
830             Xaplusc = xaIn + xcIn;
831             /* xb + xd */
832             Xbplusd = xbIn + xdIn;
833             /* ya + yc */
834             Yaplusc = yaIn + ycIn;
835             /* yb + yd */
836             Ybplusd = ybIn + ydIn;
837 
838             /* (xa - xc) - (yb - yd) */
839             Xb12C_out = (Xaminusc - Ybminusd);
840             /* (ya - yc) +  (xb - xd) */
841             Yb12C_out = (Yaminusc + Xbminusd);
842             /* xa + xc -(xb + xd) */
843             Xc12C_out = (Xaplusc - Xbplusd);
844             /* (ya + yc) - (yb + yd) */
845             Yc12C_out = (Yaplusc - Ybplusd);
846             /* (xa - xc) + (yb - yd) */
847             Xd12C_out = (Xaminusc + Ybminusd);
848             /* (ya - yc) -  (xb - xd) */
849             Yd12C_out = (Yaminusc - Xbminusd);
850 
851             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
852             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
853 
854             Xb12_out = Xb12C_out * co1;
855             Yb12_out = Yb12C_out * co1;
856             Xc12_out = Xc12C_out * co2;
857             Yc12_out = Yc12C_out * co2;
858             Xd12_out = Xd12C_out * co3;
859             Yd12_out = Yd12C_out * co3;
860 
861             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
862             //Xb12_out -= Yb12C_out * si1;
863             p0 = Yb12C_out * si1;
864             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
865             //Yb12_out += Xb12C_out * si1;
866             p1 = Xb12C_out * si1;
867             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
868             //Xc12_out -= Yc12C_out * si2;
869             p2 = Yc12C_out * si2;
870             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
871             //Yc12_out += Xc12C_out * si2;
872             p3 = Xc12C_out * si2;
873             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
874             //Xd12_out -= Yd12C_out * si3;
875             p4 = Yd12C_out * si3;
876             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
877             //Yd12_out += Xd12C_out * si3;
878             p5 = Xd12C_out * si3;
879 
880             Xb12_out -= p0;
881             Yb12_out += p1;
882             Xc12_out -= p2;
883             Yc12_out += p3;
884             Xd12_out -= p4;
885             Yd12_out += p5;
886 
887             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
888             pSrc[2U * i1] = Xc12_out;
889 
890             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
891             pSrc[(2U * i1) + 1U] = Yc12_out;
892 
893             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
894             pSrc[2U * i2] = Xb12_out;
895 
896             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
897             pSrc[(2U * i2) + 1U] = Yb12_out;
898 
899             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
900             pSrc[2U * i3] = Xd12_out;
901 
902             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
903             pSrc[(2U * i3) + 1U] = Yd12_out;
904 
905             i0 += n1;
906          } while (i0 < fftLen);
907          j++;
908       } while (j <= (n2 - 1U));
909       twidCoefModifier <<= 2U;
910    }
911    /*  Initializations of last stage */
912 
913    j = fftLen >> 2;
914    ptr1 = &pSrc[0];
915 
916    /*  Calculations of last stage */
917    do
918    {
919       xaIn = ptr1[0];
920       yaIn = ptr1[1];
921       xbIn = ptr1[2];
922       ybIn = ptr1[3];
923       xcIn = ptr1[4];
924       ycIn = ptr1[5];
925       xdIn = ptr1[6];
926       ydIn = ptr1[7];
927 
928       /*  Butterfly implementation */
929       /* xa + xc */
930       Xaplusc = xaIn + xcIn;
931 
932       /* xa - xc */
933       Xaminusc = xaIn - xcIn;
934 
935       /* ya + yc */
936       Yaplusc = yaIn + ycIn;
937 
938       /* ya - yc */
939       Yaminusc = yaIn - ycIn;
940 
941       /* xb + xd */
942       Xbplusd = xbIn + xdIn;
943 
944       /* yb + yd */
945       Ybplusd = ybIn + ydIn;
946 
947       /* (xb-xd) */
948       Xbminusd = xbIn - xdIn;
949 
950       /* (yb-yd) */
951       Ybminusd = ybIn - ydIn;
952 
953       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
954       a0 = (Xaplusc + Xbplusd);
955       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
956       a1 = (Yaplusc + Ybplusd);
957       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
958       a2 = (Xaplusc - Xbplusd);
959       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
960       a3 = (Yaplusc - Ybplusd);
961       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
962       a4 = (Xaminusc - Ybminusd);
963       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
964       a5 = (Yaminusc + Xbminusd);
965       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
966       a6 = (Xaminusc + Ybminusd);
967       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
968       a7 = (Yaminusc - Xbminusd);
969 
970       p0 = a0 * onebyfftLen;
971       p1 = a1 * onebyfftLen;
972       p2 = a2 * onebyfftLen;
973       p3 = a3 * onebyfftLen;
974       p4 = a4 * onebyfftLen;
975       p5 = a5 * onebyfftLen;
976       p6 = a6 * onebyfftLen;
977       p7 = a7 * onebyfftLen;
978 
979       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
980       ptr1[0] = p0;
981       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
982       ptr1[1] = p1;
983       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
984       ptr1[2] = p2;
985       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
986       ptr1[3] = p3;
987       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
988       ptr1[4] = p4;
989       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
990       ptr1[5] = p5;
      /* xd' = (xa+yb-xc-yd) * onebyfftLen */
992       ptr1[6] = p6;
993       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
994       ptr1[7] = p7;
995 
996       /* increment source pointer by 8 for next calculations */
997       ptr1 = ptr1 + 8U;
998 
999    } while (--j);
1000 
1001 #else
1002 
1003         float32_t t1, t2, r1, r2, s1, s2;
1004 
1005    /*  Initializations for the first stage */
1006    n2 = fftLen;
1007    n1 = n2;
1008 
   /*  Calculation of all stages except the last (loop runs while k > 4) */
1010    for (k = fftLen; k > 4U; k >>= 2U)
1011    {
      /*  Initializations for the current stage */
1013       n1 = n2;
1014       n2 >>= 2U;
1015       ia1 = 0U;
1016 
      /*  Calculation of the current stage */
1018       j = 0;
1019       do
1020       {
1021          /*  index calculation for the coefficients */
1022          ia2 = ia1 + ia1;
1023          ia3 = ia2 + ia1;
1024          co1 = pCoef[ia1 * 2U];
1025          si1 = pCoef[(ia1 * 2U) + 1U];
1026          co2 = pCoef[ia2 * 2U];
1027          si2 = pCoef[(ia2 * 2U) + 1U];
1028          co3 = pCoef[ia3 * 2U];
1029          si3 = pCoef[(ia3 * 2U) + 1U];
1030 
1031          /*  Twiddle coefficients index modifier */
1032          ia1 = ia1 + twidCoefModifier;
1033 
1034          i0 = j;
1035          do
1036          {
1037             /*  index calculation for the input as, */
            /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
1039             i1 = i0 + n2;
1040             i2 = i1 + n2;
1041             i3 = i2 + n2;
1042 
1043             /* xa + xc */
1044             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1045 
1046             /* xa - xc */
1047             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1048 
1049             /* ya + yc */
1050             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1051 
1052             /* ya - yc */
1053             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1054 
1055             /* xb + xd */
1056             t1 = pSrc[2U * i1] + pSrc[2U * i3];
1057 
1058             /* xa' = xa + xb + xc + xd */
1059             pSrc[2U * i0] = r1 + t1;
1060 
1061             /* xa + xc -(xb + xd) */
1062             r1 = r1 - t1;
1063 
1064             /* yb + yd */
1065             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1066 
1067             /* ya' = ya + yb + yc + yd */
1068             pSrc[(2U * i0) + 1U] = s1 + t2;
1069 
1070             /* (ya + yc) - (yb + yd) */
1071             s1 = s1 - t2;
1072 
1073             /* (yb - yd) */
1074             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1075 
1076             /* (xb - xd) */
1077             t2 = pSrc[2U * i1] - pSrc[2U * i3];
1078 
1079             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1080             pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1081 
1082             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1083             pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1084 
1085             /* (xa - xc) - (yb - yd) */
1086             r1 = r2 - t1;
1087 
1088             /* (xa - xc) + (yb - yd) */
1089             r2 = r2 + t1;
1090 
1091             /* (ya - yc) +  (xb - xd) */
1092             s1 = s2 + t2;
1093 
1094             /* (ya - yc) -  (xb - xd) */
1095             s2 = s2 - t2;
1096 
            /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
1098             pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1099 
            /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
1101             pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1102 
            /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
1104             pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1105 
            /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
1107             pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1108 
1109             i0 += n1;
1110          } while ( i0 < fftLen);
1111          j++;
1112       } while (j <= (n2 - 1U));
1113       twidCoefModifier <<= 2U;
1114    }
1115    /*  Initializations of last stage */
1116    n1 = n2;
1117    n2 >>= 2U;
1118 
1119    /*  Calculations of last stage */
1120    for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1121    {
1122       /*  index calculation for the input as, */
      /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
1124       i1 = i0 + n2;
1125       i2 = i1 + n2;
1126       i3 = i2 + n2;
1127 
1128       /*  Butterfly implementation */
1129       /* xa + xc */
1130       r1 = pSrc[2U * i0] + pSrc[2U * i2];
1131 
1132       /* xa - xc */
1133       r2 = pSrc[2U * i0] - pSrc[2U * i2];
1134 
1135       /* ya + yc */
1136       s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1137 
1138       /* ya - yc */
1139       s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1140 
      /* xb + xd */
1142       t1 = pSrc[2U * i1] + pSrc[2U * i3];
1143 
      /* xa' = (xa + xb + xc + xd) * onebyfftLen */
1145       pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1146 
      /* (xa + xc) - (xb + xd) */
1148       r1 = r1 - t1;
1149 
1150       /* yb + yd */
1151       t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1152 
      /* ya' = (ya + yb + yc + yd) * onebyfftLen */
1154       pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1155 
1156       /* (ya + yc) - (yb + yd) */
1157       s1 = s1 - t2;
1158 
1159       /* (yb-yd) */
1160       t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1161 
1162       /* (xb-xd) */
1163       t2 = pSrc[2U * i1] - pSrc[2U * i3];
1164 
      /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1166       pSrc[2U * i1] = r1 * onebyfftLen;
1167 
      /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1169       pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1170 
1171       /* (xa - xc) - (yb-yd) */
1172       r1 = r2 - t1;
1173 
1174       /* (xa - xc) + (yb-yd) */
1175       r2 = r2 + t1;
1176 
1177       /* (ya - yc) + (xb-xd) */
1178       s1 = s2 + t2;
1179 
1180       /* (ya - yc) - (xb-xd) */
1181       s2 = s2 - t2;
1182 
      /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1184       pSrc[2U * i2] = r1 * onebyfftLen;
1185 
      /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1187       pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1188 
      /* xd' = (xa+yb-xc-yd) * onebyfftLen */
1190       pSrc[2U * i3] = r2 * onebyfftLen;
1191 
      /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1193       pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1194    }
1195 
1196 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
1197 }
1198 
1199 
1200