1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cfft_radix4_f32.c
4  * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions.h"
30 
31 extern void arm_bitreversal_f32(
32         float32_t * pSrc,
33         uint16_t fftSize,
34         uint16_t bitRevFactor,
35   const uint16_t * pBitRevTab);
36 
37 void arm_radix4_butterfly_f32(
38         float32_t * pSrc,
39         uint16_t fftLen,
40   const float32_t * pCoef,
41         uint16_t twidCoefModifier);
42 
43 void arm_radix4_butterfly_inverse_f32(
44         float32_t * pSrc,
45         uint16_t fftLen,
46   const float32_t * pCoef,
47         uint16_t twidCoefModifier,
48         float32_t onebyfftLen);
49 
50 
51 
52 
53 /**
54   @addtogroup ComplexFFTDeprecated
55   @{
56  */
57 
58 
59 /**
60   @brief         Processing function for the floating-point Radix-4 CFFT/CIFFT.
61   @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
62   @param[in]     S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
63   @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
64  */
65 
arm_cfft_radix4_f32(const arm_cfft_radix4_instance_f32 * S,float32_t * pSrc)66 void arm_cfft_radix4_f32(
67   const arm_cfft_radix4_instance_f32 * S,
68         float32_t * pSrc)
69 {
70    if (S->ifftFlag == 1U)
71    {
72       /*  Complex IFFT radix-4  */
73       arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
74    }
75    else
76    {
77       /*  Complex FFT radix-4  */
78       arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
79    }
80 
81    if (S->bitReverseFlag == 1U)
82    {
83       /*  Bit Reversal */
84       arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
85    }
86 
87 }
88 
89 /**
90   @} end of ComplexFFTDeprecated group
91  */
92 
93 /* ----------------------------------------------------------------------
94  * Internal helper function used by the FFTs
95  * ---------------------------------------------------------------------- */
96 
97 /**
98   brief         Core function for the floating-point CFFT butterfly process.
99   param[in,out] pSrc             points to the in-place buffer of floating-point data type
100   param[in]     fftLen           length of the FFT
101   param[in]     pCoef            points to the twiddle coefficient buffer
102   param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
103   return        none
104  */
105 
arm_radix4_butterfly_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier)106 void arm_radix4_butterfly_f32(
107         float32_t * pSrc,
108         uint16_t fftLen,
109   const float32_t * pCoef,
110         uint16_t twidCoefModifier)
111 {
112         float32_t co1, co2, co3, si1, si2, si3;
113         uint32_t ia1, ia2, ia3;
114         uint32_t i0, i1, i2, i3;
115         uint32_t n1, n2, j, k;
116 
117 #if defined (ARM_MATH_LOOPUNROLL)
118 
119         float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
120         float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
121         Ybminusd;
122         float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
123         float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
124         float32_t *ptr1;
125         float32_t p0,p1,p2,p3,p4,p5;
126         float32_t a0,a1,a2,a3,a4,a5,a6,a7;
127 
128    /*  Initializations for the first stage */
129    n2 = fftLen;
130    n1 = n2;
131 
132    /* n2 = fftLen/4 */
133    n2 >>= 2U;
134    i0 = 0U;
135    ia1 = 0U;
136 
137    j = n2;
138 
139    /*  Calculation of first stage */
140    do
141    {
142       /*  index calculation for the input as, */
143       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
144       i1 = i0 + n2;
145       i2 = i1 + n2;
146       i3 = i2 + n2;
147 
148       xaIn = pSrc[(2U * i0)];
149       yaIn = pSrc[(2U * i0) + 1U];
150 
151       xbIn = pSrc[(2U * i1)];
152       ybIn = pSrc[(2U * i1) + 1U];
153 
154       xcIn = pSrc[(2U * i2)];
155       ycIn = pSrc[(2U * i2) + 1U];
156 
157       xdIn = pSrc[(2U * i3)];
158       ydIn = pSrc[(2U * i3) + 1U];
159 
160       /* xa + xc */
161       Xaplusc = xaIn + xcIn;
162       /* xb + xd */
163       Xbplusd = xbIn + xdIn;
164       /* ya + yc */
165       Yaplusc = yaIn + ycIn;
166       /* yb + yd */
167       Ybplusd = ybIn + ydIn;
168 
169       /*  index calculation for the coefficients */
170       ia2 = ia1 + ia1;
171       co2 = pCoef[ia2 * 2U];
172       si2 = pCoef[(ia2 * 2U) + 1U];
173 
174       /* xa - xc */
175       Xaminusc = xaIn - xcIn;
176       /* xb - xd */
177       Xbminusd = xbIn - xdIn;
178       /* ya - yc */
179       Yaminusc = yaIn - ycIn;
180       /* yb - yd */
181       Ybminusd = ybIn - ydIn;
182 
183       /* xa' = xa + xb + xc + xd */
184       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
185       /* ya' = ya + yb + yc + yd */
186       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
187 
188       /* (xa - xc) + (yb - yd) */
189       Xb12C_out = (Xaminusc + Ybminusd);
190       /* (ya - yc) + (xb - xd) */
191       Yb12C_out = (Yaminusc - Xbminusd);
192       /* (xa + xc) - (xb + xd) */
193       Xc12C_out = (Xaplusc - Xbplusd);
194       /* (ya + yc) - (yb + yd) */
195       Yc12C_out = (Yaplusc - Ybplusd);
196       /* (xa - xc) - (yb - yd) */
197       Xd12C_out = (Xaminusc - Ybminusd);
198       /* (ya - yc) + (xb - xd) */
199       Yd12C_out = (Xbminusd + Yaminusc);
200 
201       co1 = pCoef[ia1 * 2U];
202       si1 = pCoef[(ia1 * 2U) + 1U];
203 
204       /*  index calculation for the coefficients */
205       ia3 = ia2 + ia1;
206       co3 = pCoef[ia3 * 2U];
207       si3 = pCoef[(ia3 * 2U) + 1U];
208 
209       Xb12_out = Xb12C_out * co1;
210       Yb12_out = Yb12C_out * co1;
211       Xc12_out = Xc12C_out * co2;
212       Yc12_out = Yc12C_out * co2;
213       Xd12_out = Xd12C_out * co3;
214       Yd12_out = Yd12C_out * co3;
215 
216       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
217       //Xb12_out -= Yb12C_out * si1;
218       p0 = Yb12C_out * si1;
219       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
220       //Yb12_out += Xb12C_out * si1;
221       p1 = Xb12C_out * si1;
222       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
223       //Xc12_out -= Yc12C_out * si2;
224       p2 = Yc12C_out * si2;
225       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
226       //Yc12_out += Xc12C_out * si2;
227       p3 = Xc12C_out * si2;
228       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
229       //Xd12_out -= Yd12C_out * si3;
230       p4 = Yd12C_out * si3;
231       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
232       //Yd12_out += Xd12C_out * si3;
233       p5 = Xd12C_out * si3;
234 
235       Xb12_out += p0;
236       Yb12_out -= p1;
237       Xc12_out += p2;
238       Yc12_out -= p3;
239       Xd12_out += p4;
240       Yd12_out -= p5;
241 
242       /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
243       pSrc[2U * i1] = Xc12_out;
244 
245       /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
246       pSrc[(2U * i1) + 1U] = Yc12_out;
247 
248       /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
249       pSrc[2U * i2] = Xb12_out;
250 
251       /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
252       pSrc[(2U * i2) + 1U] = Yb12_out;
253 
254       /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
255       pSrc[2U * i3] = Xd12_out;
256 
257       /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
258       pSrc[(2U * i3) + 1U] = Yd12_out;
259 
260       /*  Twiddle coefficients index modifier */
261       ia1 += twidCoefModifier;
262 
263       /*  Updating input index */
264       i0++;
265 
266    }
267    while (--j);
268 
269    twidCoefModifier <<= 2U;
270 
271    /*  Calculation of second stage to excluding last stage */
272    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
273    {
274       /*  Initializations for the first stage */
275       n1 = n2;
276       n2 >>= 2U;
277       ia1 = 0U;
278 
279       /*  Calculation of first stage */
280       j = 0;
281       do
282       {
283          /*  index calculation for the coefficients */
284          ia2 = ia1 + ia1;
285          ia3 = ia2 + ia1;
286          co1 = pCoef[(ia1 * 2U)];
287          si1 = pCoef[(ia1 * 2U) + 1U];
288          co2 = pCoef[(ia2 * 2U)];
289          si2 = pCoef[(ia2 * 2U) + 1U];
290          co3 = pCoef[(ia3 * 2U)];
291          si3 = pCoef[(ia3 * 2U) + 1U];
292 
293          /*  Twiddle coefficients index modifier */
294          ia1 += twidCoefModifier;
295 
296          i0 = j;
297          do
298          {
299             /*  index calculation for the input as, */
300             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
301             i1 = i0 + n2;
302             i2 = i1 + n2;
303             i3 = i2 + n2;
304 
305             xaIn = pSrc[(2U * i0)];
306             yaIn = pSrc[(2U * i0) + 1U];
307 
308             xbIn = pSrc[(2U * i1)];
309             ybIn = pSrc[(2U * i1) + 1U];
310 
311             xcIn = pSrc[(2U * i2)];
312             ycIn = pSrc[(2U * i2) + 1U];
313 
314             xdIn = pSrc[(2U * i3)];
315             ydIn = pSrc[(2U * i3) + 1U];
316 
317             /* xa - xc */
318             Xaminusc = xaIn - xcIn;
319             /* (xb - xd) */
320             Xbminusd = xbIn - xdIn;
321             /* ya - yc */
322             Yaminusc = yaIn - ycIn;
323             /* (yb - yd) */
324             Ybminusd = ybIn - ydIn;
325 
326             /* xa + xc */
327             Xaplusc = xaIn + xcIn;
328             /* xb + xd */
329             Xbplusd = xbIn + xdIn;
330             /* ya + yc */
331             Yaplusc = yaIn + ycIn;
332             /* yb + yd */
333             Ybplusd = ybIn + ydIn;
334 
335             /* (xa - xc) + (yb - yd) */
336             Xb12C_out = (Xaminusc + Ybminusd);
337             /* (ya - yc) -  (xb - xd) */
338             Yb12C_out = (Yaminusc - Xbminusd);
339             /* xa + xc -(xb + xd) */
340             Xc12C_out = (Xaplusc - Xbplusd);
341             /* (ya + yc) - (yb + yd) */
342             Yc12C_out = (Yaplusc - Ybplusd);
343             /* (xa - xc) - (yb - yd) */
344             Xd12C_out = (Xaminusc - Ybminusd);
345             /* (ya - yc) +  (xb - xd) */
346             Yd12C_out = (Xbminusd + Yaminusc);
347 
348             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
349             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
350 
351             Xb12_out = Xb12C_out * co1;
352             Yb12_out = Yb12C_out * co1;
353             Xc12_out = Xc12C_out * co2;
354             Yc12_out = Yc12C_out * co2;
355             Xd12_out = Xd12C_out * co3;
356             Yd12_out = Yd12C_out * co3;
357 
358             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
359             //Xb12_out -= Yb12C_out * si1;
360             p0 = Yb12C_out * si1;
361             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
362             //Yb12_out += Xb12C_out * si1;
363             p1 = Xb12C_out * si1;
364             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
365             //Xc12_out -= Yc12C_out * si2;
366             p2 = Yc12C_out * si2;
367             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
368             //Yc12_out += Xc12C_out * si2;
369             p3 = Xc12C_out * si2;
370             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
371             //Xd12_out -= Yd12C_out * si3;
372             p4 = Yd12C_out * si3;
373             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
374             //Yd12_out += Xd12C_out * si3;
375             p5 = Xd12C_out * si3;
376 
377             Xb12_out += p0;
378             Yb12_out -= p1;
379             Xc12_out += p2;
380             Yc12_out -= p3;
381             Xd12_out += p4;
382             Yd12_out -= p5;
383 
384             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
385             pSrc[2U * i1] = Xc12_out;
386 
387             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
388             pSrc[(2U * i1) + 1U] = Yc12_out;
389 
390             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
391             pSrc[2U * i2] = Xb12_out;
392 
393             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
394             pSrc[(2U * i2) + 1U] = Yb12_out;
395 
396             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
397             pSrc[2U * i3] = Xd12_out;
398 
399             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
400             pSrc[(2U * i3) + 1U] = Yd12_out;
401 
402             i0 += n1;
403          } while (i0 < fftLen);
404          j++;
405       } while (j <= (n2 - 1U));
406       twidCoefModifier <<= 2U;
407    }
408 
409    j = fftLen >> 2;
410    ptr1 = &pSrc[0];
411 
412    /*  Calculations of last stage */
413    do
414    {
415       xaIn = ptr1[0];
416       yaIn = ptr1[1];
417       xbIn = ptr1[2];
418       ybIn = ptr1[3];
419       xcIn = ptr1[4];
420       ycIn = ptr1[5];
421       xdIn = ptr1[6];
422       ydIn = ptr1[7];
423 
424       /* xa + xc */
425       Xaplusc = xaIn + xcIn;
426 
427       /* xa - xc */
428       Xaminusc = xaIn - xcIn;
429 
430       /* ya + yc */
431       Yaplusc = yaIn + ycIn;
432 
433       /* ya - yc */
434       Yaminusc = yaIn - ycIn;
435 
436       /* xb + xd */
437       Xbplusd = xbIn + xdIn;
438 
439       /* yb + yd */
440       Ybplusd = ybIn + ydIn;
441 
442       /* (xb-xd) */
443       Xbminusd = xbIn - xdIn;
444 
445       /* (yb-yd) */
446       Ybminusd = ybIn - ydIn;
447 
448       /* xa' = xa + xb + xc + xd */
449       a0 = (Xaplusc + Xbplusd);
450       /* ya' = ya + yb + yc + yd */
451       a1 = (Yaplusc + Ybplusd);
452       /* xc' = (xa-xb+xc-xd) */
453       a2 = (Xaplusc - Xbplusd);
454       /* yc' = (ya-yb+yc-yd) */
455       a3 = (Yaplusc - Ybplusd);
456       /* xb' = (xa+yb-xc-yd) */
457       a4 = (Xaminusc + Ybminusd);
458       /* yb' = (ya-xb-yc+xd) */
459       a5 = (Yaminusc - Xbminusd);
460       /* xd' = (xa-yb-xc+yd)) */
461       a6 = (Xaminusc - Ybminusd);
462       /* yd' = (ya+xb-yc-xd) */
463       a7 = (Xbminusd + Yaminusc);
464 
465       ptr1[0] = a0;
466       ptr1[1] = a1;
467       ptr1[2] = a2;
468       ptr1[3] = a3;
469       ptr1[4] = a4;
470       ptr1[5] = a5;
471       ptr1[6] = a6;
472       ptr1[7] = a7;
473 
474       /* increment pointer by 8 */
475       ptr1 += 8U;
476    } while (--j);
477 
478 #else
479 
480         float32_t t1, t2, r1, r2, s1, s2;
481 
482    /* Initializations for the fft calculation */
483    n2 = fftLen;
484    n1 = n2;
485    for (k = fftLen; k > 1U; k >>= 2U)
486    {
487       /*  Initializations for the fft calculation */
488       n1 = n2;
489       n2 >>= 2U;
490       ia1 = 0U;
491 
492       /*  FFT Calculation */
493       j = 0;
494       do
495       {
496          /*  index calculation for the coefficients */
497          ia2 = ia1 + ia1;
498          ia3 = ia2 + ia1;
499          co1 = pCoef[ia1 * 2U];
500          si1 = pCoef[(ia1 * 2U) + 1U];
501          co2 = pCoef[ia2 * 2U];
502          si2 = pCoef[(ia2 * 2U) + 1U];
503          co3 = pCoef[ia3 * 2U];
504          si3 = pCoef[(ia3 * 2U) + 1U];
505 
506          /*  Twiddle coefficients index modifier */
507          ia1 = ia1 + twidCoefModifier;
508 
509          i0 = j;
510          do
511          {
512             /*  index calculation for the input as, */
513             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
514             i1 = i0 + n2;
515             i2 = i1 + n2;
516             i3 = i2 + n2;
517 
518             /* xa + xc */
519             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
520 
521             /* xa - xc */
522             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
523 
524             /* ya + yc */
525             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
526 
527             /* ya - yc */
528             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
529 
530             /* xb + xd */
531             t1 = pSrc[2U * i1] + pSrc[2U * i3];
532 
533             /* xa' = xa + xb + xc + xd */
534             pSrc[2U * i0] = r1 + t1;
535 
536             /* xa + xc -(xb + xd) */
537             r1 = r1 - t1;
538 
539             /* yb + yd */
540             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
541 
542             /* ya' = ya + yb + yc + yd */
543             pSrc[(2U * i0) + 1U] = s1 + t2;
544 
545             /* (ya + yc) - (yb + yd) */
546             s1 = s1 - t2;
547 
548             /* (yb - yd) */
549             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
550 
551             /* (xb - xd) */
552             t2 = pSrc[2U * i1] - pSrc[2U * i3];
553 
554             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
555             pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
556 
557             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
558             pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
559 
560             /* (xa - xc) + (yb - yd) */
561             r1 = r2 + t1;
562 
563             /* (xa - xc) - (yb - yd) */
564             r2 = r2 - t1;
565 
566             /* (ya - yc) -  (xb - xd) */
567             s1 = s2 - t2;
568 
569             /* (ya - yc) +  (xb - xd) */
570             s2 = s2 + t2;
571 
572             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
573             pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
574 
575             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
576             pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
577 
578             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
579             pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
580 
581             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
582             pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
583 
584             i0 += n1;
585          } while ( i0 < fftLen);
586          j++;
587       } while (j <= (n2 - 1U));
588       twidCoefModifier <<= 2U;
589    }
590 
591 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
592 
593 }
594 
595 /**
596   brief         Core function for the floating-point CIFFT butterfly process.
597   param[in,out] pSrc             points to the in-place buffer of floating-point data type
598   param[in]     fftLen           length of the FFT
599   param[in]     pCoef            points to twiddle coefficient buffer
600   param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
601   param[in]     onebyfftLen      value of 1/fftLen
602   return        none
603  */
604 
arm_radix4_butterfly_inverse_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier,float32_t onebyfftLen)605 void arm_radix4_butterfly_inverse_f32(
606         float32_t * pSrc,
607         uint16_t fftLen,
608   const float32_t * pCoef,
609         uint16_t twidCoefModifier,
610         float32_t onebyfftLen)
611 {
612         float32_t co1, co2, co3, si1, si2, si3;
613         uint32_t ia1, ia2, ia3;
614         uint32_t i0, i1, i2, i3;
615         uint32_t n1, n2, j, k;
616 
617 #if defined (ARM_MATH_LOOPUNROLL)
618 
619         float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
620         float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
621         Ybminusd;
622         float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
623         float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
624         float32_t *ptr1;
625         float32_t p0,p1,p2,p3,p4,p5,p6,p7;
626         float32_t a0,a1,a2,a3,a4,a5,a6,a7;
627 
628 
629    /*  Initializations for the first stage */
630    n2 = fftLen;
631    n1 = n2;
632 
633    /* n2 = fftLen/4 */
634    n2 >>= 2U;
635    i0 = 0U;
636    ia1 = 0U;
637 
638    j = n2;
639 
640    /*  Calculation of first stage */
641    do
642    {
643       /*  index calculation for the input as, */
644       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
645       i1 = i0 + n2;
646       i2 = i1 + n2;
647       i3 = i2 + n2;
648 
649       /*  Butterfly implementation */
650       xaIn = pSrc[(2U * i0)];
651       yaIn = pSrc[(2U * i0) + 1U];
652 
653       xcIn = pSrc[(2U * i2)];
654       ycIn = pSrc[(2U * i2) + 1U];
655 
656       xbIn = pSrc[(2U * i1)];
657       ybIn = pSrc[(2U * i1) + 1U];
658 
659       xdIn = pSrc[(2U * i3)];
660       ydIn = pSrc[(2U * i3) + 1U];
661 
662       /* xa + xc */
663       Xaplusc = xaIn + xcIn;
664       /* xb + xd */
665       Xbplusd = xbIn + xdIn;
666       /* ya + yc */
667       Yaplusc = yaIn + ycIn;
668       /* yb + yd */
669       Ybplusd = ybIn + ydIn;
670 
671       /*  index calculation for the coefficients */
672       ia2 = ia1 + ia1;
673       co2 = pCoef[ia2 * 2U];
674       si2 = pCoef[(ia2 * 2U) + 1U];
675 
676       /* xa - xc */
677       Xaminusc = xaIn - xcIn;
678       /* xb - xd */
679       Xbminusd = xbIn - xdIn;
680       /* ya - yc */
681       Yaminusc = yaIn - ycIn;
682       /* yb - yd */
683       Ybminusd = ybIn - ydIn;
684 
685       /* xa' = xa + xb + xc + xd */
686       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
687 
688       /* ya' = ya + yb + yc + yd */
689       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
690 
691       /* (xa - xc) - (yb - yd) */
692       Xb12C_out = (Xaminusc - Ybminusd);
693       /* (ya - yc) + (xb - xd) */
694       Yb12C_out = (Yaminusc + Xbminusd);
695       /* (xa + xc) - (xb + xd) */
696       Xc12C_out = (Xaplusc - Xbplusd);
697       /* (ya + yc) - (yb + yd) */
698       Yc12C_out = (Yaplusc - Ybplusd);
699       /* (xa - xc) + (yb - yd) */
700       Xd12C_out = (Xaminusc + Ybminusd);
701       /* (ya - yc) - (xb - xd) */
702       Yd12C_out = (Yaminusc - Xbminusd);
703 
704       co1 = pCoef[ia1 * 2U];
705       si1 = pCoef[(ia1 * 2U) + 1U];
706 
707       /*  index calculation for the coefficients */
708       ia3 = ia2 + ia1;
709       co3 = pCoef[ia3 * 2U];
710       si3 = pCoef[(ia3 * 2U) + 1U];
711 
712       Xb12_out = Xb12C_out * co1;
713       Yb12_out = Yb12C_out * co1;
714       Xc12_out = Xc12C_out * co2;
715       Yc12_out = Yc12C_out * co2;
716       Xd12_out = Xd12C_out * co3;
717       Yd12_out = Yd12C_out * co3;
718 
719       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
720       //Xb12_out -= Yb12C_out * si1;
721       p0 = Yb12C_out * si1;
722       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
723       //Yb12_out += Xb12C_out * si1;
724       p1 = Xb12C_out * si1;
725       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
726       //Xc12_out -= Yc12C_out * si2;
727       p2 = Yc12C_out * si2;
728       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
729       //Yc12_out += Xc12C_out * si2;
730       p3 = Xc12C_out * si2;
731       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
732       //Xd12_out -= Yd12C_out * si3;
733       p4 = Yd12C_out * si3;
734       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
735       //Yd12_out += Xd12C_out * si3;
736       p5 = Xd12C_out * si3;
737 
738       Xb12_out -= p0;
739       Yb12_out += p1;
740       Xc12_out -= p2;
741       Yc12_out += p3;
742       Xd12_out -= p4;
743       Yd12_out += p5;
744 
745       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
746       pSrc[2U * i1] = Xc12_out;
747 
748       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
749       pSrc[(2U * i1) + 1U] = Yc12_out;
750 
751       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
752       pSrc[2U * i2] = Xb12_out;
753 
754       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
755       pSrc[(2U * i2) + 1U] = Yb12_out;
756 
757       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
758       pSrc[2U * i3] = Xd12_out;
759 
760       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
761       pSrc[(2U * i3) + 1U] = Yd12_out;
762 
763       /*  Twiddle coefficients index modifier */
764       ia1 = ia1 + twidCoefModifier;
765 
766       /*  Updating input index */
767       i0 = i0 + 1U;
768 
769    } while (--j);
770 
771    twidCoefModifier <<= 2U;
772 
773    /*  Calculation of second stage to excluding last stage */
774    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
775    {
776       /*  Initializations for the first stage */
777       n1 = n2;
778       n2 >>= 2U;
779       ia1 = 0U;
780 
781       /*  Calculation of first stage */
782       j = 0;
783       do
784       {
785          /*  index calculation for the coefficients */
786          ia2 = ia1 + ia1;
787          ia3 = ia2 + ia1;
788          co1 = pCoef[ia1 * 2U];
789          si1 = pCoef[(ia1 * 2U) + 1U];
790          co2 = pCoef[ia2 * 2U];
791          si2 = pCoef[(ia2 * 2U) + 1U];
792          co3 = pCoef[ia3 * 2U];
793          si3 = pCoef[(ia3 * 2U) + 1U];
794 
795          /*  Twiddle coefficients index modifier */
796          ia1 = ia1 + twidCoefModifier;
797 
798          i0 = j;
799          do
800          {
801             /*  index calculation for the input as, */
802             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
803             i1 = i0 + n2;
804             i2 = i1 + n2;
805             i3 = i2 + n2;
806 
807             xaIn = pSrc[(2U * i0)];
808             yaIn = pSrc[(2U * i0) + 1U];
809 
810             xbIn = pSrc[(2U * i1)];
811             ybIn = pSrc[(2U * i1) + 1U];
812 
813             xcIn = pSrc[(2U * i2)];
814             ycIn = pSrc[(2U * i2) + 1U];
815 
816             xdIn = pSrc[(2U * i3)];
817             ydIn = pSrc[(2U * i3) + 1U];
818 
819             /* xa - xc */
820             Xaminusc = xaIn - xcIn;
821             /* (xb - xd) */
822             Xbminusd = xbIn - xdIn;
823             /* ya - yc */
824             Yaminusc = yaIn - ycIn;
825             /* (yb - yd) */
826             Ybminusd = ybIn - ydIn;
827 
828             /* xa + xc */
829             Xaplusc = xaIn + xcIn;
830             /* xb + xd */
831             Xbplusd = xbIn + xdIn;
832             /* ya + yc */
833             Yaplusc = yaIn + ycIn;
834             /* yb + yd */
835             Ybplusd = ybIn + ydIn;
836 
837             /* (xa - xc) - (yb - yd) */
838             Xb12C_out = (Xaminusc - Ybminusd);
839             /* (ya - yc) +  (xb - xd) */
840             Yb12C_out = (Yaminusc + Xbminusd);
841             /* xa + xc -(xb + xd) */
842             Xc12C_out = (Xaplusc - Xbplusd);
843             /* (ya + yc) - (yb + yd) */
844             Yc12C_out = (Yaplusc - Ybplusd);
845             /* (xa - xc) + (yb - yd) */
846             Xd12C_out = (Xaminusc + Ybminusd);
847             /* (ya - yc) -  (xb - xd) */
848             Yd12C_out = (Yaminusc - Xbminusd);
849 
850             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
851             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
852 
853             Xb12_out = Xb12C_out * co1;
854             Yb12_out = Yb12C_out * co1;
855             Xc12_out = Xc12C_out * co2;
856             Yc12_out = Yc12C_out * co2;
857             Xd12_out = Xd12C_out * co3;
858             Yd12_out = Yd12C_out * co3;
859 
860             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
861             //Xb12_out -= Yb12C_out * si1;
862             p0 = Yb12C_out * si1;
863             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
864             //Yb12_out += Xb12C_out * si1;
865             p1 = Xb12C_out * si1;
866             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
867             //Xc12_out -= Yc12C_out * si2;
868             p2 = Yc12C_out * si2;
869             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
870             //Yc12_out += Xc12C_out * si2;
871             p3 = Xc12C_out * si2;
872             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
873             //Xd12_out -= Yd12C_out * si3;
874             p4 = Yd12C_out * si3;
875             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
876             //Yd12_out += Xd12C_out * si3;
877             p5 = Xd12C_out * si3;
878 
879             Xb12_out -= p0;
880             Yb12_out += p1;
881             Xc12_out -= p2;
882             Yc12_out += p3;
883             Xd12_out -= p4;
884             Yd12_out += p5;
885 
886             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
887             pSrc[2U * i1] = Xc12_out;
888 
889             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
890             pSrc[(2U * i1) + 1U] = Yc12_out;
891 
892             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
893             pSrc[2U * i2] = Xb12_out;
894 
895             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
896             pSrc[(2U * i2) + 1U] = Yb12_out;
897 
898             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
899             pSrc[2U * i3] = Xd12_out;
900 
901             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
902             pSrc[(2U * i3) + 1U] = Yd12_out;
903 
904             i0 += n1;
905          } while (i0 < fftLen);
906          j++;
907       } while (j <= (n2 - 1U));
908       twidCoefModifier <<= 2U;
909    }
910    /*  Initializations of last stage */
911 
912    j = fftLen >> 2;
913    ptr1 = &pSrc[0];
914 
915    /*  Calculations of last stage */
916    do
917    {
918       xaIn = ptr1[0];
919       yaIn = ptr1[1];
920       xbIn = ptr1[2];
921       ybIn = ptr1[3];
922       xcIn = ptr1[4];
923       ycIn = ptr1[5];
924       xdIn = ptr1[6];
925       ydIn = ptr1[7];
926 
927       /*  Butterfly implementation */
928       /* xa + xc */
929       Xaplusc = xaIn + xcIn;
930 
931       /* xa - xc */
932       Xaminusc = xaIn - xcIn;
933 
934       /* ya + yc */
935       Yaplusc = yaIn + ycIn;
936 
937       /* ya - yc */
938       Yaminusc = yaIn - ycIn;
939 
940       /* xb + xd */
941       Xbplusd = xbIn + xdIn;
942 
943       /* yb + yd */
944       Ybplusd = ybIn + ydIn;
945 
946       /* (xb-xd) */
947       Xbminusd = xbIn - xdIn;
948 
949       /* (yb-yd) */
950       Ybminusd = ybIn - ydIn;
951 
952       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
953       a0 = (Xaplusc + Xbplusd);
954       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
955       a1 = (Yaplusc + Ybplusd);
956       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
957       a2 = (Xaplusc - Xbplusd);
958       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
959       a3 = (Yaplusc - Ybplusd);
960       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
961       a4 = (Xaminusc - Ybminusd);
962       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
963       a5 = (Yaminusc + Xbminusd);
964       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
965       a6 = (Xaminusc + Ybminusd);
966       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
967       a7 = (Yaminusc - Xbminusd);
968 
969       p0 = a0 * onebyfftLen;
970       p1 = a1 * onebyfftLen;
971       p2 = a2 * onebyfftLen;
972       p3 = a3 * onebyfftLen;
973       p4 = a4 * onebyfftLen;
974       p5 = a5 * onebyfftLen;
975       p6 = a6 * onebyfftLen;
976       p7 = a7 * onebyfftLen;
977 
978       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
979       ptr1[0] = p0;
980       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
981       ptr1[1] = p1;
982       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
983       ptr1[2] = p2;
984       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
985       ptr1[3] = p3;
986       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
987       ptr1[4] = p4;
988       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
989       ptr1[5] = p5;
990       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
991       ptr1[6] = p6;
992       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
993       ptr1[7] = p7;
994 
995       /* increment source pointer by 8 for next calculations */
996       ptr1 = ptr1 + 8U;
997 
998    } while (--j);
999 
1000 #else
1001 
1002         float32_t t1, t2, r1, r2, s1, s2;
1003 
1004    /*  Initializations for the first stage */
1005    n2 = fftLen;
1006    n1 = n2;
1007 
1008    /*  Calculation of first stage */
1009    for (k = fftLen; k > 4U; k >>= 2U)
1010    {
1011       /*  Initializations for the first stage */
1012       n1 = n2;
1013       n2 >>= 2U;
1014       ia1 = 0U;
1015 
1016       /*  Calculation of first stage */
1017       j = 0;
1018       do
1019       {
1020          /*  index calculation for the coefficients */
1021          ia2 = ia1 + ia1;
1022          ia3 = ia2 + ia1;
1023          co1 = pCoef[ia1 * 2U];
1024          si1 = pCoef[(ia1 * 2U) + 1U];
1025          co2 = pCoef[ia2 * 2U];
1026          si2 = pCoef[(ia2 * 2U) + 1U];
1027          co3 = pCoef[ia3 * 2U];
1028          si3 = pCoef[(ia3 * 2U) + 1U];
1029 
1030          /*  Twiddle coefficients index modifier */
1031          ia1 = ia1 + twidCoefModifier;
1032 
1033          i0 = j;
1034          do
1035          {
1036             /*  index calculation for the input as, */
1037             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1038             i1 = i0 + n2;
1039             i2 = i1 + n2;
1040             i3 = i2 + n2;
1041 
1042             /* xa + xc */
1043             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1044 
1045             /* xa - xc */
1046             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1047 
1048             /* ya + yc */
1049             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1050 
1051             /* ya - yc */
1052             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1053 
1054             /* xb + xd */
1055             t1 = pSrc[2U * i1] + pSrc[2U * i3];
1056 
1057             /* xa' = xa + xb + xc + xd */
1058             pSrc[2U * i0] = r1 + t1;
1059 
1060             /* xa + xc -(xb + xd) */
1061             r1 = r1 - t1;
1062 
1063             /* yb + yd */
1064             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1065 
1066             /* ya' = ya + yb + yc + yd */
1067             pSrc[(2U * i0) + 1U] = s1 + t2;
1068 
1069             /* (ya + yc) - (yb + yd) */
1070             s1 = s1 - t2;
1071 
1072             /* (yb - yd) */
1073             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1074 
1075             /* (xb - xd) */
1076             t2 = pSrc[2U * i1] - pSrc[2U * i3];
1077 
1078             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1079             pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1080 
1081             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1082             pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1083 
1084             /* (xa - xc) - (yb - yd) */
1085             r1 = r2 - t1;
1086 
1087             /* (xa - xc) + (yb - yd) */
1088             r2 = r2 + t1;
1089 
1090             /* (ya - yc) +  (xb - xd) */
1091             s1 = s2 + t2;
1092 
1093             /* (ya - yc) -  (xb - xd) */
1094             s2 = s2 - t2;
1095 
1096             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1097             pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1098 
1099             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1100             pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1101 
1102             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1103             pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1104 
1105             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1106             pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1107 
1108             i0 += n1;
1109          } while ( i0 < fftLen);
1110          j++;
1111       } while (j <= (n2 - 1U));
1112       twidCoefModifier <<= 2U;
1113    }
1114    /*  Initializations of last stage */
1115    n1 = n2;
1116    n2 >>= 2U;
1117 
1118    /*  Calculations of last stage */
1119    for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1120    {
1121       /*  index calculation for the input as, */
1122       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1123       i1 = i0 + n2;
1124       i2 = i1 + n2;
1125       i3 = i2 + n2;
1126 
1127       /*  Butterfly implementation */
1128       /* xa + xc */
1129       r1 = pSrc[2U * i0] + pSrc[2U * i2];
1130 
1131       /* xa - xc */
1132       r2 = pSrc[2U * i0] - pSrc[2U * i2];
1133 
1134       /* ya + yc */
1135       s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1136 
1137       /* ya - yc */
1138       s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1139 
1140       /* xc + xd */
1141       t1 = pSrc[2U * i1] + pSrc[2U * i3];
1142 
1143       /* xa' = xa + xb + xc + xd */
1144       pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1145 
1146       /* (xa + xb) - (xc + xd) */
1147       r1 = r1 - t1;
1148 
1149       /* yb + yd */
1150       t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1151 
1152       /* ya' = ya + yb + yc + yd */
1153       pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1154 
1155       /* (ya + yc) - (yb + yd) */
1156       s1 = s1 - t2;
1157 
1158       /* (yb-yd) */
1159       t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1160 
1161       /* (xb-xd) */
1162       t2 = pSrc[2U * i1] - pSrc[2U * i3];
1163 
1164       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1165       pSrc[2U * i1] = r1 * onebyfftLen;
1166 
1167       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1168       pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1169 
1170       /* (xa - xc) - (yb-yd) */
1171       r1 = r2 - t1;
1172 
1173       /* (xa - xc) + (yb-yd) */
1174       r2 = r2 + t1;
1175 
1176       /* (ya - yc) + (xb-xd) */
1177       s1 = s2 + t2;
1178 
1179       /* (ya - yc) - (xb-xd) */
1180       s2 = s2 - t2;
1181 
1182       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1183       pSrc[2U * i2] = r1 * onebyfftLen;
1184 
1185       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1186       pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1187 
1188       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1189       pSrc[2U * i3] = r2 * onebyfftLen;
1190 
1191       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1192       pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1193    }
1194 
1195 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
1196 }
1197 
1198 
1199