1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cfft_radix4_f16.c
4  * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
/* Reordering routine declared extern here; arm_cfft_radix4_f16 calls it
   after the butterfly stages when the instance requests bit reversal. */
extern void arm_bitreversal_f16(
        float16_t * pSrc,
        uint16_t fftSize,
        uint16_t bitRevFactor,
  const uint16_t * pBitRevTab);

/* Forward radix-4 butterfly core (defined later in this file). */
void arm_radix4_butterfly_f16(
        float16_t * pSrc,
        uint16_t fftLen,
  const float16_t * pCoef,
        uint16_t twidCoefModifier);

/* Inverse radix-4 butterfly core (defined later in this file); takes the
   additional 1/fftLen factor as onebyfftLen. */
void arm_radix4_butterfly_inverse_f16(
        float16_t * pSrc,
        uint16_t fftLen,
  const float16_t * pCoef,
        uint16_t twidCoefModifier,
        float16_t onebyfftLen);


/* One radix-2 pass followed by two half-length radix-4 transforms
   (defined later in this file). */
void arm_cfft_radix4by2_f16(
    float16_t * pSrc,
    uint32_t fftLen,
    const float16_t * pCoef);
57 
58 
59 /**
60   @ingroup groupTransforms
61  */
62 
63 /**
64   @addtogroup ComplexFFT
65   @{
66  */
67 
/*
* @brief  Floating-point CFFT helper: one radix-2 butterfly pass over the buffer followed by two radix-4 transforms of length fftLen/2.
* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
* @param[in]      fftLen           length of the FFT.
* @param[in]      *pCoef           points to the twiddle coefficient buffer.
* @return none.
*/
76 
void arm_cfft_radix4by2_f16(
    float16_t * pSrc,
    uint32_t fftLen,
    const float16_t * pCoef)
{
    /* Number of complex points in each half of the buffer. */
    const uint32_t half = fftLen >> 1;
    uint32_t k;

    /*
     * Radix-2 pass: pair point k of the lower half with point k of the
     * upper half. The sum stays in the lower half; the difference is
     * multiplied by twiddle k (stored as cos, sin pairs) and written to
     * the upper half. Every intermediate is kept in a float16_t so the
     * rounding points are the same for each operation.
     */
    for (k = 0; k < half; k++)
    {
        const uint32_t loRe = 2 * k;
        const uint32_t loIm = 2 * k + 1;
        const uint32_t hiRe = 2 * (k + half);
        const uint32_t hiIm = 2 * (k + half) + 1;

        const float16_t wCos = pCoef[loRe];
        const float16_t wSin = pCoef[loIm];

        const float16_t sumRe  = pSrc[loRe] + pSrc[hiRe];
        const float16_t diffRe = pSrc[loRe] - pSrc[hiRe];

        const float16_t diffIm = pSrc[loIm] - pSrc[hiIm];
        const float16_t sumIm  = pSrc[hiIm] + pSrc[loIm];

        const float16_t reCos = diffRe * wCos;
        const float16_t imSin = diffIm * wSin;
        const float16_t imCos = diffIm * wCos;
        const float16_t reSin = diffRe * wSin;

        pSrc[loRe] = sumRe;
        pSrc[loIm] = sumIm;

        pSrc[hiRe] = reCos + imSin;
        pSrc[hiIm] = imCos - reSin;
    }

    /* Lower half: radix-4 transform of length fftLen/2, twiddle stride 2. */
    arm_radix4_butterfly_f16(pSrc, half, pCoef, 2U);
    /* Upper half starts fftLen float16_t values into the buffer. */
    arm_radix4_butterfly_f16(pSrc + fftLen, half, pCoef, 2U);
}
123 
124 
125 /**
126   @brief         Processing function for the floating-point Radix-4 CFFT/CIFFT.
127   @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
128   @param[in]     S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
129   @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
130   @return        none
131  */
132 
void arm_cfft_radix4_f16(
  const arm_cfft_radix4_instance_f16 * S,
        float16_t * pSrc)
{
   /* Only ifftFlag == 1 selects the inverse transform; any other value
      runs the forward transform. */
   if (S->ifftFlag != 1U)
   {
      /*  Forward complex FFT, radix-4  */
      arm_radix4_butterfly_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
   }
   else
   {
      /*  Inverse complex FFT, radix-4 (receives the 1/fftLen factor)  */
      arm_radix4_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
   }

   /* Reordering is optional: leave the butterfly output as-is unless the
      instance asks for bit reversal. */
   if (S->bitReverseFlag != 1U)
   {
      return;
   }

   arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
155 
156 /**
157   @} end of ComplexFFT group
158  */
159 
160 /* ----------------------------------------------------------------------
161  * Internal helper function used by the FFTs
162  * ---------------------------------------------------------------------- */
163 
164 /*
165 * @brief  Core function for the floating-point CFFT butterfly process.
166 * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
167 * @param[in]      fftLen           length of the FFT.
168 * @param[in]      *pCoef           points to the twiddle coefficient buffer.
169 * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
170 * @return none.
171 */
172 
void arm_radix4_butterfly_f16(
float16_t * pSrc,
uint16_t fftLen,
const float16_t * pCoef,
uint16_t twidCoefModifier)
{
   /*
    * Forward radix-4 butterfly stages, computed in place on pSrc.
    *
    * pSrc is indexed as interleaved pairs: element 2*i is the x component
    * and 2*i + 1 the y component of point i. pCoef is indexed the same
    * way: 2*ia is the "co" value and 2*ia + 1 the "si" value of twiddle ia.
    * twidCoefModifier is the step through the twiddle table for the first
    * stage; it is multiplied by 4 after every stage.
    *
    * This function performs no reordering of the output itself; the caller
    * arm_cfft_radix4_f16 optionally runs arm_bitreversal_f16 afterwards.
    *
    * Two implementations are selected at compile time by ARM_MATH_DSP.
    * Both write the "c" result to index i1 and the "b" result to index i2.
    */

   float16_t co1, co2, co3, si1, si2, si3;
   uint32_t ia1, ia2, ia3;
   uint32_t i0, i1, i2, i3;
   uint32_t n1, n2, j, k;

#if defined (ARM_MATH_DSP)

   /* Variant built when ARM_MATH_DSP is defined: the first stage and the
      last stage are written out separately from the middle stages. */

   float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
   float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
   Ybminusd;
   float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
   float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
   float16_t *ptr1;
   float16_t p0,p1,p2,p3,p4,p5;
   float16_t a0,a1,a2,a3,a4,a5,a6,a7;

   /*  Initializations for the first stage */
   n2 = fftLen;
   n1 = n2;

   /* n2 = fftLen/4: distance (in points) between the four butterfly inputs */
   n2 >>= 2U;
   i0 = 0U;
   ia1 = 0U;

   /* One butterfly per value of i0 in [0, fftLen/4) */
   j = n2;

   /*  First stage: every butterfly uses its own twiddle triple */
   do
   {
      /*  Input indices: i0, i0 + fftLen/4, i0 + fftLen/2, i0 + 3*fftLen/4 */
      i1 = i0 + n2;
      i2 = i1 + n2;
      i3 = i2 + n2;

      /* Load the four points a, b, c, d */
      xaIn = pSrc[(2U * i0)];
      yaIn = pSrc[(2U * i0) + 1U];

      xbIn = pSrc[(2U * i1)];
      ybIn = pSrc[(2U * i1) + 1U];

      xcIn = pSrc[(2U * i2)];
      ycIn = pSrc[(2U * i2) + 1U];

      xdIn = pSrc[(2U * i3)];
      ydIn = pSrc[(2U * i3) + 1U];

      /* xa + xc */
      Xaplusc = xaIn + xcIn;
      /* xb + xd */
      Xbplusd = xbIn + xdIn;
      /* ya + yc */
      Yaplusc = yaIn + ycIn;
      /* yb + yd */
      Ybplusd = ybIn + ydIn;

      /*  Twiddle index 2*ia1 and its co/si values */
      ia2 = ia1 + ia1;
      co2 = pCoef[ia2 * 2U];
      si2 = pCoef[(ia2 * 2U) + 1U];

      /* xa - xc */
      Xaminusc = xaIn - xcIn;
      /* xb - xd */
      Xbminusd = xbIn - xdIn;
      /* ya - yc */
      Yaminusc = yaIn - ycIn;
      /* yb - yd */
      Ybminusd = ybIn - ydIn;

      /* xa' = xa + xb + xc + xd */
      pSrc[(2U * i0)] = Xaplusc + Xbplusd;
      /* ya' = ya + yb + yc + yd */
      pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;

      /* (xa - xc) + (yb - yd) */
      Xb12C_out = (Xaminusc + Ybminusd);
      /* (ya - yc) - (xb - xd) */
      Yb12C_out = (Yaminusc - Xbminusd);
      /* (xa + xc) - (xb + xd) */
      Xc12C_out = (Xaplusc - Xbplusd);
      /* (ya + yc) - (yb + yd) */
      Yc12C_out = (Yaplusc - Ybplusd);
      /* (xa - xc) - (yb - yd) */
      Xd12C_out = (Xaminusc - Ybminusd);
      /* (xb - xd) + (ya - yc) */
      Yd12C_out = (Xbminusd + Yaminusc);

      /* Twiddle at index ia1 */
      co1 = pCoef[ia1 * 2U];
      si1 = pCoef[(ia1 * 2U) + 1U];

      /*  Twiddle index 3*ia1 and its co/si values */
      ia3 = ia2 + ia1;
      co3 = pCoef[ia3 * 2U];
      si3 = pCoef[(ia3 * 2U) + 1U];

      /* "co" halves of the three twiddle products; each product is
         stored in its own float16_t before being combined below */
      Xb12_out = Xb12C_out * co1;
      Yb12_out = Yb12C_out * co1;
      Xc12_out = Xc12C_out * co2;
      Yc12_out = Yc12C_out * co2;
      Xd12_out = Xd12C_out * co3;
      Yd12_out = Yd12C_out * co3;

      /* "si" halves of the three twiddle products */
      /* (ya-xb-yc+xd) * si1 */
      p0 = Yb12C_out * si1;
      /* (xa+yb-xc-yd) * si1 */
      p1 = Xb12C_out * si1;
      /* (ya-yb+yc-yd) * si2 */
      p2 = Yc12C_out * si2;
      /* (xa-xb+xc-xd) * si2 */
      p3 = Xc12C_out * si2;
      /* (ya+xb-yc-xd) * si3 */
      p4 = Yd12C_out * si3;
      /* (xa-yb-xc+yd) * si3 */
      p5 = Xd12C_out * si3;

      /* Combine: x' = x*co + y*si, y' = y*co - x*si */
      Xb12_out += p0;
      Yb12_out -= p1;
      Xc12_out += p2;
      Yc12_out -= p3;
      Xd12_out += p4;
      Yd12_out -= p5;

      /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2), stored at i1 */
      pSrc[2U * i1] = Xc12_out;

      /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
      pSrc[(2U * i1) + 1U] = Yc12_out;

      /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1), stored at i2 */
      pSrc[2U * i2] = Xb12_out;

      /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
      pSrc[(2U * i2) + 1U] = Yb12_out;

      /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3), stored at i3 */
      pSrc[2U * i3] = Xd12_out;

      /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
      pSrc[(2U * i3) + 1U] = Yd12_out;

      /*  Advance to the twiddle of the next butterfly */
      ia1 += twidCoefModifier;

      /*  Advance to the next input point */
      i0++;

   }
   while (--j);

   /* Later stages step through the twiddle table 4x faster */
   twidCoefModifier <<= 2U;

   /*  Middle stages: second stage up to, but excluding, the last stage */
   for (k = fftLen >> 2U; k > 4U; k >>= 2U)
   {
      /*  n1 = span of one group in this stage, n2 = a quarter of it */
      n1 = n2;
      n2 >>= 2U;
      ia1 = 0U;

      /*  j selects the butterfly position inside a group; all groups
          share the same twiddle triple for a given j */
      j = 0;
      do
      {
         /*  Twiddle indices ia1, 2*ia1, 3*ia1 and their co/si values */
         ia2 = ia1 + ia1;
         ia3 = ia2 + ia1;
         co1 = pCoef[ia1 * 2U];
         si1 = pCoef[(ia1 * 2U) + 1U];
         co2 = pCoef[ia2 * 2U];
         si2 = pCoef[(ia2 * 2U) + 1U];
         co3 = pCoef[ia3 * 2U];
         si3 = pCoef[(ia3 * 2U) + 1U];

         /*  Advance to the twiddle for the next j */
         ia1 += twidCoefModifier;

         /* Visit position j in every group (groups are n1 points apart) */
         i0 = j;
         do
         {
            /*  Input indices: i0, i0 + n2, i0 + 2*n2, i0 + 3*n2 */
            i1 = i0 + n2;
            i2 = i1 + n2;
            i3 = i2 + n2;

            xaIn = pSrc[(2U * i0)];
            yaIn = pSrc[(2U * i0) + 1U];

            xbIn = pSrc[(2U * i1)];
            ybIn = pSrc[(2U * i1) + 1U];

            xcIn = pSrc[(2U * i2)];
            ycIn = pSrc[(2U * i2) + 1U];

            xdIn = pSrc[(2U * i3)];
            ydIn = pSrc[(2U * i3) + 1U];

            /* xa - xc */
            Xaminusc = xaIn - xcIn;
            /* (xb - xd) */
            Xbminusd = xbIn - xdIn;
            /* ya - yc */
            Yaminusc = yaIn - ycIn;
            /* (yb - yd) */
            Ybminusd = ybIn - ydIn;

            /* xa + xc */
            Xaplusc = xaIn + xcIn;
            /* xb + xd */
            Xbplusd = xbIn + xdIn;
            /* ya + yc */
            Yaplusc = yaIn + ycIn;
            /* yb + yd */
            Ybplusd = ybIn + ydIn;

            /* (xa - xc) + (yb - yd) */
            Xb12C_out = (Xaminusc + Ybminusd);
            /* (ya - yc) -  (xb - xd) */
            Yb12C_out = (Yaminusc - Xbminusd);
            /* xa + xc -(xb + xd) */
            Xc12C_out = (Xaplusc - Xbplusd);
            /* (ya + yc) - (yb + yd) */
            Yc12C_out = (Yaplusc - Ybplusd);
            /* (xa - xc) - (yb - yd) */
            Xd12C_out = (Xaminusc - Ybminusd);
            /* (ya - yc) +  (xb - xd) */
            Yd12C_out = (Xbminusd + Yaminusc);

            /* xa' = xa + xb + xc + xd, ya' = ya + yb + yc + yd */
            pSrc[(2U * i0)] = Xaplusc + Xbplusd;
            pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;

            /* "co" halves of the three twiddle products */
            Xb12_out = Xb12C_out * co1;
            Yb12_out = Yb12C_out * co1;
            Xc12_out = Xc12C_out * co2;
            Yc12_out = Yc12C_out * co2;
            Xd12_out = Xd12C_out * co3;
            Yd12_out = Yd12C_out * co3;

            /* "si" halves of the three twiddle products */
            /* (ya-xb-yc+xd) * si1 */
            p0 = Yb12C_out * si1;
            /* (xa+yb-xc-yd) * si1 */
            p1 = Xb12C_out * si1;
            /* (ya-yb+yc-yd) * si2 */
            p2 = Yc12C_out * si2;
            /* (xa-xb+xc-xd) * si2 */
            p3 = Xc12C_out * si2;
            /* (ya+xb-yc-xd) * si3 */
            p4 = Yd12C_out * si3;
            /* (xa-yb-xc+yd) * si3 */
            p5 = Xd12C_out * si3;

            /* Combine: x' = x*co + y*si, y' = y*co - x*si */
            Xb12_out += p0;
            Yb12_out -= p1;
            Xc12_out += p2;
            Yc12_out -= p3;
            Xd12_out += p4;
            Yd12_out -= p5;

            /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2), stored at i1 */
            pSrc[2U * i1] = Xc12_out;

            /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
            pSrc[(2U * i1) + 1U] = Yc12_out;

            /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1), stored at i2 */
            pSrc[2U * i2] = Xb12_out;

            /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
            pSrc[(2U * i2) + 1U] = Yb12_out;

            /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3), stored at i3 */
            pSrc[2U * i3] = Xd12_out;

            /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
            pSrc[(2U * i3) + 1U] = Yd12_out;

            /* Same position in the next group */
            i0 += n1;
         } while (i0 < fftLen);
         j++;
      } while (j <= (n2 - 1U));
      /* Next stage steps through the twiddle table 4x faster */
      twidCoefModifier <<= 2U;
   }

   /* Last stage: fftLen/4 butterflies, each over four consecutive points */
   j = fftLen >> 2;
   ptr1 = &pSrc[0];

   /*  Last stage: sums and differences only, no twiddle multiplications */
   do
   {
      /* Four consecutive points a, b, c, d (8 float16_t values) */
      xaIn = ptr1[0];
      yaIn = ptr1[1];
      xbIn = ptr1[2];
      ybIn = ptr1[3];
      xcIn = ptr1[4];
      ycIn = ptr1[5];
      xdIn = ptr1[6];
      ydIn = ptr1[7];

      /* xa + xc */
      Xaplusc = xaIn + xcIn;

      /* xa - xc */
      Xaminusc = xaIn - xcIn;

      /* ya + yc */
      Yaplusc = yaIn + ycIn;

      /* ya - yc */
      Yaminusc = yaIn - ycIn;

      /* xb + xd */
      Xbplusd = xbIn + xdIn;

      /* yb + yd */
      Ybplusd = ybIn + ydIn;

      /* (xb-xd) */
      Xbminusd = xbIn - xdIn;

      /* (yb-yd) */
      Ybminusd = ybIn - ydIn;

      /* xa' = xa + xb + xc + xd */
      a0 = (Xaplusc + Xbplusd);
      /* ya' = ya + yb + yc + yd */
      a1 = (Yaplusc + Ybplusd);
      /* xc' = (xa-xb+xc-xd) */
      a2 = (Xaplusc - Xbplusd);
      /* yc' = (ya-yb+yc-yd) */
      a3 = (Yaplusc - Ybplusd);
      /* xb' = (xa+yb-xc-yd) */
      a4 = (Xaminusc + Ybminusd);
      /* yb' = (ya-xb-yc+xd) */
      a5 = (Yaminusc - Xbminusd);
      /* xd' = (xa-yb-xc+yd) */
      a6 = (Xaminusc - Ybminusd);
      /* yd' = (ya+xb-yc-xd) */
      a7 = (Xbminusd + Yaminusc);

      /* Written back in the order a', c', b', d' */
      ptr1[0] = a0;
      ptr1[1] = a1;
      ptr1[2] = a2;
      ptr1[3] = a3;
      ptr1[4] = a4;
      ptr1[5] = a5;
      ptr1[6] = a6;
      ptr1[7] = a7;

      /* Advance by 8 float16_t values (four points) */
      ptr1 += 8U;
   } while (--j);

#else

   float16_t t1, t2, r1, r2, s1, s2;

   /* Variant built when ARM_MATH_DSP is not defined: a single loop nest
      handles every stage, including the first and the last. */

   /*  Initializations for the fft calculation */
   n2 = fftLen;
   n1 = n2;
   /* One iteration per radix-4 stage */
   for (k = fftLen; k > 1U; k >>= 2U)
   {
      /*  n1 = span of one group in this stage, n2 = a quarter of it */
      n1 = n2;
      n2 >>= 2U;
      ia1 = 0U;

      /*  j selects the butterfly position inside a group */
      j = 0;
      do
      {
         /*  Twiddle indices ia1, 2*ia1, 3*ia1 and their co/si values */
         ia2 = ia1 + ia1;
         ia3 = ia2 + ia1;
         co1 = pCoef[ia1 * 2U];
         si1 = pCoef[(ia1 * 2U) + 1U];
         co2 = pCoef[ia2 * 2U];
         si2 = pCoef[(ia2 * 2U) + 1U];
         co3 = pCoef[ia3 * 2U];
         si3 = pCoef[(ia3 * 2U) + 1U];

         /*  Advance to the twiddle for the next j */
         ia1 = ia1 + twidCoefModifier;

         /* Visit position j in every group (groups are n1 points apart) */
         i0 = j;
         do
         {
            /*  Input indices: i0, i0 + n2, i0 + 2*n2, i0 + 3*n2 */
            i1 = i0 + n2;
            i2 = i1 + n2;
            i3 = i2 + n2;

            /* xa + xc */
            r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];

            /* xa - xc */
            r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];

            /* ya + yc */
            s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];

            /* ya - yc */
            s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];

            /* xb + xd */
            t1 = pSrc[2U * i1] + pSrc[2U * i3];

            /* xa' = xa + xb + xc + xd */
            pSrc[2U * i0] = r1 + t1;

            /* xa + xc -(xb + xd) */
            r1 = r1 - t1;

            /* yb + yd */
            t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];

            /* ya' = ya + yb + yc + yd */
            pSrc[(2U * i0) + 1U] = s1 + t2;

            /* (ya + yc) - (yb + yd) */
            s1 = s1 - t2;

            /* (yb - yd); read before point i1 is overwritten below */
            t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];

            /* (xb - xd); read before point i1 is overwritten below */
            t2 = pSrc[2U * i1] - pSrc[2U * i3];

            /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2), stored at i1 */
            pSrc[2U * i1] = (r1 * co2) + (s1 * si2);

            /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
            pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);

            /* (xa - xc) + (yb - yd) */
            r1 = r2 + t1;

            /* (xa - xc) - (yb - yd) */
            r2 = r2 - t1;

            /* (ya - yc) -  (xb - xd) */
            s1 = s2 - t2;

            /* (ya - yc) +  (xb - xd) */
            s2 = s2 + t2;

            /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1), stored at i2 */
            pSrc[2U * i2] = (r1 * co1) + (s1 * si1);

            /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
            pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);

            /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3), stored at i3 */
            pSrc[2U * i3] = (r2 * co3) + (s2 * si3);

            /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
            pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);

            /* Same position in the next group */
            i0 += n1;
         } while ( i0 < fftLen);
         j++;
      } while (j <= (n2 - 1U));
      /* Next stage steps through the twiddle table 4x faster */
      twidCoefModifier <<= 2U;
   }

#endif /* #if defined (ARM_MATH_DSP) */

}
666 
667 /*
668 * @brief  Core function for the floating-point CIFFT butterfly process.
669 * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
670 * @param[in]      fftLen           length of the FFT.
671 * @param[in]      *pCoef           points to twiddle coefficient buffer.
672 * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
673 * @param[in]      onebyfftLen      value of 1/fftLen.
674 * @return none.
675 */
676 
arm_radix4_butterfly_inverse_f16(float16_t * pSrc,uint16_t fftLen,const float16_t * pCoef,uint16_t twidCoefModifier,float16_t onebyfftLen)677 void arm_radix4_butterfly_inverse_f16(
678 float16_t * pSrc,
679 uint16_t fftLen,
680 const float16_t * pCoef,
681 uint16_t twidCoefModifier,
682 float16_t onebyfftLen)
683 {
684    float16_t co1, co2, co3, si1, si2, si3;
685    uint32_t ia1, ia2, ia3;
686    uint32_t i0, i1, i2, i3;
687    uint32_t n1, n2, j, k;
688 
689 #if defined (ARM_MATH_DSP)
690 
691    float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
692    float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
693    Ybminusd;
694    float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
695    float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
696    float16_t *ptr1;
697    float16_t p0,p1,p2,p3,p4,p5,p6,p7;
698    float16_t a0,a1,a2,a3,a4,a5,a6,a7;
699 
700 
701    /*  Initializations for the first stage */
702    n2 = fftLen;
703    n1 = n2;
704 
705    /* n2 = fftLen/4 */
706    n2 >>= 2U;
707    i0 = 0U;
708    ia1 = 0U;
709 
710    j = n2;
711 
712    /*  Calculation of first stage */
713    do
714    {
715       /*  index calculation for the input as, */
716       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
717       i1 = i0 + n2;
718       i2 = i1 + n2;
719       i3 = i2 + n2;
720 
721       /*  Butterfly implementation */
722       xaIn = pSrc[(2U * i0)];
723       yaIn = pSrc[(2U * i0) + 1U];
724 
725       xcIn = pSrc[(2U * i2)];
726       ycIn = pSrc[(2U * i2) + 1U];
727 
728       xbIn = pSrc[(2U * i1)];
729       ybIn = pSrc[(2U * i1) + 1U];
730 
731       xdIn = pSrc[(2U * i3)];
732       ydIn = pSrc[(2U * i3) + 1U];
733 
734       /* xa + xc */
735       Xaplusc = xaIn + xcIn;
736       /* xb + xd */
737       Xbplusd = xbIn + xdIn;
738       /* ya + yc */
739       Yaplusc = yaIn + ycIn;
740       /* yb + yd */
741       Ybplusd = ybIn + ydIn;
742 
743       /*  index calculation for the coefficients */
744       ia2 = ia1 + ia1;
745       co2 = pCoef[ia2 * 2U];
746       si2 = pCoef[(ia2 * 2U) + 1U];
747 
748       /* xa - xc */
749       Xaminusc = xaIn - xcIn;
750       /* xb - xd */
751       Xbminusd = xbIn - xdIn;
752       /* ya - yc */
753       Yaminusc = yaIn - ycIn;
754       /* yb - yd */
755       Ybminusd = ybIn - ydIn;
756 
757       /* xa' = xa + xb + xc + xd */
758       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
759 
760       /* ya' = ya + yb + yc + yd */
761       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
762 
763       /* (xa - xc) - (yb - yd) */
764       Xb12C_out = (Xaminusc - Ybminusd);
765       /* (ya - yc) + (xb - xd) */
766       Yb12C_out = (Yaminusc + Xbminusd);
767       /* (xa + xc) - (xb + xd) */
768       Xc12C_out = (Xaplusc - Xbplusd);
769       /* (ya + yc) - (yb + yd) */
770       Yc12C_out = (Yaplusc - Ybplusd);
771       /* (xa - xc) + (yb - yd) */
772       Xd12C_out = (Xaminusc + Ybminusd);
773       /* (ya - yc) - (xb - xd) */
774       Yd12C_out = (Yaminusc - Xbminusd);
775 
776       co1 = pCoef[ia1 * 2U];
777       si1 = pCoef[(ia1 * 2U) + 1U];
778 
779       /*  index calculation for the coefficients */
780       ia3 = ia2 + ia1;
781       co3 = pCoef[ia3 * 2U];
782       si3 = pCoef[(ia3 * 2U) + 1U];
783 
784       Xb12_out = Xb12C_out * co1;
785       Yb12_out = Yb12C_out * co1;
786       Xc12_out = Xc12C_out * co2;
787       Yc12_out = Yc12C_out * co2;
788       Xd12_out = Xd12C_out * co3;
789       Yd12_out = Yd12C_out * co3;
790 
791       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
792       //Xb12_out -= Yb12C_out * si1;
793       p0 = Yb12C_out * si1;
794       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
795       //Yb12_out += Xb12C_out * si1;
796       p1 = Xb12C_out * si1;
797       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
798       //Xc12_out -= Yc12C_out * si2;
799       p2 = Yc12C_out * si2;
800       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
801       //Yc12_out += Xc12C_out * si2;
802       p3 = Xc12C_out * si2;
803       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
804       //Xd12_out -= Yd12C_out * si3;
805       p4 = Yd12C_out * si3;
806       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
807       //Yd12_out += Xd12C_out * si3;
808       p5 = Xd12C_out * si3;
809 
810       Xb12_out -= p0;
811       Yb12_out += p1;
812       Xc12_out -= p2;
813       Yc12_out += p3;
814       Xd12_out -= p4;
815       Yd12_out += p5;
816 
817       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
818       pSrc[2U * i1] = Xc12_out;
819 
820       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
821       pSrc[(2U * i1) + 1U] = Yc12_out;
822 
823       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
824       pSrc[2U * i2] = Xb12_out;
825 
826       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
827       pSrc[(2U * i2) + 1U] = Yb12_out;
828 
829       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
830       pSrc[2U * i3] = Xd12_out;
831 
832       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
833       pSrc[(2U * i3) + 1U] = Yd12_out;
834 
835       /*  Twiddle coefficients index modifier */
836       ia1 = ia1 + twidCoefModifier;
837 
838       /*  Updating input index */
839       i0 = i0 + 1U;
840 
841    } while (--j);
842 
843    twidCoefModifier <<= 2U;
844 
845    /*  Calculation of second stage to excluding last stage */
846    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
847    {
848       /*  Initializations for the first stage */
849       n1 = n2;
850       n2 >>= 2U;
851       ia1 = 0U;
852 
853       /*  Calculation of first stage */
854       j = 0;
855       do
856       {
857          /*  index calculation for the coefficients */
858          ia2 = ia1 + ia1;
859          ia3 = ia2 + ia1;
860          co1 = pCoef[ia1 * 2U];
861          si1 = pCoef[(ia1 * 2U) + 1U];
862          co2 = pCoef[ia2 * 2U];
863          si2 = pCoef[(ia2 * 2U) + 1U];
864          co3 = pCoef[ia3 * 2U];
865          si3 = pCoef[(ia3 * 2U) + 1U];
866 
867          /*  Twiddle coefficients index modifier */
868          ia1 = ia1 + twidCoefModifier;
869 
870          i0 = j;
871          do
872          {
873             /*  index calculation for the input as, */
874             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
875             i1 = i0 + n2;
876             i2 = i1 + n2;
877             i3 = i2 + n2;
878 
879             xaIn = pSrc[(2U * i0)];
880             yaIn = pSrc[(2U * i0) + 1U];
881 
882             xbIn = pSrc[(2U * i1)];
883             ybIn = pSrc[(2U * i1) + 1U];
884 
885             xcIn = pSrc[(2U * i2)];
886             ycIn = pSrc[(2U * i2) + 1U];
887 
888             xdIn = pSrc[(2U * i3)];
889             ydIn = pSrc[(2U * i3) + 1U];
890 
891             /* xa - xc */
892             Xaminusc = xaIn - xcIn;
893             /* (xb - xd) */
894             Xbminusd = xbIn - xdIn;
895             /* ya - yc */
896             Yaminusc = yaIn - ycIn;
897             /* (yb - yd) */
898             Ybminusd = ybIn - ydIn;
899 
900             /* xa + xc */
901             Xaplusc = xaIn + xcIn;
902             /* xb + xd */
903             Xbplusd = xbIn + xdIn;
904             /* ya + yc */
905             Yaplusc = yaIn + ycIn;
906             /* yb + yd */
907             Ybplusd = ybIn + ydIn;
908 
909             /* (xa - xc) - (yb - yd) */
910             Xb12C_out = (Xaminusc - Ybminusd);
911             /* (ya - yc) +  (xb - xd) */
912             Yb12C_out = (Yaminusc + Xbminusd);
913             /* xa + xc -(xb + xd) */
914             Xc12C_out = (Xaplusc - Xbplusd);
915             /* (ya + yc) - (yb + yd) */
916             Yc12C_out = (Yaplusc - Ybplusd);
917             /* (xa - xc) + (yb - yd) */
918             Xd12C_out = (Xaminusc + Ybminusd);
919             /* (ya - yc) -  (xb - xd) */
920             Yd12C_out = (Yaminusc - Xbminusd);
921 
922             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
923             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
924 
925             Xb12_out = Xb12C_out * co1;
926             Yb12_out = Yb12C_out * co1;
927             Xc12_out = Xc12C_out * co2;
928             Yc12_out = Yc12C_out * co2;
929             Xd12_out = Xd12C_out * co3;
930             Yd12_out = Yd12C_out * co3;
931 
932             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
933             //Xb12_out -= Yb12C_out * si1;
934             p0 = Yb12C_out * si1;
935             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
936             //Yb12_out += Xb12C_out * si1;
937             p1 = Xb12C_out * si1;
938             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
939             //Xc12_out -= Yc12C_out * si2;
940             p2 = Yc12C_out * si2;
941             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
942             //Yc12_out += Xc12C_out * si2;
943             p3 = Xc12C_out * si2;
944             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
945             //Xd12_out -= Yd12C_out * si3;
946             p4 = Yd12C_out * si3;
947             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
948             //Yd12_out += Xd12C_out * si3;
949             p5 = Xd12C_out * si3;
950 
951             Xb12_out -= p0;
952             Yb12_out += p1;
953             Xc12_out -= p2;
954             Yc12_out += p3;
955             Xd12_out -= p4;
956             Yd12_out += p5;
957 
958             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
959             pSrc[2U * i1] = Xc12_out;
960 
961             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
962             pSrc[(2U * i1) + 1U] = Yc12_out;
963 
964             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
965             pSrc[2U * i2] = Xb12_out;
966 
967             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
968             pSrc[(2U * i2) + 1U] = Yb12_out;
969 
970             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
971             pSrc[2U * i3] = Xd12_out;
972 
973             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
974             pSrc[(2U * i3) + 1U] = Yd12_out;
975 
976             i0 += n1;
977          } while (i0 < fftLen);
978          j++;
979       } while (j <= (n2 - 1U));
980       twidCoefModifier <<= 2U;
981    }
982    /*  Initializations of last stage */
983 
984    j = fftLen >> 2;
985    ptr1 = &pSrc[0];
986 
987    /*  Calculations of last stage */
988    do
989    {
990       xaIn = ptr1[0];
991       yaIn = ptr1[1];
992       xbIn = ptr1[2];
993       ybIn = ptr1[3];
994       xcIn = ptr1[4];
995       ycIn = ptr1[5];
996       xdIn = ptr1[6];
997       ydIn = ptr1[7];
998 
999       /*  Butterfly implementation */
1000       /* xa + xc */
1001       Xaplusc = xaIn + xcIn;
1002 
1003       /* xa - xc */
1004       Xaminusc = xaIn - xcIn;
1005 
1006       /* ya + yc */
1007       Yaplusc = yaIn + ycIn;
1008 
1009       /* ya - yc */
1010       Yaminusc = yaIn - ycIn;
1011 
1012       /* xb + xd */
1013       Xbplusd = xbIn + xdIn;
1014 
1015       /* yb + yd */
1016       Ybplusd = ybIn + ydIn;
1017 
1018       /* (xb-xd) */
1019       Xbminusd = xbIn - xdIn;
1020 
1021       /* (yb-yd) */
1022       Ybminusd = ybIn - ydIn;
1023 
1024       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
1025       a0 = (Xaplusc + Xbplusd);
1026       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
1027       a1 = (Yaplusc + Ybplusd);
1028       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1029       a2 = (Xaplusc - Xbplusd);
1030       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
1031       a3 = (Yaplusc - Ybplusd);
1032       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1033       a4 = (Xaminusc - Ybminusd);
1034       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1035       a5 = (Yaminusc + Xbminusd);
      /* xd' = (xa+yb-xc-yd) * onebyfftLen */
1037       a6 = (Xaminusc + Ybminusd);
1038       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1039       a7 = (Yaminusc - Xbminusd);
1040 
1041       p0 = a0 * onebyfftLen;
1042       p1 = a1 * onebyfftLen;
1043       p2 = a2 * onebyfftLen;
1044       p3 = a3 * onebyfftLen;
1045       p4 = a4 * onebyfftLen;
1046       p5 = a5 * onebyfftLen;
1047       p6 = a6 * onebyfftLen;
1048       p7 = a7 * onebyfftLen;
1049 
1050       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
1051       ptr1[0] = p0;
1052       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
1053       ptr1[1] = p1;
1054       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1055       ptr1[2] = p2;
1056       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
1057       ptr1[3] = p3;
1058       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1059       ptr1[4] = p4;
1060       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1061       ptr1[5] = p5;
      /* xd' = (xa+yb-xc-yd) * onebyfftLen */
1063       ptr1[6] = p6;
1064       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1065       ptr1[7] = p7;
1066 
1067       /* increment source pointer by 8 for next calculations */
1068       ptr1 = ptr1 + 8U;
1069 
1070    } while (--j);
1071 
1072 #else
1073 
1074    float16_t t1, t2, r1, r2, s1, s2;
1075 
1076    /* Run the below code for Cortex-M0 */
1077 
1078    /*  Initializations for the first stage */
1079    n2 = fftLen;
1080    n1 = n2;
1081 
1082    /*  Calculation of first stage */
1083    for (k = fftLen; k > 4U; k >>= 2U)
1084    {
1085       /*  Initializations for the first stage */
1086       n1 = n2;
1087       n2 >>= 2U;
1088       ia1 = 0U;
1089 
1090       /*  Calculation of first stage */
1091       j = 0;
1092       do
1093       {
1094          /*  index calculation for the coefficients */
1095          ia2 = ia1 + ia1;
1096          ia3 = ia2 + ia1;
1097          co1 = pCoef[ia1 * 2U];
1098          si1 = pCoef[(ia1 * 2U) + 1U];
1099          co2 = pCoef[ia2 * 2U];
1100          si2 = pCoef[(ia2 * 2U) + 1U];
1101          co3 = pCoef[ia3 * 2U];
1102          si3 = pCoef[(ia3 * 2U) + 1U];
1103 
1104          /*  Twiddle coefficients index modifier */
1105          ia1 = ia1 + twidCoefModifier;
1106 
1107          i0 = j;
1108          do
1109          {
1110             /*  index calculation for the input as, */
1111             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1112             i1 = i0 + n2;
1113             i2 = i1 + n2;
1114             i3 = i2 + n2;
1115 
1116             /* xa + xc */
1117             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1118 
1119             /* xa - xc */
1120             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1121 
1122             /* ya + yc */
1123             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1124 
1125             /* ya - yc */
1126             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1127 
1128             /* xb + xd */
1129             t1 = pSrc[2U * i1] + pSrc[2U * i3];
1130 
1131             /* xa' = xa + xb + xc + xd */
1132             pSrc[2U * i0] = r1 + t1;
1133 
1134             /* xa + xc -(xb + xd) */
1135             r1 = r1 - t1;
1136 
1137             /* yb + yd */
1138             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1139 
1140             /* ya' = ya + yb + yc + yd */
1141             pSrc[(2U * i0) + 1U] = s1 + t2;
1142 
1143             /* (ya + yc) - (yb + yd) */
1144             s1 = s1 - t2;
1145 
1146             /* (yb - yd) */
1147             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1148 
1149             /* (xb - xd) */
1150             t2 = pSrc[2U * i1] - pSrc[2U * i3];
1151 
1152             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1153             pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1154 
1155             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1156             pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1157 
1158             /* (xa - xc) - (yb - yd) */
1159             r1 = r2 - t1;
1160 
1161             /* (xa - xc) + (yb - yd) */
1162             r2 = r2 + t1;
1163 
1164             /* (ya - yc) +  (xb - xd) */
1165             s1 = s2 + t2;
1166 
1167             /* (ya - yc) -  (xb - xd) */
1168             s2 = s2 - t2;
1169 
            /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
1171             pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1172 
            /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
1174             pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1175 
            /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
1177             pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1178 
            /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
1180             pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1181 
1182             i0 += n1;
1183          } while ( i0 < fftLen);
1184          j++;
1185       } while (j <= (n2 - 1U));
1186       twidCoefModifier <<= 2U;
1187    }
1188    /*  Initializations of last stage */
1189    n1 = n2;
1190    n2 >>= 2U;
1191 
1192    /*  Calculations of last stage */
1193    for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1194    {
1195       /*  index calculation for the input as, */
1196       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1197       i1 = i0 + n2;
1198       i2 = i1 + n2;
1199       i3 = i2 + n2;
1200 
1201       /*  Butterfly implementation */
1202       /* xa + xc */
1203       r1 = pSrc[2U * i0] + pSrc[2U * i2];
1204 
1205       /* xa - xc */
1206       r2 = pSrc[2U * i0] - pSrc[2U * i2];
1207 
1208       /* ya + yc */
1209       s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1210 
1211       /* ya - yc */
1212       s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1213 
      /* xb + xd */
1215       t1 = pSrc[2U * i1] + pSrc[2U * i3];
1216 
      /* xa' = (xa + xb + xc + xd) * onebyfftLen */
1218       pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1219 
      /* (xa + xc) - (xb + xd) */
1221       r1 = r1 - t1;
1222 
1223       /* yb + yd */
1224       t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1225 
      /* ya' = (ya + yb + yc + yd) * onebyfftLen */
1227       pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1228 
1229       /* (ya + yc) - (yb + yd) */
1230       s1 = s1 - t2;
1231 
1232       /* (yb-yd) */
1233       t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1234 
1235       /* (xb-xd) */
1236       t2 = pSrc[2U * i1] - pSrc[2U * i3];
1237 
      /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1239       pSrc[2U * i1] = r1 * onebyfftLen;
1240 
      /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1242       pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1243 
1244       /* (xa - xc) - (yb-yd) */
1245       r1 = r2 - t1;
1246 
1247       /* (xa - xc) + (yb-yd) */
1248       r2 = r2 + t1;
1249 
1250       /* (ya - yc) + (xb-xd) */
1251       s1 = s2 + t2;
1252 
1253       /* (ya - yc) - (xb-xd) */
1254       s2 = s2 - t2;
1255 
      /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1257       pSrc[2U * i2] = r1 * onebyfftLen;
1258 
      /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1260       pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1261 
      /* xd' = (xa+yb-xc-yd) * onebyfftLen */
1263       pSrc[2U * i3] = r2 * onebyfftLen;
1264 
      /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1266       pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1267    }
1268 
1269 #endif /* #if defined (ARM_MATH_DSP) */
1270 }
1271 
1272 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */