1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cfft_radix2_f16.c
4  * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions_f16.h"
30 
31 /**
32  * @defgroup ComplexFFTDeprecated Deprecated Complex FFT functions
33  */
34 
35 #if defined(ARM_FLOAT16_SUPPORTED)
36 
37 void arm_radix2_butterfly_f16(
38         float16_t * pSrc,
39         uint32_t fftLen,
40   const float16_t * pCoef,
41         uint16_t twidCoefModifier);
42 
43 void arm_radix2_butterfly_inverse_f16(
44         float16_t * pSrc,
45         uint32_t fftLen,
46   const float16_t * pCoef,
47         uint16_t twidCoefModifier,
48         float16_t onebyfftLen);
49 
50 extern void arm_bitreversal_f16(
51         float16_t * pSrc,
52         uint16_t fftSize,
53         uint16_t bitRevFactor,
54   const uint16_t * pBitRevTab);
55 
56 /**
57   @ingroup ComplexFFT
58  */
59 
60 /**
61   @addtogroup ComplexFFTDeprecated
62   @{
63  */
64 
65 /**
66   @brief         Radix-2 CFFT/CIFFT.
67   @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future
68   @param[in]     S    points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
69   @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
70  */
71 
arm_cfft_radix2_f16(const arm_cfft_radix2_instance_f16 * S,float16_t * pSrc)72 void arm_cfft_radix2_f16(
73 const arm_cfft_radix2_instance_f16 * S,
74       float16_t * pSrc)
75 {
76 
77    if (S->ifftFlag == 1U)
78    {
79       /* Complex IFFT radix-2 */
80       arm_radix2_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle,
81       S->twidCoefModifier, S->onebyfftLen);
82    }
83    else
84    {
85       /* Complex FFT radix-2 */
86       arm_radix2_butterfly_f16(pSrc, S->fftLen, S->pTwiddle,
87       S->twidCoefModifier);
88    }
89 
90    if (S->bitReverseFlag == 1U)
91    {
92       /* Bit Reversal */
93       arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
94    }
95 
96 }
97 
98 
99 /**
100   @} end of ComplexFFTDeprecated group
101  */
102 
103 
104 
105 /* ----------------------------------------------------------------------
106 ** Internal helper function used by the FFTs
107 ** ------------------------------------------------------------------- */
108 
109 /*
110 * @brief  Core function for the floating-point CFFT butterfly process.
111 * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
112 * @param[in]      fftLen           length of the FFT.
113 * @param[in]      *pCoef           points to the twiddle coefficient buffer.
114 * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
115 */
116 
arm_radix2_butterfly_f16(float16_t * pSrc,uint32_t fftLen,const float16_t * pCoef,uint16_t twidCoefModifier)117 void arm_radix2_butterfly_f16(
118 float16_t * pSrc,
119 uint32_t fftLen,
120 const float16_t * pCoef,
121 uint16_t twidCoefModifier)
122 {
123 
124    uint32_t i, j, k, l;
125    uint32_t n1, n2, ia;
126    float16_t xt, yt, cosVal, sinVal;
127    float16_t p0, p1, p2, p3;
128    float16_t a0, a1;
129 
130 #if defined (ARM_MATH_DSP)
131 
132    /*  Initializations for the first stage */
133    n2 = fftLen >> 1;
134    ia = 0;
135    i = 0;
136 
137    // loop for groups
138    for (k = n2; k > 0; k--)
139    {
140       cosVal = pCoef[ia * 2];
141       sinVal = pCoef[(ia * 2) + 1];
142 
143       /*  Twiddle coefficients index modifier */
144       ia += twidCoefModifier;
145 
146       /*  index calculation for the input as, */
147       /*  pSrc[i + 0], pSrc[i + fftLen/1] */
148       l = i + n2;
149 
150       /*  Butterfly implementation */
151       a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
152       xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
153 
154       yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
155       a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
156 
157       p0 = (_Float16)xt * (_Float16)cosVal;
158       p1 = (_Float16)yt * (_Float16)sinVal;
159       p2 = (_Float16)yt * (_Float16)cosVal;
160       p3 = (_Float16)xt * (_Float16)sinVal;
161 
162       pSrc[2 * i]     = a0;
163       pSrc[2 * i + 1] = a1;
164 
165       pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
166       pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
167 
168       i++;
169    }                             // groups loop end
170 
171    twidCoefModifier <<= 1U;
172 
173    // loop for stage
174    for (k = n2; k > 2; k = k >> 1)
175    {
176       n1 = n2;
177       n2 = n2 >> 1;
178       ia = 0;
179 
180       // loop for groups
181       j = 0;
182       do
183       {
184          cosVal = pCoef[ia * 2];
185          sinVal = pCoef[(ia * 2) + 1];
186          ia += twidCoefModifier;
187 
188          // loop for butterfly
189          i = j;
190          do
191          {
192             l = i + n2;
193             a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
194             xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
195 
196             yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
197             a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
198 
199             p0 = (_Float16)xt * (_Float16)cosVal;
200             p1 = (_Float16)yt * (_Float16)sinVal;
201             p2 = (_Float16)yt * (_Float16)cosVal;
202             p3 = (_Float16)xt * (_Float16)sinVal;
203 
204             pSrc[2 * i] = a0;
205             pSrc[2 * i + 1] = a1;
206 
207             pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
208             pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
209 
210             i += n1;
211          } while ( i < fftLen );                        // butterfly loop end
212          j++;
213       } while ( j < n2);                          // groups loop end
214       twidCoefModifier <<= 1U;
215    }                             // stages loop end
216 
217    // loop for butterfly
218    for (i = 0; i < fftLen; i += 2)
219    {
220       a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
221       xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
222 
223       yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
224       a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
225 
226       pSrc[2 * i] = a0;
227       pSrc[2 * i + 1] = a1;
228       pSrc[2 * i + 2] = xt;
229       pSrc[2 * i + 3] = yt;
230    }                             // groups loop end
231 
232 #else
233 
234    n2 = fftLen;
235 
236    // loop for stage
237    for (k = fftLen; k > 1; k = k >> 1)
238    {
239       n1 = n2;
240       n2 = n2 >> 1;
241       ia = 0;
242 
243       // loop for groups
244       j = 0;
245       do
246       {
247          cosVal = pCoef[ia * 2];
248          sinVal = pCoef[(ia * 2) + 1];
249          ia += twidCoefModifier;
250 
251          // loop for butterfly
252          i = j;
253          do
254          {
255             l = i + n2;
256             a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
257             xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
258 
259             yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
260             a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
261 
262             p0 = (_Float16)xt * (_Float16)cosVal;
263             p1 = (_Float16)yt * (_Float16)sinVal;
264             p2 = (_Float16)yt * (_Float16)cosVal;
265             p3 = (_Float16)xt * (_Float16)sinVal;
266 
267             pSrc[2 * i] = a0;
268             pSrc[2 * i + 1] = a1;
269 
270             pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
271             pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
272 
273             i += n1;
274          } while (i < fftLen);
275          j++;
276       } while (j < n2);
277       twidCoefModifier <<= 1U;
278    }
279 
280 #endif //    #if defined (ARM_MATH_DSP)
281 
282 }
283 
284 
/*
* @brief  Inverse radix-2 butterfly passes of the floating-point CIFFT, computed in place.
* @param[in, out] *pSrc            points to the in-place buffer; entries 2*n and 2*n+1 are processed together as one complex sample.
* @param[in]      fftLen           length of the FFT.
* @param[in]      *pCoef           points to the twiddle coefficient buffer; entry 2*m is used as the cosine and 2*m+1 as the sine value.
* @param[in]      twidCoefModifier step applied to the twiddle index; it is doubled after every stage.
* @param[in]      onebyfftLen      factor multiplied into every output of the last stage.
*
* Each twiddled butterfly replaces the upper sample by (upper + lower) and the
* lower sample by (upper - lower) * (cos + j*sin).
*/
void arm_radix2_butterfly_inverse_f16(
        float16_t * pSrc,
        uint32_t fftLen,
  const float16_t * pCoef,
        uint16_t twidCoefModifier,
        float16_t onebyfftLen)
{
   uint32_t upper, lower;                 /* sample indices of the two butterfly inputs */
   uint32_t group, stage;                 /* loop counters */
   uint32_t span, half, twIdx;            /* butterfly stride, input distance, twiddle index */
   float16_t *pUp, *pLo;                  /* addresses of the upper / lower complex sample */
   float16_t sumRe, sumIm, difRe, difIm;
   float16_t wCos, wSin;
   float16_t reCos, imSin, imCos, reSin;
   float16_t outUpRe, outUpIm, outLoRe, outLoIm;

#if defined (ARM_MATH_DSP)

   /* ---- First stage: every butterfly uses its own twiddle factor ---- */
   half  = fftLen >> 1;
   twIdx = 0;

   for (upper = 0; upper < half; upper++)
   {
      wCos = pCoef[twIdx * 2];
      wSin = pCoef[(twIdx * 2) + 1];
      twIdx += twidCoefModifier;

      lower = upper + half;
      pUp = &pSrc[2 * upper];
      pLo = &pSrc[2 * lower];

      /* read everything before any store, the update is in place */
      sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
      difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

      difIm = (_Float16)pUp[1] - (_Float16)pLo[1];
      sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];

      reCos = (_Float16)difRe * (_Float16)wCos;
      imSin = (_Float16)difIm * (_Float16)wSin;
      imCos = (_Float16)difIm * (_Float16)wCos;
      reSin = (_Float16)difRe * (_Float16)wSin;

      pUp[0] = sumRe;
      pUp[1] = sumIm;

      pLo[0] = (_Float16)reCos - (_Float16)imSin;
      pLo[1] = (_Float16)imCos + (_Float16)reSin;
   }

   twidCoefModifier <<= 1U;

   /* ---- Middle stages: one twiddle factor is shared by a whole group ---- */
   for (stage = fftLen / 2; stage > 2; stage >>= 1)
   {
      span  = half;
      half >>= 1;
      twIdx = 0;

      for (group = 0; group < half; group++)
      {
         wCos = pCoef[twIdx * 2];
         wSin = pCoef[(twIdx * 2) + 1];
         twIdx += twidCoefModifier;

         /* all butterflies of this group, spaced 'span' samples apart */
         for (upper = group; upper < fftLen; upper += span)
         {
            lower = upper + half;
            pUp = &pSrc[2 * upper];
            pLo = &pSrc[2 * lower];

            sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
            difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

            difIm = (_Float16)pUp[1] - (_Float16)pLo[1];
            sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];

            reCos = (_Float16)difRe * (_Float16)wCos;
            imSin = (_Float16)difIm * (_Float16)wSin;
            imCos = (_Float16)difIm * (_Float16)wCos;
            reSin = (_Float16)difRe * (_Float16)wSin;

            pUp[0] = sumRe;
            pUp[1] = sumIm;

            pLo[0] = (_Float16)reCos - (_Float16)imSin;
            pLo[1] = (_Float16)imCos + (_Float16)reSin;
         }
      }

      twidCoefModifier <<= 1U;
   }

   /* ---- Last stage: adjacent samples, sum/difference scaled by onebyfftLen ---- */
   for (upper = 0; upper < fftLen; upper += 2)
   {
      pUp = &pSrc[2 * upper];

      sumRe = (_Float16)pUp[0] + (_Float16)pUp[2];
      difRe = (_Float16)pUp[0] - (_Float16)pUp[2];

      sumIm = (_Float16)pUp[3] + (_Float16)pUp[1];
      difIm = (_Float16)pUp[1] - (_Float16)pUp[3];

      outUpRe = (_Float16)sumRe * (_Float16)onebyfftLen;
      outLoRe = (_Float16)difRe * (_Float16)onebyfftLen;
      outUpIm = (_Float16)sumIm * (_Float16)onebyfftLen;
      outLoIm = (_Float16)difIm * (_Float16)onebyfftLen;

      pUp[0] = outUpRe;
      pUp[1] = outUpIm;
      pUp[2] = outLoRe;
      pUp[3] = outLoIm;
   }

#else

   half = fftLen;

   /* ---- All stages except the last one: twiddled butterflies ---- */
   for (stage = fftLen; stage > 2; stage >>= 1)
   {
      span  = half;
      half >>= 1;
      twIdx = 0;

      for (group = 0; group < half; group++)
      {
         wCos = pCoef[twIdx * 2];
         wSin = pCoef[(twIdx * 2) + 1];
         twIdx = twIdx + twidCoefModifier;

         /* all butterflies of this group, spaced 'span' samples apart */
         for (upper = group; upper < fftLen; upper += span)
         {
            lower = upper + half;
            pUp = &pSrc[2 * upper];
            pLo = &pSrc[2 * lower];

            /* read everything before any store, the update is in place */
            sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
            difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

            difIm = (_Float16)pUp[1] - (_Float16)pLo[1];
            sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];

            reCos = (_Float16)difRe * (_Float16)wCos;
            imSin = (_Float16)difIm * (_Float16)wSin;
            imCos = (_Float16)difIm * (_Float16)wCos;
            reSin = (_Float16)difRe * (_Float16)wSin;

            pUp[0] = sumRe;
            pUp[1] = sumIm;

            pLo[0] = (_Float16)reCos - (_Float16)imSin;
            pLo[1] = (_Float16)imCos + (_Float16)reSin;
         }
      }

      twidCoefModifier = twidCoefModifier << 1U;
   }

   /* ---- Last stage: sum/difference scaled by onebyfftLen, no twiddle product ---- */
   span  = half;
   half >>= 1;

   for (upper = 0; upper < fftLen; upper += span)
   {
      lower = upper + half;
      pUp = &pSrc[2 * upper];
      pLo = &pSrc[2U * lower];

      sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
      difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

      sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];
      difIm = (_Float16)pUp[1] - (_Float16)pLo[1];

      outUpRe = (_Float16)sumRe * (_Float16)onebyfftLen;
      outLoRe = (_Float16)difRe * (_Float16)onebyfftLen;
      outUpIm = (_Float16)sumIm * (_Float16)onebyfftLen;
      outLoIm = (_Float16)difIm * (_Float16)onebyfftLen;

      /* real parts first, then imaginary parts */
      pUp[0] = outUpRe;
      pLo[0] = outLoRe;

      pUp[1] = outUpIm;
      pLo[1] = outLoIm;
   }

#endif /* #if defined (ARM_MATH_DSP) */

}
475 
476 
477 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
478