1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix2_f16.c
4 * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/transform_functions_f16.h"
30
31 /**
32 * @defgroup ComplexFFTDeprecated Deprecated Complex FFT functions
33 */
34
35 #if defined(ARM_FLOAT16_SUPPORTED)
36
37 void arm_radix2_butterfly_f16(
38 float16_t * pSrc,
39 uint32_t fftLen,
40 const float16_t * pCoef,
41 uint16_t twidCoefModifier);
42
43 void arm_radix2_butterfly_inverse_f16(
44 float16_t * pSrc,
45 uint32_t fftLen,
46 const float16_t * pCoef,
47 uint16_t twidCoefModifier,
48 float16_t onebyfftLen);
49
50 extern void arm_bitreversal_f16(
51 float16_t * pSrc,
52 uint16_t fftSize,
53 uint16_t bitRevFactor,
54 const uint16_t * pBitRevTab);
55
56 /**
57 @ingroup ComplexFFT
58 */
59
60 /**
61 @addtogroup ComplexFFTDeprecated
62 @{
63 */
64
65 /**
66 @brief Radix-2 CFFT/CIFFT.
67 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future
68 @param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
69 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
70 */
71
arm_cfft_radix2_f16(const arm_cfft_radix2_instance_f16 * S,float16_t * pSrc)72 void arm_cfft_radix2_f16(
73 const arm_cfft_radix2_instance_f16 * S,
74 float16_t * pSrc)
75 {
76
77 if (S->ifftFlag == 1U)
78 {
79 /* Complex IFFT radix-2 */
80 arm_radix2_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle,
81 S->twidCoefModifier, S->onebyfftLen);
82 }
83 else
84 {
85 /* Complex FFT radix-2 */
86 arm_radix2_butterfly_f16(pSrc, S->fftLen, S->pTwiddle,
87 S->twidCoefModifier);
88 }
89
90 if (S->bitReverseFlag == 1U)
91 {
92 /* Bit Reversal */
93 arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
94 }
95
96 }
97
98
99 /**
100 @} end of ComplexFFTDeprecated group
101 */
102
103
104
105 /* ----------------------------------------------------------------------
106 ** Internal helper function used by the FFTs
107 ** ------------------------------------------------------------------- */
108
109 /*
110 * @brief Core function for the floating-point CFFT butterfly process.
111 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
112 * @param[in] fftLen length of the FFT.
113 * @param[in] *pCoef points to the twiddle coefficient buffer.
114 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
115 */
116
arm_radix2_butterfly_f16(float16_t * pSrc,uint32_t fftLen,const float16_t * pCoef,uint16_t twidCoefModifier)117 void arm_radix2_butterfly_f16(
118 float16_t * pSrc,
119 uint32_t fftLen,
120 const float16_t * pCoef,
121 uint16_t twidCoefModifier)
122 {
123
124 uint32_t i, j, k, l;
125 uint32_t n1, n2, ia;
126 float16_t xt, yt, cosVal, sinVal;
127 float16_t p0, p1, p2, p3;
128 float16_t a0, a1;
129
130 #if defined (ARM_MATH_DSP)
131
132 /* Initializations for the first stage */
133 n2 = fftLen >> 1;
134 ia = 0;
135 i = 0;
136
137 // loop for groups
138 for (k = n2; k > 0; k--)
139 {
140 cosVal = pCoef[ia * 2];
141 sinVal = pCoef[(ia * 2) + 1];
142
143 /* Twiddle coefficients index modifier */
144 ia += twidCoefModifier;
145
146 /* index calculation for the input as, */
147 /* pSrc[i + 0], pSrc[i + fftLen/1] */
148 l = i + n2;
149
150 /* Butterfly implementation */
151 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
152 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
153
154 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
155 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
156
157 p0 = (_Float16)xt * (_Float16)cosVal;
158 p1 = (_Float16)yt * (_Float16)sinVal;
159 p2 = (_Float16)yt * (_Float16)cosVal;
160 p3 = (_Float16)xt * (_Float16)sinVal;
161
162 pSrc[2 * i] = a0;
163 pSrc[2 * i + 1] = a1;
164
165 pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
166 pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
167
168 i++;
169 } // groups loop end
170
171 twidCoefModifier <<= 1U;
172
173 // loop for stage
174 for (k = n2; k > 2; k = k >> 1)
175 {
176 n1 = n2;
177 n2 = n2 >> 1;
178 ia = 0;
179
180 // loop for groups
181 j = 0;
182 do
183 {
184 cosVal = pCoef[ia * 2];
185 sinVal = pCoef[(ia * 2) + 1];
186 ia += twidCoefModifier;
187
188 // loop for butterfly
189 i = j;
190 do
191 {
192 l = i + n2;
193 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
194 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
195
196 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
197 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
198
199 p0 = (_Float16)xt * (_Float16)cosVal;
200 p1 = (_Float16)yt * (_Float16)sinVal;
201 p2 = (_Float16)yt * (_Float16)cosVal;
202 p3 = (_Float16)xt * (_Float16)sinVal;
203
204 pSrc[2 * i] = a0;
205 pSrc[2 * i + 1] = a1;
206
207 pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
208 pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
209
210 i += n1;
211 } while ( i < fftLen ); // butterfly loop end
212 j++;
213 } while ( j < n2); // groups loop end
214 twidCoefModifier <<= 1U;
215 } // stages loop end
216
217 // loop for butterfly
218 for (i = 0; i < fftLen; i += 2)
219 {
220 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
221 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
222
223 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
224 a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
225
226 pSrc[2 * i] = a0;
227 pSrc[2 * i + 1] = a1;
228 pSrc[2 * i + 2] = xt;
229 pSrc[2 * i + 3] = yt;
230 } // groups loop end
231
232 #else
233
234 n2 = fftLen;
235
236 // loop for stage
237 for (k = fftLen; k > 1; k = k >> 1)
238 {
239 n1 = n2;
240 n2 = n2 >> 1;
241 ia = 0;
242
243 // loop for groups
244 j = 0;
245 do
246 {
247 cosVal = pCoef[ia * 2];
248 sinVal = pCoef[(ia * 2) + 1];
249 ia += twidCoefModifier;
250
251 // loop for butterfly
252 i = j;
253 do
254 {
255 l = i + n2;
256 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
257 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
258
259 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
260 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
261
262 p0 = (_Float16)xt * (_Float16)cosVal;
263 p1 = (_Float16)yt * (_Float16)sinVal;
264 p2 = (_Float16)yt * (_Float16)cosVal;
265 p3 = (_Float16)xt * (_Float16)sinVal;
266
267 pSrc[2 * i] = a0;
268 pSrc[2 * i + 1] = a1;
269
270 pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
271 pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
272
273 i += n1;
274 } while (i < fftLen);
275 j++;
276 } while (j < n2);
277 twidCoefModifier <<= 1U;
278 }
279
280 #endif // #if defined (ARM_MATH_DSP)
281
282 }
283
284
/*
 * @brief  Core butterfly routine of the inverse radix-2 floating-point (float16) CFFT.
 * @param[in, out] *pSrc            points to the in-place buffer; samples are read and written as
 *                                  interleaved pairs (pSrc[2*n], pSrc[2*n + 1]).
 * @param[in]      fftLen           length of the FFT, counted in complex samples.
 * @param[in]      *pCoef           points to the twiddle coefficient buffer, read as pairs
 *                                  (pCoef[2*m] used as the cosine term, pCoef[2*m + 1] as the sine term).
 * @param[in]      twidCoefModifier twiddle table stride of the first stage; doubled after every stage.
 * @param[in]      onebyfftLen      scale factor multiplied into every output of the last stage
 *                                  (by its name, presumably 1/fftLen - the value comes from the caller).
 *
 * Each butterfly combines an upper sample U and a lower sample L that lie half a span apart:
 *   U' = U + L
 *   L' = (U - L) * (cos + j*sin)
 * The last stage is handled separately in both build variants: it uses no twiddle multiplication
 * and scales its four results by onebyfftLen instead.
 */
void arm_radix2_butterfly_inverse_f16(
        float16_t * pSrc,
        uint32_t fftLen,
  const float16_t * pCoef,
        uint16_t twidCoefModifier,
        float16_t onebyfftLen)
{
  uint32_t upper, lower;          /* complex-sample indices of the two butterfly legs   */
  uint32_t first;                 /* index of the first butterfly sharing one twiddle   */
  uint32_t span, half;            /* distance between butterflies / between their legs  */
  uint32_t twIdx;                 /* running index (in complex entries) into pCoef      */
  float16_t wRe, wIm;             /* current twiddle: cosine and sine terms             */
  float16_t sumRe, sumIm;         /* U + L                                              */
  float16_t difRe, difIm;         /* U - L                                              */
  float16_t reByCos, imBySin;     /* partial products of (U - L) with the twiddle       */
  float16_t imByCos, reBySin;
  float16_t outSumRe, outSumIm;   /* last stage: scaled U + L                           */
  float16_t outDifRe, outDifIm;   /* last stage: scaled U - L                           */
  float16_t *pUp, *pLo;           /* addresses of the upper and lower complex samples   */

#if defined (ARM_MATH_DSP)

  /* ------------------------------------------------------------------
   * First stage: the legs are fftLen/2 samples apart and every butterfly
   * has its own twiddle.
   * ---------------------------------------------------------------- */
  half  = fftLen >> 1;
  twIdx = 0U;

  for (upper = 0U; upper < half; upper++)
  {
    wRe = pCoef[2U * twIdx];
    wIm = pCoef[(2U * twIdx) + 1U];
    twIdx += twidCoefModifier;

    lower = upper + half;
    pUp   = &pSrc[2U * upper];
    pLo   = &pSrc[2U * lower];

    sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
    difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

    difIm = (_Float16)pUp[1] - (_Float16)pLo[1];
    sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];

    reByCos = (_Float16)difRe * (_Float16)wRe;
    imBySin = (_Float16)difIm * (_Float16)wIm;
    imByCos = (_Float16)difIm * (_Float16)wRe;
    reBySin = (_Float16)difRe * (_Float16)wIm;

    pUp[0] = sumRe;
    pUp[1] = sumIm;

    pLo[0] = (_Float16)reByCos - (_Float16)imBySin;
    pLo[1] = (_Float16)imByCos + (_Float16)reBySin;
  }

  twidCoefModifier <<= 1U;

  /* ------------------------------------------------------------------
   * Middle stages. The half-span itself serves as the stage counter: it
   * is halved once per stage and the loop stops when it has reached 2.
   * ---------------------------------------------------------------- */
  while (half > 2U)
  {
    span  = half;
    half  = half >> 1;
    twIdx = 0U;

    /* One pass per twiddle value; half is at least 1 here. */
    for (first = 0U; first < half; first++)
    {
      wRe = pCoef[2U * twIdx];
      wIm = pCoef[(2U * twIdx) + 1U];
      twIdx += twidCoefModifier;

      /* All butterflies that share this twiddle are span samples apart. */
      for (upper = first; upper < fftLen; upper += span)
      {
        lower = upper + half;
        pUp   = &pSrc[2U * upper];
        pLo   = &pSrc[2U * lower];

        sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
        difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

        difIm = (_Float16)pUp[1] - (_Float16)pLo[1];
        sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];

        reByCos = (_Float16)difRe * (_Float16)wRe;
        imBySin = (_Float16)difIm * (_Float16)wIm;
        imByCos = (_Float16)difIm * (_Float16)wRe;
        reBySin = (_Float16)difRe * (_Float16)wIm;

        pUp[0] = sumRe;
        pUp[1] = sumIm;

        pLo[0] = (_Float16)reByCos - (_Float16)imBySin;
        pLo[1] = (_Float16)imByCos + (_Float16)reBySin;
      }
    }

    twidCoefModifier <<= 1U;
  }

  /* ------------------------------------------------------------------
   * Last stage: butterflies on adjacent samples, no twiddle multiplication,
   * every result scaled by onebyfftLen.
   * ---------------------------------------------------------------- */
  for (upper = 0U; upper < fftLen; upper += 2U)
  {
    pUp = &pSrc[2U * upper];    /* pUp[0..1] = upper sample, pUp[2..3] = lower sample */

    sumRe = (_Float16)pUp[0] + (_Float16)pUp[2];
    difRe = (_Float16)pUp[0] - (_Float16)pUp[2];

    sumIm = (_Float16)pUp[3] + (_Float16)pUp[1];
    difIm = (_Float16)pUp[1] - (_Float16)pUp[3];

    outSumRe = (_Float16)sumRe * (_Float16)onebyfftLen;
    outDifRe = (_Float16)difRe * (_Float16)onebyfftLen;
    outSumIm = (_Float16)sumIm * (_Float16)onebyfftLen;
    outDifIm = (_Float16)difIm * (_Float16)onebyfftLen;

    pUp[0] = outSumRe;
    pUp[1] = outSumIm;
    pUp[2] = outDifRe;
    pUp[3] = outDifIm;
  }

#else

  /* ------------------------------------------------------------------
   * Generic path: all stages except the last run through the twiddled
   * butterfly. The half-span doubles as the stage counter and the loop
   * ends once it has dropped to 2 (or was never above 2).
   * ---------------------------------------------------------------- */
  half = fftLen;

  while (half > 2U)
  {
    span  = half;
    half  = half >> 1;
    twIdx = 0U;

    /* One pass per twiddle value; half is at least 1 here. */
    for (first = 0U; first < half; first++)
    {
      wRe = pCoef[2U * twIdx];
      wIm = pCoef[(2U * twIdx) + 1U];
      twIdx = twIdx + twidCoefModifier;

      /* All butterflies that share this twiddle are span samples apart. */
      for (upper = first; upper < fftLen; upper += span)
      {
        lower = upper + half;
        pUp   = &pSrc[2U * upper];
        pLo   = &pSrc[2U * lower];

        sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
        difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

        difIm = (_Float16)pUp[1] - (_Float16)pLo[1];
        sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];

        reByCos = (_Float16)difRe * (_Float16)wRe;
        imBySin = (_Float16)difIm * (_Float16)wIm;
        imByCos = (_Float16)difIm * (_Float16)wRe;
        reBySin = (_Float16)difRe * (_Float16)wIm;

        pUp[0] = sumRe;
        pUp[1] = sumIm;

        pLo[0] = (_Float16)reByCos - (_Float16)imBySin;
        pLo[1] = (_Float16)imByCos + (_Float16)reBySin;
      }
    }

    twidCoefModifier = twidCoefModifier << 1U;
  }

  /* ------------------------------------------------------------------
   * Last stage: one more halving of the span, no twiddle multiplication,
   * every result scaled by onebyfftLen.
   * ---------------------------------------------------------------- */
  span = half;
  half = half >> 1;

  for (upper = 0U; upper < fftLen; upper += span)
  {
    lower = upper + half;
    pUp   = &pSrc[2U * upper];
    pLo   = &pSrc[2U * lower];

    sumRe = (_Float16)pUp[0] + (_Float16)pLo[0];
    difRe = (_Float16)pUp[0] - (_Float16)pLo[0];

    sumIm = (_Float16)pLo[1] + (_Float16)pUp[1];
    difIm = (_Float16)pUp[1] - (_Float16)pLo[1];

    outSumRe = (_Float16)sumRe * (_Float16)onebyfftLen;
    outDifRe = (_Float16)difRe * (_Float16)onebyfftLen;
    outSumIm = (_Float16)sumIm * (_Float16)onebyfftLen;
    outDifIm = (_Float16)difIm * (_Float16)onebyfftLen;

    /* Store order (real parts first, then imaginary parts) is kept as is. */
    pUp[0] = outSumRe;
    pLo[0] = outDifRe;

    pUp[1] = outSumIm;
    pLo[1] = outDifIm;
  }

#endif /* #if defined (ARM_MATH_DSP) */

}
475
476
477 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
478