1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_f16.c
4 * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/transform_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
/*
 * Bit-reversal reordering routine. Only declared here (extern); its definition
 * is not visible in this part of the file. arm_cfft_radix4_f16() calls it with
 * the instance's fftLen, bitRevFactor and pBitRevTable when bitReverseFlag == 1.
 */
extern void arm_bitreversal_f16(
        float16_t * pSrc,
        uint16_t fftSize,
        uint16_t bitRevFactor,
        const uint16_t * pBitRevTab);

/*
 * Forward declaration of the in-place forward radix-4 butterfly, defined
 * further down in this file. Declared early because arm_cfft_radix4by2_f16()
 * and arm_cfft_radix4_f16() call it before its definition.
 */
ARM_DSP_ATTRIBUTE void arm_radix4_butterfly_f16(
        float16_t * pSrc,
        uint16_t fftLen,
        const float16_t * pCoef,
        uint16_t twidCoefModifier);

/*
 * Forward declaration of the in-place inverse radix-4 butterfly, defined
 * further down in this file. Takes the same arguments as the forward version
 * plus onebyfftLen, the 1/fftLen scale factor.
 */
ARM_DSP_ATTRIBUTE void arm_radix4_butterfly_inverse_f16(
        float16_t * pSrc,
        uint16_t fftLen,
        const float16_t * pCoef,
        uint16_t twidCoefModifier,
        float16_t onebyfftLen);


/*
 * Forward declaration of the radix-4-by-2 helper defined below.
 * Note that fftLen is 32 bits wide here, unlike the 16-bit fftLen of the
 * radix-4 butterflies.
 */
ARM_DSP_ATTRIBUTE void arm_cfft_radix4by2_f16(
        float16_t * pSrc,
        uint32_t fftLen,
        const float16_t * pCoef);
57
58
59 /**
60 @addtogroup ComplexFFTDeprecated
61 @{
62 */
63
/*
 * @brief Radix-4-by-2 step of the half-precision floating-point CFFT: one radix-2 butterfly pass over the buffer followed by a radix-4 butterfly on each half.
 * @param[in, out] *pSrc points to the in-place buffer of half-precision floating-point data.
 * @param[in] fftLen length of the FFT.
 * @param[in] *pCoef points to the twiddle coefficient buffer.
 * @note This function has no twidCoefModifier argument; the twiddle coefficient modifier handed to the radix-4 butterflies is fixed at 2.
 */
71
arm_cfft_radix4by2_f16(float16_t * pSrc,uint32_t fftLen,const float16_t * pCoef)72 ARM_DSP_ATTRIBUTE void arm_cfft_radix4by2_f16(
73 float16_t * pSrc,
74 uint32_t fftLen,
75 const float16_t * pCoef)
76 {
77 uint32_t i, l;
78 uint32_t n2, ia;
79 float16_t xt, yt, cosVal, sinVal;
80 float16_t p0, p1,p2,p3,a0,a1;
81
82 n2 = fftLen >> 1;
83 ia = 0;
84 for (i = 0; i < n2; i++)
85 {
86 cosVal = pCoef[2*ia];
87 sinVal = pCoef[2*ia + 1];
88 ia++;
89
90 l = i + n2;
91
92 /* Butterfly implementation */
93 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
94 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
95
96 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
97 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
98
99 p0 = (_Float16)xt * (_Float16)cosVal;
100 p1 = (_Float16)yt * (_Float16)sinVal;
101 p2 = (_Float16)yt * (_Float16)cosVal;
102 p3 = (_Float16)xt * (_Float16)sinVal;
103
104 pSrc[2 * i] = a0;
105 pSrc[2 * i + 1] = a1;
106
107 pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
108 pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
109
110 }
111
112 // first col
113 arm_radix4_butterfly_f16( pSrc, n2, (float16_t*)pCoef, 2U);
114 // second col
115 arm_radix4_butterfly_f16( pSrc + fftLen, n2, (float16_t*)pCoef, 2U);
116
117 }
118
119
120 /**
121 @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
122 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
123 @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
124 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
125 */
126
arm_cfft_radix4_f16(const arm_cfft_radix4_instance_f16 * S,float16_t * pSrc)127 ARM_DSP_ATTRIBUTE void arm_cfft_radix4_f16(
128 const arm_cfft_radix4_instance_f16 * S,
129 float16_t * pSrc)
130 {
131 if (S->ifftFlag == 1U)
132 {
133 /* Complex IFFT radix-4 */
134 arm_radix4_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
135 }
136 else
137 {
138 /* Complex FFT radix-4 */
139 arm_radix4_butterfly_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
140 }
141
142 if (S->bitReverseFlag == 1U)
143 {
144 /* Bit Reversal */
145 arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
146 }
147
148 }
149
150 /**
151 @} end of ComplexFFTDeprecated group
152 */
153
154 /* ----------------------------------------------------------------------
155 * Internal helper function used by the FFTs
156 * ---------------------------------------------------------------------- */
157
158 /*
159 * @brief Core function for the floating-point CFFT butterfly process.
160 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
161 * @param[in] fftLen length of the FFT.
162 * @param[in] *pCoef points to the twiddle coefficient buffer.
163 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
164 */
165
ARM_DSP_ATTRIBUTE void arm_radix4_butterfly_f16(
        float16_t * pSrc,
        uint16_t fftLen,
        const float16_t * pCoef,
        uint16_t twidCoefModifier)
{
    /*
     * In-place forward radix-4 butterfly over fftLen complex values.
     *
     * Data layout: complex element i is pSrc[2*i] (x) and pSrc[2*i + 1] (y).
     * Twiddle layout: entry ia is pCoef[2*ia] (co) and pCoef[2*ia + 1] (si);
     * every butterfly multiplies by (co - j*si).
     *
     * Each butterfly reads four elements a, b, c, d spaced n2 apart and
     * writes four results back to the same four slots:
     *   slot i0 <- a + b + c + d                      (no twiddle)
     *   slot i1 <- (a - b + c - d)   * (co2 - j*si2)
     *   slot i2 <- (a - j*b - c + j*d) * (co1 - j*si1)
     *   slot i3 <- (a + j*b - c - j*d) * (co3 - j*si3)
     *
     * No reordering of the result is done here; arm_cfft_radix4_f16() applies
     * arm_bitreversal_f16() afterwards when requested.
     *
     * twidCoefModifier is a by-value parameter and is multiplied by 4 after
     * every stage inside this function.
     *
     * NOTE(review): every loop below is a do/while that runs at least once,
     * so this code presumably relies on fftLen being a power of 4 that is
     * large enough (the ARM_MATH_DSP path runs a first and a last stage
     * unconditionally) - confirm against the callers.
     */

    float16_t co1, co2, co3, si1, si2, si3;   /* cos/sin of the three twiddles      */
    uint32_t ia1, ia2, ia3;                   /* twiddle indices: ia, 2*ia and 3*ia */
    uint32_t i0, i1, i2, i3;                  /* indices of elements a, b, c and d  */
    uint32_t n1, n2, j, k;                    /* group stride, leg spacing, counters */

#if defined (ARM_MATH_DSP)

    /* Compiled when ARM_MATH_DSP is defined. */
    /* Run the below code for Cortex-M4 and Cortex-M3 */

    /* The eight input values of the current butterfly. */
    float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
    /* Pairwise sums and differences of the inputs. */
    float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
        Ybminusd;
    /* Butterfly outputs before the twiddle multiplication. */
    float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
    /* Butterfly outputs after the twiddle multiplication. */
    float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
    float16_t *ptr1;                          /* read/write cursor of the last stage */
    float16_t p0,p1,p2,p3,p4,p5;              /* sine-weighted partial products      */
    float16_t a0,a1,a2,a3,a4,a5,a6,a7;        /* results of the last stage           */

    /* Initializations for the first stage */
    n2 = fftLen;
    n1 = n2;

    /* n2 = fftLen/4: distance between the four legs of a first-stage butterfly */
    n2 >>= 2U;
    i0 = 0U;
    ia1 = 0U;

    /* The first stage consists of fftLen/4 butterflies, one per value of i0. */
    j = n2;

    /* Calculation of first stage */
    do
    {
        /* index calculation for the input as, */
        /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /* Load all four complex inputs before anything is written back. */
        xaIn = pSrc[(2U * i0)];
        yaIn = pSrc[(2U * i0) + 1U];

        xbIn = pSrc[(2U * i1)];
        ybIn = pSrc[(2U * i1) + 1U];

        xcIn = pSrc[(2U * i2)];
        ycIn = pSrc[(2U * i2) + 1U];

        xdIn = pSrc[(2U * i3)];
        ydIn = pSrc[(2U * i3) + 1U];

        /* xa + xc */
        Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
        /* xb + xd */
        Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
        /* ya + yc */
        Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
        /* yb + yd */
        Ybplusd = (_Float16)ybIn + (_Float16)ydIn;

        /* second twiddle: index 2 * ia1 */
        ia2 = ia1 + ia1;
        co2 = pCoef[ia2 * 2U];
        si2 = pCoef[(ia2 * 2U) + 1U];

        /* xa - xc */
        Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
        /* xb - xd */
        Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
        /* ya - yc */
        Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
        /* yb - yd */
        Ybminusd = (_Float16)ybIn - (_Float16)ydIn;

        /* xa' = xa + xb + xc + xd */
        pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
        /* ya' = ya + yb + yc + yd */
        pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;

        /* (xa - xc) + (yb - yd) */
        Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
        /* (ya - yc) - (xb - xd) */
        Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
        /* (xa + xc) - (xb + xd) */
        Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
        /* (ya + yc) - (yb + yd) */
        Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
        /* (xa - xc) - (yb - yd) */
        Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
        /* (ya - yc) + (xb - xd) */
        Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);

        /* first twiddle: index ia1 */
        co1 = pCoef[ia1 * 2U];
        si1 = pCoef[(ia1 * 2U) + 1U];

        /* third twiddle: index 3 * ia1 */
        ia3 = ia2 + ia1;
        co3 = pCoef[ia3 * 2U];
        si3 = pCoef[(ia3 * 2U) + 1U];

        /* Cosine-weighted halves of the three twiddle multiplications. */
        Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
        Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
        Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
        Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
        Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
        Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;

        /*
         * Sine-weighted halves. Each product is stored in its own
         * half-precision variable before it is combined below.
         */
        /* (ya-xb-yc+xd)(si1), added into xb' */
        p0 = (_Float16)Yb12C_out * (_Float16)si1;
        /* (xa+yb-xc-yd)(si1), subtracted from yb' */
        p1 = (_Float16)Xb12C_out * (_Float16)si1;
        /* (ya-yb+yc-yd)(si2), added into xc' */
        p2 = (_Float16)Yc12C_out * (_Float16)si2;
        /* (xa-xb+xc-xd)(si2), subtracted from yc' */
        p3 = (_Float16)Xc12C_out * (_Float16)si2;
        /* (ya+xb-yc-xd)(si3), added into xd' */
        p4 = (_Float16)Yd12C_out * (_Float16)si3;
        /* (xa-yb-xc+yd)(si3), subtracted from yd' */
        p5 = (_Float16)Xd12C_out * (_Float16)si3;

        /* Forward transform: x' = X*co + Y*si, y' = Y*co - X*si */
        Xb12_out += (_Float16)p0;
        Yb12_out -= (_Float16)p1;
        Xc12_out += (_Float16)p2;
        Yc12_out -= (_Float16)p3;
        Xd12_out += (_Float16)p4;
        Yd12_out -= (_Float16)p5;

        /* Note the slot order: the co2 result goes to i1, the co1 result to i2. */

        /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
        pSrc[2U * i1] = Xc12_out;

        /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
        pSrc[(2U * i1) + 1U] = Yc12_out;

        /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
        pSrc[2U * i2] = Xb12_out;

        /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
        pSrc[(2U * i2) + 1U] = Yb12_out;

        /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
        pSrc[2U * i3] = Xd12_out;

        /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
        pSrc[(2U * i3) + 1U] = Yd12_out;

        /* Twiddle coefficients index modifier */
        ia1 += twidCoefModifier;

        /* Updating input index */
        i0++;

    }
    while (--j);

    /* The next stage steps through the twiddle table four times as fast. */
    twidCoefModifier <<= 2U;

    /* Calculation of the second stage up to, but excluding, the last stage */
    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
    {
        /* n1: distance between butterflies sharing a twiddle, n2: leg spacing */
        n1 = n2;
        n2 >>= 2U;
        ia1 = 0U;

        /* j selects the twiddle set and the starting element, 0 .. n2-1 */
        j = 0;
        do
        {
            /* index calculation for the coefficients: ia1, 2*ia1, 3*ia1 */
            ia2 = ia1 + ia1;
            ia3 = ia2 + ia1;
            co1 = pCoef[ia1 * 2U];
            si1 = pCoef[(ia1 * 2U) + 1U];
            co2 = pCoef[ia2 * 2U];
            si2 = pCoef[(ia2 * 2U) + 1U];
            co3 = pCoef[ia3 * 2U];
            si3 = pCoef[(ia3 * 2U) + 1U];

            /* Twiddle coefficients index modifier */
            ia1 += twidCoefModifier;

            /* Walk every butterfly that uses this twiddle set, n1 apart. */
            i0 = j;
            do
            {
                /* index calculation for the input as, */
                /* pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
                i1 = i0 + n2;
                i2 = i1 + n2;
                i3 = i2 + n2;

                /* Load all four complex inputs before anything is written back. */
                xaIn = pSrc[(2U * i0)];
                yaIn = pSrc[(2U * i0) + 1U];

                xbIn = pSrc[(2U * i1)];
                ybIn = pSrc[(2U * i1) + 1U];

                xcIn = pSrc[(2U * i2)];
                ycIn = pSrc[(2U * i2) + 1U];

                xdIn = pSrc[(2U * i3)];
                ydIn = pSrc[(2U * i3) + 1U];

                /* xa - xc */
                Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
                /* (xb - xd) */
                Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
                /* ya - yc */
                Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
                /* (yb - yd) */
                Ybminusd = (_Float16)ybIn - (_Float16)ydIn;

                /* xa + xc */
                Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
                /* xb + xd */
                Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
                /* ya + yc */
                Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
                /* yb + yd */
                Ybplusd = (_Float16)ybIn + (_Float16)ydIn;

                /* (xa - xc) + (yb - yd) */
                Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
                /* (ya - yc) - (xb - xd) */
                Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
                /* xa + xc -(xb + xd) */
                Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
                /* (ya + yc) - (yb + yd) */
                Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
                /* (xa - xc) - (yb - yd) */
                Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
                /* (ya - yc) + (xb - xd) */
                Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);

                /* xa' = xa + xb + xc + xd, ya' = ya + yb + yc + yd */
                pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
                pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;

                /* Cosine-weighted halves of the three twiddle multiplications. */
                Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
                Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
                Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
                Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
                Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
                Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;

                /* Sine-weighted halves, each kept in its own variable. */
                /* (ya-xb-yc+xd)(si1), added into xb' */
                p0 = (_Float16)Yb12C_out * (_Float16)si1;
                /* (xa+yb-xc-yd)(si1), subtracted from yb' */
                p1 = (_Float16)Xb12C_out * (_Float16)si1;
                /* (ya-yb+yc-yd)(si2), added into xc' */
                p2 = (_Float16)Yc12C_out * (_Float16)si2;
                /* (xa-xb+xc-xd)(si2), subtracted from yc' */
                p3 = (_Float16)Xc12C_out * (_Float16)si2;
                /* (ya+xb-yc-xd)(si3), added into xd' */
                p4 = (_Float16)Yd12C_out * (_Float16)si3;
                /* (xa-yb-xc+yd)(si3), subtracted from yd' */
                p5 = (_Float16)Xd12C_out * (_Float16)si3;

                /* Forward transform: x' = X*co + Y*si, y' = Y*co - X*si */
                Xb12_out += (_Float16)p0;
                Yb12_out -= (_Float16)p1;
                Xc12_out += (_Float16)p2;
                Yc12_out -= (_Float16)p3;
                Xd12_out += (_Float16)p4;
                Yd12_out -= (_Float16)p5;

                /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
                pSrc[2U * i1] = Xc12_out;

                /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
                pSrc[(2U * i1) + 1U] = Yc12_out;

                /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
                pSrc[2U * i2] = Xb12_out;

                /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
                pSrc[(2U * i2) + 1U] = Yb12_out;

                /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
                pSrc[2U * i3] = Xd12_out;

                /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
                pSrc[(2U * i3) + 1U] = Yd12_out;

                /* Next butterfly that shares this twiddle set. */
                i0 += n1;
            } while (i0 < fftLen);
            j++;
        } while (j <= (n2 - 1U));
        twidCoefModifier <<= 2U;
    }

    /* The last stage handles fftLen/4 groups of four adjacent complex values. */
    j = fftLen >> 2;
    ptr1 = &pSrc[0];

    /* Calculations of last stage: no twiddle multiplication is performed here */
    do
    {
        /* Four consecutive complex values a, b, c, d (eight scalars). */
        xaIn = ptr1[0];
        yaIn = ptr1[1];
        xbIn = ptr1[2];
        ybIn = ptr1[3];
        xcIn = ptr1[4];
        ycIn = ptr1[5];
        xdIn = ptr1[6];
        ydIn = ptr1[7];

        /* xa + xc */
        Xaplusc = (_Float16)xaIn + (_Float16)xcIn;

        /* xa - xc */
        Xaminusc = (_Float16)xaIn - (_Float16)xcIn;

        /* ya + yc */
        Yaplusc = (_Float16)yaIn + (_Float16)ycIn;

        /* ya - yc */
        Yaminusc = (_Float16)yaIn - (_Float16)ycIn;

        /* xb + xd */
        Xbplusd = (_Float16)xbIn + (_Float16)xdIn;

        /* yb + yd */
        Ybplusd = (_Float16)ybIn + (_Float16)ydIn;

        /* (xb-xd) */
        Xbminusd = (_Float16)xbIn - (_Float16)xdIn;

        /* (yb-yd) */
        Ybminusd = (_Float16)ybIn - (_Float16)ydIn;

        /* xa' = xa + xb + xc + xd */
        a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
        /* ya' = ya + yb + yc + yd */
        a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
        /* xc' = (xa-xb+xc-xd) */
        a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
        /* yc' = (ya-yb+yc-yd) */
        a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
        /* xb' = (xa+yb-xc-yd) */
        a4 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
        /* yb' = (ya-xb-yc+xd) */
        a5 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
        /* xd' = (xa-yb-xc+yd) */
        a6 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
        /* yd' = (ya+xb-yc-xd) */
        a7 = ((_Float16)Xbminusd + (_Float16)Yaminusc);

        /* Same slot order as the earlier stages: a', c', b', d'. */
        ptr1[0] = a0;
        ptr1[1] = a1;
        ptr1[2] = a2;
        ptr1[3] = a3;
        ptr1[4] = a4;
        ptr1[5] = a5;
        ptr1[6] = a6;
        ptr1[7] = a7;

        /* increment pointer by 8 (four complex values) */
        ptr1 += 8U;
    } while (--j);

#else

    /* Scratch values reused for several different sums within one butterfly. */
    float16_t t1, t2, r1, r2, s1, s2;

    /* Compiled when ARM_MATH_DSP is not defined. */
    /* Run the below code for Cortex-M0 */

    /* Initializations for the fft calculation */
    n2 = fftLen;
    n1 = n2;

    /* One pass per stage; unlike the other path, every stage uses twiddles. */
    for (k = fftLen; k > 1U; k >>= 2U)
    {
        /* n1: distance between butterflies sharing a twiddle, n2: leg spacing */
        n1 = n2;
        n2 >>= 2U;
        ia1 = 0U;

        /* j selects the twiddle set and the starting element, 0 .. n2-1 */
        j = 0;
        do
        {
            /* index calculation for the coefficients: ia1, 2*ia1, 3*ia1 */
            ia2 = ia1 + ia1;
            ia3 = ia2 + ia1;
            co1 = pCoef[ia1 * 2U];
            si1 = pCoef[(ia1 * 2U) + 1U];
            co2 = pCoef[ia2 * 2U];
            si2 = pCoef[(ia2 * 2U) + 1U];
            co3 = pCoef[ia3 * 2U];
            si3 = pCoef[(ia3 * 2U) + 1U];

            /* Twiddle coefficients index modifier */
            ia1 = ia1 + twidCoefModifier;

            /* Walk every butterfly that uses this twiddle set, n1 apart. */
            i0 = j;
            do
            {
                /* index calculation for the input as, */
                /* pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
                i1 = i0 + n2;
                i2 = i1 + n2;
                i3 = i2 + n2;

                /*
                 * Inputs are read straight from pSrc and results are written
                 * back as soon as they are ready, so the order of the
                 * statements below matters: a slot is only overwritten after
                 * its last read.
                 */

                /* xa + xc */
                r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];

                /* xa - xc */
                r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];

                /* ya + yc */
                s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];

                /* ya - yc */
                s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];

                /* xb + xd */
                t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];

                /* xa' = xa + xb + xc + xd */
                pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;

                /* xa + xc -(xb + xd) */
                r1 = (_Float16)r1 - (_Float16)t1;

                /* yb + yd */
                t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];

                /* ya' = ya + yb + yc + yd */
                pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;

                /* (ya + yc) - (yb + yd) */
                s1 = (_Float16)s1 - (_Float16)t2;

                /* (yb - yd): read before slot i1 is overwritten below */
                t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];

                /* (xb - xd): read before slot i1 is overwritten below */
                t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];

                /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
                pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) + ((_Float16)s1 * (_Float16)si2);

                /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
                pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) - ((_Float16)r1 * (_Float16)si2);

                /* (xa - xc) + (yb - yd) */
                r1 = (_Float16)r2 + (_Float16)t1;

                /* (xa - xc) - (yb - yd) */
                r2 = (_Float16)r2 - (_Float16)t1;

                /* (ya - yc) - (xb - xd) */
                s1 = (_Float16)s2 - (_Float16)t2;

                /* (ya - yc) + (xb - xd) */
                s2 = (_Float16)s2 + (_Float16)t2;

                /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
                pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) + ((_Float16)s1 * (_Float16)si1);

                /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
                pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) - ((_Float16)r1 * (_Float16)si1);

                /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
                pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) + ((_Float16)s2 * (_Float16)si3);

                /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
                pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) - ((_Float16)r2 * (_Float16)si3);

                /* Next butterfly that shares this twiddle set. */
                i0 += n1;
            } while ( i0 < fftLen);
            j++;
        } while (j <= (n2 - 1U));

        /* The next stage steps through the twiddle table four times as fast. */
        twidCoefModifier <<= 2U;
    }

#endif /* #if defined (ARM_MATH_DSP) */

}
659
660 /*
661 * @brief Core function for the floating-point CIFFT butterfly process.
662 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
663 * @param[in] fftLen length of the FFT.
664 * @param[in] *pCoef points to twiddle coefficient buffer.
665 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
666 * @param[in] onebyfftLen value of 1/fftLen.
667 */
668
arm_radix4_butterfly_inverse_f16(float16_t * pSrc,uint16_t fftLen,const float16_t * pCoef,uint16_t twidCoefModifier,float16_t onebyfftLen)669 ARM_DSP_ATTRIBUTE void arm_radix4_butterfly_inverse_f16(
670 float16_t * pSrc,
671 uint16_t fftLen,
672 const float16_t * pCoef,
673 uint16_t twidCoefModifier,
674 float16_t onebyfftLen)
675 {
676 float16_t co1, co2, co3, si1, si2, si3;
677 uint32_t ia1, ia2, ia3;
678 uint32_t i0, i1, i2, i3;
679 uint32_t n1, n2, j, k;
680
681 #if defined (ARM_MATH_DSP)
682
683 float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
684 float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
685 Ybminusd;
686 float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
687 float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
688 float16_t *ptr1;
689 float16_t p0,p1,p2,p3,p4,p5,p6,p7;
690 float16_t a0,a1,a2,a3,a4,a5,a6,a7;
691
692
693 /* Initializations for the first stage */
694 n2 = fftLen;
695 n1 = n2;
696
697 /* n2 = fftLen/4 */
698 n2 >>= 2U;
699 i0 = 0U;
700 ia1 = 0U;
701
702 j = n2;
703
704 /* Calculation of first stage */
705 do
706 {
707 /* index calculation for the input as, */
708 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
709 i1 = i0 + n2;
710 i2 = i1 + n2;
711 i3 = i2 + n2;
712
713 /* Butterfly implementation */
714 xaIn = pSrc[(2U * i0)];
715 yaIn = pSrc[(2U * i0) + 1U];
716
717 xcIn = pSrc[(2U * i2)];
718 ycIn = pSrc[(2U * i2) + 1U];
719
720 xbIn = pSrc[(2U * i1)];
721 ybIn = pSrc[(2U * i1) + 1U];
722
723 xdIn = pSrc[(2U * i3)];
724 ydIn = pSrc[(2U * i3) + 1U];
725
726 /* xa + xc */
727 Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
728 /* xb + xd */
729 Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
730 /* ya + yc */
731 Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
732 /* yb + yd */
733 Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
734
735 /* index calculation for the coefficients */
736 ia2 = ia1 + ia1;
737 co2 = pCoef[ia2 * 2U];
738 si2 = pCoef[(ia2 * 2U) + 1U];
739
740 /* xa - xc */
741 Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
742 /* xb - xd */
743 Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
744 /* ya - yc */
745 Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
746 /* yb - yd */
747 Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
748
749 /* xa' = xa + xb + xc + xd */
750 pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
751
752 /* ya' = ya + yb + yc + yd */
753 pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
754
755 /* (xa - xc) - (yb - yd) */
756 Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
757 /* (ya - yc) + (xb - xd) */
758 Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
759 /* (xa + xc) - (xb + xd) */
760 Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
761 /* (ya + yc) - (yb + yd) */
762 Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
763 /* (xa - xc) + (yb - yd) */
764 Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
765 /* (ya - yc) - (xb - xd) */
766 Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
767
768 co1 = pCoef[ia1 * 2U];
769 si1 = pCoef[(ia1 * 2U) + 1U];
770
771 /* index calculation for the coefficients */
772 ia3 = ia2 + ia1;
773 co3 = pCoef[ia3 * 2U];
774 si3 = pCoef[(ia3 * 2U) + 1U];
775
776 Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
777 Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
778 Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
779 Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
780 Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
781 Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
782
783 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
784 //Xb12_out -= Yb12C_out * si1;
785 p0 = (_Float16)Yb12C_out * (_Float16)si1;
786 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
787 //Yb12_out += Xb12C_out * si1;
788 p1 = (_Float16)Xb12C_out * (_Float16)si1;
789 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
790 //Xc12_out -= Yc12C_out * si2;
791 p2 = (_Float16)Yc12C_out * (_Float16)si2;
792 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
793 //Yc12_out += Xc12C_out * si2;
794 p3 = (_Float16)Xc12C_out * (_Float16)si2;
795 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
796 //Xd12_out -= Yd12C_out * si3;
797 p4 = (_Float16)Yd12C_out * (_Float16)si3;
798 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
799 //Yd12_out += Xd12C_out * si3;
800 p5 =(_Float16) Xd12C_out * (_Float16)si3;
801
802 Xb12_out -= (_Float16)p0;
803 Yb12_out += (_Float16)p1;
804 Xc12_out -= (_Float16)p2;
805 Yc12_out += (_Float16)p3;
806 Xd12_out -= (_Float16)p4;
807 Yd12_out += (_Float16)p5;
808
809 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
810 pSrc[2U * i1] = Xc12_out;
811
812 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
813 pSrc[(2U * i1) + 1U] = Yc12_out;
814
815 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
816 pSrc[2U * i2] = Xb12_out;
817
818 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
819 pSrc[(2U * i2) + 1U] = Yb12_out;
820
821 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
822 pSrc[2U * i3] = Xd12_out;
823
824 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
825 pSrc[(2U * i3) + 1U] = Yd12_out;
826
827 /* Twiddle coefficients index modifier */
828 ia1 = ia1 + twidCoefModifier;
829
830 /* Updating input index */
831 i0 = i0 + 1U;
832
833 } while (--j);
834
835 twidCoefModifier <<= 2U;
836
837 /* Calculation of second stage to excluding last stage */
838 for (k = fftLen >> 2U; k > 4U; k >>= 2U)
839 {
840 /* Initializations for the first stage */
841 n1 = n2;
842 n2 >>= 2U;
843 ia1 = 0U;
844
845 /* Calculation of first stage */
846 j = 0;
847 do
848 {
849 /* index calculation for the coefficients */
850 ia2 = ia1 + ia1;
851 ia3 = ia2 + ia1;
852 co1 = pCoef[ia1 * 2U];
853 si1 = pCoef[(ia1 * 2U) + 1U];
854 co2 = pCoef[ia2 * 2U];
855 si2 = pCoef[(ia2 * 2U) + 1U];
856 co3 = pCoef[ia3 * 2U];
857 si3 = pCoef[(ia3 * 2U) + 1U];
858
859 /* Twiddle coefficients index modifier */
860 ia1 = ia1 + twidCoefModifier;
861
862 i0 = j;
863 do
864 {
865 /* index calculation for the input as, */
866 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
867 i1 = i0 + n2;
868 i2 = i1 + n2;
869 i3 = i2 + n2;
870
871 xaIn = pSrc[(2U * i0)];
872 yaIn = pSrc[(2U * i0) + 1U];
873
874 xbIn = pSrc[(2U * i1)];
875 ybIn = pSrc[(2U * i1) + 1U];
876
877 xcIn = pSrc[(2U * i2)];
878 ycIn = pSrc[(2U * i2) + 1U];
879
880 xdIn = pSrc[(2U * i3)];
881 ydIn = pSrc[(2U * i3) + 1U];
882
883 /* xa - xc */
884 Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
885 /* (xb - xd) */
886 Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
887 /* ya - yc */
888 Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
889 /* (yb - yd) */
890 Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
891
892 /* xa + xc */
893 Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
894 /* xb + xd */
895 Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
896 /* ya + yc */
897 Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
898 /* yb + yd */
899 Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
900
901 /* (xa - xc) - (yb - yd) */
902 Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
903 /* (ya - yc) + (xb - xd) */
904 Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
905 /* xa + xc -(xb + xd) */
906 Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
907 /* (ya + yc) - (yb + yd) */
908 Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
909 /* (xa - xc) + (yb - yd) */
910 Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
911 /* (ya - yc) - (xb - xd) */
912 Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
913
914 pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
915 pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
916
917 Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
918 Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
919 Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
920 Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
921 Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
922 Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
923
924 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
925 //Xb12_out -= Yb12C_out * si1;
926 p0 = (_Float16)Yb12C_out * (_Float16)si1;
927 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
928 //Yb12_out += Xb12C_out * si1;
929 p1 = (_Float16)Xb12C_out * (_Float16)si1;
930 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
931 //Xc12_out -= Yc12C_out * si2;
932 p2 = (_Float16)Yc12C_out * (_Float16)si2;
933 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
934 //Yc12_out += Xc12C_out * si2;
935 p3 = (_Float16)Xc12C_out * (_Float16)si2;
936 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
937 //Xd12_out -= Yd12C_out * si3;
938 p4 = (_Float16)Yd12C_out * (_Float16)si3;
939 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
940 //Yd12_out += Xd12C_out * si3;
941 p5 = (_Float16)Xd12C_out * (_Float16)si3;
942
943 Xb12_out -= (_Float16)p0;
944 Yb12_out += (_Float16)p1;
945 Xc12_out -= (_Float16)p2;
946 Yc12_out += (_Float16)p3;
947 Xd12_out -= (_Float16)p4;
948 Yd12_out += (_Float16)p5;
949
950 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
951 pSrc[2U * i1] = Xc12_out;
952
953 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
954 pSrc[(2U * i1) + 1U] = Yc12_out;
955
956 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
957 pSrc[2U * i2] = Xb12_out;
958
959 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
960 pSrc[(2U * i2) + 1U] = Yb12_out;
961
962 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
963 pSrc[2U * i3] = Xd12_out;
964
965 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
966 pSrc[(2U * i3) + 1U] = Yd12_out;
967
968 i0 += n1;
969 } while (i0 < fftLen);
970 j++;
971 } while (j <= (n2 - 1U));
972 twidCoefModifier <<= 2U;
973 }
974 /* Initializations of last stage */
975
976 j = fftLen >> 2;
977 ptr1 = &pSrc[0];
978
979 /* Calculations of last stage */
980 do
981 {
982 xaIn = ptr1[0];
983 yaIn = ptr1[1];
984 xbIn = ptr1[2];
985 ybIn = ptr1[3];
986 xcIn = ptr1[4];
987 ycIn = ptr1[5];
988 xdIn = ptr1[6];
989 ydIn = ptr1[7];
990
991 /* Butterfly implementation */
992 /* xa + xc */
993 Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
994
995 /* xa - xc */
996 Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
997
998 /* ya + yc */
999 Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
1000
1001 /* ya - yc */
1002 Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
1003
1004 /* xb + xd */
1005 Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
1006
1007 /* yb + yd */
1008 Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
1009
1010 /* (xb-xd) */
1011 Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
1012
1013 /* (yb-yd) */
1014 Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
1015
1016 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
1017 a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
1018 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
1019 a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
1020 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1021 a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
1022 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1023 a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
1024 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1025 a4 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
1026 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1027 a5 = ((_Float16)Yaminusc + (_Float16)Xbminusd);
1028 /* xd' = (xa-yb-xc+yd) * onebyfftLen */
1029 a6 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
1030 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1031 a7 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
1032
1033 p0 = (_Float16)a0 * (_Float16)onebyfftLen;
1034 p1 = (_Float16)a1 * (_Float16)onebyfftLen;
1035 p2 = (_Float16)a2 * (_Float16)onebyfftLen;
1036 p3 = (_Float16)a3 * (_Float16)onebyfftLen;
1037 p4 = (_Float16)a4 * (_Float16)onebyfftLen;
1038 p5 = (_Float16)a5 * (_Float16)onebyfftLen;
1039 p6 = (_Float16)a6 * (_Float16)onebyfftLen;
1040 p7 = (_Float16)a7 * (_Float16)onebyfftLen;
1041
1042 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
1043 ptr1[0] = p0;
1044 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
1045 ptr1[1] = p1;
1046 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1047 ptr1[2] = p2;
1048 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1049 ptr1[3] = p3;
1050 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1051 ptr1[4] = p4;
1052 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1053 ptr1[5] = p5;
1054 /* xd' = (xa+yb-xc-yd) * onebyfftLen */
1055 ptr1[6] = p6;
1056 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1057 ptr1[7] = p7;
1058
1059 /* increment source pointer by 8 for next calculations */
1060 ptr1 = ptr1 + 8U;
1061
1062 } while (--j);
1063
1064 #else
1065
1066 float16_t t1, t2, r1, r2, s1, s2;
1067
1068 /* Run the below code for Cortex-M0 */
1069
1070 /* Initializations for the first stage */
1071 n2 = fftLen;
1072 n1 = n2;
1073
1074 /* Calculation of first stage */
1075 for (k = fftLen; k > 4U; k >>= 2U)
1076 {
1077 /* Initializations for the first stage */
1078 n1 = n2;
1079 n2 >>= 2U;
1080 ia1 = 0U;
1081
1082 /* Calculation of first stage */
1083 j = 0;
1084 do
1085 {
1086 /* index calculation for the coefficients */
1087 ia2 = ia1 + ia1;
1088 ia3 = ia2 + ia1;
1089 co1 = pCoef[ia1 * 2U];
1090 si1 = pCoef[(ia1 * 2U) + 1U];
1091 co2 = pCoef[ia2 * 2U];
1092 si2 = pCoef[(ia2 * 2U) + 1U];
1093 co3 = pCoef[ia3 * 2U];
1094 si3 = pCoef[(ia3 * 2U) + 1U];
1095
1096 /* Twiddle coefficients index modifier */
1097 ia1 = ia1 + twidCoefModifier;
1098
1099 i0 = j;
1100 do
1101 {
1102 /* index calculation for the input as, */
1103 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1104 i1 = i0 + n2;
1105 i2 = i1 + n2;
1106 i3 = i2 + n2;
1107
1108 /* xa + xc */
1109 r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
1110
1111 /* xa - xc */
1112 r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
1113
1114 /* ya + yc */
1115 s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
1116
1117 /* ya - yc */
1118 s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
1119
1120 /* xb + xd */
1121 t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
1122
1123 /* xa' = xa + xb + xc + xd */
1124 pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
1125
1126 /* xa + xc -(xb + xd) */
1127 r1 = (_Float16)r1 - (_Float16)t1;
1128
1129 /* yb + yd */
1130 t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
1131
1132 /* ya' = ya + yb + yc + yd */
1133 pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
1134
1135 /* (ya + yc) - (yb + yd) */
1136 s1 = (_Float16)s1 - (_Float16)t2;
1137
1138 /* (yb - yd) */
1139 t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
1140
1141 /* (xb - xd) */
1142 t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
1143
1144 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1145 pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) - ((_Float16)s1 * (_Float16)si2);
1146
1147 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1148 pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) + ((_Float16)r1 * (_Float16)si2);
1149
1150 /* (xa - xc) - (yb - yd) */
1151 r1 = (_Float16)r2 - (_Float16)t1;
1152
1153 /* (xa - xc) + (yb - yd) */
1154 r2 = (_Float16)r2 + (_Float16)t1;
1155
1156 /* (ya - yc) + (xb - xd) */
1157 s1 = (_Float16)s2 + (_Float16)t2;
1158
1159 /* (ya - yc) - (xb - xd) */
1160 s2 = (_Float16)s2 - (_Float16)t2;
1161
1162 /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
1163 pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) - ((_Float16)s1 * (_Float16)si1);
1164
1165 /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
1166 pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) + ((_Float16)r1 * (_Float16)si1);
1167
1168 /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
1169 pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) - ((_Float16)s2 * (_Float16)si3);
1170
1171 /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
1172 pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) + ((_Float16)r2 * (_Float16)si3);
1173
1174 i0 += n1;
1175 } while ( i0 < fftLen);
1176 j++;
1177 } while (j <= (n2 - 1U));
1178 twidCoefModifier <<= 2U;
1179 }
1180 /* Initializations of last stage */
1181 n1 = n2;
1182 n2 >>= 2U;
1183
1184 /* Calculations of last stage */
1185 for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1186 {
1187 /* index calculation for the input as, */
1188 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1189 i1 = i0 + n2;
1190 i2 = i1 + n2;
1191 i3 = i2 + n2;
1192
1193 /* Butterfly implementation */
1194 /* xa + xc */
1195 r1 = (_Float16)pSrc[2U * i0] + (_Float16)pSrc[2U * i2];
1196
1197 /* xa - xc */
1198 r2 = (_Float16)pSrc[2U * i0] - (_Float16)pSrc[2U * i2];
1199
1200 /* ya + yc */
1201 s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
1202
1203 /* ya - yc */
1204 s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
1205
1206 /* xb + xd */
1207 t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
1208
1209 /* xa' = (xa + xb + xc + xd) * onebyfftLen */
1210 pSrc[2U * i0] = ((_Float16)r1 + (_Float16)t1) * (_Float16)onebyfftLen;
1211
1212 /* (xa + xc) - (xb + xd) */
1213 r1 = (_Float16)r1 - (_Float16)t1;
1214
1215 /* yb + yd */
1216 t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
1217
1218 /* ya' = (ya + yb + yc + yd) * onebyfftLen */
1219 pSrc[(2U * i0) + 1U] = ((_Float16)s1 + (_Float16)t2) * (_Float16)onebyfftLen;
1220
1221 /* (ya + yc) - (yb + yd) */
1222 s1 = (_Float16)s1 - (_Float16)t2;
1223
1224 /* (yb-yd) */
1225 t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
1226
1227 /* (xb-xd) */
1228 t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
1229
1230 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1231 pSrc[2U * i1] = (_Float16)r1 * (_Float16)onebyfftLen;
1232
1233 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1234 pSrc[(2U * i1) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
1235
1236 /* (xa - xc) - (yb-yd) */
1237 r1 = (_Float16)r2 - (_Float16)t1;
1238
1239 /* (xa - xc) + (yb-yd) */
1240 r2 = (_Float16)r2 + (_Float16)t1;
1241
1242 /* (ya - yc) + (xb-xd) */
1243 s1 = (_Float16)s2 + (_Float16)t2;
1244
1245 /* (ya - yc) - (xb-xd) */
1246 s2 = (_Float16)s2 - (_Float16)t2;
1247
1248 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1249 pSrc[2U * i2] = (_Float16)r1 * (_Float16)onebyfftLen;
1250
1251 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1252 pSrc[(2U * i2) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
1253
1254 /* xd' = (xa+yb-xc-yd) * onebyfftLen */
1255 pSrc[2U * i3] = (_Float16)r2 * (_Float16)onebyfftLen;
1256
1257 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1258 pSrc[(2U * i3) + 1U] = (_Float16)s2 * (_Float16)onebyfftLen;
1259 }
1260
1261 #endif /* #if defined (ARM_MATH_DSP) */
1262 }
1263
1264 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
1265