1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_f32.c
4 * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/transform_functions.h"
30
31 extern void arm_bitreversal_f32(
32 float32_t * pSrc,
33 uint16_t fftSize,
34 uint16_t bitRevFactor,
35 const uint16_t * pBitRevTab);
36
37 void arm_radix4_butterfly_f32(
38 float32_t * pSrc,
39 uint16_t fftLen,
40 const float32_t * pCoef,
41 uint16_t twidCoefModifier);
42
43 void arm_radix4_butterfly_inverse_f32(
44 float32_t * pSrc,
45 uint16_t fftLen,
46 const float32_t * pCoef,
47 uint16_t twidCoefModifier,
48 float32_t onebyfftLen);
49
50
51
52
53 /**
54 @addtogroup ComplexFFTDeprecated
55 @{
56 */
57
58
59 /**
60 @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
61 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
62 @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
63 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
64 */
65
arm_cfft_radix4_f32(const arm_cfft_radix4_instance_f32 * S,float32_t * pSrc)66 void arm_cfft_radix4_f32(
67 const arm_cfft_radix4_instance_f32 * S,
68 float32_t * pSrc)
69 {
70 if (S->ifftFlag == 1U)
71 {
72 /* Complex IFFT radix-4 */
73 arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
74 }
75 else
76 {
77 /* Complex FFT radix-4 */
78 arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
79 }
80
81 if (S->bitReverseFlag == 1U)
82 {
83 /* Bit Reversal */
84 arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
85 }
86
87 }
88
89 /**
90 @} end of ComplexFFTDeprecated group
91 */
92
93 /* ----------------------------------------------------------------------
94 * Internal helper function used by the FFTs
95 * ---------------------------------------------------------------------- */
96
97 /**
98 brief Core function for the floating-point CFFT butterfly process.
99 param[in,out] pSrc points to the in-place buffer of floating-point data type
100 param[in] fftLen length of the FFT
101 param[in] pCoef points to the twiddle coefficient buffer
102 param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
103 return none
104 */
105
arm_radix4_butterfly_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier)106 void arm_radix4_butterfly_f32(
107 float32_t * pSrc,
108 uint16_t fftLen,
109 const float32_t * pCoef,
110 uint16_t twidCoefModifier)
111 {
112 float32_t co1, co2, co3, si1, si2, si3;
113 uint32_t ia1, ia2, ia3;
114 uint32_t i0, i1, i2, i3;
115 uint32_t n1, n2, j, k;
116
117 #if defined (ARM_MATH_LOOPUNROLL)
118
119 float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
120 float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
121 Ybminusd;
122 float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
123 float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
124 float32_t *ptr1;
125 float32_t p0,p1,p2,p3,p4,p5;
126 float32_t a0,a1,a2,a3,a4,a5,a6,a7;
127
128 /* Initializations for the first stage */
129 n2 = fftLen;
130 n1 = n2;
131
132 /* n2 = fftLen/4 */
133 n2 >>= 2U;
134 i0 = 0U;
135 ia1 = 0U;
136
137 j = n2;
138
139 /* Calculation of first stage */
140 do
141 {
142 /* index calculation for the input as, */
143 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
144 i1 = i0 + n2;
145 i2 = i1 + n2;
146 i3 = i2 + n2;
147
148 xaIn = pSrc[(2U * i0)];
149 yaIn = pSrc[(2U * i0) + 1U];
150
151 xbIn = pSrc[(2U * i1)];
152 ybIn = pSrc[(2U * i1) + 1U];
153
154 xcIn = pSrc[(2U * i2)];
155 ycIn = pSrc[(2U * i2) + 1U];
156
157 xdIn = pSrc[(2U * i3)];
158 ydIn = pSrc[(2U * i3) + 1U];
159
160 /* xa + xc */
161 Xaplusc = xaIn + xcIn;
162 /* xb + xd */
163 Xbplusd = xbIn + xdIn;
164 /* ya + yc */
165 Yaplusc = yaIn + ycIn;
166 /* yb + yd */
167 Ybplusd = ybIn + ydIn;
168
169 /* index calculation for the coefficients */
170 ia2 = ia1 + ia1;
171 co2 = pCoef[ia2 * 2U];
172 si2 = pCoef[(ia2 * 2U) + 1U];
173
174 /* xa - xc */
175 Xaminusc = xaIn - xcIn;
176 /* xb - xd */
177 Xbminusd = xbIn - xdIn;
178 /* ya - yc */
179 Yaminusc = yaIn - ycIn;
180 /* yb - yd */
181 Ybminusd = ybIn - ydIn;
182
183 /* xa' = xa + xb + xc + xd */
184 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
185 /* ya' = ya + yb + yc + yd */
186 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
187
188 /* (xa - xc) + (yb - yd) */
189 Xb12C_out = (Xaminusc + Ybminusd);
190 /* (ya - yc) + (xb - xd) */
191 Yb12C_out = (Yaminusc - Xbminusd);
192 /* (xa + xc) - (xb + xd) */
193 Xc12C_out = (Xaplusc - Xbplusd);
194 /* (ya + yc) - (yb + yd) */
195 Yc12C_out = (Yaplusc - Ybplusd);
196 /* (xa - xc) - (yb - yd) */
197 Xd12C_out = (Xaminusc - Ybminusd);
198 /* (ya - yc) + (xb - xd) */
199 Yd12C_out = (Xbminusd + Yaminusc);
200
201 co1 = pCoef[ia1 * 2U];
202 si1 = pCoef[(ia1 * 2U) + 1U];
203
204 /* index calculation for the coefficients */
205 ia3 = ia2 + ia1;
206 co3 = pCoef[ia3 * 2U];
207 si3 = pCoef[(ia3 * 2U) + 1U];
208
209 Xb12_out = Xb12C_out * co1;
210 Yb12_out = Yb12C_out * co1;
211 Xc12_out = Xc12C_out * co2;
212 Yc12_out = Yc12C_out * co2;
213 Xd12_out = Xd12C_out * co3;
214 Yd12_out = Yd12C_out * co3;
215
216 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
217 //Xb12_out -= Yb12C_out * si1;
218 p0 = Yb12C_out * si1;
219 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
220 //Yb12_out += Xb12C_out * si1;
221 p1 = Xb12C_out * si1;
222 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
223 //Xc12_out -= Yc12C_out * si2;
224 p2 = Yc12C_out * si2;
225 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
226 //Yc12_out += Xc12C_out * si2;
227 p3 = Xc12C_out * si2;
228 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
229 //Xd12_out -= Yd12C_out * si3;
230 p4 = Yd12C_out * si3;
231 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
232 //Yd12_out += Xd12C_out * si3;
233 p5 = Xd12C_out * si3;
234
235 Xb12_out += p0;
236 Yb12_out -= p1;
237 Xc12_out += p2;
238 Yc12_out -= p3;
239 Xd12_out += p4;
240 Yd12_out -= p5;
241
242 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
243 pSrc[2U * i1] = Xc12_out;
244
245 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
246 pSrc[(2U * i1) + 1U] = Yc12_out;
247
248 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
249 pSrc[2U * i2] = Xb12_out;
250
251 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
252 pSrc[(2U * i2) + 1U] = Yb12_out;
253
254 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
255 pSrc[2U * i3] = Xd12_out;
256
257 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
258 pSrc[(2U * i3) + 1U] = Yd12_out;
259
260 /* Twiddle coefficients index modifier */
261 ia1 += twidCoefModifier;
262
263 /* Updating input index */
264 i0++;
265
266 }
267 while (--j);
268
269 twidCoefModifier <<= 2U;
270
271 /* Calculation of second stage to excluding last stage */
272 for (k = fftLen >> 2U; k > 4U; k >>= 2U)
273 {
274 /* Initializations for the first stage */
275 n1 = n2;
276 n2 >>= 2U;
277 ia1 = 0U;
278
279 /* Calculation of first stage */
280 j = 0;
281 do
282 {
283 /* index calculation for the coefficients */
284 ia2 = ia1 + ia1;
285 ia3 = ia2 + ia1;
286 co1 = pCoef[(ia1 * 2U)];
287 si1 = pCoef[(ia1 * 2U) + 1U];
288 co2 = pCoef[(ia2 * 2U)];
289 si2 = pCoef[(ia2 * 2U) + 1U];
290 co3 = pCoef[(ia3 * 2U)];
291 si3 = pCoef[(ia3 * 2U) + 1U];
292
293 /* Twiddle coefficients index modifier */
294 ia1 += twidCoefModifier;
295
296 i0 = j;
297 do
298 {
299 /* index calculation for the input as, */
300 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
301 i1 = i0 + n2;
302 i2 = i1 + n2;
303 i3 = i2 + n2;
304
305 xaIn = pSrc[(2U * i0)];
306 yaIn = pSrc[(2U * i0) + 1U];
307
308 xbIn = pSrc[(2U * i1)];
309 ybIn = pSrc[(2U * i1) + 1U];
310
311 xcIn = pSrc[(2U * i2)];
312 ycIn = pSrc[(2U * i2) + 1U];
313
314 xdIn = pSrc[(2U * i3)];
315 ydIn = pSrc[(2U * i3) + 1U];
316
317 /* xa - xc */
318 Xaminusc = xaIn - xcIn;
319 /* (xb - xd) */
320 Xbminusd = xbIn - xdIn;
321 /* ya - yc */
322 Yaminusc = yaIn - ycIn;
323 /* (yb - yd) */
324 Ybminusd = ybIn - ydIn;
325
326 /* xa + xc */
327 Xaplusc = xaIn + xcIn;
328 /* xb + xd */
329 Xbplusd = xbIn + xdIn;
330 /* ya + yc */
331 Yaplusc = yaIn + ycIn;
332 /* yb + yd */
333 Ybplusd = ybIn + ydIn;
334
335 /* (xa - xc) + (yb - yd) */
336 Xb12C_out = (Xaminusc + Ybminusd);
337 /* (ya - yc) - (xb - xd) */
338 Yb12C_out = (Yaminusc - Xbminusd);
339 /* xa + xc -(xb + xd) */
340 Xc12C_out = (Xaplusc - Xbplusd);
341 /* (ya + yc) - (yb + yd) */
342 Yc12C_out = (Yaplusc - Ybplusd);
343 /* (xa - xc) - (yb - yd) */
344 Xd12C_out = (Xaminusc - Ybminusd);
345 /* (ya - yc) + (xb - xd) */
346 Yd12C_out = (Xbminusd + Yaminusc);
347
348 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
349 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
350
351 Xb12_out = Xb12C_out * co1;
352 Yb12_out = Yb12C_out * co1;
353 Xc12_out = Xc12C_out * co2;
354 Yc12_out = Yc12C_out * co2;
355 Xd12_out = Xd12C_out * co3;
356 Yd12_out = Yd12C_out * co3;
357
358 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
359 //Xb12_out -= Yb12C_out * si1;
360 p0 = Yb12C_out * si1;
361 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
362 //Yb12_out += Xb12C_out * si1;
363 p1 = Xb12C_out * si1;
364 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
365 //Xc12_out -= Yc12C_out * si2;
366 p2 = Yc12C_out * si2;
367 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
368 //Yc12_out += Xc12C_out * si2;
369 p3 = Xc12C_out * si2;
370 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
371 //Xd12_out -= Yd12C_out * si3;
372 p4 = Yd12C_out * si3;
373 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
374 //Yd12_out += Xd12C_out * si3;
375 p5 = Xd12C_out * si3;
376
377 Xb12_out += p0;
378 Yb12_out -= p1;
379 Xc12_out += p2;
380 Yc12_out -= p3;
381 Xd12_out += p4;
382 Yd12_out -= p5;
383
384 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
385 pSrc[2U * i1] = Xc12_out;
386
387 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
388 pSrc[(2U * i1) + 1U] = Yc12_out;
389
390 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
391 pSrc[2U * i2] = Xb12_out;
392
393 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
394 pSrc[(2U * i2) + 1U] = Yb12_out;
395
396 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
397 pSrc[2U * i3] = Xd12_out;
398
399 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
400 pSrc[(2U * i3) + 1U] = Yd12_out;
401
402 i0 += n1;
403 } while (i0 < fftLen);
404 j++;
405 } while (j <= (n2 - 1U));
406 twidCoefModifier <<= 2U;
407 }
408
409 j = fftLen >> 2;
410 ptr1 = &pSrc[0];
411
412 /* Calculations of last stage */
413 do
414 {
415 xaIn = ptr1[0];
416 yaIn = ptr1[1];
417 xbIn = ptr1[2];
418 ybIn = ptr1[3];
419 xcIn = ptr1[4];
420 ycIn = ptr1[5];
421 xdIn = ptr1[6];
422 ydIn = ptr1[7];
423
424 /* xa + xc */
425 Xaplusc = xaIn + xcIn;
426
427 /* xa - xc */
428 Xaminusc = xaIn - xcIn;
429
430 /* ya + yc */
431 Yaplusc = yaIn + ycIn;
432
433 /* ya - yc */
434 Yaminusc = yaIn - ycIn;
435
436 /* xb + xd */
437 Xbplusd = xbIn + xdIn;
438
439 /* yb + yd */
440 Ybplusd = ybIn + ydIn;
441
442 /* (xb-xd) */
443 Xbminusd = xbIn - xdIn;
444
445 /* (yb-yd) */
446 Ybminusd = ybIn - ydIn;
447
448 /* xa' = xa + xb + xc + xd */
449 a0 = (Xaplusc + Xbplusd);
450 /* ya' = ya + yb + yc + yd */
451 a1 = (Yaplusc + Ybplusd);
452 /* xc' = (xa-xb+xc-xd) */
453 a2 = (Xaplusc - Xbplusd);
454 /* yc' = (ya-yb+yc-yd) */
455 a3 = (Yaplusc - Ybplusd);
456 /* xb' = (xa+yb-xc-yd) */
457 a4 = (Xaminusc + Ybminusd);
458 /* yb' = (ya-xb-yc+xd) */
459 a5 = (Yaminusc - Xbminusd);
460 /* xd' = (xa-yb-xc+yd)) */
461 a6 = (Xaminusc - Ybminusd);
462 /* yd' = (ya+xb-yc-xd) */
463 a7 = (Xbminusd + Yaminusc);
464
465 ptr1[0] = a0;
466 ptr1[1] = a1;
467 ptr1[2] = a2;
468 ptr1[3] = a3;
469 ptr1[4] = a4;
470 ptr1[5] = a5;
471 ptr1[6] = a6;
472 ptr1[7] = a7;
473
474 /* increment pointer by 8 */
475 ptr1 += 8U;
476 } while (--j);
477
478 #else
479
480 float32_t t1, t2, r1, r2, s1, s2;
481
482 /* Initializations for the fft calculation */
483 n2 = fftLen;
484 n1 = n2;
485 for (k = fftLen; k > 1U; k >>= 2U)
486 {
487 /* Initializations for the fft calculation */
488 n1 = n2;
489 n2 >>= 2U;
490 ia1 = 0U;
491
492 /* FFT Calculation */
493 j = 0;
494 do
495 {
496 /* index calculation for the coefficients */
497 ia2 = ia1 + ia1;
498 ia3 = ia2 + ia1;
499 co1 = pCoef[ia1 * 2U];
500 si1 = pCoef[(ia1 * 2U) + 1U];
501 co2 = pCoef[ia2 * 2U];
502 si2 = pCoef[(ia2 * 2U) + 1U];
503 co3 = pCoef[ia3 * 2U];
504 si3 = pCoef[(ia3 * 2U) + 1U];
505
506 /* Twiddle coefficients index modifier */
507 ia1 = ia1 + twidCoefModifier;
508
509 i0 = j;
510 do
511 {
512 /* index calculation for the input as, */
513 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
514 i1 = i0 + n2;
515 i2 = i1 + n2;
516 i3 = i2 + n2;
517
518 /* xa + xc */
519 r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
520
521 /* xa - xc */
522 r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
523
524 /* ya + yc */
525 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
526
527 /* ya - yc */
528 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
529
530 /* xb + xd */
531 t1 = pSrc[2U * i1] + pSrc[2U * i3];
532
533 /* xa' = xa + xb + xc + xd */
534 pSrc[2U * i0] = r1 + t1;
535
536 /* xa + xc -(xb + xd) */
537 r1 = r1 - t1;
538
539 /* yb + yd */
540 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
541
542 /* ya' = ya + yb + yc + yd */
543 pSrc[(2U * i0) + 1U] = s1 + t2;
544
545 /* (ya + yc) - (yb + yd) */
546 s1 = s1 - t2;
547
548 /* (yb - yd) */
549 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
550
551 /* (xb - xd) */
552 t2 = pSrc[2U * i1] - pSrc[2U * i3];
553
554 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
555 pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
556
557 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
558 pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
559
560 /* (xa - xc) + (yb - yd) */
561 r1 = r2 + t1;
562
563 /* (xa - xc) - (yb - yd) */
564 r2 = r2 - t1;
565
566 /* (ya - yc) - (xb - xd) */
567 s1 = s2 - t2;
568
569 /* (ya - yc) + (xb - xd) */
570 s2 = s2 + t2;
571
572 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
573 pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
574
575 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
576 pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
577
578 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
579 pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
580
581 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
582 pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
583
584 i0 += n1;
585 } while ( i0 < fftLen);
586 j++;
587 } while (j <= (n2 - 1U));
588 twidCoefModifier <<= 2U;
589 }
590
591 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
592
593 }
594
595 /**
596 brief Core function for the floating-point CIFFT butterfly process.
597 param[in,out] pSrc points to the in-place buffer of floating-point data type
598 param[in] fftLen length of the FFT
599 param[in] pCoef points to twiddle coefficient buffer
600 param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
601 param[in] onebyfftLen value of 1/fftLen
602 return none
603 */
604
arm_radix4_butterfly_inverse_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier,float32_t onebyfftLen)605 void arm_radix4_butterfly_inverse_f32(
606 float32_t * pSrc,
607 uint16_t fftLen,
608 const float32_t * pCoef,
609 uint16_t twidCoefModifier,
610 float32_t onebyfftLen)
611 {
612 float32_t co1, co2, co3, si1, si2, si3;
613 uint32_t ia1, ia2, ia3;
614 uint32_t i0, i1, i2, i3;
615 uint32_t n1, n2, j, k;
616
617 #if defined (ARM_MATH_LOOPUNROLL)
618
619 float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
620 float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
621 Ybminusd;
622 float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
623 float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
624 float32_t *ptr1;
625 float32_t p0,p1,p2,p3,p4,p5,p6,p7;
626 float32_t a0,a1,a2,a3,a4,a5,a6,a7;
627
628
629 /* Initializations for the first stage */
630 n2 = fftLen;
631 n1 = n2;
632
633 /* n2 = fftLen/4 */
634 n2 >>= 2U;
635 i0 = 0U;
636 ia1 = 0U;
637
638 j = n2;
639
640 /* Calculation of first stage */
641 do
642 {
643 /* index calculation for the input as, */
644 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
645 i1 = i0 + n2;
646 i2 = i1 + n2;
647 i3 = i2 + n2;
648
649 /* Butterfly implementation */
650 xaIn = pSrc[(2U * i0)];
651 yaIn = pSrc[(2U * i0) + 1U];
652
653 xcIn = pSrc[(2U * i2)];
654 ycIn = pSrc[(2U * i2) + 1U];
655
656 xbIn = pSrc[(2U * i1)];
657 ybIn = pSrc[(2U * i1) + 1U];
658
659 xdIn = pSrc[(2U * i3)];
660 ydIn = pSrc[(2U * i3) + 1U];
661
662 /* xa + xc */
663 Xaplusc = xaIn + xcIn;
664 /* xb + xd */
665 Xbplusd = xbIn + xdIn;
666 /* ya + yc */
667 Yaplusc = yaIn + ycIn;
668 /* yb + yd */
669 Ybplusd = ybIn + ydIn;
670
671 /* index calculation for the coefficients */
672 ia2 = ia1 + ia1;
673 co2 = pCoef[ia2 * 2U];
674 si2 = pCoef[(ia2 * 2U) + 1U];
675
676 /* xa - xc */
677 Xaminusc = xaIn - xcIn;
678 /* xb - xd */
679 Xbminusd = xbIn - xdIn;
680 /* ya - yc */
681 Yaminusc = yaIn - ycIn;
682 /* yb - yd */
683 Ybminusd = ybIn - ydIn;
684
685 /* xa' = xa + xb + xc + xd */
686 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
687
688 /* ya' = ya + yb + yc + yd */
689 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
690
691 /* (xa - xc) - (yb - yd) */
692 Xb12C_out = (Xaminusc - Ybminusd);
693 /* (ya - yc) + (xb - xd) */
694 Yb12C_out = (Yaminusc + Xbminusd);
695 /* (xa + xc) - (xb + xd) */
696 Xc12C_out = (Xaplusc - Xbplusd);
697 /* (ya + yc) - (yb + yd) */
698 Yc12C_out = (Yaplusc - Ybplusd);
699 /* (xa - xc) + (yb - yd) */
700 Xd12C_out = (Xaminusc + Ybminusd);
701 /* (ya - yc) - (xb - xd) */
702 Yd12C_out = (Yaminusc - Xbminusd);
703
704 co1 = pCoef[ia1 * 2U];
705 si1 = pCoef[(ia1 * 2U) + 1U];
706
707 /* index calculation for the coefficients */
708 ia3 = ia2 + ia1;
709 co3 = pCoef[ia3 * 2U];
710 si3 = pCoef[(ia3 * 2U) + 1U];
711
712 Xb12_out = Xb12C_out * co1;
713 Yb12_out = Yb12C_out * co1;
714 Xc12_out = Xc12C_out * co2;
715 Yc12_out = Yc12C_out * co2;
716 Xd12_out = Xd12C_out * co3;
717 Yd12_out = Yd12C_out * co3;
718
719 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
720 //Xb12_out -= Yb12C_out * si1;
721 p0 = Yb12C_out * si1;
722 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
723 //Yb12_out += Xb12C_out * si1;
724 p1 = Xb12C_out * si1;
725 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
726 //Xc12_out -= Yc12C_out * si2;
727 p2 = Yc12C_out * si2;
728 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
729 //Yc12_out += Xc12C_out * si2;
730 p3 = Xc12C_out * si2;
731 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
732 //Xd12_out -= Yd12C_out * si3;
733 p4 = Yd12C_out * si3;
734 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
735 //Yd12_out += Xd12C_out * si3;
736 p5 = Xd12C_out * si3;
737
738 Xb12_out -= p0;
739 Yb12_out += p1;
740 Xc12_out -= p2;
741 Yc12_out += p3;
742 Xd12_out -= p4;
743 Yd12_out += p5;
744
745 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
746 pSrc[2U * i1] = Xc12_out;
747
748 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
749 pSrc[(2U * i1) + 1U] = Yc12_out;
750
751 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
752 pSrc[2U * i2] = Xb12_out;
753
754 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
755 pSrc[(2U * i2) + 1U] = Yb12_out;
756
757 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
758 pSrc[2U * i3] = Xd12_out;
759
760 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
761 pSrc[(2U * i3) + 1U] = Yd12_out;
762
763 /* Twiddle coefficients index modifier */
764 ia1 = ia1 + twidCoefModifier;
765
766 /* Updating input index */
767 i0 = i0 + 1U;
768
769 } while (--j);
770
771 twidCoefModifier <<= 2U;
772
773 /* Calculation of second stage to excluding last stage */
774 for (k = fftLen >> 2U; k > 4U; k >>= 2U)
775 {
776 /* Initializations for the first stage */
777 n1 = n2;
778 n2 >>= 2U;
779 ia1 = 0U;
780
781 /* Calculation of first stage */
782 j = 0;
783 do
784 {
785 /* index calculation for the coefficients */
786 ia2 = ia1 + ia1;
787 ia3 = ia2 + ia1;
788 co1 = pCoef[ia1 * 2U];
789 si1 = pCoef[(ia1 * 2U) + 1U];
790 co2 = pCoef[ia2 * 2U];
791 si2 = pCoef[(ia2 * 2U) + 1U];
792 co3 = pCoef[ia3 * 2U];
793 si3 = pCoef[(ia3 * 2U) + 1U];
794
795 /* Twiddle coefficients index modifier */
796 ia1 = ia1 + twidCoefModifier;
797
798 i0 = j;
799 do
800 {
801 /* index calculation for the input as, */
802 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
803 i1 = i0 + n2;
804 i2 = i1 + n2;
805 i3 = i2 + n2;
806
807 xaIn = pSrc[(2U * i0)];
808 yaIn = pSrc[(2U * i0) + 1U];
809
810 xbIn = pSrc[(2U * i1)];
811 ybIn = pSrc[(2U * i1) + 1U];
812
813 xcIn = pSrc[(2U * i2)];
814 ycIn = pSrc[(2U * i2) + 1U];
815
816 xdIn = pSrc[(2U * i3)];
817 ydIn = pSrc[(2U * i3) + 1U];
818
819 /* xa - xc */
820 Xaminusc = xaIn - xcIn;
821 /* (xb - xd) */
822 Xbminusd = xbIn - xdIn;
823 /* ya - yc */
824 Yaminusc = yaIn - ycIn;
825 /* (yb - yd) */
826 Ybminusd = ybIn - ydIn;
827
828 /* xa + xc */
829 Xaplusc = xaIn + xcIn;
830 /* xb + xd */
831 Xbplusd = xbIn + xdIn;
832 /* ya + yc */
833 Yaplusc = yaIn + ycIn;
834 /* yb + yd */
835 Ybplusd = ybIn + ydIn;
836
837 /* (xa - xc) - (yb - yd) */
838 Xb12C_out = (Xaminusc - Ybminusd);
839 /* (ya - yc) + (xb - xd) */
840 Yb12C_out = (Yaminusc + Xbminusd);
841 /* xa + xc -(xb + xd) */
842 Xc12C_out = (Xaplusc - Xbplusd);
843 /* (ya + yc) - (yb + yd) */
844 Yc12C_out = (Yaplusc - Ybplusd);
845 /* (xa - xc) + (yb - yd) */
846 Xd12C_out = (Xaminusc + Ybminusd);
847 /* (ya - yc) - (xb - xd) */
848 Yd12C_out = (Yaminusc - Xbminusd);
849
850 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
851 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
852
853 Xb12_out = Xb12C_out * co1;
854 Yb12_out = Yb12C_out * co1;
855 Xc12_out = Xc12C_out * co2;
856 Yc12_out = Yc12C_out * co2;
857 Xd12_out = Xd12C_out * co3;
858 Yd12_out = Yd12C_out * co3;
859
860 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
861 //Xb12_out -= Yb12C_out * si1;
862 p0 = Yb12C_out * si1;
863 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
864 //Yb12_out += Xb12C_out * si1;
865 p1 = Xb12C_out * si1;
866 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
867 //Xc12_out -= Yc12C_out * si2;
868 p2 = Yc12C_out * si2;
869 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
870 //Yc12_out += Xc12C_out * si2;
871 p3 = Xc12C_out * si2;
872 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
873 //Xd12_out -= Yd12C_out * si3;
874 p4 = Yd12C_out * si3;
875 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
876 //Yd12_out += Xd12C_out * si3;
877 p5 = Xd12C_out * si3;
878
879 Xb12_out -= p0;
880 Yb12_out += p1;
881 Xc12_out -= p2;
882 Yc12_out += p3;
883 Xd12_out -= p4;
884 Yd12_out += p5;
885
886 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
887 pSrc[2U * i1] = Xc12_out;
888
889 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
890 pSrc[(2U * i1) + 1U] = Yc12_out;
891
892 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
893 pSrc[2U * i2] = Xb12_out;
894
895 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
896 pSrc[(2U * i2) + 1U] = Yb12_out;
897
898 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
899 pSrc[2U * i3] = Xd12_out;
900
901 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
902 pSrc[(2U * i3) + 1U] = Yd12_out;
903
904 i0 += n1;
905 } while (i0 < fftLen);
906 j++;
907 } while (j <= (n2 - 1U));
908 twidCoefModifier <<= 2U;
909 }
910 /* Initializations of last stage */
911
912 j = fftLen >> 2;
913 ptr1 = &pSrc[0];
914
915 /* Calculations of last stage */
916 do
917 {
918 xaIn = ptr1[0];
919 yaIn = ptr1[1];
920 xbIn = ptr1[2];
921 ybIn = ptr1[3];
922 xcIn = ptr1[4];
923 ycIn = ptr1[5];
924 xdIn = ptr1[6];
925 ydIn = ptr1[7];
926
927 /* Butterfly implementation */
928 /* xa + xc */
929 Xaplusc = xaIn + xcIn;
930
931 /* xa - xc */
932 Xaminusc = xaIn - xcIn;
933
934 /* ya + yc */
935 Yaplusc = yaIn + ycIn;
936
937 /* ya - yc */
938 Yaminusc = yaIn - ycIn;
939
940 /* xb + xd */
941 Xbplusd = xbIn + xdIn;
942
943 /* yb + yd */
944 Ybplusd = ybIn + ydIn;
945
946 /* (xb-xd) */
947 Xbminusd = xbIn - xdIn;
948
949 /* (yb-yd) */
950 Ybminusd = ybIn - ydIn;
951
952 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
953 a0 = (Xaplusc + Xbplusd);
954 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
955 a1 = (Yaplusc + Ybplusd);
956 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
957 a2 = (Xaplusc - Xbplusd);
958 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
959 a3 = (Yaplusc - Ybplusd);
960 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
961 a4 = (Xaminusc - Ybminusd);
962 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
963 a5 = (Yaminusc + Xbminusd);
964 /* xd' = (xa-yb-xc+yd) * onebyfftLen */
965 a6 = (Xaminusc + Ybminusd);
966 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
967 a7 = (Yaminusc - Xbminusd);
968
969 p0 = a0 * onebyfftLen;
970 p1 = a1 * onebyfftLen;
971 p2 = a2 * onebyfftLen;
972 p3 = a3 * onebyfftLen;
973 p4 = a4 * onebyfftLen;
974 p5 = a5 * onebyfftLen;
975 p6 = a6 * onebyfftLen;
976 p7 = a7 * onebyfftLen;
977
978 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
979 ptr1[0] = p0;
980 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
981 ptr1[1] = p1;
982 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
983 ptr1[2] = p2;
984 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
985 ptr1[3] = p3;
986 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
987 ptr1[4] = p4;
988 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
989 ptr1[5] = p5;
990 /* xd' = (xa-yb-xc+yd) * onebyfftLen */
991 ptr1[6] = p6;
992 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
993 ptr1[7] = p7;
994
995 /* increment source pointer by 8 for next calculations */
996 ptr1 = ptr1 + 8U;
997
998 } while (--j);
999
1000 #else
1001
1002 float32_t t1, t2, r1, r2, s1, s2;
1003
1004 /* Initializations for the first stage */
1005 n2 = fftLen;
1006 n1 = n2;
1007
1008 /* Calculation of first stage */
1009 for (k = fftLen; k > 4U; k >>= 2U)
1010 {
1011 /* Initializations for the first stage */
1012 n1 = n2;
1013 n2 >>= 2U;
1014 ia1 = 0U;
1015
1016 /* Calculation of first stage */
1017 j = 0;
1018 do
1019 {
1020 /* index calculation for the coefficients */
1021 ia2 = ia1 + ia1;
1022 ia3 = ia2 + ia1;
1023 co1 = pCoef[ia1 * 2U];
1024 si1 = pCoef[(ia1 * 2U) + 1U];
1025 co2 = pCoef[ia2 * 2U];
1026 si2 = pCoef[(ia2 * 2U) + 1U];
1027 co3 = pCoef[ia3 * 2U];
1028 si3 = pCoef[(ia3 * 2U) + 1U];
1029
1030 /* Twiddle coefficients index modifier */
1031 ia1 = ia1 + twidCoefModifier;
1032
1033 i0 = j;
1034 do
1035 {
1036 /* index calculation for the input as, */
1037 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1038 i1 = i0 + n2;
1039 i2 = i1 + n2;
1040 i3 = i2 + n2;
1041
1042 /* xa + xc */
1043 r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1044
1045 /* xa - xc */
1046 r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1047
1048 /* ya + yc */
1049 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1050
1051 /* ya - yc */
1052 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1053
1054 /* xb + xd */
1055 t1 = pSrc[2U * i1] + pSrc[2U * i3];
1056
1057 /* xa' = xa + xb + xc + xd */
1058 pSrc[2U * i0] = r1 + t1;
1059
1060 /* xa + xc -(xb + xd) */
1061 r1 = r1 - t1;
1062
1063 /* yb + yd */
1064 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1065
1066 /* ya' = ya + yb + yc + yd */
1067 pSrc[(2U * i0) + 1U] = s1 + t2;
1068
1069 /* (ya + yc) - (yb + yd) */
1070 s1 = s1 - t2;
1071
1072 /* (yb - yd) */
1073 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1074
1075 /* (xb - xd) */
1076 t2 = pSrc[2U * i1] - pSrc[2U * i3];
1077
1078 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1079 pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1080
1081 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1082 pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1083
1084 /* (xa - xc) - (yb - yd) */
1085 r1 = r2 - t1;
1086
1087 /* (xa - xc) + (yb - yd) */
1088 r2 = r2 + t1;
1089
1090 /* (ya - yc) + (xb - xd) */
1091 s1 = s2 + t2;
1092
1093 /* (ya - yc) - (xb - xd) */
1094 s2 = s2 - t2;
1095
1096 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1097 pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1098
1099 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1100 pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1101
1102 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1103 pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1104
1105 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1106 pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1107
1108 i0 += n1;
1109 } while ( i0 < fftLen);
1110 j++;
1111 } while (j <= (n2 - 1U));
1112 twidCoefModifier <<= 2U;
1113 }
1114 /* Initializations of last stage */
1115 n1 = n2;
1116 n2 >>= 2U;
1117
1118 /* Calculations of last stage */
1119 for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1120 {
1121 /* index calculation for the input as, */
1122 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1123 i1 = i0 + n2;
1124 i2 = i1 + n2;
1125 i3 = i2 + n2;
1126
1127 /* Butterfly implementation */
1128 /* xa + xc */
1129 r1 = pSrc[2U * i0] + pSrc[2U * i2];
1130
1131 /* xa - xc */
1132 r2 = pSrc[2U * i0] - pSrc[2U * i2];
1133
1134 /* ya + yc */
1135 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1136
1137 /* ya - yc */
1138 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1139
1140 /* xc + xd */
1141 t1 = pSrc[2U * i1] + pSrc[2U * i3];
1142
1143 /* xa' = xa + xb + xc + xd */
1144 pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1145
1146 /* (xa + xb) - (xc + xd) */
1147 r1 = r1 - t1;
1148
1149 /* yb + yd */
1150 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1151
1152 /* ya' = ya + yb + yc + yd */
1153 pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1154
1155 /* (ya + yc) - (yb + yd) */
1156 s1 = s1 - t2;
1157
1158 /* (yb-yd) */
1159 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1160
1161 /* (xb-xd) */
1162 t2 = pSrc[2U * i1] - pSrc[2U * i3];
1163
1164 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1165 pSrc[2U * i1] = r1 * onebyfftLen;
1166
1167 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1168 pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1169
1170 /* (xa - xc) - (yb-yd) */
1171 r1 = r2 - t1;
1172
1173 /* (xa - xc) + (yb-yd) */
1174 r2 = r2 + t1;
1175
1176 /* (ya - yc) + (xb-xd) */
1177 s1 = s2 + t2;
1178
1179 /* (ya - yc) - (xb-xd) */
1180 s2 = s2 - t2;
1181
1182 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1183 pSrc[2U * i2] = r1 * onebyfftLen;
1184
1185 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1186 pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1187
1188 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1189 pSrc[2U * i3] = r2 * onebyfftLen;
1190
1191 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1192 pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1193 }
1194
1195 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
1196 }
1197
1198
1199