1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cfft_radix8_f32.c
4  * Description:  Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions.h"
30 
31 
32 /* ----------------------------------------------------------------------
33  * Internal helper function used by the FFTs
34  * -------------------------------------------------------------------- */
35 
36 /**
37   brief         Core function for the floating-point CFFT butterfly process.
38   param[in,out] pSrc             points to the in-place buffer of floating-point data type.
39   param[in]     fftLen           length of the FFT.
40   param[in]     pCoef            points to the twiddle coefficient buffer.
41   param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
42   return        none
43 */
44 
arm_radix8_butterfly_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier)45 void arm_radix8_butterfly_f32(
46   float32_t * pSrc,
47   uint16_t fftLen,
48   const float32_t * pCoef,
49   uint16_t twidCoefModifier)
50 {
51    uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
52    uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
53    uint32_t id;
54    uint32_t n1, n2, j;
55 
56    float32_t r1, r2, r3, r4, r5, r6, r7, r8;
57    float32_t t1, t2;
58    float32_t s1, s2, s3, s4, s5, s6, s7, s8;
59    float32_t p1, p2, p3, p4;
60    float32_t co2, co3, co4, co5, co6, co7, co8;
61    float32_t si2, si3, si4, si5, si6, si7, si8;
62    const float32_t C81 = 0.70710678118f;
63 
64    n2 = fftLen;
65 
66    do
67    {
68       n1 = n2;
69       n2 = n2 >> 3;
70       i1 = 0;
71 
72       do
73       {
74          i2 = i1 + n2;
75          i3 = i2 + n2;
76          i4 = i3 + n2;
77          i5 = i4 + n2;
78          i6 = i5 + n2;
79          i7 = i6 + n2;
80          i8 = i7 + n2;
81          r1 = pSrc[2 * i1] + pSrc[2 * i5];
82          r5 = pSrc[2 * i1] - pSrc[2 * i5];
83          r2 = pSrc[2 * i2] + pSrc[2 * i6];
84          r6 = pSrc[2 * i2] - pSrc[2 * i6];
85          r3 = pSrc[2 * i3] + pSrc[2 * i7];
86          r7 = pSrc[2 * i3] - pSrc[2 * i7];
87          r4 = pSrc[2 * i4] + pSrc[2 * i8];
88          r8 = pSrc[2 * i4] - pSrc[2 * i8];
89          t1 = r1 - r3;
90          r1 = r1 + r3;
91          r3 = r2 - r4;
92          r2 = r2 + r4;
93          pSrc[2 * i1] = r1 + r2;
94          pSrc[2 * i5] = r1 - r2;
95          r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
96          s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
97          r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
98          s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
99          s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
100          s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
101          r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
102          s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
103          t2 = r1 - s3;
104          r1 = r1 + s3;
105          s3 = r2 - r4;
106          r2 = r2 + r4;
107          pSrc[2 * i1 + 1] = r1 + r2;
108          pSrc[2 * i5 + 1] = r1 - r2;
109          pSrc[2 * i3]     = t1 + s3;
110          pSrc[2 * i7]     = t1 - s3;
111          pSrc[2 * i3 + 1] = t2 - r3;
112          pSrc[2 * i7 + 1] = t2 + r3;
113          r1 = (r6 - r8) * C81;
114          r6 = (r6 + r8) * C81;
115          r2 = (s6 - s8) * C81;
116          s6 = (s6 + s8) * C81;
117          t1 = r5 - r1;
118          r5 = r5 + r1;
119          r8 = r7 - r6;
120          r7 = r7 + r6;
121          t2 = s5 - r2;
122          s5 = s5 + r2;
123          s8 = s7 - s6;
124          s7 = s7 + s6;
125          pSrc[2 * i2]     = r5 + s7;
126          pSrc[2 * i8]     = r5 - s7;
127          pSrc[2 * i6]     = t1 + s8;
128          pSrc[2 * i4]     = t1 - s8;
129          pSrc[2 * i2 + 1] = s5 - r7;
130          pSrc[2 * i8 + 1] = s5 + r7;
131          pSrc[2 * i6 + 1] = t2 - r8;
132          pSrc[2 * i4 + 1] = t2 + r8;
133 
134          i1 += n1;
135       } while (i1 < fftLen);
136 
137       if (n2 < 8)
138          break;
139 
140       ia1 = 0;
141       j = 1;
142 
143       do
144       {
145          /*  index calculation for the coefficients */
146          id  = ia1 + twidCoefModifier;
147          ia1 = id;
148          ia2 = ia1 + id;
149          ia3 = ia2 + id;
150          ia4 = ia3 + id;
151          ia5 = ia4 + id;
152          ia6 = ia5 + id;
153          ia7 = ia6 + id;
154 
155          co2 = pCoef[2 * ia1];
156          co3 = pCoef[2 * ia2];
157          co4 = pCoef[2 * ia3];
158          co5 = pCoef[2 * ia4];
159          co6 = pCoef[2 * ia5];
160          co7 = pCoef[2 * ia6];
161          co8 = pCoef[2 * ia7];
162          si2 = pCoef[2 * ia1 + 1];
163          si3 = pCoef[2 * ia2 + 1];
164          si4 = pCoef[2 * ia3 + 1];
165          si5 = pCoef[2 * ia4 + 1];
166          si6 = pCoef[2 * ia5 + 1];
167          si7 = pCoef[2 * ia6 + 1];
168          si8 = pCoef[2 * ia7 + 1];
169 
170          i1 = j;
171 
172          do
173          {
174             /*  index calculation for the input */
175             i2 = i1 + n2;
176             i3 = i2 + n2;
177             i4 = i3 + n2;
178             i5 = i4 + n2;
179             i6 = i5 + n2;
180             i7 = i6 + n2;
181             i8 = i7 + n2;
182             r1 = pSrc[2 * i1] + pSrc[2 * i5];
183             r5 = pSrc[2 * i1] - pSrc[2 * i5];
184             r2 = pSrc[2 * i2] + pSrc[2 * i6];
185             r6 = pSrc[2 * i2] - pSrc[2 * i6];
186             r3 = pSrc[2 * i3] + pSrc[2 * i7];
187             r7 = pSrc[2 * i3] - pSrc[2 * i7];
188             r4 = pSrc[2 * i4] + pSrc[2 * i8];
189             r8 = pSrc[2 * i4] - pSrc[2 * i8];
190             t1 = r1 - r3;
191             r1 = r1 + r3;
192             r3 = r2 - r4;
193             r2 = r2 + r4;
194             pSrc[2 * i1] = r1 + r2;
195             r2 = r1 - r2;
196             s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
197             s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
198             s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
199             s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
200             s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
201             s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
202             s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
203             s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
204             t2 = s1 - s3;
205             s1 = s1 + s3;
206             s3 = s2 - s4;
207             s2 = s2 + s4;
208             r1 = t1 + s3;
209             t1 = t1 - s3;
210             pSrc[2 * i1 + 1] = s1 + s2;
211             s2 = s1 - s2;
212             s1 = t2 - r3;
213             t2 = t2 + r3;
214             p1 = co5 * r2;
215             p2 = si5 * s2;
216             p3 = co5 * s2;
217             p4 = si5 * r2;
218             pSrc[2 * i5]     = p1 + p2;
219             pSrc[2 * i5 + 1] = p3 - p4;
220             p1 = co3 * r1;
221             p2 = si3 * s1;
222             p3 = co3 * s1;
223             p4 = si3 * r1;
224             pSrc[2 * i3]     = p1 + p2;
225             pSrc[2 * i3 + 1] = p3 - p4;
226             p1 = co7 * t1;
227             p2 = si7 * t2;
228             p3 = co7 * t2;
229             p4 = si7 * t1;
230             pSrc[2 * i7]     = p1 + p2;
231             pSrc[2 * i7 + 1] = p3 - p4;
232             r1 = (r6 - r8) * C81;
233             r6 = (r6 + r8) * C81;
234             s1 = (s6 - s8) * C81;
235             s6 = (s6 + s8) * C81;
236             t1 = r5 - r1;
237             r5 = r5 + r1;
238             r8 = r7 - r6;
239             r7 = r7 + r6;
240             t2 = s5 - s1;
241             s5 = s5 + s1;
242             s8 = s7 - s6;
243             s7 = s7 + s6;
244             r1 = r5 + s7;
245             r5 = r5 - s7;
246             r6 = t1 + s8;
247             t1 = t1 - s8;
248             s1 = s5 - r7;
249             s5 = s5 + r7;
250             s6 = t2 - r8;
251             t2 = t2 + r8;
252             p1 = co2 * r1;
253             p2 = si2 * s1;
254             p3 = co2 * s1;
255             p4 = si2 * r1;
256             pSrc[2 * i2]     = p1 + p2;
257             pSrc[2 * i2 + 1] = p3 - p4;
258             p1 = co8 * r5;
259             p2 = si8 * s5;
260             p3 = co8 * s5;
261             p4 = si8 * r5;
262             pSrc[2 * i8]     = p1 + p2;
263             pSrc[2 * i8 + 1] = p3 - p4;
264             p1 = co6 * r6;
265             p2 = si6 * s6;
266             p3 = co6 * s6;
267             p4 = si6 * r6;
268             pSrc[2 * i6]     = p1 + p2;
269             pSrc[2 * i6 + 1] = p3 - p4;
270             p1 = co4 * t1;
271             p2 = si4 * t2;
272             p3 = co4 * t2;
273             p4 = si4 * t1;
274             pSrc[2 * i4]     = p1 + p2;
275             pSrc[2 * i4 + 1] = p3 - p4;
276 
277             i1 += n1;
278          } while (i1 < fftLen);
279 
280          j++;
281       } while (j < n2);
282 
283       twidCoefModifier <<= 3;
284    } while (n2 > 7);
285 }
286