1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_rfft_fast_f32.c
4  * Description:  RFFT & RIFFT Floating point process function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions.h"
30 
31 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
stage_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)32 void stage_rfft_f32(
33   const arm_rfft_fast_instance_f32 * S,
34         float32_t * p,
35         float32_t * pOut)
36 {
37         int32_t  k;                                /* Loop Counter */
38         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
39   const float32_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
40         float32_t *pA = p;                          /* increasing pointer */
41         float32_t *pB = p;                          /* decreasing pointer */
42         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
43         float32_t t1a, t1b;                         /* temporary variables */
44         float32_t p0, p1, p2, p3;                   /* temporary variables */
45 
46         float32x4x2_t tw,xA,xB;
47         float32x4x2_t tmp1, tmp2, res;
48 
49         uint32x4_t     vecStridesFwd, vecStridesBkwd;
50 
51         vecStridesFwd = vidupq_u32((uint32_t)0, 2);
52         vecStridesBkwd = -vecStridesFwd;
53 
54         int blockCnt;
55 
56 
57    k = (S->Sint).fftLen - 1;
58 
59    /* Pack first and last sample of the frequency domain together */
60 
61    xBR = pB[0];
62    xBI = pB[1];
63    xAR = pA[0];
64    xAI = pA[1];
65 
66    twR = *pCoeff++ ;
67    twI = *pCoeff++ ;
68 
69    // U1 = XA(1) + XB(1); % It is real
70    t1a = xBR + xAR  ;
71 
72    // U2 = XB(1) - XA(1); % It is imaginary
73    t1b = xBI + xAI  ;
74 
75    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
76    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
77    *pOut++ = 0.5f * ( t1a + t1b );
78    *pOut++ = 0.5f * ( t1a - t1b );
79 
80    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
81    pB  = p + 2*k;
82    pA += 2;
83 
84    blockCnt = k >> 2;
85    while (blockCnt > 0)
86    {
87       /*
88          function X = my_split_rfft(X, ifftFlag)
89          % X is a series of real numbers
90          L  = length(X);
91          XC = X(1:2:end) +i*X(2:2:end);
92          XA = fft(XC);
93          XB = conj(XA([1 end:-1:2]));
94          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
95          for l = 2:L/2
96             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
97          end
98          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
99          X = XA;
100       */
101 
102 
103       xA = vld2q_f32(pA);
104       pA += 8;
105 
106       xB = vld2q_f32(pB);
107 
108       xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
109       xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);
110 
111       xB.val[1] = vnegq_f32(xB.val[1]);
112       pB -= 8;
113 
114 
115       tw = vld2q_f32(pCoeff);
116       pCoeff += 8;
117 
118 
119       tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
120       tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);
121 
122       tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
123       tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);
124 
125       res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
126       res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
127 
128       res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
129       res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
130 
131       res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
132       res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );
133 
134       res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
135       res.val[1] = vmulq_n_f32(res.val[1], 0.5f);
136 
137 
138       vst2q_f32(pOut, res);
139       pOut += 8;
140 
141 
142       blockCnt--;
143    }
144 
145    blockCnt = k & 3;
146    while (blockCnt > 0)
147    {
148       /*
149          function X = my_split_rfft(X, ifftFlag)
150          % X is a series of real numbers
151          L  = length(X);
152          XC = X(1:2:end) +i*X(2:2:end);
153          XA = fft(XC);
154          XB = conj(XA([1 end:-1:2]));
155          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
156          for l = 2:L/2
157             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
158          end
159          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
160          X = XA;
161       */
162 
163       xBI = pB[1];
164       xBR = pB[0];
165       xAR = pA[0];
166       xAI = pA[1];
167 
168       twR = *pCoeff++;
169       twI = *pCoeff++;
170 
171       t1a = xBR - xAR ;
172       t1b = xBI + xAI ;
173 
174       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
175       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
176       p0 = twR * t1a;
177       p1 = twI * t1a;
178       p2 = twR * t1b;
179       p3 = twI * t1b;
180 
181       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
182       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
183 
184       pA += 2;
185       pB -= 2;
186       blockCnt--;
187    }
188 }
189 
190 /* Prepares data for inverse cfft */
merge_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)191 void merge_rfft_f32(
192   const arm_rfft_fast_instance_f32 * S,
193         float32_t * p,
194         float32_t * pOut)
195 {
196         int32_t  k;                                /* Loop Counter */
197         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
198   const float32_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
199         float32_t *pA = p;                          /* increasing pointer */
200         float32_t *pB = p;                          /* decreasing pointer */
201         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
202         float32_t t1a, t1b, r, s, t, u;             /* temporary variables */
203 
204         float32x4x2_t tw,xA,xB;
205         float32x4x2_t tmp1, tmp2, res;
206         uint32x4_t     vecStridesFwd, vecStridesBkwd;
207 
208         vecStridesFwd = vidupq_u32((uint32_t)0, 2);
209         vecStridesBkwd = -vecStridesFwd;
210 
211         int blockCnt;
212 
213 
214    k = (S->Sint).fftLen - 1;
215 
216    xAR = pA[0];
217    xAI = pA[1];
218 
219    pCoeff += 2 ;
220 
221    *pOut++ = 0.5f * ( xAR + xAI );
222    *pOut++ = 0.5f * ( xAR - xAI );
223 
224    pB  =  p + 2*k ;
225    pA +=  2    ;
226 
227    blockCnt = k >> 2;
228    while (blockCnt > 0)
229    {
230       /* G is half of the frequency complex spectrum */
231       //for k = 2:N
232       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
233       xA = vld2q_f32(pA);
234       pA += 8;
235 
236       xB = vld2q_f32(pB);
237 
238       xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
239       xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);
240 
241       xB.val[1] = vnegq_f32(xB.val[1]);
242       pB -= 8;
243 
244 
245       tw = vld2q_f32(pCoeff);
246       tw.val[1] = vnegq_f32(tw.val[1]);
247       pCoeff += 8;
248 
249 
250       tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
251       tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);
252 
253       tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
254       tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);
255 
256       res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
257       res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
258 
259       res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
260       res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
261 
262       res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
263       res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );
264 
265       res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
266       res.val[1] = vmulq_n_f32(res.val[1], 0.5f);
267 
268 
269       vst2q_f32(pOut, res);
270       pOut += 8;
271 
272 
273       blockCnt--;
274    }
275 
276    blockCnt = k & 3;
277    while (blockCnt > 0)
278    {
279       /* G is half of the frequency complex spectrum */
280       //for k = 2:N
281       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
282       xBI =   pB[1]    ;
283       xBR =   pB[0]    ;
284       xAR =  pA[0];
285       xAI =  pA[1];
286 
287       twR = *pCoeff++;
288       twI = *pCoeff++;
289 
290       t1a = xAR - xBR ;
291       t1b = xAI + xBI ;
292 
293       r = twR * t1a;
294       s = twI * t1b;
295       t = twI * t1a;
296       u = twR * t1b;
297 
298       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
299       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
300       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
301       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
302 
303       pA += 2;
304       pB -= 2;
305       blockCnt--;
306    }
307 
308 }
309 #else
stage_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)310 void stage_rfft_f32(
311   const arm_rfft_fast_instance_f32 * S,
312         float32_t * p,
313         float32_t * pOut)
314 {
315         int32_t  k;                                /* Loop Counter */
316         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
317   const float32_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
318         float32_t *pA = p;                          /* increasing pointer */
319         float32_t *pB = p;                          /* decreasing pointer */
320         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
321         float32_t t1a, t1b;                         /* temporary variables */
322         float32_t p0, p1, p2, p3;                   /* temporary variables */
323 
324 
325    k = (S->Sint).fftLen - 1;
326 
327    /* Pack first and last sample of the frequency domain together */
328 
329    xBR = pB[0];
330    xBI = pB[1];
331    xAR = pA[0];
332    xAI = pA[1];
333 
334    twR = *pCoeff++ ;
335    twI = *pCoeff++ ;
336 
337 
338    // U1 = XA(1) + XB(1); % It is real
339    t1a = xBR + xAR  ;
340 
341    // U2 = XB(1) - XA(1); % It is imaginary
342    t1b = xBI + xAI  ;
343 
344    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
345    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
346    *pOut++ = 0.5f * ( t1a + t1b );
347    *pOut++ = 0.5f * ( t1a - t1b );
348 
349    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
350    pB  = p + 2*k;
351    pA += 2;
352 
353    do
354    {
355       /*
356          function X = my_split_rfft(X, ifftFlag)
357          % X is a series of real numbers
358          L  = length(X);
359          XC = X(1:2:end) +i*X(2:2:end);
360          XA = fft(XC);
361          XB = conj(XA([1 end:-1:2]));
362          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
363          for l = 2:L/2
364             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
365          end
366          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
367          X = XA;
368       */
369 
370       xBI = pB[1];
371       xBR = pB[0];
372       xAR = pA[0];
373       xAI = pA[1];
374 
375       twR = *pCoeff++;
376       twI = *pCoeff++;
377 
378       t1a = xBR - xAR ;
379       t1b = xBI + xAI ;
380 
381       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
382       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
383       p0 = twR * t1a;
384       p1 = twI * t1a;
385       p2 = twR * t1b;
386       p3 = twI * t1b;
387 
388       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
389       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
390 
391 
392       pA += 2;
393       pB -= 2;
394       k--;
395    } while (k > 0);
396 }
397 
398 /* Prepares data for inverse cfft */
merge_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)399 void merge_rfft_f32(
400   const arm_rfft_fast_instance_f32 * S,
401         float32_t * p,
402         float32_t * pOut)
403 {
404         int32_t  k;                                /* Loop Counter */
405         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
406   const float32_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
407         float32_t *pA = p;                          /* increasing pointer */
408         float32_t *pB = p;                          /* decreasing pointer */
409         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
410         float32_t t1a, t1b, r, s, t, u;             /* temporary variables */
411 
412    k = (S->Sint).fftLen - 1;
413 
414    xAR = pA[0];
415    xAI = pA[1];
416 
417    pCoeff += 2 ;
418 
419    *pOut++ = 0.5f * ( xAR + xAI );
420    *pOut++ = 0.5f * ( xAR - xAI );
421 
422    pB  =  p + 2*k ;
423    pA +=  2	   ;
424 
425    while (k > 0)
426    {
427       /* G is half of the frequency complex spectrum */
428       //for k = 2:N
429       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
430       xBI =   pB[1]    ;
431       xBR =   pB[0]    ;
432       xAR =  pA[0];
433       xAI =  pA[1];
434 
435       twR = *pCoeff++;
436       twI = *pCoeff++;
437 
438       t1a = xAR - xBR ;
439       t1b = xAI + xBI ;
440 
441       r = twR * t1a;
442       s = twI * t1b;
443       t = twI * t1a;
444       u = twR * t1b;
445 
446       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
447       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
448       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
449       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
450 
451       pA += 2;
452       pB -= 2;
453       k--;
454    }
455 
456 }
457 
458 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
459 
460 /**
461   @ingroup groupTransforms
462 */
463 
464 /**
465   @defgroup RealFFT Real FFT Functions
466 
467   @par
468                    The CMSIS DSP library includes specialized algorithms for computing the
469                    FFT of real data sequences.  The FFT is defined over complex data but
470                    in many applications the input is real.  Real FFT algorithms take advantage
471                    of the symmetry properties of the FFT and have a speed advantage over complex
472                    algorithms of the same length.
473   @par
474                    The Fast RFFT algorithm relays on the mixed radix CFFT that save processor usage.
475   @par
476                    The real length N forward FFT of a sequence is computed using the steps shown below.
477   @par
478                    \image html RFFT.gif "Real Fast Fourier Transform"
479   @par
480                    The real sequence is initially treated as if it were complex to perform a CFFT.
481                    Later, a processing stage reshapes the data to obtain half of the frequency spectrum
482                    in complex format.
483 
484   @par
485                    The input for the inverse RFFT should keep the same format as the output of the
486                    forward RFFT. A first processing stage pre-process the data to later perform an
487                    inverse CFFT.
488   @par
489                    \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
490   @par
491                    The algorithms for floating-point, Q15, and Q31 data are slightly different
492                    and we describe each algorithm in turn.
493   @par           Floating-point
494                    The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32().
495                    The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated
496                    but are still documented.
497                    For f16, the functions are \ref arm_rfft_fast_f16() and \ref arm_rfft_fast_init_f16().
498                    For f64, the functions are \ref arm_rfft_fast_f64() and \ref arm_rfft_fast_init_f64().
499   @par
500                    The FFT of a real N-point sequence has even symmetry in the frequency domain.
501                    The second half of the data equals the conjugate of the first half flipped in frequency.
502                    This conjugate part is not computed by the float RFFT. As consequence, the output of
503                    a N point real FFT should be a N//2 + 1 complex numbers so N + 2 floats.
504   @par
505                    It happens that the first complex of number of the RFFT output is actually
506                    all real. Its real part represents the DC offset.
507                    The value at Nyquist frequency is also real.
508 
509   @par
510                    Those two complex numbers can be encoded with 2 floats rather than using two numbers
511                    with an imaginary part set to zero.
512   @par
513                    The implementation is using a trick so that the output buffer can be N float :
514                    the last real is packaged in the imaginary part of the first complex (since
515                    this imaginary part is not used and is zero).
516 
517   @par
518                    The real FFT functions pack the frequency domain data in this fashion.
519                    The forward transform outputs the data in this form and the inverse
520                    transform expects input data in this form. The function always performs
521                    the needed bitreversal so that the input and output data is always in
522                    normal order. The functions support lengths of [32, 64, 128, ..., 4096]
523                    samples.
524   @par           Q15 and Q31
525                    The real algorithms are defined in a similar manner and utilize N/2 complex
526                    transforms behind the scenes.
527 
528   @par
529                    But warning, contrary to the float version, the fixed point implementation
530                    RFFT is also computing the conjugate part (except for MVE version) so the
531                    output buffer must be bigger.
532                    Also the fixed point RFFTs are not using any trick to pack the DC and Nyquist
533                    frequency in the same complex number.
534                    The RIFFT is not using the conjugate part but it is still using the Nyquist
535                    frequency value. The details are given in the documentation for the functions.
536   @par
537                    The complex transforms used internally include scaling to prevent fixed-point
538                    overflows.  The overall scaling equals 1/(fftLen/2).
539                    Due to the use of complex transform internally, the source buffer is
540                    modified by the rfft.
541   @par
542                    A separate instance structure must be defined for each transform used but
543                    twiddle factor and bit reversal tables can be reused.
544   @par
545                    There is also an associated initialization function for each data type.
546                    The initialization function performs the following operations:
547                     - Sets the values of the internal structure fields.
548                     - Initializes twiddle factor table and bit reversal table pointers.
549                     - Initializes the internal complex FFT data structure.
550   @par
551                    Use of the initialization function is optional **except for MVE versions where it is mandatory**.
552                    If you don't use the initialization functions, then the structures should be initialized with code
553                    similar to the one below:
554   <pre>
555       arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
556       arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
557   </pre>
558                    where <code>fftLenReal</code> is the length of the real transform;
559                    <code>fftLenBy2</code> length of  the internal complex transform (fftLenReal/2).
560                    <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
561                    <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
562                    output (=1).
563                    <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
564                    The value is based on the FFT length;
565                    <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
566                    <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
567                    <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
568                    must also be initialized.
569 @par
570                    Note that with MVE versions you can't initialize instance structures directly and **must
571                    use the initialization function**.
572  */
573 
574 /**
575   @defgroup DeprecatedRealFFT Deprecated Real FFT Functions
576 */
577 
578 /**
579   @defgroup RealFFTF32 Real FFT F32 Functions
580 */
581 /**
582   @addtogroup RealFFTF32
583   @{
584 */
585 
586 /**
587   @brief         Processing function for the floating-point real FFT.
588   @param[in]     S         points to an arm_rfft_fast_instance_f32 structure
589   @param[in]     p         points to input buffer (Source buffer is modified by this function.)
590   @param[in]     pOut      points to output buffer
591   @param[in]     ifftFlag
592                    - value = 0: RFFT
593                    - value = 1: RIFFT
594 */
595 
arm_rfft_fast_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut,uint8_t ifftFlag)596 void arm_rfft_fast_f32(
597   const arm_rfft_fast_instance_f32 * S,
598   float32_t * p,
599   float32_t * pOut,
600   uint8_t ifftFlag)
601 {
602    const arm_cfft_instance_f32 * Sint = &(S->Sint);
603 
604    /* Calculation of Real FFT */
605    if (ifftFlag)
606    {
607       /*  Real FFT compression */
608       merge_rfft_f32(S, p, pOut);
609       /* Complex radix-4 IFFT process */
610       arm_cfft_f32( Sint, pOut, ifftFlag, 1);
611    }
612    else
613    {
614       /* Calculation of RFFT of input */
615       arm_cfft_f32( Sint, p, ifftFlag, 1);
616 
617       /*  Real FFT extraction */
618       stage_rfft_f32(S, p, pOut);
619    }
620 }
621 
622 /**
623 * @} end of RRealFFTF16ealFFT group
624 */
625