1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_fir_decimate_f64.c
4  * Description:  FIR decimation for floating-point sequences
5  *
6  * $Date:        17 February 2024
7  * $Revision:    V1.16.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2024 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @addtogroup FIR_decimate
37   @{
38  */
39 
40 /**
41   @brief         Processing function for floating-point FIR decimator.
42   @param[in]     S         points to an instance of the floating-point FIR decimator structure
43   @param[in]     pSrc      points to the block of input data
44   @param[out]    pDst      points to the block of output data
45   @param[in]     blockSize number of input samples to process
46  */
47 
arm_fir_decimate_f64(const arm_fir_decimate_instance_f64 * S,const float64_t * pSrc,float64_t * pDst,uint32_t blockSize)48 ARM_DSP_ATTRIBUTE void arm_fir_decimate_f64(
49   const arm_fir_decimate_instance_f64 * S,
50   const float64_t * pSrc,
51         float64_t * pDst,
52         uint32_t blockSize)
53 {
54         float64_t *pState = S->pState;                 /* State pointer */
55   const float64_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
56         float64_t *pStateCur;                          /* Points to the current sample of the state */
57         float64_t *px0;                                /* Temporary pointer for state buffer */
58   const float64_t *pb;                                 /* Temporary pointer for coefficient buffer */
59         float64_t x0, c0;                              /* Temporary variables to hold state and coefficient values */
60         float64_t acc0;                                /* Accumulator */
61         uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
62         uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
63 
64 #if defined (ARM_MATH_LOOPUNROLL)
65         float64_t *px1, *px2, *px3;
66         float64_t x1, x2, x3;
67         float64_t acc1, acc2, acc3;
68 #endif
69 
70   /* S->pState buffer contains previous frame (numTaps - 1) samples */
71   /* pStateCur points to the location where the new input data should be written */
72   pStateCur = S->pState + (numTaps - 1U);
73 
74 #if defined (ARM_MATH_LOOPUNROLL)
75 
76     /* Loop unrolling: Compute 4 samples at a time */
77   blkCnt = outBlockSize >> 2U;
78 
79   /* Samples loop unrolled by 4 */
80   while (blkCnt > 0U)
81   {
82     /* Copy 4 * decimation factor number of new input samples into the state buffer */
83     i = S->M * 4;
84 
85     do
86     {
87       *pStateCur++ = *pSrc++;
88 
89     } while (--i);
90 
91     /* Set accumulators to zero */
92     acc0 = 0.0;
93     acc1 = 0.0;
94     acc2 = 0.0;
95     acc3 = 0.0;
96 
97     /* Initialize state pointer for all the samples */
98     px0 = pState;
99     px1 = pState + S->M;
100     px2 = pState + 2 * S->M;
101     px3 = pState + 3 * S->M;
102 
103     /* Initialize coeff pointer */
104     pb = pCoeffs;
105 
106     /* Loop unrolling: Compute 4 taps at a time */
107     tapCnt = numTaps >> 2U;
108 
109     while (tapCnt > 0U)
110     {
111       /* Read the b[numTaps-1] coefficient */
112       c0 = *(pb++);
113 
114       /* Read x[n-numTaps-1] sample for acc0 */
115       x0 = *(px0++);
116       /* Read x[n-numTaps-1] sample for acc1 */
117       x1 = *(px1++);
118       /* Read x[n-numTaps-1] sample for acc2 */
119       x2 = *(px2++);
120       /* Read x[n-numTaps-1] sample for acc3 */
121       x3 = *(px3++);
122 
123       /* Perform the multiply-accumulate */
124       acc0 += x0 * c0;
125       acc1 += x1 * c0;
126       acc2 += x2 * c0;
127       acc3 += x3 * c0;
128 
129       /* Read the b[numTaps-2] coefficient */
130       c0 = *(pb++);
131 
132       /* Read x[n-numTaps-2] sample for acc0, acc1, acc2, acc3 */
133       x0 = *(px0++);
134       x1 = *(px1++);
135       x2 = *(px2++);
136       x3 = *(px3++);
137 
138       /* Perform the multiply-accumulate */
139       acc0 += x0 * c0;
140       acc1 += x1 * c0;
141       acc2 += x2 * c0;
142       acc3 += x3 * c0;
143 
144       /* Read the b[numTaps-3] coefficient */
145       c0 = *(pb++);
146 
147       /* Read x[n-numTaps-3] sample acc0, acc1, acc2, acc3 */
148       x0 = *(px0++);
149       x1 = *(px1++);
150       x2 = *(px2++);
151       x3 = *(px3++);
152 
153       /* Perform the multiply-accumulate */
154       acc0 += x0 * c0;
155       acc1 += x1 * c0;
156       acc2 += x2 * c0;
157       acc3 += x3 * c0;
158 
159       /* Read the b[numTaps-4] coefficient */
160       c0 = *(pb++);
161 
162       /* Read x[n-numTaps-4] sample acc0, acc1, acc2, acc3 */
163       x0 = *(px0++);
164       x1 = *(px1++);
165       x2 = *(px2++);
166       x3 = *(px3++);
167 
168       /* Perform the multiply-accumulate */
169       acc0 += x0 * c0;
170       acc1 += x1 * c0;
171       acc2 += x2 * c0;
172       acc3 += x3 * c0;
173 
174       /* Decrement loop counter */
175       tapCnt--;
176     }
177 
178     /* Loop unrolling: Compute remaining taps */
179     tapCnt = numTaps % 0x4U;
180 
181     while (tapCnt > 0U)
182     {
183       /* Read coefficients */
184       c0 = *(pb++);
185 
186       /* Fetch state variables for acc0, acc1, acc2, acc3 */
187       x0 = *(px0++);
188       x1 = *(px1++);
189       x2 = *(px2++);
190       x3 = *(px3++);
191 
192       /* Perform the multiply-accumulate */
193       acc0 += x0 * c0;
194       acc1 += x1 * c0;
195       acc2 += x2 * c0;
196       acc3 += x3 * c0;
197 
198       /* Decrement loop counter */
199       tapCnt--;
200     }
201 
202     /* Advance the state pointer by the decimation factor
203      * to process the next group of decimation factor number samples */
204     pState = pState + S->M * 4;
205 
206     /* The result is in the accumulator, store in the destination buffer. */
207     *pDst++ = acc0;
208     *pDst++ = acc1;
209     *pDst++ = acc2;
210     *pDst++ = acc3;
211 
212     /* Decrement loop counter */
213     blkCnt--;
214   }
215 
216   /* Loop unrolling: Compute remaining samples */
217   blkCnt = outBlockSize % 0x4U;
218 
219 #else
220 
221   /* Initialize blkCnt with number of samples */
222   blkCnt = outBlockSize;
223 
224 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
225 
226   while (blkCnt > 0U)
227   {
228     /* Copy decimation factor number of new input samples into the state buffer */
229     i = S->M;
230 
231     do
232     {
233       *pStateCur++ = *pSrc++;
234 
235     } while (--i);
236 
237     /* Set accumulator to zero */
238     acc0 = 0.0;
239 
240     /* Initialize state pointer */
241     px0 = pState;
242 
243     /* Initialize coeff pointer */
244     pb = pCoeffs;
245 
246 #if defined (ARM_MATH_LOOPUNROLL)
247 
248     /* Loop unrolling: Compute 4 taps at a time */
249     tapCnt = numTaps >> 2U;
250 
251     while (tapCnt > 0U)
252     {
253       /* Read the b[numTaps-1] coefficient */
254       c0 = *pb++;
255 
256       /* Read x[n-numTaps-1] sample */
257       x0 = *px0++;
258 
259       /* Perform the multiply-accumulate */
260       acc0 += x0 * c0;
261 
262       /* Read the b[numTaps-2] coefficient */
263       c0 = *pb++;
264 
265       /* Read x[n-numTaps-2] sample */
266       x0 = *px0++;
267 
268       /* Perform the multiply-accumulate */
269       acc0 += x0 * c0;
270 
271       /* Read the b[numTaps-3] coefficient */
272       c0 = *pb++;
273 
274       /* Read x[n-numTaps-3] sample */
275       x0 = *px0++;
276 
277       /* Perform the multiply-accumulate */
278       acc0 += x0 * c0;
279 
280       /* Read the b[numTaps-4] coefficient */
281       c0 = *pb++;
282 
283       /* Read x[n-numTaps-4] sample */
284       x0 = *px0++;
285 
286       /* Perform the multiply-accumulate */
287       acc0 += x0 * c0;
288 
289       /* Decrement loop counter */
290       tapCnt--;
291     }
292 
293     /* Loop unrolling: Compute remaining taps */
294     tapCnt = numTaps % 0x4U;
295 
296 #else
297 
298     /* Initialize tapCnt with number of taps */
299     tapCnt = numTaps;
300 
301 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
302 
303     while (tapCnt > 0U)
304     {
305       /* Read coefficients */
306       c0 = *pb++;
307 
308       /* Fetch 1 state variable */
309       x0 = *px0++;
310 
311       /* Perform the multiply-accumulate */
312       acc0 += x0 * c0;
313 
314       /* Decrement loop counter */
315       tapCnt--;
316     }
317 
318     /* Advance the state pointer by the decimation factor
319      * to process the next group of decimation factor number samples */
320     pState = pState + S->M;
321 
322     /* The result is in the accumulator, store in the destination buffer. */
323     *pDst++ = acc0;
324 
325     /* Decrement loop counter */
326     blkCnt--;
327   }
328 
329   /* Processing is complete.
330      Now copy the last numTaps - 1 samples to the satrt of the state buffer.
331      This prepares the state buffer for the next function call. */
332 
333   /* Points to the start of the state buffer */
334   pStateCur = S->pState;
335 
336 #if defined (ARM_MATH_LOOPUNROLL)
337 
338   /* Loop unrolling: Compute 4 taps at a time */
339   tapCnt = (numTaps - 1U) >> 2U;
340 
341   /* Copy data */
342   while (tapCnt > 0U)
343   {
344     *pStateCur++ = *pState++;
345     *pStateCur++ = *pState++;
346     *pStateCur++ = *pState++;
347     *pStateCur++ = *pState++;
348 
349     /* Decrement loop counter */
350     tapCnt--;
351   }
352 
353   /* Loop unrolling: Compute remaining taps */
354   tapCnt = (numTaps - 1U) % 0x04U;
355 
356 #else
357 
358   /* Initialize tapCnt with number of taps */
359   tapCnt = (numTaps - 1U);
360 
361 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
362 
363   /* Copy data */
364   while (tapCnt > 0U)
365   {
366     *pStateCur++ = *pState++;
367 
368     /* Decrement loop counter */
369     tapCnt--;
370   }
371 
372 }
373 /**
374   @} end of FIR_decimate group
375  */
376