1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_fir_decimate_f64.c
4 * Description: FIR decimation for floating-point sequences
5 *
6 * $Date: 17 February 2024
7 * $Revision: V1.16.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2024 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup FIR_decimate
37 @{
38 */
39
40 /**
41 @brief Processing function for floating-point FIR decimator.
42 @param[in] S points to an instance of the floating-point FIR decimator structure
43 @param[in] pSrc points to the block of input data
44 @param[out] pDst points to the block of output data
45 @param[in] blockSize number of input samples to process
46 */
47
arm_fir_decimate_f64(const arm_fir_decimate_instance_f64 * S,const float64_t * pSrc,float64_t * pDst,uint32_t blockSize)48 ARM_DSP_ATTRIBUTE void arm_fir_decimate_f64(
49 const arm_fir_decimate_instance_f64 * S,
50 const float64_t * pSrc,
51 float64_t * pDst,
52 uint32_t blockSize)
53 {
54 float64_t *pState = S->pState; /* State pointer */
55 const float64_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
56 float64_t *pStateCur; /* Points to the current sample of the state */
57 float64_t *px0; /* Temporary pointer for state buffer */
58 const float64_t *pb; /* Temporary pointer for coefficient buffer */
59 float64_t x0, c0; /* Temporary variables to hold state and coefficient values */
60 float64_t acc0; /* Accumulator */
61 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
62 uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */
63
64 #if defined (ARM_MATH_LOOPUNROLL)
65 float64_t *px1, *px2, *px3;
66 float64_t x1, x2, x3;
67 float64_t acc1, acc2, acc3;
68 #endif
69
70 /* S->pState buffer contains previous frame (numTaps - 1) samples */
71 /* pStateCur points to the location where the new input data should be written */
72 pStateCur = S->pState + (numTaps - 1U);
73
74 #if defined (ARM_MATH_LOOPUNROLL)
75
76 /* Loop unrolling: Compute 4 samples at a time */
77 blkCnt = outBlockSize >> 2U;
78
79 /* Samples loop unrolled by 4 */
80 while (blkCnt > 0U)
81 {
82 /* Copy 4 * decimation factor number of new input samples into the state buffer */
83 i = S->M * 4;
84
85 do
86 {
87 *pStateCur++ = *pSrc++;
88
89 } while (--i);
90
91 /* Set accumulators to zero */
92 acc0 = 0.0;
93 acc1 = 0.0;
94 acc2 = 0.0;
95 acc3 = 0.0;
96
97 /* Initialize state pointer for all the samples */
98 px0 = pState;
99 px1 = pState + S->M;
100 px2 = pState + 2 * S->M;
101 px3 = pState + 3 * S->M;
102
103 /* Initialize coeff pointer */
104 pb = pCoeffs;
105
106 /* Loop unrolling: Compute 4 taps at a time */
107 tapCnt = numTaps >> 2U;
108
109 while (tapCnt > 0U)
110 {
111 /* Read the b[numTaps-1] coefficient */
112 c0 = *(pb++);
113
114 /* Read x[n-numTaps-1] sample for acc0 */
115 x0 = *(px0++);
116 /* Read x[n-numTaps-1] sample for acc1 */
117 x1 = *(px1++);
118 /* Read x[n-numTaps-1] sample for acc2 */
119 x2 = *(px2++);
120 /* Read x[n-numTaps-1] sample for acc3 */
121 x3 = *(px3++);
122
123 /* Perform the multiply-accumulate */
124 acc0 += x0 * c0;
125 acc1 += x1 * c0;
126 acc2 += x2 * c0;
127 acc3 += x3 * c0;
128
129 /* Read the b[numTaps-2] coefficient */
130 c0 = *(pb++);
131
132 /* Read x[n-numTaps-2] sample for acc0, acc1, acc2, acc3 */
133 x0 = *(px0++);
134 x1 = *(px1++);
135 x2 = *(px2++);
136 x3 = *(px3++);
137
138 /* Perform the multiply-accumulate */
139 acc0 += x0 * c0;
140 acc1 += x1 * c0;
141 acc2 += x2 * c0;
142 acc3 += x3 * c0;
143
144 /* Read the b[numTaps-3] coefficient */
145 c0 = *(pb++);
146
147 /* Read x[n-numTaps-3] sample acc0, acc1, acc2, acc3 */
148 x0 = *(px0++);
149 x1 = *(px1++);
150 x2 = *(px2++);
151 x3 = *(px3++);
152
153 /* Perform the multiply-accumulate */
154 acc0 += x0 * c0;
155 acc1 += x1 * c0;
156 acc2 += x2 * c0;
157 acc3 += x3 * c0;
158
159 /* Read the b[numTaps-4] coefficient */
160 c0 = *(pb++);
161
162 /* Read x[n-numTaps-4] sample acc0, acc1, acc2, acc3 */
163 x0 = *(px0++);
164 x1 = *(px1++);
165 x2 = *(px2++);
166 x3 = *(px3++);
167
168 /* Perform the multiply-accumulate */
169 acc0 += x0 * c0;
170 acc1 += x1 * c0;
171 acc2 += x2 * c0;
172 acc3 += x3 * c0;
173
174 /* Decrement loop counter */
175 tapCnt--;
176 }
177
178 /* Loop unrolling: Compute remaining taps */
179 tapCnt = numTaps % 0x4U;
180
181 while (tapCnt > 0U)
182 {
183 /* Read coefficients */
184 c0 = *(pb++);
185
186 /* Fetch state variables for acc0, acc1, acc2, acc3 */
187 x0 = *(px0++);
188 x1 = *(px1++);
189 x2 = *(px2++);
190 x3 = *(px3++);
191
192 /* Perform the multiply-accumulate */
193 acc0 += x0 * c0;
194 acc1 += x1 * c0;
195 acc2 += x2 * c0;
196 acc3 += x3 * c0;
197
198 /* Decrement loop counter */
199 tapCnt--;
200 }
201
202 /* Advance the state pointer by the decimation factor
203 * to process the next group of decimation factor number samples */
204 pState = pState + S->M * 4;
205
206 /* The result is in the accumulator, store in the destination buffer. */
207 *pDst++ = acc0;
208 *pDst++ = acc1;
209 *pDst++ = acc2;
210 *pDst++ = acc3;
211
212 /* Decrement loop counter */
213 blkCnt--;
214 }
215
216 /* Loop unrolling: Compute remaining samples */
217 blkCnt = outBlockSize % 0x4U;
218
219 #else
220
221 /* Initialize blkCnt with number of samples */
222 blkCnt = outBlockSize;
223
224 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
225
226 while (blkCnt > 0U)
227 {
228 /* Copy decimation factor number of new input samples into the state buffer */
229 i = S->M;
230
231 do
232 {
233 *pStateCur++ = *pSrc++;
234
235 } while (--i);
236
237 /* Set accumulator to zero */
238 acc0 = 0.0;
239
240 /* Initialize state pointer */
241 px0 = pState;
242
243 /* Initialize coeff pointer */
244 pb = pCoeffs;
245
246 #if defined (ARM_MATH_LOOPUNROLL)
247
248 /* Loop unrolling: Compute 4 taps at a time */
249 tapCnt = numTaps >> 2U;
250
251 while (tapCnt > 0U)
252 {
253 /* Read the b[numTaps-1] coefficient */
254 c0 = *pb++;
255
256 /* Read x[n-numTaps-1] sample */
257 x0 = *px0++;
258
259 /* Perform the multiply-accumulate */
260 acc0 += x0 * c0;
261
262 /* Read the b[numTaps-2] coefficient */
263 c0 = *pb++;
264
265 /* Read x[n-numTaps-2] sample */
266 x0 = *px0++;
267
268 /* Perform the multiply-accumulate */
269 acc0 += x0 * c0;
270
271 /* Read the b[numTaps-3] coefficient */
272 c0 = *pb++;
273
274 /* Read x[n-numTaps-3] sample */
275 x0 = *px0++;
276
277 /* Perform the multiply-accumulate */
278 acc0 += x0 * c0;
279
280 /* Read the b[numTaps-4] coefficient */
281 c0 = *pb++;
282
283 /* Read x[n-numTaps-4] sample */
284 x0 = *px0++;
285
286 /* Perform the multiply-accumulate */
287 acc0 += x0 * c0;
288
289 /* Decrement loop counter */
290 tapCnt--;
291 }
292
293 /* Loop unrolling: Compute remaining taps */
294 tapCnt = numTaps % 0x4U;
295
296 #else
297
298 /* Initialize tapCnt with number of taps */
299 tapCnt = numTaps;
300
301 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
302
303 while (tapCnt > 0U)
304 {
305 /* Read coefficients */
306 c0 = *pb++;
307
308 /* Fetch 1 state variable */
309 x0 = *px0++;
310
311 /* Perform the multiply-accumulate */
312 acc0 += x0 * c0;
313
314 /* Decrement loop counter */
315 tapCnt--;
316 }
317
318 /* Advance the state pointer by the decimation factor
319 * to process the next group of decimation factor number samples */
320 pState = pState + S->M;
321
322 /* The result is in the accumulator, store in the destination buffer. */
323 *pDst++ = acc0;
324
325 /* Decrement loop counter */
326 blkCnt--;
327 }
328
329 /* Processing is complete.
330 Now copy the last numTaps - 1 samples to the satrt of the state buffer.
331 This prepares the state buffer for the next function call. */
332
333 /* Points to the start of the state buffer */
334 pStateCur = S->pState;
335
336 #if defined (ARM_MATH_LOOPUNROLL)
337
338 /* Loop unrolling: Compute 4 taps at a time */
339 tapCnt = (numTaps - 1U) >> 2U;
340
341 /* Copy data */
342 while (tapCnt > 0U)
343 {
344 *pStateCur++ = *pState++;
345 *pStateCur++ = *pState++;
346 *pStateCur++ = *pState++;
347 *pStateCur++ = *pState++;
348
349 /* Decrement loop counter */
350 tapCnt--;
351 }
352
353 /* Loop unrolling: Compute remaining taps */
354 tapCnt = (numTaps - 1U) % 0x04U;
355
356 #else
357
358 /* Initialize tapCnt with number of taps */
359 tapCnt = (numTaps - 1U);
360
361 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
362
363 /* Copy data */
364 while (tapCnt > 0U)
365 {
366 *pStateCur++ = *pState++;
367
368 /* Decrement loop counter */
369 tapCnt--;
370 }
371
372 }
373 /**
374 @} end of FIR_decimate group
375 */
376