1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_fir_lattice_q31.c
4 * Description: Q31 FIR lattice filter processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup FIR_Lattice
37 @{
38 */
39
40 /**
41 @brief Processing function for the Q31 FIR lattice filter.
42 @param[in] S points to an instance of the Q31 FIR lattice structure
43 @param[in] pSrc points to the block of input data
44 @param[out] pDst points to the block of output data
45 @param[in] blockSize number of samples to process
46
47 @par Scaling and Overflow Behavior
48 In order to avoid overflows the input signal must be scaled down by 2*log2(numStages) bits.
49 */
50
arm_fir_lattice_q31(const arm_fir_lattice_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)51 void arm_fir_lattice_q31(
52 const arm_fir_lattice_instance_q31 * S,
53 const q31_t * pSrc,
54 q31_t * pDst,
55 uint32_t blockSize)
56 {
57 q31_t *pState = S->pState; /* State pointer */
58 const q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
59 q31_t *px; /* Temporary state pointer */
60 const q31_t *pk; /* Temporary coefficient pointer */
61 uint32_t numStages = S->numStages; /* Number of stages in the filter */
62 uint32_t blkCnt, stageCnt; /* Loop counters */
63 q31_t fcurr0, fnext0, gnext0, gcurr0; /* Temporary variables */
64
65 #if (1)
66 //#if !defined(ARM_MATH_CM0_FAMILY)
67
68 #if defined (ARM_MATH_LOOPUNROLL)
69 q31_t fcurr1, fnext1, gnext1; /* Temporary variables for second sample in loop unrolling */
70 q31_t fcurr2, fnext2, gnext2; /* Temporary variables for third sample in loop unrolling */
71 q31_t fcurr3, fnext3, gnext3; /* Temporary variables for fourth sample in loop unrolling */
72 #endif
73
74 gcurr0 = 0;
75
76 #if defined (ARM_MATH_LOOPUNROLL)
77
78 /* Loop unrolling: Compute 4 outputs at a time */
79 blkCnt = blockSize >> 2U;
80
81 while (blkCnt > 0U)
82 {
83 /* Read two samples from input buffer */
84 /* f0(n) = x(n) */
85 fcurr0 = *pSrc++;
86 fcurr1 = *pSrc++;
87
88 /* Initialize state pointer */
89 px = pState;
90
91 /* Initialize coeff pointer */
92 pk = pCoeffs;
93
94 /* Read g0(n-1) from state buffer */
95 gcurr0 = *px;
96
97 /* Process first sample for first tap */
98 /* f1(n) = f0(n) + K1 * g0(n-1) */
99 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
100 fnext0 = (fnext0 << 1U) + fcurr0;
101
102 /* g1(n) = f0(n) * K1 + g0(n-1) */
103 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk)) >> 32U);
104 gnext0 = (gnext0 << 1U) + gcurr0;
105
106 /* Process second sample for first tap */
107 fnext1 = (q31_t) (((q63_t) fcurr0 * (*pk)) >> 32U);
108 fnext1 = (fnext1 << 1U) + fcurr1;
109 gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
110 gnext1 = (gnext1 << 1U) + fcurr0;
111
112 /* Read next two samples from input buffer */
113 /* f0(n+2) = x(n+2) */
114 fcurr2 = *pSrc++;
115 fcurr3 = *pSrc++;
116
117 /* Process third sample for first tap */
118 fnext2 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
119 fnext2 = (fnext2 << 1U) + fcurr2;
120 gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
121 gnext2 = (gnext2 << 1U) + fcurr1;
122
123 /* Process fourth sample for first tap */
124 fnext3 = (q31_t) (((q63_t) fcurr2 * (*pk )) >> 32U);
125 fnext3 = (fnext3 << 1U) + fcurr3;
126 gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk++)) >> 32U);
127 gnext3 = (gnext3 << 1U) + fcurr2;
128
129 /* Copy only last input sample into the state buffer
130 which will be used for next samples processing */
131 *px++ = fcurr3;
132
133 /* Update of f values for next coefficient set processing */
134 fcurr0 = fnext0;
135 fcurr1 = fnext1;
136 fcurr2 = fnext2;
137 fcurr3 = fnext3;
138
139 /* Loop unrolling. Process 4 taps at a time . */
140 stageCnt = (numStages - 1U) >> 2U;
141
142 /* Loop over the number of taps. Unroll by a factor of 4.
143 Repeat until we've computed numStages-3 coefficients. */
144
145 /* Process 2nd, 3rd, 4th and 5th taps ... here */
146 while (stageCnt > 0U)
147 {
148 /* Read g1(n-1), g3(n-1) .... from state */
149 gcurr0 = *px;
150
151 /* save g1(n) in state buffer */
152 *px++ = gnext3;
153
154 /* Process first sample for 2nd, 6th .. tap */
155 /* Sample processing for K2, K6.... */
156 /* f1(n) = f0(n) + K1 * g0(n-1) */
157 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
158 fnext0 = (fnext0 << 1U) + fcurr0;
159
160 /* Process second sample for 2nd, 6th .. tap */
161 /* for sample 2 processing */
162 fnext1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
163 fnext1 = (fnext1 << 1U) + fcurr1;
164
165 /* Process third sample for 2nd, 6th .. tap */
166 fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
167 fnext2 = (fnext2 << 1U) + fcurr2;
168
169 /* Process fourth sample for 2nd, 6th .. tap */
170 fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
171 fnext3 = (fnext3 << 1U) + fcurr3;
172
173 /* g1(n) = f0(n) * K1 + g0(n-1) */
174 /* Calculation of state values for next stage */
175 gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 32U);
176 gnext3 = (gnext3 << 1U) + gnext2;
177
178 gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
179 gnext2 = (gnext2 << 1U) + gnext1;
180
181 gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
182 gnext1 = (gnext1 << 1U) + gnext0;
183
184 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
185 gnext0 = (gnext0 << 1U) + gcurr0;
186
187
188 /* Read g2(n-1), g4(n-1) .... from state */
189 gcurr0 = *px;
190
191 /* save g1(n) in state buffer */
192 *px++ = gnext3;
193
194 /* Sample processing for K3, K7.... */
195 /* Process first sample for 3rd, 7th .. tap */
196 /* f3(n) = f2(n) + K3 * g2(n-1) */
197 fcurr0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
198 fcurr0 = (fcurr0 << 1U) + fnext0;
199
200 /* Process second sample for 3rd, 7th .. tap */
201 fcurr1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
202 fcurr1 = (fcurr1 << 1U) + fnext1;
203
204 /* Process third sample for 3rd, 7th .. tap */
205 fcurr2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
206 fcurr2 = (fcurr2 << 1U) + fnext2;
207
208 /* Process fourth sample for 3rd, 7th .. tap */
209 fcurr3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
210 fcurr3 = (fcurr3 << 1U) + fnext3;
211
212 /* Calculation of state values for next stage */
213 /* g3(n) = f2(n) * K3 + g2(n-1) */
214 gnext3 = (q31_t) (((q63_t) fnext3 * (*pk)) >> 32U);
215 gnext3 = (gnext3 << 1U) + gnext2;
216
217 gnext2 = (q31_t) (((q63_t) fnext2 * (*pk)) >> 32U);
218 gnext2 = (gnext2 << 1U) + gnext1;
219
220 gnext1 = (q31_t) (((q63_t) fnext1 * (*pk)) >> 32U);
221 gnext1 = (gnext1 << 1U) + gnext0;
222
223 gnext0 = (q31_t) (((q63_t) fnext0 * (*pk++)) >> 32U);
224 gnext0 = (gnext0 << 1U) + gcurr0;
225
226 /* Read g1(n-1), g3(n-1) .... from state */
227 gcurr0 = *px;
228
229 /* save g1(n) in state buffer */
230 *px++ = gnext3;
231
232 /* Sample processing for K4, K8.... */
233 /* Process first sample for 4th, 8th .. tap */
234 /* f4(n) = f3(n) + K4 * g3(n-1) */
235 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
236 fnext0 = (fnext0 << 1U) + fcurr0;
237
238 /* Process second sample for 4th, 8th .. tap */
239 /* for sample 2 processing */
240 fnext1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
241 fnext1 = (fnext1 << 1U) + fcurr1;
242
243 /* Process third sample for 4th, 8th .. tap */
244 fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
245 fnext2 = (fnext2 << 1U) + fcurr2;
246
247 /* Process fourth sample for 4th, 8th .. tap */
248 fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
249 fnext3 = (fnext3 << 1U) + fcurr3;
250
251 /* g4(n) = f3(n) * K4 + g3(n-1) */
252 /* Calculation of state values for next stage */
253 gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 32U);
254 gnext3 = (gnext3 << 1U) + gnext2;
255
256 gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
257 gnext2 = (gnext2 << 1U) + gnext1;
258
259 gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
260 gnext1 = (gnext1 << 1U) + gnext0;
261
262 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
263 gnext0 = (gnext0 << 1U) + gcurr0;
264
265 /* Read g2(n-1), g4(n-1) .... from state */
266 gcurr0 = *px;
267
268 /* save g4(n) in state buffer */
269 *px++ = gnext3;
270
271 /* Sample processing for K5, K9.... */
272 /* Process first sample for 5th, 9th .. tap */
273 /* f5(n) = f4(n) + K5 * g4(n-1) */
274 fcurr0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
275 fcurr0 = (fcurr0 << 1U) + fnext0;
276
277 /* Process second sample for 5th, 9th .. tap */
278 fcurr1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
279 fcurr1 = (fcurr1 << 1U) + fnext1;
280
281 /* Process third sample for 5th, 9th .. tap */
282 fcurr2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
283 fcurr2 = (fcurr2 << 1U) + fnext2;
284
285 /* Process fourth sample for 5th, 9th .. tap */
286 fcurr3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
287 fcurr3 = (fcurr3 << 1U) + fnext3;
288
289 /* Calculation of state values for next stage */
290 /* g5(n) = f4(n) * K5 + g4(n-1) */
291 gnext3 = (q31_t) (((q63_t) fnext3 * (*pk)) >> 32U);
292 gnext3 = (gnext3 << 1U) + gnext2;
293
294 gnext2 = (q31_t) (((q63_t) fnext2 * (*pk)) >> 32U);
295 gnext2 = (gnext2 << 1U) + gnext1;
296
297 gnext1 = (q31_t) (((q63_t) fnext1 * (*pk)) >> 32U);
298 gnext1 = (gnext1 << 1U) + gnext0;
299
300 gnext0 = (q31_t) (((q63_t) fnext0 * (*pk++)) >> 32U);
301 gnext0 = (gnext0 << 1U) + gcurr0;
302
303 stageCnt--;
304 }
305
306 /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
307 stageCnt = (numStages - 1U) % 0x4U;
308
309 while (stageCnt > 0U)
310 {
311 gcurr0 = *px;
312
313 /* save g value in state buffer */
314 *px++ = gnext3;
315
316 /* Process four samples for last three taps here */
317 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
318 fnext0 = (fnext0 << 1U) + fcurr0;
319
320 fnext1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
321 fnext1 = (fnext1 << 1U) + fcurr1;
322
323 fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
324 fnext2 = (fnext2 << 1U) + fcurr2;
325
326 fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
327 fnext3 = (fnext3 << 1U) + fcurr3;
328
329 /* g1(n) = f0(n) * K1 + g0(n-1) */
330 gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 32U);
331 gnext3 = (gnext3 << 1U) + gnext2;
332
333 gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
334 gnext2 = (gnext2 << 1U) + gnext1;
335
336 gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
337 gnext1 = (gnext1 << 1U) + gnext0;
338
339 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
340 gnext0 = (gnext0 << 1U) + gcurr0;
341
342 /* Update of f values for next coefficient set processing */
343 fcurr0 = fnext0;
344 fcurr1 = fnext1;
345 fcurr2 = fnext2;
346 fcurr3 = fnext3;
347
348 stageCnt--;
349 }
350
351 /* The results in the 4 accumulators, store in the destination buffer. */
352 /* y(n) = fN(n) */
353 *pDst++ = fcurr0;
354 *pDst++ = fcurr1;
355 *pDst++ = fcurr2;
356 *pDst++ = fcurr3;
357
358 blkCnt--;
359 }
360
361 /* Loop unrolling: Compute remaining outputs */
362 blkCnt = blockSize % 0x4U;
363
364 #else
365
366 /* Initialize blkCnt with number of samples */
367 blkCnt = blockSize;
368
369 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
370
371 while (blkCnt > 0U)
372 {
373 /* f0(n) = x(n) */
374 fcurr0 = *pSrc++;
375
376 /* Initialize state pointer */
377 px = pState;
378
379 /* Initialize coeff pointer */
380 pk = pCoeffs;
381
382 /* read g2(n) from state buffer */
383 gcurr0 = *px;
384
385 /* for sample 1 processing */
386 /* f1(n) = f0(n) + K1 * g0(n-1) */
387 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
388 fnext0 = (fnext0 << 1U) + fcurr0;
389
390 /* g1(n) = f0(n) * K1 + g0(n-1) */
391 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
392 gnext0 = (gnext0 << 1U) + gcurr0;
393
394 /* save g1(n) in state buffer */
395 *px++ = fcurr0;
396
397 /* f1(n) is saved in fcurr0 for next stage processing */
398 fcurr0 = fnext0;
399
400 stageCnt = (numStages - 1U);
401
402 /* stage loop */
403 while (stageCnt > 0U)
404 {
405 /* read g2(n) from state buffer */
406 gcurr0 = *px;
407
408 /* save g1(n) in state buffer */
409 *px++ = gnext0;
410
411 /* Sample processing for K2, K3.... */
412 /* f2(n) = f1(n) + K2 * g1(n-1) */
413 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
414 fnext0 = (fnext0 << 1U) + fcurr0;
415
416 /* g2(n) = f1(n) * K2 + g1(n-1) */
417 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
418 gnext0 = (gnext0 << 1U) + gcurr0;
419
420 /* f1(n) is saved in fcurr0 for next stage processing */
421 fcurr0 = fnext0;
422
423 stageCnt--;
424 }
425
426 /* y(n) = fN(n) */
427 *pDst++ = fcurr0;
428
429 blkCnt--;
430 }
431
432 #else
433 /* alternate version for CM0_FAMILY */
434
435 blkCnt = blockSize;
436
437 while (blkCnt > 0U)
438 {
439 /* f0(n) = x(n) */
440 fcurr0 = *pSrc++;
441
442 /* Initialize state pointer */
443 px = pState;
444
445 /* Initialize coeff pointer */
446 pk = pCoeffs;
447
448 /* read g0(n-1) from state buffer */
449 gcurr0 = *px;
450
451 /* for sample 1 processing */
452 /* f1(n) = f0(n) + K1 * g0(n-1) */
453 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
454 fnext0 = (fnext << 1U) + fcurr0;
455
456 /* g1(n) = f0(n) * K1 + g0(n-1) */
457 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
458 gnext0 = (gnext0 << 1U) + gcurr0;
459
460 /* save f0(n) in state buffer */
461 *px++ = fcurr0;
462
463 /* f1(n) is saved in fcurr for next stage processing */
464 fcurr0 = fnext0;
465
466 stageCnt = (numStages - 1U);
467
468 /* stage loop */
469 while (stageCnt > 0U)
470 {
471 /* read g1(n-1) from state buffer */
472 gcurr0 = *px;
473
474 /* save g0(n-1) in state buffer */
475 *px++ = gnext0;
476
477 /* Sample processing for K2, K3.... */
478 /* f2(n) = f1(n) + K2 * g1(n-1) */
479 fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
480 fnext0 = (fnext0 << 1U) + fcurr0;
481
482 /* g2(n) = f1(n) * K2 + g1(n-1) */
483 gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
484 gnext0 = (gnext0 << 1U) + gcurr0;
485
486 /* f1(n) is saved in fcurr0 for next stage processing */
487 fcurr0 = fnext0;
488
489 stageCnt--;
490 }
491
492 /* y(n) = fN(n) */
493 *pDst++ = fcurr0;
494
495 blkCnt--;
496 }
497
498 #endif /* #if !defined(ARM_MATH_CM0_FAMILY) */
499
500 }
501
502 /**
503 @} end of FIR_Lattice group
504 */
505