1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_fir_lattice_q31.c
4  * Description:  Q31 FIR lattice filter processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @addtogroup FIR_Lattice
37   @{
38  */
39 
40 /**
41   @brief         Processing function for the Q31 FIR lattice filter.
42   @param[in]     S          points to an instance of the Q31 FIR lattice structure
43   @param[in]     pSrc       points to the block of input data
44   @param[out]    pDst       points to the block of output data
45   @param[in]     blockSize  number of samples to process
46 
47   @par           Scaling and Overflow Behavior
48                    In order to avoid overflows the input signal must be scaled down by 2*log2(numStages) bits.
49  */
50 
arm_fir_lattice_q31(const arm_fir_lattice_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)51 void arm_fir_lattice_q31(
52   const arm_fir_lattice_instance_q31 * S,
53   const q31_t * pSrc,
54         q31_t * pDst,
55         uint32_t blockSize)
56 {
57         q31_t *pState = S->pState;                     /* State pointer */
58   const q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
59         q31_t *px;                                     /* Temporary state pointer */
60   const q31_t *pk;                                     /* Temporary coefficient pointer */
61         uint32_t numStages = S->numStages;             /* Number of stages in the filter */
62         uint32_t blkCnt, stageCnt;                     /* Loop counters */
63         q31_t fcurr0, fnext0, gnext0, gcurr0;          /* Temporary variables */
64 
65 #if (1)
66 //#if !defined(ARM_MATH_CM0_FAMILY)
67 
68 #if defined (ARM_MATH_LOOPUNROLL)
69         q31_t fcurr1, fnext1, gnext1;                  /* Temporary variables for second sample in loop unrolling */
70         q31_t fcurr2, fnext2, gnext2;                  /* Temporary variables for third sample in loop unrolling */
71         q31_t fcurr3, fnext3, gnext3;                  /* Temporary variables for fourth sample in loop unrolling */
72 #endif
73 
74   gcurr0 = 0;
75 
76 #if defined (ARM_MATH_LOOPUNROLL)
77 
78   /* Loop unrolling: Compute 4 outputs at a time */
79   blkCnt = blockSize >> 2U;
80 
81   while (blkCnt > 0U)
82   {
83     /* Read two samples from input buffer */
84     /* f0(n) = x(n) */
85     fcurr0 = *pSrc++;
86     fcurr1 = *pSrc++;
87 
88     /* Initialize state pointer */
89     px = pState;
90 
91     /* Initialize coeff pointer */
92     pk = pCoeffs;
93 
94     /* Read g0(n-1) from state buffer */
95     gcurr0 = *px;
96 
97     /* Process first sample for first tap */
98     /* f1(n) = f0(n) +  K1 * g0(n-1) */
99     fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
100     fnext0 = (fnext0 << 1U) + fcurr0;
101 
102     /* g1(n) = f0(n) * K1  +  g0(n-1) */
103     gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk)) >> 32U);
104     gnext0 = (gnext0 << 1U) + gcurr0;
105 
106     /* Process second sample for first tap */
107     fnext1 = (q31_t) (((q63_t) fcurr0 * (*pk)) >> 32U);
108     fnext1 = (fnext1 << 1U) + fcurr1;
109     gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
110     gnext1 = (gnext1 << 1U) + fcurr0;
111 
112     /* Read next two samples from input buffer */
113     /* f0(n+2) = x(n+2) */
114     fcurr2 = *pSrc++;
115     fcurr3 = *pSrc++;
116 
117     /* Process third sample for first tap */
118     fnext2 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
119     fnext2 = (fnext2 << 1U) + fcurr2;
120     gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
121     gnext2 = (gnext2 << 1U) + fcurr1;
122 
123     /* Process fourth sample for first tap */
124     fnext3 = (q31_t) (((q63_t) fcurr2 * (*pk  )) >> 32U);
125     fnext3 = (fnext3 << 1U) + fcurr3;
126     gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk++)) >> 32U);
127     gnext3 = (gnext3 << 1U) + fcurr2;
128 
129     /* Copy only last input sample into the state buffer
130        which will be used for next samples processing */
131     *px++ = fcurr3;
132 
133     /* Update of f values for next coefficient set processing */
134     fcurr0 = fnext0;
135     fcurr1 = fnext1;
136     fcurr2 = fnext2;
137     fcurr3 = fnext3;
138 
139     /* Loop unrolling.  Process 4 taps at a time . */
140     stageCnt = (numStages - 1U) >> 2U;
141 
142     /* Loop over the number of taps.  Unroll by a factor of 4.
143        Repeat until we've computed numStages-3 coefficients. */
144 
145     /* Process 2nd, 3rd, 4th and 5th taps ... here */
146     while (stageCnt > 0U)
147     {
148       /* Read g1(n-1), g3(n-1) .... from state */
149       gcurr0 = *px;
150 
151       /* save g1(n) in state buffer */
152       *px++ = gnext3;
153 
154       /* Process first sample for 2nd, 6th .. tap */
155       /* Sample processing for K2, K6.... */
156       /* f1(n) = f0(n) +  K1 * g0(n-1) */
157       fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
158       fnext0 = (fnext0 << 1U) + fcurr0;
159 
160       /* Process second sample for 2nd, 6th .. tap */
161       /* for sample 2 processing */
162       fnext1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
163       fnext1 = (fnext1 << 1U) + fcurr1;
164 
165       /* Process third sample for 2nd, 6th .. tap */
166       fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
167       fnext2 = (fnext2 << 1U) + fcurr2;
168 
169       /* Process fourth sample for 2nd, 6th .. tap */
170       fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
171       fnext3 = (fnext3 << 1U) + fcurr3;
172 
173       /* g1(n) = f0(n) * K1  +  g0(n-1) */
174       /* Calculation of state values for next stage */
175       gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 32U);
176       gnext3 = (gnext3 << 1U) + gnext2;
177 
178       gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
179       gnext2 = (gnext2 << 1U) + gnext1;
180 
181       gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
182       gnext1 = (gnext1 << 1U) + gnext0;
183 
184       gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
185       gnext0 = (gnext0 << 1U) + gcurr0;
186 
187 
188       /* Read g2(n-1), g4(n-1) .... from state */
189       gcurr0 = *px;
190 
191       /* save g1(n) in state buffer */
192       *px++ = gnext3;
193 
194       /* Sample processing for K3, K7.... */
195       /* Process first sample for 3rd, 7th .. tap */
196       /* f3(n) = f2(n) +  K3 * g2(n-1) */
197       fcurr0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
198       fcurr0 = (fcurr0 << 1U) + fnext0;
199 
200       /* Process second sample for 3rd, 7th .. tap */
201       fcurr1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
202       fcurr1 = (fcurr1 << 1U) + fnext1;
203 
204       /* Process third sample for 3rd, 7th .. tap */
205       fcurr2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
206       fcurr2 = (fcurr2 << 1U) + fnext2;
207 
208       /* Process fourth sample for 3rd, 7th .. tap */
209       fcurr3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
210       fcurr3 = (fcurr3 << 1U) + fnext3;
211 
212       /* Calculation of state values for next stage */
213       /* g3(n) = f2(n) * K3  +  g2(n-1) */
214       gnext3 = (q31_t) (((q63_t) fnext3 * (*pk)) >> 32U);
215       gnext3 = (gnext3 << 1U) + gnext2;
216 
217       gnext2 = (q31_t) (((q63_t) fnext2 * (*pk)) >> 32U);
218       gnext2 = (gnext2 << 1U) + gnext1;
219 
220       gnext1 = (q31_t) (((q63_t) fnext1 * (*pk)) >> 32U);
221       gnext1 = (gnext1 << 1U) + gnext0;
222 
223       gnext0 = (q31_t) (((q63_t) fnext0 * (*pk++)) >> 32U);
224       gnext0 = (gnext0 << 1U) + gcurr0;
225 
226       /* Read g1(n-1), g3(n-1) .... from state */
227       gcurr0 = *px;
228 
229       /* save g1(n) in state buffer */
230       *px++ = gnext3;
231 
232       /* Sample processing for K4, K8.... */
233       /* Process first sample for 4th, 8th .. tap */
234       /* f4(n) = f3(n) +  K4 * g3(n-1) */
235       fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
236       fnext0 = (fnext0 << 1U) + fcurr0;
237 
238       /* Process second sample for 4th, 8th .. tap */
239       /* for sample 2 processing */
240       fnext1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
241       fnext1 = (fnext1 << 1U) + fcurr1;
242 
243       /* Process third sample for 4th, 8th .. tap */
244       fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
245       fnext2 = (fnext2 << 1U) + fcurr2;
246 
247       /* Process fourth sample for 4th, 8th .. tap */
248       fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
249       fnext3 = (fnext3 << 1U) + fcurr3;
250 
251       /* g4(n) = f3(n) * K4  +  g3(n-1) */
252       /* Calculation of state values for next stage */
253       gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 32U);
254       gnext3 = (gnext3 << 1U) + gnext2;
255 
256       gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
257       gnext2 = (gnext2 << 1U) + gnext1;
258 
259       gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
260       gnext1 = (gnext1 << 1U) + gnext0;
261 
262       gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
263       gnext0 = (gnext0 << 1U) + gcurr0;
264 
265       /* Read g2(n-1), g4(n-1) .... from state */
266       gcurr0 = *px;
267 
268       /* save g4(n) in state buffer */
269       *px++ = gnext3;
270 
271       /* Sample processing for K5, K9.... */
272       /* Process first sample for 5th, 9th .. tap */
273       /* f5(n) = f4(n) +  K5 * g4(n-1) */
274       fcurr0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
275       fcurr0 = (fcurr0 << 1U) + fnext0;
276 
277       /* Process second sample for 5th, 9th .. tap */
278       fcurr1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
279       fcurr1 = (fcurr1 << 1U) + fnext1;
280 
281       /* Process third sample for 5th, 9th .. tap */
282       fcurr2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
283       fcurr2 = (fcurr2 << 1U) + fnext2;
284 
285       /* Process fourth sample for 5th, 9th .. tap */
286       fcurr3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
287       fcurr3 = (fcurr3 << 1U) + fnext3;
288 
289       /* Calculation of state values for next stage */
290       /* g5(n) = f4(n) * K5  +  g4(n-1) */
291       gnext3 = (q31_t) (((q63_t) fnext3 * (*pk)) >> 32U);
292       gnext3 = (gnext3 << 1U) + gnext2;
293 
294       gnext2 = (q31_t) (((q63_t) fnext2 * (*pk)) >> 32U);
295       gnext2 = (gnext2 << 1U) + gnext1;
296 
297       gnext1 = (q31_t) (((q63_t) fnext1 * (*pk)) >> 32U);
298       gnext1 = (gnext1 << 1U) + gnext0;
299 
300       gnext0 = (q31_t) (((q63_t) fnext0 * (*pk++)) >> 32U);
301       gnext0 = (gnext0 << 1U) + gcurr0;
302 
303       stageCnt--;
304     }
305 
306     /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
307     stageCnt = (numStages - 1U) % 0x4U;
308 
309     while (stageCnt > 0U)
310     {
311       gcurr0 = *px;
312 
313       /* save g value in state buffer */
314       *px++ = gnext3;
315 
316       /* Process four samples for last three taps here */
317       fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
318       fnext0 = (fnext0 << 1U) + fcurr0;
319 
320       fnext1 = (q31_t) (((q63_t) gnext0 * (*pk)) >> 32U);
321       fnext1 = (fnext1 << 1U) + fcurr1;
322 
323       fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 32U);
324       fnext2 = (fnext2 << 1U) + fcurr2;
325 
326       fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 32U);
327       fnext3 = (fnext3 << 1U) + fcurr3;
328 
329       /* g1(n) = f0(n) * K1  +  g0(n-1) */
330       gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 32U);
331       gnext3 = (gnext3 << 1U) + gnext2;
332 
333       gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 32U);
334       gnext2 = (gnext2 << 1U) + gnext1;
335 
336       gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 32U);
337       gnext1 = (gnext1 << 1U) + gnext0;
338 
339       gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
340       gnext0 = (gnext0 << 1U) + gcurr0;
341 
342       /* Update of f values for next coefficient set processing */
343       fcurr0 = fnext0;
344       fcurr1 = fnext1;
345       fcurr2 = fnext2;
346       fcurr3 = fnext3;
347 
348       stageCnt--;
349     }
350 
351     /* The results in the 4 accumulators, store in the destination buffer. */
352     /* y(n) = fN(n) */
353     *pDst++ = fcurr0;
354     *pDst++ = fcurr1;
355     *pDst++ = fcurr2;
356     *pDst++ = fcurr3;
357 
358     blkCnt--;
359   }
360 
361   /* Loop unrolling: Compute remaining outputs */
362   blkCnt = blockSize % 0x4U;
363 
364 #else
365 
366   /* Initialize blkCnt with number of samples */
367   blkCnt = blockSize;
368 
369 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
370 
371   while (blkCnt > 0U)
372   {
373     /* f0(n) = x(n) */
374     fcurr0 = *pSrc++;
375 
376     /* Initialize state pointer */
377     px = pState;
378 
379     /* Initialize coeff pointer */
380     pk = pCoeffs;
381 
382     /* read g2(n) from state buffer */
383     gcurr0 = *px;
384 
385     /* for sample 1 processing */
386     /* f1(n) = f0(n) +  K1 * g0(n-1) */
387     fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
388     fnext0 = (fnext0 << 1U) + fcurr0;
389 
390     /* g1(n) = f0(n) * K1  +  g0(n-1) */
391     gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
392     gnext0 = (gnext0 << 1U) + gcurr0;
393 
394     /* save g1(n) in state buffer */
395     *px++ = fcurr0;
396 
397     /* f1(n) is saved in fcurr0 for next stage processing */
398     fcurr0 = fnext0;
399 
400     stageCnt = (numStages - 1U);
401 
402     /* stage loop */
403     while (stageCnt > 0U)
404     {
405       /* read g2(n) from state buffer */
406       gcurr0 = *px;
407 
408       /* save g1(n) in state buffer */
409       *px++ = gnext0;
410 
411       /* Sample processing for K2, K3.... */
412       /* f2(n) = f1(n) +  K2 * g1(n-1) */
413       fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
414       fnext0 = (fnext0 << 1U) + fcurr0;
415 
416       /* g2(n) = f1(n) * K2  +  g1(n-1) */
417       gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
418       gnext0 = (gnext0 << 1U) + gcurr0;
419 
420       /* f1(n) is saved in fcurr0 for next stage processing */
421       fcurr0 = fnext0;
422 
423       stageCnt--;
424     }
425 
426     /* y(n) = fN(n) */
427     *pDst++ = fcurr0;
428 
429     blkCnt--;
430   }
431 
432 #else
433 /* alternate version for CM0_FAMILY */
434 
435   blkCnt = blockSize;
436 
437   while (blkCnt > 0U)
438   {
439     /* f0(n) = x(n) */
440     fcurr0 = *pSrc++;
441 
442     /* Initialize state pointer */
443     px = pState;
444 
445     /* Initialize coeff pointer */
446     pk = pCoeffs;
447 
448     /* read g0(n-1) from state buffer */
449     gcurr0 = *px;
450 
451     /* for sample 1 processing */
452     /* f1(n) = f0(n) +  K1 * g0(n-1) */
453     fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
454     fnext0 = (fnext << 1U) + fcurr0;
455 
456     /* g1(n) = f0(n) * K1  +  g0(n-1) */
457     gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
458     gnext0 = (gnext0 << 1U) + gcurr0;
459 
460     /* save f0(n) in state buffer */
461     *px++ = fcurr0;
462 
463     /* f1(n) is saved in fcurr for next stage processing */
464     fcurr0 = fnext0;
465 
466     stageCnt = (numStages - 1U);
467 
468     /* stage loop */
469     while (stageCnt > 0U)
470     {
471       /* read g1(n-1) from state buffer */
472       gcurr0 = *px;
473 
474       /* save g0(n-1) in state buffer */
475       *px++ = gnext0;
476 
477       /* Sample processing for K2, K3.... */
478       /* f2(n) = f1(n) +  K2 * g1(n-1) */
479       fnext0 = (q31_t) (((q63_t) gcurr0 * (*pk)) >> 32U);
480       fnext0 = (fnext0 << 1U) + fcurr0;
481 
482       /* g2(n) = f1(n) * K2  +  g1(n-1) */
483       gnext0 = (q31_t) (((q63_t) fcurr0 * (*pk++)) >> 32U);
484       gnext0 = (gnext0 << 1U) + gcurr0;
485 
486       /* f1(n) is saved in fcurr0 for next stage processing */
487       fcurr0 = fnext0;
488 
489       stageCnt--;
490     }
491 
492     /* y(n) = fN(n) */
493     *pDst++ = fcurr0;
494 
495     blkCnt--;
496   }
497 
498 #endif /* #if !defined(ARM_MATH_CM0_FAMILY) */
499 
500 }
501 
502 /**
503   @} end of FIR_Lattice group
504  */
505