1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_iir_lattice_q31.c
4  * Description:  Q31 IIR Lattice filter processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @addtogroup IIR_Lattice
37   @{
38  */
39 
40 /**
41   @brief         Processing function for the Q31 IIR lattice filter.
42   @param[in]     S          points to an instance of the Q31 IIR lattice structure
43   @param[in]     pSrc       points to the block of input data
44   @param[out]    pDst       points to the block of output data
45   @param[in]     blockSize  number of samples to process
46 
47   @par           Scaling and Overflow Behavior
48                    The function is implemented using an internal 64-bit accumulator.
49                    The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
50                    Thus, if the accumulator result overflows it wraps around rather than clip.
51                    In order to avoid overflows completely the input signal must be scaled down by 2*log2(numStages) bits.
52                    After all multiply-accumulates are performed, the 2.62 accumulator is saturated to 1.32 format and then truncated to 1.31 format.
53  */
54 
arm_iir_lattice_q31(const arm_iir_lattice_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)55 void arm_iir_lattice_q31(
56   const arm_iir_lattice_instance_q31 * S,
57   const q31_t * pSrc,
58         q31_t * pDst,
59         uint32_t blockSize)
60 {
61         q31_t *pState = S->pState;                       /* State pointer */
62         q31_t *pStateCur;                                /* State current pointer */
63         q31_t fcurr, fnext = 0, gcurr = 0, gnext;        /* Temporary variables for lattice stages */
64         q63_t acc;                                       /* Accumlator */
65         q31_t *px1, *px2, *pk, *pv;                      /* Temporary pointers for state and coef */
66         uint32_t numStages = S->numStages;               /* Number of stages */
67         uint32_t blkCnt, tapCnt;                         /* Temporary variables for counts */
68 
69 
70   /* initialise loop count */
71   blkCnt = blockSize;
72 
73 #if defined (ARM_MATH_DSP)
74 
75   /* Sample processing */
76   while (blkCnt > 0U)
77   {
78     /* Read Sample from input buffer */
79     /* fN(n) = x(n) */
80     fcurr = *pSrc++;
81 
82     /* Initialize Ladder coeff pointer */
83     pv = &S->pvCoeffs[0];
84 
85     /* Initialize Reflection coeff pointer */
86     pk = &S->pkCoeffs[0];
87 
88     /* Initialize state read pointer */
89     px1 = pState;
90 
91     /* Initialize state write pointer */
92     px2 = pState;
93 
94     /* Set accumulator to zero */
95     acc = 0;
96 
97     /* Process sample for first tap */
98     gcurr = *px1++;
99     /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
100     fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk  )) >> 31));
101 
102     /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
103     gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
104 
105     /* write gN-1(n-1) into state for next sample processing */
106     *px2++ = gnext;
107 
108     /* y(n) += gN(n) * vN */
109     acc += ((q63_t) gnext * *pv++);
110 
111     /* Update f values for next coefficient processing */
112     fcurr = fnext;
113 
114 
115 #if defined (ARM_MATH_LOOPUNROLL)
116 
117     /* Loop unrolling: Compute 4 taps at a time. */
118     tapCnt = (numStages - 1U) >> 2U;
119 
120     while (tapCnt > 0U)
121     {
122       /* Process sample for 2nd, 6th ...taps */
123       /* Read gN-2(n-1) from state buffer */
124       gcurr = *px1++;
125       /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
126       fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk  )) >> 31));
127       /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
128       gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
129       /* y(n) += gN-1(n) * vN-1  */
130       /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
131       acc += ((q63_t) gnext * *pv++);
132       /* write gN-1(n) into state for next sample processing */
133       *px2++ = gnext;
134 
135       /* Process sample for 3nd, 7th ...taps */
136       /* Read gN-3(n-1) from state buffer */
137       gcurr = *px1++;
138       /* Process sample for 3rd, 7th .. taps */
139       /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
140       fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk  )) >> 31));
141       /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
142       gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31));
143       /* y(n) += gN-2(n) * vN-2  */
144       /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
145       acc += ((q63_t) gnext * *pv++);
146       /* write gN-2(n) into state for next sample processing */
147       *px2++ = gnext;
148 
149       /* Process sample for 4th, 8th ...taps */
150       /* Read gN-4(n-1) from state buffer */
151       gcurr = *px1++;
152       /* Process sample for 4th, 8th .. taps */
153       /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
154       fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk  )) >> 31));
155       /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
156       gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
157       /* y(n) += gN-3(n) * vN-3  */
158       /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
159       acc += ((q63_t) gnext * *pv++);
160       /* write gN-3(n) into state for next sample processing */
161       *px2++ = gnext;
162 
163       /* Process sample for 5th, 9th ...taps */
164       /* Read gN-5(n-1) from state buffer */
165       gcurr = *px1++;
166       /* Process sample for 5th, 9th .. taps */
167       /* fN-5(n) = fN-4(n) - kN-4 * gN-1(n-1) */
168       fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk  )) >> 31));
169       /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
170       gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31));
171       /* y(n) += gN-4(n) * vN-4  */
172       /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
173       acc += ((q63_t) gnext * *pv++);
174 
175       /* write gN-4(n) into state for next sample processing */
176       *px2++ = gnext;
177 
178       /* Decrement loop counter */
179       tapCnt--;
180     }
181 
182     fnext = fcurr;
183 
184     /* Loop unrolling: Compute remaining taps */
185     tapCnt = (numStages - 1U) % 0x4U;
186 
187 #else
188 
189     /* Initialize blkCnt with number of samples */
190     tapCnt = (numStages - 1U);
191 
192 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
193 
194     while (tapCnt > 0U)
195     {
196       gcurr = *px1++;
197       /* Process sample for last taps */
198       fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk  )) >> 31));
199       gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
200 
201       /* Output samples for last taps */
202       acc += ((q63_t) gnext * *pv++);
203       *px2++ = gnext;
204       fcurr = fnext;
205 
206       /* Decrement loop counter */
207       tapCnt--;
208     }
209 
210     /* y(n) += g0(n) * v0 */
211     acc += ((q63_t) fnext * *pv++);
212 
213     *px2++ = fnext;
214 
215     /* write out into pDst */
216     *pDst++ = (q31_t) (acc >> 31U);
217 
218     /* Advance the state pointer by 4 to process the next group of 4 samples */
219     pState = pState + 1U;
220 
221     /* Decrement loop counter */
222     blkCnt--;
223   }
224 
225   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
226      for the preperation of next frame process */
227 
228   /* Points to the start of the state buffer */
229   pStateCur = &S->pState[0];
230   pState = &S->pState[blockSize];
231 
232   /* Copy data */
233 #if defined (ARM_MATH_LOOPUNROLL)
234 
235   /* Loop unrolling: Compute 4 taps at a time. */
236   tapCnt = numStages >> 2U;
237 
238   while (tapCnt > 0U)
239   {
240     *pStateCur++ = *pState++;
241     *pStateCur++ = *pState++;
242     *pStateCur++ = *pState++;
243     *pStateCur++ = *pState++;
244 
245     /* Decrement loop counter */
246     tapCnt--;
247   }
248 
249   /* Loop unrolling: Compute remaining taps */
250   tapCnt = numStages % 0x4U;
251 
252 #else
253 
254   /* Initialize blkCnt with number of samples */
255   tapCnt = (numStages - 1U);
256 
257 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
258 
259   while (tapCnt > 0U)
260   {
261     *pStateCur++ = *pState++;
262 
263     /* Decrement loop counter */
264     tapCnt--;
265   }
266 
267 #else /* #if defined (ARM_MATH_DSP) */
268 
269   /* Sample processing */
270   while (blkCnt > 0U)
271   {
272     /* Read Sample from input buffer */
273     /* fN(n) = x(n) */
274     fcurr = *pSrc++;
275 
276     /* Initialize Ladder coeff pointer */
277     pv = &S->pvCoeffs[0];
278 
279     /* Initialize Reflection coeff pointer */
280     pk = &S->pkCoeffs[0];
281 
282     /* Initialize state read pointer */
283     px1 = pState;
284 
285     /* Initialize state write pointer */
286     px2 = pState;
287 
288     /* Set accumulator to zero */
289     acc = 0;
290 
291     tapCnt = numStages;
292 
293     while (tapCnt > 0U)
294     {
295       gcurr = *px1++;
296       /* Process sample */
297       /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
298       fnext = clip_q63_to_q31(((q63_t) fcurr - ((q31_t) (((q63_t) gcurr * (*pk  )) >> 31))));
299 
300       /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
301       gnext = clip_q63_to_q31(((q63_t) gcurr + ((q31_t) (((q63_t) fnext * (*pk++)) >> 31))));
302 
303       /* Output samples */
304       /* y(n) += gN(n) * vN */
305       acc += ((q63_t) gnext * *pv++);
306 
307       /* write gN-1(n-1) into state for next sample processing */
308       *px2++ = gnext;
309 
310       /* Update f values for next coefficient processing */
311       fcurr = fnext;
312 
313       tapCnt--;
314     }
315 
316     /* y(n) += g0(n) * v0 */
317     acc += ((q63_t) fnext * *pv++);
318 
319     *px2++ = fnext;
320 
321     /* write out into pDst */
322     *pDst++ = (q31_t) (acc >> 31U);
323 
324     /* Advance the state pointer by 1 to process the next group of samples */
325     pState = pState + 1U;
326 
327     /* Decrement loop counter */
328     blkCnt--;
329   }
330 
331   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
332      for the preperation of next frame process */
333 
334   /* Points to the start of the state buffer */
335   pStateCur = &S->pState[0];
336   pState = &S->pState[blockSize];
337 
338   tapCnt = numStages;
339 
340   /* Copy data */
341   while (tapCnt > 0U)
342   {
343     *pStateCur++ = *pState++;
344 
345     /* Decrement loop counter */
346     tapCnt--;
347   }
348 
349 #endif /* #if defined (ARM_MATH_DSP) */
350 
351 }
352 
353 /**
354   @} end of IIR_Lattice group
355  */
356