1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_iir_lattice_q15.c
4  * Description:  Q15 IIR Lattice filter processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @addtogroup IIR_Lattice
37   @{
38  */
39 
40 /**
41   @brief         Processing function for the Q15 IIR lattice filter.
42   @param[in]     S          points to an instance of the Q15 IIR lattice structure
43   @param[in]     pSrc       points to the block of input data
44   @param[out]    pDst       points to the block of output data
45   @param[in]     blockSize  number of samples to process
46 
47   @par           Scaling and Overflow Behavior
48                    The function is implemented using an internal 64-bit accumulator.
49                    Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
50                    The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
51                    There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
52                    After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
53                    Lastly, the accumulator is saturated to yield a result in 1.15 format.
54  */
55 
arm_iir_lattice_q15(const arm_iir_lattice_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)56 void arm_iir_lattice_q15(
57   const arm_iir_lattice_instance_q15 * S,
58   const q15_t * pSrc,
59         q15_t * pDst,
60         uint32_t blockSize)
61 {
62         q15_t *pState = S->pState;                     /* State pointer */
63         q15_t *pStateCur;                              /* State current pointer */
64         q31_t fcurr, fnext = 0, gcurr = 0, gnext;      /* Temporary variables for lattice stages */
65         q63_t acc;                                     /* Accumlator */
66         q15_t *px1, *px2, *pk, *pv;                    /* Temporary pointers for state and coef */
67         uint32_t numStages = S->numStages;             /* Number of stages */
68         uint32_t blkCnt, tapCnt;                       /* Temporary variables for counts */
69         q15_t out;                                     /* Temporary variable for output */
70 
71 #if defined (ARM_MATH_DSP) && defined (ARM_MATH_LOOPUNROLL)
72         q15_t gnext1, gnext2;                          /* Temporary variables for lattice stages */
73         q31_t v;                                       /* Temporary variable for ladder coefficient */
74 #endif
75 
76   /* initialise loop count */
77   blkCnt = blockSize;
78 
79 #if defined (ARM_MATH_DSP)
80 
81   /* Sample processing */
82   while (blkCnt > 0U)
83   {
84     /* Read Sample from input buffer */
85     /* fN(n) = x(n) */
86     fcurr = *pSrc++;
87 
88     /* Initialize Ladder coeff pointer */
89     pv = &S->pvCoeffs[0];
90 
91     /* Initialize Reflection coeff pointer */
92     pk = &S->pkCoeffs[0];
93 
94     /* Initialize state read pointer */
95     px1 = pState;
96 
97     /* Initialize state write pointer */
98     px2 = pState;
99 
100     /* Set accumulator to zero */
101     acc = 0;
102 
103     /* Process sample for first tap */
104     gcurr = *px1++;
105     /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
106     fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
107     fnext = __SSAT(fnext, 16);
108 
109     /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
110     gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
111     gnext = __SSAT(gnext, 16);
112 
113     /* write gN(n) into state for next sample processing */
114     *px2++ = (q15_t) gnext;
115 
116     /* y(n) += gN(n) * vN */
117     acc += (q31_t) ((gnext * (*pv++)));
118 
119     /* Update f values for next coefficient processing */
120     fcurr = fnext;
121 
122 
123 #if defined (ARM_MATH_LOOPUNROLL)
124 
125     /* Loop unrolling: Compute 4 taps at a time. */
126     tapCnt = (numStages - 1U) >> 2U;
127 
128     while (tapCnt > 0U)
129     {
130       /* Process sample for 2nd, 6th ...taps */
131       /* Read gN-2(n-1) from state buffer */
132       gcurr = *px1++;
133       /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
134       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
135       fnext = __SSAT(fnext, 16);
136       /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
137       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
138       gnext1 = (q15_t) __SSAT(gnext, 16);
139       /* write gN-1(n) into state for next sample processing */
140       *px2++ = (q15_t) gnext1;
141 
142       /* Process sample for 3nd, 7th ...taps */
143       /* Read gN-3(n-1) from state buffer */
144       gcurr = *px1++;
145       /* Process sample for 3rd, 7th .. taps */
146       /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
147       fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
148       fcurr = __SSAT(fcurr, 16);
149       /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
150       gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
151       gnext2 = (q15_t) __SSAT(gnext, 16);
152       /* write gN-2(n) into state */
153       *px2++ = (q15_t) gnext2;
154 
155       /* Read vN-1 and vN-2 at a time */
156       v = read_q15x2_ia (&pv);
157 
158       /* Pack gN-1(n) and gN-2(n) */
159 
160 #ifndef  ARM_MATH_BIG_ENDIAN
161       gnext = __PKHBT(gnext1, gnext2, 16);
162 #else
163       gnext = __PKHBT(gnext2, gnext1, 16);
164 #endif /* #ifndef  ARM_MATH_BIG_ENDIAN */
165 
166       /* y(n) += gN-1(n) * vN-1  */
167       /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
168       /* y(n) += gN-2(n) * vN-2  */
169       /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
170       acc = __SMLALD(gnext, v, acc);
171 
172       /* Process sample for 4th, 8th ...taps */
173       /* Read gN-4(n-1) from state buffer */
174       gcurr = *px1++;
175       /* Process sample for 4th, 8th .. taps */
176       /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
177       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
178       fnext = __SSAT(fnext, 16);
179       /* gN-3(n) = kN-3 * fN-1(n) + gN-1(n-1) */
180       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
181       gnext1 = (q15_t) __SSAT(gnext, 16);
182       /* write  gN-3(n) for the next sample process */
183       *px2++ = (q15_t) gnext1;
184 
185       /* Process sample for 5th, 9th ...taps */
186       /* Read gN-5(n-1) from state buffer */
187       gcurr = *px1++;
188       /* Process sample for 5th, 9th .. taps */
189       /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
190       fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
191       fcurr = __SSAT(fcurr, 16);
192       /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
193       gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
194       gnext2 = (q15_t) __SSAT(gnext, 16);
195       /* write      gN-4(n) for the next sample process */
196       *px2++ = (q15_t) gnext2;
197 
198       /* Read vN-3 and vN-4 at a time */
199       v = read_q15x2_ia (&pv);
200 
201       /* Pack gN-3(n) and gN-4(n) */
202 #ifndef ARM_MATH_BIG_ENDIAN
203       gnext = __PKHBT(gnext1, gnext2, 16);
204 #else
205       gnext = __PKHBT(gnext2, gnext1, 16);
206 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
207 
208       /* y(n) += gN-4(n) * vN-4  */
209       /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
210       /* y(n) += gN-3(n) * vN-3  */
211       /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
212       acc = __SMLALD(gnext, v, acc);
213 
214       /* Decrement loop counter */
215       tapCnt--;
216     }
217 
218     fnext = fcurr;
219 
220     /* Loop unrolling: Compute remaining taps */
221     tapCnt = (numStages - 1U) % 0x4U;
222 
223 #else
224 
225     /* Initialize blkCnt with number of samples */
226     tapCnt = (numStages - 1U);
227 
228 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
229 
230     while (tapCnt > 0U)
231     {
232       gcurr = *px1++;
233       /* Process sample for last taps */
234       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
235       fnext = __SSAT(fnext, 16);
236       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
237       gnext = __SSAT(gnext, 16);
238 
239       /* Output samples for last taps */
240       acc += (q31_t) (((q31_t) gnext * (*pv++)));
241       *px2++ = (q15_t) gnext;
242       fcurr = fnext;
243 
244       /* Decrement loop counter */
245       tapCnt--;
246     }
247 
248     /* y(n) += g0(n) * v0 */
249     acc += (q31_t) (((q31_t) fnext * (*pv++)));
250 
251     out = (q15_t) __SSAT(acc >> 15, 16);
252     *px2++ = (q15_t) fnext;
253 
254     /* write out into pDst */
255     *pDst++ = out;
256 
257     /* Advance the state pointer by 4 to process the next group of 4 samples */
258     pState = pState + 1U;
259 
260     /* Decrement loop counter */
261     blkCnt--;
262   }
263 
264   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
265      for the preperation of next frame process */
266 
267   /* Points to the start of the state buffer */
268   pStateCur = &S->pState[0];
269   pState = &S->pState[blockSize];
270 
271   /* copy data */
272 #if defined (ARM_MATH_LOOPUNROLL)
273 
274   /* Loop unrolling: Compute 4 taps at a time. */
275   tapCnt = numStages >> 2U;
276 
277   while (tapCnt > 0U)
278   {
279     write_q15x2_ia (&pStateCur, read_q15x2_ia (&pState));
280     write_q15x2_ia (&pStateCur, read_q15x2_ia (&pState));
281 
282     /* Decrement loop counter */
283     tapCnt--;
284   }
285 
286   /* Loop unrolling: Compute remaining taps */
287   tapCnt = numStages % 0x4U;
288 
289 #else
290 
291   /* Initialize blkCnt with number of samples */
292   tapCnt = (numStages - 1U);
293 
294 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
295 
296   while (tapCnt > 0U)
297   {
298     *pStateCur++ = *pState++;
299 
300     /* Decrement loop counter */
301     tapCnt--;
302   }
303 
304 #else /* #if defined (ARM_MATH_DSP) */
305 
306   /* Sample processing */
307   while (blkCnt > 0U)
308   {
309     /* Read Sample from input buffer */
310     /* fN(n) = x(n) */
311     fcurr = *pSrc++;
312 
313     /* Initialize Ladder coeff pointer */
314     pv = &S->pvCoeffs[0];
315 
316     /* Initialize Reflection coeff pointer */
317     pk = &S->pkCoeffs[0];
318 
319     /* Initialize state read pointer */
320     px1 = pState;
321 
322     /* Initialize state write pointer */
323     px2 = pState;
324 
325     /* Set accumulator to zero */
326     acc = 0;
327 
328     tapCnt = numStages;
329 
330     while (tapCnt > 0U)
331     {
332       gcurr = *px1++;
333       /* Process sample */
334       /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
335       fnext = fcurr - ((gcurr * (*pk)) >> 15);
336       fnext = __SSAT(fnext, 16);
337 
338       /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
339       gnext = ((fnext * (*pk++)) >> 15) + gcurr;
340       gnext = __SSAT(gnext, 16);
341 
342       /* Output samples */
343       /* y(n) += gN(n) * vN */
344       acc += (q31_t) ((gnext * (*pv++)));
345 
346       /* write gN(n) into state for next sample processing */
347       *px2++ = (q15_t) gnext;
348 
349       /* Update f values for next coefficient processing */
350       fcurr = fnext;
351 
352       tapCnt--;
353     }
354 
355     /* y(n) += g0(n) * v0 */
356     acc += (q31_t) ((fnext * (*pv++)));
357 
358     out = (q15_t) __SSAT(acc >> 15, 16);
359     *px2++ = (q15_t) fnext;
360 
361     /* write out into pDst */
362     *pDst++ = out;
363 
364     /* Advance the state pointer by 1 to process the next group of samples */
365     pState = pState + 1U;
366 
367     /* Decrement loop counter */
368     blkCnt--;
369   }
370 
371   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
372      for the preperation of next frame process */
373 
374   /* Points to the start of the state buffer */
375   pStateCur = &S->pState[0];
376   pState = &S->pState[blockSize];
377 
378   tapCnt = numStages;
379 
380   /* Copy data */
381   while (tapCnt > 0U)
382   {
383     *pStateCur++ = *pState++;
384 
385     /* Decrement loop counter */
386     tapCnt--;
387   }
388 
389 #endif /* #if defined (ARM_MATH_DSP) */
390 
391 }
392 
393 /**
394   @} end of IIR_Lattice group
395  */
396