1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_iir_lattice_q15.c
4 * Description: Q15 IIR Lattice filter processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup IIR_Lattice
37 @{
38 */
39
40 /**
41 @brief Processing function for the Q15 IIR lattice filter.
42 @param[in] S points to an instance of the Q15 IIR lattice structure
43 @param[in] pSrc points to the block of input data
44 @param[out] pDst points to the block of output data
45 @param[in] blockSize number of samples to process
46
47 @par Scaling and Overflow Behavior
48 The function is implemented using an internal 64-bit accumulator.
49 Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
50 The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
51 There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
52 After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
53 Lastly, the accumulator is saturated to yield a result in 1.15 format.
54 */
55
arm_iir_lattice_q15(const arm_iir_lattice_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)56 void arm_iir_lattice_q15(
57 const arm_iir_lattice_instance_q15 * S,
58 const q15_t * pSrc,
59 q15_t * pDst,
60 uint32_t blockSize)
61 {
62 q15_t *pState = S->pState; /* State pointer */
63 q15_t *pStateCur; /* State current pointer */
64 q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */
65 q63_t acc; /* Accumlator */
66 q15_t *px1, *px2, *pk, *pv; /* Temporary pointers for state and coef */
67 uint32_t numStages = S->numStages; /* Number of stages */
68 uint32_t blkCnt, tapCnt; /* Temporary variables for counts */
69 q15_t out; /* Temporary variable for output */
70
71 #if defined (ARM_MATH_DSP) && defined (ARM_MATH_LOOPUNROLL)
72 q15_t gnext1, gnext2; /* Temporary variables for lattice stages */
73 q31_t v; /* Temporary variable for ladder coefficient */
74 #endif
75
76 /* initialise loop count */
77 blkCnt = blockSize;
78
79 #if defined (ARM_MATH_DSP)
80
81 /* Sample processing */
82 while (blkCnt > 0U)
83 {
84 /* Read Sample from input buffer */
85 /* fN(n) = x(n) */
86 fcurr = *pSrc++;
87
88 /* Initialize Ladder coeff pointer */
89 pv = &S->pvCoeffs[0];
90
91 /* Initialize Reflection coeff pointer */
92 pk = &S->pkCoeffs[0];
93
94 /* Initialize state read pointer */
95 px1 = pState;
96
97 /* Initialize state write pointer */
98 px2 = pState;
99
100 /* Set accumulator to zero */
101 acc = 0;
102
103 /* Process sample for first tap */
104 gcurr = *px1++;
105 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
106 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
107 fnext = __SSAT(fnext, 16);
108
109 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
110 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
111 gnext = __SSAT(gnext, 16);
112
113 /* write gN(n) into state for next sample processing */
114 *px2++ = (q15_t) gnext;
115
116 /* y(n) += gN(n) * vN */
117 acc += (q31_t) ((gnext * (*pv++)));
118
119 /* Update f values for next coefficient processing */
120 fcurr = fnext;
121
122
123 #if defined (ARM_MATH_LOOPUNROLL)
124
125 /* Loop unrolling: Compute 4 taps at a time. */
126 tapCnt = (numStages - 1U) >> 2U;
127
128 while (tapCnt > 0U)
129 {
130 /* Process sample for 2nd, 6th ...taps */
131 /* Read gN-2(n-1) from state buffer */
132 gcurr = *px1++;
133 /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
134 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
135 fnext = __SSAT(fnext, 16);
136 /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
137 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
138 gnext1 = (q15_t) __SSAT(gnext, 16);
139 /* write gN-1(n) into state for next sample processing */
140 *px2++ = (q15_t) gnext1;
141
142 /* Process sample for 3nd, 7th ...taps */
143 /* Read gN-3(n-1) from state buffer */
144 gcurr = *px1++;
145 /* Process sample for 3rd, 7th .. taps */
146 /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
147 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
148 fcurr = __SSAT(fcurr, 16);
149 /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
150 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
151 gnext2 = (q15_t) __SSAT(gnext, 16);
152 /* write gN-2(n) into state */
153 *px2++ = (q15_t) gnext2;
154
155 /* Read vN-1 and vN-2 at a time */
156 v = read_q15x2_ia (&pv);
157
158 /* Pack gN-1(n) and gN-2(n) */
159
160 #ifndef ARM_MATH_BIG_ENDIAN
161 gnext = __PKHBT(gnext1, gnext2, 16);
162 #else
163 gnext = __PKHBT(gnext2, gnext1, 16);
164 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
165
166 /* y(n) += gN-1(n) * vN-1 */
167 /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
168 /* y(n) += gN-2(n) * vN-2 */
169 /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
170 acc = __SMLALD(gnext, v, acc);
171
172 /* Process sample for 4th, 8th ...taps */
173 /* Read gN-4(n-1) from state buffer */
174 gcurr = *px1++;
175 /* Process sample for 4th, 8th .. taps */
176 /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
177 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
178 fnext = __SSAT(fnext, 16);
179 /* gN-3(n) = kN-3 * fN-1(n) + gN-1(n-1) */
180 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
181 gnext1 = (q15_t) __SSAT(gnext, 16);
182 /* write gN-3(n) for the next sample process */
183 *px2++ = (q15_t) gnext1;
184
185 /* Process sample for 5th, 9th ...taps */
186 /* Read gN-5(n-1) from state buffer */
187 gcurr = *px1++;
188 /* Process sample for 5th, 9th .. taps */
189 /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
190 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
191 fcurr = __SSAT(fcurr, 16);
192 /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
193 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
194 gnext2 = (q15_t) __SSAT(gnext, 16);
195 /* write gN-4(n) for the next sample process */
196 *px2++ = (q15_t) gnext2;
197
198 /* Read vN-3 and vN-4 at a time */
199 v = read_q15x2_ia (&pv);
200
201 /* Pack gN-3(n) and gN-4(n) */
202 #ifndef ARM_MATH_BIG_ENDIAN
203 gnext = __PKHBT(gnext1, gnext2, 16);
204 #else
205 gnext = __PKHBT(gnext2, gnext1, 16);
206 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
207
208 /* y(n) += gN-4(n) * vN-4 */
209 /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
210 /* y(n) += gN-3(n) * vN-3 */
211 /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
212 acc = __SMLALD(gnext, v, acc);
213
214 /* Decrement loop counter */
215 tapCnt--;
216 }
217
218 fnext = fcurr;
219
220 /* Loop unrolling: Compute remaining taps */
221 tapCnt = (numStages - 1U) % 0x4U;
222
223 #else
224
225 /* Initialize blkCnt with number of samples */
226 tapCnt = (numStages - 1U);
227
228 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
229
230 while (tapCnt > 0U)
231 {
232 gcurr = *px1++;
233 /* Process sample for last taps */
234 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
235 fnext = __SSAT(fnext, 16);
236 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
237 gnext = __SSAT(gnext, 16);
238
239 /* Output samples for last taps */
240 acc += (q31_t) (((q31_t) gnext * (*pv++)));
241 *px2++ = (q15_t) gnext;
242 fcurr = fnext;
243
244 /* Decrement loop counter */
245 tapCnt--;
246 }
247
248 /* y(n) += g0(n) * v0 */
249 acc += (q31_t) (((q31_t) fnext * (*pv++)));
250
251 out = (q15_t) __SSAT(acc >> 15, 16);
252 *px2++ = (q15_t) fnext;
253
254 /* write out into pDst */
255 *pDst++ = out;
256
257 /* Advance the state pointer by 4 to process the next group of 4 samples */
258 pState = pState + 1U;
259
260 /* Decrement loop counter */
261 blkCnt--;
262 }
263
264 /* Processing is complete. Now copy last S->numStages samples to start of the buffer
265 for the preperation of next frame process */
266
267 /* Points to the start of the state buffer */
268 pStateCur = &S->pState[0];
269 pState = &S->pState[blockSize];
270
271 /* copy data */
272 #if defined (ARM_MATH_LOOPUNROLL)
273
274 /* Loop unrolling: Compute 4 taps at a time. */
275 tapCnt = numStages >> 2U;
276
277 while (tapCnt > 0U)
278 {
279 write_q15x2_ia (&pStateCur, read_q15x2_ia (&pState));
280 write_q15x2_ia (&pStateCur, read_q15x2_ia (&pState));
281
282 /* Decrement loop counter */
283 tapCnt--;
284 }
285
286 /* Loop unrolling: Compute remaining taps */
287 tapCnt = numStages % 0x4U;
288
289 #else
290
291 /* Initialize blkCnt with number of samples */
292 tapCnt = (numStages - 1U);
293
294 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
295
296 while (tapCnt > 0U)
297 {
298 *pStateCur++ = *pState++;
299
300 /* Decrement loop counter */
301 tapCnt--;
302 }
303
304 #else /* #if defined (ARM_MATH_DSP) */
305
306 /* Sample processing */
307 while (blkCnt > 0U)
308 {
309 /* Read Sample from input buffer */
310 /* fN(n) = x(n) */
311 fcurr = *pSrc++;
312
313 /* Initialize Ladder coeff pointer */
314 pv = &S->pvCoeffs[0];
315
316 /* Initialize Reflection coeff pointer */
317 pk = &S->pkCoeffs[0];
318
319 /* Initialize state read pointer */
320 px1 = pState;
321
322 /* Initialize state write pointer */
323 px2 = pState;
324
325 /* Set accumulator to zero */
326 acc = 0;
327
328 tapCnt = numStages;
329
330 while (tapCnt > 0U)
331 {
332 gcurr = *px1++;
333 /* Process sample */
334 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
335 fnext = fcurr - ((gcurr * (*pk)) >> 15);
336 fnext = __SSAT(fnext, 16);
337
338 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
339 gnext = ((fnext * (*pk++)) >> 15) + gcurr;
340 gnext = __SSAT(gnext, 16);
341
342 /* Output samples */
343 /* y(n) += gN(n) * vN */
344 acc += (q31_t) ((gnext * (*pv++)));
345
346 /* write gN(n) into state for next sample processing */
347 *px2++ = (q15_t) gnext;
348
349 /* Update f values for next coefficient processing */
350 fcurr = fnext;
351
352 tapCnt--;
353 }
354
355 /* y(n) += g0(n) * v0 */
356 acc += (q31_t) ((fnext * (*pv++)));
357
358 out = (q15_t) __SSAT(acc >> 15, 16);
359 *px2++ = (q15_t) fnext;
360
361 /* write out into pDst */
362 *pDst++ = out;
363
364 /* Advance the state pointer by 1 to process the next group of samples */
365 pState = pState + 1U;
366
367 /* Decrement loop counter */
368 blkCnt--;
369 }
370
371 /* Processing is complete. Now copy last S->numStages samples to start of the buffer
372 for the preperation of next frame process */
373
374 /* Points to the start of the state buffer */
375 pStateCur = &S->pState[0];
376 pState = &S->pState[blockSize];
377
378 tapCnt = numStages;
379
380 /* Copy data */
381 while (tapCnt > 0U)
382 {
383 *pStateCur++ = *pState++;
384
385 /* Decrement loop counter */
386 tapCnt--;
387 }
388
389 #endif /* #if defined (ARM_MATH_DSP) */
390
391 }
392
393 /**
394 @} end of IIR_Lattice group
395 */
396