1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_iir_lattice_q31.c
4 * Description: Q31 IIR Lattice filter processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup IIR_Lattice
37 @{
38 */
39
40 /**
41 @brief Processing function for the Q31 IIR lattice filter.
42 @param[in] S points to an instance of the Q31 IIR lattice structure
43 @param[in] pSrc points to the block of input data
44 @param[out] pDst points to the block of output data
45 @param[in] blockSize number of samples to process
46
47 @par Scaling and Overflow Behavior
48 The function is implemented using an internal 64-bit accumulator.
49 The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
50 Thus, if the accumulator result overflows it wraps around rather than clip.
51 In order to avoid overflows completely the input signal must be scaled down by 2*log2(numStages) bits.
52 After all multiply-accumulates are performed, the 2.62 accumulator is saturated to 1.32 format and then truncated to 1.31 format.
53 */
54
arm_iir_lattice_q31(const arm_iir_lattice_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)55 void arm_iir_lattice_q31(
56 const arm_iir_lattice_instance_q31 * S,
57 const q31_t * pSrc,
58 q31_t * pDst,
59 uint32_t blockSize)
60 {
61 q31_t *pState = S->pState; /* State pointer */
62 q31_t *pStateCur; /* State current pointer */
63 q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */
64 q63_t acc; /* Accumlator */
65 q31_t *px1, *px2, *pk, *pv; /* Temporary pointers for state and coef */
66 uint32_t numStages = S->numStages; /* Number of stages */
67 uint32_t blkCnt, tapCnt; /* Temporary variables for counts */
68
69
70 /* initialise loop count */
71 blkCnt = blockSize;
72
73 #if defined (ARM_MATH_DSP)
74
75 /* Sample processing */
76 while (blkCnt > 0U)
77 {
78 /* Read Sample from input buffer */
79 /* fN(n) = x(n) */
80 fcurr = *pSrc++;
81
82 /* Initialize Ladder coeff pointer */
83 pv = &S->pvCoeffs[0];
84
85 /* Initialize Reflection coeff pointer */
86 pk = &S->pkCoeffs[0];
87
88 /* Initialize state read pointer */
89 px1 = pState;
90
91 /* Initialize state write pointer */
92 px2 = pState;
93
94 /* Set accumulator to zero */
95 acc = 0;
96
97 /* Process sample for first tap */
98 gcurr = *px1++;
99 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
100 fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk )) >> 31));
101
102 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
103 gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
104
105 /* write gN-1(n-1) into state for next sample processing */
106 *px2++ = gnext;
107
108 /* y(n) += gN(n) * vN */
109 acc += ((q63_t) gnext * *pv++);
110
111 /* Update f values for next coefficient processing */
112 fcurr = fnext;
113
114
115 #if defined (ARM_MATH_LOOPUNROLL)
116
117 /* Loop unrolling: Compute 4 taps at a time. */
118 tapCnt = (numStages - 1U) >> 2U;
119
120 while (tapCnt > 0U)
121 {
122 /* Process sample for 2nd, 6th ...taps */
123 /* Read gN-2(n-1) from state buffer */
124 gcurr = *px1++;
125 /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
126 fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk )) >> 31));
127 /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
128 gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
129 /* y(n) += gN-1(n) * vN-1 */
130 /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
131 acc += ((q63_t) gnext * *pv++);
132 /* write gN-1(n) into state for next sample processing */
133 *px2++ = gnext;
134
135 /* Process sample for 3nd, 7th ...taps */
136 /* Read gN-3(n-1) from state buffer */
137 gcurr = *px1++;
138 /* Process sample for 3rd, 7th .. taps */
139 /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
140 fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk )) >> 31));
141 /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
142 gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31));
143 /* y(n) += gN-2(n) * vN-2 */
144 /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
145 acc += ((q63_t) gnext * *pv++);
146 /* write gN-2(n) into state for next sample processing */
147 *px2++ = gnext;
148
149 /* Process sample for 4th, 8th ...taps */
150 /* Read gN-4(n-1) from state buffer */
151 gcurr = *px1++;
152 /* Process sample for 4th, 8th .. taps */
153 /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
154 fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk )) >> 31));
155 /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
156 gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
157 /* y(n) += gN-3(n) * vN-3 */
158 /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
159 acc += ((q63_t) gnext * *pv++);
160 /* write gN-3(n) into state for next sample processing */
161 *px2++ = gnext;
162
163 /* Process sample for 5th, 9th ...taps */
164 /* Read gN-5(n-1) from state buffer */
165 gcurr = *px1++;
166 /* Process sample for 5th, 9th .. taps */
167 /* fN-5(n) = fN-4(n) - kN-4 * gN-1(n-1) */
168 fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk )) >> 31));
169 /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
170 gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31));
171 /* y(n) += gN-4(n) * vN-4 */
172 /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
173 acc += ((q63_t) gnext * *pv++);
174
175 /* write gN-4(n) into state for next sample processing */
176 *px2++ = gnext;
177
178 /* Decrement loop counter */
179 tapCnt--;
180 }
181
182 fnext = fcurr;
183
184 /* Loop unrolling: Compute remaining taps */
185 tapCnt = (numStages - 1U) % 0x4U;
186
187 #else
188
189 /* Initialize blkCnt with number of samples */
190 tapCnt = (numStages - 1U);
191
192 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
193
194 while (tapCnt > 0U)
195 {
196 gcurr = *px1++;
197 /* Process sample for last taps */
198 fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk )) >> 31));
199 gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
200
201 /* Output samples for last taps */
202 acc += ((q63_t) gnext * *pv++);
203 *px2++ = gnext;
204 fcurr = fnext;
205
206 /* Decrement loop counter */
207 tapCnt--;
208 }
209
210 /* y(n) += g0(n) * v0 */
211 acc += ((q63_t) fnext * *pv++);
212
213 *px2++ = fnext;
214
215 /* write out into pDst */
216 *pDst++ = (q31_t) (acc >> 31U);
217
218 /* Advance the state pointer by 4 to process the next group of 4 samples */
219 pState = pState + 1U;
220
221 /* Decrement loop counter */
222 blkCnt--;
223 }
224
225 /* Processing is complete. Now copy last S->numStages samples to start of the buffer
226 for the preperation of next frame process */
227
228 /* Points to the start of the state buffer */
229 pStateCur = &S->pState[0];
230 pState = &S->pState[blockSize];
231
232 /* Copy data */
233 #if defined (ARM_MATH_LOOPUNROLL)
234
235 /* Loop unrolling: Compute 4 taps at a time. */
236 tapCnt = numStages >> 2U;
237
238 while (tapCnt > 0U)
239 {
240 *pStateCur++ = *pState++;
241 *pStateCur++ = *pState++;
242 *pStateCur++ = *pState++;
243 *pStateCur++ = *pState++;
244
245 /* Decrement loop counter */
246 tapCnt--;
247 }
248
249 /* Loop unrolling: Compute remaining taps */
250 tapCnt = numStages % 0x4U;
251
252 #else
253
254 /* Initialize blkCnt with number of samples */
255 tapCnt = (numStages - 1U);
256
257 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
258
259 while (tapCnt > 0U)
260 {
261 *pStateCur++ = *pState++;
262
263 /* Decrement loop counter */
264 tapCnt--;
265 }
266
267 #else /* #if defined (ARM_MATH_DSP) */
268
269 /* Sample processing */
270 while (blkCnt > 0U)
271 {
272 /* Read Sample from input buffer */
273 /* fN(n) = x(n) */
274 fcurr = *pSrc++;
275
276 /* Initialize Ladder coeff pointer */
277 pv = &S->pvCoeffs[0];
278
279 /* Initialize Reflection coeff pointer */
280 pk = &S->pkCoeffs[0];
281
282 /* Initialize state read pointer */
283 px1 = pState;
284
285 /* Initialize state write pointer */
286 px2 = pState;
287
288 /* Set accumulator to zero */
289 acc = 0;
290
291 tapCnt = numStages;
292
293 while (tapCnt > 0U)
294 {
295 gcurr = *px1++;
296 /* Process sample */
297 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
298 fnext = clip_q63_to_q31(((q63_t) fcurr - ((q31_t) (((q63_t) gcurr * (*pk )) >> 31))));
299
300 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
301 gnext = clip_q63_to_q31(((q63_t) gcurr + ((q31_t) (((q63_t) fnext * (*pk++)) >> 31))));
302
303 /* Output samples */
304 /* y(n) += gN(n) * vN */
305 acc += ((q63_t) gnext * *pv++);
306
307 /* write gN-1(n-1) into state for next sample processing */
308 *px2++ = gnext;
309
310 /* Update f values for next coefficient processing */
311 fcurr = fnext;
312
313 tapCnt--;
314 }
315
316 /* y(n) += g0(n) * v0 */
317 acc += ((q63_t) fnext * *pv++);
318
319 *px2++ = fnext;
320
321 /* write out into pDst */
322 *pDst++ = (q31_t) (acc >> 31U);
323
324 /* Advance the state pointer by 1 to process the next group of samples */
325 pState = pState + 1U;
326
327 /* Decrement loop counter */
328 blkCnt--;
329 }
330
331 /* Processing is complete. Now copy last S->numStages samples to start of the buffer
332 for the preperation of next frame process */
333
334 /* Points to the start of the state buffer */
335 pStateCur = &S->pState[0];
336 pState = &S->pState[blockSize];
337
338 tapCnt = numStages;
339
340 /* Copy data */
341 while (tapCnt > 0U)
342 {
343 *pStateCur++ = *pState++;
344
345 /* Decrement loop counter */
346 tapCnt--;
347 }
348
349 #endif /* #if defined (ARM_MATH_DSP) */
350
351 }
352
353 /**
354 @} end of IIR_Lattice group
355 */
356