1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_fir_lattice_f32.c
4  * Description:  Processing function for floating-point FIR Lattice filter
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @defgroup FIR_Lattice Finite Impulse Response (FIR) Lattice Filters
37 
38   @deprecated Those functions are no more tested nor maintained and will be removed in
39               a future version.
40 
41   This set of functions implements Finite Impulse Response (FIR) lattice filters
42   for Q15, Q31 and floating-point data types.  Lattice filters are used in a
43   variety of adaptive filter applications. The filter structure is feedforward and
44   the net impulse response is finite length.
45   The functions operate on blocks
46   of input and output data and each call to the function processes
47   <code>blockSize</code> samples through the filter.  <code>pSrc</code> and
48   <code>pDst</code> point to input and output arrays containing <code>blockSize</code> values.
49 
50   @par           Algorithm
51                    \image html FIRLattice.gif "Finite Impulse Response Lattice filter"
52                    The following difference equation is implemented:
53   @par
54   <pre>
55       f0[n] = g0[n] = x[n]
56       fm[n] = fm-1[n] + km * gm-1[n-1] for m = 1, 2, ...M
57       gm[n] = km * fm-1[n] + gm-1[n-1] for m = 1, 2, ...M
58       y[n] = fM[n]
59   </pre>
60   @par
61                    <code>pCoeffs</code> points to tha array of reflection coefficients of size <code>numStages</code>.
62                    Reflection Coefficients are stored in the following order.
63   @par
64   <pre>
65       {k1, k2, ..., kM}
66   </pre>
67                    where M is number of stages
68   @par
69                    <code>pState</code> points to a state array of size <code>numStages</code>.
70                    The state variables (g values) hold previous inputs and are stored in the following order.
71   <pre>
72     {g0[n], g1[n], g2[n] ...gM-1[n]}
73   </pre>
74                    The state variables are updated after each block of data is processed; the coefficients are untouched.
75 
76   @par           Instance Structure
77                    The coefficients and state variables for a filter are stored together in an instance data structure.
78                    A separate instance structure must be defined for each filter.
79                    Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
80                    There are separate instance structure declarations for each of the 3 supported data types.
81 
82   @par           Initialization Functions
83                    There is also an associated initialization function for each data type.
84                    The initialization function performs the following operations:
85                    - Sets the values of the internal structure fields.
86                    - Zeros out the values in the state buffer.
87                    To do this manually without calling the init function, assign the follow subfields of the instance structure:
88                    numStages, pCoeffs, pState. Also set all of the values in pState to zero.
89   @par
90                    Use of the initialization function is optional.
91                    However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
92                    To place an instance structure into a const data section, the instance structure must be manually initialized.
93                    Set the values in the state buffer to zeros and then manually initialize the instance structure as follows:
94   <pre>
95       arm_fir_lattice_instance_f32 S = {numStages, pState, pCoeffs};
96       arm_fir_lattice_instance_q31 S = {numStages, pState, pCoeffs};
97       arm_fir_lattice_instance_q15 S = {numStages, pState, pCoeffs};
98   </pre>
99   @par
100                    where <code>numStages</code> is the number of stages in the filter;
101                    <code>pState</code> is the address of the state buffer;
102                    <code>pCoeffs</code> is the address of the coefficient buffer.
103 
104   @par           Fixed-Point Behavior
105                    Care must be taken when using the fixed-point versions of the FIR Lattice filter functions.
106                    In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
107                    Refer to the function specific documentation below for usage guidelines.
108  */
109 
110 /**
111   @addtogroup FIR_Lattice
112   @{
113  */
114 
115 /**
116   @brief         Processing function for the floating-point FIR lattice filter.
117   @param[in]     S          points to an instance of the floating-point FIR lattice structure
118   @param[in]     pSrc       points to the block of input data
119   @param[out]    pDst       points to the block of output data
120   @param[in]     blockSize  number of samples to process
121  */
122 
arm_fir_lattice_f32(const arm_fir_lattice_instance_f32 * S,const float32_t * pSrc,float32_t * pDst,uint32_t blockSize)123 void arm_fir_lattice_f32(
124   const arm_fir_lattice_instance_f32 * S,
125   const float32_t * pSrc,
126         float32_t * pDst,
127         uint32_t blockSize)
128 {
129         float32_t *pState = S->pState;                 /* State pointer */
130   const float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
131         float32_t *px;                                 /* Temporary state pointer */
132   const float32_t *pk;                                 /* Temporary coefficient pointer */
133         uint32_t numStages = S->numStages;             /* Number of stages in the filter */
134         uint32_t blkCnt, stageCnt;                     /* Loop counters */
135         float32_t fcurr0, fnext0, gnext0, gcurr0;      /* Temporary variables */
136 
137 #if defined (ARM_MATH_LOOPUNROLL)
138         float32_t fcurr1, fnext1, gnext1;              /* Temporary variables for second sample in loop unrolling */
139         float32_t fcurr2, fnext2, gnext2;              /* Temporary variables for third sample in loop unrolling */
140         float32_t fcurr3, fnext3, gnext3;              /* Temporary variables for fourth sample in loop unrolling */
141 #endif
142 
143   gcurr0 = 0.0f;
144 
145 #if defined (ARM_MATH_LOOPUNROLL)
146 
147   /* Loop unrolling: Compute 4 outputs at a time */
148   blkCnt = blockSize >> 2U;
149 
150   while (blkCnt > 0U)
151   {
152     /* Read two samples from input buffer */
153     /* f0(n) = x(n) */
154     fcurr0 = *pSrc++;
155     fcurr1 = *pSrc++;
156 
157     /* Initialize state pointer */
158     px = pState;
159 
160     /* Initialize coeff pointer */
161     pk = pCoeffs;
162 
163     /* Read g0(n-1) from state buffer */
164     gcurr0 = *px;
165 
166     /* Process first sample for first tap */
167     /* f1(n) = f0(n) +  K1 * g0(n-1) */
168     fnext0 = (gcurr0 * (*pk)) + fcurr0;
169 
170     /* g1(n) = f0(n) * K1  +  g0(n-1) */
171     gnext0 = (fcurr0 * (*pk)) + gcurr0;
172 
173     /* Process second sample for first tap */
174     fnext1 = (fcurr0 * (*pk)) + fcurr1;
175     gnext1 = (fcurr1 * (*pk)) + fcurr0;
176 
177     /* Read next two samples from input buffer */
178     /* f0(n+2) = x(n+2) */
179     fcurr2 = *pSrc++;
180     fcurr3 = *pSrc++;
181 
182     /* Process third sample for first tap */
183     fnext2 = (fcurr1 * (*pk)) + fcurr2;
184     gnext2 = (fcurr2 * (*pk)) + fcurr1;
185 
186     /* Process fourth sample for first tap */
187     fnext3 = (fcurr2 * (*pk  )) + fcurr3;
188     gnext3 = (fcurr3 * (*pk++)) + fcurr2;
189 
190     /* Copy only last input sample into the state buffer
191        which will be used for next samples processing */
192     *px++ = fcurr3;
193 
194     /* Update of f values for next coefficient set processing */
195     fcurr0 = fnext0;
196     fcurr1 = fnext1;
197     fcurr2 = fnext2;
198     fcurr3 = fnext3;
199 
200     /* Loop unrolling.  Process 4 taps at a time . */
201     stageCnt = (numStages - 1U) >> 2U;
202 
203     /* Loop over the number of taps.  Unroll by a factor of 4.
204        Repeat until we've computed numStages-3 coefficients. */
205 
206     /* Process 2nd, 3rd, 4th and 5th taps ... here */
207     while (stageCnt > 0U)
208     {
209       /* Read g1(n-1), g3(n-1) .... from state */
210       gcurr0 = *px;
211 
212       /* save g1(n) in state buffer */
213       *px++ = gnext3;
214 
215       /* Process first sample for 2nd, 6th .. tap */
216       /* Sample processing for K2, K6.... */
217       /* f2(n) = f1(n) +  K2 * g1(n-1) */
218       fnext0 = (gcurr0 * (*pk)) + fcurr0;
219 
220       /* Process second sample for 2nd, 6th .. tap */
221       /* for sample 2 processing */
222       fnext1 = (gnext0 * (*pk)) + fcurr1;
223 
224       /* Process third sample for 2nd, 6th .. tap */
225       fnext2 = (gnext1 * (*pk)) + fcurr2;
226 
227       /* Process fourth sample for 2nd, 6th .. tap */
228       fnext3 = (gnext2 * (*pk)) + fcurr3;
229 
230       /* g2(n) = f1(n) * K2  +  g1(n-1) */
231       /* Calculation of state values for next stage */
232       gnext3 = (fcurr3 * (*pk)) + gnext2;
233 
234       gnext2 = (fcurr2 * (*pk)) + gnext1;
235 
236       gnext1 = (fcurr1 * (*pk)) + gnext0;
237 
238       gnext0 = (fcurr0 * (*pk++)) + gcurr0;
239 
240 
241       /* Read g2(n-1), g4(n-1) .... from state */
242       gcurr0 = *px;
243 
244       /* save g2(n) in state buffer */
245       *px++ = gnext3;
246 
247       /* Sample processing for K3, K7.... */
248       /* Process first sample for 3rd, 7th .. tap */
249       /* f3(n) = f2(n) +  K3 * g2(n-1) */
250       fcurr0 = (gcurr0 * (*pk)) + fnext0;
251 
252       /* Process second sample for 3rd, 7th .. tap */
253       fcurr1 = (gnext0 * (*pk)) + fnext1;
254 
255       /* Process third sample for 3rd, 7th .. tap */
256       fcurr2 = (gnext1 * (*pk)) + fnext2;
257 
258       /* Process fourth sample for 3rd, 7th .. tap */
259       fcurr3 = (gnext2 * (*pk)) + fnext3;
260 
261       /* Calculation of state values for next stage */
262       /* g3(n) = f2(n) * K3  +  g2(n-1) */
263       gnext3 = (fnext3 * (*pk)) + gnext2;
264 
265       gnext2 = (fnext2 * (*pk)) + gnext1;
266 
267       gnext1 = (fnext1 * (*pk)) + gnext0;
268 
269       gnext0 = (fnext0 * (*pk++)) + gcurr0;
270 
271 
272       /* Read g1(n-1), g3(n-1) .... from state */
273       gcurr0 = *px;
274 
275       /* save g3(n) in state buffer */
276       *px++ = gnext3;
277 
278       /* Sample processing for K4, K8.... */
279       /* Process first sample for 4th, 8th .. tap */
280       /* f4(n) = f3(n) +  K4 * g3(n-1) */
281       fnext0 = (gcurr0 * (*pk)) + fcurr0;
282 
283       /* Process second sample for 4th, 8th .. tap */
284       /* for sample 2 processing */
285       fnext1 = (gnext0 * (*pk)) + fcurr1;
286 
287       /* Process third sample for 4th, 8th .. tap */
288       fnext2 = (gnext1 * (*pk)) + fcurr2;
289 
290       /* Process fourth sample for 4th, 8th .. tap */
291       fnext3 = (gnext2 * (*pk)) + fcurr3;
292 
293       /* g4(n) = f3(n) * K4  +  g3(n-1) */
294       /* Calculation of state values for next stage */
295       gnext3 = (fcurr3 * (*pk)) + gnext2;
296 
297       gnext2 = (fcurr2 * (*pk)) + gnext1;
298 
299       gnext1 = (fcurr1 * (*pk)) + gnext0;
300 
301       gnext0 = (fcurr0 * (*pk++)) + gcurr0;
302 
303 
304       /* Read g2(n-1), g4(n-1) .... from state */
305       gcurr0 = *px;
306 
307       /* save g4(n) in state buffer */
308       *px++ = gnext3;
309 
310       /* Sample processing for K5, K9.... */
311       /* Process first sample for 5th, 9th .. tap */
312       /* f5(n) = f4(n) +  K5 * g4(n-1) */
313       fcurr0 = (gcurr0 * (*pk)) + fnext0;
314 
315       /* Process second sample for 5th, 9th .. tap */
316       fcurr1 = (gnext0 * (*pk)) + fnext1;
317 
318       /* Process third sample for 5th, 9th .. tap */
319       fcurr2 = (gnext1 * (*pk)) + fnext2;
320 
321       /* Process fourth sample for 5th, 9th .. tap */
322       fcurr3 = (gnext2 * (*pk)) + fnext3;
323 
324       /* Calculation of state values for next stage */
325       /* g5(n) = f4(n) * K5  +  g4(n-1) */
326       gnext3 = (fnext3 * (*pk)) + gnext2;
327 
328       gnext2 = (fnext2 * (*pk)) + gnext1;
329 
330       gnext1 = (fnext1 * (*pk)) + gnext0;
331 
332       gnext0 = (fnext0 * (*pk++)) + gcurr0;
333 
334       stageCnt--;
335     }
336 
337     /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
338     stageCnt = (numStages - 1U) % 0x4U;
339 
340     while (stageCnt > 0U)
341     {
342       gcurr0 = *px;
343 
344       /* save g value in state buffer */
345       *px++ = gnext3;
346 
347       /* Process four samples for last three taps here */
348       fnext0 = (gcurr0 * (*pk)) + fcurr0;
349 
350       fnext1 = (gnext0 * (*pk)) + fcurr1;
351 
352       fnext2 = (gnext1 * (*pk)) + fcurr2;
353 
354       fnext3 = (gnext2 * (*pk)) + fcurr3;
355 
356       /* g1(n) = f0(n) * K1  +  g0(n-1) */
357       gnext3 = (fcurr3 * (*pk)) + gnext2;
358 
359       gnext2 = (fcurr2 * (*pk)) + gnext1;
360 
361       gnext1 = (fcurr1 * (*pk)) + gnext0;
362 
363       gnext0 = (fcurr0 * (*pk++)) + gcurr0;
364 
365       /* Update of f values for next coefficient set processing */
366       fcurr0 = fnext0;
367       fcurr1 = fnext1;
368       fcurr2 = fnext2;
369       fcurr3 = fnext3;
370 
371       stageCnt--;
372     }
373 
374     /* The results in the 4 accumulators, store in the destination buffer. */
375     /* y(n) = fN(n) */
376     *pDst++ = fcurr0;
377     *pDst++ = fcurr1;
378     *pDst++ = fcurr2;
379     *pDst++ = fcurr3;
380 
381     blkCnt--;
382   }
383 
384   /* Loop unrolling: Compute remaining outputs */
385   blkCnt = blockSize % 0x4U;
386 
387 #else
388 
389   /* Initialize blkCnt with number of samples */
390   blkCnt = blockSize;
391 
392 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
393 
394   while (blkCnt > 0U)
395   {
396     /* f0(n) = x(n) */
397     fcurr0 = *pSrc++;
398 
399     /* Initialize state pointer */
400     px = pState;
401 
402     /* Initialize coeff pointer */
403     pk = pCoeffs;
404 
405     /* read g2(n) from state buffer */
406     gcurr0 = *px;
407 
408     /* for sample 1 processing */
409     /* f1(n) = f0(n) +  K1 * g0(n-1) */
410     fnext0 = (gcurr0 * (*pk)) + fcurr0;
411 
412     /* g1(n) = f0(n) * K1  +  g0(n-1) */
413     gnext0 = (fcurr0 * (*pk++)) + gcurr0;
414 
415     /* save g1(n) in state buffer */
416     *px++ = fcurr0;
417 
418     /* f1(n) is saved in fcurr0 for next stage processing */
419     fcurr0 = fnext0;
420 
421     stageCnt = (numStages - 1U);
422 
423     /* stage loop */
424     while (stageCnt > 0U)
425     {
426       /* read g2(n) from state buffer */
427       gcurr0 = *px;
428 
429       /* save g1(n) in state buffer */
430       *px++ = gnext0;
431 
432       /* Sample processing for K2, K3.... */
433       /* f2(n) = f1(n) +  K2 * g1(n-1) */
434       fnext0 = (gcurr0 * (*pk)) + fcurr0;
435 
436       /* g2(n) = f1(n) * K2  +  g1(n-1) */
437       gnext0 = (fcurr0 * (*pk++)) + gcurr0;
438 
439       /* f1(n) is saved in fcurr0 for next stage processing */
440       fcurr0 = fnext0;
441 
442       stageCnt--;
443     }
444 
445     /* y(n) = fN(n) */
446     *pDst++ = fcurr0;
447 
448     blkCnt--;
449   }
450 
451 }
452 
453 /**
454   @} end of FIR_Lattice group
455  */
456