1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_lms_q31.c
4  * Description:  Processing function for the Q31 LMS filter
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @addtogroup LMS
37   @{
38  */
39 
40 /**
41   @brief         Processing function for Q31 LMS filter.
42   @param[in]     S         points to an instance of the Q31 LMS filter structure.
43   @param[in]     pSrc      points to the block of input data.
44   @param[in]     pRef      points to the block of reference data.
45   @param[out]    pOut      points to the block of output data.
46   @param[out]    pErr      points to the block of error data.
47   @param[in]     blockSize number of samples to process.
48 
49   @par           Scaling and Overflow Behavior
50                    The function is implemented using an internal 64-bit accumulator.
51                    The accumulator has a 2.62 format and maintains full precision of the intermediate
52                    multiplication results but provides only a single guard bit.
53                    Thus, if the accumulator result overflows it wraps around rather than clips.
54                    In order to avoid overflows completely the input signal must be scaled down by
55                    log2(numTaps) bits.
56                    The reference signal should not be scaled down.
57                    After all multiply-accumulates are performed, the 2.62 accumulator is shifted
58                    and saturated to 1.31 format to yield the final result.
59                    The output signal and error signal are in 1.31 format.
60  @par
61                    In this filter, filter coefficients are updated for each sample and
62                    the updation of filter cofficients are saturted.
63  */
64 
arm_lms_q31(const arm_lms_instance_q31 * S,const q31_t * pSrc,q31_t * pRef,q31_t * pOut,q31_t * pErr,uint32_t blockSize)65 ARM_DSP_ATTRIBUTE void arm_lms_q31(
66   const arm_lms_instance_q31 * S,
67   const q31_t * pSrc,
68         q31_t * pRef,
69         q31_t * pOut,
70         q31_t * pErr,
71         uint32_t blockSize)
72 {
73         q31_t *pState = S->pState;                     /* State pointer */
74         q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
75         q31_t *pStateCurnt;                            /* Points to the current sample of the state */
76         q31_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */
77         q31_t mu = S->mu;                              /* Adaptive factor */
78         uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
79         uint32_t tapCnt, blkCnt;                       /* Loop counters */
80         q63_t acc;                                     /* Accumulator */
81         q31_t e = 0;                                   /* Error of data sample */
82         q31_t alpha;                                   /* Intermediate constant for taps update */
83         q31_t coef;                                    /* Temporary variable for coef */
84         q31_t acc_l, acc_h;                            /* Temporary input */
85         uint32_t uShift = ((uint32_t) S->postShift + 1U);
86         uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */
87 
88   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
89   /* pStateCurnt points to the location where the new input data should be written */
90   pStateCurnt = &(S->pState[(numTaps - 1U)]);
91 
92   /* initialise loop count */
93   blkCnt = blockSize;
94 
95   while (blkCnt > 0U)
96   {
97     /* Copy the new input sample into the state buffer */
98     *pStateCurnt++ = *pSrc++;
99 
100     /* Initialize pState pointer */
101     px = pState;
102 
103     /* Initialize coefficient pointer */
104     pb = pCoeffs;
105 
106     /* Set the accumulator to zero */
107     acc = 0;
108 
109 #if defined (ARM_MATH_LOOPUNROLL)
110 
111     /* Loop unrolling: Compute 4 taps at a time. */
112     tapCnt = numTaps >> 2U;
113 
114     while (tapCnt > 0U)
115     {
116       /* Perform the multiply-accumulate */
117       /* acc +=  b[N] * x[n-N] */
118       acc += ((q63_t) (*px++)) * (*pb++);
119 
120       /* acc +=  b[N-1] * x[n-N-1] */
121       acc += ((q63_t) (*px++)) * (*pb++);
122 
123       /* acc +=  b[N-2] * x[n-N-2] */
124       acc += ((q63_t) (*px++)) * (*pb++);
125 
126       /* acc +=  b[N-3] * x[n-N-3] */
127       acc += ((q63_t) (*px++)) * (*pb++);
128 
129       /* Decrement loop counter */
130       tapCnt--;
131     }
132 
133     /* Loop unrolling: Compute remaining taps */
134     tapCnt = numTaps % 0x4U;
135 
136 #else
137 
138     /* Initialize tapCnt with number of samples */
139     tapCnt = numTaps;
140 
141 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
142 
143     while (tapCnt > 0U)
144     {
145       /* Perform the multiply-accumulate */
146       acc += ((q63_t) (*px++)) * (*pb++);
147 
148       /* Decrement the loop counter */
149       tapCnt--;
150     }
151 
152     /* Converting the result to 1.31 format */
153     /* Calc lower part of acc */
154     acc_l = acc & 0xffffffff;
155 
156     /* Calc upper part of acc */
157     acc_h = (acc >> 32) & 0xffffffff;
158 
159     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
160 
161     /* Store the result from accumulator into the destination buffer. */
162     *pOut++ = (q31_t) acc;
163 
164     /* Compute and store error */
165     e = *pRef++ - (q31_t) acc;
166     *pErr++ = e;
167 
168     /* Compute alpha i.e. intermediate constant for taps update */
169     alpha = (q31_t) (((q63_t) e * mu) >> 31);
170 
171     /* Initialize pState pointer */
172     /* Advance state pointer by 1 for the next sample */
173     px = pState++;
174 
175     /* Initialize coefficient pointer */
176     pb = pCoeffs;
177 
178 #if defined (ARM_MATH_LOOPUNROLL)
179 
180     /* Loop unrolling: Compute 4 taps at a time. */
181     tapCnt = numTaps >> 2U;
182 
183     /* Update filter coefficients */
184     while (tapCnt > 0U)
185     {
186       /* Perform the multiply-accumulate */
187 
188       /* coef is in 2.30 format */
189       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
190       /* get coef in 1.31 format by left shifting */
191       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
192       /* update coefficient buffer to next coefficient */
193       pb++;
194 
195       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
196       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
197       pb++;
198 
199       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
200       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
201       pb++;
202 
203       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
204       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
205       pb++;
206 
207       /* Decrement loop counter */
208       tapCnt--;
209     }
210 
211     /* Loop unrolling: Compute remaining taps */
212     tapCnt = numTaps % 0x4U;
213 
214 #else
215 
216     /* Initialize tapCnt with number of samples */
217     tapCnt = numTaps;
218 
219 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
220 
221     while (tapCnt > 0U)
222     {
223       /* Perform the multiply-accumulate */
224       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
225       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
226       pb++;
227 
228       /* Decrement loop counter */
229       tapCnt--;
230     }
231 
232     /* Decrement loop counter */
233     blkCnt--;
234   }
235 
236   /* Processing is complete.
237      Now copy the last numTaps - 1 samples to the start of the state buffer.
238      This prepares the state buffer for the next function call. */
239 
240   /* Points to the start of the pState buffer */
241   pStateCurnt = S->pState;
242 
243   /* copy data */
244 #if defined (ARM_MATH_LOOPUNROLL)
245 
246   /* Loop unrolling: Compute 4 taps at a time. */
247   tapCnt = (numTaps - 1U) >> 2U;
248 
249   while (tapCnt > 0U)
250   {
251     *pStateCurnt++ = *pState++;
252     *pStateCurnt++ = *pState++;
253     *pStateCurnt++ = *pState++;
254     *pStateCurnt++ = *pState++;
255 
256     /* Decrement loop counter */
257     tapCnt--;
258   }
259 
260   /* Loop unrolling: Compute remaining taps */
261   tapCnt = (numTaps - 1U) % 0x4U;
262 
263 #else
264 
265   /* Initialize tapCnt with number of samples */
266   tapCnt = (numTaps - 1U);
267 
268 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
269 
270   while (tapCnt > 0U)
271   {
272     *pStateCurnt++ = *pState++;
273 
274     /* Decrement loop counter */
275     tapCnt--;
276   }
277 
278 }
279 
280 /**
281   @} end of LMS group
282  */
283