1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_lms_q31.c
4 * Description: Processing function for the Q31 LMS filter
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup LMS
37 @{
38 */
39
40 /**
41 @brief Processing function for Q31 LMS filter.
42 @param[in] S points to an instance of the Q31 LMS filter structure.
43 @param[in] pSrc points to the block of input data.
44 @param[in] pRef points to the block of reference data.
45 @param[out] pOut points to the block of output data.
46 @param[out] pErr points to the block of error data.
47 @param[in] blockSize number of samples to process.
48
49 @par Scaling and Overflow Behavior
50 The function is implemented using an internal 64-bit accumulator.
51 The accumulator has a 2.62 format and maintains full precision of the intermediate
52 multiplication results but provides only a single guard bit.
53 Thus, if the accumulator result overflows it wraps around rather than clips.
54 In order to avoid overflows completely the input signal must be scaled down by
55 log2(numTaps) bits.
56 The reference signal should not be scaled down.
57 After all multiply-accumulates are performed, the 2.62 accumulator is shifted
58 and saturated to 1.31 format to yield the final result.
59 The output signal and error signal are in 1.31 format.
60 @par
61 In this filter, filter coefficients are updated for each sample and
62 the updation of filter cofficients are saturted.
63 */
64
arm_lms_q31(const arm_lms_instance_q31 * S,const q31_t * pSrc,q31_t * pRef,q31_t * pOut,q31_t * pErr,uint32_t blockSize)65 ARM_DSP_ATTRIBUTE void arm_lms_q31(
66 const arm_lms_instance_q31 * S,
67 const q31_t * pSrc,
68 q31_t * pRef,
69 q31_t * pOut,
70 q31_t * pErr,
71 uint32_t blockSize)
72 {
73 q31_t *pState = S->pState; /* State pointer */
74 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
75 q31_t *pStateCurnt; /* Points to the current sample of the state */
76 q31_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
77 q31_t mu = S->mu; /* Adaptive factor */
78 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
79 uint32_t tapCnt, blkCnt; /* Loop counters */
80 q63_t acc; /* Accumulator */
81 q31_t e = 0; /* Error of data sample */
82 q31_t alpha; /* Intermediate constant for taps update */
83 q31_t coef; /* Temporary variable for coef */
84 q31_t acc_l, acc_h; /* Temporary input */
85 uint32_t uShift = ((uint32_t) S->postShift + 1U);
86 uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
87
88 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
89 /* pStateCurnt points to the location where the new input data should be written */
90 pStateCurnt = &(S->pState[(numTaps - 1U)]);
91
92 /* initialise loop count */
93 blkCnt = blockSize;
94
95 while (blkCnt > 0U)
96 {
97 /* Copy the new input sample into the state buffer */
98 *pStateCurnt++ = *pSrc++;
99
100 /* Initialize pState pointer */
101 px = pState;
102
103 /* Initialize coefficient pointer */
104 pb = pCoeffs;
105
106 /* Set the accumulator to zero */
107 acc = 0;
108
109 #if defined (ARM_MATH_LOOPUNROLL)
110
111 /* Loop unrolling: Compute 4 taps at a time. */
112 tapCnt = numTaps >> 2U;
113
114 while (tapCnt > 0U)
115 {
116 /* Perform the multiply-accumulate */
117 /* acc += b[N] * x[n-N] */
118 acc += ((q63_t) (*px++)) * (*pb++);
119
120 /* acc += b[N-1] * x[n-N-1] */
121 acc += ((q63_t) (*px++)) * (*pb++);
122
123 /* acc += b[N-2] * x[n-N-2] */
124 acc += ((q63_t) (*px++)) * (*pb++);
125
126 /* acc += b[N-3] * x[n-N-3] */
127 acc += ((q63_t) (*px++)) * (*pb++);
128
129 /* Decrement loop counter */
130 tapCnt--;
131 }
132
133 /* Loop unrolling: Compute remaining taps */
134 tapCnt = numTaps % 0x4U;
135
136 #else
137
138 /* Initialize tapCnt with number of samples */
139 tapCnt = numTaps;
140
141 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
142
143 while (tapCnt > 0U)
144 {
145 /* Perform the multiply-accumulate */
146 acc += ((q63_t) (*px++)) * (*pb++);
147
148 /* Decrement the loop counter */
149 tapCnt--;
150 }
151
152 /* Converting the result to 1.31 format */
153 /* Calc lower part of acc */
154 acc_l = acc & 0xffffffff;
155
156 /* Calc upper part of acc */
157 acc_h = (acc >> 32) & 0xffffffff;
158
159 acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
160
161 /* Store the result from accumulator into the destination buffer. */
162 *pOut++ = (q31_t) acc;
163
164 /* Compute and store error */
165 e = *pRef++ - (q31_t) acc;
166 *pErr++ = e;
167
168 /* Compute alpha i.e. intermediate constant for taps update */
169 alpha = (q31_t) (((q63_t) e * mu) >> 31);
170
171 /* Initialize pState pointer */
172 /* Advance state pointer by 1 for the next sample */
173 px = pState++;
174
175 /* Initialize coefficient pointer */
176 pb = pCoeffs;
177
178 #if defined (ARM_MATH_LOOPUNROLL)
179
180 /* Loop unrolling: Compute 4 taps at a time. */
181 tapCnt = numTaps >> 2U;
182
183 /* Update filter coefficients */
184 while (tapCnt > 0U)
185 {
186 /* Perform the multiply-accumulate */
187
188 /* coef is in 2.30 format */
189 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
190 /* get coef in 1.31 format by left shifting */
191 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
192 /* update coefficient buffer to next coefficient */
193 pb++;
194
195 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
196 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
197 pb++;
198
199 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
200 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
201 pb++;
202
203 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
204 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
205 pb++;
206
207 /* Decrement loop counter */
208 tapCnt--;
209 }
210
211 /* Loop unrolling: Compute remaining taps */
212 tapCnt = numTaps % 0x4U;
213
214 #else
215
216 /* Initialize tapCnt with number of samples */
217 tapCnt = numTaps;
218
219 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
220
221 while (tapCnt > 0U)
222 {
223 /* Perform the multiply-accumulate */
224 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
225 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
226 pb++;
227
228 /* Decrement loop counter */
229 tapCnt--;
230 }
231
232 /* Decrement loop counter */
233 blkCnt--;
234 }
235
236 /* Processing is complete.
237 Now copy the last numTaps - 1 samples to the start of the state buffer.
238 This prepares the state buffer for the next function call. */
239
240 /* Points to the start of the pState buffer */
241 pStateCurnt = S->pState;
242
243 /* copy data */
244 #if defined (ARM_MATH_LOOPUNROLL)
245
246 /* Loop unrolling: Compute 4 taps at a time. */
247 tapCnt = (numTaps - 1U) >> 2U;
248
249 while (tapCnt > 0U)
250 {
251 *pStateCurnt++ = *pState++;
252 *pStateCurnt++ = *pState++;
253 *pStateCurnt++ = *pState++;
254 *pStateCurnt++ = *pState++;
255
256 /* Decrement loop counter */
257 tapCnt--;
258 }
259
260 /* Loop unrolling: Compute remaining taps */
261 tapCnt = (numTaps - 1U) % 0x4U;
262
263 #else
264
265 /* Initialize tapCnt with number of samples */
266 tapCnt = (numTaps - 1U);
267
268 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
269
270 while (tapCnt > 0U)
271 {
272 *pStateCurnt++ = *pState++;
273
274 /* Decrement loop counter */
275 tapCnt--;
276 }
277
278 }
279
280 /**
281 @} end of LMS group
282 */
283