1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_lms_q15.c
4 * Description: Processing function for Q15 LMS filter
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup LMS
37 @{
38 */
39
40 /**
41 @brief Processing function for Q15 LMS filter.
42 @param[in] S points to an instance of the Q15 LMS filter structure
43 @param[in] pSrc points to the block of input data
44 @param[in] pRef points to the block of reference data
45 @param[out] pOut points to the block of output data
46 @param[out] pErr points to the block of error data
47 @param[in] blockSize number of samples to process
48
49 @par Scaling and Overflow Behavior
50 The function is implemented using an internal 64-bit accumulator.
51 Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
52 The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
53 There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
54 After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
55 Lastly, the accumulator is saturated to yield a result in 1.15 format.
56 @par
57 In this filter, filter coefficients are updated for each sample and
58 the updation of filter cofficients are saturted.
59 */
60
arm_lms_q15(const arm_lms_instance_q15 * S,const q15_t * pSrc,q15_t * pRef,q15_t * pOut,q15_t * pErr,uint32_t blockSize)61 void arm_lms_q15(
62 const arm_lms_instance_q15 * S,
63 const q15_t * pSrc,
64 q15_t * pRef,
65 q15_t * pOut,
66 q15_t * pErr,
67 uint32_t blockSize)
68 {
69 q15_t *pState = S->pState; /* State pointer */
70 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
71 q15_t *pStateCurnt; /* Points to the current sample of the state */
72 q15_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
73 q15_t mu = S->mu; /* Adaptive factor */
74 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
75 uint32_t tapCnt, blkCnt; /* Loop counters */
76 q63_t acc; /* Accumulator */
77 q15_t e = 0; /* Error of data sample */
78 q15_t alpha; /* Intermediate constant for taps update */
79 q31_t coef; /* Temporary variable for coefficient */
80 q31_t acc_l, acc_h; /* Temporary input */
81 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
82 int32_t uShift = (32 - lShift);
83
84 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
85 /* pStateCurnt points to the location where the new input data should be written */
86 pStateCurnt = &(S->pState[(numTaps - 1U)]);
87
88 /* initialise loop count */
89 blkCnt = blockSize;
90
91 while (blkCnt > 0U)
92 {
93 /* Copy the new input sample into the state buffer */
94 *pStateCurnt++ = *pSrc++;
95
96 /* Initialize pState pointer */
97 px = pState;
98
99 /* Initialize coefficient pointer */
100 pb = pCoeffs;
101
102 /* Set the accumulator to zero */
103 acc = 0;
104
105 #if defined (ARM_MATH_LOOPUNROLL)
106
107 /* Loop unrolling: Compute 4 taps at a time. */
108 tapCnt = numTaps >> 2U;
109
110 while (tapCnt > 0U)
111 {
112 /* Perform the multiply-accumulate */
113 /* acc += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
114 acc = __SMLALD(read_q15x2_ia (&px), read_q15x2_ia (&pb), acc);
115 acc = __SMLALD(read_q15x2_ia (&px), read_q15x2_ia (&pb), acc);
116
117 /* Decrement loop counter */
118 tapCnt--;
119 }
120
121 /* Loop unrolling: Compute remaining taps */
122 tapCnt = numTaps % 0x4U;
123
124 #else
125
126 /* Initialize tapCnt with number of samples */
127 tapCnt = numTaps;
128
129 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
130
131 while (tapCnt > 0U)
132 {
133 /* Perform the multiply-accumulate */
134 acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
135
136 /* Decrement the loop counter */
137 tapCnt--;
138 }
139
140 /* Calc lower part of acc */
141 acc_l = acc & 0xffffffff;
142
143 /* Calc upper part of acc */
144 acc_h = (acc >> 32) & 0xffffffff;
145
146 /* Apply shift for lower part of acc and upper part of acc */
147 acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
148
149 /* Converting the result to 1.15 format and saturate the output */
150 acc = __SSAT(acc, 16U);
151
152 /* Store the result from accumulator into the destination buffer. */
153 *pOut++ = (q15_t) acc;
154
155 /* Compute and store error */
156 e = *pRef++ - (q15_t) acc;
157 *pErr++ = (q15_t) e;
158
159 /* Compute alpha i.e. intermediate constant for taps update */
160 alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
161
162 /* Initialize pState pointer */
163 /* Advance state pointer by 1 for the next sample */
164 px = pState++;
165
166 /* Initialize coefficient pointer */
167 pb = pCoeffs;
168
169 #if defined (ARM_MATH_LOOPUNROLL)
170
171 /* Loop unrolling: Compute 4 taps at a time. */
172 tapCnt = numTaps >> 2U;
173
174 /* Update filter coefficients */
175 while (tapCnt > 0U)
176 {
177 coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15);
178 *pb++ = (q15_t) __SSAT((coef), 16);
179
180 coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15);
181 *pb++ = (q15_t) __SSAT((coef), 16);
182
183 coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15);
184 *pb++ = (q15_t) __SSAT((coef), 16);
185
186 coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15);
187 *pb++ = (q15_t) __SSAT((coef), 16);
188
189 /* Decrement loop counter */
190 tapCnt--;
191 }
192
193 /* Loop unrolling: Compute remaining taps */
194 tapCnt = numTaps % 0x4U;
195
196 #else
197
198 /* Initialize tapCnt with number of samples */
199 tapCnt = numTaps;
200
201 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
202
203 while (tapCnt > 0U)
204 {
205 /* Perform the multiply-accumulate */
206 coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15);
207 *pb++ = (q15_t) __SSAT((coef), 16);
208
209 /* Decrement loop counter */
210 tapCnt--;
211 }
212
213 /* Decrement loop counter */
214 blkCnt--;
215 }
216
217 /* Processing is complete.
218 Now copy the last numTaps - 1 samples to the start of the state buffer.
219 This prepares the state buffer for the next function call. */
220
221 /* Points to the start of the pState buffer */
222 pStateCurnt = S->pState;
223
224 /* copy data */
225 #if defined (ARM_MATH_LOOPUNROLL)
226
227 /* Loop unrolling: Compute 4 taps at a time. */
228 tapCnt = (numTaps - 1U) >> 2U;
229
230 while (tapCnt > 0U)
231 {
232 write_q15x2_ia (&pStateCurnt, read_q15x2_ia (&pState));
233 write_q15x2_ia (&pStateCurnt, read_q15x2_ia (&pState));
234
235 /* Decrement loop counter */
236 tapCnt--;
237 }
238
239 /* Loop unrolling: Compute remaining taps */
240 tapCnt = (numTaps - 1U) % 0x4U;
241
242 #else
243
244 /* Initialize tapCnt with number of samples */
245 tapCnt = (numTaps - 1U);
246
247 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
248
249 while (tapCnt > 0U)
250 {
251 *pStateCurnt++ = *pState++;
252
253 /* Decrement loop counter */
254 tapCnt--;
255 }
256
257 }
258
259 /**
260 @} end of LMS group
261 */
262