1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_fir_lattice_q15.c
4 * Description: Q15 FIR lattice filter processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup FIR_Lattice
37 @{
38 */
39
40 /**
41 @brief Processing function for Q15 FIR lattice filter.
42 @param[in] S points to an instance of the Q15 FIR lattice structure
43 @param[in] pSrc points to the block of input data
44 @param[out] pDst points to the block of output data
45 @param[in] blockSize number of samples to process
46 */
47
arm_fir_lattice_q15(const arm_fir_lattice_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)48 void arm_fir_lattice_q15(
49 const arm_fir_lattice_instance_q15 * S,
50 const q15_t * pSrc,
51 q15_t * pDst,
52 uint32_t blockSize)
53 {
54 q15_t *pState = S->pState; /* State pointer */
55 const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
56 q15_t *px; /* Temporary state pointer */
57 const q15_t *pk; /* Temporary coefficient pointer */
58 uint32_t numStages = S->numStages; /* Number of stages in the filter */
59 uint32_t blkCnt, stageCnt; /* Loop counters */
60 q31_t fcurr0, fnext0, gnext0, gcurr0; /* Temporary variables */
61
62 #if (1)
63 //#if !defined(ARM_MATH_CM0_FAMILY)
64
65 #if defined (ARM_MATH_LOOPUNROLL)
66 q31_t fcurr1, fnext1, gnext1; /* Temporary variables for second sample in loop unrolling */
67 q31_t fcurr2, fnext2, gnext2; /* Temporary variables for third sample in loop unrolling */
68 q31_t fcurr3, fnext3, gnext3; /* Temporary variables for fourth sample in loop unrolling */
69 #endif
70
71 gcurr0 = 0;
72
73 #if defined (ARM_MATH_LOOPUNROLL)
74
75 /* Loop unrolling: Compute 4 outputs at a time */
76 blkCnt = blockSize >> 2U;
77
78 while (blkCnt > 0U)
79 {
80 /* Read two samples from input buffer */
81 /* f0(n) = x(n) */
82 fcurr0 = *pSrc++;
83 fcurr1 = *pSrc++;
84
85 /* Initialize state pointer */
86 px = pState;
87
88 /* Initialize coeff pointer */
89 pk = pCoeffs;
90
91 /* Read g0(n-1) from state buffer */
92 gcurr0 = *px;
93
94 /* Process first sample for first tap */
95 /* f1(n) = f0(n) + K1 * g0(n-1) */
96 fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
97 fnext0 = __SSAT(fnext0, 16);
98
99 /* g1(n) = f0(n) * K1 + g0(n-1) */
100 gnext0 = (q31_t) ((fcurr0 * (*pk)) >> 15U) + gcurr0;
101 gnext0 = __SSAT(gnext0, 16);
102
103 /* Process second sample for first tap */
104 fnext1 = (q31_t) ((fcurr0 * (*pk)) >> 15U) + fcurr1;
105 fnext1 = __SSAT(fnext1, 16);
106 gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + fcurr0;
107 gnext1 = __SSAT(gnext1, 16);
108
109 /* Read next two samples from input buffer */
110 /* f0(n+2) = x(n+2) */
111 fcurr2 = *pSrc++;
112 fcurr3 = *pSrc++;
113
114 /* Process third sample for first tap */
115 fnext2 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + fcurr2;
116 fnext2 = __SSAT(fnext2, 16);
117 gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + fcurr1;
118 gnext2 = __SSAT(gnext2, 16);
119
120 /* Process fourth sample for first tap */
121 fnext3 = (q31_t) ((fcurr2 * (*pk )) >> 15U) + fcurr3;
122 fnext3 = __SSAT(fnext3, 16);
123 gnext3 = (q31_t) ((fcurr3 * (*pk++)) >> 15U) + fcurr2;
124 gnext3 = __SSAT(gnext3, 16);
125
126 /* Copy only last input sample into the state buffer
127 which will be used for next samples processing */
128 *px++ = (q15_t) fcurr3;
129
130 /* Update of f values for next coefficient set processing */
131 fcurr0 = fnext0;
132 fcurr1 = fnext1;
133 fcurr2 = fnext2;
134 fcurr3 = fnext3;
135
136 /* Loop unrolling. Process 4 taps at a time . */
137 stageCnt = (numStages - 1U) >> 2U;
138
139 /* Loop over the number of taps. Unroll by a factor of 4.
140 Repeat until we've computed numStages-3 coefficients. */
141
142 /* Process 2nd, 3rd, 4th and 5th taps ... here */
143 while (stageCnt > 0U)
144 {
145 /* Read g1(n-1), g3(n-1) .... from state */
146 gcurr0 = *px;
147
148 /* save g1(n) in state buffer */
149 *px++ = (q15_t) gnext3;
150
151 /* Process first sample for 2nd, 6th .. tap */
152 /* Sample processing for K2, K6.... */
153 /* f1(n) = f0(n) + K1 * g0(n-1) */
154 fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
155 fnext0 = __SSAT(fnext0, 16);
156
157 /* Process second sample for 2nd, 6th .. tap */
158 /* for sample 2 processing */
159 fnext1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fcurr1;
160 fnext1 = __SSAT(fnext1, 16);
161
162 /* Process third sample for 2nd, 6th .. tap */
163 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fcurr2;
164 fnext2 = __SSAT(fnext2, 16);
165
166 /* Process fourth sample for 2nd, 6th .. tap */
167 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fcurr3;
168 fnext3 = __SSAT(fnext3, 16);
169
170 /* g1(n) = f0(n) * K1 + g0(n-1) */
171 /* Calculation of state values for next stage */
172 gnext3 = (q31_t) ((fcurr3 * (*pk)) >> 15U) + gnext2;
173 gnext3 = __SSAT(gnext3, 16);
174
175 gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + gnext1;
176 gnext2 = __SSAT(gnext2, 16);
177
178 gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + gnext0;
179 gnext1 = __SSAT(gnext1, 16);
180
181 gnext0 = (q31_t) ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
182 gnext0 = __SSAT(gnext0, 16);
183
184
185 /* Read g2(n-1), g4(n-1) .... from state */
186 gcurr0 = *px;
187
188 /* save g1(n) in state buffer */
189 *px++ = (q15_t) gnext3;
190
191 /* Sample processing for K3, K7.... */
192 /* Process first sample for 3rd, 7th .. tap */
193 /* f3(n) = f2(n) + K3 * g2(n-1) */
194 fcurr0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fnext0;
195 fcurr0 = __SSAT(fcurr0, 16);
196
197 /* Process second sample for 3rd, 7th .. tap */
198 fcurr1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fnext1;
199 fcurr1 = __SSAT(fcurr1, 16);
200
201 /* Process third sample for 3rd, 7th .. tap */
202 fcurr2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fnext2;
203 fcurr2 = __SSAT(fcurr2, 16);
204
205 /* Process fourth sample for 3rd, 7th .. tap */
206 fcurr3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fnext3;
207 fcurr3 = __SSAT(fcurr3, 16);
208
209 /* Calculation of state values for next stage */
210 /* g3(n) = f2(n) * K3 + g2(n-1) */
211 gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15U) + gnext2;
212 gnext3 = __SSAT(gnext3, 16);
213
214 gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15U) + gnext1;
215 gnext2 = __SSAT(gnext2, 16);
216
217 gnext1 = (q31_t) ((fnext1 * (*pk)) >> 15U) + gnext0;
218 gnext1 = __SSAT(gnext1, 16);
219
220 gnext0 = (q31_t) ((fnext0 * (*pk++)) >> 15U) + gcurr0;
221 gnext0 = __SSAT(gnext0, 16);
222
223 /* Read g1(n-1), g3(n-1) .... from state */
224 gcurr0 = *px;
225
226 /* save g1(n) in state buffer */
227 *px++ = (q15_t) gnext3;
228
229 /* Sample processing for K4, K8.... */
230 /* Process first sample for 4th, 8th .. tap */
231 /* f4(n) = f3(n) + K4 * g3(n-1) */
232 fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
233 fnext0 = __SSAT(fnext0, 16);
234
235 /* Process second sample for 4th, 8th .. tap */
236 /* for sample 2 processing */
237 fnext1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fcurr1;
238 fnext1 = __SSAT(fnext1, 16);
239
240 /* Process third sample for 4th, 8th .. tap */
241 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fcurr2;
242 fnext2 = __SSAT(fnext2, 16);
243
244 /* Process fourth sample for 4th, 8th .. tap */
245 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fcurr3;
246 fnext3 = __SSAT(fnext3, 16);
247
248 /* g4(n) = f3(n) * K4 + g3(n-1) */
249 /* Calculation of state values for next stage */
250 gnext3 = (q31_t) ((fcurr3 * (*pk)) >> 15U) + gnext2;
251 gnext3 = __SSAT(gnext3, 16);
252
253 gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + gnext1;
254 gnext2 = __SSAT(gnext2, 16);
255
256 gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + gnext0;
257 gnext1 = __SSAT(gnext1, 16);
258
259 gnext0 = (q31_t) ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
260 gnext0 = __SSAT(gnext0, 16);
261
262 /* Read g2(n-1), g4(n-1) .... from state */
263 gcurr0 = *px;
264
265 /* save g4(n) in state buffer */
266 *px++ = (q15_t) gnext3;
267
268 /* Sample processing for K5, K9.... */
269 /* Process first sample for 5th, 9th .. tap */
270 /* f5(n) = f4(n) + K5 * g4(n-1) */
271 fcurr0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fnext0;
272 fcurr0 = __SSAT(fcurr0, 16);
273
274 /* Process second sample for 5th, 9th .. tap */
275 fcurr1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fnext1;
276 fcurr1 = __SSAT(fcurr1, 16);
277
278 /* Process third sample for 5th, 9th .. tap */
279 fcurr2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fnext2;
280 fcurr2 = __SSAT(fcurr2, 16);
281
282 /* Process fourth sample for 5th, 9th .. tap */
283 fcurr3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fnext3;
284 fcurr3 = __SSAT(fcurr3, 16);
285
286 /* Calculation of state values for next stage */
287 /* g5(n) = f4(n) * K5 + g4(n-1) */
288 gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15U) + gnext2;
289 gnext3 = __SSAT(gnext3, 16);
290
291 gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15U) + gnext1;
292 gnext2 = __SSAT(gnext2, 16);
293
294 gnext1 = (q31_t) ((fnext1 * (*pk)) >> 15U) + gnext0;
295 gnext1 = __SSAT(gnext1, 16);
296
297 gnext0 = (q31_t) ((fnext0 * (*pk++)) >> 15U) + gcurr0;
298 gnext0 = __SSAT(gnext0, 16);
299
300 stageCnt--;
301 }
302
303 /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
304 stageCnt = (numStages - 1U) % 0x4U;
305
306 while (stageCnt > 0U)
307 {
308 gcurr0 = *px;
309
310 /* save g value in state buffer */
311 *px++ = (q15_t) gnext3;
312
313 /* Process four samples for last three taps here */
314 fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
315 fnext0 = __SSAT(fnext0, 16);
316
317 fnext1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fcurr1;
318 fnext1 = __SSAT(fnext1, 16);
319
320 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fcurr2;
321 fnext2 = __SSAT(fnext2, 16);
322
323 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fcurr3;
324 fnext3 = __SSAT(fnext3, 16);
325
326 /* g1(n) = f0(n) * K1 + g0(n-1) */
327 gnext3 = (q31_t) ((fcurr3 * (*pk)) >> 15U) + gnext2;
328 gnext3 = __SSAT(gnext3, 16);
329
330 gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + gnext1;
331 gnext2 = __SSAT(gnext2, 16);
332
333 gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + gnext0;
334 gnext1 = __SSAT(gnext1, 16);
335
336 gnext0 = (q31_t) ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
337 gnext0 = __SSAT(gnext0, 16);
338
339 /* Update of f values for next coefficient set processing */
340 fcurr0 = fnext0;
341 fcurr1 = fnext1;
342 fcurr2 = fnext2;
343 fcurr3 = fnext3;
344
345 stageCnt--;
346 }
347
348 /* The results in the 4 accumulators, store in the destination buffer. */
349 /* y(n) = fN(n) */
350
351 #ifndef ARM_MATH_BIG_ENDIAN
352 write_q15x2_ia (&pDst, __PKHBT(fcurr0, fcurr1, 16));
353 write_q15x2_ia (&pDst, __PKHBT(fcurr2, fcurr3, 16));
354 #else
355 write_q15x2_ia (&pDst, __PKHBT(fcurr1, fcurr0, 16));
356 write_q15x2_ia (&pDst, __PKHBT(fcurr3, fcurr2, 16));
357 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
358
359 blkCnt--;
360 }
361
362 /* Loop unrolling: Compute remaining outputs */
363 blkCnt = blockSize % 0x4U;
364
365 #else
366
367 /* Initialize blkCnt with number of samples */
368 blkCnt = blockSize;
369
370 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
371
372 while (blkCnt > 0U)
373 {
374 /* f0(n) = x(n) */
375 fcurr0 = *pSrc++;
376
377 /* Initialize state pointer */
378 px = pState;
379
380 /* Initialize coeff pointer */
381 pk = pCoeffs;
382
383 /* read g2(n) from state buffer */
384 gcurr0 = *px;
385
386 /* for sample 1 processing */
387 /* f1(n) = f0(n) + K1 * g0(n-1) */
388 fnext0 = (((q31_t) gcurr0 * (*pk)) >> 15U) + fcurr0;
389 fnext0 = __SSAT(fnext0, 16);
390
391 /* g1(n) = f0(n) * K1 + g0(n-1) */
392 gnext0 = (((q31_t) fcurr0 * (*pk++)) >> 15U) + gcurr0;
393 gnext0 = __SSAT(gnext0, 16);
394
395 /* save g1(n) in state buffer */
396 *px++ = (q15_t) fcurr0;
397
398 /* f1(n) is saved in fcurr0 for next stage processing */
399 fcurr0 = fnext0;
400
401 stageCnt = (numStages - 1U);
402
403 /* stage loop */
404 while (stageCnt > 0U)
405 {
406 /* read g2(n) from state buffer */
407 gcurr0 = *px;
408
409 /* save g1(n) in state buffer */
410 *px++ = (q15_t) gnext0;
411
412 /* Sample processing for K2, K3.... */
413 /* f2(n) = f1(n) + K2 * g1(n-1) */
414 fnext0 = (((q31_t) gcurr0 * (*pk)) >> 15U) + fcurr0;
415 fnext0 = __SSAT(fnext0, 16);
416
417 /* g2(n) = f1(n) * K2 + g1(n-1) */
418 gnext0 = (((q31_t) fcurr0 * (*pk++)) >> 15U) + gcurr0;
419 gnext0 = __SSAT(gnext0, 16);
420
421 /* f1(n) is saved in fcurr0 for next stage processing */
422 fcurr0 = fnext0;
423
424 stageCnt--;
425 }
426
427 /* y(n) = fN(n) */
428 *pDst++ = __SSAT(fcurr0, 16);
429
430 blkCnt--;
431 }
432
433 #else
434 /* alternate version for CM0_FAMILY */
435
436 blkCnt = blockSize;
437
438 while (blkCnt > 0U)
439 {
440 /* f0(n) = x(n) */
441 fcurr0 = *pSrc++;
442
443 /* Initialize state pointer */
444 px = pState;
445
446 /* Initialize coeff pointer */
447 pk = pCoeffs;
448
449 /* read g0(n-1) from state buffer */
450 gcurr0 = *px;
451
452 /* for sample 1 processing */
453 /* f1(n) = f0(n) + K1 * g0(n-1) */
454 fnext0 = ((gcurr0 * (*pk)) >> 15U) + fcurr0;
455 fnext0 = __SSAT(fnext, 16);
456
457 /* g1(n) = f0(n) * K1 + g0(n-1) */
458 gnext0 = ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
459 gnext0 = __SSAT(gnext0, 16);
460
461 /* save f0(n) in state buffer */
462 *px++ = (q15_t) fcurr0;
463
464 /* f1(n) is saved in fcurr for next stage processing */
465 fcurr0 = fnext0;
466
467 stageCnt = (numStages - 1U);
468
469 /* stage loop */
470 while (stageCnt > 0U)
471 {
472 /* read g1(n-1) from state buffer */
473 gcurr0 = *px;
474
475 /* save g0(n-1) in state buffer */
476 *px++ = (q15_t) gnext0;
477
478 /* Sample processing for K2, K3.... */
479 /* f2(n) = f1(n) + K2 * g1(n-1) */
480 fnext0 = ((gcurr0 * (*pk)) >> 15U) + fcurr0;
481 fnext0 = __SSAT(fnext0, 16);
482
483 /* g2(n) = f1(n) * K2 + g1(n-1) */
484 gnext0 = ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
485 gnext0 = __SSAT(gnext0, 16);
486
487 /* f1(n) is saved in fcurr0 for next stage processing */
488 fcurr0 = fnext0;
489
490 stageCnt--;
491 }
492
493 /* y(n) = fN(n) */
494 *pDst++ = __SSAT(fcurr0, 16);
495
496 blkCnt--;
497 }
498
499 #endif /* #if !defined(ARM_MATH_CM0_FAMILY) */
500
501 }
502
503 /**
504 @} end of FIR_Lattice group
505 */
506