1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_fir_lattice_q15.c
4  * Description:  Q15 FIR lattice filter processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @addtogroup FIR_Lattice
37   @{
38  */
39 
40 /**
41   @brief         Processing function for Q15 FIR lattice filter.
42   @param[in]     S          points to an instance of the Q15 FIR lattice structure
43   @param[in]     pSrc       points to the block of input data
44   @param[out]    pDst       points to the block of output data
45   @param[in]     blockSize  number of samples to process
46  */
47 
arm_fir_lattice_q15(const arm_fir_lattice_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)48 void arm_fir_lattice_q15(
49   const arm_fir_lattice_instance_q15 * S,
50   const q15_t * pSrc,
51         q15_t * pDst,
52         uint32_t blockSize)
53 {
54         q15_t *pState = S->pState;                     /* State pointer */
55   const q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
56         q15_t *px;                                     /* Temporary state pointer */
57   const q15_t *pk;                                     /* Temporary coefficient pointer */
58         uint32_t numStages = S->numStages;             /* Number of stages in the filter */
59         uint32_t blkCnt, stageCnt;                     /* Loop counters */
60         q31_t fcurr0, fnext0, gnext0, gcurr0;          /* Temporary variables */
61 
62 #if (1)
63 //#if !defined(ARM_MATH_CM0_FAMILY)
64 
65 #if defined (ARM_MATH_LOOPUNROLL)
66   q31_t fcurr1, fnext1, gnext1;                  /* Temporary variables for second sample in loop unrolling */
67   q31_t fcurr2, fnext2, gnext2;                  /* Temporary variables for third sample in loop unrolling */
68   q31_t fcurr3, fnext3, gnext3;                  /* Temporary variables for fourth sample in loop unrolling */
69 #endif
70 
71   gcurr0 = 0;
72 
73 #if defined (ARM_MATH_LOOPUNROLL)
74 
75   /* Loop unrolling: Compute 4 outputs at a time */
76   blkCnt = blockSize >> 2U;
77 
78   while (blkCnt > 0U)
79   {
80     /* Read two samples from input buffer */
81     /* f0(n) = x(n) */
82     fcurr0 = *pSrc++;
83     fcurr1 = *pSrc++;
84 
85     /* Initialize state pointer */
86     px = pState;
87 
88     /* Initialize coeff pointer */
89     pk = pCoeffs;
90 
91     /* Read g0(n-1) from state buffer */
92     gcurr0 = *px;
93 
94     /* Process first sample for first tap */
95     /* f1(n) = f0(n) +  K1 * g0(n-1) */
96     fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
97     fnext0 = __SSAT(fnext0, 16);
98 
99     /* g1(n) = f0(n) * K1  +  g0(n-1) */
100     gnext0 = (q31_t) ((fcurr0 * (*pk)) >> 15U) + gcurr0;
101     gnext0 = __SSAT(gnext0, 16);
102 
103     /* Process second sample for first tap */
104     fnext1 = (q31_t) ((fcurr0 * (*pk)) >> 15U) + fcurr1;
105     fnext1 = __SSAT(fnext1, 16);
106     gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + fcurr0;
107     gnext1 = __SSAT(gnext1, 16);
108 
109     /* Read next two samples from input buffer */
110     /* f0(n+2) = x(n+2) */
111     fcurr2 = *pSrc++;
112     fcurr3 = *pSrc++;
113 
114     /* Process third sample for first tap */
115     fnext2 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + fcurr2;
116     fnext2 = __SSAT(fnext2, 16);
117     gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + fcurr1;
118     gnext2 = __SSAT(gnext2, 16);
119 
120     /* Process fourth sample for first tap */
121     fnext3 = (q31_t) ((fcurr2 * (*pk  )) >> 15U) + fcurr3;
122     fnext3 = __SSAT(fnext3, 16);
123     gnext3 = (q31_t) ((fcurr3 * (*pk++)) >> 15U) + fcurr2;
124     gnext3 = __SSAT(gnext3, 16);
125 
126     /* Copy only last input sample into the state buffer
127        which will be used for next samples processing */
128     *px++ = (q15_t) fcurr3;
129 
130     /* Update of f values for next coefficient set processing */
131     fcurr0 = fnext0;
132     fcurr1 = fnext1;
133     fcurr2 = fnext2;
134     fcurr3 = fnext3;
135 
136     /* Loop unrolling.  Process 4 taps at a time . */
137     stageCnt = (numStages - 1U) >> 2U;
138 
139     /* Loop over the number of taps.  Unroll by a factor of 4.
140        Repeat until we've computed numStages-3 coefficients. */
141 
142     /* Process 2nd, 3rd, 4th and 5th taps ... here */
143     while (stageCnt > 0U)
144     {
145       /* Read g1(n-1), g3(n-1) .... from state */
146       gcurr0 = *px;
147 
148       /* save g1(n) in state buffer */
149       *px++ = (q15_t) gnext3;
150 
151       /* Process first sample for 2nd, 6th .. tap */
152       /* Sample processing for K2, K6.... */
153       /* f1(n) = f0(n) +  K1 * g0(n-1) */
154       fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
155       fnext0 = __SSAT(fnext0, 16);
156 
157       /* Process second sample for 2nd, 6th .. tap */
158       /* for sample 2 processing */
159       fnext1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fcurr1;
160       fnext1 = __SSAT(fnext1, 16);
161 
162       /* Process third sample for 2nd, 6th .. tap */
163       fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fcurr2;
164       fnext2 = __SSAT(fnext2, 16);
165 
166       /* Process fourth sample for 2nd, 6th .. tap */
167       fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fcurr3;
168       fnext3 = __SSAT(fnext3, 16);
169 
170       /* g1(n) = f0(n) * K1  +  g0(n-1) */
171       /* Calculation of state values for next stage */
172       gnext3 = (q31_t) ((fcurr3 * (*pk)) >> 15U) + gnext2;
173       gnext3 = __SSAT(gnext3, 16);
174 
175       gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + gnext1;
176       gnext2 = __SSAT(gnext2, 16);
177 
178       gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + gnext0;
179       gnext1 = __SSAT(gnext1, 16);
180 
181       gnext0 = (q31_t) ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
182       gnext0 = __SSAT(gnext0, 16);
183 
184 
185       /* Read g2(n-1), g4(n-1) .... from state */
186       gcurr0 = *px;
187 
188       /* save g1(n) in state buffer */
189       *px++ = (q15_t) gnext3;
190 
191       /* Sample processing for K3, K7.... */
192       /* Process first sample for 3rd, 7th .. tap */
193       /* f3(n) = f2(n) +  K3 * g2(n-1) */
194       fcurr0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fnext0;
195       fcurr0 = __SSAT(fcurr0, 16);
196 
197       /* Process second sample for 3rd, 7th .. tap */
198       fcurr1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fnext1;
199       fcurr1 = __SSAT(fcurr1, 16);
200 
201       /* Process third sample for 3rd, 7th .. tap */
202       fcurr2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fnext2;
203       fcurr2 = __SSAT(fcurr2, 16);
204 
205       /* Process fourth sample for 3rd, 7th .. tap */
206       fcurr3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fnext3;
207       fcurr3 = __SSAT(fcurr3, 16);
208 
209       /* Calculation of state values for next stage */
210       /* g3(n) = f2(n) * K3  +  g2(n-1) */
211       gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15U) + gnext2;
212       gnext3 = __SSAT(gnext3, 16);
213 
214       gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15U) + gnext1;
215       gnext2 = __SSAT(gnext2, 16);
216 
217       gnext1 = (q31_t) ((fnext1 * (*pk)) >> 15U) + gnext0;
218       gnext1 = __SSAT(gnext1, 16);
219 
220       gnext0 = (q31_t) ((fnext0 * (*pk++)) >> 15U) + gcurr0;
221       gnext0 = __SSAT(gnext0, 16);
222 
223       /* Read g1(n-1), g3(n-1) .... from state */
224       gcurr0 = *px;
225 
226       /* save g1(n) in state buffer */
227       *px++ = (q15_t) gnext3;
228 
229       /* Sample processing for K4, K8.... */
230       /* Process first sample for 4th, 8th .. tap */
231       /* f4(n) = f3(n) +  K4 * g3(n-1) */
232       fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
233       fnext0 = __SSAT(fnext0, 16);
234 
235       /* Process second sample for 4th, 8th .. tap */
236       /* for sample 2 processing */
237       fnext1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fcurr1;
238       fnext1 = __SSAT(fnext1, 16);
239 
240       /* Process third sample for 4th, 8th .. tap */
241       fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fcurr2;
242       fnext2 = __SSAT(fnext2, 16);
243 
244       /* Process fourth sample for 4th, 8th .. tap */
245       fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fcurr3;
246       fnext3 = __SSAT(fnext3, 16);
247 
248       /* g4(n) = f3(n) * K4  +  g3(n-1) */
249       /* Calculation of state values for next stage */
250       gnext3 = (q31_t) ((fcurr3 * (*pk)) >> 15U) + gnext2;
251       gnext3 = __SSAT(gnext3, 16);
252 
253       gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + gnext1;
254       gnext2 = __SSAT(gnext2, 16);
255 
256       gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + gnext0;
257       gnext1 = __SSAT(gnext1, 16);
258 
259       gnext0 = (q31_t) ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
260       gnext0 = __SSAT(gnext0, 16);
261 
262       /* Read g2(n-1), g4(n-1) .... from state */
263       gcurr0 = *px;
264 
265       /* save g4(n) in state buffer */
266       *px++ = (q15_t) gnext3;
267 
268       /* Sample processing for K5, K9.... */
269       /* Process first sample for 5th, 9th .. tap */
270       /* f5(n) = f4(n) +  K5 * g4(n-1) */
271       fcurr0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fnext0;
272       fcurr0 = __SSAT(fcurr0, 16);
273 
274       /* Process second sample for 5th, 9th .. tap */
275       fcurr1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fnext1;
276       fcurr1 = __SSAT(fcurr1, 16);
277 
278       /* Process third sample for 5th, 9th .. tap */
279       fcurr2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fnext2;
280       fcurr2 = __SSAT(fcurr2, 16);
281 
282       /* Process fourth sample for 5th, 9th .. tap */
283       fcurr3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fnext3;
284       fcurr3 = __SSAT(fcurr3, 16);
285 
286       /* Calculation of state values for next stage */
287       /* g5(n) = f4(n) * K5  +  g4(n-1) */
288       gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15U) + gnext2;
289       gnext3 = __SSAT(gnext3, 16);
290 
291       gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15U) + gnext1;
292       gnext2 = __SSAT(gnext2, 16);
293 
294       gnext1 = (q31_t) ((fnext1 * (*pk)) >> 15U) + gnext0;
295       gnext1 = __SSAT(gnext1, 16);
296 
297       gnext0 = (q31_t) ((fnext0 * (*pk++)) >> 15U) + gcurr0;
298       gnext0 = __SSAT(gnext0, 16);
299 
300       stageCnt--;
301     }
302 
303     /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
304     stageCnt = (numStages - 1U) % 0x4U;
305 
306     while (stageCnt > 0U)
307     {
308       gcurr0 = *px;
309 
310       /* save g value in state buffer */
311       *px++ = (q15_t) gnext3;
312 
313       /* Process four samples for last three taps here */
314       fnext0 = (q31_t) ((gcurr0 * (*pk)) >> 15U) + fcurr0;
315       fnext0 = __SSAT(fnext0, 16);
316 
317       fnext1 = (q31_t) ((gnext0 * (*pk)) >> 15U) + fcurr1;
318       fnext1 = __SSAT(fnext1, 16);
319 
320       fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15U) + fcurr2;
321       fnext2 = __SSAT(fnext2, 16);
322 
323       fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15U) + fcurr3;
324       fnext3 = __SSAT(fnext3, 16);
325 
326       /* g1(n) = f0(n) * K1  +  g0(n-1) */
327       gnext3 = (q31_t) ((fcurr3 * (*pk)) >> 15U) + gnext2;
328       gnext3 = __SSAT(gnext3, 16);
329 
330       gnext2 = (q31_t) ((fcurr2 * (*pk)) >> 15U) + gnext1;
331       gnext2 = __SSAT(gnext2, 16);
332 
333       gnext1 = (q31_t) ((fcurr1 * (*pk)) >> 15U) + gnext0;
334       gnext1 = __SSAT(gnext1, 16);
335 
336       gnext0 = (q31_t) ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
337       gnext0 = __SSAT(gnext0, 16);
338 
339       /* Update of f values for next coefficient set processing */
340       fcurr0 = fnext0;
341       fcurr1 = fnext1;
342       fcurr2 = fnext2;
343       fcurr3 = fnext3;
344 
345       stageCnt--;
346     }
347 
348     /* The results in the 4 accumulators, store in the destination buffer. */
349     /* y(n) = fN(n) */
350 
351 #ifndef  ARM_MATH_BIG_ENDIAN
352     write_q15x2_ia (&pDst, __PKHBT(fcurr0, fcurr1, 16));
353     write_q15x2_ia (&pDst, __PKHBT(fcurr2, fcurr3, 16));
354 #else
355     write_q15x2_ia (&pDst, __PKHBT(fcurr1, fcurr0, 16));
356     write_q15x2_ia (&pDst, __PKHBT(fcurr3, fcurr2, 16));
357 #endif /* #ifndef  ARM_MATH_BIG_ENDIAN */
358 
359     blkCnt--;
360   }
361 
362   /* Loop unrolling: Compute remaining outputs */
363   blkCnt = blockSize % 0x4U;
364 
365 #else
366 
367   /* Initialize blkCnt with number of samples */
368   blkCnt = blockSize;
369 
370 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
371 
372   while (blkCnt > 0U)
373   {
374     /* f0(n) = x(n) */
375     fcurr0 = *pSrc++;
376 
377     /* Initialize state pointer */
378     px = pState;
379 
380     /* Initialize coeff pointer */
381     pk = pCoeffs;
382 
383     /* read g2(n) from state buffer */
384     gcurr0 = *px;
385 
386     /* for sample 1 processing */
387     /* f1(n) = f0(n) +  K1 * g0(n-1) */
388     fnext0 = (((q31_t) gcurr0 * (*pk)) >> 15U) + fcurr0;
389     fnext0 = __SSAT(fnext0, 16);
390 
391     /* g1(n) = f0(n) * K1  +  g0(n-1) */
392     gnext0 = (((q31_t) fcurr0 * (*pk++)) >> 15U) + gcurr0;
393     gnext0 = __SSAT(gnext0, 16);
394 
395     /* save g1(n) in state buffer */
396     *px++ = (q15_t) fcurr0;
397 
398     /* f1(n) is saved in fcurr0 for next stage processing */
399     fcurr0 = fnext0;
400 
401     stageCnt = (numStages - 1U);
402 
403     /* stage loop */
404     while (stageCnt > 0U)
405     {
406       /* read g2(n) from state buffer */
407       gcurr0 = *px;
408 
409       /* save g1(n) in state buffer */
410       *px++ = (q15_t) gnext0;
411 
412       /* Sample processing for K2, K3.... */
413       /* f2(n) = f1(n) +  K2 * g1(n-1) */
414       fnext0 = (((q31_t) gcurr0 * (*pk)) >> 15U) + fcurr0;
415       fnext0 = __SSAT(fnext0, 16);
416 
417       /* g2(n) = f1(n) * K2  +  g1(n-1) */
418       gnext0 = (((q31_t) fcurr0 * (*pk++)) >> 15U) + gcurr0;
419       gnext0 = __SSAT(gnext0, 16);
420 
421       /* f1(n) is saved in fcurr0 for next stage processing */
422       fcurr0 = fnext0;
423 
424       stageCnt--;
425     }
426 
427     /* y(n) = fN(n) */
428     *pDst++ = __SSAT(fcurr0, 16);
429 
430     blkCnt--;
431   }
432 
433 #else
434 /* alternate version for CM0_FAMILY */
435 
436   blkCnt = blockSize;
437 
438   while (blkCnt > 0U)
439   {
440     /* f0(n) = x(n) */
441     fcurr0 = *pSrc++;
442 
443     /* Initialize state pointer */
444     px = pState;
445 
446     /* Initialize coeff pointer */
447     pk = pCoeffs;
448 
449     /* read g0(n-1) from state buffer */
450     gcurr0 = *px;
451 
452     /* for sample 1 processing */
453     /* f1(n) = f0(n) +  K1 * g0(n-1) */
454     fnext0 = ((gcurr0 * (*pk)) >> 15U) + fcurr0;
455     fnext0 = __SSAT(fnext, 16);
456 
457     /* g1(n) = f0(n) * K1  +  g0(n-1) */
458     gnext0 = ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
459     gnext0 = __SSAT(gnext0, 16);
460 
461     /* save f0(n) in state buffer */
462     *px++ = (q15_t) fcurr0;
463 
464     /* f1(n) is saved in fcurr for next stage processing */
465     fcurr0 = fnext0;
466 
467     stageCnt = (numStages - 1U);
468 
469     /* stage loop */
470     while (stageCnt > 0U)
471     {
472       /* read g1(n-1) from state buffer */
473       gcurr0 = *px;
474 
475       /* save g0(n-1) in state buffer */
476       *px++ = (q15_t) gnext0;
477 
478       /* Sample processing for K2, K3.... */
479       /* f2(n) = f1(n) +  K2 * g1(n-1) */
480       fnext0 = ((gcurr0 * (*pk)) >> 15U) + fcurr0;
481       fnext0 = __SSAT(fnext0, 16);
482 
483       /* g2(n) = f1(n) * K2  +  g1(n-1) */
484       gnext0 = ((fcurr0 * (*pk++)) >> 15U) + gcurr0;
485       gnext0 = __SSAT(gnext0, 16);
486 
487       /* f1(n) is saved in fcurr0 for next stage processing */
488       fcurr0 = fnext0;
489 
490       stageCnt--;
491     }
492 
493     /* y(n) = fN(n) */
494     *pDst++ = __SSAT(fcurr0, 16);
495 
496     blkCnt--;
497   }
498 
499 #endif /* #if !defined(ARM_MATH_CM0_FAMILY) */
500 
501 }
502 
503 /**
504   @} end of FIR_Lattice group
505  */
506