1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_fir_interpolate_q31.c
4 * Description: Q31 FIR interpolation
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup FIR_Interpolate
37 @{
38 */
39
40 /**
41 @brief Processing function for the Q31 FIR interpolator.
42 @param[in] S points to an instance of the Q31 FIR interpolator structure
43 @param[in] pSrc points to the block of input data
44 @param[out] pDst points to the block of output data
45 @param[in] blockSize number of input samples to process
46
47 @par Scaling and Overflow Behavior
48 The function is implemented using an internal 64-bit accumulator.
49 The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
50 Thus, if the accumulator result overflows it wraps around rather than clip.
51 In order to avoid overflows completely the input signal must be scaled down by <code>1/(numTaps/L)</code>.
52 since <code>numTaps/L</code> additions occur per output sample.
53 After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
54 */
55
56 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
57
58 #include "arm_helium_utils.h"
arm_fir_interpolate_q31(const arm_fir_interpolate_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)59 ARM_DSP_ATTRIBUTE void arm_fir_interpolate_q31(
60 const arm_fir_interpolate_instance_q31 * S,
61 const q31_t * pSrc,
62 q31_t * pDst,
63 uint32_t blockSize)
64 {
65 q31_t *pState = S->pState; /* State pointer */
66 const q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
67 q31_t *pStateCurnt; /* Points to the current sample of the state */
68 const q31_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
69
70 uint32_t i, blkCnt; /* Loop counters */
71 uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
72 uint32_t strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L };
73 uint32x4_t vec_strides0 = vld1q_u32(strides);
74 uint32x4_t vec_strides1 = vec_strides0 + 1;
75 uint32x4_t vec_strides2 = vec_strides0 + 2;
76 uint32x4_t vec_strides3 = vec_strides0 + 3;
77 q31x4_t vecState, vecCoef;
78
79 /*
80 * S->pState buffer contains previous frame (phaseLen - 1) samples
81 * pStateCurnt points to the location where the new input data should be written
82 */
83 pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
84 /*
85 * Total number of intput samples
86 */
87 blkCnt = blockSize;
88 /*
89 * Loop over the blockSize.
90 */
91 while (blkCnt > 0U)
92 {
93 /*
94 * Copy new input sample into the state buffer
95 */
96 *pStateCurnt++ = *pSrc++;
97 /*
98 * Loop over the Interpolation factor.
99 */
100 i = S->L;
101 while (i > 0U)
102 {
103 /*
104 * Initialize state pointer
105 */
106 ptr1 = pState;
107 if (i >= 4)
108 {
109 /*
110 * Initialize coefficient pointer
111 */
112 ptr2 = pCoeffs + (i - 1 - 3U);
113
114 q63_t acc0 = 0LL;
115 q63_t acc1 = 0LL;
116 q63_t acc2 = 0LL;
117 q63_t acc3 = 0LL;
118
119 uint32_t tapCnt = phaseLen >> 2;
120 while (tapCnt > 0U)
121 {
122 vecState = vldrwq_s32(ptr1);
123
124 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides3);
125 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
126
127 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides2);
128 acc1 = vrmlaldavhaq(acc1, vecState, vecCoef);
129
130 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides1);
131 acc2 = vrmlaldavhaq(acc2, vecState, vecCoef);
132
133 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides0);
134 acc3 = vrmlaldavhaq(acc3, vecState, vecCoef);
135
136 ptr1 += 4;
137 ptr2 = ptr2 + S->L * 4;
138 tapCnt--;
139 }
140 tapCnt = phaseLen & 3;
141 if (tapCnt > 0U)
142 {
143 mve_pred16_t p0 = vctp32q(tapCnt);
144
145 vecState = vldrwq_z_s32(ptr1, p0);
146
147 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides3, p0);
148 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
149
150 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides2, p0);
151 acc1 = vrmlaldavhaq(acc1, vecState, vecCoef);
152
153 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides1, p0);
154 acc2 = vrmlaldavhaq(acc2, vecState, vecCoef);
155
156 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides0, p0);
157 acc3 = vrmlaldavhaq(acc3, vecState, vecCoef);
158 }
159
160 acc0 = asrl(acc0, 31 - 8);
161 acc1 = asrl(acc1, 31 - 8);
162 acc2 = asrl(acc2, 31 - 8);
163 acc3 = asrl(acc3, 31 - 8);
164
165 *pDst++ = (q31_t) acc0;
166 *pDst++ = (q31_t) acc1;
167 *pDst++ = (q31_t) acc2;
168 *pDst++ = (q31_t) acc3;
169 i -= 4;
170 }
171 else if (i >= 3)
172 {
173 /*
174 * Initialize coefficient pointer
175 */
176 ptr2 = pCoeffs + (i - 1U - 2);
177
178 q63_t acc0 = 0LL;
179 q63_t acc1 = 0LL;
180 q63_t acc2 = 0LL;
181
182 uint32_t tapCnt = phaseLen >> 2;
183 while (tapCnt > 0U)
184 {
185 vecState = vldrwq_s32(ptr1);
186
187 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides2);
188 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
189
190 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides1);
191 acc1 = vrmlaldavhaq(acc1, vecState, vecCoef);
192
193 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides0);
194 acc2 = vrmlaldavhaq(acc2, vecState, vecCoef);
195
196 ptr1 += 4;
197 ptr2 = ptr2 + S->L * 4;
198 tapCnt--;
199 }
200 tapCnt = phaseLen & 3;
201 if (tapCnt > 0U)
202 {
203 mve_pred16_t p0 = vctp32q(tapCnt);
204
205 vecState = vldrwq_z_s32(ptr1, p0);
206
207 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides2, p0);
208 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
209
210 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides1, p0);
211 acc1 = vrmlaldavhaq(acc1, vecState, vecCoef);
212
213 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides0, p0);
214 acc2 = vrmlaldavhaq(acc2, vecState, vecCoef);
215 }
216
217 acc0 = asrl(acc0, 31 - 8);
218 acc1 = asrl(acc1, 31 - 8);
219 acc2 = asrl(acc2, 31 - 8);
220
221 *pDst++ = (q31_t) acc0;
222 *pDst++ = (q31_t) acc1;
223 *pDst++ = (q31_t) acc2;
224 i -= 3;
225 }
226 else if (i >= 2)
227 {
228 /*
229 * Initialize coefficient pointer
230 */
231 ptr2 = pCoeffs + (i - 1U - 1);
232
233 q63_t acc0 = 0LL;
234 q63_t acc1 = 0LL;
235
236 uint32_t tapCnt = phaseLen >> 2;
237 while (tapCnt > 0U)
238 {
239 vecState = vldrwq_s32(ptr1);
240
241 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides1);
242 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
243
244 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides0);
245 acc1 = vrmlaldavhaq(acc1, vecState, vecCoef);
246
247 ptr1 += 4;
248 ptr2 = ptr2 + S->L * 4;
249 tapCnt--;
250 }
251 tapCnt = phaseLen & 3;
252 if (tapCnt > 0U)
253 {
254 mve_pred16_t p0 = vctp32q(tapCnt);
255
256 vecState = vldrwq_z_s32(ptr1, p0);
257
258 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides1, p0);
259 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
260
261 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides0, p0);
262 acc1 = vrmlaldavhaq(acc1, vecState, vecCoef);
263 }
264
265 acc0 = asrl(acc0, 31 - 8);
266 acc1 = asrl(acc1, 31 - 8);
267
268 *pDst++ = (q31_t) acc0;
269 *pDst++ = (q31_t) acc1;
270 i -= 2;
271 }
272 else
273 {
274 /*
275 * Initialize coefficient pointer
276 */
277 ptr2 = pCoeffs + (i - 1U);
278
279 q63_t acc0 = 0LL;
280
281 uint32_t tapCnt = phaseLen >> 2;
282 while (tapCnt > 0U)
283 {
284 vecState = vldrwq_s32(ptr1);
285 vecCoef = vldrwq_gather_shifted_offset_s32(ptr2, vec_strides0);
286
287 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
288
289 ptr1 += 4;
290 ptr2 = ptr2 + S->L * 4;
291 tapCnt--;
292 }
293 tapCnt = phaseLen & 3;
294 if (tapCnt > 0U)
295 {
296 mve_pred16_t p0 = vctp32q(tapCnt);
297
298 vecState = vldrwq_z_s32(ptr1, p0);
299 vecCoef = vldrwq_gather_shifted_offset_z_s32(ptr2, vec_strides0, p0);
300 acc0 = vrmlaldavhaq(acc0, vecState, vecCoef);
301 }
302
303 acc0 = asrl(acc0, 31 - 8);
304 *pDst++ = (q31_t) acc0;
305 /*
306 * Decrement the loop counter
307 */
308 i--;
309 }
310 }
311 /*
312 * Advance the state pointer by 1
313 * * to process the next group of interpolation factor number samples
314 */
315 pState = pState + 1;
316 /*
317 * Decrement the loop counter
318 */
319 blkCnt--;
320 }
321
322 /*
323 * Processing is complete.
324 * ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
325 * ** This prepares the state buffer for the next function call.
326 */
327
328 /*
329 * Points to the start of the state buffer
330 */
331 pStateCurnt = S->pState;
332 blkCnt = (phaseLen - 1U) >> 2;
333 while (blkCnt > 0U)
334 {
335 vst1q(pStateCurnt, vldrwq_s32(pState));
336 pState += 4;
337 pStateCurnt += 4;
338 blkCnt--;
339 }
340 blkCnt = (phaseLen - 1U) & 3;
341 if (blkCnt > 0U)
342 {
343 mve_pred16_t p0 = vctp32q(blkCnt);
344 vstrwq_p_s32(pStateCurnt, vldrwq_s32(pState), p0);
345 }
346 }
347 #else
arm_fir_interpolate_q31(const arm_fir_interpolate_instance_q31 * S,const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)348 ARM_DSP_ATTRIBUTE void arm_fir_interpolate_q31(
349 const arm_fir_interpolate_instance_q31 * S,
350 const q31_t * pSrc,
351 q31_t * pDst,
352 uint32_t blockSize)
353 {
354 #if (1)
355 //#if !defined(ARM_MATH_CM0_FAMILY)
356
357 q31_t *pState = S->pState; /* State pointer */
358 const q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
359 q31_t *pStateCur; /* Points to the current sample of the state */
360 q31_t *ptr1; /* Temporary pointer for state buffer */
361 const q31_t *ptr2; /* Temporary pointer for coefficient buffer */
362 q63_t sum0; /* Accumulators */
363 uint32_t i, blkCnt, tapCnt; /* Loop counters */
364 uint32_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
365 uint32_t j;
366
367 #if defined (ARM_MATH_LOOPUNROLL)
368 q63_t acc0, acc1, acc2, acc3;
369 q31_t x0, x1, x2, x3;
370 q31_t c0, c1, c2, c3;
371 #endif
372
373 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
374 /* pStateCur points to the location where the new input data should be written */
375 pStateCur = S->pState + (phaseLen - 1U);
376
377 #if defined (ARM_MATH_LOOPUNROLL)
378
379 /* Loop unrolling: Compute 4 outputs at a time */
380 blkCnt = blockSize >> 2U;
381
382 while (blkCnt > 0U)
383 {
384 /* Copy new input sample into the state buffer */
385 *pStateCur++ = *pSrc++;
386 *pStateCur++ = *pSrc++;
387 *pStateCur++ = *pSrc++;
388 *pStateCur++ = *pSrc++;
389
390 /* Address modifier index of coefficient buffer */
391 j = 1U;
392
393 /* Loop over the Interpolation factor. */
394 i = (S->L);
395
396 while (i > 0U)
397 {
398 /* Set accumulator to zero */
399 acc0 = 0;
400 acc1 = 0;
401 acc2 = 0;
402 acc3 = 0;
403
404 /* Initialize state pointer */
405 ptr1 = pState;
406
407 /* Initialize coefficient pointer */
408 ptr2 = pCoeffs + (S->L - j);
409
410 /* Loop over the polyPhase length. Unroll by a factor of 4.
411 Repeat until we've computed numTaps-(4*S->L) coefficients. */
412 tapCnt = phaseLen >> 2U;
413
414 x0 = *(ptr1++);
415 x1 = *(ptr1++);
416 x2 = *(ptr1++);
417
418 while (tapCnt > 0U)
419 {
420 /* Read the input sample */
421 x3 = *(ptr1++);
422
423 /* Read the coefficient */
424 c0 = *(ptr2);
425
426 /* Perform the multiply-accumulate */
427 acc0 += (q63_t) x0 * c0;
428 acc1 += (q63_t) x1 * c0;
429 acc2 += (q63_t) x2 * c0;
430 acc3 += (q63_t) x3 * c0;
431
432 /* Read the coefficient */
433 c1 = *(ptr2 + S->L);
434
435 /* Read the input sample */
436 x0 = *(ptr1++);
437
438 /* Perform the multiply-accumulate */
439 acc0 += (q63_t) x1 * c1;
440 acc1 += (q63_t) x2 * c1;
441 acc2 += (q63_t) x3 * c1;
442 acc3 += (q63_t) x0 * c1;
443
444 /* Read the coefficient */
445 c2 = *(ptr2 + S->L * 2);
446
447 /* Read the input sample */
448 x1 = *(ptr1++);
449
450 /* Perform the multiply-accumulate */
451 acc0 += (q63_t) x2 * c2;
452 acc1 += (q63_t) x3 * c2;
453 acc2 += (q63_t) x0 * c2;
454 acc3 += (q63_t) x1 * c2;
455
456 /* Read the coefficient */
457 c3 = *(ptr2 + S->L * 3);
458
459 /* Read the input sample */
460 x2 = *(ptr1++);
461
462 /* Perform the multiply-accumulate */
463 acc0 += (q63_t) x3 * c3;
464 acc1 += (q63_t) x0 * c3;
465 acc2 += (q63_t) x1 * c3;
466 acc3 += (q63_t) x2 * c3;
467
468
469 /* Upsampling is done by stuffing L-1 zeros between each sample.
470 * So instead of multiplying zeros with coefficients,
471 * Increment the coefficient pointer by interpolation factor times. */
472 ptr2 += 4 * S->L;
473
474 /* Decrement loop counter */
475 tapCnt--;
476 }
477
478 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
479 tapCnt = phaseLen % 0x4U;
480
481 while (tapCnt > 0U)
482 {
483 /* Read the input sample */
484 x3 = *(ptr1++);
485
486 /* Read the coefficient */
487 c0 = *(ptr2);
488
489 /* Perform the multiply-accumulate */
490 acc0 += (q63_t) x0 * c0;
491 acc1 += (q63_t) x1 * c0;
492 acc2 += (q63_t) x2 * c0;
493 acc3 += (q63_t) x3 * c0;
494
495 /* Increment the coefficient pointer by interpolation factor times. */
496 ptr2 += S->L;
497
498 /* update states for next sample processing */
499 x0 = x1;
500 x1 = x2;
501 x2 = x3;
502
503 /* Decrement loop counter */
504 tapCnt--;
505 }
506
507 /* The result is in the accumulator, store in the destination buffer. */
508 *(pDst ) = (q31_t) (acc0 >> 31);
509 *(pDst + S->L) = (q31_t) (acc1 >> 31);
510 *(pDst + 2 * S->L) = (q31_t) (acc2 >> 31);
511 *(pDst + 3 * S->L) = (q31_t) (acc3 >> 31);
512
513 pDst++;
514
515 /* Increment the address modifier index of coefficient buffer */
516 j++;
517
518 /* Decrement loop counter */
519 i--;
520 }
521
522 /* Advance the state pointer by 1
523 * to process the next group of interpolation factor number samples */
524 pState = pState + 4;
525
526 pDst += S->L * 3;
527
528 /* Decrement loop counter */
529 blkCnt--;
530 }
531
532 /* Loop unrolling: Compute remaining outputs */
533 blkCnt = blockSize % 0x4U;
534
535 #else
536
537 /* Initialize blkCnt with number of samples */
538 blkCnt = blockSize;
539
540 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
541
542 while (blkCnt > 0U)
543 {
544 /* Copy new input sample into the state buffer */
545 *pStateCur++ = *pSrc++;
546
547 /* Address modifier index of coefficient buffer */
548 j = 1U;
549
550 /* Loop over the Interpolation factor. */
551 i = S->L;
552 while (i > 0U)
553 {
554 /* Set accumulator to zero */
555 sum0 = 0;
556
557 /* Initialize state pointer */
558 ptr1 = pState;
559
560 /* Initialize coefficient pointer */
561 ptr2 = pCoeffs + (S->L - j);
562
563 /* Loop over the polyPhase length.
564 Repeat until we've computed numTaps-(4*S->L) coefficients. */
565
566 #if defined (ARM_MATH_LOOPUNROLL)
567
568 /* Loop unrolling: Compute 4 outputs at a time */
569 tapCnt = phaseLen >> 2U;
570
571 while (tapCnt > 0U)
572 {
573 /* Perform the multiply-accumulate */
574 sum0 += (q63_t) *ptr1++ * *ptr2;
575
576 /* Upsampling is done by stuffing L-1 zeros between each sample.
577 * So instead of multiplying zeros with coefficients,
578 * Increment the coefficient pointer by interpolation factor times. */
579 ptr2 += S->L;
580
581 sum0 += (q63_t) *ptr1++ * *ptr2;
582 ptr2 += S->L;
583
584 sum0 += (q63_t) *ptr1++ * *ptr2;
585 ptr2 += S->L;
586
587 sum0 += (q63_t) *ptr1++ * *ptr2;
588 ptr2 += S->L;
589
590 /* Decrement loop counter */
591 tapCnt--;
592 }
593
594 /* Loop unrolling: Compute remaining outputs */
595 tapCnt = phaseLen % 0x4U;
596
597 #else
598
599 /* Initialize tapCnt with number of samples */
600 tapCnt = phaseLen;
601
602 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
603
604 while (tapCnt > 0U)
605 {
606 /* Perform the multiply-accumulate */
607 sum0 += (q63_t) *ptr1++ * *ptr2;
608
609 /* Upsampling is done by stuffing L-1 zeros between each sample.
610 * So instead of multiplying zeros with coefficients,
611 * Increment the coefficient pointer by interpolation factor times. */
612 ptr2 += S->L;
613
614 /* Decrement loop counter */
615 tapCnt--;
616 }
617
618 /* The result is in the accumulator, store in the destination buffer. */
619 *pDst++ = (q31_t) (sum0 >> 31);
620
621 /* Increment the address modifier index of coefficient buffer */
622 j++;
623
624 /* Decrement the loop counter */
625 i--;
626 }
627
628 /* Advance the state pointer by 1
629 * to process the next group of interpolation factor number samples */
630 pState = pState + 1;
631
632 /* Decrement the loop counter */
633 blkCnt--;
634 }
635
636 /* Processing is complete.
637 Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
638 This prepares the state buffer for the next function call. */
639
640 /* Points to the start of the state buffer */
641 pStateCur = S->pState;
642
643 #if defined (ARM_MATH_LOOPUNROLL)
644
645 /* Loop unrolling: Compute 4 outputs at a time */
646 tapCnt = (phaseLen - 1U) >> 2U;
647
648 /* copy data */
649 while (tapCnt > 0U)
650 {
651 *pStateCur++ = *pState++;
652 *pStateCur++ = *pState++;
653 *pStateCur++ = *pState++;
654 *pStateCur++ = *pState++;
655
656 /* Decrement loop counter */
657 tapCnt--;
658 }
659
660 /* Loop unrolling: Compute remaining outputs */
661 tapCnt = (phaseLen - 1U) % 0x04U;
662
663 #else
664
665 /* Initialize tapCnt with number of samples */
666 tapCnt = (phaseLen - 1U);
667
668 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
669
670 /* Copy data */
671 while (tapCnt > 0U)
672 {
673 *pStateCur++ = *pState++;
674
675 /* Decrement loop counter */
676 tapCnt--;
677 }
678
679 #else
680 /* alternate version for CM0_FAMILY */
681
682 q31_t *pState = S->pState; /* State pointer */
683 const q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
684 q31_t *pStateCur; /* Points to the current sample of the state */
685 q31_t *ptr1; /* Temporary pointer for state buffer */
686 const q31_t *ptr2; /* Temporary pointer for coefficient buffer */
687 q63_t sum0; /* Accumulators */
688 uint32_t i, blkCnt, tapCnt; /* Loop counters */
689 uint32_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
690
691 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
692 /* pStateCur points to the location where the new input data should be written */
693 pStateCur = S->pState + (phaseLen - 1U);
694
695 /* Total number of intput samples */
696 blkCnt = blockSize;
697
698 /* Loop over the blockSize. */
699 while (blkCnt > 0U)
700 {
701 /* Copy new input sample into the state buffer */
702 *pStateCur++ = *pSrc++;
703
704 /* Loop over the Interpolation factor. */
705 i = S->L;
706
707 while (i > 0U)
708 {
709 /* Set accumulator to zero */
710 sum0 = 0;
711
712 /* Initialize state pointer */
713 ptr1 = pState;
714
715 /* Initialize coefficient pointer */
716 ptr2 = pCoeffs + (i - 1U);
717
718 /* Loop over the polyPhase length */
719 tapCnt = phaseLen;
720
721 while (tapCnt > 0U)
722 {
723 /* Perform the multiply-accumulate */
724 sum0 += ((q63_t) *ptr1++ * *ptr2);
725
726 /* Increment the coefficient pointer by interpolation factor times. */
727 ptr2 += S->L;
728
729 /* Decrement the loop counter */
730 tapCnt--;
731 }
732
733 /* The result is in the accumulator, store in the destination buffer. */
734 *pDst++ = (q31_t) (sum0 >> 31);
735
736 /* Decrement loop counter */
737 i--;
738 }
739
740 /* Advance the state pointer by 1
741 * to process the next group of interpolation factor number samples */
742 pState = pState + 1;
743
744 /* Decrement loop counter */
745 blkCnt--;
746 }
747
748 /* Processing is complete.
749 ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
750 ** This prepares the state buffer for the next function call. */
751
752 /* Points to the start of the state buffer */
753 pStateCur = S->pState;
754
755 tapCnt = phaseLen - 1U;
756
757 /* Copy data */
758 while (tapCnt > 0U)
759 {
760 *pStateCur++ = *pState++;
761
762 /* Decrement loop counter */
763 tapCnt--;
764 }
765
766 #endif /* #if !defined(ARM_MATH_CM0_FAMILY) */
767
768 }
769 #endif /* defined(ARM_MATH_MVEI) */
770 /**
771 @} end of FIR_Interpolate group
772 */
773