1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_fir_interpolate_q15.c
4 * Description: Q15 FIR interpolation
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @addtogroup FIR_Interpolate
37 @{
38 */
39
40 /**
41 @brief Processing function for the Q15 FIR interpolator.
42 @param[in] S points to an instance of the Q15 FIR interpolator structure
43 @param[in] pSrc points to the block of input data
44 @param[out] pDst points to the block of output data
45 @param[in] blockSize number of input samples to process
46
47 @par Scaling and Overflow Behavior
48 The function is implemented using a 64-bit internal accumulator.
49 Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
50 The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
51 There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
52 After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
53 Lastly, the accumulator is saturated to yield a result in 1.15 format.
54 */
55
56 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
57
58 #include "arm_helium_utils.h"
arm_fir_interpolate_q15(const arm_fir_interpolate_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)59 ARM_DSP_ATTRIBUTE void arm_fir_interpolate_q15(
60 const arm_fir_interpolate_instance_q15 * S,
61 const q15_t * pSrc,
62 q15_t * pDst,
63 uint32_t blockSize)
64 {
65 q15_t *pState = S->pState; /* State pointer */
66 const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
67 q15_t *pStateCurnt; /* Points to the current sample of the state */
68 const q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
69
70 uint32_t i, blkCnt; /* Loop counters */
71 uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
72 uint16_t strides[8] = {
73 0, 1 * S->L, 2 * S->L, 3 * S->L,
74 4 * S->L, 5 * S->L, 6 * S->L, 7 * S->L
75 };
76 uint16x8_t vec_strides0 = *(uint16x8_t *) strides;
77 uint16x8_t vec_strides1 = vec_strides0 + 1;
78 uint16x8_t vec_strides2 = vec_strides0 + 2;
79 uint16x8_t vec_strides3 = vec_strides0 + 3;
80 q15x8_t vecState, vecCoef;
81
82 /*
83 * S->pState buffer contains previous frame (phaseLen - 1) samples
84 * pStateCurnt points to the location where the new input data should be written
85 */
86 pStateCurnt = S->pState + ((q15_t) phaseLen - 1);
87 /*
88 * Total number of intput samples
89 */
90 blkCnt = blockSize;
91 /*
92 * Loop over the blockSize.
93 */
94 while (blkCnt > 0U)
95 {
96 /*
97 * Copy new input sample into the state buffer
98 */
99 *pStateCurnt++ = *pSrc++;
100 /*
101 * Loop over the Interpolation factor.
102 */
103 i = S->L;
104 while (i > 0U)
105 {
106 /*
107 * Initialize state pointer
108 */
109 ptr1 = pState;
110 if (i >= 4)
111 {
112 /*
113 * Initialize coefficient pointer
114 */
115 ptr2 = pCoeffs + (i - 1 - 3U);
116
117 q63_t acc0 = 0LL;
118 q63_t acc1 = 0LL;
119 q63_t acc2 = 0LL;
120 q63_t acc3 = 0LL;
121
122 uint32_t tapCnt = phaseLen >> 3;
123 while (tapCnt > 0U)
124 {
125 vecState = vldrhq_s16(ptr1);
126
127 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides3);
128 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
129
130 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides2);
131 acc1 = vmlaldavaq(acc1, vecState, vecCoef);
132
133 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides1);
134 acc2 = vmlaldavaq(acc2, vecState, vecCoef);
135
136 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides0);
137 acc3 = vmlaldavaq(acc3, vecState, vecCoef);
138
139 ptr1 += 8;
140 ptr2 = ptr2 + S->L * 8;
141 tapCnt--;
142 }
143 tapCnt = phaseLen & 7;
144 if (tapCnt > 0U)
145 {
146 mve_pred16_t p0 = vctp16q(tapCnt);
147
148 vecState = vldrhq_z_s16(ptr1, p0);
149
150 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides3, p0);
151 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
152
153 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides2, p0);
154 acc1 = vmlaldavaq(acc1, vecState, vecCoef);
155
156 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides1, p0);
157 acc2 = vmlaldavaq(acc2, vecState, vecCoef);
158
159 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides0, p0);
160 acc3 = vmlaldavaq(acc3, vecState, vecCoef);
161 }
162
163 acc0 = asrl(acc0, 15);
164 acc1 = asrl(acc1, 15);
165 acc2 = asrl(acc2, 15);
166 acc3 = asrl(acc3, 15);
167
168 *pDst++ = (q15_t) __SSAT(acc0, 16);
169 *pDst++ = (q15_t) __SSAT(acc1, 16);
170 *pDst++ = (q15_t) __SSAT(acc2, 16);
171 *pDst++ = (q15_t) __SSAT(acc3, 16);
172 i -= 4;
173 }
174 else if (i >= 3)
175 {
176 /*
177 * Initialize coefficient pointer
178 */
179 ptr2 = pCoeffs + (i - 1U - 2);
180
181 q63_t acc0 = 0LL;
182 q63_t acc1 = 0LL;
183 q63_t acc2 = 0LL;
184
185 uint32_t tapCnt = phaseLen >> 3;
186 while (tapCnt > 0U)
187 {
188 vecState = vldrhq_s16(ptr1);
189
190 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides2);
191 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
192
193 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides1);
194 acc1 = vmlaldavaq(acc1, vecState, vecCoef);
195
196 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides0);
197 acc2 = vmlaldavaq(acc2, vecState, vecCoef);
198
199 ptr1 += 8;
200 ptr2 = ptr2 + S->L * 8;
201 tapCnt--;
202 }
203 tapCnt = phaseLen & 7;
204 if (tapCnt > 0U)
205 {
206 mve_pred16_t p0 = vctp16q(tapCnt);
207
208 vecState = vldrhq_z_s16(ptr1, p0);
209
210 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides2, p0);
211 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
212
213 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides1, p0);
214 acc1 = vmlaldavaq(acc1, vecState, vecCoef);
215
216 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides0, p0);
217 acc2 = vmlaldavaq(acc2, vecState, vecCoef);
218 }
219
220 acc0 = asrl(acc0, 15);
221 acc1 = asrl(acc1, 15);
222 acc2 = asrl(acc2, 15);
223
224 *pDst++ = (q15_t) __SSAT(acc0, 16);;
225 *pDst++ = (q15_t) __SSAT(acc1, 16);;
226 *pDst++ = (q15_t) __SSAT(acc2, 16);;
227 i -= 3;
228 }
229 else if (i >= 2)
230 {
231 /*
232 * Initialize coefficient pointer
233 */
234 ptr2 = pCoeffs + (i - 1U - 1);
235
236 q63_t acc0 = 0LL;
237 q63_t acc1 = 0LL;
238
239 uint32_t tapCnt = phaseLen >> 3;
240 while (tapCnt > 0U)
241 {
242 vecState = vldrhq_s16(ptr1);
243
244 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides1);
245 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
246
247 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides0);
248 acc1 = vmlaldavaq(acc1, vecState, vecCoef);
249
250 ptr1 += 8;
251 ptr2 = ptr2 + S->L * 8;
252 tapCnt--;
253 }
254 tapCnt = phaseLen & 7;
255 if (tapCnt > 0U)
256 {
257 mve_pred16_t p0 = vctp16q(tapCnt);
258
259 vecState = vldrhq_z_s16(ptr1, p0);
260
261 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides1, p0);
262 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
263
264 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides0, p0);
265 acc1 = vmlaldavaq(acc1, vecState, vecCoef);
266 }
267
268 acc0 = asrl(acc0, 15);
269 acc1 = asrl(acc1, 15);
270
271 *pDst++ = (q15_t) __SSAT(acc0, 16);
272 *pDst++ = (q15_t) __SSAT(acc1, 16);
273 i -= 2;
274 }
275 else
276 {
277 /*
278 * Initialize coefficient pointer
279 */
280 ptr2 = pCoeffs + (i - 1U);
281
282 q63_t acc0 = 0LL;
283
284 uint32_t tapCnt = phaseLen >> 3;
285 while (tapCnt > 0U)
286 {
287 vecState = vldrhq_s16(ptr1);
288 vecCoef = vldrhq_gather_shifted_offset_s16(ptr2, vec_strides0);
289
290 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
291
292 ptr1 += 8;
293 ptr2 = ptr2 + S->L * 8;
294 tapCnt--;
295 }
296 tapCnt = phaseLen & 7;
297 if (tapCnt > 0U)
298 {
299 mve_pred16_t p0 = vctp16q(tapCnt);
300
301 vecState = vldrhq_z_s16(ptr1, p0);
302 vecCoef = vldrhq_gather_shifted_offset_z_s16(ptr2, vec_strides0, p0);
303 acc0 = vmlaldavaq(acc0, vecState, vecCoef);
304 }
305
306 acc0 = asrl(acc0, 15);
307 *pDst++ = (q15_t) __SSAT(acc0, 16);
308 /*
309 * Decrement the loop counter
310 */
311 i--;
312 }
313 }
314 /*
315 * Advance the state pointer by 1
316 * * to process the next group of interpolation factor number samples
317 */
318 pState = pState + 1;
319 /*
320 * Decrement the loop counter
321 */
322 blkCnt--;
323 }
324
325 /*
326 * Processing is complete.
327 * Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
328 * This prepares the state buffer for the next function call.
329 */
330
331 /*
332 * Points to the start of the state buffer
333 */
334 pStateCurnt = S->pState;
335 blkCnt = (phaseLen - 1U) >> 3;
336 while (blkCnt > 0U)
337 {
338 vstrhq_s16(pStateCurnt, vldrhq_s16(pState));
339 pState += 8;
340 pStateCurnt += 8;
341 blkCnt--;
342 }
343 blkCnt = (phaseLen - 1U) & 7;
344 if (blkCnt > 0U)
345 {
346 mve_pred16_t p0 = vctp16q(blkCnt);
347 vstrhq_p_s16(pStateCurnt, vldrhq_s16(pState), p0);
348 }
349 }
350 #else
arm_fir_interpolate_q15(const arm_fir_interpolate_instance_q15 * S,const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)351 ARM_DSP_ATTRIBUTE void arm_fir_interpolate_q15(
352 const arm_fir_interpolate_instance_q15 * S,
353 const q15_t * pSrc,
354 q15_t * pDst,
355 uint32_t blockSize)
356 {
357 #if (1)
358 //#if !defined(ARM_MATH_CM0_FAMILY)
359
360 q15_t *pState = S->pState; /* State pointer */
361 const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
362 q15_t *pStateCur; /* Points to the current sample of the state */
363 q15_t *ptr1; /* Temporary pointer for state buffer */
364 const q15_t *ptr2; /* Temporary pointer for coefficient buffer */
365 q63_t sum0; /* Accumulators */
366 uint32_t i, blkCnt, tapCnt; /* Loop counters */
367 uint32_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
368 uint32_t j;
369
370 #if defined (ARM_MATH_LOOPUNROLL)
371 q63_t acc0, acc1, acc2, acc3;
372 q15_t x0, x1, x2, x3;
373 q15_t c0, c1, c2, c3;
374 #endif
375
376 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
377 /* pStateCur points to the location where the new input data should be written */
378 pStateCur = S->pState + (phaseLen - 1U);
379
380 #if defined (ARM_MATH_LOOPUNROLL)
381
382 /* Loop unrolling: Compute 4 outputs at a time */
383 blkCnt = blockSize >> 2U;
384
385 while (blkCnt > 0U)
386 {
387 /* Copy new input sample into the state buffer */
388 *pStateCur++ = *pSrc++;
389 *pStateCur++ = *pSrc++;
390 *pStateCur++ = *pSrc++;
391 *pStateCur++ = *pSrc++;
392
393 /* Address modifier index of coefficient buffer */
394 j = 1U;
395
396 /* Loop over the Interpolation factor. */
397 i = (S->L);
398
399 while (i > 0U)
400 {
401 /* Set accumulator to zero */
402 acc0 = 0;
403 acc1 = 0;
404 acc2 = 0;
405 acc3 = 0;
406
407 /* Initialize state pointer */
408 ptr1 = pState;
409
410 /* Initialize coefficient pointer */
411 ptr2 = pCoeffs + (S->L - j);
412
413 /* Loop over the polyPhase length. Unroll by a factor of 4.
414 Repeat until we've computed numTaps-(4*S->L) coefficients. */
415 tapCnt = phaseLen >> 2U;
416
417 x0 = *(ptr1++);
418 x1 = *(ptr1++);
419 x2 = *(ptr1++);
420
421 while (tapCnt > 0U)
422 {
423 /* Read the input sample */
424 x3 = *(ptr1++);
425
426 /* Read the coefficient */
427 c0 = *(ptr2);
428
429 /* Perform the multiply-accumulate */
430 acc0 += (q63_t) x0 * c0;
431 acc1 += (q63_t) x1 * c0;
432 acc2 += (q63_t) x2 * c0;
433 acc3 += (q63_t) x3 * c0;
434
435 /* Read the coefficient */
436 c1 = *(ptr2 + S->L);
437
438 /* Read the input sample */
439 x0 = *(ptr1++);
440
441 /* Perform the multiply-accumulate */
442 acc0 += (q63_t) x1 * c1;
443 acc1 += (q63_t) x2 * c1;
444 acc2 += (q63_t) x3 * c1;
445 acc3 += (q63_t) x0 * c1;
446
447 /* Read the coefficient */
448 c2 = *(ptr2 + S->L * 2);
449
450 /* Read the input sample */
451 x1 = *(ptr1++);
452
453 /* Perform the multiply-accumulate */
454 acc0 += (q63_t) x2 * c2;
455 acc1 += (q63_t) x3 * c2;
456 acc2 += (q63_t) x0 * c2;
457 acc3 += (q63_t) x1 * c2;
458
459 /* Read the coefficient */
460 c3 = *(ptr2 + S->L * 3);
461
462 /* Read the input sample */
463 x2 = *(ptr1++);
464
465 /* Perform the multiply-accumulate */
466 acc0 += (q63_t) x3 * c3;
467 acc1 += (q63_t) x0 * c3;
468 acc2 += (q63_t) x1 * c3;
469 acc3 += (q63_t) x2 * c3;
470
471
472 /* Upsampling is done by stuffing L-1 zeros between each sample.
473 * So instead of multiplying zeros with coefficients,
474 * Increment the coefficient pointer by interpolation factor times. */
475 ptr2 += 4 * S->L;
476
477 /* Decrement loop counter */
478 tapCnt--;
479 }
480
481 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
482 tapCnt = phaseLen % 0x4U;
483
484 while (tapCnt > 0U)
485 {
486 /* Read the input sample */
487 x3 = *(ptr1++);
488
489 /* Read the coefficient */
490 c0 = *(ptr2);
491
492 /* Perform the multiply-accumulate */
493 acc0 += (q63_t) x0 * c0;
494 acc1 += (q63_t) x1 * c0;
495 acc2 += (q63_t) x2 * c0;
496 acc3 += (q63_t) x3 * c0;
497
498 /* Increment the coefficient pointer by interpolation factor times. */
499 ptr2 += S->L;
500
501 /* update states for next sample processing */
502 x0 = x1;
503 x1 = x2;
504 x2 = x3;
505
506 /* Decrement loop counter */
507 tapCnt--;
508 }
509
510 /* The result is in the accumulator, store in the destination buffer. */
511 *(pDst ) = (q15_t) (__SSAT((acc0 >> 15), 16));
512 *(pDst + S->L) = (q15_t) (__SSAT((acc1 >> 15), 16));
513 *(pDst + 2 * S->L) = (q15_t) (__SSAT((acc2 >> 15), 16));
514 *(pDst + 3 * S->L) = (q15_t) (__SSAT((acc3 >> 15), 16));
515
516 pDst++;
517
518 /* Increment the address modifier index of coefficient buffer */
519 j++;
520
521 /* Decrement loop counter */
522 i--;
523 }
524
525 /* Advance the state pointer by 1
526 * to process the next group of interpolation factor number samples */
527 pState = pState + 4;
528
529 pDst += S->L * 3;
530
531 /* Decrement loop counter */
532 blkCnt--;
533 }
534
535 /* Loop unrolling: Compute remaining outputs */
536 blkCnt = blockSize % 0x4U;
537
538 #else
539
540 /* Initialize blkCnt with number of samples */
541 blkCnt = blockSize;
542
543 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
544
545 while (blkCnt > 0U)
546 {
547 /* Copy new input sample into the state buffer */
548 *pStateCur++ = *pSrc++;
549
550 /* Address modifier index of coefficient buffer */
551 j = 1U;
552
553 /* Loop over the Interpolation factor. */
554 i = S->L;
555 while (i > 0U)
556 {
557 /* Set accumulator to zero */
558 sum0 = 0;
559
560 /* Initialize state pointer */
561 ptr1 = pState;
562
563 /* Initialize coefficient pointer */
564 ptr2 = pCoeffs + (S->L - j);
565
566 /* Loop over the polyPhase length.
567 Repeat until we've computed numTaps-(4*S->L) coefficients. */
568
569 #if defined (ARM_MATH_LOOPUNROLL)
570
571 /* Loop unrolling: Compute 4 outputs at a time */
572 tapCnt = phaseLen >> 2U;
573
574 while (tapCnt > 0U)
575 {
576 /* Perform the multiply-accumulate */
577 sum0 += (q63_t) *ptr1++ * *ptr2;
578
579 /* Upsampling is done by stuffing L-1 zeros between each sample.
580 * So instead of multiplying zeros with coefficients,
581 * Increment the coefficient pointer by interpolation factor times. */
582 ptr2 += S->L;
583
584 sum0 += (q63_t) *ptr1++ * *ptr2;
585 ptr2 += S->L;
586
587 sum0 += (q63_t) *ptr1++ * *ptr2;
588 ptr2 += S->L;
589
590 sum0 += (q63_t) *ptr1++ * *ptr2;
591 ptr2 += S->L;
592
593 /* Decrement loop counter */
594 tapCnt--;
595 }
596
597 /* Loop unrolling: Compute remaining outputs */
598 tapCnt = phaseLen % 0x4U;
599
600 #else
601
602 /* Initialize tapCnt with number of samples */
603 tapCnt = phaseLen;
604
605 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
606
607 while (tapCnt > 0U)
608 {
609 /* Perform the multiply-accumulate */
610 sum0 += (q63_t) *ptr1++ * *ptr2;
611
612 /* Upsampling is done by stuffing L-1 zeros between each sample.
613 * So instead of multiplying zeros with coefficients,
614 * Increment the coefficient pointer by interpolation factor times. */
615 ptr2 += S->L;
616
617 /* Decrement loop counter */
618 tapCnt--;
619 }
620
621 /* The result is in the accumulator, store in the destination buffer. */
622 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
623
624 /* Increment the address modifier index of coefficient buffer */
625 j++;
626
627 /* Decrement the loop counter */
628 i--;
629 }
630
631 /* Advance the state pointer by 1
632 * to process the next group of interpolation factor number samples */
633 pState = pState + 1;
634
635 /* Decrement the loop counter */
636 blkCnt--;
637 }
638
639 /* Processing is complete.
640 Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
641 This prepares the state buffer for the next function call. */
642
643 /* Points to the start of the state buffer */
644 pStateCur = S->pState;
645
646 #if defined (ARM_MATH_LOOPUNROLL)
647
648 /* Loop unrolling: Compute 4 outputs at a time */
649 tapCnt = (phaseLen - 1U) >> 2U;
650
651 /* copy data */
652 while (tapCnt > 0U)
653 {
654 write_q15x2_ia (&pStateCur, read_q15x2_ia (&pState));
655 write_q15x2_ia (&pStateCur, read_q15x2_ia (&pState));
656
657 /* Decrement loop counter */
658 tapCnt--;
659 }
660
661 /* Loop unrolling: Compute remaining outputs */
662 tapCnt = (phaseLen - 1U) % 0x04U;
663
664 #else
665
666 /* Initialize tapCnt with number of samples */
667 tapCnt = (phaseLen - 1U);
668
669 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
670
671 /* Copy data */
672 while (tapCnt > 0U)
673 {
674 *pStateCur++ = *pState++;
675
676 /* Decrement loop counter */
677 tapCnt--;
678 }
679
680 #else
681 /* alternate version for CM0_FAMILY */
682
683 q15_t *pState = S->pState; /* State pointer */
684 const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
685 q15_t *pStateCur; /* Points to the current sample of the state */
686 q15_t *ptr1; /* Temporary pointer for state buffer */
687 const q15_t *ptr2; /* Temporary pointer for coefficient buffer */
688 q63_t sum0; /* Accumulators */
689 uint32_t i, blkCnt, tapCnt; /* Loop counters */
690 uint32_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
691
692 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
693 /* pStateCur points to the location where the new input data should be written */
694 pStateCur = S->pState + (phaseLen - 1U);
695
696 /* Total number of intput samples */
697 blkCnt = blockSize;
698
699 /* Loop over the blockSize. */
700 while (blkCnt > 0U)
701 {
702 /* Copy new input sample into the state buffer */
703 *pStateCur++ = *pSrc++;
704
705 /* Loop over the Interpolation factor. */
706 i = S->L;
707
708 while (i > 0U)
709 {
710 /* Set accumulator to zero */
711 sum0 = 0;
712
713 /* Initialize state pointer */
714 ptr1 = pState;
715
716 /* Initialize coefficient pointer */
717 ptr2 = pCoeffs + (i - 1U);
718
719 /* Loop over the polyPhase length */
720 tapCnt = phaseLen;
721
722 while (tapCnt > 0U)
723 {
724 /* Perform the multiply-accumulate */
725 sum0 += ((q63_t) *ptr1++ * *ptr2);
726
727 /* Increment the coefficient pointer by interpolation factor times. */
728 ptr2 += S->L;
729
730 /* Decrement the loop counter */
731 tapCnt--;
732 }
733
734 /* Store the result after converting to 1.15 format in the destination buffer. */
735 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
736
737 /* Decrement loop counter */
738 i--;
739 }
740
741 /* Advance the state pointer by 1
742 * to process the next group of interpolation factor number samples */
743 pState = pState + 1;
744
745 /* Decrement loop counter */
746 blkCnt--;
747 }
748
749 /* Processing is complete.
750 ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
751 ** This prepares the state buffer for the next function call. */
752
753 /* Points to the start of the state buffer */
754 pStateCur = S->pState;
755
756 tapCnt = phaseLen - 1U;
757
758 /* Copy data */
759 while (tapCnt > 0U)
760 {
761 *pStateCur++ = *pState++;
762
763 /* Decrement loop counter */
764 tapCnt--;
765 }
766
767 #endif /* #if !defined(ARM_MATH_CM0_FAMILY) */
768
769 }
770 #endif /* defined(ARM_MATH_MVEI)*/
771 /**
772 @} end of FIR_Interpolate group
773 */
774