1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_biquad_cascade_df2T_f64.c
4 * Description: Processing function for floating-point transposed direct form II Biquad cascade filter
5 *
6 * $Date: 03 June 2022
7 * $Revision: V1.9.1
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35 /**
36 @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
37
38 This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
39 The filters are implemented as a cascade of second order Biquad sections.
40 These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
41 Only floating-point data is supported.
42
43 This function operate on blocks of input and output data and each call to the function
44 processes <code>blockSize</code> samples through the filter.
45 <code>pSrc</code> points to the array of input data and
46 <code>pDst</code> points to the array of output data.
47 Both arrays contain <code>blockSize</code> values.
48
49 @par Algorithm
50 Each Biquad stage implements a second order filter using the difference equation:
51 <pre>
52 y[n] = b0 * x[n] + d1
53 d1 = b1 * x[n] + a1 * y[n] + d2
54 d2 = b2 * x[n] + a2 * y[n]
55 </pre>
56 where d1 and d2 represent the two state values.
57 @par
58 A Biquad filter using a transposed Direct Form II structure is shown below.
59 \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
60 Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
61 Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
62 Pay careful attention to the sign of the feedback coefficients.
63 Some design tools flip the sign of the feedback coefficients:
64 <pre>
65 y[n] = b0 * x[n] + d1;
66 d1 = b1 * x[n] - a1 * y[n] + d2;
67 d2 = b2 * x[n] - a2 * y[n];
68 </pre>
69 In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
70 @par
71 Higher order filters are realized as a cascade of second order sections.
72 <code>numStages</code> refers to the number of second order stages used.
73 For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
74 A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
75 coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
76 @par
77 <code>pState</code> points to the state variable array.
78 Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
79 The state variables are arranged in the <code>pState</code> array as:
80 <pre>
81 {d11, d12, d21, d22, ...}
82 </pre>
83 where <code>d1x</code> refers to the state variables for the first Biquad and
84 <code>d2x</code> refers to the state variables for the second Biquad.
85 The state array has a total length of <code>2*numStages</code> values.
86 The state variables are updated after each block of data is processed; the coefficients are untouched.
87 @par
88 The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
89 The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
90 That is why the Direct Form I structure supports Q15 and Q31 data types.
91 The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
92 Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
93 The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
94
95 @par Instance Structure
96 The coefficients and state variables for a filter are stored together in an instance data structure.
97 A separate instance structure must be defined for each filter.
98 Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
99
100 @par Init Functions
101 There is also an associated initialization function.
102 The initialization function performs following operations:
103 - Sets the values of the internal structure fields.
104 - Zeros out the values in the state buffer.
105 To do this manually without calling the init function, assign the follow subfields of the instance structure:
106 numStages, pCoeffs, pState. Also set all of the values in pState to zero.
107 @par
108 Use of the initialization function is optional except for the vectorized versions (Helium and Neon).
109 However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
110 To place an instance structure into a const data section, the instance structure must be manually initialized.
111 Set the values in the state buffer to zeros before static initialization.
112 For example, to statically initialize the instance structure use
113 <pre>
114 arm_biquad_cascade_df2T_instance_f64 S1 = {numStages, pState, pCoeffs};
115 arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};
116 </pre>
117 where <code>numStages</code> is the number of Biquad stages in the filter;
118 <code>pState</code> is the address of the state buffer.
119 <code>pCoeffs</code> is the address of the coefficient buffer;
120
121 @par Neon version
122 For Neon version, the function arm_biquad_cascade_df2T_compute_coefs_x must be
123 used in addition to arm_biquad_cascade_df2T_init_x.
124
125 See the documentation of arm_biquad_cascade_df2T_init_x for more details.
126
127 */
128
129 /**
130 @addtogroup BiquadCascadeDF2T
131 @{
132 */
133
134 /**
135 @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
136 @param[in] S points to an instance of the filter data structure
137 @param[in] pSrc points to the block of input data
138 @param[out] pDst points to the block of output data
139 @param[in] blockSize number of samples to process
140 */
141
142
143 #if defined(ARM_MATH_NEON) && defined(__aarch64__)
arm_biquad_cascade_df2T_f64(const arm_biquad_cascade_df2T_instance_f64 * S,const float64_t * pSrc,float64_t * pDst,uint32_t blockSize)144 ARM_DSP_ATTRIBUTE void arm_biquad_cascade_df2T_f64(
145 const arm_biquad_cascade_df2T_instance_f64 * S,
146 const float64_t * pSrc,
147 float64_t * pDst,
148 uint32_t blockSize)
149 {
150
151
152
153 const float64_t *pIn = pSrc; /* source pointer */
154 float64_t Xn0;
155 float64_t acc0;
156 float64_t *pOut = pDst; /* destination pointer */
157 float64_t *pState = S->pState; /* State pointer */
158 uint32_t sample, stage = S->numStages; /* loop counters */
159 float64_t const *pCurCoeffs = /* coefficient pointer */
160 (float64_t const *) S->pCoeffs;
161 float64x2_t b0Coeffs, a0Coeffs; /* Coefficients vector */
162 float64x2_t state; /* State vector*/
163 float64x2_t zeroV = vdupq_n_f64(0);
164
165 float64_t b0 ;
166
167
168 do
169 {
170
171 /* Reading the coefficients */
172 b0 = *pCurCoeffs++ ;
173 b0Coeffs = vld1q_f64(pCurCoeffs);
174 pCurCoeffs += 2 ;
175 a0Coeffs = vld1q_f64(pCurCoeffs);
176 pCurCoeffs +=2 ;
177
178 state = vld1q_f64(pState);
179
180 sample = blockSize >> 1U;
181 while (sample > 0U)
182 {
183
184 /* y[n] = b0 * x[n] + d1 */
185 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
186 /* d2 = b2 * x[n] + a2 * y[n] */
187
188 Xn0 = *pIn++ ;
189
190 /* Calculation of acc0*/
191 acc0 = b0*Xn0+vgetq_lane_f64(state, 0);
192
193
194 /*
195 * final initial
196 * state b0Coeffs state a0Coeffs
197 * | | | |
198 * __ __ __ __
199 * / \ / \ / \ / \
200 * | d1 | = | b1 | * Xn0 + | d2 | + | a1 | x acc0
201 * | d2 | | b2 | | 0 | | a2 |
202 * \__/ \__/ \__/ \__/
203 */
204
205 /* state -> initial state (see above) */
206
207 state = vextq_f64(state, zeroV, 1);
208
209 /* Calculation of final state */
210 state = vfmaq_n_f64(state, b0Coeffs, Xn0);
211 state = vfmaq_n_f64(state, a0Coeffs, acc0);
212
213 *pOut++ = acc0 ;
214
215 /* y[n] = b0 * x[n] + d1 */
216 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
217 /* d2 = b2 * x[n] + a2 * y[n] */
218
219 Xn0 = *pIn++ ;
220
221 /* Calculation of acc0*/
222 acc0 = b0*Xn0+vgetq_lane_f64(state, 0);
223
224
225 /*
226 * final initial
227 * state b0Coeffs state a0Coeffs
228 * | | | |
229 * __ __ __ __
230 * / \ / \ / \ / \
231 * | d1 | = | b1 | * Xn0 + | d2 | + | a1 | x acc0
232 * | d2 | | b2 | | 0 | | a2 |
233 * \__/ \__/ \__/ \__/
234 */
235
236 /* state -> initial state (see above) */
237
238 state = vextq_f64(state, zeroV, 1);
239
240 /* Calculation of final state */
241 state = vfmaq_n_f64(state, b0Coeffs, Xn0);
242 state = vfmaq_n_f64(state, a0Coeffs, acc0);
243
244 *pOut++ = acc0;
245 sample--;
246 }
247 sample = blockSize & 1 ;
248 while (sample > 0U)
249 {
250
251 /* y[n] = b0 * x[n] + d1 */
252 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
253 /* d2 = b2 * x[n] + a2 * y[n] */
254
255 Xn0 = *pIn++ ;
256
257 /* Calculation of acc0*/
258 acc0 = b0*Xn0+vgetq_lane_f64(state, 0);
259
260
261 /*
262 * final initial
263 * state b0Coeffs state a0Coeffs
264 * | | | |
265 * __ __ __ __
266 * / \ / \ / \ / \
267 * | d1 | = | b1 | * Xn0 + | d2 | + | a1 | x acc0
268 * | d2 | | b2 | | 0 | | a2 |
269 * \__/ \__/ \__/ \__/
270 */
271
272 /* state -> initial state (see above) */
273
274 state = vextq_f64(state, zeroV, 1);
275
276 /* Calculation of final state */
277 state = vfmaq_n_f64(state, b0Coeffs, Xn0);
278 state = vfmaq_n_f64(state, a0Coeffs, acc0);
279
280 *pOut++ = acc0 ;
281 sample--;
282 }
283 /* Store the updated state variables back into the state array */
284 pState[0] = vgetq_lane_f64(state, 0);
285 pState[1] = vgetq_lane_f64(state, 1);
286
287 pState += 2U;
288
289 /* The current stage output is given as the input to the next stage */
290 pIn = pDst;
291
292 /* Reset the output working pointer */
293 pOut = pDst;
294
295 stage--;
296
297 } while (stage > 0U);
298
299 }
300 #else
301
arm_biquad_cascade_df2T_f64(const arm_biquad_cascade_df2T_instance_f64 * S,const float64_t * pSrc,float64_t * pDst,uint32_t blockSize)302 ARM_DSP_ATTRIBUTE void arm_biquad_cascade_df2T_f64(
303 const arm_biquad_cascade_df2T_instance_f64 * S,
304 const float64_t * pSrc,
305 float64_t * pDst,
306 uint32_t blockSize)
307 {
308
309 const float64_t *pIn = pSrc; /* Source pointer */
310 float64_t *pOut = pDst; /* Destination pointer */
311 float64_t *pState = S->pState; /* State pointer */
312 const float64_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
313 float64_t acc1; /* Accumulator */
314 float64_t b0, b1, b2, a1, a2; /* Filter coefficients */
315 float64_t Xn1; /* Temporary input */
316 float64_t d1, d2; /* State variables */
317 uint32_t sample, stage = S->numStages; /* Loop counters */
318
319
320 do
321 {
322 /* Reading the coefficients */
323 b0 = pCoeffs[0];
324 b1 = pCoeffs[1];
325 b2 = pCoeffs[2];
326 a1 = pCoeffs[3];
327 a2 = pCoeffs[4];
328
329 /* Reading the state values */
330 d1 = pState[0];
331 d2 = pState[1];
332
333 pCoeffs += 5U;
334
335 #if defined (ARM_MATH_LOOPUNROLL)
336
337 /* Loop unrolling: Compute 16 outputs at a time */
338 sample = blockSize >> 4U;
339
340 while (sample > 0U) {
341
342 /* y[n] = b0 * x[n] + d1 */
343 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
344 /* d2 = b2 * x[n] + a2 * y[n] */
345
346 /* 1 */
347 Xn1 = *pIn++;
348
349 acc1 = b0 * Xn1 + d1;
350
351 d1 = b1 * Xn1 + d2;
352 d1 += a1 * acc1;
353
354 d2 = b2 * Xn1;
355 d2 += a2 * acc1;
356
357 *pOut++ = acc1;
358
359
360 /* 2 */
361 Xn1 = *pIn++;
362
363 acc1 = b0 * Xn1 + d1;
364
365 d1 = b1 * Xn1 + d2;
366 d1 += a1 * acc1;
367
368 d2 = b2 * Xn1;
369 d2 += a2 * acc1;
370
371 *pOut++ = acc1;
372
373 /* 3 */
374 Xn1 = *pIn++;
375
376 acc1 = b0 * Xn1 + d1;
377
378 d1 = b1 * Xn1 + d2;
379 d1 += a1 * acc1;
380
381 d2 = b2 * Xn1;
382 d2 += a2 * acc1;
383
384 *pOut++ = acc1;
385
386 /* 4 */
387 Xn1 = *pIn++;
388
389 acc1 = b0 * Xn1 + d1;
390
391 d1 = b1 * Xn1 + d2;
392 d1 += a1 * acc1;
393
394 d2 = b2 * Xn1;
395 d2 += a2 * acc1;
396
397 *pOut++ = acc1;
398
399 /* 5 */
400 Xn1 = *pIn++;
401
402 acc1 = b0 * Xn1 + d1;
403
404 d1 = b1 * Xn1 + d2;
405 d1 += a1 * acc1;
406
407 d2 = b2 * Xn1;
408 d2 += a2 * acc1;
409
410 *pOut++ = acc1;
411
412 /* 6 */
413 Xn1 = *pIn++;
414
415 acc1 = b0 * Xn1 + d1;
416
417 d1 = b1 * Xn1 + d2;
418 d1 += a1 * acc1;
419
420 d2 = b2 * Xn1;
421 d2 += a2 * acc1;
422
423 *pOut++ = acc1;
424
425 /* 7 */
426 Xn1 = *pIn++;
427
428 acc1 = b0 * Xn1 + d1;
429
430 d1 = b1 * Xn1 + d2;
431 d1 += a1 * acc1;
432
433 d2 = b2 * Xn1;
434 d2 += a2 * acc1;
435
436 *pOut++ = acc1;
437
438 /* 8 */
439 Xn1 = *pIn++;
440
441 acc1 = b0 * Xn1 + d1;
442
443 d1 = b1 * Xn1 + d2;
444 d1 += a1 * acc1;
445
446 d2 = b2 * Xn1;
447 d2 += a2 * acc1;
448
449 *pOut++ = acc1;
450
451 /* 9 */
452 Xn1 = *pIn++;
453
454 acc1 = b0 * Xn1 + d1;
455
456 d1 = b1 * Xn1 + d2;
457 d1 += a1 * acc1;
458
459 d2 = b2 * Xn1;
460 d2 += a2 * acc1;
461
462 *pOut++ = acc1;
463
464 /* 10 */
465 Xn1 = *pIn++;
466
467 acc1 = b0 * Xn1 + d1;
468
469 d1 = b1 * Xn1 + d2;
470 d1 += a1 * acc1;
471
472 d2 = b2 * Xn1;
473 d2 += a2 * acc1;
474
475 *pOut++ = acc1;
476
477 /* 11 */
478 Xn1 = *pIn++;
479
480 acc1 = b0 * Xn1 + d1;
481
482 d1 = b1 * Xn1 + d2;
483 d1 += a1 * acc1;
484
485 d2 = b2 * Xn1;
486 d2 += a2 * acc1;
487
488 *pOut++ = acc1;
489
490 /* 12 */
491 Xn1 = *pIn++;
492
493 acc1 = b0 * Xn1 + d1;
494
495 d1 = b1 * Xn1 + d2;
496 d1 += a1 * acc1;
497
498 d2 = b2 * Xn1;
499 d2 += a2 * acc1;
500
501 *pOut++ = acc1;
502
503 /* 13 */
504 Xn1 = *pIn++;
505
506 acc1 = b0 * Xn1 + d1;
507
508 d1 = b1 * Xn1 + d2;
509 d1 += a1 * acc1;
510
511 d2 = b2 * Xn1;
512 d2 += a2 * acc1;
513
514 *pOut++ = acc1;
515
516 /* 14 */
517 Xn1 = *pIn++;
518
519 acc1 = b0 * Xn1 + d1;
520
521 d1 = b1 * Xn1 + d2;
522 d1 += a1 * acc1;
523
524 d2 = b2 * Xn1;
525 d2 += a2 * acc1;
526
527 *pOut++ = acc1;
528
529 /* 15 */
530 Xn1 = *pIn++;
531
532 acc1 = b0 * Xn1 + d1;
533
534 d1 = b1 * Xn1 + d2;
535 d1 += a1 * acc1;
536
537 d2 = b2 * Xn1;
538 d2 += a2 * acc1;
539
540 *pOut++ = acc1;
541
542 /* 16 */
543 Xn1 = *pIn++;
544
545 acc1 = b0 * Xn1 + d1;
546
547 d1 = b1 * Xn1 + d2;
548 d1 += a1 * acc1;
549
550 d2 = b2 * Xn1;
551 d2 += a2 * acc1;
552
553 *pOut++ = acc1;
554
555 /* decrement loop counter */
556 sample--;
557 }
558
559 /* Loop unrolling: Compute remaining outputs */
560 sample = blockSize & 0xFU;
561
562 #else
563
564 /* Initialize blkCnt with number of samples */
565 sample = blockSize;
566
567 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
568
569 while (sample > 0U) {
570 Xn1 = *pIn++;
571
572 acc1 = b0 * Xn1 + d1;
573
574 d1 = b1 * Xn1 + d2;
575 d1 += a1 * acc1;
576
577 d2 = b2 * Xn1;
578 d2 += a2 * acc1;
579
580 *pOut++ = acc1;
581
582 /* decrement loop counter */
583 sample--;
584 }
585
586 /* Store the updated state variables back into the state array */
587 pState[0] = d1;
588 pState[1] = d2;
589
590 pState += 2U;
591
592 /* The current stage output is given as the input to the next stage */
593 pIn = pDst;
594
595 /* Reset the output working pointer */
596 pOut = pDst;
597
598 /* decrement loop counter */
599 stage--;
600
601 } while (stage > 0U);
602
603 }
604 #endif
605
606
607
608 /**
609 @} end of BiquadCascadeDF2T group
610 */
611