/* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_levinson_durbin_f32.c
 * Description:  f32 version of Levinson Durbin algorithm
 *
 * $Date:        23 April 2021
 * $Revision:    V1.9.0
 *
 * Target Processor: Cortex-M and Cortex-A cores
 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "dsp/filtering_functions.h"

/**
  @ingroup groupFilters
 */

/**
  @defgroup LD Levinson Durbin Algorithm

  The Levinson-Durbin recursion solves the Yule-Walker equations to
  compute the coefficients of an autoregressive (AR) model from an
  autocovariance sequence.
 */

/**
  @addtogroup LD
  @{
 */

/**
  @brief         Levinson Durbin
  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
  @param[out]    a        autoregressive coefficients
  @param[out]    err      prediction error (variance)
  @param[in]     nbCoefs  number of autoregressive coefficients
 */

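/*
   Illustrative usage sketch (hypothetical values, not part of the
   library documentation): estimate a 4th-order AR model. phi must
   hold nbCoefs + 1 autocovariance lags; a receives nbCoefs
   coefficients.

     #define NB_COEFS 4

     const float32_t phi[NB_COEFS + 1] = {1.0f, 0.7f, 0.5f, 0.3f, 0.2f};
     float32_t a[NB_COEFS];
     float32_t err;

     arm_levinson_durbin_f32(phi, a, &err, NB_COEFS);
     // a[0..3] now holds the AR coefficients, err the residual variance
*/
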
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
#pragma message "Scalar version of arm_levinson_durbin_f32 built. Helium version has build issues with gcc."
#endif

#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)

#include "arm_helium_utils.h"

#define LANE23_MASK 0xFF00

ARM_DSP_ATTRIBUTE void arm_levinson_durbin_f32(const float32_t *phi,
                                               float32_t *a,
                                               float32_t *err,
                                               int nbCoefs)
{
    float32_t e;
    static const uint32_t revOffsetArray[4] = {3, 2, 1, 0};

    a[0] = phi[1] / phi[0];

    e = phi[0] - phi[1] * a[0];
    for (int p = 1; p < nbCoefs; p++)
    {
        float32_t suma = 0.0f;
        float32_t sumb = 0.0f;
        f32x4_t vecA, vecRevPhi, vecPhi, vecSumA, vecSumB;
        float32_t k;
        uint32_t blkCnt;
        const float32_t *pPhi, *pRevPhi, *pA;
        uint32x4_t revOffset;

        int nb, j, i;

        revOffset = vld1q(revOffsetArray);
        vecSumA = vdupq_n_f32(0.0f);
        vecSumB = vdupq_n_f32(0.0f);

        pRevPhi = &phi[p - 3];
        pPhi = &phi[1];
        pA = a;

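        /* Vector core: each iteration folds four terms into both sums,
               vecSumA += a[i..i+3] * phi[p-i..p-i-3]   (reversed gather)
               vecSumB += a[i..i+3] * phi[i+1..i+4]     (contiguous load) */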
        i = 0;
        blkCnt = p >> 2;
        while (blkCnt > 0)
        {
            vecA = vld1q(pA);
            pA += 4;

            vecPhi = vld1q(pPhi);
            pPhi += 4;

            vecRevPhi = vldrwq_gather_shifted_offset_f32(pRevPhi, revOffset);
            pRevPhi -= 4;

            vecSumA = vfmaq(vecSumA, vecA, vecRevPhi);
            vecSumB = vfmaq(vecSumB, vecA, vecPhi);

            i += 4;
            blkCnt--;
        }

        suma = vecAddAcrossF32Mve(vecSumA);
        sumb = vecAddAcrossF32Mve(vecSumB);

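        /* Scalar tail: fold in the remaining p & 3 terms */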
        blkCnt = p & 3;
        while (blkCnt > 0)
        {
            suma += a[i] * phi[p - i];
            sumb += a[i] * phi[i + 1];

            i++;
            blkCnt--;
        }
        k = (phi[p + 1] - suma) / (phi[0] - sumb);

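        /* k is the new reflection (PARCOR) coefficient. The previous
           coefficients are updated in place, a[j] -= k * a[p-1-j], four
           at a time: offset lanes 0,1 walk forward from a[0] while
           lanes 2,3 walk backward from a[p-1]. */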
        f32x4_t vecRevA, tmp;
        static int32_t orgOffsetArray[4] = {0, 1, -1, -2};
        static const int32_t offsetIncArray[4] = {2, 2, -2, -2};

        uint32x4_t offset, offsetInc, vecTmp;

        offset = vld1q_u32((uint32_t*)orgOffsetArray);
        vecTmp = vdupq_n_u32(p);

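        /* LANE23_MASK (0xFF00) predicates the upper two lanes, so p is
           added only to lanes 2 and 3: offset becomes {0, 1, p-1, p-2} */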
        offset = vaddq_m_u32(offset, offset, vecTmp, LANE23_MASK);
        offsetInc = vld1q_u32((uint32_t*)offsetIncArray);

        nb = p >> 2;
        j = 0;
        for (int i = 0; i < nb; i++)
        {

            /*
               x0 = a[j]     - k * a[p-1-j];
               x1 = a[j+1]   - k * a[p-2-j];
               x2 = a[p-1-j] - k * a[j];
               x3 = a[p-2-j] - k * a[j+1];

               a[j]     = x0;
               a[j+1]   = x1;
               a[p-1-j] = x2;
               a[p-2-j] = x3;
            */

            uint64_t tmpa, tmpb;
            vecA = vldrwq_gather_shifted_offset_f32(a, offset);

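            /* Swap the two 64-bit halves: vecA holds
               {a[j], a[j+1], a[p-1-j], a[p-2-j]} and vecRevA gets
               {a[p-1-j], a[p-2-j], a[j], a[j+1]} */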
            tmpa = vgetq_lane_u64((uint64x2_t)vecA, 0);
            tmpb = vgetq_lane_u64((uint64x2_t)vecA, 1);
            vecRevA = (f32x4_t) vsetq_lane_u64(tmpb, (uint64x2_t)vecA, 0);
            vecRevA = (f32x4_t) vsetq_lane_u64(tmpa, (uint64x2_t)vecRevA, 1);

            tmp = vsubq(vecA, vmulq_n_f32(vecRevA, k));
            vstrwq_scatter_shifted_offset_f32(a, offset, tmp);

            offset = vaddq(offset, offsetInc);

            j += 2;
        }

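        /* Tail: update the 1..3 coefficients the vector loop did not
           cover. When p & 3 == 3 the middle element pairs with itself
           (p-1-(j+1) == j+1), so it is updated on its own. */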
        switch (p & 3)
        {
        case 3:
        {
            float32_t x, y;
            x = a[j] - k * a[p-1-j];
            y = a[p-1-j] - k * a[j];

            a[j] = x;
            a[p-1-j] = y;

            a[j+1] = a[j+1] - k * a[p-1-(j+1)];
        }
        break;

        case 2:
        {
            float32_t x, y;
            x = a[j] - k * a[p-1-j];
            y = a[p-1-j] - k * a[j];

            a[j] = x;
            a[p-1-j] = y;
        }
        break;

        case 1:
            a[j] = a[j] - k * a[p-1-j];
            break;
        }

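        /* Append the new reflection coefficient and update the
           prediction error: e_p = e_{p-1} * (1 - k^2) */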
        a[p] = k;
        e = e * (1.0f - k * k);
    }
    *err = e;
}

#else
ARM_DSP_ATTRIBUTE void arm_levinson_durbin_f32(const float32_t *phi,
                                               float32_t *a,
                                               float32_t *err,
                                               int nbCoefs)
{
    float32_t e;
    int p;

    a[0] = phi[1] / phi[0];

    e = phi[0] - phi[1] * a[0];
    for (p = 1; p < nbCoefs; p++)
    {
        float32_t suma = 0.0f;
        float32_t sumb = 0.0f;
        float32_t k;
        int nb, j, i;

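        /* suma accumulates a[i] * phi[p-i], sumb accumulates
           a[i] * phi[i+1], over the p coefficients found so far */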
        for (i = 0; i < p; i++)
        {
            suma += a[i] * phi[p - i];
            sumb += a[i] * phi[i + 1];
        }

        k = (phi[p + 1] - suma) / (phi[0] - sumb);

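        /* In-place update: coefficients are processed in mirrored
           pairs a[j] and a[p-1-j], two per pass */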
        nb = p >> 1;
        j = 0;
        for (i = 0; i < nb; i++)
        {
            float32_t x, y;

            x = a[j] - k * a[p-1-j];
            y = a[p-1-j] - k * a[j];

            a[j] = x;
            a[p-1-j] = y;

            j++;
        }

        nb = p & 1;
        if (nb)
        {
            a[j] = a[j] - k * a[p-1-j];
        }

        a[p] = k;
        e = e * (1.0f - k * k);
    }
    *err = e;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
  @} end of LD group
 */