1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_levinson_durbin_f32.c
4  * Description:  f32 version of Levinson Durbin algorithm
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/filtering_functions.h"
30 
31 /**
32   @ingroup groupFilters
33  */
34 
35 /**
36   @defgroup LD Levinson Durbin Algorithm
37 
38  */
39 
40 /**
41   @addtogroup LD
42   @{
43  */
44 
45 /**
46   @brief         Levinson Durbin
47   @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
48   @param[out]    a        autoregressive coefficients
49   @param[out]    err      prediction error (variance)
50   @param[in]     nbCoefs  number of autoregressive coefficients
51  */
52 
53 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
54 #pragma message "Scalar version of arm_levinson_durbin_f32 built. Helium version has build issues with gcc."
55 #endif
56 
57 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(ARM_DSP_BUILT_WITH_GCC)
58 
59 #include "arm_helium_utils.h"
60 
61 #define LANE23_MASK 0xFF00
62 
arm_levinson_durbin_f32(const float32_t * phi,float32_t * a,float32_t * err,int nbCoefs)63 ARM_DSP_ATTRIBUTE void arm_levinson_durbin_f32(const float32_t *phi,
64   float32_t *a,
65   float32_t *err,
66   int nbCoefs)
67 {
68    float32_t e;
69    static const uint32_t revOffsetArray[4] = {3,2,1,0};
70 
71    a[0] = phi[1] / phi[0];
72 
73    e = phi[0] - phi[1] * a[0];
74    for(int p=1; p < nbCoefs; p++)
75    {
76       float32_t suma = 0.0f;
77       float32_t sumb = 0.0f;
78       f32x4_t vecA,vecRevPhi,vecPhi,vecSumA, vecSumB;
79       float32_t k;
80       uint32_t blkCnt;
81       const float32_t *pPhi,*pRevPhi,*pA;
82       uint32x4_t revOffset;
83 
84       int nb,j,i;
85 
86       revOffset = vld1q(revOffsetArray);
87       vecSumA = vdupq_n_f32(0.0f);
88       vecSumB = vdupq_n_f32(0.0f);
89 
90       pRevPhi = &phi[p-3];
91       pPhi = &phi[1];
92       pA = a;
93 
94       i = 0;
95       blkCnt = p >> 2;
96       while(blkCnt > 0)
97       {
98          vecA = vld1q(pA);
99          pA += 4;
100 
101          vecPhi = vld1q(pPhi);
102          pPhi += 4;
103 
104          vecRevPhi = vldrwq_gather_shifted_offset_f32(pRevPhi,revOffset);
105          pRevPhi -= 4;
106 
107          vecSumA = vfmaq(vecSumA,vecA,vecRevPhi);
108          vecSumB = vfmaq(vecSumB,vecA,vecPhi);
109 
110          i += 4;
111          blkCnt--;
112 
113       }
114 
115       suma = vecAddAcrossF32Mve(vecSumA);
116       sumb = vecAddAcrossF32Mve(vecSumB);
117 
118       blkCnt = p & 3;
119       while(blkCnt > 0)
120       {
121          suma += a[i] * phi[p - i];
122          sumb += a[i] * phi[i + 1];
123 
124          i++;
125          blkCnt--;
126       }
127 
128       k = (phi[p+1] - suma)/(phi[0] - sumb);
129 
130       f32x4_t vecRevA,tmp;
131       static int32_t orgOffsetArray[4]={0,1,-1,-2};
132       static const int32_t offsetIncArray[4]={2,2,-2,-2};
133 
134       uint32x4_t offset,offsetInc,vecTmp;
135 
136 
137       offset = vld1q_u32((uint32_t*)orgOffsetArray);
138       vecTmp = vdupq_n_u32(p);
139 
140       offset = vaddq_m_u32(offset,offset,vecTmp,LANE23_MASK);
141       offsetInc = vld1q_u32((uint32_t*)offsetIncArray);
142 
143       nb = p >> 2;
144       j=0;
145       for(int i = 0; i < nb ; i++)
146       {
147 
148           /*
149             x0=a[j] - k * a[p-1-j];
150             x1=a[j+1] - k * a[p-2-j];
151             x3=a[p-1-j] - k * a[j];
152             x4=a[p-2-j] - k * a[j+1];
153 
154             a[j] = x0;
155             a[j+1] = x1;
156             a[p-1-j] = x2;
157             a[p-2-j] = x3;
158           */
159 
160           uint64_t tmpa,tmpb;
161           vecA = vldrwq_gather_shifted_offset_f32(a,offset);
162 
163 
164           tmpa = vgetq_lane_u64((uint64x2_t)vecA,0);
165           tmpb = vgetq_lane_u64((uint64x2_t)vecA,1);
166           vecRevA = (f32x4_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0);
167           vecRevA = (f32x4_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1);
168 
169 
170           tmp = vsubq(vecA,vmulq_n_f32(vecRevA,k));
171           vstrwq_scatter_shifted_offset_f32(a, offset, tmp);
172 
173           offset = vaddq(offset,offsetInc);
174 
175           j+=2;
176 
177       }
178 
179       switch(p & 3)
180       {
181          case 3:
182          {
183             float32_t x,y;
184             x = a[j] - k * a[p-1-j];
185             y = a[p-1-j] - k * a[j];
186 
187             a[j] = x;
188             a[p-1-j] = y;
189 
190             a[j+1] = a[j+1] - k * a[p-1-(j+1)];
191          }
192          break;
193 
194          case 2:
195          {
196             float32_t x,y;
197             x = a[j] - k * a[p-1-j];
198             y = a[p-1-j] - k * a[j];
199 
200             a[j] = x;
201             a[p-1-j] = y;
202          }
203          break;
204 
205          case 1:
206             a[j] = a[j]- k * a[p-1-j];
207          break;
208       }
209 
210       a[p] = k;
211       e = e * (1.0f - k*k);
212 
213 
214    }
215    *err = e;
216 }
217 
218 #else
arm_levinson_durbin_f32(const float32_t * phi,float32_t * a,float32_t * err,int nbCoefs)219 ARM_DSP_ATTRIBUTE void arm_levinson_durbin_f32(const float32_t *phi,
220   float32_t *a,
221   float32_t *err,
222   int nbCoefs)
223 {
224    float32_t e;
225    int p;
226 
227    a[0] = phi[1] / phi[0];
228 
229    e = phi[0] - phi[1] * a[0];
230    for(p=1; p < nbCoefs; p++)
231    {
232       float32_t suma=0.0f;
233       float32_t sumb=0.0f;
234       float32_t k;
235       int nb,j,i;
236 
237       for(i=0; i < p; i++)
238       {
239          suma += a[i] * phi[p - i];
240          sumb += a[i] * phi[i + 1];
241       }
242 
243       k = (phi[p+1]-suma)/(phi[0] - sumb);
244 
245 
246       nb = p >> 1;
247       j=0;
248       for(i =0; i < nb ; i++)
249       {
250           float32_t x,y;
251 
252           x=a[j] - k * a[p-1-j];
253           y=a[p-1-j] - k * a[j];
254 
255           a[j] = x;
256           a[p-1-j] = y;
257 
258           j++;
259       }
260 
261       nb = p & 1;
262       if (nb)
263       {
264             a[j]=a[j]- k * a[p-1-j];
265       }
266 
267       a[p] = k;
268       e = e * (1.0f - k*k);
269 
270 
271    }
272    *err = e;
273 }
274 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
275 
276 /**
277   @} end of LD group
278  */
279