1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_levinson_durbin_f16.c
4 * Description: f16 version of Levinson Durbin algorithm
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/filtering_functions_f16.h"
30
31 /**
32 @ingroup groupFilters
33 */
34
35
36
37 /**
38 @addtogroup LD
39 @{
40 */
41
42 /**
43 @brief Levinson Durbin
44 @param[in] phi autocovariance vector starting with lag 0 (length is nbCoefs + 1)
45 @param[out] a autoregressive coefficients
46 @param[out] err prediction error (variance)
47 @param[in] nbCoefs number of autoregressive coefficients
48 */
49
50 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
51 #pragma message "Scalar version of arm_levinson_durbin_f16 built. Helium version has build issues with gcc."
52 #endif
53
54 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)
55
56 #include "arm_helium_utils.h"
57
58 #define LANE4567_MASK 0xFF00
59
arm_levinson_durbin_f16(const float16_t * phi,float16_t * a,float16_t * err,int nbCoefs)60 ARM_DSP_ATTRIBUTE void arm_levinson_durbin_f16(const float16_t *phi,
61 float16_t *a,
62 float16_t *err,
63 int nbCoefs)
64 {
65 _Float16 e;
66 static const uint16_t revOffsetArray[8] = {7,6,5,4,3,2,1,0};
67
68 a[0] = (_Float16)phi[1] / (_Float16)phi[0];
69
70 e = (_Float16)phi[0] - (_Float16)phi[1] * (_Float16)a[0];
71 for(int p=1; p < nbCoefs; p++)
72 {
73 _Float16 suma = 0.0f16;
74 _Float16 sumb = 0.0f16;
75 f16x8_t vecA,vecRevPhi,vecPhi,vecSumA, vecSumB;
76 _Float16 k;
77 uint32_t blkCnt;
78 const float16_t *pPhi,*pRevPhi,*pA;
79 uint16x8_t revOffset;
80
81 int nb,j,i;
82
83 revOffset = vld1q(revOffsetArray);
84 vecSumA = vdupq_n_f16(0.0f16);
85 vecSumB = vdupq_n_f16(0.0f16);
86
87 pRevPhi = &phi[p-7];
88 pPhi = &phi[1];
89 pA = a;
90
91 i = 0;
92 blkCnt = p >> 3;
93 while(blkCnt > 0)
94 {
95 vecA = vld1q(pA);
96 pA += 8;
97
98 vecPhi = vld1q(pPhi);
99 pPhi += 8;
100
101 vecRevPhi = vldrhq_gather_shifted_offset_f16(pRevPhi,revOffset);
102 pRevPhi -= 8;
103
104 vecSumA = vfmaq(vecSumA,vecA,vecRevPhi);
105 vecSumB = vfmaq(vecSumB,vecA,vecPhi);
106
107 i += 8;
108 blkCnt--;
109
110 }
111
112 suma = vecAddAcrossF16Mve(vecSumA);
113 sumb = vecAddAcrossF16Mve(vecSumB);
114
115 blkCnt = p & 7;
116 while(blkCnt > 0)
117 {
118 suma += (_Float16)a[i] * (_Float16)phi[p - i];
119 sumb += (_Float16)a[i] * (_Float16)phi[i + 1];
120
121 i++;
122 blkCnt--;
123 }
124
125 k = ((_Float16)phi[p+1] - suma)/((_Float16)phi[0] - sumb);
126
127 f16x8_t vecRevA,tmp;
128 static int16_t orgOffsetArray[8]={0,1,2,3,-1,-2,-3,-4};
129 static const int16_t offsetIncArray[8]={4,4,4,4,-4,-4,-4,-4};
130
131 uint16x8_t offset,offsetInc,vecTmp;
132
133
134 offset = vld1q_u16((uint16_t*)orgOffsetArray);
135 vecTmp = vdupq_n_u16(p);
136
137 offset = vaddq_m_u16(offset,offset,vecTmp,LANE4567_MASK);
138 offsetInc = vld1q_u16((uint16_t*)offsetIncArray);
139
140 nb = p >> 3;
141 j=0;
142 for(int i = 0; i < nb ; i++)
143 {
144
145 /*
146 x0=a[j] - k * a[p-1-j];
147 x1=a[j+1] - k * a[p-2-j];
148 x3=a[p-1-j] - k * a[j];
149 x4=a[p-2-j] - k * a[j+1];
150
151 a[j] = x0;
152 a[j+1] = x1;
153 a[p-1-j] = x2;
154 a[p-2-j] = x3;
155 */
156
157 uint64_t tmpa,tmpb;
158 vecA = vldrhq_gather_shifted_offset_f16(a,offset);
159
160
161 tmpa = vgetq_lane_u64((uint64x2_t)vecA,0);
162 tmpb = vgetq_lane_u64((uint64x2_t)vecA,1);
163 vecRevA = (f16x8_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0);
164 vecRevA = (f16x8_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1);
165
166
167 tmp = vsubq(vecA,vmulq_n_f16(vecRevA,k));
168 vstrhq_scatter_shifted_offset_f16(a, offset, tmp);
169
170 offset = vaddq(offset,offsetInc);
171
172 j+=4;
173
174 }
175
176 blkCnt = p & 7;
177
178 if (blkCnt)
179 {
180 nb = blkCnt >> 1;
181 for(int i =0;i < nb ; i++)
182 {
183 _Float16 x,y;
184
185 x=(_Float16)a[j] - (_Float16)k * (_Float16)a[p-1-j];
186 y=(_Float16)a[p-1-j] - (_Float16)k * (_Float16)a[j];
187
188 a[j] = x;
189 a[p-1-j] = y;
190
191 j++;
192 }
193
194 nb = blkCnt & 1;
195 if (nb)
196 {
197 a[j]=(_Float16)a[j]- (_Float16)k * (_Float16)a[p-1-j];
198 }
199 }
200
201
202 a[p] = k;
203 e = e * (1.0f16 - k*k);
204
205
206 }
207 *err = e;
208 }
209
210 #else
211
212 #if defined(ARM_FLOAT16_SUPPORTED)
213
arm_levinson_durbin_f16(const float16_t * phi,float16_t * a,float16_t * err,int nbCoefs)214 ARM_DSP_ATTRIBUTE void arm_levinson_durbin_f16(const float16_t *phi,
215 float16_t *a,
216 float16_t *err,
217 int nbCoefs)
218 {
219 _Float16 e;
220
221 a[0] = (_Float16)phi[1] / (_Float16)phi[0];
222
223 e = (_Float16)phi[0] - (_Float16)phi[1] * (_Float16)a[0];
224 for(int p=1; p < nbCoefs; p++)
225 {
226 _Float16 suma=0.0f16;
227 _Float16 sumb=0.0f16;
228 _Float16 k;
229 int nb,j;
230
231 for(int i=0; i < p; i++)
232 {
233 suma += (_Float16)a[i] * (_Float16)phi[p - i];
234 sumb += (_Float16)a[i] * (_Float16)phi[i + 1];
235 }
236
237 k = ((_Float16)phi[p+1]-suma)/((_Float16)phi[0] - sumb);
238
239
240 nb = p >> 1;
241 j=0;
242 for(int i =0;i < nb ; i++)
243 {
244 _Float16 x,y;
245
246 x=(_Float16)a[j] - (_Float16)k * (_Float16)a[p-1-j];
247 y=(_Float16)a[p-1-j] - (_Float16)k * (_Float16)a[j];
248
249 a[j] = x;
250 a[p-1-j] = y;
251
252 j++;
253 }
254
255 nb = p & 1;
256 if (nb)
257 {
258 a[j]=(_Float16)a[j]- (_Float16)k * (_Float16)a[p-1-j];
259 }
260
261 a[p] = k;
262 e = e * (1.0f16 - k*k);
263
264
265 }
266 *err = e;
267 }
268 #endif /* defined(ARM_FLOAT16_SUPPORTED */
269 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
270 /**
271 @} end of LD group
272 */
273