1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_barycenter_f32.c
4 * Description: Barycenter
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/support_functions.h"
30 #include <limits.h>
31 #include <math.h>
32
33
34 /**
35 @ingroup barycenter
36 @{
37 */
38
39
/**
 * @brief Barycenter
 *
 * Computes the weighted average of nbVectors vectors of dimension vecDim.
 *
 * @param[in]  in         List of vectors, stored one after another (nbVectors * vecDim values)
 * @param[in]  weights    Weights of the vectors (nbVectors values)
 * @param[out] out        Barycenter (vecDim values)
 * @param[in]  nbVectors  Number of vectors
 * @param[in]  vecDim     Dimension of space (vector dimension)
 * @return     None
 *
 */
52
53 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation of the weighted barycenter.
 *
 * in        : nbVectors vectors of vecDim values each, read one after another
 * weights   : one weight per input vector (nbVectors values)
 * out       : receives vecDim values; it is also used as the running accumulator
 * nbVectors : number of input vectors
 * vecDim    : number of values per vector
 *
 * Each output value is the weighted sum of the matching input values,
 * multiplied by the reciprocal of the sum of the weights. The sum of the
 * weights is not checked against zero before it is inverted.
 */
void arm_barycenter_f32(const float32_t *in,
                        const float32_t *weights,
                        float32_t *out,
                        uint32_t nbVectors,
                        uint32_t vecDim)
{
    const float32_t *pIn = in;
    const float32_t *pW = weights;
    float32_t *pOut;
    uint32_t blkCntVector, blkCntSample;
    float32_t accum = 0.0f;
    float32_t w;

    /* The output doubles as the accumulator, so it has to start at zero. */
    arm_fill_f32(0.0f, out, vecDim);

    /* Sum: input vectors are consumed four at a time. */
    blkCntVector = nbVectors >> 2;
    while (blkCntVector > 0)
    {
        /*
         * The four row pointers are derived inside the loop, i.e. only when
         * four complete vectors are still available. They are therefore never
         * formed beyond one past the end of the input buffer.
         */
        const float32_t *pIn1 = pIn;
        const float32_t *pIn2 = pIn1 + vecDim;
        const float32_t *pIn3 = pIn2 + vecDim;
        const float32_t *pIn4 = pIn3 + vecDim;
        f32x4_t outV, inV1, inV2, inV3, inV4;
        float32_t w1, w2, w3, w4;

        pOut = out;
        w1 = *pW++;
        w2 = *pW++;
        w3 = *pW++;
        w4 = *pW++;
        accum += w1 + w2 + w3 + w4;

        /* Four output values per iteration. */
        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q((const float32_t *) pOut);
            inV1 = vld1q(pIn1);
            inV2 = vld1q(pIn2);
            inV3 = vld1q(pIn3);
            inV4 = vld1q(pIn4);
            outV = vfmaq(outV, inV1, w1);
            outV = vfmaq(outV, inV2, w2);
            outV = vfmaq(outV, inV3, w3);
            outV = vfmaq(outV, inV4, w4);
            vst1q(pOut, outV);

            pOut += 4;
            pIn1 += 4;
            pIn2 += 4;
            pIn3 += 4;
            pIn4 += 4;

            blkCntSample--;
        }

        /* Remaining (vecDim % 4) output values, one at a time. */
        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn1++ * w1;
            *pOut = *pOut + *pIn2++ * w2;
            *pOut = *pOut + *pIn3++ * w3;
            *pOut = *pOut + *pIn4++ * w4;
            pOut++;
            blkCntSample--;
        }

        /*
         * pIn4 has walked through the whole fourth vector of this group, so it
         * now sits on the first vector of the next group.
         */
        pIn = pIn4;

        blkCntVector--;
    }

    /* Sum: remaining (nbVectors % 4) vectors, one at a time. */
    blkCntVector = nbVectors & 3;
    while (blkCntVector > 0)
    {
        f32x4_t inV, outV;

        pOut = out;
        w = *pW++;
        accum += w;

        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q_f32(pOut);
            inV = vld1q_f32(pIn);
            outV = vfmaq(outV, inV, w);
            vst1q_f32(pOut, outV);
            pOut += 4;
            pIn += 4;

            blkCntSample--;
        }

        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn++ * w;
            pOut++;
            blkCntSample--;
        }

        blkCntVector--;
    }

    /* Normalize: multiply every output value by 1 / (sum of weights). */
    pOut = out;
    accum = 1.0f / accum;

    blkCntSample = vecDim >> 2;
    while (blkCntSample > 0)
    {
        f32x4_t tmp;

        tmp = vld1q((const float32_t *) pOut);
        tmp = vmulq(tmp, accum);
        vst1q(pOut, tmp);
        pOut += 4;
        blkCntSample--;
    }

    blkCntSample = vecDim & 3;
    while (blkCntSample > 0)
    {
        *pOut = *pOut * accum;
        pOut++;
        blkCntSample--;
    }
}
196 #else
197 #if defined(ARM_MATH_NEON)
198
199 #include "NEMath.h"
/*
 * NEON implementation of the weighted barycenter.
 *
 * in        : nbVectors vectors of vecDim values each, read one after another
 * weights   : one weight per input vector (nbVectors values)
 * out       : receives vecDim values; it is also used as the running accumulator
 * nbVectors : number of input vectors
 * vecDim    : number of values per vector
 *
 * Each output value is the weighted sum of the matching input values,
 * multiplied by the reciprocal of the sum of the weights. The sum of the
 * weights is not checked against zero before it is inverted.
 */
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
{
    const float32_t *pIn = in;
    const float32_t *pW = weights;
    float32_t *pOut;
    uint32_t blkCntVector, blkCntSample;
    float32_t accum = 0.0f;
    float32_t w, w1, w2, w3, w4;
    float32x4_t tmp, inV, outV, inV1, inV2, inV3, inV4;

    /* The output doubles as the accumulator, so it has to start at zero. */
    tmp = vdupq_n_f32(0.0f);
    pOut = out;

    blkCntSample = vecDim >> 2;
    while (blkCntSample > 0)
    {
        vst1q_f32(pOut, tmp);
        pOut += 4;
        blkCntSample--;
    }

    blkCntSample = vecDim & 3;
    while (blkCntSample > 0)
    {
        *pOut = 0.0f;
        pOut++;
        blkCntSample--;
    }

    /* Sum: input vectors are consumed four at a time. */
    blkCntVector = nbVectors >> 2;
    while (blkCntVector > 0)
    {
        /*
         * The four row pointers are derived inside the loop, i.e. only when
         * four complete vectors are still available. They are therefore never
         * formed beyond one past the end of the input buffer.
         */
        const float32_t *pIn1 = pIn;
        const float32_t *pIn2 = pIn1 + vecDim;
        const float32_t *pIn3 = pIn2 + vecDim;
        const float32_t *pIn4 = pIn3 + vecDim;

        pOut = out;
        w1 = *pW++;
        w2 = *pW++;
        w3 = *pW++;
        w4 = *pW++;
        accum += w1 + w2 + w3 + w4;

        /* Four output values per iteration. */
        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q_f32(pOut);
            inV1 = vld1q_f32(pIn1);
            inV2 = vld1q_f32(pIn2);
            inV3 = vld1q_f32(pIn3);
            inV4 = vld1q_f32(pIn4);
            outV = vmlaq_n_f32(outV, inV1, w1);
            outV = vmlaq_n_f32(outV, inV2, w2);
            outV = vmlaq_n_f32(outV, inV3, w3);
            outV = vmlaq_n_f32(outV, inV4, w4);
            vst1q_f32(pOut, outV);
            pOut += 4;
            pIn1 += 4;
            pIn2 += 4;
            pIn3 += 4;
            pIn4 += 4;

            blkCntSample--;
        }

        /* Remaining (vecDim % 4) output values, one at a time. */
        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn1++ * w1;
            *pOut = *pOut + *pIn2++ * w2;
            *pOut = *pOut + *pIn3++ * w3;
            *pOut = *pOut + *pIn4++ * w4;
            pOut++;
            blkCntSample--;
        }

        /*
         * pIn4 has walked through the whole fourth vector of this group, so it
         * now sits on the first vector of the next group.
         */
        pIn = pIn4;

        blkCntVector--;
    }

    /* Sum: remaining (nbVectors % 4) vectors, one at a time. */
    blkCntVector = nbVectors & 3;
    while (blkCntVector > 0)
    {
        pOut = out;
        w = *pW++;
        accum += w;

        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q_f32(pOut);
            inV = vld1q_f32(pIn);
            outV = vmlaq_n_f32(outV, inV, w);
            vst1q_f32(pOut, outV);
            pOut += 4;
            pIn += 4;

            blkCntSample--;
        }

        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn++ * w;
            pOut++;
            blkCntSample--;
        }

        blkCntVector--;
    }

    /* Normalize: multiply every output value by 1 / (sum of weights). */
    pOut = out;
    accum = 1.0f / accum;

    blkCntSample = vecDim >> 2;
    while (blkCntSample > 0)
    {
        tmp = vld1q_f32(pOut);
        tmp = vmulq_n_f32(tmp, accum);
        vst1q_f32(pOut, tmp);
        pOut += 4;
        blkCntSample--;
    }

    blkCntSample = vecDim & 3;
    while (blkCntSample > 0)
    {
        *pOut = *pOut * accum;
        pOut++;
        blkCntSample--;
    }
}
352 #else
/*
 * Portable scalar implementation of the weighted barycenter.
 *
 * in        : nbVectors vectors of vecDim values each, read one after another
 * weights   : one weight per input vector (nbVectors values)
 * out       : receives vecDim values; it is also used as the running accumulator
 * nbVectors : number of input vectors
 * vecDim    : number of values per vector
 *
 * Each output value is the weighted sum of the matching input values divided
 * by the sum of the weights. The sum of the weights is not checked against
 * zero before the division.
 */
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
{
    const float32_t *src = in;
    float32_t weightSum = 0.0f;
    uint32_t vecIdx, dimIdx;

    /* Start from an all-zero accumulator. */
    for (dimIdx = 0; dimIdx < vecDim; dimIdx++)
    {
        out[dimIdx] = 0.0f;
    }

    /* Add every input vector, scaled by its weight, into the accumulator. */
    for (vecIdx = 0; vecIdx < nbVectors; vecIdx++)
    {
        const float32_t weight = weights[vecIdx];

        weightSum += weight;

        /* src keeps advancing, so the next pass reads the next vector. */
        for (dimIdx = 0; dimIdx < vecDim; dimIdx++)
        {
            out[dimIdx] += *src++ * weight;
        }
    }

    /* Divide the accumulated sums by the total weight. */
    for (dimIdx = 0; dimIdx < vecDim; dimIdx++)
    {
        out[dimIdx] /= weightSum;
    }
}
410 #endif
411 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
412
413 /**
414 * @} end of barycenter group
415 */
416