1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_weighted_sum_f16.c
4 * Description: Weighted Sum
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include <limits.h>
30 #include <math.h>
31
32 #include "dsp/support_functions_f16.h"
33
34 #if defined(ARM_FLOAT16_SUPPORTED)
35
36 /**
37 @ingroup groupSupport
38 */
39
40 /**
41 @defgroup weightedsum Weighted Sum
42
43 Weighted sum of values
44 */
45
46
47 /**
48 * @addtogroup weightedsum
49 * @{
50 */
51
52
53 /**
54 * @brief Weighted sum
55 *
56 *
57 * @param[in] *in Array of input values.
58 * @param[in] *weigths Weights
59 * @param[in] blockSize Number of samples in the input array.
60 * @return Weighted sum normalized by the weights: sum(in[i] * weigths[i]) / sum(weigths[i])
61 *
62 */
63
64 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
65
66 #include "arm_helium_utils.h"
67
arm_weighted_sum_f16(const float16_t * in,const float16_t * weigths,uint32_t blockSize)68 float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uint32_t blockSize)
69 {
70 _Float16 accum1, accum2;
71 float16x8_t accum1V, accum2V;
72 float16x8_t inV, wV;
73 const float16_t *pIn, *pW;
74 uint32_t blkCnt;
75
76
77 pIn = in;
78 pW = weigths;
79
80
81 accum1V = vdupq_n_f16(0.0f16);
82 accum2V = vdupq_n_f16(0.0f16);
83
84 blkCnt = blockSize >> 3;
85 while (blkCnt > 0)
86 {
87 inV = vld1q(pIn);
88 wV = vld1q(pW);
89
90 pIn += 4;
91 pW += 4;
92
93 accum1V = vfmaq(accum1V, inV, wV);
94 accum2V = vaddq(accum2V, wV);
95 blkCnt--;
96 }
97
98 accum1 = vecAddAcrossF16Mve(accum1V);
99 accum2 = vecAddAcrossF16Mve(accum2V);
100
101 blkCnt = blockSize & 7;
102 while(blkCnt > 0)
103 {
104 accum1 += (_Float16)*pIn++ * (_Float16)*pW;
105 accum2 += (_Float16)*pW++;
106 blkCnt--;
107 }
108
109
110 return (accum1 / accum2);
111 }
112
113 #else
114
arm_weighted_sum_f16(const float16_t * in,const float16_t * weigths,uint32_t blockSize)115 float16_t arm_weighted_sum_f16(const float16_t *in, const float16_t *weigths, uint32_t blockSize)
116 {
117
118 _Float16 accum1, accum2;
119 const float16_t *pIn, *pW;
120 uint32_t blkCnt;
121
122
123 pIn = in;
124 pW = weigths;
125
126 accum1=0.0f16;
127 accum2=0.0f16;
128
129 blkCnt = blockSize;
130 while(blkCnt > 0)
131 {
132 accum1 += (_Float16)*pIn++ * (_Float16)*pW;
133 accum2 += (_Float16)*pW++;
134 blkCnt--;
135 }
136
137 return(accum1 / accum2);
138 }
139 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
140
141 /**
142 * @} end of weightedsum group
143 */
144
145 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
146
147