1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_var_f16.c
4 * Description: Variance of the elements of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31
32 #if defined(ARM_FLOAT16_SUPPORTED)
33
34
35 /**
36 @ingroup groupStats
37 */
38
39
40 /**
41 @addtogroup variance
42 @{
43 */
44
45 /**
46 @brief Variance of the elements of a floating-point vector.
47 @param[in] pSrc points to the input vector
48 @param[in] blockSize number of samples in input vector
49 @param[out] pResult variance value returned here
50 */
51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
52
53 #include "arm_helium_utils.h"
54
55
/*
 * Helium (MVE) implementation.
 *
 * The mean is obtained from arm_mean_f16(); the input is then walked eight
 * half-precision lanes at a time, accumulating the squared deviation from
 * that mean. The accumulated total is divided by (blockSize - 1).
 * Inputs with fewer than two samples yield a result of 0.
 */
void arm_var_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    float16_t mean;                                /* mean of the input vector */
    f16x8_t   sqDevAcc = vdupq_n_f16(0.0f16);      /* per-lane sum of squared deviations */
    int32_t   remaining;                           /* samples still to be processed */

    /* Nothing to divide by (blockSize - 1) when there are 0 or 1 samples. */
    if (blockSize <= 1U)
    {
        *pResult = 0;
        return;
    }

    arm_mean_f16(pSrc, blockSize, &mean);

    remaining = blockSize;
    do
    {
        /* Predicate built from the remaining count; it is applied to the
           load, the subtraction and the accumulation below. */
        mve_pred16_t tailPred = vctp16q(remaining);

        /* Predicated (zeroing) load of up to eight samples. */
        f16x8_t dev = vldrhq_z_f16((float16_t const *) pSrc, tailPred);

        /* dev = sample - mean */
        dev = vsubq_m(vuninitializedq_f16(), dev, mean, tailPred);

        /* sqDevAcc += dev * dev */
        sqDevAcc = vfmaq_m(sqDevAcc, dev, dev, tailPred);

        pSrc      += 8;
        remaining -= 8;
    }
    while (remaining > 0);

    /* Reduce the accumulator lanes and divide by (blockSize - 1). */
    *pResult = (_Float16)vecAddAcrossF16Mve(sqDevAcc) / (_Float16) (blockSize - 1.0f16);
}
93 #else
94
/*
 * Scalar implementation.
 *
 * Two passes over the input: the first computes the mean, the second
 * accumulates the squared deviation of every sample from that mean.
 * The accumulated total is divided by (blockSize - 1).
 * Inputs with fewer than two samples yield a result of 0.
 */
void arm_var_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
  const float16_t *pIn = pSrc;       /* read cursor over the input vector */
  uint32_t remaining;                /* iterations left in the current loop */
  _Float16 total = 0.0f;             /* running sum of the samples */
  _Float16 sqDevTotal = 0.0f;        /* running sum of squared deviations */
  _Float16 mean;                     /* mean of the input vector */
  _Float16 dev;                      /* deviation of one sample from the mean */

  /* Nothing to divide by (blockSize - 1) when there are 0 or 1 samples. */
  if (blockSize <= 1U)
  {
    *pResult = 0;
    return;
  }

  /* ---- Pass 1: sum of all samples ---- */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled by four; samples are still added strictly in input order. */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    total += (_Float16)pIn[0];
    total += (_Float16)pIn[1];
    total += (_Float16)pIn[2];
    total += (_Float16)pIn[3];

    pIn += 4;
  }

  /* Samples left over after the unrolled groups of four. */
  remaining = blockSize % 0x4U;

#else

  /* No unrolling: the loop below handles every sample. */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    total += (_Float16)*pIn++;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  mean = (_Float16)total / (_Float16) blockSize;

  /* ---- Pass 2: sum of squared deviations from the mean ---- */

  pIn = pSrc;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled by four; accumulation order matches the input order. */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    dev = (_Float16)pIn[0] - (_Float16)mean;
    sqDevTotal += (_Float16)dev * (_Float16)dev;

    dev = (_Float16)pIn[1] - (_Float16)mean;
    sqDevTotal += (_Float16)dev * (_Float16)dev;

    dev = (_Float16)pIn[2] - (_Float16)mean;
    sqDevTotal += (_Float16)dev * (_Float16)dev;

    dev = (_Float16)pIn[3] - (_Float16)mean;
    sqDevTotal += (_Float16)dev * (_Float16)dev;

    pIn += 4;
  }

  /* Samples left over after the unrolled groups of four. */
  remaining = blockSize % 0x4U;

#else

  /* No unrolling: the loop below handles every sample. */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    dev = (_Float16)*pIn++ - (_Float16)mean;
    sqDevTotal += (_Float16)dev * (_Float16)dev;
  }

  /* Variance = sum of squared deviations / (blockSize - 1) */
  *pResult = (_Float16)sqDevTotal / ((_Float16)blockSize - 1.0f16);
}
201 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
202
203 /**
204 @} end of variance group
205 */
206
207 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
208
209