1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_absmin_f16.c
4 * Description: Minimum value of absolute values of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33
34 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
35 #include <limits.h>
36 #endif
37
38
39 /**
40 @ingroup groupStats
41 */
42
43 /**
44 @addtogroup AbsMin
45 @{
46 */
47
48 /**
49 @brief Minimum value of absolute values of a floating-point vector.
50 @param[in] pSrc points to the input vector
51 @param[in] blockSize number of samples in input vector
52 @param[out] pResult minimum value returned here
53 @param[out] pIndex index of minimum value returned here
54 @return none
55 */
56
57 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
58
59 #include "arm_helium_utils.h"
/*
 * Helium (MVE) variant: handles eight half-precision samples per iteration.
 *
 * Every lane keeps the smallest |x| it has seen so far together with the
 * position at which it was seen; the eight lanes are then reduced to a single
 * value / index pair.
 *
 * Positions are tracked in 16-bit lanes, so the reported index is only
 * meaningful while every position (and blockSize itself) fits in 16 bits.
 * With blockSize == 0 nothing is loaded and the seeds are returned:
 * *pResult = F16_ABSMAX, *pIndex = 0.
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    uint32_t        blkCnt;                 /* loop counter (32-bit so blockSize >> 3 is not truncated) */
    f16x8_t         vecSrc;                 /* current group of |samples| */
    float16_t const *pSrcVec;               /* read cursor */
    f16x8_t         curExtremValVec = vdupq_n_f16(F16_ABSMAX);  /* per-lane running minimum */
    float16_t       minValue = F16_ABSMAX;  /* overall minimum */
    uint16_t        idx = blockSize;        /* overall index, seeded with the "no match" sentinel */
    uint16x8_t      indexVec;               /* positions of the samples currently in vecSrc */
    uint16x8_t      curExtremIdxVec;        /* per-lane position of the running minimum */
    mve_pred16_t    p0;


    indexVec = vidupq_u16((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u16(0);

    pSrcVec = (float16_t const *) pSrc;
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vldrhq_f16(pSrcVec);
        pSrcVec += 8;
        vecSrc = vabsq(vecSrc);
        /*
         * Update the per-lane minimum and the position it came from.
         * The comparison is "<=", so a later sample that ties a lane's
         * current minimum replaces the position stored for that lane.
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Advance the lane positions to the next group of eight samples */
        indexVec = indexVec + 8;
        /*
         * Decrement the loop counter
         */
        blkCnt--;
    }
    /*
     * Tail: 1 to 7 leftover samples, handled with lane predication
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);

        /*
         * Predicated load: only the lanes that still hold valid samples are
         * read from memory, so nothing past the end of the input is accessed.
         */
        vecSrc = vldrhq_z_f16(pSrcVec, p0);
        vecSrc = vabsq(vecSrc);
        /*
         * Same per-lane update as above, restricted to the valid lanes
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }
    /*
     * Reduce the per-lane minima to the overall minimum
     */
    minValue = vminnmvq(minValue, curExtremValVec);
    /*
     * Lanes holding the overall minimum keep their recorded position;
     * every other lane is given blockSize as a sentinel
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
    /*
     * The smallest remaining position is the reported index
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
140
141 #else
142 #if defined(ARM_MATH_LOOPUNROLL)
/*
 * Scalar variant with the main loop unrolled by four.
 *
 * The first sample seeds the running minimum; every later sample replaces it
 * only when its absolute value is strictly smaller, so the first occurrence
 * of the minimum is the one whose index is reported.
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
  float16_t cur_absmin, out;      /* Current |sample| and running minimum */
  uint32_t blkCnt, outIndex;      /* Loop counter and index of the running minimum */
  uint32_t index;                 /* Index of the last sample consumed by the unrolled loop */

  /*
   * Empty input: there is no first sample to load and (blockSize - 1U) would
   * wrap around. Report the same seed values the Helium variant starts from.
   */
  if (blockSize == 0U)
  {
    *pResult = F16_ABSMAX;
    *pIndex = 0U;
    return;
  }

  /* Initialize index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;
  out = (out > 0.0f16) ? out : -out;
  /* Initialize index of extrema value. */
  index = 0U;

  /* Loop unrolling: process 4 samples at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Take the next consecutive samples one by one */
    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;
    /* compare for the extrema value */
    if (cur_absmin < out)
    {
      /* Update the extrema value and its index */
      out = cur_absmin;
      outIndex = index + 1U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 2U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 3U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: process the remaining 0 to 3 samples */
  blkCnt = (blockSize - 1U) % 4U;

  while (blkCnt > 0U)
  {
    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;
    if (cur_absmin < out)
    {
      out = cur_absmin;
      /* blkCnt samples are still pending, so this one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the extrema value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
229 #else
/*
 * Plain scalar variant: one sample per iteration.
 *
 * The first sample seeds the running minimum; a later sample replaces it only
 * when its absolute value is strictly smaller, so the first occurrence of the
 * minimum is the one whose index is reported.
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
  float16_t minVal, out;          /* Current |sample| and running minimum */
  uint32_t blkCnt, outIndex;      /* Loop counter and index of the running minimum */

  /*
   * Empty input: there is no first sample to load and (blockSize - 1U) would
   * wrap around. Report the same seed values the Helium variant starts from.
   */
  if (blockSize == 0U)
  {
    *pResult = F16_ABSMAX;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;

  /*
   * Load first input value that acts as reference value for comparison.
   * fabsf() works on float; the casts spell out the widening and narrowing
   * conversions that otherwise happen implicitly.
   */
  out = (float16_t)fabsf((float)*pSrc++);

  /* Initialize blkCnt with the number of samples still to visit */
  blkCnt = (blockSize - 1U);

  while (blkCnt > 0U)
  {
    /* Take the next consecutive sample */
    minVal = (float16_t)fabsf((float)*pSrc++);

    /* compare for the minimum value */
    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      /* blkCnt samples are still pending, so this one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
269 #endif /* defined(ARM_MATH_LOOPUNROLL) */
270 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
271 /**
272 @} end of AbsMin group
273 */
274
275 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
276
277