1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_min_f32.c
4 * Description: Minimum value of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
32 #include <limits.h>
33 #endif
34
35
36 /**
37 @ingroup groupStats
38 */
39
40 /**
41 @defgroup Min Minimum
42
43 Computes the minimum value of an array of data.
44 The function returns both the minimum value and its position within the array.
45 There are separate functions for floating-point, Q31, Q15, and Q7 data types.
46 */
47
48 /**
49 @addtogroup Min
50 @{
51 */
52
53 /**
54 @brief Minimum value of a floating-point vector.
55 @param[in] pSrc points to the input vector
56 @param[in] blockSize number of samples in input vector
57 @param[out] pResult minimum value returned here
58 @param[out] pIndex index of minimum value returned here
59 @return none
60 */
61
62 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
63
/*
 * Helium (MVE) implementation.
 *
 * Full groups of four samples are processed with vector instructions: each
 * lane keeps its own running minimum together with the element index at
 * which that minimum was seen. The four lanes are then reduced to a single
 * value/index pair, and the remaining (blockSize % 4) samples are handled by
 * a scalar loop.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the minimum value
 * pIndex    : receives the index of the minimum value
 */
void arm_min_f32(
    const float32_t * pSrc,
    uint32_t blockSize,
    float32_t * pResult,
    uint32_t * pIndex)
{
    uint32_t blkCnt;                                  /* loop counter */
    f32x4_t vecSrc;                                   /* four freshly loaded samples */
    float32_t const *pSrcVec;                         /* read cursor for the vector loop AND the tail */
    f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX);   /* per-lane running minimum */
    float32_t minValue = F32_MAX;                     /* overall minimum */
    uint32_t idx = blockSize;                         /* overall index of the minimum */
    uint32x4_t indexVec;                              /* element index held by each lane */
    uint32x4_t curExtremIdxVec;                       /* per-lane index of the running minimum */
    float32_t tmp;
    mve_pred16_t p0;

    /* Lane indices start at 0,1,2,3 and advance by 4 per iteration */
    indexVec = vidupq_u32((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u32(0);

    pSrcVec = (float32_t const *) pSrc;

    /* Process 4 samples at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;

        /*
         * Update the per-lane minimum and the per-lane index wherever the
         * new sample is less than or equal to the current lane minimum.
         *
         * NOTE(review): because the comparison is "<=", a lane that meets
         * the same value again records the later index, whereas the scalar
         * tail below uses a strict compare. Confirm which tie-breaking
         * rule is intended.
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        indexVec = indexVec + 4;

        blkCnt--;
    }

    /* Reduce the four lane minima to a single minimum value */
    minValue = vminnmvq(minValue, curExtremValVec);

    /*
     * Keep the recorded index only in lanes that hold the overall minimum;
     * every other lane gets blockSize, which is larger than any valid index.
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);

    /* The smallest surviving index is the position of the minimum */
    idx = vminvq(idx, indexVec);

    /*
     * Tail: the last (blockSize % 4) samples.
     *
     * They must be read through pSrcVec, which the vector loop left pointing
     * just past the last full group. pSrc was never advanced, so reading
     * through it would re-examine the first samples of the vector instead.
     */
    blkCnt = blockSize & 0x3;

    while (blkCnt > 0U)
    {
        tmp = *pSrcVec++;

        /* Strictly smaller sample: new minimum and new index */
        if (minValue > tmp)
        {
            minValue = tmp;
            idx = blockSize - blkCnt;
        }
        blkCnt--;
    }

    /* Save result */
    *pIndex = idx;
    *pResult = minValue;
}
145
146 #else
147 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * NEON implementation.
 *
 * Vectors of fewer than four samples are scanned with scalar code only.
 * Otherwise the first four samples seed four per-lane running minima, the
 * following full groups of four are folded in with vector compares, the
 * lanes are reduced to one value/index pair, and the leftover samples are
 * scanned with the same scalar loop.
 *
 * blockSize is expected to be at least 1: the first sample is read
 * unconditionally and the scalar counter is derived from blockSize - 1.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the minimum value
 * pIndex    : receives the index of the minimum value
 */
void arm_min_f32(
    const float32_t * pSrc,
    uint32_t blockSize,
    float32_t * pResult,
    uint32_t * pIndex)
{
    /* Element indices of the first and of the second group of four samples */
    static const uint32_t firstGroupIdx[4] = {0, 1, 2, 3};
    static const uint32_t secondGroupIdx[4] = {4, 5, 6, 7};

    float32_t sample;          /* sample under test in the scalar loop */
    float32_t minVal;          /* smallest value found so far */
    uint32_t remaining;        /* loop counter */
    uint32_t minPos = 0U;      /* index of the smallest value found so far */

    if (blockSize > 3)
    {
        const uint32x4_t noIdx = vdupq_n_u32(UINT_MAX);      /* marks lanes that lost the reduction */
        const uint32x4_t idxStep = vdupq_n_u32(4);           /* index advance per vector iteration */
        uint32x4_t groupIdx = vld1q_u32(secondGroupIdx);     /* indices of the group being loaded */
        uint32x4_t laneMinIdx = vld1q_u32(firstGroupIdx);    /* per-lane index of the running minimum */
        uint32x4_t mask;
        uint32x2_t idxPair;
        float32x4_t laneMin;                                 /* per-lane running minimum */
        float32x4_t group;
        float32x2_t valPair;

        /* Seed the running minima with the first four samples */
        laneMin = vld1q_f32(pSrc);
        pSrc += 4;

        /* Fold in the remaining full groups of four samples */
        for (remaining = (blockSize - 4 ) >> 2U; remaining > 0U; remaining--)
        {
            group = vld1q_f32(pSrc);
            pSrc += 4;

            /* Lanes where the new sample is strictly smaller take value and index */
            mask = vcltq_f32(group, laneMin);
            laneMin = vbslq_f32(mask, group, laneMin);
            laneMinIdx = vbslq_u32(mask, groupIdx, laneMinIdx);

            groupIdx = vaddq_u32(groupIdx, idxStep);
        }

        /* Horizontal minimum of the four lane values */
        valPair = vpmin_f32(vget_low_f32(laneMin), vget_high_f32(laneMin));
        valPair = vpmin_f32(valPair, valPair);
        minVal = vget_lane_f32(valPair, 0);

        /* Keep indices only for lanes equal to the minimum, then take the smallest index */
        mask = vceqq_f32(laneMin, vdupq_n_f32(minVal));
        laneMinIdx = vbslq_u32(mask, laneMinIdx, noIdx);

        idxPair = vpmin_u32(vget_low_u32(laneMinIdx), vget_high_u32(laneMinIdx));
        idxPair = vpmin_u32(idxPair, idxPair);
        minPos = vget_lane_u32(idxPair, 0);

        /* Samples left over after the last full group */
        remaining = (blockSize - 4 ) % 4U;
    }
    else
    {
        /* Short vector: the first sample is the reference, the rest are scanned below */
        minVal = *pSrc++;
        remaining = blockSize - 1;
    }

    /* Scalar scan of the samples that were not handled by the vector code */
    for (; remaining > 0U; remaining--)
    {
        sample = *pSrc++;

        /* Strictly smaller sample: new minimum and new index */
        if (minVal > sample)
        {
            minVal = sample;
            minPos = blockSize - remaining;
        }
    }

    /* Store the minimum value and its index into the destination pointers */
    *pResult = minVal;
    *pIndex = minPos;
}
260 #else
/*
 * Portable scalar implementation.
 *
 * The first sample is taken as the initial minimum and every following
 * sample is compared against it with a strict "greater than" test, so the
 * index written is that of the first occurrence of the minimum. When
 * ARM_MATH_LOOPUNROLL is defined (and ARM_MATH_AUTOVECTORIZE is not), the
 * scan is unrolled by four, with a plain loop for the leftover samples.
 *
 * blockSize is expected to be at least 1: the first sample is read
 * unconditionally and the loop counters are derived from blockSize - 1.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the minimum value
 * pIndex    : receives the index of the minimum value
 */
void arm_min_f32(
    const float32_t * pSrc,
    uint32_t blockSize,
    float32_t * pResult,
    uint32_t * pIndex)
{
    float32_t candidate;         /* sample under test */
    float32_t smallest;          /* smallest value found so far */
    uint32_t remaining;          /* loop counter */
    uint32_t smallestPos = 0U;   /* index of the smallest value found so far */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t base = 0U;          /* index of the sample preceding the current group of four */
#endif

    /* The first sample is the reference value for all comparisons */
    smallest = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

    /* Unrolled scan: four samples per iteration */
    for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
    {
        candidate = pSrc[0];
        if (smallest > candidate)
        {
            smallest = candidate;
            smallestPos = base + 1U;
        }

        candidate = pSrc[1];
        if (smallest > candidate)
        {
            smallest = candidate;
            smallestPos = base + 2U;
        }

        candidate = pSrc[2];
        if (smallest > candidate)
        {
            smallest = candidate;
            smallestPos = base + 3U;
        }

        candidate = pSrc[3];
        if (smallest > candidate)
        {
            smallest = candidate;
            smallestPos = base + 4U;
        }

        pSrc += 4;
        base += 4U;
    }

    /* Samples left over after the unrolled scan */
    remaining = (blockSize - 1U) % 4U;

#else

    /* No unrolling: every sample after the first is scanned one by one */
    remaining = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        candidate = *pSrc++;

        /* Strictly smaller sample: new minimum and new index */
        if (smallest > candidate)
        {
            smallest = candidate;
            smallestPos = blockSize - remaining;
        }
    }

    /* Store the minimum value and its index into the destination pointers */
    *pResult = smallest;
    *pIndex = smallestPos;
}
358 #endif /* #if defined(ARM_MATH_NEON) */
359 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
360
361 /**
362 @} end of Min group
363 */
364