1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_min_f32.c
4 * Description: Minimum value of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
32 #include <limits.h>
33 #endif
34
35
36 /**
37 @ingroup groupStats
38 */
39
40 /**
41 @defgroup Min Minimum
42
43 Computes the minimum value of an array of data.
44 The function returns both the minimum value and its position within the array.
45 There are separate functions for floating-point, Q31, Q15, and Q7 data types.
46 */
47
48 /**
49 @addtogroup Min
50 @{
51 */
52
53 /**
54 @brief Minimum value of a floating-point vector.
55 @param[in] pSrc points to the input vector
56 @param[in] blockSize number of samples in input vector
57 @param[out] pResult minimum value returned here
58 @param[out] pIndex index of minimum value returned here
59 */
60
61 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
62
/*
 * MVE implementation of arm_min_f32.
 *
 * The input is scanned four samples at a time while a per-lane running
 * minimum and the index at which it was seen are tracked.  The lanes are
 * then reduced to a single value/index pair, and the remaining
 * (blockSize & 3) samples are handled one by one.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   minimum value returned here
 * pIndex    index of the minimum value returned here
 *
 * When blockSize is 0 nothing is read: *pResult is F32_MAX and *pIndex is 0.
 */
void arm_min_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    uint32_t blkCnt;                                /* loop counter */
    f32x4_t vecSrc;                                 /* four samples loaded from the input */
    float32_t const *pSrcVec;                       /* read cursor shared by vector loop and tail */
    f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX); /* running minimum of each lane */
    float32_t minValue = F32_MAX;                   /* overall minimum found so far */
    uint32_t idx = blockSize;                       /* index of the overall minimum */
    uint32x4_t indexVec;                            /* indices of the samples held in vecSrc */
    uint32x4_t curExtremIdxVec;                     /* index of each lane's running minimum */
    float32_t tmp;
    mve_pred16_t p0;

    /* Lane indices of the first vector: 0, 1, 2, 3 */
    indexVec = vidupq_u32((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u32(0);

    pSrcVec = pSrc;

    /* Process 4 samples at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;

        /*
         * Update the running minimum of every lane, and remember the index
         * of the sample that produced it.
         * NOTE(review): the comparison is "<=", so within a lane a later
         * sample equal to the running minimum replaces the stored index.
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Indices of the next four samples */
        indexVec = indexVec + 4;

        blkCnt--;
    }

    /* Minimum across the four lanes (and the initial F32_MAX) */
    minValue = vminnmvq(minValue, curExtremValVec);

    /*
     * Lanes that do not hold the minimum get the index blockSize, which is
     * larger than any valid index, so they cannot win the reduction below.
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);

    /* Smallest index among the lanes that hold the minimum */
    idx = vminvq(idx, indexVec);

    /*
     * Tail: the 0-3 samples that did not fill a whole vector.
     * They are read through pSrcVec, which the vector loop left pointing
     * just past the last sample it consumed; pSrc itself still points at
     * the start of the input.
     */
    blkCnt = blockSize & 0x3;

    while (blkCnt > 0U)
    {
        tmp = *pSrcVec++;

        /* Strictly smaller samples replace the current minimum */
        if (minValue > tmp)
        {
            minValue = tmp;
            idx = blockSize - blkCnt;
        }
        blkCnt--;
    }

    /* Save result */
    *pIndex = idx;
    *pResult = minValue;
}
144
145 #else
146 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * NEON implementation of arm_min_f32.
 *
 * Inputs shorter than four samples are scanned one sample at a time.
 * Longer inputs keep a per-lane running minimum together with the index at
 * which it was first seen, reduce the four lanes to a single value/index
 * pair, and finish the leftover samples one by one.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector; the first sample is read
 *           unconditionally, so at least one sample is expected
 * pResult   minimum value returned here
 * pIndex    index of the minimum value returned here
 */
void arm_min_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    static const uint32_t firstBlockIdx[4] = {0, 1, 2, 3};
    static const uint32_t secondBlockIdx[4] = {4, 5, 6, 7};

    float32_t best;          /* smallest value found so far */
    float32_t sample;        /* sample currently being examined */
    uint32_t bestIdx = 0U;   /* position of best in the input */
    uint32_t remaining;      /* loop counter */

    if (blockSize < 4U)
    {
        /* Not enough samples for one full vector: scalar scan */
        best = *pSrc++;

        for (remaining = blockSize - 1; remaining > 0U; remaining--)
        {
            sample = *pSrc++;

            /* Only a strictly smaller sample replaces the minimum */
            if (sample < best)
            {
                best = sample;
                bestIdx = blockSize - remaining;
            }
        }
    }
    else
    {
        const uint32x4_t step = vdupq_n_u32(4);
        const uint32x4_t noMatch = vdupq_n_u32(UINT_MAX);
        uint32x4_t laneIdx = vld1q_u32(firstBlockIdx);   /* index of each lane's minimum */
        uint32x4_t nextIdx = vld1q_u32(secondBlockIdx);  /* indices of the next vector */
        float32x4_t laneMin;                             /* running minimum of each lane */
        float32x4_t samples;
        float32x2_t pairMin;
        uint32x4_t mask;
        uint32x2_t pairIdx;

        /* The first four samples seed the per-lane minima */
        laneMin = vld1q_f32(pSrc);
        pSrc += 4;

        /* Remaining full vectors, four samples per iteration */
        for (remaining = (blockSize - 4) >> 2U; remaining > 0U; remaining--)
        {
            samples = vld1q_f32(pSrc);
            pSrc += 4;

            /* Lanes where the new sample is strictly smaller take value and index */
            mask = vcltq_f32(samples, laneMin);
            laneMin = vbslq_f32(mask, samples, laneMin);
            laneIdx = vbslq_u32(mask, nextIdx, laneIdx);

            nextIdx = vaddq_u32(nextIdx, step);
        }

        /* Pairwise reduction of the four lane minima to one value */
        pairMin = vpmin_f32(vget_low_f32(laneMin), vget_high_f32(laneMin));
        pairMin = vpmin_f32(pairMin, pairMin);
        best = vget_lane_f32(pairMin, 0);

        /*
         * Lanes that do not hold the minimum get UINT_MAX as index, then the
         * smallest index among the remaining lanes is selected.
         */
        mask = vceqq_f32(laneMin, vdupq_n_f32(best));
        laneIdx = vbslq_u32(mask, laneIdx, noMatch);

        pairIdx = vpmin_u32(vget_low_u32(laneIdx), vget_high_u32(laneIdx));
        pairIdx = vpmin_u32(pairIdx, pairIdx);
        bestIdx = vget_lane_u32(pairIdx, 0);

        /* Samples left over after the last full vector */
        for (remaining = (blockSize - 4) % 4U; remaining > 0U; remaining--)
        {
            sample = *pSrc++;

            if (sample < best)
            {
                best = sample;
                bestIdx = blockSize - remaining;
            }
        }
    }

    /* Store the minimum value and its index */
    *pResult = best;
    *pIndex = bestIdx;
}
259 #else
/*
 * Scalar implementation of arm_min_f32.
 *
 * The first sample seeds the minimum, and every following sample that is
 * strictly smaller replaces it, so the reported index is that of the first
 * occurrence of the minimum.  With ARM_MATH_LOOPUNROLL (and without
 * ARM_MATH_AUTOVECTORIZE) the bulk of the input is handled four samples per
 * iteration.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector; the first sample is read
 *           unconditionally, so at least one sample is expected
 * pResult   minimum value returned here
 * pIndex    index of the minimum value returned here
 */
void arm_min_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    float32_t best;          /* smallest value found so far */
    float32_t sample;        /* sample currently being examined */
    uint32_t bestIdx = 0U;   /* position of best in the input */
    uint32_t remaining;      /* loop counter */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t base = 0U;      /* index of the last sample consumed by the unrolled loop */
#endif

    /* The first sample is the reference for all later comparisons */
    best = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

    /* Unrolled part: four samples per iteration */
    for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        if (sample < best)
        {
            best = sample;
            bestIdx = base + 1U;
        }

        sample = *pSrc++;
        if (sample < best)
        {
            best = sample;
            bestIdx = base + 2U;
        }

        sample = *pSrc++;
        if (sample < best)
        {
            best = sample;
            bestIdx = base + 3U;
        }

        sample = *pSrc++;
        if (sample < best)
        {
            best = sample;
            bestIdx = base + 4U;
        }

        base += 4U;
    }

    /* Samples the unrolled loop did not cover */
    remaining = (blockSize - 1U) % 4U;

#else

    /* No unrolling: every sample after the first goes through the loop below */
    remaining = blockSize - 1U;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        sample = *pSrc++;

        if (sample < best)
        {
            best = sample;
            bestIdx = blockSize - remaining;
        }
    }

    /* Store the minimum value and its index */
    *pResult = best;
    *pIndex = bestIdx;
}
357 #endif /* #if defined(ARM_MATH_NEON) */
358 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
359
360 /**
361 @} end of Min group
362 */
363