1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_min_f32.c
4  * Description:  Minimum value of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 
31 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
32 #include <limits.h>
33 #endif
34 
35 
36 /**
37   @ingroup groupStats
38  */
39 
40 /**
41   @defgroup Min Minimum
42 
43   Computes the minimum value of an array of data.
44   The function returns both the minimum value and its position within the array.
45   There are separate functions for floating-point, Q31, Q15, and Q7 data types.
46  */
47 
48 /**
49   @addtogroup Min
50   @{
51  */
52 
53 /**
54   @brief         Minimum value of a floating-point vector.
55   @param[in]     pSrc       points to the input vector
56   @param[in]     blockSize  number of samples in input vector
57   @param[out]    pResult    minimum value returned here
58   @param[out]    pIndex     index of minimum value returned here
59   @return        none
60  */
61 
62 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
63 
/*
 * Helium (MVE) implementation.
 *
 * Complete groups of four samples are processed with vector instructions,
 * keeping a per-lane running minimum together with the index at which that
 * minimum was taken. The lanes are then reduced to one value and one index,
 * and the remaining (blockSize & 3) samples are handled by a scalar loop.
 *
 * The running minimum is seeded with F32_MAX rather than with the first
 * sample, so when blockSize is 0 nothing is read from pSrc and the function
 * stores F32_MAX with index 0.
 *
 * NOTE(review): the vector loop compares with "<=", so when a lane sees the
 * same value several times it keeps the index of the latest one. For repeated
 * minima the reported index is therefore not necessarily the first occurrence.
 */
void arm_min_f32(
  const float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex)
{
    uint32_t  blkCnt;                                /* loop counter */
    f32x4_t vecSrc;                                  /* four samples loaded from the input */
    float32_t const *pSrcVec;                        /* read cursor shared by the vector loop and the tail */
    f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX);  /* per-lane running minimum */
    float32_t minValue = F32_MAX;                    /* overall minimum */
    uint32_t  idx = blockSize;                       /* overall index; blockSize acts as "none yet" */
    uint32x4_t indexVec;                             /* indices of the samples currently in vecSrc */
    uint32x4_t curExtremIdxVec;                      /* per-lane index of the running minimum */
    float32_t tmp;
    mve_pred16_t p0;

    /* Lane indices start at {0, 1, 2, 3} and advance by 4 per iteration */
    indexVec = vidupq_u32((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u32(0);

    pSrcVec = (float32_t const *) pSrc;
    /* Process 4 samples at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;
        /*
         * Update the current min per lane, and the current index per lane
         * wherever a new min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        indexVec = indexVec +  4;
        /*
         * Decrement the loop counter
         */
        blkCnt--;
    }

    /*
     * Get min value across the vector
     */
    minValue = vminnmvq(minValue, curExtremValVec);
    /*
     * Lanes that do not hold the overall min get the largest possible
     * index (blockSize) so that they cannot win the index reduction
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
    /*
     * Get the smallest index among the lanes holding the min value
     */
    idx = vminvq(idx, indexVec);

    /*
     * Tail: the last (blockSize & 3) samples.
     *
     * They must be read through pSrcVec, which the vector loop has already
     * advanced past the processed samples; pSrc still points at element 0.
     */
    blkCnt = blockSize & 0x3;

    while (blkCnt > 0U)
    {
      /* Fetch the next remaining sample */
      tmp = *pSrcVec++;

      /* Compare for the minimum value */
      if (minValue > tmp)
      {
        /* Update the minimum value and its index */
        minValue = tmp;
        idx = blockSize - blkCnt;
      }
      blkCnt--;
    }
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
145 
146 #else
147 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * NEON implementation.
 *
 * Inputs of up to three samples are handled by a plain scalar loop. Longer
 * inputs use the first four samples as the initial per-lane minimum, then
 * process further groups of four with vector compares, reduce the lanes to a
 * single value and index, and finish the leftover samples in scalar code.
 *
 * blockSize must be at least 1: for blockSize == 0 the first sample is still
 * read and the scalar loop counter (blockSize - 1) wraps around.
 *
 * Only strict "less than" comparisons are used, so (assuming no NaN inputs)
 * a repeated minimum is reported at its lowest index.
 */
void arm_min_f32(
  const float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex)
{
  float32_t sample, out;                  /* Current sample and running minimum */
  uint32_t blkCnt, outIndex;              /* Loop counter and index of the minimum */

  float32x4_t outV, srcV;                 /* Per-lane minimum and freshly loaded samples */
  float32x2_t outV2;

  uint32x4_t idxV;                        /* Lane mask produced by the compares */
  static const uint32_t indexInit[4]={4,5,6,7};
  static const uint32_t countVInit[4]={0,1,2,3};
  uint32x4_t noIdx;                       /* Sentinel larger than any valid index */
  uint32x4_t index;                       /* Indices of the samples currently in srcV */
  uint32x4_t delta;                       /* Index increment per vector iteration */
  uint32x4_t countV;                      /* Per-lane index of the running minimum */
  uint32x2_t countV2;

  /* UINT32_MAX matches the 32-bit lanes exactly; no implicit truncation */
  noIdx = vdupq_n_u32(UINT32_MAX);
  delta = vdupq_n_u32(4);
  index = vld1q_u32(indexInit);
  countV = vld1q_u32(countVInit);

  /* Initialise the index value to zero. */
  outIndex = 0U;

  if (blockSize <= 3)
  {
      /* Too short for a vector: the first sample is the reference value */
      out = *pSrc++;

      blkCnt = blockSize - 1;

      while (blkCnt > 0U)
      {
        /* Fetch the next sample */
        sample = *pSrc++;

        /* Compare for the minimum value */
        if (out > sample)
        {
          /* Update the minimum value and its index */
          out = sample;
          outIndex = blockSize - blkCnt;
        }

        /* Decrement the loop counter */
        blkCnt--;
      }
  }
  else
  {
      /* The first four samples seed the per-lane minimum (indices 0..3) */
      outV = vld1q_f32(pSrc);
      pSrc += 4;

      /* Process 4 samples at a time */
      blkCnt = (blockSize - 4 ) >> 2U;

      while (blkCnt > 0U)
      {
        srcV = vld1q_f32(pSrc);
        pSrc += 4;

        /* Where the new sample is smaller, take both its value and its index */
        idxV = vcltq_f32(srcV, outV);
        outV = vbslq_f32(idxV, srcV, outV );
        countV = vbslq_u32(idxV, index,countV );

        index = vaddq_u32(index,delta);

        /* Decrement the loop counter */
        blkCnt--;
      }

      /* Reduce the four lanes to the overall minimum value */
      outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV));
      outV2 = vpmin_f32(outV2,outV2);
      out = vget_lane_f32(outV2,0);

      /* Keep the index only in lanes that hold the minimum, then take the smallest */
      idxV = vceqq_f32(outV, vdupq_n_f32(out));
      countV = vbslq_u32(idxV, countV,noIdx);

      countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
      countV2 = vpmin_u32(countV2,countV2);
      outIndex = vget_lane_u32(countV2,0);

      /* Samples left over when (blockSize - 4) is not a multiple of 4 */
      blkCnt = (blockSize - 4 ) % 4U;

      while (blkCnt > 0U)
      {
        /* Fetch the next sample */
        sample = *pSrc++;

        /* Compare for the minimum value */
        if (out > sample)
        {
          /* Update the minimum value and its index */
          out = sample;
          outIndex = blockSize - blkCnt ;
        }

        /* Decrement the loop counter */
        blkCnt--;
      }
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
260 #else
/*
 * Portable C implementation, optionally unrolled by four when
 * ARM_MATH_LOOPUNROLL is defined and ARM_MATH_AUTOVECTORIZE is not.
 *
 * The first sample seeds the running minimum and every following sample is
 * compared against it with a strict "less than", so a repeated minimum is
 * reported at its lowest index.
 *
 * blockSize must be at least 1: pSrc[0] is read unconditionally and the
 * number of remaining samples is computed as (blockSize - 1U).
 */
void arm_min_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
  const float32_t *pIn = pSrc + 1;     /* next sample to examine */
  float32_t lowest = *pSrc;            /* smallest value seen so far */
  float32_t sample;                    /* sample under test */
  uint32_t lowestIdx = 0U;             /* position of 'lowest' in the input */
  uint32_t remaining;                  /* iterations still to run */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  uint32_t base = 0U;                  /* index of the sample just before the current group */

  /* Unrolled part: four samples per iteration */
  for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
  {
    sample = pIn[0];
    if (sample < lowest)
    {
      lowest = sample;
      lowestIdx = base + 1U;
    }

    sample = pIn[1];
    if (sample < lowest)
    {
      lowest = sample;
      lowestIdx = base + 2U;
    }

    sample = pIn[2];
    if (sample < lowest)
    {
      lowest = sample;
      lowestIdx = base + 3U;
    }

    sample = pIn[3];
    if (sample < lowest)
    {
      lowest = sample;
      lowestIdx = base + 4U;
    }

    pIn += 4;
    base += 4U;
  }

  /* Samples left over after the unrolled part */
  remaining = (blockSize - 1U) % 4U;

#else

  /* Every sample after the first is handled by the loop below */
  remaining = blockSize - 1U;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    sample = *pIn++;

    if (sample < lowest)
    {
      /* 'remaining' counts down to 1, so this is the sample's position */
      lowest = sample;
      lowestIdx = blockSize - remaining;
    }
  }

  /* Hand the minimum and its position back to the caller */
  *pResult = lowest;
  *pIndex = lowestIdx;
}
358 #endif /* #if defined(ARM_MATH_NEON) */
359 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
360 
361 /**
362   @} end of Min group
363  */
364