1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_min_f32.c
4  * Description:  Minimum value of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
#include "dsp/statistics_functions.h"

#include <float.h>

#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
34 
35 
36 /**
37   @ingroup groupStats
38  */
39 
40 /**
41   @defgroup Min Minimum
42 
43   Computes the minimum value of an array of data.
44   The function returns both the minimum value and its position within the array.
45   There are separate functions for floating-point, Q31, Q15, and Q7 data types.
46  */
47 
48 /**
49   @addtogroup Min
50   @{
51  */
52 
53 /**
54   @brief         Minimum value of a floating-point vector.
55   @param[in]     pSrc       points to the input vector
56   @param[in]     blockSize  number of samples in input vector
57   @param[out]    pResult    minimum value returned here
58   @param[out]    pIndex     index of minimum value returned here
59  */
60 
61 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
62 
/*
 * MVE floating-point variant (built when ARM_MATH_MVEF is defined and
 * ARM_MATH_AUTOVECTORIZE is not).
 *
 * Four samples are processed per iteration, keeping a per-lane running
 * minimum and the index at which it was selected; the lanes are then
 * reduced to one value/index pair and any remaining (blockSize % 4)
 * samples are handled one at a time.
 */
void arm_min_f32(
  const float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex)
{
    uint32_t  blkCnt;           /* loop counter */
    f32x4_t vecSrc;
    float32_t const *pSrcVec;
    f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX);
    float32_t minValue = F32_MAX;
    uint32_t  idx = blockSize;
    uint32x4_t indexVec;
    uint32x4_t curExtremIdxVec;
    float32_t tmp;
    mve_pred16_t p0;

    /* Lane i starts out describing sample i: indices {0, 1, 2, 3} */
    indexVec = vidupq_u32((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u32(0);

    pSrcVec = (float32_t const *) pSrc;
    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Advance the per-lane sample indices to the next group of 4 */
        indexVec = indexVec +  4;
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * Get min value across the vector
     */
    minValue = vminnmvq(minValue, curExtremValVec);
    /*
     * Lanes that do not hold the overall minimum get their index
     * replaced by blockSize (larger than any valid index) so that
     * they cannot win the index reduction below
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
    /*
     * Smallest index among the lanes that hold the minimum value
     */
    idx = vminvq(idx, indexVec);

    /*
     * tail
     */
    blkCnt = blockSize & 0x3;

    while (blkCnt > 0U)
    {
      /*
       * Read the remaining samples through pSrcVec: it is the pointer
       * that was advanced by the vector loop, whereas pSrc still points
       * at the first sample.
       */
      tmp = *pSrcVec++;

      /* compare for the minimum value */
      if (minValue > tmp)
      {
        /* Update the minimum value and its index */
        minValue = tmp;
        idx = blockSize - blkCnt;
      }
      blkCnt--;
    }
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
144 
145 #else
146 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * NEON variant (built when ARM_MATH_NEON is defined and
 * ARM_MATH_AUTOVECTORIZE is not).
 *
 * Vectors of fewer than 4 samples are scanned with a plain scalar loop.
 * Otherwise a per-lane running minimum and its index are maintained over
 * groups of 4 samples, reduced to a single value/index pair, and the
 * remaining (blockSize % 4) samples are handled one at a time.
 */
void arm_min_f32(
  const float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex)
{
  float32_t curVal, out;                  /* Current sample and running minimum */
  uint32_t blkCnt, outIndex;              /* Loop counter and index of the running minimum */

  float32x4_t outV, srcV;
  float32x2_t outV2;

  uint32x4_t idxV;
  static const uint32_t indexInit[4]={4,5,6,7};
  static const uint32_t countVInit[4]={0,1,2,3};
  uint32x4_t noIdx;
  uint32x4_t index;
  uint32x4_t delta;
  uint32x4_t countV;
  uint32x2_t countV2;

  /*
   * Empty input: nothing may be read from pSrc, and (blockSize - 1) below
   * would wrap around to a huge loop count. Report the largest finite
   * float at index 0 instead.
   */
  if (blockSize == 0U)
  {
      *pResult = (float32_t)FLT_MAX;
      *pIndex = 0U;
      return;
  }

  noIdx = vdupq_n_u32(UINT_MAX);          /* placeholder index for lanes not holding the minimum */
  delta = vdupq_n_u32(4);                 /* per-iteration index increment */
  index = vld1q_u32(indexInit);           /* indices of the second group of 4 samples */
  countV = vld1q_u32(countVInit);         /* indices of the first group of 4 samples */

  /* Initialise the index value to zero. */
  outIndex = 0U;

  if (blockSize <= 3)
  {
      /* Load first input value that acts as reference value for comparison */
      out = *pSrc++;

      blkCnt = blockSize - 1;

      while (blkCnt > 0U)
      {
        /* Fetch the next sample */
        curVal = *pSrc++;

        /* compare for the minimum value */
        if (out > curVal)
        {
          /* Update the minimum value and its index */
          out = curVal;
          outIndex = blockSize - blkCnt;
        }

        /* Decrement the loop counter */
        blkCnt--;
      }
  }
  else
  {
      /* The first 4 samples seed the per-lane minima */
      outV = vld1q_f32(pSrc);
      pSrc += 4;

      /* Compute 4 outputs at a time */
      blkCnt = (blockSize - 4 ) >> 2U;

      while (blkCnt > 0U)
      {
        srcV = vld1q_f32(pSrc);
        pSrc += 4;

        /* Lanes where the new sample is strictly smaller take its value and index */
        idxV = vcltq_f32(srcV, outV);
        outV = vbslq_f32(idxV, srcV, outV );
        countV = vbslq_u32(idxV, index,countV );

        index = vaddq_u32(index,delta);

        /* Decrement the loop counter */
        blkCnt--;
      }

      /* Reduce the 4 lane minima to a single value */
      outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV));
      outV2 = vpmin_f32(outV2,outV2);
      out = vget_lane_f32(outV2,0);

      /*
       * Keep the index only in lanes equal to the overall minimum (others
       * become UINT_MAX), then take the smallest remaining index.
       */
      idxV = vceqq_f32(outV, vdupq_n_f32(out));
      countV = vbslq_u32(idxV, countV,noIdx);

      countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
      countV2 = vpmin_u32(countV2,countV2);
      outIndex = vget_lane_u32(countV2,0);

      /* Samples left over when blockSize is not a multiple of 4 */
      blkCnt = (blockSize - 4 ) % 4U;

      while (blkCnt > 0U)
      {
        /* Fetch the next sample */
        curVal = *pSrc++;

        /* compare for the minimum value */
        if (out > curVal)
        {
          /* Update the minimum value and its index */
          out = curVal;
          outIndex = blockSize - blkCnt ;
        }

        /* Decrement the loop counter */
        blkCnt--;
      }
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
259 #else
/*
 * Portable scalar variant (used when neither the MVE nor the NEON
 * implementation is selected).
 *
 * Scans the vector once and keeps the smallest value seen together with
 * the index of its first occurrence (a later sample only replaces the
 * current minimum when it is strictly smaller).
 */
void arm_min_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
        float32_t minVal, out;                         /* Current sample and running minimum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running minimum */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
        uint32_t index;                                /* Index of the last sample consumed by the unrolled loop */
#endif

  /*
   * Empty input: nothing may be read from pSrc, and (blockSize - 1U) below
   * would wrap around to a huge loop count. Report the largest finite
   * float at index 0 instead.
   */
  if (blockSize == 0U)
  {
    *pResult = (float32_t)FLT_MAX;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Sample 0 has already been consumed above. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Fetch the next sample */
    minVal = *pSrc++;

    /* compare for the minimum value */
    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      outIndex = index + 1U;
    }

    minVal = *pSrc++;
    if (out > minVal)
    {
      out = minVal;
      outIndex = index + 2U;
    }

    minVal = *pSrc++;
    if (out > minVal)
    {
      out = minVal;
      outIndex = index + 3U;
    }

    minVal = *pSrc++;
    if (out > minVal)
    {
      out = minVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Every sample after the first one is still to be examined */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Fetch the next sample */
    minVal = *pSrc++;

    /* compare for the minimum value */
    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
357 #endif /* #if defined(ARM_MATH_NEON) */
358 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
359 
360 /**
361   @} end of Min group
362  */
363