1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_min_f16.c
4  * Description:  Minimum value of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 
34 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
35 #include <limits.h>
36 #endif
37 
38 
39 /**
40   @ingroup groupStats
41  */
42 
43 /**
44   @addtogroup Min
45   @{
46  */
47 
48 /**
49   @brief         Minimum value of a floating-point vector.
50   @param[in]     pSrc       points to the input vector
51   @param[in]     blockSize  number of samples in input vector
52   @param[out]    pResult    minimum value returned here
53   @param[out]    pIndex     index of minimum value returned here
54   @return        none
55  */
56 
57 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
58 
/*
 * Helium (MVE) implementation.
 *
 * Scans pSrc eight half-precision samples at a time, keeping for every
 * vector lane the smallest value seen so far together with the index at
 * which it was seen, then reduces across lanes to produce the overall
 * minimum (*pResult) and an index of that minimum (*pIndex).
 *
 * Notes:
 *  - Indices are carried in 16-bit lanes (uint16x8_t), so only indices
 *    that fit in 16 bits can be represented by this variant.
 *  - The per-lane comparison is "<=": a later sample equal to the lane's
 *    current minimum replaces the recorded index for that lane.
 */
void arm_min_f16(
  const float16_t * pSrc,
  uint32_t blockSize,
  float16_t * pResult,
  uint32_t * pIndex)
{
    int32_t  blkCnt;           /* loop counters */
    f16x8_t vecSrc;
    float16_t const *pSrcVec;
    f16x8_t curExtremValVec = vdupq_n_f16(F16_MAX);  /* per-lane running minimum */
    float16_t minValue = F16_MAX;
    uint32_t  idx = blockSize;
    uint16x8_t indexVec;                             /* index of the sample held in each lane */
    uint16x8_t curExtremIdxVec;                      /* per-lane index of the running minimum */
    mve_pred16_t p0;

    /* Lane i starts at index i: {0, 1, ..., 7} */
    indexVec = vidupq_u16((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u16(0);

    pSrcVec = (float16_t const *) pSrc;

    /* Full vectors of 8 samples */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0)
    {
        vecSrc = vldrhq_f16(pSrcVec);  pSrcVec += 8;
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Advance every lane's index to the next group of 8 samples */
        indexVec = indexVec + 8;
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }
    /*
     * tail: the remaining (blockSize % 8) samples, handled with
     * tail predication
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0)
    {
        /* Predicate enabling only the first blkCnt lanes */
        p0 = vctp16q(blkCnt);
        /*
         * Predicated (zeroing) load: only the lanes that hold real samples
         * are fetched, so nothing past the end of the input buffer is read.
         */
        vecSrc = vldrhq_z_f16(pSrcVec, p0);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected (inactive lanes never compare true)
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }
    /*
     * Get min value across the vector
     */
    minValue = vminnmvq(minValue, curExtremValVec);
    /*
     * Keep the recorded index only in lanes that hold the minimum;
     * every other lane is given blockSize so it cannot win the
     * index reduction below
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
    /*
     * Get min index which is thus for a min value
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
133 
134 #else
135 
/*
 * Scalar implementation.
 *
 * Walks the input once, remembering the smallest sample seen so far and
 * the index at which it was first seen. Because the comparison is strict,
 * an equal sample found later does not replace the recorded index.
 *
 * The first sample is read unconditionally as the initial reference, so
 * the caller must pass blockSize >= 1.
 */
void arm_min_f16(
  const float16_t * pSrc,
  uint32_t blockSize,
  float16_t * pResult,
  uint32_t * pIndex)
{
  float16_t best;            /* smallest sample seen so far */
  float16_t sample;          /* sample currently being examined */
  uint32_t bestIdx = 0U;     /* index of the smallest sample */
  uint32_t remaining;        /* loop counter */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  uint32_t base = 0U;        /* index of the sample preceding the current group of 4 */
#endif

  /* The first sample is the initial reference for all comparisons */
  best = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Loop unrolling: examine 4 samples per iteration */
  for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
  {
    sample = pSrc[0];
    if (sample < best)
    {
      best = sample;
      bestIdx = base + 1U;
    }

    sample = pSrc[1];
    if (sample < best)
    {
      best = sample;
      bestIdx = base + 2U;
    }

    sample = pSrc[2];
    if (sample < best)
    {
      best = sample;
      bestIdx = base + 3U;
    }

    sample = pSrc[3];
    if (sample < best)
    {
      best = sample;
      bestIdx = base + 4U;
    }

    pSrc += 4;
    base += 4U;
  }

  /* Samples left over after the unrolled groups of 4 */
  remaining = (blockSize - 1U) % 4U;

#else

  /* Every sample after the first one is still to be examined */
  remaining = blockSize - 1U;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    sample = *pSrc++;

    if (sample < best)
    {
      /* 'remaining' counts down to the end, so this is the sample's index */
      best = sample;
      bestIdx = blockSize - remaining;
    }
  }

  /* Hand the minimum value and its index back to the caller */
  *pResult = best;
  *pIndex = bestIdx;
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
234 
235 /**
236   @} end of Min group
237  */
238 
239 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
240 
241