1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_absmin_f16.c
4  * Description:  Minimum value of absolute values of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 
34 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
35 #include <limits.h>
36 #endif
37 
38 
39 /**
40   @ingroup groupStats
41  */
42 
43 /**
44   @addtogroup AbsMin
45   @{
46  */
47 
48 /**
49   @brief         Minimum value of absolute values of a floating-point vector.
50   @param[in]     pSrc       points to the input vector
51   @param[in]     blockSize  number of samples in input vector
52   @param[out]    pResult    minimum value returned here
53   @param[out]    pIndex     index of minimum value returned here
54   @return        none
55  */
56 
57 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
58 
59 #include "arm_helium_utils.h"
/*
 * Helium (MVE) variant.
 *
 * Processes eight half-precision samples per iteration. Each of the eight
 * lanes keeps its own running minimum of |x| together with the index at
 * which that minimum was seen; the lanes are reduced to a single value and
 * index once the whole input has been consumed.
 *
 * Because the per-lane update uses "less than or equal", a later sample
 * that ties the running minimum of its lane replaces the stored index for
 * that lane.
 *
 * NOTE(review): indices are tracked in 16-bit lanes and in a uint16_t
 * scalar, so block sizes above 65535 do not look representable on this
 * path - confirm against callers.
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    uint16_t  blkCnt;           /* loop counters */
    f16x8_t vecSrc;
    float16_t const *pSrcVec;
    f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMAX);
    float16_t minValue = F16_ABSMAX;
    uint16_t  idx = blockSize;
    uint16x8_t indexVec;
    uint16x8_t curExtremIdxVec;
    mve_pred16_t p0;


    /* Lane indices of the first vector: 0, 1, ..., 7 */
    indexVec = vidupq_u16((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u16(0);

    pSrcVec = (float16_t const *) pSrc;
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vldrhq_f16(pSrcVec);
        pSrcVec += 8;
        vecSrc = vabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Advance the lane indices to the next group of eight samples */
        indexVec = indexVec +  8;
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }
    /*
     * tail
     * (will be merged thru tail predication)
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) load: only the blkCnt remaining samples are
         * read, so nothing past the end of the input vector is accessed.
         * The zero-filled lanes are excluded by the predicated compare below.
         */
        vecSrc = vldrhq_z_f16(pSrcVec, p0);
        vecSrc = vabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected (active lanes only)
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }
    /*
     * Get min value across the vector
     */
    minValue = vminnmvq(minValue, curExtremValVec);
    /*
     * Keep the index of every lane holding the min value;
     * set the index of all other lanes to max possible index
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
    /*
     * Get the smallest index among the lanes holding the min value
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
140 
141 #else
142 #if defined(ARM_MATH_LOOPUNROLL)
/*
 * Loop-unrolled scalar variant.
 *
 * The first sample seeds the running minimum; the remaining
 * (blockSize - 1) samples are handled four at a time, followed by a
 * clean-up loop for the last (blockSize - 1) % 4 samples. A sample replaces
 * the running minimum only when its absolute value is strictly smaller, so
 * the first occurrence of the minimum is the one reported.
 *
 * The first sample is read unconditionally, so blockSize must be at least 1.
 *
 * Absolute values are taken with fabsf() rather than with a
 * "(x > 0) ? x : -x" selection: the selection turns +0.0 into -0.0, which
 * would let a negative zero be returned as the minimum absolute value.
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
        float16_t cur_absmin, out;                     /* Current |sample| and running minimum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running minimum */
        uint32_t index;                                /* Index of the last sample consumed before the current group of four */

  /* Initialize index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;
  out = (float16_t)fabsf((float)out);
  /* Initialize index of extrema value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize cur_absmin to next consecutive values one by one */
    cur_absmin = *pSrc++;
    cur_absmin = (float16_t)fabsf((float)cur_absmin);
    /* compare for the extrema value */
    if (cur_absmin < out)
    {
      /* Update the extrema value and its index */
      out = cur_absmin;
      outIndex = index + 1U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (float16_t)fabsf((float)cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 2U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (float16_t)fabsf((float)cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 3U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (float16_t)fabsf((float)cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

  while (blkCnt > 0U)
  {
    cur_absmin = *pSrc++;
    cur_absmin = (float16_t)fabsf((float)cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      /* blkCnt samples are still pending, so this one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the extrema value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
229 #else
/*
 * Plain scalar variant.
 *
 * Walks the input once, one sample per iteration. The first sample seeds
 * the running minimum of |x|; a later sample replaces it only when its
 * absolute value is strictly smaller, so the index of the first occurrence
 * of the minimum is reported.
 *
 * The first sample is read unconditionally, so blockSize must be at least 1.
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
  const float16_t *pIn = pSrc;   /* Read cursor over the input vector */
  float16_t smallest;            /* Smallest absolute value seen so far */
  float16_t candidate;           /* Absolute value of the sample under test */
  uint32_t smallestPos = 0U;     /* Position at which 'smallest' was found */
  uint32_t remaining;            /* Samples still to be examined */

  /* The first sample is the initial reference value */
  smallest = fabsf(*pIn);
  pIn++;

  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
  {
    candidate = fabsf(*pIn);
    pIn++;

    /* Only a strictly smaller value displaces the current minimum */
    if (candidate < smallest)
    {
      smallest = candidate;
      /* 'remaining' samples are still pending, this one included */
      smallestPos = blockSize - remaining;
    }
  }

  /* Hand the minimum and its position back to the caller */
  *pResult = smallest;
  *pIndex = smallestPos;
}
269 #endif /* defined(ARM_MATH_LOOPUNROLL) */
270 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
271 /**
272   @} end of AbsMin group
273  */
274 
275 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
276 
277