1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_absmax_f16.c
 * Description:  Maximum value of the absolute values of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
34 #include <limits.h>
35 #endif
36 
37 /**
38   @ingroup groupStats
39  */
40 
41 
42 /**
43   @addtogroup AbsMax
44   @{
45  */
46 
47 /**
48   @brief         Maximum value of absolute values of a floating-point vector.
49   @param[in]     pSrc       points to the input vector
50   @param[in]     blockSize  number of samples in input vector
51   @param[out]    pResult    maximum value returned here
52   @param[out]    pIndex     index of maximum value returned here
53  */
54 
55 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
56 
57 #include "arm_helium_utils.h"
arm_absmax_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)58 ARM_DSP_ATTRIBUTE void arm_absmax_f16(
59   const float16_t * pSrc,
60         uint32_t blockSize,
61         float16_t * pResult,
62         uint32_t * pIndex)
63 {
64       uint16_t        blkCnt;           /* loop counters */
65     f16x8_t       vecSrc;
66     float16_t const *pSrcVec;
67     f16x8_t       curExtremValVec = vdupq_n_f16(F16_ABSMIN);
68     float16_t       maxValue = F16_ABSMIN;
69     uint16_t        idx = blockSize;
70     uint16x8_t    indexVec;
71     uint16x8_t    curExtremIdxVec;
72     mve_pred16_t    p0;
73 
74 
75     indexVec = vidupq_u16((uint32_t)0, 1);
76     curExtremIdxVec = vdupq_n_u16(0);
77 
78     pSrcVec = (float16_t const *) pSrc;
79     blkCnt = blockSize >> 3;
80     while (blkCnt > 0U)
81     {
82         vecSrc = vldrhq_f16(pSrcVec);
83         pSrcVec += 8;
84         vecSrc = vabsq(vecSrc);
85         /*
86          * Get current max per lane and current index per lane
87          * when a max is selected
88          */
89         p0 = vcmpgeq(vecSrc, curExtremValVec);
90         curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
91         curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
92 
93         indexVec = indexVec +  8;
94         /*
95          * Decrement the blockSize loop counter
96          */
97         blkCnt--;
98     }
99     /*
100      * tail
101      * (will be merged thru tail predication)
102      */
103     blkCnt = blockSize & 7;
104     if (blkCnt > 0U)
105     {
106         vecSrc = vldrhq_f16(pSrcVec);
107         pSrcVec += 8;
108         vecSrc = vabsq(vecSrc);
109 
110         p0 = vctp16q(blkCnt);
111         /*
112          * Get current max per lane and current index per lane
113          * when a max is selected
114          */
115         p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0);
116         curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
117         curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
118     }
119     /*
120      * Get max value across the vector
121      */
122     maxValue = vmaxnmvq(maxValue, curExtremValVec);
123     /*
124      * set index for lower values to max possible index
125      */
126     p0 = vcmpgeq(curExtremValVec, maxValue);
127     indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
128     /*
129      * Get min index which is thus for a max value
130      */
131     idx = vminvq(idx, indexVec);
132     /*
133      * Save result
134      */
135     *pIndex = idx;
136     *pResult = maxValue;
137 }
138 #else
139 #if defined(ARM_MATH_LOOPUNROLL)
arm_absmax_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)140 ARM_DSP_ATTRIBUTE void arm_absmax_f16(
141   const float16_t * pSrc,
142         uint32_t blockSize,
143         float16_t * pResult,
144         uint32_t * pIndex)
145 {
146         float16_t cur_absmax, out;                     /* Temporary variables to store the output value. */\
147         uint32_t blkCnt, outIndex;                     /* Loop counter */                                   \
148         uint32_t index;                                /* index of maximum value */                         \
149                                                                                                             \
150   /* Initialize index value to zero. */                                                                     \
151   outIndex = 0U;                                                                                            \
152   /* Load first input value that act as reference value for comparision */                                  \
153   out = *pSrc++;                                                                                            \
154   out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;                                                                             \
155   /* Initialize index of extrema value. */                                                                  \
156   index = 0U;                                                                                               \
157                                                                                                             \
158   /* Loop unrolling: Compute 4 outputs at a time */                                                         \
159   blkCnt = (blockSize - 1U) >> 2U;                                                                          \
160                                                                                                             \
161   while (blkCnt > 0U)                                                                                       \
162   {                                                                                                         \
163     /* Initialize cur_absmax to next consecutive values one by one */                                         \
164     cur_absmax = *pSrc++;                                                                                     \
165     cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
166     /* compare for the extrema value */                                                                     \
167     if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
168     {                                                                                                       \
169       /* Update the extrema value and it's index */                                                         \
170       out = cur_absmax;                                                                                       \
171       outIndex = index + 1U;                                                                                \
172     }                                                                                                       \
173                                                                                                             \
174     cur_absmax = *pSrc++;                                                                                     \
175     cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
176     if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
177     {                                                                                                       \
178       out = cur_absmax;                                                                                       \
179       outIndex = index + 2U;                                                                                \
180     }                                                                                                       \
181                                                                                                             \
182     cur_absmax = *pSrc++;                                                                                     \
183     cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
184     if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
185     {                                                                                                       \
186       out = cur_absmax;                                                                                       \
187       outIndex = index + 3U;                                                                                \
188     }                                                                                                       \
189                                                                                                             \
190     cur_absmax = *pSrc++;                                                                                     \
191     cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
192     if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
193     {                                                                                                       \
194       out = cur_absmax;                                                                                       \
195       outIndex = index + 4U;                                                                                \
196     }                                                                                                       \
197                                                                                                             \
198     index += 4U;                                                                                            \
199                                                                                                             \
200     /* Decrement loop counter */                                                                            \
201     blkCnt--;                                                                                               \
202   }                                                                                                         \
203                                                                                                             \
204   /* Loop unrolling: Compute remaining outputs */                                                           \
205   blkCnt = (blockSize - 1U) % 4U;                                                                           \
206                                                                                                             \
207                                                                                                             \
208   while (blkCnt > 0U)                                                                                       \
209   {                                                                                                         \
210     cur_absmax = *pSrc++;                                                                                     \
211     cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
212     if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
213     {                                                                                                       \
214       out = cur_absmax;                                                                                       \
215       outIndex = blockSize - blkCnt;                                                                        \
216     }                                                                                                       \
217                                                                                                             \
218     /* Decrement loop counter */                                                                            \
219     blkCnt--;                                                                                               \
220   }                                                                                                         \
221                                                                                                             \
222   /* Store the extrema value and it's index into destination pointers */                                    \
223   *pResult = out;                                                                                           \
224   *pIndex = outIndex;
225 }
226 #else
arm_absmax_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)227 ARM_DSP_ATTRIBUTE void arm_absmax_f16(
228   const float16_t * pSrc,
229         uint32_t blockSize,
230         float16_t * pResult,
231         uint32_t * pIndex)
232 {
233         float16_t maxVal, out;                         /* Temporary variables to store the output value. */
234         uint32_t blkCnt, outIndex;                     /* Loop counter */
235 
236   /* Initialise index value to zero. */
237   outIndex = 0U;
238 
239   /* Load first input value that act as reference value for comparision */
240   out = (_Float16)fabsf((float32_t)*pSrc++);
241 
242   /* Initialize blkCnt with number of samples */
243   blkCnt = (blockSize - 1U);
244 
245   while (blkCnt > 0U)
246   {
247     /* Initialize maxVal to the next consecutive values one by one */
248     maxVal = (_Float16)fabsf((float32_t)*pSrc++);
249 
250     /* compare for the maximum value */
251     if ((_Float16)out < (_Float16)maxVal)
252     {
253       /* Update the maximum value and it's index */
254       out = maxVal;
255       outIndex = blockSize - blkCnt;
256     }
257 
258     /* Decrement loop counter */
259     blkCnt--;
260   }
261 
262   /* Store the maximum value and it's index into destination pointers */
263   *pResult = out;
264   *pIndex = outIndex;
265 }
266 #endif /* defined(ARM_MATH_LOOPUNROLL) */
267 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
268 /**
269   @} end of AbsMax group
270  */
271 
272 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
273 
274