1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_max_f16.c
4  * Description:  Maximum value of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
34 #include <limits.h>
35 #endif
36 
37 /**
38   @ingroup groupStats
39  */
40 
41 
42 /**
43   @addtogroup Max
44   @{
45  */
46 
47 /**
48   @brief         Maximum value of a floating-point vector.
49   @param[in]     pSrc       points to the input vector
50   @param[in]     blockSize  number of samples in input vector
51   @param[out]    pResult    maximum value returned here
52   @param[out]    pIndex     index of maximum value returned here
53   @return        none
54  */
55 
56 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
57 
void arm_max_f16(
  const float16_t * pSrc,
  uint32_t blockSize,
  float16_t * pResult,
  uint32_t * pIndex)
{
    int32_t blkCnt;                                  /* Loop counter */
    f16x8_t vecSrc;                                  /* Current group of 8 input samples */
    f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN);  /* Running maximum of each lane */
    float16_t maxValue = F16_MIN;                    /* Running overall maximum */
    uint32_t idx = blockSize;                        /* Index of the maximum; starts at the blockSize sentinel */
    uint16x8_t indexVec;                             /* Source indices of the samples in vecSrc */
    uint16x8_t curExtremIdxVec;                      /* Source index of each lane's running maximum */
    uint32_t curIdx = 0;                             /* Index counter updated by vidupq_wb_u16 */
    mve_pred16_t p0;                                 /* Lane predicate produced by the comparisons */
    float16_t tmp;                                   /* Sample read by the scalar tail loop */

    /*
     * NOTE(review): indices are tracked in 16-bit lanes (uint16x8_t) and the
     * blockSize sentinel is narrowed by vdupq_n_u16, so the index produced by
     * the vector part is presumably only reliable while blockSize fits in
     * 16 bits - confirm against callers.
     */

    /* Indices of the first group of samples; curIdx is advanced for the next call */
    indexVec = vidupq_wb_u16(&curIdx, 1);
    curExtremIdxVec = vdupq_n_u16(0);

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0)
    {
        vecSrc = vldrhq_f16(pSrc);
        /*
         * Get current max per lane and current index per lane
         * when a max is selected.
         * The comparison is strict so that a later sample equal to the
         * lane maximum does not overwrite the recorded index: ties then
         * resolve to the earliest sample, as in the tail loop below and
         * in the scalar implementation.
         */
        p0 = vcmpgtq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Indices of the next group of samples */
        indexVec = vidupq_wb_u16(&curIdx, 1);

        pSrc += 8;
        /* Decrement the loop counter */
        blkCnt--;
    }


    /*
     * Get max value across the vector
     */
    maxValue = vmaxnmvq(maxValue, curExtremValVec);
    /*
     * Keep the index of the lanes holding the max value and
     * set the index of the other lanes to the blockSize sentinel
     */
    p0 = vcmpgeq(curExtremValVec, maxValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
    /*
     * Get min index which is thus for a max value
     */
    idx = vminvq(idx, indexVec);

    /* Tail: the blockSize % 8 samples not covered by the vector loop */
    blkCnt = blockSize & 7;

    while (blkCnt > 0)
    {
      /* Initialize tmp to the next consecutive values one by one */
      tmp = *pSrc++;

      /* compare for the maximum value */
      if (maxValue < tmp)
      {
        /* Update the maximum value and its index */
        maxValue = tmp;
        idx = blockSize - blkCnt;
      }

      /* Decrement loop counter */
      blkCnt--;
    }

    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = maxValue;
}
140 
141 #else
void arm_max_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
        float16_t maxVal, out;                         /* Current sample and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
        uint32_t index;                                /* Index of the last sample handled by the unrolled loop */
#endif

  /*
   * Empty input: there is no sample to load, and (blockSize - 1U) below
   * would wrap around to UINT32_MAX and run the loops far past the buffer.
   * Report F16_MIN and index 0, the starting values used by the MVE
   * implementation of this function. F16_MIN is expected to be provided by
   * the f16 headers this file already includes.
   */
  if (blockSize == 0U)
  {
    *pResult = F16_MIN;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Initialise index of maximum value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value (strict, so the first maximum wins) */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = index + 1U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 2U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 3U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Initialize blkCnt with the number of samples after the first one */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
240 
241 /**
242   @} end of Max group
243  */
244 
245 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
246 
247