1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_max_f16.c
4 * Description: Maximum value of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
34 #include <limits.h>
35 #endif
36
37 /**
38 @ingroup groupStats
39 */
40
41
42 /**
43 @addtogroup Max
44 @{
45 */
46
/**
  @brief         Maximum value of a half-precision floating-point vector.
  @param[in]     pSrc       points to the input vector
  @param[in]     blockSize  number of samples in input vector
  @param[out]    pResult    maximum value returned here
  @param[out]    pIndex     index of maximum value returned here
 */
54
55 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
56
arm_max_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)57 ARM_DSP_ATTRIBUTE void arm_max_f16(
58 const float16_t * pSrc,
59 uint32_t blockSize,
60 float16_t * pResult,
61 uint32_t * pIndex)
62 {
63 int32_t blkCnt;
64 f16x8_t vecSrc;
65 f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN);
66 float16_t maxValue = F16_MIN;
67 uint32_t idx = blockSize;
68 uint16x8_t indexVec;
69 uint16x8_t curExtremIdxVec;
70 uint32_t curIdx = 0;
71 mve_pred16_t p0;
72 float16_t tmp;
73
74
75 indexVec = vidupq_wb_u16(&curIdx, 1);
76 curExtremIdxVec = vdupq_n_u16(0);
77
78 /* Compute 4 outputs at a time */
79 blkCnt = blockSize >> 3;
80 while (blkCnt > 0)
81 {
82 vecSrc = vldrhq_f16(pSrc);
83 /*
84 * Get current max per lane and current index per lane
85 * when a max is selected
86 */
87 p0 = vcmpgeq(vecSrc, curExtremValVec);
88 curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
89 curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
90
91 indexVec = vidupq_wb_u16(&curIdx, 1);
92
93 pSrc += 8;
94 /* Decrement the loop counter */
95 blkCnt--;
96 }
97
98
99 /*
100 * Get max value across the vector
101 */
102 maxValue = vmaxnmvq(maxValue, curExtremValVec);
103 /*
104 * set index for lower values to max possible index
105 */
106 p0 = vcmpgeq(curExtremValVec, maxValue);
107 indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
108 /*
109 * Get min index which is thus for a max value
110 */
111 idx = vminvq(idx, indexVec);
112
113 /* Tail */
114 blkCnt = blockSize & 7;
115
116 while (blkCnt > 0)
117 {
118 /* Initialize tmp to the next consecutive values one by one */
119 tmp = *pSrc++;
120
121 /* compare for the maximum value */
122 if ((_Float16)maxValue < (_Float16)tmp)
123 {
124 /* Update the maximum value and it's index */
125 maxValue = tmp;
126 idx = blockSize - blkCnt;
127 }
128
129 /* Decrement loop counter */
130 blkCnt--;
131 }
132
133 /*
134 * Save result
135 */
136 *pIndex = idx;
137 *pResult = maxValue;
138 }
139
140 #else
arm_max_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)141 ARM_DSP_ATTRIBUTE void arm_max_f16(
142 const float16_t * pSrc,
143 uint32_t blockSize,
144 float16_t * pResult,
145 uint32_t * pIndex)
146 {
147 float16_t maxVal, out; /* Temporary variables to store the output value. */
148 uint32_t blkCnt, outIndex; /* Loop counter */
149
150 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
151 uint32_t index; /* index of maximum value */
152 #endif
153
154 /* Initialise index value to zero. */
155 outIndex = 0U;
156
157 /* Load first input value that act as reference value for comparision */
158 out = *pSrc++;
159
160 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
161 /* Initialise index of maximum value. */
162 index = 0U;
163
164 /* Loop unrolling: Compute 4 outputs at a time */
165 blkCnt = (blockSize - 1U) >> 2U;
166
167 while (blkCnt > 0U)
168 {
169 /* Initialize maxVal to next consecutive values one by one */
170 maxVal = *pSrc++;
171
172 /* compare for the maximum value */
173 if ((_Float16)out < (_Float16)maxVal)
174 {
175 /* Update the maximum value and it's index */
176 out = maxVal;
177 outIndex = index + 1U;
178 }
179
180 maxVal = *pSrc++;
181 if ((_Float16)out < (_Float16)maxVal)
182 {
183 out = maxVal;
184 outIndex = index + 2U;
185 }
186
187 maxVal = *pSrc++;
188 if ((_Float16)out < (_Float16)maxVal)
189 {
190 out = maxVal;
191 outIndex = index + 3U;
192 }
193
194 maxVal = *pSrc++;
195 if ((_Float16)out < (_Float16)maxVal)
196 {
197 out = maxVal;
198 outIndex = index + 4U;
199 }
200
201 index += 4U;
202
203 /* Decrement loop counter */
204 blkCnt--;
205 }
206
207 /* Loop unrolling: Compute remaining outputs */
208 blkCnt = (blockSize - 1U) % 4U;
209
210 #else
211
212 /* Initialize blkCnt with number of samples */
213 blkCnt = (blockSize - 1U);
214
215 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
216
217 while (blkCnt > 0U)
218 {
219 /* Initialize maxVal to the next consecutive values one by one */
220 maxVal = *pSrc++;
221
222 /* compare for the maximum value */
223 if ((_Float16)out < (_Float16)maxVal)
224 {
225 /* Update the maximum value and it's index */
226 out = maxVal;
227 outIndex = blockSize - blkCnt;
228 }
229
230 /* Decrement loop counter */
231 blkCnt--;
232 }
233
234 /* Store the maximum value and it's index into destination pointers */
235 *pResult = out;
236 *pIndex = outIndex;
237 }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
239
240 /**
241 @} end of Max group
242 */
243
244 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
245
246