1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_absmax_f16.c
 * Description: Maximum value of the absolute values of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
34 #include <limits.h>
35 #endif
36
37 /**
38 @ingroup groupStats
39 */
40
41
42 /**
43 @addtogroup AbsMax
44 @{
45 */
46
47 /**
48 @brief Maximum value of absolute values of a floating-point vector.
49 @param[in] pSrc points to the input vector
50 @param[in] blockSize number of samples in input vector
51 @param[out] pResult maximum value returned here
52 @param[out] pIndex index of maximum value returned here
53 */
54
55 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
56
57 #include "arm_helium_utils.h"
arm_absmax_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)58 ARM_DSP_ATTRIBUTE void arm_absmax_f16(
59 const float16_t * pSrc,
60 uint32_t blockSize,
61 float16_t * pResult,
62 uint32_t * pIndex)
63 {
64 uint16_t blkCnt; /* loop counters */
65 f16x8_t vecSrc;
66 float16_t const *pSrcVec;
67 f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMIN);
68 float16_t maxValue = F16_ABSMIN;
69 uint16_t idx = blockSize;
70 uint16x8_t indexVec;
71 uint16x8_t curExtremIdxVec;
72 mve_pred16_t p0;
73
74
75 indexVec = vidupq_u16((uint32_t)0, 1);
76 curExtremIdxVec = vdupq_n_u16(0);
77
78 pSrcVec = (float16_t const *) pSrc;
79 blkCnt = blockSize >> 3;
80 while (blkCnt > 0U)
81 {
82 vecSrc = vldrhq_f16(pSrcVec);
83 pSrcVec += 8;
84 vecSrc = vabsq(vecSrc);
85 /*
86 * Get current max per lane and current index per lane
87 * when a max is selected
88 */
89 p0 = vcmpgeq(vecSrc, curExtremValVec);
90 curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
91 curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
92
93 indexVec = indexVec + 8;
94 /*
95 * Decrement the blockSize loop counter
96 */
97 blkCnt--;
98 }
99 /*
100 * tail
101 * (will be merged thru tail predication)
102 */
103 blkCnt = blockSize & 7;
104 if (blkCnt > 0U)
105 {
106 vecSrc = vldrhq_f16(pSrcVec);
107 pSrcVec += 8;
108 vecSrc = vabsq(vecSrc);
109
110 p0 = vctp16q(blkCnt);
111 /*
112 * Get current max per lane and current index per lane
113 * when a max is selected
114 */
115 p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0);
116 curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
117 curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
118 }
119 /*
120 * Get max value across the vector
121 */
122 maxValue = vmaxnmvq(maxValue, curExtremValVec);
123 /*
124 * set index for lower values to max possible index
125 */
126 p0 = vcmpgeq(curExtremValVec, maxValue);
127 indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
128 /*
129 * Get min index which is thus for a max value
130 */
131 idx = vminvq(idx, indexVec);
132 /*
133 * Save result
134 */
135 *pIndex = idx;
136 *pResult = maxValue;
137 }
138 #else
139 #if defined(ARM_MATH_LOOPUNROLL)
arm_absmax_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)140 ARM_DSP_ATTRIBUTE void arm_absmax_f16(
141 const float16_t * pSrc,
142 uint32_t blockSize,
143 float16_t * pResult,
144 uint32_t * pIndex)
145 {
146 float16_t cur_absmax, out; /* Temporary variables to store the output value. */\
147 uint32_t blkCnt, outIndex; /* Loop counter */ \
148 uint32_t index; /* index of maximum value */ \
149 \
150 /* Initialize index value to zero. */ \
151 outIndex = 0U; \
152 /* Load first input value that act as reference value for comparision */ \
153 out = *pSrc++; \
154 out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \
155 /* Initialize index of extrema value. */ \
156 index = 0U; \
157 \
158 /* Loop unrolling: Compute 4 outputs at a time */ \
159 blkCnt = (blockSize - 1U) >> 2U; \
160 \
161 while (blkCnt > 0U) \
162 { \
163 /* Initialize cur_absmax to next consecutive values one by one */ \
164 cur_absmax = *pSrc++; \
165 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
166 /* compare for the extrema value */ \
167 if ((_Float16)cur_absmax > (_Float16)out) \
168 { \
169 /* Update the extrema value and it's index */ \
170 out = cur_absmax; \
171 outIndex = index + 1U; \
172 } \
173 \
174 cur_absmax = *pSrc++; \
175 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
176 if ((_Float16)cur_absmax > (_Float16)out) \
177 { \
178 out = cur_absmax; \
179 outIndex = index + 2U; \
180 } \
181 \
182 cur_absmax = *pSrc++; \
183 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
184 if ((_Float16)cur_absmax > (_Float16)out) \
185 { \
186 out = cur_absmax; \
187 outIndex = index + 3U; \
188 } \
189 \
190 cur_absmax = *pSrc++; \
191 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
192 if ((_Float16)cur_absmax > (_Float16)out) \
193 { \
194 out = cur_absmax; \
195 outIndex = index + 4U; \
196 } \
197 \
198 index += 4U; \
199 \
200 /* Decrement loop counter */ \
201 blkCnt--; \
202 } \
203 \
204 /* Loop unrolling: Compute remaining outputs */ \
205 blkCnt = (blockSize - 1U) % 4U; \
206 \
207 \
208 while (blkCnt > 0U) \
209 { \
210 cur_absmax = *pSrc++; \
211 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
212 if ((_Float16)cur_absmax > (_Float16)out) \
213 { \
214 out = cur_absmax; \
215 outIndex = blockSize - blkCnt; \
216 } \
217 \
218 /* Decrement loop counter */ \
219 blkCnt--; \
220 } \
221 \
222 /* Store the extrema value and it's index into destination pointers */ \
223 *pResult = out; \
224 *pIndex = outIndex;
225 }
226 #else
arm_absmax_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult,uint32_t * pIndex)227 ARM_DSP_ATTRIBUTE void arm_absmax_f16(
228 const float16_t * pSrc,
229 uint32_t blockSize,
230 float16_t * pResult,
231 uint32_t * pIndex)
232 {
233 float16_t maxVal, out; /* Temporary variables to store the output value. */
234 uint32_t blkCnt, outIndex; /* Loop counter */
235
236 /* Initialise index value to zero. */
237 outIndex = 0U;
238
239 /* Load first input value that act as reference value for comparision */
240 out = (_Float16)fabsf((float32_t)*pSrc++);
241
242 /* Initialize blkCnt with number of samples */
243 blkCnt = (blockSize - 1U);
244
245 while (blkCnt > 0U)
246 {
247 /* Initialize maxVal to the next consecutive values one by one */
248 maxVal = (_Float16)fabsf((float32_t)*pSrc++);
249
250 /* compare for the maximum value */
251 if ((_Float16)out < (_Float16)maxVal)
252 {
253 /* Update the maximum value and it's index */
254 out = maxVal;
255 outIndex = blockSize - blkCnt;
256 }
257
258 /* Decrement loop counter */
259 blkCnt--;
260 }
261
262 /* Store the maximum value and it's index into destination pointers */
263 *pResult = out;
264 *pIndex = outIndex;
265 }
266 #endif /* defined(ARM_MATH_LOOPUNROLL) */
267 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
268 /**
269 @} end of AbsMax group
270 */
271
272 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
273
274