1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_absmax_no_idx_f16.c
 * Description:  Maximum value of the absolute values of a floating-point vector
5 *
6 * $Date: 16 November 2021
7 * $Revision: V1.10.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
34 #include <limits.h>
35 #endif
36
37 /**
38 @ingroup groupStats
39 */
40
41
42 /**
43 @addtogroup AbsMax
44 @{
45 */
46
47 /**
48 @brief Maximum value of absolute values of a floating-point vector.
49 @param[in] pSrc points to the input vector
50 @param[in] blockSize number of samples in input vector
51 @param[out] pResult maximum value returned here
52 */
53
54 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
55
56 #include "arm_helium_utils.h"
arm_absmax_no_idx_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult)57 ARM_DSP_ATTRIBUTE void arm_absmax_no_idx_f16(
58 const float16_t * pSrc,
59 uint32_t blockSize,
60 float16_t * pResult)
61 {
62 uint16_t blkCnt; /* loop counters */
63 f16x8_t vecSrc;
64 float16_t const *pSrcVec;
65 f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMIN);
66 float16_t maxValue = F16_ABSMIN;
67 mve_pred16_t p0;
68
69
70 pSrcVec = (float16_t const *) pSrc;
71 blkCnt = blockSize >> 3;
72 while (blkCnt > 0)
73 {
74 vecSrc = vldrhq_f16(pSrcVec);
75 pSrcVec += 8;
76 /*
77 * update per-lane max.
78 */
79 curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);
80 /*
81 * Decrement the blockSize loop counter
82 */
83 blkCnt--;
84 }
85 /*
86 * tail
87 * (will be merged thru tail predication)
88 */
89 blkCnt = blockSize & 7;
90 if (blkCnt > 0U)
91 {
92 vecSrc = vldrhq_f16(pSrcVec);
93 pSrcVec += 8;
94 p0 = vctp16q(blkCnt);
95 /*
96 * Get current max per lane and current index per lane
97 * when a max is selected
98 */
99 curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
100 }
101 /*
102 * Get max value across the vector
103 */
104 maxValue = vmaxnmavq(maxValue, curExtremValVec);
105 *pResult = maxValue;
106 }
107 #else
108 #if defined(ARM_MATH_LOOPUNROLL)
arm_absmax_no_idx_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult)109 ARM_DSP_ATTRIBUTE void arm_absmax_no_idx_f16(
110 const float16_t * pSrc,
111 uint32_t blockSize,
112 float16_t * pResult)
113 {
114 float16_t cur_absmax, out; /* Temporary variables to store the output value. */\
115 uint32_t blkCnt; /* Loop counter */ \
116 \
117 \
118 /* Load first input value that act as reference value for comparision */ \
119 out = *pSrc++; \
120 out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \
121 \
122 \
123 /* Loop unrolling: Compute 4 outputs at a time */ \
124 blkCnt = (blockSize - 1U) >> 2U; \
125 \
126 while (blkCnt > 0U) \
127 { \
128 /* Initialize cur_absmax to next consecutive values one by one */ \
129 cur_absmax = *pSrc++; \
130 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
131 /* compare for the extrema value */ \
132 if ((_Float16)cur_absmax > (_Float16)out) \
133 { \
134 /* Update the extrema value and it's index */ \
135 out = cur_absmax; \
136 } \
137 \
138 cur_absmax = *pSrc++; \
139 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
140 if ((_Float16)cur_absmax > (_Float16)out) \
141 { \
142 out = cur_absmax; \
143 } \
144 \
145 cur_absmax = *pSrc++; \
146 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
147 if ((_Float16)cur_absmax > (_Float16)out) \
148 { \
149 out = cur_absmax; \
150 } \
151 \
152 cur_absmax = *pSrc++; \
153 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
154 if ((_Float16)cur_absmax > (_Float16)out) \
155 { \
156 out = cur_absmax; \
157 } \
158 \
159 \
160 /* Decrement loop counter */ \
161 blkCnt--; \
162 } \
163 \
164 /* Loop unrolling: Compute remaining outputs */ \
165 blkCnt = (blockSize - 1U) % 4U; \
166 \
167 \
168 while (blkCnt > 0U) \
169 { \
170 cur_absmax = *pSrc++; \
171 cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
172 if ((_Float16)cur_absmax > (_Float16)out) \
173 { \
174 out = cur_absmax; \
175 } \
176 \
177 /* Decrement loop counter */ \
178 blkCnt--; \
179 } \
180 \
181 /* Store the extrema value and it's index into destination pointers */ \
182 *pResult = out; \
183 }
184 #else
arm_absmax_no_idx_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult)185 ARM_DSP_ATTRIBUTE void arm_absmax_no_idx_f16(
186 const float16_t * pSrc,
187 uint32_t blockSize,
188 float16_t * pResult)
189 {
190 float16_t maxVal, out; /* Temporary variables to store the output value. */
191 uint32_t blkCnt; /* Loop counter */
192
193
194
195 /* Load first input value that act as reference value for comparision */
196 out = (_Float16)fabsf((float32_t)*pSrc++);
197
198 /* Initialize blkCnt with number of samples */
199 blkCnt = (blockSize - 1U);
200
201 while (blkCnt > 0U)
202 {
203 /* Initialize maxVal to the next consecutive values one by one */
204 maxVal = (_Float16)fabsf((float32_t)*pSrc++);
205
206 /* compare for the maximum value */
207 if ((_Float16)out < (_Float16)maxVal)
208 {
209 /* Update the maximum value and it's index */
210 out = maxVal;
211 }
212
213 /* Decrement loop counter */
214 blkCnt--;
215 }
216
217 /* Store the maximum value and it's index into destination pointers */
218 *pResult = out;
219 }
220 #endif /* defined(ARM_MATH_LOOPUNROLL) */
221 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
222 /**
223 @} end of AbsMax group
224 */
225
226 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
227
228