1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_min_f16.c
4 * Description: Minimum value of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33
34 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
35 #include <limits.h>
36 #endif
37
38
39 /**
40 @ingroup groupStats
41 */
42
43 /**
44 @addtogroup Min
45 @{
46 */
47
48 /**
49 @brief Minimum value of a floating-point vector.
50 @param[in] pSrc points to the input vector
51 @param[in] blockSize number of samples in input vector
52 @param[out] pResult minimum value returned here
53 @param[out] pIndex index of minimum value returned here
54 @return none
55 */
56
57 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
58
/*
 * Helium (MVE) implementation.
 *
 * Scans the input eight half-precision lanes at a time. Each lane keeps a
 * running minimum together with the index at which it was taken; the lanes
 * are reduced to a single value/index pair at the end.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the minimum value
 * pIndex    receives the index of the minimum value
 *
 * Notes:
 *  - Lane indices are held in 16-bit lanes (uint16x8_t), so indices above
 *    65535 cannot be represented by this variant.
 *  - Lanes are updated with a "less than or equal" comparison, so when the
 *    minimum occurs several times the reported index is not necessarily the
 *    first occurrence.
 */
void arm_min_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    int32_t blkCnt;                                  /* loop counter */
    f16x8_t vecSrc;                                  /* current group of 8 samples */
    float16_t const *pSrcVec;                        /* read pointer */
    f16x8_t curExtremValVec = vdupq_n_f16(F16_MAX);  /* per-lane running minimum */
    float16_t minValue = F16_MAX;                    /* overall minimum */
    uint32_t idx = blockSize;                        /* overall index, seeded with blockSize */
    uint16x8_t indexVec;                             /* indices of the samples in vecSrc */
    uint16x8_t curExtremIdxVec;                      /* per-lane index of the running minimum */
    mve_pred16_t p0;

    /* Lane i starts at index i; every lane's best index starts at 0. */
    indexVec = vidupq_u16((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u16(0);

    pSrcVec = (float16_t const *) pSrc;

    /* Full groups of 8 samples. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0)
    {
        vecSrc = vldrhq_f16(pSrcVec);
        pSrcVec += 8;

        /*
         * Lanes whose new sample is <= the running minimum take the new
         * sample and its index; the other lanes are left unchanged.
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Advance the per-lane indices to the next group. */
        indexVec = indexVec + 8;

        blkCnt--;
    }

    /* Tail: 1..7 remaining samples, handled through predication. */
    blkCnt = blockSize & 7;
    if (blkCnt > 0)
    {
        /*
         * Build the tail predicate first and use it for the load as well,
         * so only the blkCnt valid samples are fetched. A plain 8-lane load
         * here would read past the end of the input vector.
         */
        p0 = vctp16q(blkCnt);
        vecSrc = vldrhq_z_f16(pSrcVec, p0);

        /* Same update as the main loop, restricted to the active lanes. */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /* Reduce the per-lane minima to a single value. */
    minValue = vminnmvq(minValue, curExtremValVec);

    /*
     * Keep the recorded index only in lanes that hold the overall minimum;
     * every other lane gets blockSize (as a 16-bit lane value) so that it
     * loses the index reduction below.
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);

    /* Smallest index among the lanes holding the minimum. */
    idx = vminvq(idx, indexVec);

    /* Save result */
    *pIndex = idx;
    *pResult = minValue;
}
133
134 #else
135
/*
 * Scalar implementation.
 *
 * Walks the input once, keeping the smallest value seen so far and the
 * index at which it was found. The comparison is strict (out > sample), so
 * when the minimum occurs several times the first occurrence is reported.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the minimum value
 * pIndex    receives the index of the minimum value
 *
 * If blockSize is 0 the function returns immediately: pSrc is not read and
 * neither *pResult nor *pIndex is written.
 */
void arm_min_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
        float16_t minVal, out;                         /* Current sample and running minimum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of running minimum */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
        uint32_t index;                                /* Index of the last sample consumed by the unrolled loop */
#endif

  /*
   * Empty input has no minimum. Without this guard the first load below
   * would read pSrc[0], and (blockSize - 1U) would wrap around to the
   * largest uint32_t value, sending the loops far beyond the buffer.
   */
  if (blockSize == 0U)
  {
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* The reference sample sits at index 0. */
  index = 0U;

  /* Loop unrolling: process 4 samples per iteration */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Load the next consecutive samples one by one */
    minVal = *pSrc++;

    /* Compare for the minimum value */
    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      outIndex = index + 1U;
    }

    minVal = *pSrc++;
    if (out > minVal)
    {
      out = minVal;
      outIndex = index + 2U;
    }

    minVal = *pSrc++;
    if (out > minVal)
    {
      out = minVal;
      outIndex = index + 3U;
    }

    minVal = *pSrc++;
    if (out > minVal)
    {
      out = minVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: process the remaining samples */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Every sample after the first one still has to be examined */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Load the next consecutive sample */
    minVal = *pSrc++;

    /* Compare for the minimum value */
    if (out > minVal)
    {
      /* Update the minimum value and its index.
         blkCnt samples remain, so this one sits at blockSize - blkCnt. */
      out = minVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
234
235 /**
236 @} end of Min group
237 */
238
239 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
240
241