1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_max_f32.c
4  * Description:  Maximum value of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
#include "dsp/statistics_functions.h"
#include <float.h>
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
33 
34 /**
35   @ingroup groupStats
36  */
37 
38 /**
39   @defgroup Max Maximum
40 
41   Computes the maximum value of an array of data.
42   The function returns both the maximum value and its position within the array.
43   There are separate functions for floating-point, Q31, Q15, and Q7 data types.
44  */
45 
46 /**
47   @addtogroup Max
48   @{
49  */
50 
51 /**
52   @brief         Maximum value of a floating-point vector.
53   @param[in]     pSrc       points to the input vector
54   @param[in]     blockSize  number of samples in input vector
55   @param[out]    pResult    maximum value returned here
56   @param[out]    pIndex     index of maximum value returned here
57  */
58 
59 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_max_f32(const float32_t * pSrc,uint32_t blockSize,float32_t * pResult,uint32_t * pIndex)60 ARM_DSP_ATTRIBUTE void arm_max_f32(
61   const float32_t * pSrc,
62   uint32_t blockSize,
63   float32_t * pResult,
64   uint32_t * pIndex)
65 {
66     uint32_t blkCnt;
67     f32x4_t vecSrc;
68     f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN);
69     float32_t maxValue = F32_MIN;
70     uint32_t idx = blockSize;
71     uint32x4_t indexVec;
72     uint32x4_t curExtremIdxVec;
73     uint32_t curIdx = 0;
74     mve_pred16_t p0;
75     float32_t tmp;
76 
77 
78     indexVec = vidupq_wb_u32(&curIdx, 1);
79     curExtremIdxVec = vdupq_n_u32(0);
80 
81     /* Compute 4 outputs at a time */
82     blkCnt = blockSize >> 2U;
83     while (blkCnt > 0U)
84     {
85         vecSrc = vldrwq_f32(pSrc);
86         /*
87          * Get current max per lane and current index per lane
88          * when a max is selected
89          */
90         p0 = vcmpgeq(vecSrc, curExtremValVec);
91         curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
92         curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
93 
94         indexVec = vidupq_wb_u32(&curIdx, 1);
95 
96         pSrc += 4;
97         /* Decrement the loop counter */
98         blkCnt--;
99     }
100 
101 
102     /*
103      * Get max value across the vector
104      */
105     maxValue = vmaxnmvq(maxValue, curExtremValVec);
106     /*
107      * set index for lower values to max possible index
108      */
109     p0 = vcmpgeq(curExtremValVec, maxValue);
110     indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
111     /*
112      * Get min index which is thus for a max value
113      */
114     idx = vminvq(idx, indexVec);
115 
116     /* Tail */
117     blkCnt = blockSize & 0x3;
118 
119     while (blkCnt > 0U)
120     {
121       /* Initialize tmp to the next consecutive values one by one */
122       tmp = *pSrc++;
123 
124       /* compare for the maximum value */
125       if (maxValue < tmp)
126       {
127         /* Update the maximum value and it's index */
128         maxValue = tmp;
129         idx = blockSize - blkCnt;
130       }
131 
132       /* Decrement loop counter */
133       blkCnt--;
134     }
135 
136     /*
137      * Save result
138      */
139     *pIndex = idx;
140     *pResult = maxValue;
141 }
142 
143 #else
144 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_max_f32(const float32_t * pSrc,uint32_t blockSize,float32_t * pResult,uint32_t * pIndex)145 ARM_DSP_ATTRIBUTE void arm_max_f32(
146   const float32_t * pSrc,
147   uint32_t blockSize,
148   float32_t * pResult,
149   uint32_t * pIndex)
150 {
151   float32_t maxVal1, out;               /* Temporary variables to store the output value. */
152   uint32_t blkCnt, outIndex;              /* loop counter */
153 
154   float32x4_t outV, srcV;
155   float32x2_t outV2;
156 
157   uint32x4_t idxV;
158   uint32x4_t maxIdx;
159   static const uint32_t indexInit[4]={4,5,6,7};
160   static const uint32_t countVInit[4]={0,1,2,3};
161 
162   uint32x4_t index;
163   uint32x4_t delta;
164   uint32x4_t countV;
165   uint32x2_t countV2;
166 
167   maxIdx = vdupq_n_u32(UINT_MAX);
168   delta = vdupq_n_u32(4);
169   index = vld1q_u32(indexInit);
170   countV = vld1q_u32(countVInit);
171 
172 
173   /* Initialise the index value to zero. */
174   outIndex = 0U;
175 
176   /* Load first input value that act as reference value for comparison */
177   if (blockSize <= 3)
178   {
179       out = *pSrc++;
180 
181       blkCnt = blockSize - 1;
182 
183       while (blkCnt > 0U)
184       {
185         /* Initialize maxVal to the next consecutive values one by one */
186         maxVal1 = *pSrc++;
187 
188         /* compare for the maximum value */
189         if (out < maxVal1)
190         {
191           /* Update the maximum value and it's index */
192           out = maxVal1;
193           outIndex = blockSize - blkCnt;
194         }
195 
196         /* Decrement the loop counter */
197         blkCnt--;
198       }
199   }
200   else
201   {
202       outV = vld1q_f32(pSrc);
203       pSrc += 4;
204 
205       /* Compute 4 outputs at a time */
206       blkCnt = (blockSize - 4 ) >> 2U;
207 
208       while (blkCnt > 0U)
209       {
210         srcV = vld1q_f32(pSrc);
211         pSrc += 4;
212 
213         idxV = vcgtq_f32(srcV, outV);
214         outV = vbslq_f32(idxV, srcV, outV );
215         countV = vbslq_u32(idxV, index,countV );
216 
217         index = vaddq_u32(index,delta);
218 
219         /* Decrement the loop counter */
220         blkCnt--;
221       }
222 
223       outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
224       outV2 = vpmax_f32(outV2,outV2);
225       out = vget_lane_f32(outV2, 0);
226 
227       idxV = vceqq_f32(outV, vdupq_n_f32(out));
228       countV = vbslq_u32(idxV, countV,maxIdx);
229 
230       countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
231       countV2 = vpmin_u32(countV2,countV2);
232       outIndex = vget_lane_u32(countV2,0);
233 
234       /* if (blockSize - 1U) is not multiple of 4 */
235       blkCnt = (blockSize - 4 ) % 4U;
236 
237       while (blkCnt > 0U)
238       {
239         /* Initialize maxVal to the next consecutive values one by one */
240         maxVal1 = *pSrc++;
241 
242         /* compare for the maximum value */
243         if (out < maxVal1)
244         {
245           /* Update the maximum value and it's index */
246           out = maxVal1;
247           outIndex = blockSize - blkCnt ;
248         }
249 
250         /* Decrement the loop counter */
251         blkCnt--;
252       }
253 
254 
255   }
256 
257   /* Store the maximum value and it's index into destination pointers */
258   *pResult = out;
259   *pIndex = outIndex;
260 }
261 #else
arm_max_f32(const float32_t * pSrc,uint32_t blockSize,float32_t * pResult,uint32_t * pIndex)262 ARM_DSP_ATTRIBUTE void arm_max_f32(
263   const float32_t * pSrc,
264         uint32_t blockSize,
265         float32_t * pResult,
266         uint32_t * pIndex)
267 {
268         float32_t maxVal, out;                         /* Temporary variables to store the output value. */
269         uint32_t blkCnt, outIndex;                     /* Loop counter */
270 
271 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
272         uint32_t index;                                /* index of maximum value */
273 #endif
274 
275   /* Initialise index value to zero. */
276   outIndex = 0U;
277 
278   /* Load first input value that act as reference value for comparision */
279   out = *pSrc++;
280 
281 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
282   /* Initialise index of maximum value. */
283   index = 0U;
284 
285   /* Loop unrolling: Compute 4 outputs at a time */
286   blkCnt = (blockSize - 1U) >> 2U;
287 
288   while (blkCnt > 0U)
289   {
290     /* Initialize maxVal to next consecutive values one by one */
291     maxVal = *pSrc++;
292 
293     /* compare for the maximum value */
294     if (out < maxVal)
295     {
296       /* Update the maximum value and it's index */
297       out = maxVal;
298       outIndex = index + 1U;
299     }
300 
301     maxVal = *pSrc++;
302     if (out < maxVal)
303     {
304       out = maxVal;
305       outIndex = index + 2U;
306     }
307 
308     maxVal = *pSrc++;
309     if (out < maxVal)
310     {
311       out = maxVal;
312       outIndex = index + 3U;
313     }
314 
315     maxVal = *pSrc++;
316     if (out < maxVal)
317     {
318       out = maxVal;
319       outIndex = index + 4U;
320     }
321 
322     index += 4U;
323 
324     /* Decrement loop counter */
325     blkCnt--;
326   }
327 
328   /* Loop unrolling: Compute remaining outputs */
329   blkCnt = (blockSize - 1U) % 4U;
330 
331 #else
332 
333   /* Initialize blkCnt with number of samples */
334   blkCnt = (blockSize - 1U);
335 
336 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
337 
338   while (blkCnt > 0U)
339   {
340     /* Initialize maxVal to the next consecutive values one by one */
341     maxVal = *pSrc++;
342 
343     /* compare for the maximum value */
344     if (out < maxVal)
345     {
346       /* Update the maximum value and it's index */
347       out = maxVal;
348       outIndex = blockSize - blkCnt;
349     }
350 
351     /* Decrement loop counter */
352     blkCnt--;
353   }
354 
355   /* Store the maximum value and it's index into destination pointers */
356   *pResult = out;
357   *pIndex = outIndex;
358 }
359 #endif /* #if defined(ARM_MATH_NEON) */
360 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
361 
362 /**
363   @} end of Max group
364  */
365