1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_absmax_f32.c
4  * Description:  Maximum value of absolute values of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
31 #include <limits.h>
32 #endif
33 
34 /**
35   @ingroup groupStats
36  */
37 
38 /**
39   @defgroup AbsMax Absolute Maximum
40 
41   Computes the maximum value of absolute values of an array of data.
42   The function returns both the maximum value and its position within the array.
43   There are separate functions for floating-point, Q31, Q15, and Q7 data types.
44  */
45 
46 /**
47   @addtogroup AbsMax
48   @{
49  */
50 
51 /**
52   @brief         Maximum value of absolute values of a floating-point vector.
53   @param[in]     pSrc       points to the input vector
54   @param[in]     blockSize  number of samples in input vector
55   @param[out]    pResult    maximum value returned here
56   @param[out]    pIndex     index of maximum value returned here
57   @return        none
58  */
59 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
60 
61 #include "arm_helium_utils.h"
62 
/*
 * Helium (MVE) implementation.
 *
 * The input is consumed four samples per pass. Each vector lane keeps the
 * largest absolute value it has met so far together with the sample index
 * at which it met it; after the loop the four lanes are reduced to a single
 * (value, index) pair.
 *
 * pSrc       points to the input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the maximum absolute value
 * pIndex     receives the index of the first sample holding that value
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    int32_t blkSize = (int32_t) blockSize;      /* samples still to process */
    f32x4_t vecSrc;
    f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);
    float32_t maxValue = F32_ABSMIN;
    uint32_t idx = blockSize;
    uint32x4_t indexVec;
    uint32x4_t curExtremIdxVec = vdupq_n_u32(0);
    uint32_t curIdx = 0;
    mve_pred16_t p0;

    /* Sample indices of the lanes for the first pass; curIdx is written back
       by the intrinsic so the next call continues the sequence. */
    indexVec = vidupq_wb_u32(&curIdx, 1);

    do {
        /* Tail predicate: only lanes that map to a real sample are active */
        mve_pred16_t p = vctp32q(blkSize);

        vecSrc = vldrwq_z_f32(pSrc, p);
        vecSrc = vabsq_m(vuninitializedq_f32(), vecSrc, p);

        /*
         * Update the per-lane maximum and its index.
         * The comparison is strict so that a later sample which merely equals
         * the lane's current maximum does not replace the stored index; this
         * keeps the reported index on the first occurrence, like the scalar
         * implementations of this function.
         */
        p0 = vcmpgtq_m(vecSrc, curExtremValVec, p);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /* Lane indices for the next pass */
        indexVec = vidupq_wb_u32(&curIdx, 1);

        blkSize -= 4;
        pSrc += 4;
    }
    while (blkSize > 0);

    /* Maximum across the four lanes */
    maxValue = vmaxnmvq(maxValue, curExtremValVec);

    /*
     * Keep the recorded index only in the lanes that hold the maximum; every
     * other lane gets blockSize, which is larger than any valid index.
     */
    p0 = vcmpgeq(curExtremValVec, maxValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);

    /* The smallest remaining index is the position of the maximum */
    idx = vminvq(idx, indexVec);

    /* Save result */
    *pIndex = idx;
    *pResult = maxValue;
}
123 
124 
125 #else
126 #if defined(ARM_MATH_LOOPUNROLL)
/*
 * Scalar implementation with the main loop unrolled by four.
 *
 * pSrc       points to the input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the maximum absolute value
 * pIndex     receives the index of the first sample holding that value
 *
 * An empty vector (blockSize == 0) yields 0.0f at index 0 and pSrc is not
 * dereferenced.
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
        float32_t cur_absmax, out;                     /* Current candidate and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of running maximum */
        uint32_t index;                                /* Index of the last sample of the previous group of four */

  /* Without this guard the first load below would read past the buffer and
     (blockSize - 1U) would wrap around to 0xFFFFFFFF. */
  if (blockSize == 0U)
  {
    *pResult = 0.0f;
    *pIndex = 0U;
    return;
  }

  /* The first sample is the initial reference value */
  outIndex = 0U;
  out = *pSrc++;
  out = (out > 0.0f) ? out : -out;
  index = 0U;

  /* Loop unrolling: process 4 samples at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
    /* Strict comparison: on ties the earlier index is kept */
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 1U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 2U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 3U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: process the remaining (blockSize - 1) % 4 samples */
  blkCnt = (blockSize - 1U) % 4U;

  while (blkCnt > 0U)
  {
    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
    if (cur_absmax > out)
    {
      out = cur_absmax;
      /* blkCnt samples remain, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
213 #else
/*
 * Plain scalar implementation.
 *
 * pSrc       points to the input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the maximum absolute value
 * pIndex     receives the index of the first sample holding that value
 *
 * An empty vector (blockSize == 0) yields 0.0f at index 0 and pSrc is not
 * dereferenced.
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
        float32_t maxVal, out;                         /* Current candidate and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of running maximum */

  /* Without this guard the first load below would read past the buffer and
     (blockSize - 1U) would wrap around to 0xFFFFFFFF. */
  if (blockSize == 0U)
  {
    *pResult = 0.0f;
    *pIndex = 0U;
    return;
  }

  /* The first sample is the initial reference value */
  outIndex = 0U;
  out = fabsf(*pSrc++);

  /* Remaining samples to compare against the reference */
  blkCnt = (blockSize - 1U);

  while (blkCnt > 0U)
  {
    /* Absolute value of the next sample */
    maxVal = fabsf(*pSrc++);

    /* Strict comparison: on ties the earlier index is kept */
    if (out < maxVal)
    {
      out = maxVal;
      /* blkCnt samples remain, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
256 #endif /* defined(ARM_MATH_LOOPUNROLL) */
257 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
258 /**
259   @} end of AbsMax group
260  */
261