1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_absmax_f32.c
4  * Description:  Maximum value of absolute values of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
31 #include <limits.h>
32 #endif
33 
34 /**
35   @ingroup groupStats
36  */
37 
38 /**
39   @defgroup AbsMax Absolute Maximum
40 
41   Computes the maximum value of absolute values of an array of data.
42   The function returns both the maximum value and its position within the array.
43   There are separate functions for floating-point, Q31, Q15, and Q7 data types.
44  */
45 
46 /**
47   @addtogroup AbsMax
48   @{
49  */
50 
51 /**
52   @brief         Maximum value of absolute values of a floating-point vector.
53   @param[in]     pSrc       points to the input vector
54   @param[in]     blockSize  number of samples in input vector
55   @param[out]    pResult    maximum value returned here
56   @param[out]    pIndex     index of maximum value returned here
57  */
58 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
59 
60 #include "arm_helium_utils.h"
61 
/*
 * Helium (MVE) implementation.
 *
 * The input is consumed four lanes at a time under tail predication. Each
 * lane keeps a running maximum of |x| together with the index at which that
 * maximum was first reached; the four lanes are reduced to one
 * (value, index) pair once the whole vector has been read.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the largest absolute value
 * pIndex    : receives the index of the first sample holding that value
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    int32_t blkSize = blockSize;                        /* samples still to be processed */
    f32x4_t vecSrc;
    f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);  /* per-lane running maximum of |x| */
    float32_t maxValue = F32_ABSMIN;
    uint32_t idx = blockSize;
    uint32x4_t indexVec;                                /* sample index carried by each lane */
    uint32x4_t curExtremIdxVec;                         /* per-lane index of the running maximum */
    uint32_t curIdx = 0;
    mve_pred16_t p0;


    /* Indices of the first four samples; curIdx is advanced through the write-back form */
    indexVec = vidupq_wb_u32(&curIdx, 1);
    curExtremIdxVec = vdupq_n_u32(0);

    do {
        /* Tail predicate: only lanes that still map to a real sample are active */
        mve_pred16_t p = vctp32q(blkSize);

        vecSrc = vldrwq_z_f32((float32_t const *) pSrc, p);
        /* Inactive lanes are left unspecified here; they are masked out by p in the compare below */
        vecSrc = vabsq_m(vuninitializedq_f32(), vecSrc, p);
        /*
         * Get current max per lane and current index per lane when a new
         * max is selected.
         *
         * The compare is strict: a lane that meets its current maximum again
         * keeps the earlier index, so the final reduction yields the first
         * occurrence, as the scalar implementations do.
         * NOTE(review): lanes that never exceed F32_ABSMIN keep index 0; this
         * assumes F32_ABSMIN (defined outside this file) is the smallest
         * possible absolute value, i.e. 0 - confirm.
         */
        p0 = vcmpgtq_m(vecSrc, curExtremValVec, p);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        /*
         * Indices of the next four samples.
         * NOTE(review): the original author questioned whether the
         * compiler's tail-predication detection copes with this loop - unverified.
         */
        indexVec = vidupq_wb_u32(&curIdx, 1);

        blkSize -= 4;
        pSrc += 4;
    }
    while (blkSize > 0);

    /*
     * Get max value across the vector
     */
    maxValue = vmaxnmvq(maxValue, curExtremValVec);
    /*
     * Lanes whose maximum is below the global maximum get the largest
     * possible index (blockSize) so they cannot win the min-reduction
     */
    p0 = vcmpgeq(curExtremValVec, maxValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
    /*
     * The smallest remaining index belongs to a lane holding the maximum
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = maxValue;
}
122 
123 
124 #else
125 #if defined(ARM_MATH_LOOPUNROLL)
/*
 * Loop-unrolled scalar implementation.
 *
 * Scans the vector once, four samples per iteration, tracking the largest
 * absolute value seen so far and the index of its first occurrence.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the largest absolute value
 * pIndex    : receives the index of the first sample holding that value
 *
 * An empty vector (blockSize == 0) yields 0.0f at index 0 without reading pSrc.
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
        float32_t cur_absmax, out;                     /* Current |sample| and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */
        uint32_t index;                                /* Index of the last sample handled by the unrolled loop */

  /* Nothing to scan: avoid reading pSrc[0] and the wrap-around of blockSize - 1.
     Absolute values are never negative, so 0 is the neutral result. */
  if (blockSize == 0U)
  {
    *pResult = 0.0f;
    *pIndex = 0U;
    return;
  }

  /* The first sample is the initial reference for the comparisons */
  outIndex = 0U;
  out = fabsf(*pSrc++);
  index = 0U;

  /* Loop unrolling: handle 4 samples per iteration */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Strict comparison: on ties the earlier index is kept */
    cur_absmax = fabsf(*pSrc++);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 1U;
    }

    cur_absmax = fabsf(*pSrc++);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 2U;
    }

    cur_absmax = fabsf(*pSrc++);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 3U;
    }

    cur_absmax = fabsf(*pSrc++);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: handle the remaining (blockSize - 1) % 4 samples */
  blkCnt = (blockSize - 1U) % 4U;

  while (blkCnt > 0U)
  {
    cur_absmax = fabsf(*pSrc++);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      /* blkCnt samples are left, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum absolute value and its index */
  *pResult = out;
  *pIndex = outIndex;
}
212 #else
/*
 * Plain scalar implementation.
 *
 * Scans the vector once, tracking the largest absolute value seen so far
 * and the index of its first occurrence.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the largest absolute value
 * pIndex    : receives the index of the first sample holding that value
 *
 * An empty vector (blockSize == 0) yields 0.0f at index 0 without reading pSrc.
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
        float32_t absVal, out;                         /* Current |sample| and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */

  /* Nothing to scan: avoid reading pSrc[0] and the wrap-around of blockSize - 1.
     Absolute values are never negative, so 0 is the neutral result. */
  if (blockSize == 0U)
  {
    *pResult = 0.0f;
    *pIndex = 0U;
    return;
  }

  /* The first sample is the initial reference for the comparisons */
  outIndex = 0U;
  out = fabsf(*pSrc++);

  /* One sample has been consumed already */
  blkCnt = (blockSize - 1U);

  while (blkCnt > 0U)
  {
    absVal = fabsf(*pSrc++);

    /* Strict comparison: on ties the earlier index is kept */
    if (out < absVal)
    {
      out = absVal;
      /* blkCnt samples are left, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum absolute value and its index */
  *pResult = out;
  *pIndex = outIndex;
}
255 #endif /* defined(ARM_MATH_LOOPUNROLL) */
256 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
257 /**
258   @} end of AbsMax group
259  */
260