1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_absmax_q7.c
4  * Description:  Maximum value of absolute values of a Q7 vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 
31 /**
32   @ingroup groupStats
33  */
34 
35 /**
36   @addtogroup AbsMax
37   @{
38  */
39 
40 /**
41   @brief         Maximum value of absolute values of a Q7 vector.
42   @param[in]     pSrc       points to the input vector
43   @param[in]     blockSize  number of samples in input vector
44   @param[out]    pResult    maximum value returned here
45   @param[out]    pIndex     index of maximum value returned here
46   @return        none
47  */
48 
49 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
50 
51 #include <stdint.h>
52 #include "arm_helium_utils.h"
53 
54 #define MAX_BLKSZ_S8  (UINT8_MAX+1)
55 
/*
 * Helium (MVE) helper: maximum absolute value of a short Q7 block.
 *
 * pSrc       points to the input block
 * blockSize  number of samples in the block. Per-lane indices are kept in
 *            8-bit lanes, so the caller must not pass more than MAX_BLKSZ_S8.
 * pResult    receives the largest saturated absolute value
 * pIndex     receives the index, relative to pSrc, of the first sample
 *            holding that value
 *
 * Absolute values are saturated (|-128| -> 127), matching the scalar
 * implementations of arm_absmax_q7 in this file.
 */
static void arm_small_blk_absmax_q7(
    const q7_t * pSrc,
    uint16_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    int32_t        blkCnt;     /* samples left; signed so the last decrement can go below zero */
    q7x16_t        extremValVec = vdupq_n_s8(Q7_ABSMIN);
    q7_t           maxValue = Q7_ABSMIN;
    uint8x16_t     indexVec;
    uint8x16_t     extremIdxVec;
    mve_pred16_t   p0;
    uint8_t        extremIdxArr[16];

    /* indexVec = {0, 1, ..., 15}: index of the sample currently held by each lane */
    indexVec = vidupq_u8(0U, 1);

    /*
     * Seed the per-lane index scratch with the lane numbers so that every
     * entry is defined even for lanes that never record a new maximum
     * (lanes beyond a short block, or lanes that only ever see zeros).
     */
    vst1q_u8(extremIdxArr, indexVec);

    blkCnt = blockSize;
    do {
        /* tail predicate: only the lanes that still map to real samples */
        mve_pred16_t    p = vctp8q(blkCnt);
        q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);

        /* saturating abs: a plain vabsq would leave -128 negative */
        extremIdxVal = vqabsq(extremIdxVal);
        /*
         * Lanes whose new magnitude is strictly larger than the running
         * per-lane maximum. The strict compare keeps the earliest index
         * when equal magnitudes repeat within a lane.
         */
        p0 = vcmpgtq_m(extremIdxVal, extremValVec, p);

        /* take the new magnitude in the selected lanes, keep the old one elsewhere */
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
        /* store per-lane extrema indexes */
        vst1q_p_u8(extremIdxArr, indexVec, p0);

        indexVec += 16;
        pSrc += 16;
        blkCnt -= 16;
    }
    while (blkCnt > 0);


    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);

    /*
     * Lanes that do not hold the overall maximum get the largest possible
     * index so they cannot win the minimum-index reduction below.
     */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u8(extremIdxArr);

    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
    *pIndex = vminvq_u8(blockSize - 1, indexVec);
    *pResult = maxValue;
}
106 
/*
 * Helium (MVE) implementation of arm_absmax_q7.
 *
 * The vector helper tracks sample indices in 8-bit lanes, so the input is
 * processed in chunks of at most MAX_BLKSZ_S8 samples. The best chunk
 * (its value, the chunk number and the position inside the chunk) is
 * remembered and the global index is rebuilt at the end.
 *
 * pSrc       points to the input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the maximum absolute value
 * pIndex     receives the index of that value in the input vector
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    int32_t   totalSize = blockSize;

    if (totalSize <= MAX_BLKSZ_S8)
    {
        /* Fits in a single chunk: the helper's outputs are the final answer */
        arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex);
    }
    else
    {
        uint32_t  curIdx = 0;             /* number of the chunk being processed */
        q7_t      curBlkExtr = Q7_MIN;    /* best value found so far */
        uint32_t  curBlkPos = 0;          /* position of the best value inside its chunk */
        uint32_t  curBlkIdx = 0;          /* number of the chunk holding the best value */
        /*
         * process full chunks of MAX_BLKSZ_S8 elts
         */
        while (totalSize >= MAX_BLKSZ_S8)
        {
            arm_small_blk_absmax_q7(pSrc, MAX_BLKSZ_S8, pResult, pIndex);
            /* strict compare: on equal values the earlier chunk is kept */
            if (*pResult > curBlkExtr)
            {
                /*
                 * update partial extrema
                 */
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
            curIdx++;
            pSrc += MAX_BLKSZ_S8;
            totalSize -= MAX_BLKSZ_S8;
        }
        /*
         * remainder: skipped when blockSize is an exact multiple of
         * MAX_BLKSZ_S8, so the helper is never run on an empty block
         */
        if (totalSize > 0)
        {
            arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex);
            if (*pResult > curBlkExtr)
            {
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
        }
        /* rebuild the index relative to the start of the whole input */
        *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
        *pResult = curBlkExtr;
    }
}
160 #else
161 #if defined(ARM_MATH_DSP)
/*
 * DSP-extension implementation of arm_absmax_q7.
 *
 * Scans the vector once, four samples per loop iteration, keeping the
 * largest saturated absolute value and the index of its first occurrence.
 * Non-positive samples are negated with __QSUB8(0, x) so that -128 maps
 * to 127 instead of wrapping.
 *
 * pSrc       points to the input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the maximum absolute value (0 for an empty vector)
 * pIndex     receives the index of that value (0 for an empty vector)
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t cur_absmax, out;                          /* Current magnitude and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */
        uint32_t index;                                /* Index of the last sample handled by the unrolled loop */

  /*
   * Empty input: nothing may be read from pSrc, and (blockSize - 1U)
   * below would wrap around to a huge count.
   */
  if (blockSize == 0U)
  {
    *pResult = (q7_t) 0;
    *pIndex = 0U;
    return;
  }

  /* Initialize index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;
  out = (out > 0) ? out : (q7_t)__QSUB8(0, out);
  /* Initialize index of extrema value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Take the magnitude of the next consecutive values one by one */
    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    /* Strict compare: the first occurrence of the maximum keeps its index */
    if (cur_absmax > out)
    {
      /* Update the extrema value and its index */
      out = cur_absmax;
      outIndex = index + 1U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 2U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 3U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

  while (blkCnt > 0U)
  {
    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      /* blkCnt samples are left, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the extrema value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
248 #else
/*
 * Generic C implementation of arm_absmax_q7.
 *
 * Scans the vector once, keeping the largest saturated absolute value and
 * the index of its first occurrence. The magnitude of -128 (0x80) is
 * clamped to 127 (0x7f) because +128 is not representable in q7_t.
 *
 * pSrc       points to the input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the maximum absolute value (0 for an empty vector)
 * pIndex     receives the index of that value (0 for an empty vector)
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t curAbs, out;                              /* Current magnitude and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */

  /*
   * Empty input: nothing may be read from pSrc, and (blockSize - 1U)
   * below would wrap around to a huge count.
   */
  if (blockSize == 0U)
  {
    *pResult = (q7_t) 0;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
  pSrc++;

  /* Initialize blkCnt with the number of samples still to visit */
  blkCnt = (blockSize - 1U);

  while (blkCnt > 0U)
  {
    /* Saturated magnitude of the next sample */
    curAbs = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
    pSrc++;

    /* Strict compare: the first occurrence of the maximum keeps its index */
    if (out < curAbs)
    {
      /* Update the maximum value and its index */
      out = curAbs;
      /* blkCnt samples are left, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
290 #endif /* defined(ARM_MATH_DSP) */
291 #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
292 /**
293   @} end of AbsMax group
294  */
295