1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_absmax_q7.c
4 * Description: Maximum value of absolute values of a Q7 vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35 /**
36 @addtogroup AbsMax
37 @{
38 */
39
40 /**
41 @brief Maximum value of absolute values of a Q7 vector.
42 @param[in] pSrc points to the input vector
43 @param[in] blockSize number of samples in input vector
44 @param[out] pResult maximum value returned here
45 @param[out] pIndex index of maximum value returned here
46 */
47
/*
 * Build-time notice for GCC: the Helium kernel below is compiled only when
 * ARM_MATH_MVEI is set and the compiler is not GCC, so the notice must be keyed
 * on the same ARM_MATH_MVEI macro to fire whenever the scalar fallback
 * replaces the Helium kernel.
 */
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
#pragma message "Scalar version of arm_absmax_q7 built. Helium version has build issues with gcc."
#endif
51
52
53 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)
54
55 #include <stdint.h>
56 #include "arm_helium_utils.h"
57
58 #define MAX_BLKSZ_S8 (UINT8_MAX+1)
59
/**
  @brief         Absolute-maximum search over one short Q7 block (Helium kernel).
  @param[in]     pSrc       points to the input block
  @param[in]     blockSize  number of samples in the block. Sample positions are
                            tracked in 8-bit lanes, and arm_absmax_q7 never passes
                            more than MAX_BLKSZ_S8 samples.
  @param[out]    pResult    largest absolute value found (as produced by vqabsq)
  @param[out]    pIndex     position of the first sample holding that value
 */
static void arm_small_blk_absmax_q7(
  const q7_t * pSrc,
        uint16_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    int32_t       blkCnt;                               /* samples left to process */
    q7x16_t       extremValVec = vdupq_n_s8(Q7_ABSMIN); /* running maximum of each lane */
    q7_t          maxValue = Q7_ABSMIN;
    uint8x16_t    indexVec;                             /* sample positions of the current chunk */
    uint8x16_t    extremIdxVec;
    mve_pred16_t  p0;
    /*
     * Position of the running maximum of each lane.
     * Zero-initialised on purpose: the predicated store in the loop only writes
     * lanes that saw a value strictly above their running maximum. When the
     * overall maximum is still Q7_ABSMIN, every lane is selected by the final
     * reduction, including lanes that were never written, and the index was
     * then derived from uninitialised stack bytes. With zeros in those slots
     * the reduction yields position 0, i.e. the first sample.
     */
    uint8_t       extremIdxArr[16] = { 0 };

    indexVec = vidupq_u8(0U, 1);

    blkCnt = blockSize;
    do {
        /* Tail predication: only the lanes that still map to samples are active */
        mve_pred16_t  p = vctp8q(blkCnt);
        q7x16_t       extremIdxVal = vld1q_z_s8(pSrc, p);

        extremIdxVal = vqabsq(extremIdxVal);
        /*
         * Lanes whose new magnitude beats their running maximum.
         * The comparison is strict, so a lane keeps the earliest position
         * at which its maximum was reached.
         */
        p0 = vcmpgtq_m(extremIdxVal, extremValVec, p);

        /* OR of a value with itself: a predicated copy of the new per-lane maxima */
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
        /* Record the sample position for the lanes that were just updated */
        vst1q_p_u8(extremIdxArr, indexVec, p0);

        indexVec += 16;
        pSrc += 16;
        blkCnt -= 16;
    }
    while (blkCnt > 0);

    /* Reduce the per-lane maxima to the block maximum */
    maxValue = vmaxvq(maxValue, extremValVec);

    /*
     * Lanes that do not hold the block maximum get the largest possible
     * position (blockSize - 1), so the minimum across lanes is the smallest
     * position at which the maximum occurs.
     */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u8(extremIdxArr);

    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
    *pIndex = vminvq_u8(blockSize - 1, indexVec);
    *pResult = maxValue;
}
110
arm_absmax_q7(const q7_t * pSrc,uint32_t blockSize,q7_t * pResult,uint32_t * pIndex)111 ARM_DSP_ATTRIBUTE void arm_absmax_q7(
112 const q7_t * pSrc,
113 uint32_t blockSize,
114 q7_t * pResult,
115 uint32_t * pIndex)
116 {
117 int32_t totalSize = blockSize;
118
119 if (totalSize <= MAX_BLKSZ_S8)
120 {
121 arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex);
122 }
123 else
124 {
125 uint32_t curIdx = 0;
126 q7_t curBlkExtr = Q7_MIN;
127 uint32_t curBlkPos = 0;
128 uint32_t curBlkIdx = 0;
129 /*
130 * process blocks of 255 elts
131 */
132 while (totalSize >= MAX_BLKSZ_S8)
133 {
134 const q7_t *curSrc = pSrc;
135
136 arm_small_blk_absmax_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
137 if (*pResult > curBlkExtr)
138 {
139 /*
140 * update partial extrema
141 */
142 curBlkExtr = *pResult;
143 curBlkPos = *pIndex;
144 curBlkIdx = curIdx;
145 }
146 curIdx++;
147 pSrc += MAX_BLKSZ_S8;
148 totalSize -= MAX_BLKSZ_S8;
149 }
150 /*
151 * remainder
152 */
153 arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex);
154 if (*pResult > curBlkExtr)
155 {
156 curBlkExtr = *pResult;
157 curBlkPos = *pIndex;
158 curBlkIdx = curIdx;
159 }
160 *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
161 *pResult = curBlkExtr;
162 }
163 }
164 #else
165 #if defined(ARM_MATH_DSP)
arm_absmax_q7(const q7_t * pSrc,uint32_t blockSize,q7_t * pResult,uint32_t * pIndex)166 ARM_DSP_ATTRIBUTE void arm_absmax_q7(
167 const q7_t * pSrc,
168 uint32_t blockSize,
169 q7_t * pResult,
170 uint32_t * pIndex)
171 {
172 q7_t cur_absmax, out; /* Temporary variables to store the output value. */\
173 uint32_t blkCnt, outIndex; /* Loop counter */ \
174 uint32_t index; /* index of maximum value */ \
175 \
176 /* Initialize index value to zero. */ \
177 outIndex = 0U; \
178 /* Load first input value that act as reference value for comparision */ \
179 out = *pSrc++; \
180 out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \
181 /* Initialize index of extrema value. */ \
182 index = 0U; \
183 \
184 /* Loop unrolling: Compute 4 outputs at a time */ \
185 blkCnt = (blockSize - 1U) >> 2U; \
186 \
187 while (blkCnt > 0U) \
188 { \
189 /* Initialize cur_absmax to next consecutive values one by one */ \
190 cur_absmax = *pSrc++; \
191 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
192 /* compare for the extrema value */ \
193 if (cur_absmax > out) \
194 { \
195 /* Update the extrema value and it's index */ \
196 out = cur_absmax; \
197 outIndex = index + 1U; \
198 } \
199 \
200 cur_absmax = *pSrc++; \
201 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
202 if (cur_absmax > out) \
203 { \
204 out = cur_absmax; \
205 outIndex = index + 2U; \
206 } \
207 \
208 cur_absmax = *pSrc++; \
209 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
210 if (cur_absmax > out) \
211 { \
212 out = cur_absmax; \
213 outIndex = index + 3U; \
214 } \
215 \
216 cur_absmax = *pSrc++; \
217 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
218 if (cur_absmax > out) \
219 { \
220 out = cur_absmax; \
221 outIndex = index + 4U; \
222 } \
223 \
224 index += 4U; \
225 \
226 /* Decrement loop counter */ \
227 blkCnt--; \
228 } \
229 \
230 /* Loop unrolling: Compute remaining outputs */ \
231 blkCnt = (blockSize - 1U) % 4U; \
232 \
233 \
234 while (blkCnt > 0U) \
235 { \
236 cur_absmax = *pSrc++; \
237 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
238 if (cur_absmax > out) \
239 { \
240 out = cur_absmax; \
241 outIndex = blockSize - blkCnt; \
242 } \
243 \
244 /* Decrement loop counter */ \
245 blkCnt--; \
246 } \
247 \
248 /* Store the extrema value and it's index into destination pointers */ \
249 *pResult = out; \
250 *pIndex = outIndex;
251 }
252 #else
arm_absmax_q7(const q7_t * pSrc,uint32_t blockSize,q7_t * pResult,uint32_t * pIndex)253 ARM_DSP_ATTRIBUTE void arm_absmax_q7(
254 const q7_t * pSrc,
255 uint32_t blockSize,
256 q7_t * pResult,
257 uint32_t * pIndex)
258 {
259 q7_t maxVal, out; /* Temporary variables to store the output value. */
260 uint32_t blkCnt, outIndex; /* Loop counter */
261
262
263 /* Initialise index value to zero. */
264 outIndex = 0U;
265 /* Load first input value that act as reference value for comparision */
266 out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
267 pSrc++;
268
269 /* Initialize blkCnt with number of samples */
270 blkCnt = (blockSize - 1U);
271
272 while (blkCnt > 0U)
273 {
274 /* Initialize maxVal to the next consecutive values one by one */
275 maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
276 pSrc++;
277
278 /* compare for the maximum value */
279 if (out < maxVal)
280 {
281 /* Update the maximum value and it's index */
282 out = maxVal;
283 outIndex = blockSize - blkCnt;
284 }
285
286 /* Decrement loop counter */
287 blkCnt--;
288 }
289
290 /* Store the maximum value and it's index into destination pointers */
291 *pResult = out;
292 *pIndex = outIndex;
293 }
294 #endif /* defined(ARM_MATH_DSP) */
295 #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
296 /**
297 @} end of AbsMax group
298 */
299