1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_absmax_q7.c
4 * Description: Maximum value of absolute values of a Q7 vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35 /**
36 @addtogroup AbsMax
37 @{
38 */
39
40 /**
41 @brief Maximum value of absolute values of a Q7 vector.
42 @param[in] pSrc points to the input vector
43 @param[in] blockSize number of samples in input vector
44 @param[out] pResult maximum value returned here
45 @param[out] pIndex index of maximum value returned here
46 @return none
47 */
48
49 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
50
51 #include <stdint.h>
52 #include "arm_helium_utils.h"
53
54 #define MAX_BLKSZ_S8 (UINT8_MAX+1)
55
/*
 * Helium (MVE) helper: largest absolute value of a short Q7 block.
 *
 * Sample positions are tracked in 8-bit lanes (uint8x16_t), so this helper
 * can only report positions 0..255; arm_absmax_q7() therefore feeds it at
 * most MAX_BLKSZ_S8 samples per call.
 *
 * pSrc      points to the samples to scan
 * blockSize number of samples to scan
 * pResult   receives the largest absolute value found
 * pIndex    receives the position of that value inside this block
 */
static void arm_small_blk_absmax_q7(
    const q7_t * pSrc,
    uint16_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    int32_t        blkCnt;           /* remaining samples (may go negative on the tail) */
    q7x16_t        extremValVec = vdupq_n_s8(Q7_ABSMIN);   /* per-lane running maximum */
    q7_t           maxValue = Q7_ABSMIN;
    uint8x16_t     indexVec;         /* position of the sample currently held in each lane */
    uint8x16_t     extremIdxVec;
    mve_pred16_t   p0;
    /*
     * Per-lane position of the running maximum. Zero-initialised because the
     * whole array is loaded back after the loop, while lanes beyond the tail
     * of a short block are never stored to.
     */
    uint8_t        extremIdxArr[16] = { 0 };

    indexVec = vidupq_u8(0U, 1);

    blkCnt = blockSize;
    do {
        /* Predicate that enables only the lanes still covered by the input */
        mve_pred16_t    p = vctp8q(blkCnt);
        q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);

        /*
         * Saturating absolute value: -128 becomes +127, matching the other
         * implementations of arm_absmax_q7 in this file. A non-saturating
         * abs would leave -128 negative, so it could never be selected as
         * the maximum.
         */
        extremIdxVal = vqabsq(extremIdxVal);
        /*
         * Get current max per lane and current index per lane
         * when a max is selected. The comparison is >=, so within a lane a
         * later sample that ties the running maximum replaces its position.
         */
        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);

        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
        /* store per-lane extrema indexes */
        vst1q_p_u8(extremIdxArr, indexVec, p0);

        indexVec += 16;
        pSrc += 16;
        blkCnt -= 16;
    }
    while (blkCnt > 0);


    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);

    /* set index for lower values to max possible index */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u8(extremIdxArr);

    /* Among the lanes holding the maximum, report the smallest position */
    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
    *pIndex = vminvq_u8(blockSize - 1, indexVec);
    *pResult = maxValue;
}
106
/*
 * Helium (MVE) implementation of arm_absmax_q7.
 *
 * Inputs of up to MAX_BLKSZ_S8 samples are handed to
 * arm_small_blk_absmax_q7() directly. Longer inputs are scanned in chunks of
 * MAX_BLKSZ_S8 samples; the best chunk and the position inside it are
 * remembered and combined into the final index.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of that value
 */
void arm_absmax_q7(
    const q7_t * pSrc,
    uint32_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    int32_t   totalSize = blockSize;

    if (totalSize <= MAX_BLKSZ_S8)
    {
        arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex);
    }
    else
    {
        uint32_t  curIdx = 0;            /* number of the chunk being scanned */
        q7_t      curBlkExtr = Q7_MIN;   /* best value seen so far */
        uint32_t  curBlkPos = 0;         /* position of the best value inside its chunk */
        uint32_t  curBlkIdx = 0;         /* number of the chunk holding the best value */
        /*
         * process full chunks of MAX_BLKSZ_S8 samples
         */
        while (totalSize >= MAX_BLKSZ_S8)
        {
            arm_small_blk_absmax_q7(pSrc, MAX_BLKSZ_S8, pResult, pIndex);
            /* strict '>' keeps the earliest chunk when two chunks tie */
            if (*pResult > curBlkExtr)
            {
                /*
                 * update partial extrema
                 */
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
            curIdx++;
            pSrc += MAX_BLKSZ_S8;
            totalSize -= MAX_BLKSZ_S8;
        }
        /*
         * remainder: skipped when the length is an exact multiple of the
         * chunk size, so the helper is never run on zero samples (it would
         * then read back index scratch storage it never wrote).
         */
        if (totalSize > 0)
        {
            arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex);
            if (*pResult > curBlkExtr)
            {
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
        }
        *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
        *pResult = curBlkExtr;
    }
}
160 #else
161 #if defined(ARM_MATH_DSP)
/*
 * DSP-extension implementation of arm_absmax_q7.
 *
 * Scans the vector once, four samples per loop iteration, and keeps the
 * first position at which the largest absolute value occurs (a later sample
 * only wins when it is strictly greater).
 *
 * Negative and zero samples are negated with __QSUB8(0, x); per the
 * CMSIS-Core intrinsic documentation this is a saturating byte subtract, so
 * -128 is expected to yield +127 as in the plain C variant below.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of that value
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t cur_absmax, out;        /* Current |sample| and running maximum */
        uint32_t blkCnt, outIndex;   /* Loop counter and index of the running maximum */
        uint32_t index;              /* Index of the last sample of the previous group of four */

  /*
   * Empty input: without this guard (blockSize - 1U) wraps around and the
   * loops below would read far beyond the buffer. Report 0 at index 0.
   */
  if (blockSize == 0U)
  {
    *pResult = (q7_t) 0;
    *pIndex = 0U;
    return;
  }

  /* Initialize index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;
  out = (out > 0) ? out : (q7_t)__QSUB8(0, out);
  /* Initialize index of extrema value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize cur_absmax to next consecutive values one by one */
    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    /* compare for the extrema value */
    if (cur_absmax > out)
    {
      /* Update the extrema value and its index */
      out = cur_absmax;
      outIndex = index + 1U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 2U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 3U;
    }

    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

  while (blkCnt > 0U)
  {
    cur_absmax = *pSrc++;
    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);
    if (cur_absmax > out)
    {
      out = cur_absmax;
      /* blkCnt samples are still pending, so this one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the extrema value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
248 #else
/*
 * Saturating absolute value of one Q7 sample: -128 (0x80) has no positive
 * counterpart in Q7 and is mapped to +127 (0x7f).
 */
static q7_t arm_absmax_q7_sat_abs(q7_t x)
{
  if (x > 0)
  {
    return x;
  }
  return (x == (q7_t) 0x80) ? (q7_t) 0x7f : (q7_t) -x;
}

/*
 * Plain C implementation of arm_absmax_q7.
 *
 * Scans the vector once and keeps the first position at which the largest
 * absolute value occurs (a later sample only wins when it is strictly
 * greater).
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of that value
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t maxVal, out;            /* Current |sample| and running maximum */
        uint32_t blkCnt, outIndex;   /* Loop counter and index of the running maximum */

  /*
   * Empty input: without this guard (blockSize - 1U) wraps around and the
   * loop below would read far beyond the buffer. Report 0 at index 0.
   */
  if (blockSize == 0U)
  {
    *pResult = (q7_t) 0;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = arm_absmax_q7_sat_abs(*pSrc);
  pSrc++;

  /* Initialize blkCnt with number of remaining samples */
  blkCnt = (blockSize - 1U);

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal = arm_absmax_q7_sat_abs(*pSrc);
    pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      /* blkCnt samples are still pending, so this one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
290 #endif /* defined(ARM_MATH_DSP) */
291 #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
292 /**
293 @} end of AbsMax group
294 */
295