1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_absmin_q7.c
4 * Description: Minimum value of absolute values of a Q7 vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35
36 /**
37 @addtogroup AbsMin
38 @{
39 */
40
41 /**
42 @brief Minimum value of absolute values of a Q7 vector.
43 @param[in] pSrc points to the input vector
44 @param[in] blockSize number of samples in input vector
45 @param[out] pResult minimum value returned here
46 @param[out] pIndex index of minimum value returned here
47 @return none
48 */
49 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
50
51 #include <stdint.h>
52 #include "arm_helium_utils.h"
53
54 #define MAX_BLKSZ_S8 (UINT8_MAX+1)
55
/*
 * Helium (MVE) kernel: minimum of the absolute values of a short Q7 vector.
 *
 * pSrc      : points to the input vector
 * blockSize : number of samples; lane indices are tracked in 8-bit lanes,
 *             so the caller keeps this at or below MAX_BLKSZ_S8
 * pResult   : receives the minimum absolute value
 * pIndex    : receives the index of that value inside this block
 *
 * Each of the 16 lanes keeps its own running minimum and the index at which
 * it was seen; a final cross-lane reduction picks the overall minimum and the
 * smallest index among the lanes that hold it.
 *
 * NOTE(review): the per-lane compare is "<=", so a later sample equal to the
 * lane's current minimum replaces the stored index. On ties the reported
 * index is therefore not necessarily the first occurrence - confirm whether
 * callers rely on first-occurrence semantics.
 */
static void arm_small_blk_absmin_q7(
    const q7_t *pSrc,
    uint32_t    blockSize,
    q7_t       *pResult,
    uint32_t   *pIndex)
{
    uint16_t     blkCnt;                                   /* loop counter */
    q7x16_t      vecSrc;
    q7_t const  *pSrcVec;
    q7x16_t      curExtremValVec = vdupq_n_s8(Q7_ABSMAX);  /* per-lane running minimum */
    q7_t         minValue = Q7_ABSMAX;
    uint16_t     idx = blockSize - 1;                      /* seed for the index reduction */
    uint8x16_t   indexVec;                                 /* indices of the samples currently loaded */
    uint8x16_t   curExtremIdxVec;                          /* per-lane index of the running minimum */
    uint32_t     startIdx = 0;
    mve_pred16_t p0;

    indexVec = vidupq_wb_u8(&startIdx, 1);
    curExtremIdxVec = vdupq_n_u8(0);

    pSrcVec = (q7_t const *) pSrc;

    /* Full vectors of 16 samples */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 16;
        /*
         * Saturating absolute value: -128 becomes +127. A non-saturating
         * abs would leave -128 unchanged and it would then win every
         * signed "<=" comparison below.
         */
        vecSrc = vqabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        indexVec = vidupq_wb_u8(&startIdx, 1);
        /*
         * Decrement the loop counter
         */
        blkCnt--;
    }

    /*
     * Tail: fewer than 16 samples remain.
     */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        /* Predicate covering only the lanes that hold valid samples */
        p0 = vctp8q(blkCnt);
        /*
         * Predicated load: lanes beyond the end of the input are not read
         * from memory (they are zero-filled), so the buffer is never overrun.
         */
        vecSrc = vld1q_z(pSrcVec, p0);
        vecSrc = vqabsq(vecSrc);
        /*
         * Compare only the valid lanes; inactive lanes keep their previous
         * minimum and index.
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminvq(minValue, curExtremValVec);
    /*
     * Among the lanes holding that minimum, keep the smallest index
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    idx = vminvq_p_u8(idx, curExtremIdxVec, p0);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
133
134
/*
 * Helium (MVE) entry point: minimum of the absolute values of a Q7 vector.
 *
 * Inputs of at most MAX_BLKSZ_S8 samples go straight to the small-block
 * kernel. Longer inputs are cut into chunks of MAX_BLKSZ_S8 samples plus a
 * remainder; the best (value, chunk, offset) triple seen so far is tracked
 * and turned into a global index at the end. The strict "<" keeps the
 * earliest chunk when several chunks report the same minimum.
 */
void arm_absmin_q7(
    const q7_t * pSrc,
    uint32_t     blockSize,
    q7_t       * pResult,
    uint32_t   * pIndex)
{
    int32_t  remaining  = blockSize;   /* samples not yet scanned */
    uint32_t chunkNo    = 0;           /* number of the chunk being scanned */
    uint32_t bestChunk  = 0;           /* chunk holding the best value so far */
    uint32_t bestOffset = 0;           /* offset of that value inside its chunk */
    q7_t     bestVal    = Q7_MAX;      /* best value so far */

    /* Short input: a single kernel call produces the final answer */
    if (remaining <= MAX_BLKSZ_S8)
    {
        arm_small_blk_absmin_q7(pSrc, blockSize, pResult, pIndex);
        return;
    }

    /* Full chunks of MAX_BLKSZ_S8 samples */
    for (; remaining >= MAX_BLKSZ_S8;
         remaining -= MAX_BLKSZ_S8, pSrc += MAX_BLKSZ_S8, chunkNo++)
    {
        arm_small_blk_absmin_q7(pSrc, MAX_BLKSZ_S8, pResult, pIndex);
        if (*pResult < bestVal)
        {
            /* This chunk improves on everything seen before it */
            bestVal    = *pResult;
            bestOffset = *pIndex;
            bestChunk  = chunkNo;
        }
    }

    /* Remainder (fewer than MAX_BLKSZ_S8 samples) */
    arm_small_blk_absmin_q7(pSrc, remaining, pResult, pIndex);
    if (*pResult < bestVal)
    {
        bestVal    = *pResult;
        bestOffset = *pIndex;
        bestChunk  = chunkNo;
    }

    /* Convert (chunk, offset) into an index into the whole input */
    *pIndex  = bestChunk * MAX_BLKSZ_S8 + bestOffset;
    *pResult = bestVal;
}
188
189 #else
190 #if defined(ARM_MATH_DSP)
/*
 * DSP-extension variant: minimum of the absolute values of a Q7 vector.
 *
 * pSrc      : points to the input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the minimum absolute value
 * pIndex    : receives the index of the first sample reaching that minimum
 *
 * Non-positive samples are negated with __QSUB8(0, x) (a saturating byte
 * subtraction, so -128 is expected to map to +127 - see the CMSIS-Core
 * intrinsic reference). An empty input yields 0x7f at index 0 instead of
 * reading past the buffer.
 */
void arm_absmin_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t cur_absmin, out;                          /* Current candidate and running minimum */
        uint32_t blkCnt, outIndex;                     /* Loop counter, index of running minimum */
        uint32_t index;                                /* Index of the last sample consumed by the unrolled loop */

  /* Nothing to scan: without this guard *pSrc would be read and
     (blockSize - 1U) would wrap around to a huge loop count. */
  if (blockSize == 0U)
  {
    *pResult = (q7_t) 0x7f;
    *pIndex = 0U;
    return;
  }

  /* Initialize index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;
  out = (out > 0) ? out : (q7_t)__QSUB8(0, out);
  /* Initialize index of extrema value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize cur_absmin to next consecutive values one by one */
    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
    /* compare for the extrema value */
    if (cur_absmin < out)
    {
      /* Update the extrema value and its index */
      out = cur_absmin;
      outIndex = index + 1U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 2U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 3U;
    }

    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

  while (blkCnt > 0U)
  {
    cur_absmin = *pSrc++;
    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
    if (cur_absmin < out)
    {
      out = cur_absmin;
      /* blkCnt samples are left, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the extrema value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
277 #else
/*
 * Portable C variant: minimum of the absolute values of a Q7 vector.
 *
 * pSrc      : points to the input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the minimum absolute value
 * pIndex    : receives the index of the first sample reaching that minimum
 *
 * The absolute value saturates: 0x80 (-128) is mapped to 0x7f. An empty
 * input yields 0x7f at index 0 instead of reading past the buffer.
 */
void arm_absmin_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t sample;                                   /* Current input sample */
        q7_t minVal, out;                              /* Current candidate and running minimum */
        uint32_t i, outIndex;                          /* Loop index, index of running minimum */

  /* Nothing to scan: without this guard pSrc[0] would be read and
     (blockSize - 1U) would wrap around to a huge loop count. */
  if (blockSize == 0U)
  {
    *pResult = (q7_t) 0x7f;
    *pIndex = 0U;
    return;
  }

  /* The first sample is the reference value for the comparisons */
  sample = pSrc[0];
  out = (sample > 0) ? sample : ((sample == (q7_t) 0x80) ? (q7_t) 0x7f : -sample);
  outIndex = 0U;

  for (i = 1U; i < blockSize; i++)
  {
    /* Saturated absolute value of the next sample */
    sample = pSrc[i];
    minVal = (sample > 0) ? sample : ((sample == (q7_t) 0x80) ? (q7_t) 0x7f : -sample);

    /* Strict comparison keeps the first occurrence on ties */
    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      outIndex = i;
    }
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
318 #endif /* defined(ARM_MATH_DSP) */
319 #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
320 /**
321 @} end of AbsMin group
322 */
323