1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_absmin_q7.c
4  * Description:  Minimum value of absolute values of a Q7 vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 
31 /**
32   @ingroup groupStats
33  */
34 
35 
36 /**
37   @addtogroup AbsMin
38   @{
39  */
40 
41 /**
42   @brief         Minimum value of absolute values of a Q7 vector.
43   @param[in]     pSrc       points to the input vector
44   @param[in]     blockSize  number of samples in input vector
45   @param[out]    pResult    minimum value returned here
46   @param[out]    pIndex     index of minimum value returned here
47  */
48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
49 
50 #include <stdint.h>
51 #include "arm_helium_utils.h"
52 
53 #define MAX_BLKSZ_S8  (UINT8_MAX+1)
54 
/**
  @brief         Minimum of the absolute values of a short Q7 vector (Helium helper).
  @param[in]     pSrc       points to the input vector
  @param[in]     blockSize  number of samples in input vector. Sample indices are
                            tracked in 8-bit lanes, so this must not exceed
                            MAX_BLKSZ_S8.
  @param[out]    pResult    minimum absolute value returned here
  @param[out]    pIndex     index of the first sample holding that value returned here
 */
static void arm_small_blk_absmin_q7(
    const q7_t *pSrc,
    uint32_t    blockSize,
    q7_t       *pResult,
    uint32_t   *pIndex)
{
    uint16_t      blkCnt;                                   /* loop counter */
    q7x16_t       vecSrc;
    q7_t const   *pSrcVec = pSrc;
    q7x16_t       curExtremValVec = vdupq_n_s8(Q7_ABSMAX);  /* running minimum, per lane */
    q7_t          minValue = Q7_ABSMAX;
    uint16_t      idx = blockSize - 1;                      /* upper bound for the index reduction */
    uint8x16_t    indexVec;                                 /* sample index held by each lane */
    uint8x16_t    curExtremIdxVec = vdupq_n_u8(0);          /* index of the running minimum, per lane */
    uint32_t      startIdx = 0;
    mve_pred16_t  p0;

    indexVec = vidupq_wb_u8(&startIdx, 1);

    /*
     * Full vectors of 16 samples
     */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 16;
        /*
         * Saturating absolute value: |-128| becomes 127, as in the
         * scalar implementations of this function.
         */
        vecSrc = vqabsq(vecSrc);
        /*
         * Keep the per-lane minimum and its index. The comparison is
         * strict so that, on equal values, the earliest index is kept.
         */
        p0 = vcmpltq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        indexVec = vidupq_wb_u8(&startIdx, 1);

        blkCnt--;
    }

    /*
     * Tail: fewer than 16 samples left
     */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        mve_pred16_t tailPred = vctp8q(blkCnt);

        /*
         * Predicated load: only the remaining samples are accessed, so
         * nothing is read beyond the end of the input vector.
         */
        vecSrc = vld1q_z(pSrcVec, tailPred);
        vecSrc = vqabsq(vecSrc);

        /*
         * Same per-lane update as above, restricted to the valid lanes
         */
        p0 = vcmpltq_m(vecSrc, curExtremValVec, tailPred);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /*
     * Reduce the lanes to the overall minimum value
     */
    minValue = vminvq(minValue, curExtremValVec);
    /*
     * Among the lanes holding that minimum, take the smallest index
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    idx = vminvq_p_u8(idx, curExtremIdxVec, p0);

    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
132 
133 
arm_absmin_q7(const q7_t * pSrc,uint32_t blockSize,q7_t * pResult,uint32_t * pIndex)134 ARM_DSP_ATTRIBUTE void arm_absmin_q7(
135   const q7_t * pSrc,
136         uint32_t blockSize,
137         q7_t * pResult,
138         uint32_t * pIndex)
139 {
140      int32_t   totalSize = blockSize;
141 
142     if (totalSize <= MAX_BLKSZ_S8)
143     {
144         arm_small_blk_absmin_q7(pSrc, blockSize, pResult, pIndex);
145     }
146     else
147     {
148         uint32_t  curIdx = 0;
149         q7_t      curBlkExtr = Q7_MAX;
150         uint32_t  curBlkPos = 0;
151         uint32_t  curBlkIdx = 0;
152         /*
153          * process blocks of 255 elts
154          */
155         while (totalSize >= MAX_BLKSZ_S8)
156         {
157             const q7_t     *curSrc = pSrc;
158 
159             arm_small_blk_absmin_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
160             if (*pResult < curBlkExtr)
161             {
162                 /*
163                  * update partial extrema
164                  */
165                 curBlkExtr = *pResult;
166                 curBlkPos = *pIndex;
167                 curBlkIdx = curIdx;
168             }
169             curIdx++;
170             pSrc += MAX_BLKSZ_S8;
171             totalSize -= MAX_BLKSZ_S8;
172         }
173         /*
174          * remainder
175          */
176         arm_small_blk_absmin_q7(pSrc, totalSize, pResult, pIndex);
177         if (*pResult < curBlkExtr)
178         {
179             curBlkExtr = *pResult;
180             curBlkPos = *pIndex;
181             curBlkIdx = curIdx;
182         }
183         *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
184         *pResult = curBlkExtr;
185     }
186 }
187 
188 #else
189 #if defined(ARM_MATH_DSP)
arm_absmin_q7(const q7_t * pSrc,uint32_t blockSize,q7_t * pResult,uint32_t * pIndex)190 ARM_DSP_ATTRIBUTE void arm_absmin_q7(
191   const q7_t * pSrc,
192         uint32_t blockSize,
193         q7_t * pResult,
194         uint32_t * pIndex)
195 {
196         q7_t cur_absmin, out;                     /* Temporary variables to store the output value. */\
197         uint32_t blkCnt, outIndex;                     /* Loop counter */                                   \
198         uint32_t index;                                /* index of maximum value */                         \
199                                                                                                             \
200   /* Initialize index value to zero. */                                                                     \
201   outIndex = 0U;                                                                                            \
202   /* Load first input value that act as reference value for comparision */                                  \
203   out = *pSrc++;                                                                                            \
204   out = (out > 0) ? out : (q7_t)__QSUB8(0, out);                                                                           \
205   /* Initialize index of extrema value. */                                                                  \
206   index = 0U;                                                                                               \
207                                                                                                             \
208   /* Loop unrolling: Compute 4 outputs at a time */                                                         \
209   blkCnt = (blockSize - 1U) >> 2U;                                                                          \
210                                                                                                             \
211   while (blkCnt > 0U)                                                                                       \
212   {                                                                                                         \
213     /* Initialize cur_absmin to next consecutive values one by one */                                         \
214     cur_absmin = *pSrc++;                                                                                     \
215     cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                \
216     /* compare for the extrema value */                                                                     \
217     if (cur_absmin < out)                                                                         \
218     {                                                                                                       \
219       /* Update the extrema value and it's index */                                                         \
220       out = cur_absmin;                                                                                       \
221       outIndex = index + 1U;                                                                                \
222     }                                                                                                       \
223                                                                                                             \
224     cur_absmin = *pSrc++;                                                                                     \
225     cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                \
226     if (cur_absmin < out)                                                                         \
227     {                                                                                                       \
228       out = cur_absmin;                                                                                       \
229       outIndex = index + 2U;                                                                                \
230     }                                                                                                       \
231                                                                                                             \
232     cur_absmin = *pSrc++;                                                                                     \
233     cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                \
234     if (cur_absmin < out)                                                                          \
235     {                                                                                                       \
236       out = cur_absmin;                                                                                       \
237       outIndex = index + 3U;                                                                                \
238     }                                                                                                       \
239                                                                                                             \
240     cur_absmin = *pSrc++;                                                                                     \
241     cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                 \
242     if (cur_absmin < out)                                                                          \
243     {                                                                                                       \
244       out = cur_absmin;                                                                                       \
245       outIndex = index + 4U;                                                                                \
246     }                                                                                                       \
247                                                                                                             \
248     index += 4U;                                                                                            \
249                                                                                                             \
250     /* Decrement loop counter */                                                                            \
251     blkCnt--;                                                                                               \
252   }                                                                                                         \
253                                                                                                             \
254   /* Loop unrolling: Compute remaining outputs */                                                           \
255   blkCnt = (blockSize - 1U) % 4U;                                                                           \
256                                                                                                             \
257                                                                                                             \
258   while (blkCnt > 0U)                                                                                       \
259   {                                                                                                         \
260     cur_absmin = *pSrc++;                                                                                     \
261     cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                 \
262     if (cur_absmin < out)                                                                         \
263     {                                                                                                       \
264       out = cur_absmin;                                                                                       \
265       outIndex = blockSize - blkCnt;                                                                        \
266     }                                                                                                       \
267                                                                                                             \
268     /* Decrement loop counter */                                                                            \
269     blkCnt--;                                                                                               \
270   }                                                                                                         \
271                                                                                                             \
272   /* Store the extrema value and it's index into destination pointers */                                    \
273   *pResult = out;                                                                                           \
274   *pIndex = outIndex;
275 }
276 #else
arm_absmin_q7(const q7_t * pSrc,uint32_t blockSize,q7_t * pResult,uint32_t * pIndex)277 ARM_DSP_ATTRIBUTE void arm_absmin_q7(
278   const q7_t * pSrc,
279         uint32_t blockSize,
280         q7_t * pResult,
281         uint32_t * pIndex)
282 {
283         q7_t minVal, out;                              /* Temporary variables to store the output value. */
284         uint32_t blkCnt, outIndex;                     /* Loop counter */
285 
286   /* Initialise index value to zero. */
287   outIndex = 0U;
288   /* Load first input value that act as reference value for comparision */
289   out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
290   pSrc++;
291 
292   /* Initialize blkCnt with number of samples */
293   blkCnt = (blockSize - 1U);
294 
295   while (blkCnt > 0U)
296   {
297     /* Initialize minVal to the next consecutive values one by one */
298     minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
299     pSrc++;
300 
301     /* compare for the minimum value */
302     if (out > minVal)
303     {
304       /* Update the minimum value and it's index */
305       out = minVal;
306       outIndex = blockSize - blkCnt;
307     }
308 
309     /* Decrement loop counter */
310     blkCnt--;
311   }
312 
313   /* Store the minimum value and it's index into destination pointers */
314   *pResult = out;
315   *pIndex = outIndex;
316 }
317 #endif /* defined(ARM_MATH_DSP) */
318 #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
319 /**
320   @} end of AbsMin group
321  */
322