1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_clip_q7.c
4  * Description:  Elementwise Q7 fixed-point clipping
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 
36 /**
37   @addtogroup BasicClip
38   @{
39  */
40 
41 /**
42   @brief         Elementwise fixed-point clipping
43   @param[in]     pSrc          points to input values
44   @param[out]    pDst          points to output clipped values
45   @param[in]     low           lower bound
46   @param[in]     high          higher bound
47   @param[in]     numSamples    number of samples to clip
48   @return        none
49  */
50 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
52 #include "arm_helium_utils.h"
/*
 * Vector (MVE) variant: clips each of the numSamples q7 values read from
 * pSrc to the range [low, high] and writes the result to pDst, handling
 * 16 lanes per vector.
 *
 *   pSrc        points to input values
 *   pDst        points to output clipped values
 *   low         lower bound
 *   high        upper bound
 *   numSamples  number of samples to clip
 *
 * NOTE: loads run one vector ahead of the clip/store and are not
 * predicated, so between 1 and 16 bytes beyond the last input sample are
 * always read (a full 16-byte load is issued even when numSamples is 0).
 * Stores are exact: full vectors for complete groups of 16 and a
 * predicated store for the remainder, so nothing is written past
 * pDst[numSamples - 1].
 */
void arm_clip_q7(const q7_t * pSrc,
  q7_t * pDst,
  q7_t low,
  q7_t high,
  uint32_t numSamples)
{
    uint32_t  blkCnt;
    q7x16_t curVec0, curVec1;

    /* Broadcast both bounds to every lane once, outside the loops. */
    const q7x16_t vecLow = vdupq_n_s8(low);
    const q7x16_t vecHigh = vdupq_n_s8(high);

    /* Prime the pipeline with the first vector. */
    curVec0 = vld1q(pSrc);
    pSrc += 16;

    /*
     * Main loop, 32 samples per iteration (unrolled x 2) so that
     * vldr/vstr/vmin/vmax can be interleaved without stalls: while one
     * vector is being clipped and stored, the next one is being loaded.
     */
    blkCnt = numSamples >> 5;
    while (blkCnt--)
    {
        curVec0 = vmaxq(curVec0, vecLow);
        curVec1 = vld1q(pSrc);
        pSrc += 16;
        curVec0 = vminq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 16;

        curVec1 = vmaxq(curVec1, vecLow);
        curVec0 = vld1q(pSrc);
        pSrc += 16;
        curVec1 = vminq(curVec1, vecHigh);
        vst1q(pDst, curVec1);
        pDst += 16;
    }

    /*
     * Tail handling: 0..31 samples remain and curVec0 already holds the
     * next 16 input bytes.
     */
    blkCnt = numSamples & 0x1F;
    if (blkCnt >= 16)
    {
        /* One more complete vector: clip, store, then fetch the next. */
        curVec0 = vmaxq(curVec0, vecLow);
        curVec0 = vminq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 16;
        curVec0 = vld1q(pSrc);
        pSrc += 16;
    }

    if (blkCnt > 0)
    {
        /*
         * Only the low (blkCnt & 0xf) lanes are written; when exactly 16
         * samples remained the predicate is empty and nothing is stored.
         */
        mve_pred16_t p0 = vctp8q(blkCnt & 0xf);
        curVec0 = vmaxq(curVec0, vecLow);
        curVec0 = vminq(curVec0, vecHigh);
        vstrbq_p(pDst, curVec0, p0);
    }
}
111 
112 #else
/*
 * Scalar variant: copies numSamples q7 values from pSrc to pDst, replacing
 * any value greater than high with high and any value less than low with
 * low.
 *
 *   pSrc        points to input values
 *   pDst        points to output clipped values
 *   low         lower bound
 *   high        upper bound
 *   numSamples  number of samples to clip
 *
 * The upper bound is tested first, so a sample that is both above high and
 * below low (only possible when low > high) is written as high.
 */
void arm_clip_q7(const q7_t * pSrc,
  q7_t * pDst,
  q7_t low,
  q7_t high,
  uint32_t numSamples)
{
    for (uint32_t i = 0; i < numSamples; i++)
    {
        /* Read the sample once; it is compared twice and possibly copied. */
        const q7_t sample = pSrc[i];

        if (sample > high)
        {
            pDst[i] = high;
        }
        else if (sample < low)
        {
            pDst[i] = low;
        }
        else
        {
            pDst[i] = sample;
        }
    }
}
129 #endif /* defined(ARM_MATH_MVEI) */
130 
131 /**
132   @} end of BasicClip group
133  */
134