1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_clip_q7.c
4  * Description:  Elementwise clipping of a Q7 fixed-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 
36 /**
37   @addtogroup BasicClip
38   @{
39  */
40 
41 /**
42   @brief         Elementwise fixed-point clipping
43   @param[in]     pSrc          points to input values
44   @param[out]    pDst          points to output clipped values
45   @param[in]     low           lower bound
46   @param[in]     high          higher bound
47   @param[in]     numSamples    number of samples to clip
48  */
49 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
50 
51 #include "arm_helium_utils.h"
arm_clip_q7(const q7_t * pSrc,q7_t * pDst,q7_t low,q7_t high,uint32_t numSamples)52 ARM_DSP_ATTRIBUTE void arm_clip_q7(const q7_t * pSrc,
53   q7_t * pDst,
54   q7_t low,
55   q7_t high,
56   uint32_t numSamples)
57 {
58     uint32_t  blkCnt;
59     q7x16_t curVec0, curVec1;
60     q7x16_t vecLow, vecHigh;
61 
62     vecLow = vdupq_n_s8(low);
63     vecHigh = vdupq_n_s8(high);
64 
65     curVec0 = vld1q(pSrc);
66     pSrc += 16;
67     /*
68      * unrolled x 2 to allow
69      * vldr/vstr/vmin/vmax
70      * stall free interleaving
71      */
72     blkCnt = numSamples >> 5;
73     while (blkCnt--)
74     {
75         curVec0 = vmaxq(curVec0, vecLow);
76         curVec1 = vld1q(pSrc);
77         pSrc += 16;
78         curVec0 = vminq(curVec0, vecHigh);
79         vst1q(pDst, curVec0);
80         pDst += 16;
81         curVec1 = vmaxq(curVec1, vecLow);
82         curVec0 = vld1q(pSrc);
83         pSrc += 16;
84         curVec1 = vminq(curVec1, vecHigh);
85         vst1q(pDst, curVec1);
86         pDst += 16;
87     }
88     /*
89      * Tail handling
90      */
91     blkCnt = numSamples - ((numSamples >> 5) << 5);
92     if (blkCnt >= 16)
93     {
94         curVec0 = vmaxq(curVec0, vecLow);
95         curVec0 = vminq(curVec0, vecHigh);
96         vst1q(pDst, curVec0);
97         pDst += 16;
98         curVec0 = vld1q(pSrc);
99         pSrc += 16;
100     }
101 
102     if (blkCnt > 0)
103     {
104         mve_pred16_t p0 = vctp8q(blkCnt & 0xf);
105         curVec0 = vmaxq(curVec0, vecLow);
106         curVec0 = vminq(curVec0, vecHigh);
107         vstrbq_p(pDst, curVec0, p0);
108     }
109 }
110 
111 #else
arm_clip_q7(const q7_t * pSrc,q7_t * pDst,q7_t low,q7_t high,uint32_t numSamples)112 ARM_DSP_ATTRIBUTE void arm_clip_q7(const q7_t * pSrc,
113   q7_t * pDst,
114   q7_t low,
115   q7_t high,
116   uint32_t numSamples)
117 {
118     uint32_t i;
119     for (i = 0; i < numSamples; i++)
120     {
121         if (pSrc[i] > high)
122             pDst[i] = high;
123         else if (pSrc[i] < low)
124             pDst[i] = low;
125         else
126             pDst[i] = pSrc[i];
127     }
128 }
129 #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
130 
131 /**
132   @} end of BasicClip group
133  */
134