1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_clip_f32.c
4  * Description:  Floating-point elementwise clipping
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 /**
36   @defgroup BasicClip Elementwise clipping
37 
38   Element-by-element clipping of a value.
39 
40   The value is constrained between 2 bounds.
41 
42   There are separate functions for floating-point, Q7, Q15, and Q31 data types.
43  */
44 
45 /**
46   @addtogroup BasicClip
47   @{
48  */
49 
50 /**
51   @brief         Elementwise floating-point clipping
52   @param[in]     pSrc          points to input values
53   @param[out]    pDst          points to output clipped values
54   @param[in]     low           lower bound
55   @param[in]     high          higher bound
56   @param[in]     numSamples    number of samples to clip
57  */
58 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
59 
60 #include "arm_helium_utils.h"
61 
arm_clip_f32(const float32_t * pSrc,float32_t * pDst,float32_t low,float32_t high,uint32_t numSamples)62 ARM_DSP_ATTRIBUTE void arm_clip_f32(const float32_t * pSrc,
63   float32_t * pDst,
64   float32_t low,
65   float32_t high,
66   uint32_t numSamples)
67 {
68     uint32_t  blkCnt;
69     f32x4_t curVec0, curVec1;
70     f32x4_t vecLow, vecHigh;
71 
72     vecLow = vdupq_n_f32(low);
73     vecHigh = vdupq_n_f32(high);
74 
75     curVec0 = vld1q(pSrc);
76     pSrc += 4;
77     /*
78      * unrolled x 2 to allow
79      * vldr/vstr/vmin/vmax
80      * stall free interleaving
81      */
82     blkCnt = numSamples >> 3;
83     while (blkCnt--)
84     {
85         curVec0 = vmaxnmq(curVec0, vecLow);
86         curVec1 = vld1q(pSrc);
87         pSrc += 4;
88         curVec0 = vminnmq(curVec0, vecHigh);
89         vst1q(pDst, curVec0);
90         pDst += 4;
91         curVec1 = vmaxnmq(curVec1, vecLow);
92         curVec0 = vld1q(pSrc);
93         pSrc += 4;
94         curVec1 = vminnmq(curVec1, vecHigh);
95         vst1q(pDst, curVec1);
96         pDst += 4;
97     }
98     /*
99      * Tail handling
100      */
101     blkCnt = numSamples - ((numSamples >> 3) << 3);
102     if (blkCnt >= 4)
103     {
104         curVec0 = vmaxnmq(curVec0, vecLow);
105         curVec0 = vminnmq(curVec0, vecHigh);
106         vst1q(pDst, curVec0);
107         pDst += 4;
108         curVec0 = vld1q(pSrc);
109         pSrc += 4;
110     }
111 
112     if (blkCnt > 0)
113     {
114         mve_pred16_t p0 = vctp32q(blkCnt & 3);
115         curVec0 = vmaxnmq(curVec0, vecLow);
116         curVec0 = vminnmq(curVec0, vecHigh);
117         vstrwq_p(pDst, curVec0, p0);
118     }
119 }
120 
121 #else
arm_clip_f32(const float32_t * pSrc,float32_t * pDst,float32_t low,float32_t high,uint32_t numSamples)122 ARM_DSP_ATTRIBUTE void arm_clip_f32(const float32_t * pSrc,
123   float32_t * pDst,
124   float32_t low,
125   float32_t high,
126   uint32_t numSamples)
127 {
128     uint32_t i;
129     for (i = 0; i < numSamples; i++)
130     {
131         if (pSrc[i] > high)
132             pDst[i] = high;
133         else if (pSrc[i] < low)
134             pDst[i] = low;
135         else
136             pDst[i] = pSrc[i];
137     }
138 }
139 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
140 
141 /**
142   @} end of BasicClip group
143  */
144