1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_clip_f32.c
4  * Description:  Floating-point vector clipping
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 /**
36   @defgroup BasicClip Elementwise clipping
37 
38   Element-by-element clipping of a value.
39 
40   The value is constrained between 2 bounds.
41 
42   There are separate functions for floating-point, Q7, Q15, and Q31 data types.
43  */
44 
45 /**
46   @addtogroup BasicClip
47   @{
48  */
49 
50 /**
51   @brief         Elementwise floating-point clipping
52   @param[in]     pSrc          points to input values
53   @param[out]    pDst          points to output clipped values
54   @param[in]     low           lower bound
55   @param[in]     high          higher bound
56   @param[in]     numSamples    number of samples to clip
57   @return        none
58  */
59 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
60 
61 #include "arm_helium_utils.h"
62 
/*
 * Helium (MVE) implementation of arm_clip_f32.
 *
 * Every input vector is clamped by taking vmaxnmq against a vector filled
 * with `low`, then vminnmq against a vector filled with `high`, and the
 * result is stored to pDst.
 *
 * The main loop consumes 8 samples per iteration (two 4-lane vectors). The
 * load of the next vector is issued between the max and min of the current
 * one, so the statement order below is deliberate and must be preserved.
 * The remaining numSamples % 8 samples are handled by a full-vector stage
 * (when at least 4 remain) followed by a predicated store.
 *
 * NOTE(review): the first load is issued before numSamples is examined, and
 * every stage ends by loading the next vector whether or not more samples
 * remain. The function therefore appears to read up to 4 floats beyond the
 * last valid input sample (including when numSamples is 0). Only the stores
 * are predicated. Confirm that callers provide readable padding after pSrc.
 */
void arm_clip_f32(const float32_t * pSrc,
  float32_t * pDst,
  float32_t low,
  float32_t high,
  uint32_t numSamples)
{
    uint32_t  blkCnt;
    f32x4_t curVec0, curVec1;
    f32x4_t vecLow, vecHigh;

    /* Replicate the scalar bounds into every lane once, outside the loops. */
    vecLow = vdupq_n_f32(low);
    vecHigh = vdupq_n_f32(high);

    /* Prime the pipeline: curVec0 always holds the next vector to clamp. */
    curVec0 = vld1q(pSrc);
    pSrc += 4;
    /*
     * unrolled x 2 to allow
     * vldr/vstr/vmin/vmax
     * stall free interleaving
     */
    blkCnt = numSamples >> 3;
    while (blkCnt--)
    {
        /* First half: clamp curVec0 while curVec1 is being fetched. */
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec1 = vld1q(pSrc);
        pSrc += 4;
        curVec0 = vminnmq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 4;
        /* Second half: clamp curVec1 while the next curVec0 is fetched. */
        curVec1 = vmaxnmq(curVec1, vecLow);
        curVec0 = vld1q(pSrc);
        pSrc += 4;
        curVec1 = vminnmq(curVec1, vecHigh);
        vst1q(pDst, curVec1);
        pDst += 4;
    }
    /*
     * Tail handling: blkCnt becomes numSamples % 8 (0..7).
     */
    blkCnt = numSamples - ((numSamples >> 3) << 3);
    if (blkCnt >= 4)
    {
        /* A whole vector is still pending in curVec0: clamp and store it,
         * then fetch the vector that holds the final (blkCnt - 4) samples. */
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 4;
        curVec0 = vld1q(pSrc);
        pSrc += 4;
    }

    if (blkCnt > 0)
    {
        /* Predicate covering the last (blkCnt & 3) lanes' worth of samples.
         * When the remainder was exactly 4 this count is 0, so presumably
         * no lane is written by the predicated store. */
        mve_pred16_t p0 = vctp32q(blkCnt & 3);
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vstrwq_p(pDst, curVec0, p0);
    }
}
121 
122 #else
/*
 * Portable scalar implementation of arm_clip_f32, compiled when the
 * Helium (ARM_MATH_MVEF without ARM_MATH_AUTOVECTORIZE) path is not selected.
 *
 * For each of the numSamples elements:
 *   - a value greater than `high` is replaced by `high`;
 *   - otherwise a value less than `low` is replaced by `low`;
 *   - otherwise the value is copied unchanged.
 *
 * The upper bound is tested first, so if the caller passes low > high a
 * sample above `high` yields `high`. A NaN sample compares false against
 * both bounds and is therefore copied through as NaN.
 *
 * Each element is read before the corresponding output is written, so
 * pSrc and pDst may refer to the same buffer. Nothing is read or written
 * when numSamples is 0.
 */
void arm_clip_f32(const float32_t * pSrc,
  float32_t * pDst,
  float32_t low,
  float32_t high,
  uint32_t numSamples)
{
    for (uint32_t i = 0; i < numSamples; i++)
    {
        /* Read the sample once; the comparisons below reuse this copy. */
        const float32_t sample = pSrc[i];

        if (sample > high)
        {
            pDst[i] = high;
        }
        else if (sample < low)
        {
            pDst[i] = low;
        }
        else
        {
            pDst[i] = sample;
        }
    }
}
139 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
140 
141 /**
142   @} end of BasicClip group
143  */
144