1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_clip_f32.c
4 * Description: Floating-point vector clipping
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35 /**
36 @defgroup BasicClip Elementwise clipping
37
38 Element-by-element clipping of the values of a vector.
39
40 Each value is constrained to lie between a lower bound and an upper bound.
41
42 There are separate functions for floating-point, Q7, Q15, and Q31 data types.
43 */
44
45 /**
46 @addtogroup BasicClip
47 @{
48 */
49
50 /**
51 @brief Elementwise floating-point clipping
52 @param[in] pSrc points to input values
53 @param[out] pDst points to output clipped values
54 @param[in] low lower bound
55 @param[in] high higher bound
56 @param[in] numSamples number of samples to clip
57 */
58 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
59
60 #include "arm_helium_utils.h"
61
arm_clip_f32(const float32_t * pSrc,float32_t * pDst,float32_t low,float32_t high,uint32_t numSamples)62 ARM_DSP_ATTRIBUTE void arm_clip_f32(const float32_t * pSrc,
63 float32_t * pDst,
64 float32_t low,
65 float32_t high,
66 uint32_t numSamples)
67 {
68 uint32_t blkCnt;
69 f32x4_t curVec0, curVec1;
70 f32x4_t vecLow, vecHigh;
71
72 vecLow = vdupq_n_f32(low);
73 vecHigh = vdupq_n_f32(high);
74
75 curVec0 = vld1q(pSrc);
76 pSrc += 4;
77 /*
78 * unrolled x 2 to allow
79 * vldr/vstr/vmin/vmax
80 * stall free interleaving
81 */
82 blkCnt = numSamples >> 3;
83 while (blkCnt--)
84 {
85 curVec0 = vmaxnmq(curVec0, vecLow);
86 curVec1 = vld1q(pSrc);
87 pSrc += 4;
88 curVec0 = vminnmq(curVec0, vecHigh);
89 vst1q(pDst, curVec0);
90 pDst += 4;
91 curVec1 = vmaxnmq(curVec1, vecLow);
92 curVec0 = vld1q(pSrc);
93 pSrc += 4;
94 curVec1 = vminnmq(curVec1, vecHigh);
95 vst1q(pDst, curVec1);
96 pDst += 4;
97 }
98 /*
99 * Tail handling
100 */
101 blkCnt = numSamples - ((numSamples >> 3) << 3);
102 if (blkCnt >= 4)
103 {
104 curVec0 = vmaxnmq(curVec0, vecLow);
105 curVec0 = vminnmq(curVec0, vecHigh);
106 vst1q(pDst, curVec0);
107 pDst += 4;
108 curVec0 = vld1q(pSrc);
109 pSrc += 4;
110 }
111
112 if (blkCnt > 0)
113 {
114 mve_pred16_t p0 = vctp32q(blkCnt & 3);
115 curVec0 = vmaxnmq(curVec0, vecLow);
116 curVec0 = vminnmq(curVec0, vecHigh);
117 vstrwq_p(pDst, curVec0, p0);
118 }
119 }
120
121 #else
arm_clip_f32(const float32_t * pSrc,float32_t * pDst,float32_t low,float32_t high,uint32_t numSamples)122 ARM_DSP_ATTRIBUTE void arm_clip_f32(const float32_t * pSrc,
123 float32_t * pDst,
124 float32_t low,
125 float32_t high,
126 uint32_t numSamples)
127 {
128 uint32_t i;
129 for (i = 0; i < numSamples; i++)
130 {
131 if (pSrc[i] > high)
132 pDst[i] = high;
133 else if (pSrc[i] < low)
134 pDst[i] = low;
135 else
136 pDst[i] = pSrc[i];
137 }
138 }
139 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
140
141 /**
142 @} end of BasicClip group
143 */
144