1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_clip_f32.c
4 * Description: Elementwise floating-point clipping
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35 /**
36 @defgroup BasicClip Elementwise clipping
37
38 Element-by-element clipping of a value.
39
40 The value is constrained between 2 bounds.
41
42 There are separate functions for floating-point, Q7, Q15, and Q31 data types.
43 */
44
45 /**
46 @addtogroup BasicClip
47 @{
48 */
49
50 /**
51 @brief Elementwise floating-point clipping
52 @param[in] pSrc points to input values
53 @param[out] pDst points to output clipped values
54 @param[in] low lower bound
55 @param[in] high higher bound
56 @param[in] numSamples number of samples to clip
57 @return none
58 */
59 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
60
61 #include "arm_helium_utils.h"
62
/*
 * Helium (MVE) implementation of arm_clip_f32.
 *
 * Every sample is clamped with max(x, low) followed by min(.., high), four
 * lanes at a time.
 *
 * Memory accesses stay strictly inside the first numSamples elements of
 * pSrc and pDst:
 *  - full vectors are only loaded when at least four samples remain;
 *  - the last 1..3 samples use a predicated load and a predicated store.
 * As a consequence the function does not touch pSrc at all when
 * numSamples is 0 and does not need any padding after the input buffer.
 *
 * NOTE(review): vmaxnmq/vminnmq presumably follow IEEE 754 maxNum/minNum
 * rules, in which case a NaN input is replaced by a bound here while the
 * scalar fallback copies NaN through unchanged - confirm if NaN inputs
 * matter to callers.
 */
void arm_clip_f32(const float32_t * pSrc,
  float32_t * pDst,
  float32_t low,
  float32_t high,
  uint32_t numSamples)
{
    uint32_t blkCnt;
    f32x4_t curVec0, curVec1;
    f32x4_t vecLow, vecHigh;

    /* Broadcast both bounds once; they are loop invariant. */
    vecLow = vdupq_n_f32(low);
    vecHigh = vdupq_n_f32(high);

    /*
     * Main loop, unrolled x 2: two independent vectors per iteration so
     * that the vldr/vmax/vmin/vstr of one vector can be interleaved with
     * those of the other. Nothing is preloaded for the next iteration, so
     * no element beyond the current 8-sample block is ever read.
     */
    blkCnt = numSamples >> 3;
    while (blkCnt > 0U)
    {
        curVec0 = vld1q(pSrc);
        curVec1 = vld1q(pSrc + 4);
        pSrc += 8;

        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec1 = vmaxnmq(curVec1, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        curVec1 = vminnmq(curVec1, vecHigh);

        vst1q(pDst, curVec0);
        vst1q(pDst + 4, curVec1);
        pDst += 8;

        blkCnt--;
    }

    /*
     * Tail handling: 0..7 samples are left.
     */
    blkCnt = numSamples & 7U;

    /* One more complete vector if at least four samples remain. */
    if (blkCnt >= 4U)
    {
        curVec0 = vld1q(pSrc);
        pSrc += 4;
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 4;
        blkCnt -= 4U;
    }

    /*
     * Final 1..3 samples: the predicate enables only the first blkCnt
     * lanes, for the load as well as for the store, so neither buffer is
     * accessed past its last valid element.
     */
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        curVec0 = vldrwq_z_f32(pSrc, p0);
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vstrwq_p(pDst, curVec0, p0);
    }
}
121
122 #else
/*
 * Portable scalar implementation of arm_clip_f32.
 *
 * For each of the numSamples inputs the upper bound is tested first, then
 * the lower bound:
 *   - a sample greater than high is written as high;
 *   - otherwise a sample less than low is written as low;
 *   - otherwise the sample is copied unchanged (this is also what happens
 *     to NaN, for which both comparisons are false).
 * Nothing is read or written when numSamples is 0.
 */
void arm_clip_f32(const float32_t * pSrc,
  float32_t * pDst,
  float32_t low,
  float32_t high,
  uint32_t numSamples)
{
    uint32_t idx = 0U;

    while (idx < numSamples)
    {
        /* Read the input once; the output slot is written afterwards. */
        const float32_t sample = pSrc[idx];

        pDst[idx] = (sample > high) ? high
                  : ((sample < low) ? low : sample);

        idx++;
    }
}
139 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
140
141 /**
142 @} end of BasicClip group
143 */
144