1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_clip_f16.c
4 * Description: Elementwise floating-point clipping
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions_f16.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35
36 /**
37 @addtogroup BasicClip
38 @{
39 */
40
41 /**
42 @brief Elementwise floating-point clipping
43 @param[in] pSrc points to input values
44 @param[out] pDst points to output clipped values
45 @param[in] low lower bound
46 @param[in] high higher bound
47 @param[in] numSamples number of samples to clip
48 */
49
50 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
51
52 #include "arm_helium_utils.h"
53
/*
 * Helium (MVE) implementation of arm_clip_f16.
 *
 * Clamps each of the numSamples half-precision values read from pSrc to the
 * range [low, high] (maximum against low first, then minimum against high)
 * and stores the result in pDst.
 *
 *   pSrc        points to input values
 *   pDst        points to output clipped values
 *   low         lower bound
 *   high        higher bound
 *   numSamples  number of samples to clip
 *
 * Every load is issued in the step that consumes it, and the final partial
 * vector is loaded under the same tail predicate that guards its store.
 * As a result no element at or beyond pSrc[numSamples] is ever read, and
 * nothing at all is read or written when numSamples is 0.
 */
void arm_clip_f16(const float16_t * pSrc,
  float16_t * pDst,
  float16_t low,
  float16_t high,
  uint32_t numSamples)
{
    uint32_t blkCnt;
    f16x8_t curVec0, curVec1;
    f16x8_t vecLow, vecHigh;

    /* Broadcast the bounds once; they are loop invariant. */
    vecLow = vdupq_n_f16(low);
    vecHigh = vdupq_n_f16(high);

    /*
     * Main loop, unrolled x 2: two full 8-lane vectors (16 samples) per
     * iteration. The two independent load/max/min/store chains leave the
     * compiler room to interleave them.
     */
    blkCnt = numSamples >> 4;
    while (blkCnt > 0U)
    {
        curVec0 = vld1q(pSrc);
        curVec1 = vld1q(pSrc + 8);
        pSrc += 16;

        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec1 = vmaxnmq(curVec1, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        curVec1 = vminnmq(curVec1, vecHigh);

        vst1q(pDst, curVec0);
        vst1q(pDst + 8, curVec1);
        pDst += 16;

        blkCnt--;
    }

    /*
     * Tail handling: 0..15 samples remain.
     */
    blkCnt = numSamples & 0xFU;
    if (blkCnt >= 8U)
    {
        /* One more complete vector. */
        curVec0 = vld1q(pSrc);
        pSrc += 8;
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 8;
        blkCnt -= 8U;
    }

    if (blkCnt > 0U)
    {
        /*
         * 1..7 samples left: enable only the first blkCnt lanes for both
         * the load and the store so that neither buffer is accessed past
         * its last valid element.
         */
        mve_pred16_t p0 = vctp16q(blkCnt);
        curVec0 = vld1q_z(pSrc, p0);
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vstrhq_p(pDst, curVec0, p0);
    }
}
112
113 #else
114
115 #if defined(ARM_FLOAT16_SUPPORTED)
116
/*
 * Scalar implementation of arm_clip_f16.
 *
 * Walks the input one sample at a time. A sample greater than high is
 * replaced by high; otherwise a sample less than low is replaced by low;
 * any other sample is copied to the output unchanged. The upper bound is
 * tested before the lower bound.
 *
 *   pSrc        points to input values
 *   pDst        points to output clipped values
 *   low         lower bound
 *   high        higher bound
 *   numSamples  number of samples to clip
 */
void arm_clip_f16(const float16_t * pSrc,
  float16_t * pDst,
  float16_t low,
  float16_t high,
  uint32_t numSamples)
{
    const float16_t *in = pSrc;
    float16_t *out = pDst;
    uint32_t remaining = numSamples;

    while (remaining > 0U)
    {
        const float16_t sample = *in++;
        float16_t clipped = sample;

        if ((_Float16)sample > (_Float16)high)
        {
            clipped = high;
        }
        else if ((_Float16)sample < (_Float16)low)
        {
            clipped = low;
        }

        *out++ = clipped;
        remaining--;
    }
}
133 #endif /* defined(ARM_FLOAT16_SUPPORTED) */
134
135 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
136
137
138 /**
139 @} end of BasicClip group
140 */
141