1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_clip_f16.c
4 * Description: Floating-point vector clipping
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions_f16.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35
36 /**
37 @addtogroup BasicClip
38 @{
39 */
40
41 /**
42 @brief Elementwise floating-point clipping
43 @param[in] pSrc points to input values
44 @param[out] pDst points to output clipped values
45 @param[in] low lower bound
46 @param[in] high higher bound
47 @param[in] numSamples number of samples to clip
48 @return none
49 */
50
51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
52
53 #include "arm_helium_utils.h"
54
/*
 * Helium (MVE) implementation of the half-precision elementwise clip.
 *
 * Each vector of 8 samples is first raised to at least `low` with vmaxnmq
 * and then lowered to at most `high` with vminnmq before being stored.
 *
 *   pSrc        points to the input values
 *   pDst        points to the output clipped values
 *   low         lower bound
 *   high        higher bound
 *   numSamples  number of samples to clip
 *
 * Every load is issued only for samples that are about to be processed, and
 * the final partial vector is fetched with a predicated load, so the function
 * never accesses memory beyond pSrc[numSamples - 1] (and touches nothing at
 * all when numSamples is 0).
 */
void arm_clip_f16(const float16_t * pSrc,
  float16_t * pDst,
  float16_t low,
  float16_t high,
  uint32_t numSamples)
{
    uint32_t blkCnt;
    f16x8_t curVec0, curVec1;
    const f16x8_t vecLow = vdupq_n_f16(low);
    const f16x8_t vecHigh = vdupq_n_f16(high);

    /*
     * Main loop, unrolled x 2 (16 samples per iteration) so that the two
     * independent vldr/vmax/vmin/vstr chains can be interleaved.
     */
    blkCnt = numSamples >> 4;
    while (blkCnt > 0U)
    {
        curVec0 = vld1q(pSrc);
        curVec1 = vld1q(pSrc + 8);
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec1 = vmaxnmq(curVec1, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        curVec1 = vminnmq(curVec1, vecHigh);
        vst1q(pDst, curVec0);
        vst1q(pDst + 8, curVec1);
        pSrc += 16;
        pDst += 16;
        blkCnt--;
    }

    /*
     * Tail handling: 0 to 15 samples remain.
     */
    blkCnt = numSamples & 0xFU;
    if (blkCnt >= 8U)
    {
        /* One more complete vector of 8 samples. */
        curVec0 = vld1q(pSrc);
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pSrc += 8;
        pDst += 8;
        blkCnt -= 8U;
    }

    if (blkCnt > 0U)
    {
        /*
         * Final 1 to 7 samples: the same predicate guards both the load and
         * the store, so only the remaining lanes are read and written.
         */
        mve_pred16_t p0 = vctp16q(blkCnt);
        curVec0 = vld1q_z(pSrc, p0);
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vstrhq_p(pDst, curVec0, p0);
    }
}
113
114 #else
115
116 #if defined(ARM_FLOAT16_SUPPORTED)
117
/*
 * Scalar implementation of the half-precision elementwise clip.
 *
 * For every sample: values greater than `high` are replaced by `high`,
 * values less than `low` are replaced by `low`, and every other value is
 * copied unchanged. The upper bound is tested first.
 *
 *   pSrc        points to the input values
 *   pDst        points to the output clipped values
 *   low         lower bound
 *   high        higher bound
 *   numSamples  number of samples to clip
 */
void arm_clip_f16(const float16_t * pSrc,
  float16_t * pDst,
  float16_t low,
  float16_t high,
  uint32_t numSamples)
{
    const float16_t *pIn = pSrc;
    float16_t *pOut = pDst;
    uint32_t remaining = numSamples;

    while (remaining > 0U)
    {
        const float16_t sample = *pIn++;
        float16_t clipped = sample;

        if (sample > high)
        {
            clipped = high;
        }
        else if (sample < low)
        {
            clipped = low;
        }

        *pOut++ = clipped;
        remaining--;
    }
}
134 #endif /* defined(ARM_FLOAT16_SUPPORTED) */
135
136 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
137
138
139 /**
140 @} end of BasicClip group
141 */
142