1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_clip_f16.c
 * Description:  Elementwise floating-point clipping
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions_f16.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 
36 /**
37   @addtogroup BasicClip
38   @{
39  */
40 
41 /**
42   @brief         Elementwise floating-point clipping
43   @param[in]     pSrc          points to input values
44   @param[out]    pDst          points to output clipped values
45   @param[in]     low           lower bound
46   @param[in]     high          higher bound
47   @param[in]     numSamples    number of samples to clip
48  */
49 
50 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
52 #include "arm_helium_utils.h"
53 
/*
 * Helium (MVE) implementation.
 *
 * Each output sample is min(max(pSrc[i], low), high), computed eight
 * half-precision lanes at a time.
 *
 * Every load is issued inside the iteration that consumes it and the final
 * partial vector uses a predicated load, so no element at or beyond
 * pSrc[numSamples] is ever read (in particular nothing is read when
 * numSamples is 0).
 */
void arm_clip_f16(const float16_t * pSrc,
  float16_t * pDst,
  float16_t low,
  float16_t high,
  uint32_t numSamples)
{
    uint32_t  blkCnt;
    f16x8_t curVec0, curVec1;
    f16x8_t vecLow, vecHigh;

    /* Broadcast both bounds once; they are loop invariant. */
    vecLow = vdupq_n_f16(low);
    vecHigh = vdupq_n_f16(high);

    /*
     * Main loop, unrolled x 2 (16 samples per iteration) so that the two
     * independent vldr/vmax/vmin/vstr chains can be interleaved.
     * Both loads are issued before either store, so in-place operation
     * (pDst == pSrc) keeps working.
     */
    blkCnt = numSamples >> 4;
    while (blkCnt > 0U)
    {
        curVec0 = vld1q(pSrc);
        curVec1 = vld1q(pSrc + 8);
        pSrc += 16;

        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec1 = vmaxnmq(curVec1, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        curVec1 = vminnmq(curVec1, vecHigh);

        vst1q(pDst, curVec0);
        vst1q(pDst + 8, curVec1);
        pDst += 16;

        blkCnt--;
    }

    /*
     * Tail handling: 0..15 samples remain.
     */
    blkCnt = numSamples & 0xFU;
    if (blkCnt >= 8U)
    {
        /* One more complete vector is available. */
        curVec0 = vld1q(pSrc);
        pSrc += 8;
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 8;
        blkCnt -= 8U;
    }

    if (blkCnt > 0U)
    {
        /*
         * 1..7 samples left: the same predicate gates both the load and
         * the store, so lanes past the end of the buffers are neither
         * read nor written.
         */
        mve_pred16_t p0 = vctp16q(blkCnt);
        curVec0 = vldrhq_z_f16(pSrc, p0);
        curVec0 = vmaxnmq(curVec0, vecLow);
        curVec0 = vminnmq(curVec0, vecHigh);
        vstrhq_p(pDst, curVec0, p0);
    }
}
112 
113 #else
114 
115 #if defined(ARM_FLOAT16_SUPPORTED)
116 
/*
 * Portable scalar implementation.
 *
 * Each sample is compared against the upper bound first and the lower bound
 * second; a sample that satisfies neither comparison is copied unchanged.
 */
void arm_clip_f16(const float16_t * pSrc,
  float16_t * pDst,
  float16_t low,
  float16_t high,
  uint32_t numSamples)
{
    const float16_t *pIn = pSrc;
    float16_t *pOut = pDst;
    uint32_t remaining = numSamples;

    while (remaining > 0U)
    {
        /* Read the sample once so the comparisons and the copy agree. */
        const float16_t sample = *pIn++;
        float16_t clipped = sample;

        if ((_Float16)sample > (_Float16)high)
        {
            clipped = high;
        }
        else if ((_Float16)sample < (_Float16)low)
        {
            clipped = low;
        }

        *pOut++ = clipped;
        remaining--;
    }
}
#endif /* defined(ARM_FLOAT16_SUPPORTED) */
134 
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
136 
137 
138 /**
139   @} end of BasicClip group
140  */
141