1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_clip_q15.c
4 * Description: Elementwise Q15 fixed-point vector clipping
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35
36 /**
37 @addtogroup BasicClip
38 @{
39 */
40
41 /**
42 @brief Elementwise fixed-point clipping
43 @param[in] pSrc points to input values
44 @param[out] pDst points to output clipped values
45 @param[in] low lower bound
46 @param[in] high higher bound
47 @param[in] numSamples number of samples to clip
48 */
49 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
50
51 #include "arm_helium_utils.h"
arm_clip_q15(const q15_t * pSrc,q15_t * pDst,q15_t low,q15_t high,uint32_t numSamples)52 ARM_DSP_ATTRIBUTE void arm_clip_q15(const q15_t * pSrc,
53 q15_t * pDst,
54 q15_t low,
55 q15_t high,
56 uint32_t numSamples)
57 {
58 uint32_t blkCnt;
59 q15x8_t curVec0, curVec1;
60 q15x8_t vecLow, vecHigh;
61
62 vecLow = vdupq_n_s16(low);
63 vecHigh = vdupq_n_s16(high);
64
65 curVec0 = vld1q(pSrc);
66 pSrc += 8;
67 /*
68 * unrolled x 2 to allow
69 * vldr/vstr/vmin/vmax
70 * stall free interleaving
71 */
72 blkCnt = numSamples >> 4;
73 while (blkCnt--)
74 {
75 curVec0 = vmaxq(curVec0, vecLow);
76 curVec1 = vld1q(pSrc);
77 pSrc += 8;
78 curVec0 = vminq(curVec0, vecHigh);
79 vst1q(pDst, curVec0);
80 pDst += 8;
81 curVec1 = vmaxq(curVec1, vecLow);
82 curVec0 = vld1q(pSrc);
83 pSrc += 8;
84 curVec1 = vminq(curVec1, vecHigh);
85 vst1q(pDst, curVec1);
86 pDst += 8;
87 }
88 /*
89 * Tail handling
90 */
91 blkCnt = numSamples - ((numSamples >> 4) << 4);
92 if (blkCnt >= 8)
93 {
94 curVec0 = vmaxq(curVec0, vecLow);
95 curVec0 = vminq(curVec0, vecHigh);
96 vst1q(pDst, curVec0);
97 pDst += 8;
98 curVec0 = vld1q(pSrc);
99 pSrc += 8;
100 }
101
102 if (blkCnt > 0)
103 {
104 mve_pred16_t p0 = vctp16q(blkCnt & 7);
105 curVec0 = vmaxq(curVec0, vecLow);
106 curVec0 = vminq(curVec0, vecHigh);
107 vstrhq_p(pDst, curVec0, p0);
108 }
109 }
110
111 #else
arm_clip_q15(const q15_t * pSrc,q15_t * pDst,q15_t low,q15_t high,uint32_t numSamples)112 ARM_DSP_ATTRIBUTE void arm_clip_q15(const q15_t * pSrc,
113 q15_t * pDst,
114 q15_t low,
115 q15_t high,
116 uint32_t numSamples)
117 {
118 uint32_t i;
119 for (i = 0; i < numSamples; i++)
120 {
121 if (pSrc[i] > high)
122 pDst[i] = high;
123 else if (pSrc[i] < low)
124 pDst[i] = low;
125 else
126 pDst[i] = pSrc[i];
127 }
128 }
129 #endif /* defined(ARM_MATH_MVEI) */
130
131 /**
132 @} end of BasicClip group
133 */
134