1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_clip_q7.c
4 * Description: Elementwise Q7 fixed-point vector clipping
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35
36 /**
37 @addtogroup BasicClip
38 @{
39 */
40
41 /**
42 @brief Elementwise fixed-point clipping
43 @param[in] pSrc points to input values
44 @param[out] pDst points to output clipped values
45 @param[in] low lower bound
46 @param[in] high higher bound
47 @param[in] numSamples number of samples to clip
48 @return none
49 */
50 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
51
52 #include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of Q7 clipping.
 *
 * Clamps each of the numSamples values read from pSrc with a vector max
 * against `low` followed by a vector min against `high`, and writes the
 * result to pDst. Because the max is applied before the min, a call with
 * low > high produces `high` for every output sample.
 *
 * The loads are software-pipelined: a vector is always fetched one step
 * before it is clipped and stored.
 *
 * NOTE(review): as a consequence of that pipelining, the function reads up
 * to 16 bytes beyond pSrc + numSamples (a full vector is fetched even when
 * numSamples is 0). The extra lanes are never written to pDst. Presumably
 * callers must guarantee the source buffer is readable that far - confirm
 * against the library's buffer requirements before relying on it.
 */
void arm_clip_q7(const q7_t * pSrc,
  q7_t * pDst,
  q7_t low,
  q7_t high,
  uint32_t numSamples)
{
    uint32_t blkCnt;
    q7x16_t curVec0, curVec1;
    q7x16_t vecLow, vecHigh;

    /* Broadcast the scalar bounds to all 16 byte lanes. */
    vecLow = vdupq_n_s8(low);
    vecHigh = vdupq_n_s8(high);

    /* Prime the pipeline with the first 16 samples. */
    curVec0 = vld1q(pSrc);
    pSrc += 16;

    /*
     * Main loop, unrolled x 2 (32 samples per iteration) to allow
     * vldr/vstr/vmin/vmax stall free interleaving: each load is issued
     * between the max and min of the other vector. The statement order
     * is intentional and must be kept.
     */
    blkCnt = numSamples >> 5;
    while (blkCnt > 0U)
    {
        curVec0 = vmaxq(curVec0, vecLow);
        curVec1 = vld1q(pSrc);
        pSrc += 16;
        curVec0 = vminq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 16;

        curVec1 = vmaxq(curVec1, vecLow);
        /* Prefetch the first vector of the next iteration (or of the tail). */
        curVec0 = vld1q(pSrc);
        pSrc += 16;
        curVec1 = vminq(curVec1, vecHigh);
        vst1q(pDst, curVec1);
        pDst += 16;

        blkCnt--;
    }

    /*
     * Tail handling: 0..31 samples remain and curVec0 already holds the
     * first (up to) 16 of them.
     */
    blkCnt = numSamples & 0x1FU;
    if (blkCnt >= 16)
    {
        /* One complete vector left: clip and store it unpredicated. */
        curVec0 = vmaxq(curVec0, vecLow);
        curVec0 = vminq(curVec0, vecHigh);
        vst1q(pDst, curVec0);
        pDst += 16;

        /* Fetch whatever follows for the predicated store below. */
        curVec0 = vld1q(pSrc);
        pSrc += 16;
    }

    if (blkCnt > 0)
    {
        /*
         * Store only the remaining (blkCnt mod 16) lanes. When blkCnt is
         * exactly 16 the predicate requests zero lanes, so nothing more
         * is written.
         */
        mve_pred16_t p0 = vctp8q(blkCnt & 0xf);
        curVec0 = vmaxq(curVec0, vecLow);
        curVec0 = vminq(curVec0, vecHigh);
        vstrbq_p(pDst, curVec0, p0);
    }
}
111
112 #else
/*
 * Portable C implementation of Q7 clipping.
 *
 * For each of the numSamples input values: a value greater than `high`
 * is replaced by `high`, otherwise a value smaller than `low` is replaced
 * by `low`, otherwise the value is copied unchanged to pDst.
 *
 * The upper bound is tested first, so if a caller passes low > high,
 * samples above `high` become `high` and every other sample becomes `low`.
 */
void arm_clip_q7(const q7_t * pSrc,
  q7_t * pDst,
  q7_t low,
  q7_t high,
  uint32_t numSamples)
{
    for (uint32_t i = 0; i < numSamples; i++)
    {
        /* Read the input once; every branch below works on this copy. */
        const q7_t sample = pSrc[i];

        if (sample > high)
        {
            pDst[i] = high;
        }
        else if (sample < low)
        {
            pDst[i] = low;
        }
        else
        {
            pDst[i] = sample;
        }
    }
}
129 #endif /* defined(ARM_MATH_MVEI) */
130
131 /**
132 @} end of BasicClip group
133 */
134