1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_scale_q31.c
4 * Description: Multiplies a Q31 vector by a scalar
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35 /**
36 @addtogroup BasicScale
37 @{
38 */
39
40 /**
41 @brief Multiplies a Q31 vector by a scalar.
42 @param[in] pSrc points to the input vector
43 @param[in] scaleFract fractional portion of the scale value
44 @param[in] shift number of bits to shift the result by
45 @param[out] pDst points to the output vector
46 @param[in] blockSize number of samples in each vector
47
48 @par Scaling and Overflow Behavior
49 The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
50 These are multiplied to yield a 2.62 intermediate result and this is shifted
51 with saturation to 1.31 format.
52 There is an intermediate shift by 32 to go from the
53 2.62 to 1.31 format.
54 The shift argument is applied on the 1.31 result and not to the intermediate
55 2.62 format.
56 */
57
58 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
59
60 #include "arm_helium_utils.h"
61
arm_scale_q31(const q31_t * pSrc,q31_t scaleFract,int8_t shift,q31_t * pDst,uint32_t blockSize)62 ARM_DSP_ATTRIBUTE void arm_scale_q31(
63 const q31_t * pSrc,
64 q31_t scaleFract,
65 int8_t shift,
66 q31_t * pDst,
67 uint32_t blockSize)
68 {
69 uint32_t blkCnt; /* loop counters */
70 q31x4_t vecSrc;
71 q31x4_t vecDst;
72
73 /* Compute 4 outputs at a time */
74 blkCnt = blockSize >> 2;
75 while (blkCnt > 0U)
76 {
77 /*
78 * C = A * scale
79 * Scale the input and then store the result in the destination buffer.
80 */
81 vecSrc = vld1q(pSrc);
82 vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
83 vecDst = vqshlq_r(vecDst, shift + 1);
84 vst1q(pDst, vecDst);
85 /*
86 * Decrement the blockSize loop counter
87 */
88 blkCnt--;
89 /*
90 * advance vector source and destination pointers
91 */
92 pSrc += 4;
93 pDst += 4;
94 }
95 /*
96 * tail
97 */
98 blkCnt = blockSize & 3;
99 if (blkCnt > 0U)
100 {
101 mve_pred16_t p0 = vctp32q(blkCnt);
102 vecSrc = vld1q(pSrc);
103 vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
104 vecDst = vqshlq_r(vecDst, shift + 1);
105 vstrwq_p(pDst, vecDst, p0);
106 }
107 }
108
109 #else
arm_scale_q31(const q31_t * pSrc,q31_t scaleFract,int8_t shift,q31_t * pDst,uint32_t blockSize)110 ARM_DSP_ATTRIBUTE void arm_scale_q31(
111 const q31_t *pSrc,
112 q31_t scaleFract,
113 int8_t shift,
114 q31_t *pDst,
115 uint32_t blockSize)
116 {
117 uint32_t blkCnt; /* Loop counter */
118 q31_t in, out; /* Temporary variables */
119 int8_t kShift = shift + 1; /* Shift to apply after scaling */
120 int8_t sign = (kShift & 0x80);
121
122 #if defined (ARM_MATH_LOOPUNROLL)
123
124 /* Loop unrolling: Compute 4 outputs at a time */
125 blkCnt = blockSize >> 2U;
126
127 if (sign == 0U)
128 {
129 while (blkCnt > 0U)
130 {
131 /* C = A * scale */
132
133 /* Scale input and store result in destination buffer. */
134 in = *pSrc++; /* read input from source */
135 in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
136 out = in << kShift; /* apply shifting */
137 if (in != (out >> kShift)) /* saturate the result */
138 out = 0x7FFFFFFF ^ (in >> 31);
139 *pDst++ = out; /* Store result destination */
140
141 in = *pSrc++;
142 in = ((q63_t) in * scaleFract) >> 32;
143 out = in << kShift;
144 if (in != (out >> kShift))
145 out = 0x7FFFFFFF ^ (in >> 31);
146 *pDst++ = out;
147
148 in = *pSrc++;
149 in = ((q63_t) in * scaleFract) >> 32;
150 out = in << kShift;
151 if (in != (out >> kShift))
152 out = 0x7FFFFFFF ^ (in >> 31);
153 *pDst++ = out;
154
155 in = *pSrc++;
156 in = ((q63_t) in * scaleFract) >> 32;
157 out = in << kShift;
158 if (in != (out >> kShift))
159 out = 0x7FFFFFFF ^ (in >> 31);
160 *pDst++ = out;
161
162 /* Decrement loop counter */
163 blkCnt--;
164 }
165 }
166 else
167 {
168 while (blkCnt > 0U)
169 {
170 /* C = A * scale */
171
172 /* Scale input and store result in destination buffer. */
173 in = *pSrc++; /* read four inputs from source */
174 in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
175 out = in >> -kShift; /* apply shifting */
176 *pDst++ = out; /* Store result destination */
177
178 in = *pSrc++;
179 in = ((q63_t) in * scaleFract) >> 32;
180 out = in >> -kShift;
181 *pDst++ = out;
182
183 in = *pSrc++;
184 in = ((q63_t) in * scaleFract) >> 32;
185 out = in >> -kShift;
186 *pDst++ = out;
187
188 in = *pSrc++;
189 in = ((q63_t) in * scaleFract) >> 32;
190 out = in >> -kShift;
191 *pDst++ = out;
192
193 /* Decrement loop counter */
194 blkCnt--;
195 }
196 }
197
198 /* Loop unrolling: Compute remaining outputs */
199 blkCnt = blockSize % 0x4U;
200
201 #else
202
203 /* Initialize blkCnt with number of samples */
204 blkCnt = blockSize;
205
206 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
207
208 if (sign == 0U)
209 {
210 while (blkCnt > 0U)
211 {
212 /* C = A * scale */
213
214 /* Scale input and store result in destination buffer. */
215 in = *pSrc++;
216 in = ((q63_t) in * scaleFract) >> 32;
217 out = in << kShift;
218 if (in != (out >> kShift))
219 out = 0x7FFFFFFF ^ (in >> 31);
220 *pDst++ = out;
221
222 /* Decrement loop counter */
223 blkCnt--;
224 }
225 }
226 else
227 {
228 while (blkCnt > 0U)
229 {
230 /* C = A * scale */
231
232 /* Scale input and store result in destination buffer. */
233 in = *pSrc++;
234 in = ((q63_t) in * scaleFract) >> 32;
235 out = in >> -kShift;
236 *pDst++ = out;
237
238 /* Decrement loop counter */
239 blkCnt--;
240 }
241 }
242
243 }
244 #endif /* defined(ARM_MATH_MVEI) */
245
246 /**
247 @} end of BasicScale group
248 */
249