1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_scale_f32.c
4 * Description: Multiplies a floating-point vector by a scalar
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35 /**
36 @defgroup BasicScale Vector Scale
37
38 Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
39
40 <pre>
41 pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
42 </pre>
43
44 In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
45 a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
46 The shift allows the gain of the scaling operation to exceed 1.0.
47 The algorithm used with fixed-point data is:
48
49 <pre>
50 pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
51 </pre>
52
53 The overall scale factor applied to the fixed-point data is
54 <pre>
55 scale = scaleFract * 2^shift.
56 </pre>
57
58 The functions support in-place computation allowing the source and destination
59 pointers to reference the same memory buffer.
60 */
61
62 /**
63 @addtogroup BasicScale
64 @{
65 */
66
67 /**
68 @brief Multiplies a floating-point vector by a scalar.
69 @param[in] pSrc points to the input vector
70 @param[in] scale scale factor to be applied
71 @param[out] pDst points to the output vector
72 @param[in] blockSize number of samples in each vector
73 */
74
75 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
76
77 #include "arm_helium_utils.h"
78
arm_scale_f32(const float32_t * pSrc,float32_t scale,float32_t * pDst,uint32_t blockSize)79 ARM_DSP_ATTRIBUTE void arm_scale_f32(
80 const float32_t * pSrc,
81 float32_t scale,
82 float32_t * pDst,
83 uint32_t blockSize)
84 {
85 uint32_t blkCnt; /* Loop counter */
86
87 f32x4_t vec1;
88 f32x4_t res;
89
90 /* Compute 4 outputs at a time */
91 blkCnt = blockSize >> 2U;
92
93 while (blkCnt > 0U)
94 {
95 /* C = A + offset */
96
97 /* Add offset and then store the results in the destination buffer. */
98 vec1 = vld1q(pSrc);
99 res = vmulq(vec1,scale);
100 vst1q(pDst, res);
101
102 /* Increment pointers */
103 pSrc += 4;
104 pDst += 4;
105
106 /* Decrement the loop counter */
107 blkCnt--;
108 }
109
110 /* Tail */
111 blkCnt = blockSize & 0x3;
112
113 if (blkCnt > 0U)
114 {
115 mve_pred16_t p0 = vctp32q(blkCnt);
116 vec1 = vld1q((float32_t const *) pSrc);
117 vstrwq_p(pDst, vmulq(vec1, scale), p0);
118 }
119
120
121 }
122
123 #else
arm_scale_f32(const float32_t * pSrc,float32_t scale,float32_t * pDst,uint32_t blockSize)124 ARM_DSP_ATTRIBUTE void arm_scale_f32(
125 const float32_t *pSrc,
126 float32_t scale,
127 float32_t *pDst,
128 uint32_t blockSize)
129 {
130 uint32_t blkCnt; /* Loop counter */
131 #if defined(ARM_MATH_NEON_EXPERIMENTAL)
132 f32x4_t vec1;
133 f32x4_t res;
134
135 /* Compute 4 outputs at a time */
136 blkCnt = blockSize >> 2U;
137
138 while (blkCnt > 0U)
139 {
140 /* C = A * scale */
141
142 /* Scale the input and then store the results in the destination buffer. */
143 vec1 = vld1q_f32(pSrc);
144 res = vmulq_f32(vec1, vdupq_n_f32(scale));
145 vst1q_f32(pDst, res);
146
147 /* Increment pointers */
148 pSrc += 4;
149 pDst += 4;
150
151 /* Decrement the loop counter */
152 blkCnt--;
153 }
154
155 /* Tail */
156 blkCnt = blockSize & 0x3;
157
158 #else
159 #if defined (ARM_MATH_LOOPUNROLL)
160
161 /* Loop unrolling: Compute 4 outputs at a time */
162 blkCnt = blockSize >> 2U;
163
164 while (blkCnt > 0U)
165 {
166 float32_t in1, in2, in3, in4;
167
168 /* C = A * scale */
169
170 /* Scale input and store result in destination buffer. */
171 in1 = (*pSrc++) * scale;
172
173 in2 = (*pSrc++) * scale;
174
175 in3 = (*pSrc++) * scale;
176
177 in4 = (*pSrc++) * scale;
178
179 *pDst++ = in1;
180 *pDst++ = in2;
181 *pDst++ = in3;
182 *pDst++ = in4;
183
184 /* Decrement loop counter */
185 blkCnt--;
186 }
187
188 /* Loop unrolling: Compute remaining outputs */
189 blkCnt = blockSize % 0x4U;
190
191 #else
192
193 /* Initialize blkCnt with number of samples */
194 blkCnt = blockSize;
195
196 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
197 #endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
198
199 while (blkCnt > 0U)
200 {
201 /* C = A * scale */
202
203 /* Scale input and store result in destination buffer. */
204 *pDst++ = (*pSrc++) * scale;
205
206 /* Decrement loop counter */
207 blkCnt--;
208 }
209
210 }
211 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
212
213 /**
214 @} end of BasicScale group
215 */
216