1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_scale_f32.c
4 * Description: Multiplies a floating-point matrix by a scalar
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions.h"
30
31 /**
32 @ingroup groupMatrix
33 */
34
35 /**
36 @defgroup MatrixScale Matrix Scale
37
38 Multiplies a matrix by a scalar. This is accomplished by multiplying each element in the
39 matrix by the scalar. For example:
40 \image html MatrixScale.gif "Matrix Scaling of a 3 x 3 matrix"
41
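When <code>ARM_MATH_MATRIX_CHECK</code> is defined, the function checks that the input and output matrices are of the same size and returns <code>ARM_MATH_SIZE_MISMATCH</code> if they are not.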
43
44 In the fixed-point Q15 and Q31 functions, <code>scale</code> is represented by
45 a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
46 The shift allows the gain of the scaling operation to exceed 1.0.
47 The overall scale factor applied to the fixed-point data is
48 <pre>
49 scale = scaleFract * 2^shift.
50 </pre>
51 */
52
53 /**
54 @addtogroup MatrixScale
55 @{
56 */
57
58 /**
59 @brief Floating-point matrix scaling.
60 @param[in] pSrc points to input matrix
61 @param[in] scale scale factor to be applied
62 @param[out] pDst points to output matrix structure
63 @return execution status
64 - \ref ARM_MATH_SUCCESS : Operation successful
65 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
66 */
67 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)68 arm_status arm_mat_scale_f32(
69 const arm_matrix_instance_f32 * pSrc,
70 float32_t scale,
71 arm_matrix_instance_f32 * pDst)
72 {
73 arm_status status; /* status of matrix scaling */
74 #ifdef ARM_MATH_MATRIX_CHECK
75 /* Check for matrix mismatch condition */
76 if ((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
77 {
78 /* Set status as ARM_MATH_SIZE_MISMATCH */
79 status = ARM_MATH_SIZE_MISMATCH;
80 }
81 else
82 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
83 {
84 float32_t *pIn = pSrc->pData; /* input data matrix pointer */
85 float32_t *pOut = pDst->pData; /* output data matrix pointer */
86 uint32_t numSamples; /* total number of elements in the matrix */
87 uint32_t blkCnt; /* loop counters */
88 f32x4_t vecIn, vecOut;
89 float32_t const *pInVec;
90
91 pInVec = (float32_t const *) pIn;
92 /*
93 * Total number of samples in the input matrix
94 */
95 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
96 blkCnt = numSamples >> 2;
97 while (blkCnt > 0U)
98 {
99 /*
100 * C(m,n) = A(m,n) * scale
101 * Scaling and results are stored in the destination buffer.
102 */
103 vecIn = vld1q(pInVec);
104 pInVec += 4;
105
106 vecOut = vecIn * scale;
107
108 vst1q(pOut, vecOut);
109 pOut += 4;
110 /*
111 * Decrement the blockSize loop counter
112 */
113 blkCnt--;
114 }
115 /*
116 * tail
117 */
118 blkCnt = numSamples & 3;
119 if (blkCnt > 0U)
120 {
121 mve_pred16_t p0 = vctp32q(blkCnt);
122 vecIn = vld1q(pInVec);
123 vecOut = vecIn * scale;
124
125 vstrwq_p(pOut, vecOut, p0);
126 }
127 /* Set status as ARM_MATH_SUCCESS */
128 status = ARM_MATH_SUCCESS;
129 }
130
131 /* Return to application */
132 return (status);
133
134 }
135 #else
136 #if defined(ARM_MATH_NEON_EXPERIMENTAL)
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)137 arm_status arm_mat_scale_f32(
138 const arm_matrix_instance_f32 * pSrc,
139 float32_t scale,
140 arm_matrix_instance_f32 * pDst)
141 {
142 float32_t *pIn = pSrc->pData; /* input data matrix pointer */
143 float32_t *pOut = pDst->pData; /* output data matrix pointer */
144 uint32_t numSamples; /* total number of elements in the matrix */
145 uint32_t blkCnt; /* loop counters */
146 arm_status status; /* status of matrix scaling */
147
148
149 #ifdef ARM_MATH_MATRIX_CHECK
150 /* Check for matrix mismatch condition */
151 if ((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
152 {
153 /* Set status as ARM_MATH_SIZE_MISMATCH */
154 status = ARM_MATH_SIZE_MISMATCH;
155 }
156 else
157 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
158 {
159 float32x4_t vec1;
160 float32x4_t res;
161
162 /* Total number of samples in the input matrix */
163 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
164
165 blkCnt = numSamples >> 2;
166
167 /* Compute 4 outputs at a time.
168 ** a second loop below computes the remaining 1 to 3 samples. */
169 while (blkCnt > 0U)
170 {
171 /* C(m,n) = A(m,n) * scale */
172 /* Scaling and results are stored in the destination buffer. */
173 vec1 = vld1q_f32(pIn);
174 res = vmulq_f32(vec1, vdupq_n_f32(scale));
175 vst1q_f32(pOut, res);
176
177 /* update pointers to process next sampels */
178 pIn += 4U;
179 pOut += 4U;
180
181 /* Decrement the numSamples loop counter */
182 blkCnt--;
183 }
184
185 /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
186 ** No loop unrolling is used. */
187 blkCnt = numSamples % 0x4U;
188
189 while (blkCnt > 0U)
190 {
191 /* C(m,n) = A(m,n) * scale */
192 /* The results are stored in the destination buffer. */
193 *pOut++ = (*pIn++) * scale;
194
195 /* Decrement the loop counter */
196 blkCnt--;
197 }
198
199 /* Set status as ARM_MATH_SUCCESS */
200 status = ARM_MATH_SUCCESS;
201 }
202
203 /* Return to application */
204 return (status);
205 }
206 #else
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)207 arm_status arm_mat_scale_f32(
208 const arm_matrix_instance_f32 * pSrc,
209 float32_t scale,
210 arm_matrix_instance_f32 * pDst)
211 {
212 float32_t *pIn = pSrc->pData; /* Input data matrix pointer */
213 float32_t *pOut = pDst->pData; /* Output data matrix pointer */
214 uint32_t numSamples; /* Total number of elements in the matrix */
215 uint32_t blkCnt; /* Loop counters */
216 arm_status status; /* Status of matrix scaling */
217
218 #ifdef ARM_MATH_MATRIX_CHECK
219
220 /* Check for matrix mismatch condition */
221 if ((pSrc->numRows != pDst->numRows) ||
222 (pSrc->numCols != pDst->numCols) )
223 {
224 /* Set status as ARM_MATH_SIZE_MISMATCH */
225 status = ARM_MATH_SIZE_MISMATCH;
226 }
227 else
228
229 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
230
231 {
232 /* Total number of samples in input matrix */
233 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
234
235 #if defined (ARM_MATH_LOOPUNROLL)
236
237 /* Loop unrolling: Compute 4 outputs at a time */
238 blkCnt = numSamples >> 2U;
239
240 while (blkCnt > 0U)
241 {
242 /* C(m,n) = A(m,n) * scale */
243
244 /* Scale and store result in destination buffer. */
245 *pOut++ = (*pIn++) * scale;
246 *pOut++ = (*pIn++) * scale;
247 *pOut++ = (*pIn++) * scale;
248 *pOut++ = (*pIn++) * scale;
249
250 /* Decrement loop counter */
251 blkCnt--;
252 }
253
254 /* Loop unrolling: Compute remaining outputs */
255 blkCnt = numSamples % 0x4U;
256
257 #else
258
259 /* Initialize blkCnt with number of samples */
260 blkCnt = numSamples;
261
262 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
263
264 while (blkCnt > 0U)
265 {
266 /* C(m,n) = A(m,n) * scale */
267
268 /* Scale and store result in destination buffer. */
269 *pOut++ = (*pIn++) * scale;
270
271 /* Decrement loop counter */
272 blkCnt--;
273 }
274
275 /* Set status as ARM_MATH_SUCCESS */
276 status = ARM_MATH_SUCCESS;
277 }
278
279 /* Return to application */
280 return (status);
281 }
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
283 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
284
285 /**
286 @} end of MatrixScale group
287 */
288