1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_scale_f32.c
4  * Description:  Multiplies a floating-point matrix by a scalar
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 
31 /**
32   @ingroup groupMatrix
33  */
34 
35 /**
36   @defgroup MatrixScale Matrix Scale
37 
38   Multiplies a matrix by a scalar.  This is accomplished by multiplying each element in the
39   matrix by the scalar.  For example:
40   \image html MatrixScale.gif "Matrix Scaling of a 3 x 3 matrix"
41 
  When <code>ARM_MATH_MATRIX_CHECK</code> is defined, the function checks that the input and
  output matrices are of the same size; otherwise no size check is performed.
43 
44   In the fixed-point Q15 and Q31 functions, <code>scale</code> is represented by
45   a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
46   The shift allows the gain of the scaling operation to exceed 1.0.
47   The overall scale factor applied to the fixed-point data is
48   <pre>
49       scale = scaleFract * 2^shift.
50   </pre>
51  */
52 
53 /**
54   @addtogroup MatrixScale
55   @{
56  */
57 
58 /**
59   @brief         Floating-point matrix scaling.
60   @param[in]     pSrc       points to input matrix
61   @param[in]     scale      scale factor to be applied
62   @param[out]    pDst       points to output matrix structure
63   @return        execution status
64                    - \ref ARM_MATH_SUCCESS       : Operation successful
65                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
66  */
67 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)68 arm_status arm_mat_scale_f32(
69   const arm_matrix_instance_f32 * pSrc,
70   float32_t scale,
71   arm_matrix_instance_f32 * pDst)
72 {
73   arm_status status;                             /* status of matrix scaling     */
74   #ifdef ARM_MATH_MATRIX_CHECK
75   /* Check for matrix mismatch condition */
76   if ((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
77   {
78     /* Set status as ARM_MATH_SIZE_MISMATCH */
79     status = ARM_MATH_SIZE_MISMATCH;
80   }
81   else
82 #endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
83   {
84     float32_t *pIn = pSrc->pData;   /* input data matrix pointer */
85     float32_t *pOut = pDst->pData;  /* output data matrix pointer */
86     uint32_t  numSamples;           /* total number of elements in the matrix */
87     uint32_t  blkCnt;               /* loop counters */
88     f32x4_t vecIn, vecOut;
89     float32_t const *pInVec;
90 
91     pInVec = (float32_t const *) pIn;
92     /*
93      * Total number of samples in the input matrix
94      */
95     numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
96     blkCnt = numSamples >> 2;
97     while (blkCnt > 0U)
98     {
99         /*
100          * C(m,n) = A(m,n) * scale
101          * Scaling and results are stored in the destination buffer.
102          */
103         vecIn = vld1q(pInVec);
104         pInVec += 4;
105 
106         vecOut = vecIn * scale;
107 
108         vst1q(pOut, vecOut);
109         pOut += 4;
110         /*
111          * Decrement the blockSize loop counter
112          */
113         blkCnt--;
114     }
115     /*
116      * tail
117      */
118     blkCnt = numSamples & 3;
119     if (blkCnt > 0U)
120     {
121         mve_pred16_t p0 = vctp32q(blkCnt);
122         vecIn = vld1q(pInVec);
123         vecOut = vecIn * scale;
124 
125         vstrwq_p(pOut, vecOut, p0);
126     }
127     /* Set status as ARM_MATH_SUCCESS */
128     status = ARM_MATH_SUCCESS;
129   }
130 
131   /* Return to application */
132   return (status);
133 
134 }
135 #else
136 #if defined(ARM_MATH_NEON_EXPERIMENTAL)
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)137 arm_status arm_mat_scale_f32(
138   const arm_matrix_instance_f32 * pSrc,
139   float32_t scale,
140   arm_matrix_instance_f32 * pDst)
141 {
142   float32_t *pIn = pSrc->pData;                  /* input data matrix pointer */
143   float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
144   uint32_t numSamples;                           /* total number of elements in the matrix */
145   uint32_t blkCnt;                               /* loop counters */
146   arm_status status;                             /* status of matrix scaling     */
147 
148 
149 #ifdef ARM_MATH_MATRIX_CHECK
150   /* Check for matrix mismatch condition */
151   if ((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
152   {
153     /* Set status as ARM_MATH_SIZE_MISMATCH */
154     status = ARM_MATH_SIZE_MISMATCH;
155   }
156   else
157 #endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
158   {
159     float32x4_t vec1;
160     float32x4_t res;
161 
162     /* Total number of samples in the input matrix */
163     numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
164 
165     blkCnt = numSamples >> 2;
166 
167     /* Compute 4 outputs at a time.
168      ** a second loop below computes the remaining 1 to 3 samples. */
169     while (blkCnt > 0U)
170     {
171       /* C(m,n) = A(m,n) * scale */
172       /* Scaling and results are stored in the destination buffer. */
173       vec1 = vld1q_f32(pIn);
174       res = vmulq_f32(vec1, vdupq_n_f32(scale));
175       vst1q_f32(pOut, res);
176 
177       /* update pointers to process next sampels */
178       pIn += 4U;
179       pOut += 4U;
180 
181       /* Decrement the numSamples loop counter */
182       blkCnt--;
183     }
184 
185     /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
186      ** No loop unrolling is used. */
187     blkCnt = numSamples % 0x4U;
188 
189     while (blkCnt > 0U)
190     {
191       /* C(m,n) = A(m,n) * scale */
192       /* The results are stored in the destination buffer. */
193       *pOut++ = (*pIn++) * scale;
194 
195       /* Decrement the loop counter */
196       blkCnt--;
197     }
198 
199     /* Set status as ARM_MATH_SUCCESS */
200     status = ARM_MATH_SUCCESS;
201   }
202 
203   /* Return to application */
204   return (status);
205 }
206 #else
arm_mat_scale_f32(const arm_matrix_instance_f32 * pSrc,float32_t scale,arm_matrix_instance_f32 * pDst)207 arm_status arm_mat_scale_f32(
208   const arm_matrix_instance_f32 * pSrc,
209         float32_t                 scale,
210         arm_matrix_instance_f32 * pDst)
211 {
212   float32_t *pIn = pSrc->pData;                  /* Input data matrix pointer */
213   float32_t *pOut = pDst->pData;                 /* Output data matrix pointer */
214   uint32_t numSamples;                           /* Total number of elements in the matrix */
215   uint32_t blkCnt;                               /* Loop counters */
216   arm_status status;                             /* Status of matrix scaling */
217 
218 #ifdef ARM_MATH_MATRIX_CHECK
219 
220   /* Check for matrix mismatch condition */
221   if ((pSrc->numRows != pDst->numRows) ||
222       (pSrc->numCols != pDst->numCols)   )
223   {
224     /* Set status as ARM_MATH_SIZE_MISMATCH */
225     status = ARM_MATH_SIZE_MISMATCH;
226   }
227   else
228 
229 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
230 
231   {
232     /* Total number of samples in input matrix */
233     numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
234 
235 #if defined (ARM_MATH_LOOPUNROLL)
236 
237     /* Loop unrolling: Compute 4 outputs at a time */
238     blkCnt = numSamples >> 2U;
239 
240     while (blkCnt > 0U)
241     {
242       /* C(m,n) = A(m,n) * scale */
243 
244       /* Scale and store result in destination buffer. */
245       *pOut++ = (*pIn++) * scale;
246       *pOut++ = (*pIn++) * scale;
247       *pOut++ = (*pIn++) * scale;
248       *pOut++ = (*pIn++) * scale;
249 
250       /* Decrement loop counter */
251       blkCnt--;
252     }
253 
254     /* Loop unrolling: Compute remaining outputs */
255     blkCnt = numSamples % 0x4U;
256 
257 #else
258 
259     /* Initialize blkCnt with number of samples */
260     blkCnt = numSamples;
261 
262 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
263 
264     while (blkCnt > 0U)
265     {
266       /* C(m,n) = A(m,n) * scale */
267 
268       /* Scale and store result in destination buffer. */
269       *pOut++ = (*pIn++) * scale;
270 
271       /* Decrement loop counter */
272       blkCnt--;
273     }
274 
275     /* Set status as ARM_MATH_SUCCESS */
276     status = ARM_MATH_SUCCESS;
277   }
278 
279   /* Return to application */
280   return (status);
281 }
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
283 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
284 
285 /**
286   @} end of MatrixScale group
287  */
288