1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_scale_f16.c
4 * Description: Multiplies a floating-point matrix by a scalar
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33
34 /**
35 @ingroup groupMatrix
36 */
37
38
39 /**
40 @addtogroup MatrixScale
41 @{
42 */
43
44 /**
45 @brief Floating-point matrix scaling.
46 @param[in] pSrc points to input matrix
47 @param[in] scale scale factor to be applied
48 @param[out] pDst points to output matrix structure
49 @return execution status
50 - \ref ARM_MATH_SUCCESS : Operation successful
51 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
52 */
53 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
54
arm_mat_scale_f16(const arm_matrix_instance_f16 * pSrc,float16_t scale,arm_matrix_instance_f16 * pDst)55 arm_status arm_mat_scale_f16(
56 const arm_matrix_instance_f16 * pSrc,
57 float16_t scale,
58 arm_matrix_instance_f16 * pDst)
59 {
60 arm_status status; /* status of matrix scaling */
61 #ifdef ARM_MATH_MATRIX_CHECK
62 /* Check for matrix mismatch condition */
63 if ((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
64 {
65 /* Set status as ARM_MATH_SIZE_MISMATCH */
66 status = ARM_MATH_SIZE_MISMATCH;
67 }
68 else
69 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
70 {
71 float16_t *pIn = pSrc->pData; /* input data matrix pointer */
72 float16_t *pOut = pDst->pData; /* output data matrix pointer */
73 uint32_t numSamples; /* total number of elements in the matrix */
74 uint32_t blkCnt; /* loop counters */
75 f16x8_t vecIn, vecOut, vecScale;
76 float16_t const *pInVec;
77
78 pInVec = (float16_t const *) pIn;
79
80 vecScale = vdupq_n_f16(scale);
81 /*
82 * Total number of samples in the input matrix
83 */
84 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
85 blkCnt = numSamples >> 3;
86 while (blkCnt > 0U)
87 {
88 /*
89 * C(m,n) = A(m,n) * scale
90 * Scaling and results are stored in the destination buffer.
91 */
92 vecIn = vld1q(pInVec);
93 pInVec += 8;
94
95 vecOut = vmulq_f16(vecIn, vecScale);
96
97 vst1q(pOut, vecOut);
98 pOut += 8;
99 /*
100 * Decrement the blockSize loop counter
101 */
102 blkCnt--;
103 }
104 /*
105 * tail
106 */
107 blkCnt = numSamples & 7;
108 if (blkCnt > 0U)
109 {
110 mve_pred16_t p0 = vctp16q(blkCnt);
111 vecIn = vld1q(pInVec);
112 vecOut = vecIn * scale;
113
114 vstrhq_p(pOut, vecOut, p0);
115 }
116 /* Set status as ARM_MATH_SUCCESS */
117 status = ARM_MATH_SUCCESS;
118 }
119
120 /* Return to application */
121 return (status);
122
123 }
124 #else
125
arm_mat_scale_f16(const arm_matrix_instance_f16 * pSrc,float16_t scale,arm_matrix_instance_f16 * pDst)126 arm_status arm_mat_scale_f16(
127 const arm_matrix_instance_f16 * pSrc,
128 float16_t scale,
129 arm_matrix_instance_f16 * pDst)
130 {
131 float16_t *pIn = pSrc->pData; /* Input data matrix pointer */
132 float16_t *pOut = pDst->pData; /* Output data matrix pointer */
133 uint32_t numSamples; /* Total number of elements in the matrix */
134 uint32_t blkCnt; /* Loop counters */
135 arm_status status; /* Status of matrix scaling */
136
137 #ifdef ARM_MATH_MATRIX_CHECK
138
139 /* Check for matrix mismatch condition */
140 if ((pSrc->numRows != pDst->numRows) ||
141 (pSrc->numCols != pDst->numCols) )
142 {
143 /* Set status as ARM_MATH_SIZE_MISMATCH */
144 status = ARM_MATH_SIZE_MISMATCH;
145 }
146 else
147
148 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
149
150 {
151 /* Total number of samples in input matrix */
152 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
153
154 #if defined (ARM_MATH_LOOPUNROLL)
155
156 /* Loop unrolling: Compute 4 outputs at a time */
157 blkCnt = numSamples >> 2U;
158
159 while (blkCnt > 0U)
160 {
161 /* C(m,n) = A(m,n) * scale */
162
163 /* Scale and store result in destination buffer. */
164 *pOut++ = (*pIn++) * scale;
165 *pOut++ = (*pIn++) * scale;
166 *pOut++ = (*pIn++) * scale;
167 *pOut++ = (*pIn++) * scale;
168
169 /* Decrement loop counter */
170 blkCnt--;
171 }
172
173 /* Loop unrolling: Compute remaining outputs */
174 blkCnt = numSamples % 0x4U;
175
176 #else
177
178 /* Initialize blkCnt with number of samples */
179 blkCnt = numSamples;
180
181 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
182
183 while (blkCnt > 0U)
184 {
185 /* C(m,n) = A(m,n) * scale */
186
187 /* Scale and store result in destination buffer. */
188 *pOut++ = (*pIn++) * scale;
189
190 /* Decrement loop counter */
191 blkCnt--;
192 }
193
194 /* Set status as ARM_MATH_SUCCESS */
195 status = ARM_MATH_SUCCESS;
196 }
197
198 /* Return to application */
199 return (status);
200 }
201 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
202
203 /**
204 @} end of MatrixScale group
205 */
206
207 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
208
209