1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_scale_q31.c
4 * Description: Multiplies a Q31 matrix by a scalar
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions.h"
30
31 /**
32 @ingroup groupMatrix
33 */
34
35 /**
36 @addtogroup MatrixScale
37 @{
38 */
39
40 /**
41 @brief Q31 matrix scaling.
42 @param[in] pSrc points to input matrix
43 @param[in] scaleFract fractional portion of the scale factor
44 @param[in] shift number of bits to shift the result by
45 @param[out] pDst points to output matrix structure
46 @return execution status
47 - \ref ARM_MATH_SUCCESS : Operation successful
48 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
49
50 @par Scaling and Overflow Behavior
51 The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
52 These are multiplied to yield a 2.62 intermediate result which is shifted with saturation to 1.31 format.
53 */
54 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_scale_q31(const arm_matrix_instance_q31 * pSrc,q31_t scaleFract,int32_t shift,arm_matrix_instance_q31 * pDst)55 arm_status arm_mat_scale_q31(
56 const arm_matrix_instance_q31 * pSrc,
57 q31_t scaleFract,
58 int32_t shift,
59 arm_matrix_instance_q31 * pDst)
60 {
61 q31_t *pIn = pSrc->pData; /* input data matrix pointer */
62 q31_t *pOut = pDst->pData; /* output data matrix pointer */
63 uint32_t numSamples; /* total number of elements in the matrix */
64 uint32_t blkCnt; /* loop counters */
65 q31x4_t vecIn, vecOut;
66 q31_t const *pInVec;
67 int32_t totShift = shift + 1; /* shift to apply after scaling */
68 arm_status status; /* Status of matrix scaling */
69
70 pInVec = (q31_t const *) pIn;
71 #ifdef ARM_MATH_MATRIX_CHECK
72
73 /* Check for matrix mismatch condition */
74 if ((pSrc->numRows != pDst->numRows) ||
75 (pSrc->numCols != pDst->numCols) )
76 {
77 /* Set status as ARM_MATH_SIZE_MISMATCH */
78 status = ARM_MATH_SIZE_MISMATCH;
79 }
80 else
81
82 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
83 {
84
85 /*
86 * Total number of samples in the input matrix
87 */
88 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
89 blkCnt = numSamples >> 2;
90 while (blkCnt > 0U)
91 {
92 /*
93 * C(m,n) = A(m,n) * scale
94 * Scaling and results are stored in the destination buffer.
95 */
96 vecIn = vld1q(pInVec);
97 pInVec += 4;
98 /* multiply input with scaler value */
99 vecOut = vmulhq(vecIn, vdupq_n_s32(scaleFract));
100 /* apply shifting */
101 vecOut = vqshlq_r(vecOut, totShift);
102
103 vst1q(pOut, vecOut);
104 pOut += 4;
105 /*
106 * Decrement the blockSize loop counter
107 */
108 blkCnt--;
109 }
110 /*
111 * tail
112 */
113 blkCnt = numSamples & 3;
114 if (blkCnt > 0U)
115 {
116 mve_pred16_t p0 = vctp32q(blkCnt);
117 vecIn = vld1q(pInVec);
118 pInVec += 4;
119 vecOut = vmulhq(vecIn, vdupq_n_s32(scaleFract));
120 vecOut = vqshlq_r(vecOut, totShift);
121 vstrwq_p(pOut, vecOut, p0);
122 }
123 /* Set status as ARM_MATH_SUCCESS */
124 status = ARM_MATH_SUCCESS;
125 }
126
127 /* Return to application */
128 return (status);
129 }
130
131 #else
arm_mat_scale_q31(const arm_matrix_instance_q31 * pSrc,q31_t scaleFract,int32_t shift,arm_matrix_instance_q31 * pDst)132 arm_status arm_mat_scale_q31(
133 const arm_matrix_instance_q31 * pSrc,
134 q31_t scaleFract,
135 int32_t shift,
136 arm_matrix_instance_q31 * pDst)
137 {
138 q31_t *pIn = pSrc->pData; /* Input data matrix pointer */
139 q31_t *pOut = pDst->pData; /* Output data matrix pointer */
140 uint32_t numSamples; /* Total number of elements in the matrix */
141 uint32_t blkCnt; /* Loop counter */
142 arm_status status; /* Status of matrix scaling */
143 int32_t kShift = shift + 1; /* Shift to apply after scaling */
144 q31_t in, out; /* Temporary variabels */
145
146 #ifdef ARM_MATH_MATRIX_CHECK
147
148 /* Check for matrix mismatch condition */
149 if ((pSrc->numRows != pDst->numRows) ||
150 (pSrc->numCols != pDst->numCols) )
151 {
152 /* Set status as ARM_MATH_SIZE_MISMATCH */
153 status = ARM_MATH_SIZE_MISMATCH;
154 }
155 else
156
157 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
158
159 {
160 /* Total number of samples in input matrix */
161 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
162
163 #if defined (ARM_MATH_LOOPUNROLL)
164
165 /* Loop unrolling: Compute 4 outputs at a time */
166 blkCnt = numSamples >> 2U;
167
168 while (blkCnt > 0U)
169 {
170 /* C(m,n) = A(m,n) * k */
171
172 /* Scale, saturate and store result in destination buffer. */
173 in = *pIn++; /* read four inputs from source */
174 in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
175 out = in << kShift; /* apply shifting */
176 if (in != (out >> kShift)) /* saturate the results. */
177 out = 0x7FFFFFFF ^ (in >> 31);
178 *pOut++ = out; /* Store result destination */
179
180 in = *pIn++;
181 in = ((q63_t) in * scaleFract) >> 32;
182 out = in << kShift;
183 if (in != (out >> kShift))
184 out = 0x7FFFFFFF ^ (in >> 31);
185 *pOut++ = out;
186
187 in = *pIn++;
188 in = ((q63_t) in * scaleFract) >> 32;
189 out = in << kShift;
190 if (in != (out >> kShift))
191 out = 0x7FFFFFFF ^ (in >> 31);
192 *pOut++ = out;
193
194 in = *pIn++;
195 in = ((q63_t) in * scaleFract) >> 32;
196 out = in << kShift;
197 if (in != (out >> kShift))
198 out = 0x7FFFFFFF ^ (in >> 31);
199 *pOut++ = out;
200
201 /* Decrement loop counter */
202 blkCnt--;
203 }
204
205 /* Loop unrolling: Compute remaining outputs */
206 blkCnt = numSamples % 0x4U;
207
208 #else
209
210 /* Initialize blkCnt with number of samples */
211 blkCnt = numSamples;
212
213 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
214
215 while (blkCnt > 0U)
216 {
217 /* C(m,n) = A(m,n) * k */
218
219 /* Scale, saturate and store result in destination buffer. */
220 in = *pIn++;
221 in = ((q63_t) in * scaleFract) >> 32;
222 out = in << kShift;
223 if (in != (out >> kShift))
224 out = 0x7FFFFFFF ^ (in >> 31);
225 *pOut++ = out;
226
227 /* Decrement loop counter */
228 blkCnt--;
229 }
230
231 /* Set status as ARM_MATH_SUCCESS */
232 status = ARM_MATH_SUCCESS;
233 }
234
235 /* Return to application */
236 return (status);
237 }
238 #endif /* defined(ARM_MATH_MVEI) */
239
240 /**
241 @} end of MatrixScale group
242 */
243