1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_sub_q31.c
4 * Description: Q31 matrix subtraction
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions.h"
30
31 /**
32 @ingroup groupMatrix
33 */
34
35 /**
36 @addtogroup MatrixSub
37 @{
38 */
39
40 /**
41 @brief Q31 matrix subtraction.
42 @param[in] pSrcA points to the first input matrix structure
43 @param[in] pSrcB points to the second input matrix structure
44 @param[out] pDst points to output matrix structure
45 @return execution status
46 - \ref ARM_MATH_SUCCESS : Operation successful
47 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
48
49 @par Scaling and Overflow Behavior
50 The function uses saturating arithmetic.
51 Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
52 */
53 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_sub_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)54 arm_status arm_mat_sub_q31(
55 const arm_matrix_instance_q31 * pSrcA,
56 const arm_matrix_instance_q31 * pSrcB,
57 arm_matrix_instance_q31 * pDst)
58 {
59 uint32_t numSamples; /* total number of elements in the matrix */
60 q31_t *pDataA, *pDataB, *pDataDst;
61 q31x4_t vecA, vecB, vecDst;
62 q31_t const *pSrcAVec;
63 q31_t const *pSrcBVec;
64 uint32_t blkCnt; /* loop counters */
65 arm_status status; /* status of matrix subtraction */
66
67 pDataA = pSrcA->pData;
68 pDataB = pSrcB->pData;
69 pDataDst = pDst->pData;
70 pSrcAVec = (q31_t const *) pDataA;
71 pSrcBVec = (q31_t const *) pDataB;
72
73 #ifdef ARM_MATH_MATRIX_CHECK
74
75 /* Check for matrix mismatch condition */
76 if ((pSrcA->numRows != pSrcB->numRows) ||
77 (pSrcA->numCols != pSrcB->numCols) ||
78 (pSrcA->numRows != pDst->numRows) ||
79 (pSrcA->numCols != pDst->numCols) )
80 {
81 /* Set status as ARM_MATH_SIZE_MISMATCH */
82 status = ARM_MATH_SIZE_MISMATCH;
83 }
84 else
85
86 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
87 {
88
89 /*
90 * Total number of samples in the input matrix
91 */
92 numSamples = (uint32_t) pSrcA->numRows * pSrcA->numCols;
93 blkCnt = numSamples >> 2;
94 while (blkCnt > 0U)
95 {
96 /* C(m,n) = A(m,n) + B(m,n) */
97 /* sub and then store the results in the destination buffer. */
98 vecA = vld1q(pSrcAVec);
99 pSrcAVec += 4;
100 vecB = vld1q(pSrcBVec);
101 pSrcBVec += 4;
102 vecDst = vqsubq(vecA, vecB);
103 vst1q(pDataDst, vecDst);
104 pDataDst += 4;
105 /*
106 * Decrement the blockSize loop counter
107 */
108 blkCnt--;
109 }
110 /*
111 * tail
112 */
113 blkCnt = numSamples & 3;
114 if (blkCnt > 0U)
115 {
116 mve_pred16_t p0 = vctp32q(blkCnt);
117 vecA = vld1q(pSrcAVec);
118 pSrcAVec += 4;
119 vecB = vld1q(pSrcBVec);
120 pSrcBVec += 4;
121 vecDst = vqsubq_m(vecDst, vecA, vecB, p0);
122 vstrwq_p(pDataDst, vecDst, p0);
123 }
124 status = ARM_MATH_SUCCESS;
125 }
126
127 /* Return to application */
128 return (status);
129 }
130
131 #else
arm_mat_sub_q31(const arm_matrix_instance_q31 * pSrcA,const arm_matrix_instance_q31 * pSrcB,arm_matrix_instance_q31 * pDst)132 arm_status arm_mat_sub_q31(
133 const arm_matrix_instance_q31 * pSrcA,
134 const arm_matrix_instance_q31 * pSrcB,
135 arm_matrix_instance_q31 * pDst)
136 {
137 q31_t *pInA = pSrcA->pData; /* input data matrix pointer A */
138 q31_t *pInB = pSrcB->pData; /* input data matrix pointer B */
139 q31_t *pOut = pDst->pData; /* output data matrix pointer */
140
141 uint32_t numSamples; /* total number of elements in the matrix */
142 uint32_t blkCnt; /* loop counters */
143 arm_status status; /* status of matrix subtraction */
144
145 #ifdef ARM_MATH_MATRIX_CHECK
146
147 /* Check for matrix mismatch condition */
148 if ((pSrcA->numRows != pSrcB->numRows) ||
149 (pSrcA->numCols != pSrcB->numCols) ||
150 (pSrcA->numRows != pDst->numRows) ||
151 (pSrcA->numCols != pDst->numCols) )
152 {
153 /* Set status as ARM_MATH_SIZE_MISMATCH */
154 status = ARM_MATH_SIZE_MISMATCH;
155 }
156 else
157
158 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
159
160 {
161 /* Total number of samples in input matrix */
162 numSamples = (uint32_t) pSrcA->numRows * pSrcA->numCols;
163
164 #if defined (ARM_MATH_LOOPUNROLL)
165
166 /* Loop unrolling: Compute 4 outputs at a time */
167 blkCnt = numSamples >> 2U;
168
169 while (blkCnt > 0U)
170 {
171 /* C(m,n) = A(m,n) - B(m,n) */
172
173 /* Subtract, saturate and then store the results in the destination buffer. */
174 *pOut++ = __QSUB(*pInA++, *pInB++);
175
176 *pOut++ = __QSUB(*pInA++, *pInB++);
177
178 *pOut++ = __QSUB(*pInA++, *pInB++);
179
180 *pOut++ = __QSUB(*pInA++, *pInB++);
181
182 /* Decrement loop counter */
183 blkCnt--;
184 }
185
186 /* Loop unrolling: Compute remaining outputs */
187 blkCnt = numSamples % 0x4U;
188
189 #else
190
191 /* Initialize blkCnt with number of samples */
192 blkCnt = numSamples;
193
194 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
195
196 while (blkCnt > 0U)
197 {
198 /* C(m,n) = A(m,n) - B(m,n) */
199
200 /* Subtract, saturate and store result in destination buffer. */
201 *pOut++ = __QSUB(*pInA++, *pInB++);
202
203 /* Decrement loop counter */
204 blkCnt--;
205 }
206
207 /* Set status as ARM_MATH_SUCCESS */
208 status = ARM_MATH_SUCCESS;
209 }
210
211 /* Return to application */
212 return (status);
213 }
214 #endif /* defined(ARM_MATH_MVEI) */
215
216 /**
217 @} end of MatrixSub group
218 */
219