1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_trans_q7.c
4 * Description: Q7 matrix transpose
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions.h"
30
31 /**
32 @ingroup groupMatrix
33 */
34
35 /**
36 @addtogroup MatrixTrans
37 @{
38 */
39
40 /**
41 @brief Q7 matrix transpose.
42 @param[in] pSrc points to input matrix
43 @param[out] pDst points to output matrix
44 @return execution status
45 - \ref ARM_MATH_SUCCESS : Operation successful
46 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
47 */
48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_trans_q7(const arm_matrix_instance_q7 * pSrc,arm_matrix_instance_q7 * pDst)49 arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst)
50 {
51
52 uint16x8_t vecOffs;
53 uint32_t i;
54 uint32_t blkCnt;
55 uint8_t const *pDataC;
56 uint8_t *pDataDestR;
57 uint16x8_t vecIn;
58
59 const uint8_t * pDataSrc=(const uint8_t *)pSrc->pData;
60 uint8_t * pDataDst=(uint8_t *)pDst->pData;
61
62 #ifdef ARM_MATH_MATRIX_CHECK
63 /* Check for matrix mismatch condition */
64 if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
65 {
66 /* Set status as ARM_MATH_SIZE_MISMATCH */
67 return ARM_MATH_SIZE_MISMATCH;
68 }
69 #endif
70
71 vecOffs = vidupq_u16((uint32_t)0, 1);
72 vecOffs = vecOffs * pSrc->numCols;
73
74 i = pSrc->numCols;
75 do
76 {
77 pDataC = (uint8_t const *) pDataSrc;
78 pDataDestR = (uint8_t*)pDataDst;
79
80 blkCnt = pSrc->numRows >> 3;
81 while (blkCnt > 0U)
82 {
83 /* widened loads */
84 vecIn = vldrbq_gather_offset_u16(pDataC, vecOffs);
85 vstrbq_u16(pDataDestR, vecIn);
86 pDataDestR += 8;
87 pDataC = pDataC + pSrc->numCols * 8;
88 /*
89 * Decrement the blockSize loop counter
90 */
91 blkCnt--;
92 }
93
94 /*
95 * tail
96 * (will be merged thru tail predication)
97 */
98 blkCnt = pSrc->numRows & 7;
99 if (blkCnt > 0U)
100 {
101 mve_pred16_t p0 = vctp16q(blkCnt);
102 vecIn = vldrbq_gather_offset_u16(pDataC, vecOffs);
103 vstrbq_p_u16(pDataDestR, vecIn, p0);
104 }
105 pDataSrc += 1;
106 pDataDst += pSrc->numRows;
107 }
108 while (--i);
109
110 return (ARM_MATH_SUCCESS);
111 }
112 #else
arm_mat_trans_q7(const arm_matrix_instance_q7 * pSrc,arm_matrix_instance_q7 * pDst)113 arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst)
114 {
115 q7_t *pSrcA = pSrc->pData; /* input data matrix pointer */
116 q7_t *pOut = pDst->pData; /* output data matrix pointer */
117 uint16_t nRows = pSrc->numRows; /* number of nRows */
118 uint16_t nColumns = pSrc->numCols; /* number of nColumns */
119 uint16_t col, row = nRows, i = 0U; /* row and column loop counters */
120 arm_status status; /* status of matrix transpose */
121
122
123 #ifdef ARM_MATH_MATRIX_CHECK
124 /* Check for matrix mismatch condition */
125 if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) {
126 /* Set status as ARM_MATH_SIZE_MISMATCH */
127 status = ARM_MATH_SIZE_MISMATCH;
128 } else
129 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
130
131 {
132 /* Matrix transpose by exchanging the rows with columns */
133 /* row loop */
134 do {
135 /* The pointer pOut is set to starting address of the column being processed */
136 pOut = pDst->pData + i;
137
138 /* Initialize column loop counter */
139 col = nColumns;
140
141
142 while (col > 0U) {
143 /* Read and store the input element in the destination */
144 *pOut = *pSrcA++;
145
146 /* Update the pointer pOut to point to the next row of the transposed matrix */
147 pOut += nRows;
148
149 /* Decrement the column loop counter */
150 col--;
151 }
152
153 i++;
154
155 /* Decrement the row loop counter */
156 row--;
157
158 } while (row > 0U);
159
160 /* set status as ARM_MATH_SUCCESS */
161 status = ARM_MATH_SUCCESS;
162 }
163 /* Return to application */
164 return (status);
165 }
166 #endif /* defined(ARM_MATH_MVEI) */
167
168
169 /**
170 @} end of MatrixTrans group
171 */
172