1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_trans_q7.c
4  * Description:  Q7 matrix transpose
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 
31 /**
32   @ingroup groupMatrix
33  */
34 
35 /**
36   @addtogroup MatrixTrans
37   @{
38  */
39 
40 /**
41   @brief         Q7 matrix transpose.
42   @param[in]     pSrc      points to input matrix
43   @param[out]    pDst      points to output matrix
44   @return        execution status
45                    - \ref ARM_MATH_SUCCESS       : Operation successful
46                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
47  */
48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_trans_q7(const arm_matrix_instance_q7 * pSrc,arm_matrix_instance_q7 * pDst)49 arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst)
50 {
51 
52     uint16x8_t    vecOffs;
53     uint32_t        i;
54     uint32_t        blkCnt;
55     uint8_t const  *pDataC;
56     uint8_t        *pDataDestR;
57     uint16x8_t    vecIn;
58 
59     const uint8_t   * pDataSrc=(const uint8_t  *)pSrc->pData;
60     uint8_t   * pDataDst=(uint8_t  *)pDst->pData;
61 
62 #ifdef ARM_MATH_MATRIX_CHECK
63     /* Check for matrix mismatch condition */
64     if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
65     {
66         /* Set status as ARM_MATH_SIZE_MISMATCH */
67         return ARM_MATH_SIZE_MISMATCH;
68     }
69 #endif
70 
71     vecOffs = vidupq_u16((uint32_t)0, 1);
72     vecOffs = vecOffs * pSrc->numCols;
73 
74     i = pSrc->numCols;
75     do
76     {
77         pDataC = (uint8_t const *) pDataSrc;
78         pDataDestR = (uint8_t*)pDataDst;
79 
80         blkCnt = pSrc->numRows >> 3;
81         while (blkCnt > 0U)
82         {
83             /* widened loads */
84             vecIn = vldrbq_gather_offset_u16(pDataC, vecOffs);
85             vstrbq_u16(pDataDestR, vecIn);
86             pDataDestR += 8;
87             pDataC = pDataC + pSrc->numCols * 8;
88             /*
89              * Decrement the blockSize loop counter
90              */
91             blkCnt--;
92         }
93 
94         /*
95          * tail
96          * (will be merged thru tail predication)
97          */
98         blkCnt = pSrc->numRows & 7;
99         if (blkCnt > 0U)
100         {
101             mve_pred16_t p0 = vctp16q(blkCnt);
102             vecIn = vldrbq_gather_offset_u16(pDataC, vecOffs);
103             vstrbq_p_u16(pDataDestR, vecIn, p0);
104         }
105         pDataSrc += 1;
106         pDataDst += pSrc->numRows;
107     }
108     while (--i);
109 
110     return (ARM_MATH_SUCCESS);
111 }
112 #else
arm_mat_trans_q7(const arm_matrix_instance_q7 * pSrc,arm_matrix_instance_q7 * pDst)113 arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst)
114 {
115     q7_t *pSrcA = pSrc->pData;         /* input data matrix pointer */
116     q7_t *pOut = pDst->pData;          /* output data matrix pointer */
117     uint16_t nRows = pSrc->numRows;    /* number of nRows */
118     uint16_t nColumns = pSrc->numCols; /* number of nColumns */
119     uint16_t col, row = nRows, i = 0U; /* row and column loop counters */
120     arm_status status;                 /* status of matrix transpose */
121 
122 
123 #ifdef ARM_MATH_MATRIX_CHECK
124     /* Check for matrix mismatch condition */
125     if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) {
126         /* Set status as ARM_MATH_SIZE_MISMATCH */
127         status = ARM_MATH_SIZE_MISMATCH;
128     } else
129 #endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
130 
131     {
132         /* Matrix transpose by exchanging the rows with columns */
133         /* row loop     */
134         do {
135             /* The pointer pOut is set to starting address of the column being processed */
136             pOut = pDst->pData + i;
137 
138             /* Initialize column loop counter */
139             col = nColumns;
140 
141 
142             while (col > 0U) {
143                 /* Read and store the input element in the destination */
144                 *pOut = *pSrcA++;
145 
146                 /* Update the pointer pOut to point to the next row of the transposed matrix */
147                 pOut += nRows;
148 
149                 /* Decrement the column loop counter */
150                 col--;
151             }
152 
153             i++;
154 
155             /* Decrement the row loop counter */
156             row--;
157 
158         } while (row > 0U);
159 
160         /* set status as ARM_MATH_SUCCESS */
161         status = ARM_MATH_SUCCESS;
162     }
163     /* Return to application */
164     return (status);
165 }
166 #endif /* defined(ARM_MATH_MVEI) */
167 
168 
169 /**
170   @} end of MatrixTrans group
171  */
172