1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_trans_f64.c
4  * Description:  Floating-point matrix transpose
5  *
6  * $Date:        10 August 2022
7  * $Revision:    V1.9.1
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 
31 /**
32   @ingroup groupMatrix
33  */
34 
35 /**
36   @defgroup MatrixTrans Matrix Transpose
37 
38   Transposes a matrix.
39 
40   Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
41   \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
42  */
43 
44 /**
45   @addtogroup MatrixTrans
46   @{
47  */
48 
49 /**
50   @brief         Floating-point matrix transpose.
51   @param[in]     pSrc      points to input matrix
52   @param[out]    pDst      points to output matrix
53   @return        execution status
54                    - \ref ARM_MATH_SUCCESS       : Operation successful
55                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
56  */
57 #if defined(ARM_MATH_NEON) && defined(__aarch64__)
58 
arm_mat_trans_f64(const arm_matrix_instance_f64 * pSrc,arm_matrix_instance_f64 * pDst)59 ARM_DSP_ATTRIBUTE arm_status arm_mat_trans_f64(
60     const arm_matrix_instance_f64 * pSrc,
61     arm_matrix_instance_f64 * pDst)
62 {
63     float64_t *pIn = pSrc->pData;                  /* input data matrix pointer */
64     float64_t *pOut = pDst->pData;                 /* output data matrix pointer */
65     float64_t *px;                                 /* Temporary output data matrix pointer */
66     uint16_t nRows = pSrc->numRows;                /* number of rows */
67     uint16_t nColumns = pSrc->numCols;             /* number of columns */
68 
69     uint16_t blkCnt, rowCnt, i = 0U, row = nRows;          /* loop counters */
70     arm_status status;                             /* status of matrix transpose  */
71 
72 #ifdef ARM_MATH_MATRIX_CHECK
73 
74     /* Check for matrix mismatch condition */
75     if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
76     {
77         /* Set status as ARM_MATH_SIZE_MISMATCH */
78         status = ARM_MATH_SIZE_MISMATCH;
79     }
80     else
81 #endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
82 
83     {
84         /* Matrix transpose by exchanging the rows with columns */
85         /* Row loop */
86         rowCnt = row >> 1;
87         while (rowCnt > 0U)
88         {
89             float64_t *row0,*row1;
90             float64x2x4_t raV;
91 
92             blkCnt = nColumns >> 2;
93 
94             /* The pointer px is set to starting address of the column being processed */
95             px = pOut + i;
96 
97             /* Compute 4 outputs at a time.
98              ** a second loop below computes the remaining 1 to 3 samples. */
99             while (blkCnt > 0U)        /* Column loop */
100             {
101                 row0 = pIn;
102                 row1 = pIn+nColumns;
103                 pIn+=4;
104                 raV = vld4q_lane_f64(row0, raV, 0);
105                 raV = vld4q_lane_f64(row1, raV, 1);
106 
107                 vst1q_f64(px,raV.val[0]);
108                 px += nRows;
109 
110                 vst1q_f64(px,raV.val[1]);
111                 px += nRows;
112 
113                 vst1q_f64(px,raV.val[2]);
114                 px += nRows;
115 
116                 vst1q_f64(px,raV.val[3]);
117                 px += nRows;
118 
119                 /* Decrement the column loop counter */
120                 blkCnt--;
121             }
122 
123             /* Perform matrix transpose for last 3 samples here. */
124             blkCnt = nColumns % 0x4U;
125 
126             while (blkCnt > 0U)
127             {
128                 /* Read and store the input element in the destination */
129                 *px++ = *pIn;
130                 *px++ = *(pIn + 1 * nColumns);
131 
132                 px += (nRows - 2);
133                 pIn++;
134 
135                 /* Decrement the column loop counter */
136                 blkCnt--;
137             }
138 
139             i += 2;
140             pIn += 1 * nColumns;
141 
142             /* Decrement the row loop counter */
143             rowCnt--;
144 
145         }         /* Row loop end  */
146 
147         rowCnt = row & 1;
148         while (rowCnt > 0U)
149         {
150             blkCnt = nColumns ;
151             /* The pointer px is set to starting address of the column being processed */
152             px = pOut + i;
153 
154             while (blkCnt > 0U)
155             {
156                 /* Read and store the input element in the destination */
157                 *px = *pIn++;
158 
159                 /* Update the pointer px to point to the next row of the transposed matrix */
160                 px += nRows;
161 
162                 /* Decrement the column loop counter */
163                 blkCnt--;
164             }
165             i++;
166             rowCnt -- ;
167         }
168 
169         /* Set status as ARM_MATH_SUCCESS */
170         status = ARM_MATH_SUCCESS;
171     }
172 
173     /* Return to application */
174     return (status);
175 }
176 #else
arm_mat_trans_f64(const arm_matrix_instance_f64 * pSrc,arm_matrix_instance_f64 * pDst)177 ARM_DSP_ATTRIBUTE arm_status arm_mat_trans_f64(
178     const arm_matrix_instance_f64 * pSrc,
179     arm_matrix_instance_f64 * pDst)
180 {
181     float64_t *pIn = pSrc->pData;                  /* input data matrix pointer */
182     float64_t *pOut = pDst->pData;                 /* output data matrix pointer */
183     float64_t *px;                                 /* Temporary output data matrix pointer */
184     uint16_t nRows = pSrc->numRows;                /* number of rows */
185     uint16_t nCols = pSrc->numCols;                /* number of columns */
186     uint64_t col, row = nRows, i = 0U;             /* Loop counters */
187     arm_status status;                             /* status of matrix transpose */
188 
189 #ifdef ARM_MATH_MATRIX_CHECK
190 
191     /* Check for matrix mismatch condition */
192     if ((pSrc->numRows != pDst->numCols) ||
193         (pSrc->numCols != pDst->numRows)   )
194     {
195         /* Set status as ARM_MATH_SIZE_MISMATCH */
196         status = ARM_MATH_SIZE_MISMATCH;
197     }
198     else
199 
200 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
201 
202     {
203         /* Matrix transpose by exchanging the rows with columns */
204         /* row loop */
205         do
206         {
207             /* Pointer px is set to starting address of column being processed */
208             px = pOut + i;
209 
210 #if defined (ARM_MATH_LOOPUNROLL)
211 
212             /* Loop unrolling: Compute 4 outputs at a time */
213             col = nCols >> 2U;
214 
215             while (col > 0U)        /* column loop */
216             {
217                 /* Read and store input element in destination */
218                 *px = *pIn++;
219                 /* Update pointer px to point to next row of transposed matrix */
220                 px += nRows;
221 
222                 *px = *pIn++;
223                 px += nRows;
224 
225                 *px = *pIn++;
226                 px += nRows;
227 
228                 *px = *pIn++;
229                 px += nRows;
230 
231                 /* Decrement column loop counter */
232                 col--;
233             }
234 
235             /* Loop unrolling: Compute remaining outputs */
236             col = nCols % 0x4U;
237 
238 #else
239 
240             /* Initialize col with number of samples */
241             col = nCols;
242 
243 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
244 
245             while (col > 0U)
246             {
247                 /* Read and store input element in destination */
248                 *px = *pIn++;
249 
250                 /* Update pointer px to point to next row of transposed matrix */
251                 px += nRows;
252 
253                 /* Decrement column loop counter */
254                 col--;
255             }
256 
257             i++;
258 
259             /* Decrement row loop counter */
260             row--;
261 
262         } while (row > 0U);          /* row loop end */
263 
264         /* Set status as ARM_MATH_SUCCESS */
265         status = ARM_MATH_SUCCESS;
266     }
267 
268     /* Return to application */
269     return (status);
270 }
271 #endif
272 /**
273  * @} end of MatrixTrans group
274  */
275