1 /******************************************************************************
2  * @file     matrix_functions_f16.h
3  * @brief    Public header file for CMSIS DSP Library
4  * @version  V1.10.0
5  * @date     08 July 2021
6  * Target Processor: Cortex-M and Cortex-A cores
7  ******************************************************************************/
8 /*
9  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
10  *
11  * SPDX-License-Identifier: Apache-2.0
12  *
13  * Licensed under the Apache License, Version 2.0 (the License); you may
14  * not use this file except in compliance with the License.
15  * You may obtain a copy of the License at
16  *
17  * www.apache.org/licenses/LICENSE-2.0
18  *
19  * Unless required by applicable law or agreed to in writing, software
20  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
21  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  * See the License for the specific language governing permissions and
23  * limitations under the License.
24  */
25 
26 
27 #ifndef _MATRIX_FUNCTIONS_F16_H_
28 #define _MATRIX_FUNCTIONS_F16_H_
29 
30 #ifdef   __cplusplus
31 extern "C"
32 {
33 #endif
34 
35 
36 #include "arm_math_types_f16.h"
37 #include "arm_math_memory.h"
38 
39 #include "dsp/none.h"
40 #include "dsp/utils.h"
41 
42 #if defined(ARM_FLOAT16_SUPPORTED)
43 
44   #define DEFAULT_HOUSEHOLDER_THRESHOLD_F16 (1.0e-3f) /**< Default Householder threshold for the f16 API, written as a single-precision (f-suffixed) literal. NOTE(review): presumably meant for the `threshold` argument of arm_householder_f16() / arm_mat_qr_f16() - confirm. */
45 
46  /**
47    * @brief Instance structure describing a half-precision floating-point (float16_t) matrix.
48    */
49   typedef struct
50   {
51     uint16_t numRows;     /**< number of rows of the matrix.                       */
52     uint16_t numCols;     /**< number of columns of the matrix.                    */
53     float16_t *pData;     /**< points to the float16_t element data of the matrix. */
54   } arm_matrix_instance_f16;
55 
56  /**
57    * @brief Half-precision floating-point (float16_t) matrix addition.
58    * @param[in]  pSrcA  points to the first input matrix structure
59    * @param[in]  pSrcB  points to the second input matrix structure
60    * @param[out] pDst   points to the output matrix structure that receives the sum
61    * @return     <code>ARM_MATH_SIZE_MISMATCH</code> if the size check fails,
62    * otherwise <code>ARM_MATH_SUCCESS</code>; the status depends only on the outcome of size checking.
63    */
64 arm_status arm_mat_add_f16(
65   const arm_matrix_instance_f16 * pSrcA,
66   const arm_matrix_instance_f16 * pSrcB,
67         arm_matrix_instance_f16 * pDst);
68 
69   /**
70    * @brief Half-precision floating-point complex matrix multiplication.
71    * @param[in]  pSrcA  points to the first input matrix structure
72    * @param[in]  pSrcB  points to the second input matrix structure
73    * @param[out] pDst   points to the output matrix structure that receives the product. NOTE(review): the complex element layout inside pData is not stated in this header - presumably interleaved real/imaginary; confirm.
74    * @return     <code>ARM_MATH_SIZE_MISMATCH</code> if the size check fails,
75    * otherwise <code>ARM_MATH_SUCCESS</code>; the status depends only on the outcome of size checking.
76    */
77 arm_status arm_mat_cmplx_mult_f16(
78   const arm_matrix_instance_f16 * pSrcA,
79   const arm_matrix_instance_f16 * pSrcB,
80         arm_matrix_instance_f16 * pDst);
81 
82   /**
83    * @brief Half-precision floating-point (float16_t) matrix transpose.
84    * @param[in]  pSrc  points to the input matrix structure
85    * @param[out] pDst  points to the output matrix structure that receives the transpose
86    * @return    <code>ARM_MATH_SIZE_MISMATCH</code> if the size check fails,
87    * otherwise <code>ARM_MATH_SUCCESS</code>; the status depends only on the outcome of size checking.
88    */
89 arm_status arm_mat_trans_f16(
90   const arm_matrix_instance_f16 * pSrc,
91         arm_matrix_instance_f16 * pDst);
92 
93   /**
94    * @brief Half-precision floating-point complex matrix transpose.
95    * @param[in]  pSrc  points to the input matrix structure
96    * @param[out] pDst  points to the output matrix structure that receives the transpose
97    * @return    <code>ARM_MATH_SIZE_MISMATCH</code> if the size check fails,
98    * otherwise <code>ARM_MATH_SUCCESS</code>; the status depends only on the outcome of size checking.
99    */
100 arm_status arm_mat_cmplx_trans_f16(
101   const arm_matrix_instance_f16 * pSrc,
102   arm_matrix_instance_f16 * pDst);
103 
104   /**
105    * @brief Half-precision floating-point (float16_t) matrix multiplication.
106    * @param[in]  pSrcA  points to the first input matrix structure
107    * @param[in]  pSrcB  points to the second input matrix structure
108    * @param[out] pDst   points to the output matrix structure that receives the product
109    * @return     <code>ARM_MATH_SIZE_MISMATCH</code> if the size check fails,
110    * otherwise <code>ARM_MATH_SUCCESS</code>; the status depends only on the outcome of size checking.
111    */
112 arm_status arm_mat_mult_f16(
113   const arm_matrix_instance_f16 * pSrcA,
114   const arm_matrix_instance_f16 * pSrcB,
115         arm_matrix_instance_f16 * pDst);
116   /**
117    * @brief Half-precision floating-point matrix-by-vector multiplication. Returns no status (void), unlike the matrix-by-matrix routines declared in this header.
118    * @param[in]  pSrcMat  points to the input matrix structure
119    * @param[in]  pVec     points to the input vector (NOTE(review): length presumably pSrcMat->numCols - confirm)
120    * @param[out] pDst     points to the output vector (NOTE(review): length presumably pSrcMat->numRows - confirm)
121    */
122 void arm_mat_vec_mult_f16(
123   const arm_matrix_instance_f16 *pSrcMat,
124   const float16_t *pVec,
125   float16_t *pDst);
126 
127   /**
128    * @brief Half-precision floating-point (float16_t) matrix subtraction.
129    * @param[in]  pSrcA  points to the first input matrix structure
130    * @param[in]  pSrcB  points to the second input matrix structure
131    * @param[out] pDst   points to the output matrix structure that receives the difference
132    * @return     <code>ARM_MATH_SIZE_MISMATCH</code> if the size check fails,
133    * otherwise <code>ARM_MATH_SUCCESS</code>; the status depends only on the outcome of size checking.
134    */
135 arm_status arm_mat_sub_f16(
136   const arm_matrix_instance_f16 * pSrcA,
137   const arm_matrix_instance_f16 * pSrcB,
138         arm_matrix_instance_f16 * pDst);
139 
140   /**
141    * @brief Half-precision floating-point (float16_t) matrix scaling.
142    * @param[in]  pSrc   points to the input matrix structure
143    * @param[in]  scale  scale factor, passed by value as a float16_t
144    * @param[out] pDst   points to the output matrix structure that receives the scaled matrix
145    * @return     <code>ARM_MATH_SIZE_MISMATCH</code> if the size check fails,
146    * otherwise <code>ARM_MATH_SUCCESS</code>; the status depends only on the outcome of size checking.
147    */
148 arm_status arm_mat_scale_f16(
149   const arm_matrix_instance_f16 * pSrc,
150         float16_t scale,
151         arm_matrix_instance_f16 * pDst);
152 
153   /**
154    * @brief  Initialization of a half-precision floating-point matrix instance. Returns no status (void).
155    * @param[in,out] S         points to the arm_matrix_instance_f16 structure to initialize.
156    * @param[in]     nRows     number of rows in the matrix.
157    * @param[in]     nColumns  number of columns in the matrix.
158    * @param[in]     pData     points to the matrix data array (float16_t elements).
159    */
160 void arm_mat_init_f16(
161         arm_matrix_instance_f16 * S,
162         uint16_t nRows,
163         uint16_t nColumns,
164         float16_t * pData);
165 
166 
167   /**
168    * @brief Half-precision floating-point (float16_t) matrix inverse.
169    * @param[in]  src   points to the instance of the input floating-point matrix structure.
170    * @param[out] dst   points to the instance of the output floating-point matrix structure.
171    * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match. NOTE(review): the success status is not documented here - presumably ARM_MATH_SUCCESS; confirm.
172    * If the input matrix is singular (does not have an inverse), the algorithm terminates and returns the error status ARM_MATH_SINGULAR.
173    */
174   arm_status arm_mat_inverse_f16(
175   const arm_matrix_instance_f16 * src,
176   arm_matrix_instance_f16 * dst);
177 
178 
179  /**
180    * @brief Half-precision floating-point Cholesky decomposition of a symmetric positive-definite matrix.
181    * @param[in]  src   points to the instance of the input floating-point matrix structure.
182    * @param[out] dst   points to the instance of the output floating-point matrix structure.
183    * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match.
184    * If the input matrix does not have a decomposition, the algorithm terminates and returns the error status ARM_MATH_DECOMPOSITION_FAILURE.
185    * If the matrix is ill-conditioned or only semi-definite, the LDL^t decomposition is the better choice.
186    * The decomposition returned in dst is a lower triangular matrix.
187    */
188   arm_status arm_mat_cholesky_f16(
189   const arm_matrix_instance_f16 * src,
190   arm_matrix_instance_f16 * dst);
191 
192  /**
193    * @brief Solve UT . X = A for X, where UT is an upper triangular matrix (half-precision floating-point).
194    * @param[in]  ut  The upper triangular matrix UT
195    * @param[in]  a   The right-hand-side matrix A
196    * @param[out] dst The solution X of UT . X = A
197    * @return ARM_MATH_SINGULAR if the system can't be solved. NOTE(review): the success status is not documented here - presumably ARM_MATH_SUCCESS; confirm.
198    */
199   arm_status arm_mat_solve_upper_triangular_f16(
200   const arm_matrix_instance_f16 * ut,
201   const arm_matrix_instance_f16 * a,
202   arm_matrix_instance_f16 * dst);
203 
204  /**
205    * @brief Solve LT . X = A for X, where LT is a lower triangular matrix (half-precision floating-point).
206    * @param[in]  lt  The lower triangular matrix LT
207    * @param[in]  a   The right-hand-side matrix A
208    * @param[out] dst The solution X of LT . X = A
209    * @return ARM_MATH_SINGULAR if the system can't be solved. NOTE(review): the success status is not documented here - presumably ARM_MATH_SUCCESS; confirm.
210    */
211   arm_status arm_mat_solve_lower_triangular_f16(
212   const arm_matrix_instance_f16 * lt,
213   const arm_matrix_instance_f16 * a,
214   arm_matrix_instance_f16 * dst);
215 
216 
217 /**
218   @brief         QR decomposition of an m x n half-precision floating-point matrix with m >= n.
219   @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function: the const qualifier covers only the instance structure, not the element buffer reached through its non-const pData member.
220   @param[in]     threshold norm2 threshold.
221   @param[out]    pOutR     points to output R matrix structure of dimension m x n
222   @param[out]    pOutQ     points to output Q matrix structure of dimension m x m
223   @param[out]    pOutTau   points to Householder scaling factors of dimension n
224   @param[in,out] pTmpA     points to a temporary vector of dimension m.
225   @param[in,out] pTmpB     points to a temporary vector of dimension n.
226   @return        execution status
227                    - \ref ARM_MATH_SUCCESS       : Operation successful
228                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
229                    - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
230  */
231 
232 arm_status arm_mat_qr_f16(
233     const arm_matrix_instance_f16 * pSrc,
234     const float16_t threshold,
235     arm_matrix_instance_f16 * pOutR,
236     arm_matrix_instance_f16 * pOutQ,
237     float16_t * pOutTau,
238     float16_t *pTmpA,
239     float16_t *pTmpB
240     );
241 
242 /**
243   @brief         Householder transform of a half-precision floating-point vector.
244   @param[in]     pSrc        points to the input vector.
245   @param[in]     threshold   norm2 threshold.
246   @param[in]     blockSize   dimension of the vector space.
247   @param[out]    pOut        points to the output vector.
248   @return        the scaling factor beta
249  */
250 
251 float16_t arm_householder_f16(
252     const float16_t * pSrc,
253     const float16_t threshold,
254     uint32_t    blockSize,
255     float16_t * pOut
256     );
257 
258 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
259 #ifdef   __cplusplus
260 }
261 #endif
262 
263 #endif /* ifndef _MATRIX_FUNCTIONS_F16_H_ */
264