1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_solve_upper_triangular_f64.c
4  * Description:  Solve linear system UT X = A with UT upper triangular matrix
5  *
6  * $Date:        10 August 2022
7  * $Revision:    V1.9.1
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 
31 
32 /**
33   @ingroup groupMatrix
34  */
35 
36 
37 /**
38   @addtogroup MatrixInv
39   @{
40  */
41 
42 /**
43    * @brief Solve UT . X = A where UT is an upper triangular matrix
44    * @param[in]  ut  The upper triangular matrix
45    * @param[in]  a  The matrix a
46    * @param[out] dst The solution X of UT . X = A
47    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
48   */
49 
50 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__aarch64__)
arm_mat_solve_upper_triangular_f64(const arm_matrix_instance_f64 * ut,const arm_matrix_instance_f64 * a,arm_matrix_instance_f64 * dst)51 ARM_DSP_ATTRIBUTE arm_status arm_mat_solve_upper_triangular_f64(
52     const arm_matrix_instance_f64 * ut,
53     const arm_matrix_instance_f64 * a,
54     arm_matrix_instance_f64 * dst)
55 {
56     arm_status status;                             /* status of matrix inverse */
57 
58 
59 #ifdef ARM_MATH_MATRIX_CHECK
60 
61     /* Check for matrix mismatch condition */
62     if ((ut->numRows != ut->numCols) ||
63         (ut->numRows != a->numRows)   )
64     {
65         /* Set status as ARM_MATH_SIZE_MISMATCH */
66         status = ARM_MATH_SIZE_MISMATCH;
67     }
68     else
69 
70 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
71 
72     {
73 
74         int i,j,k,n,cols;
75 
76         n = dst->numRows;
77         cols = dst->numCols;
78 
79         float64_t *pX = dst->pData;
80         float64_t *pUT = ut->pData;
81         float64_t *pA = a->pData;
82 
83         float64_t *ut_row;
84         float64_t *a_col;
85 
86         float64_t invUT;
87 
88         float64x2_t vecA;
89         float64x2_t vecX;
90 
91         for(i=n-1; i >= 0 ; i--)
92         {
93             for(j=0; j+1 < cols; j +=2)
94             {
95                 vecA = vld1q_f64(&pA[i * cols + j]);
96 
97                 for(k=n-1; k > i; k--)
98                 {
99                     vecX = vld1q_f64(&pX[cols*k+j]);
100                     vecA = vfmsq_f64(vecA,vdupq_n_f64(pUT[n*i + k]),vecX);
101                 }
102 
103                 if (pUT[n*i + i]==0.0)
104                 {
105                     return(ARM_MATH_SINGULAR);
106                 }
107 
108                 invUT = 1.0 / pUT[n*i + i];
109                 vecA = vmulq_f64(vecA,vdupq_n_f64(invUT));
110 
111 
112                 vst1q_f64(&pX[i*cols+j],vecA);
113             }
114 
115             for(; j < cols; j ++)
116             {
117                 a_col = &pA[j];
118 
119                 ut_row = &pUT[n*i];
120 
121                 float64_t tmp=a_col[i * cols];
122 
123                 for(k=n-1; k > i; k--)
124                 {
125                     tmp -= ut_row[k] * pX[cols*k+j];
126                 }
127 
128                 if (ut_row[i]==0.0)
129                 {
130                     return(ARM_MATH_SINGULAR);
131                 }
132                 tmp = tmp / ut_row[i];
133                 pX[i*cols+j] = tmp;
134             }
135 
136         }
137         status = ARM_MATH_SUCCESS;
138 
139     }
140 
141 
142     /* Return to application */
143     return (status);
144 }
145 
146 #else
arm_mat_solve_upper_triangular_f64(const arm_matrix_instance_f64 * ut,const arm_matrix_instance_f64 * a,arm_matrix_instance_f64 * dst)147 ARM_DSP_ATTRIBUTE arm_status arm_mat_solve_upper_triangular_f64(
148     const arm_matrix_instance_f64 * ut,
149     const arm_matrix_instance_f64 * a,
150     arm_matrix_instance_f64 * dst)
151 {
152     arm_status status;                             /* status of matrix inverse */
153 
154 
155 #ifdef ARM_MATH_MATRIX_CHECK
156 
157     /* Check for matrix mismatch condition */
158     if ((ut->numRows != ut->numCols) ||
159         (ut->numRows != a->numRows)   )
160     {
161         /* Set status as ARM_MATH_SIZE_MISMATCH */
162         status = ARM_MATH_SIZE_MISMATCH;
163     }
164     else
165 
166 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
167 
168     {
169 
170         int i,j,k,n,cols;
171 
172         float64_t *pX = dst->pData;
173         float64_t *pUT = ut->pData;
174         float64_t *pA = a->pData;
175 
176         float64_t *ut_row;
177         float64_t *a_col;
178 
179         n = dst->numRows;
180         cols = dst->numCols;
181 
182         for(j=0; j < cols; j ++)
183         {
184             a_col = &pA[j];
185 
186             for(i=n-1; i >= 0 ; i--)
187             {
188                 float64_t tmp=a_col[i * cols];
189 
190                 ut_row = &pUT[n*i];
191 
192                 for(k=n-1; k > i; k--)
193                 {
194                     tmp -= ut_row[k] * pX[cols*k+j];
195                 }
196 
197                 if (ut_row[i]==0.0)
198                 {
199                     return(ARM_MATH_SINGULAR);
200                 }
201                 tmp = tmp / ut_row[i];
202                 pX[i*cols+j] = tmp;
203             }
204 
205         }
206         status = ARM_MATH_SUCCESS;
207 
208     }
209 
210 
211     /* Return to application */
212     return (status);
213 }
214 #endif
215 
216 /**
217   @} end of MatrixInv group
218  */
219