1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_solve_lower_triangular_f64.c
4  * Description:  Solve linear system LT X = A with LT lower triangular matrix
5  *
6  * $Date:        10 August 2022
7  * $Revision:    V1.9.1
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 
31 /**
32   @ingroup groupMatrix
33  */
34 
35 
36 /**
37   @addtogroup MatrixInv
38   @{
39  */
40 
41 
42    /**
43    * @brief Solve LT . X = A where LT is a lower triangular matrix
44    * @param[in]  lt  The lower triangular matrix
45    * @param[in]  a  The matrix a
46    * @param[out] dst The solution X of LT . X = A
47    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
48    */
49 
50 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__aarch64__)
arm_mat_solve_lower_triangular_f64(const arm_matrix_instance_f64 * lt,const arm_matrix_instance_f64 * a,arm_matrix_instance_f64 * dst)51 ARM_DSP_ATTRIBUTE arm_status arm_mat_solve_lower_triangular_f64(
52     const arm_matrix_instance_f64 * lt,
53     const arm_matrix_instance_f64 * a,
54     arm_matrix_instance_f64 * dst)
55 {
56     arm_status status;                             /* status of matrix inverse */
57 
58 
59 #ifdef ARM_MATH_MATRIX_CHECK
60 
61     /* Check for matrix mismatch condition */
62     if ((lt->numRows != lt->numCols) ||
63         (lt->numRows != a->numRows)   )
64     {
65         /* Set status as ARM_MATH_SIZE_MISMATCH */
66         status = ARM_MATH_SIZE_MISMATCH;
67     }
68     else
69 
70 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
71 
72     {
73         /* a1 b1 c1   x1 = a1
74          b2 c2   x2   a2
75          c3   x3   a3
76 
77          x3 = a3 / c3
78          x2 = (a2 - c2 x3) / b2
79 
80          */
81         int i,j,k,n,cols;
82 
83         n = dst->numRows;
84         cols = dst->numCols;
85 
86         float64_t *pX = dst->pData;
87         float64_t *pLT = lt->pData;
88         float64_t *pA = a->pData;
89 
90         float64_t *lt_row;
91         float64_t *a_col;
92 
93         float64_t invLT;
94 
95         float64x2_t vecA;
96         float64x2_t vecX;
97 
98         for(i=0; i < n ; i++)
99         {
100 
101             for(j=0; j+1 < cols; j += 2)
102             {
103                 vecA = vld1q_f64(&pA[i * cols + j]);
104 
105                 for(k=0; k < i; k++)
106                 {
107                     vecX = vld1q_f64(&pX[cols*k+j]);
108                     vecA = vfmsq_f64(vecA,vdupq_n_f64(pLT[n*i + k]),vecX);
109                 }
110 
111                 if (pLT[n*i + i]==0.0)
112                 {
113                     return(ARM_MATH_SINGULAR);
114                 }
115 
116                 invLT = 1.0 / pLT[n*i + i];
117                 vecA = vmulq_f64(vecA,vdupq_n_f64(invLT));
118                 vst1q_f64(&pX[i*cols+j],vecA);
119 
120             }
121 
122             for(; j < cols; j ++)
123             {
124                 a_col = &pA[j];
125                 lt_row = &pLT[n*i];
126 
127                 float64_t tmp=a_col[i * cols];
128 
129                 for(k=0; k < i; k++)
130                 {
131                     tmp -= lt_row[k] * pX[cols*k+j];
132                 }
133 
134                 if (lt_row[i]==0.0)
135                 {
136                     return(ARM_MATH_SINGULAR);
137                 }
138                 tmp = tmp / lt_row[i];
139                 pX[i*cols+j] = tmp;
140             }
141 
142         }
143         status = ARM_MATH_SUCCESS;
144 
145     }
146 
147     /* Return to application */
148     return (status);
149 }
150 
151 #else
arm_mat_solve_lower_triangular_f64(const arm_matrix_instance_f64 * lt,const arm_matrix_instance_f64 * a,arm_matrix_instance_f64 * dst)152 ARM_DSP_ATTRIBUTE arm_status arm_mat_solve_lower_triangular_f64(
153     const arm_matrix_instance_f64 * lt,
154     const arm_matrix_instance_f64 * a,
155     arm_matrix_instance_f64 * dst)
156 {
157     arm_status status;                             /* status of matrix inverse */
158 
159 
160 #ifdef ARM_MATH_MATRIX_CHECK
161 
162     /* Check for matrix mismatch condition */
163     if ((lt->numRows != lt->numCols) ||
164         (lt->numRows != a->numRows)   )
165     {
166         /* Set status as ARM_MATH_SIZE_MISMATCH */
167         status = ARM_MATH_SIZE_MISMATCH;
168     }
169     else
170 
171 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
172 
173     {
174         /* a1 b1 c1   x1 = a1
175          b2 c2   x2   a2
176          c3   x3   a3
177 
178          x3 = a3 / c3
179          x2 = (a2 - c2 x3) / b2
180 
181          */
182         int i,j,k,n,cols;
183 
184         float64_t *pX = dst->pData;
185         float64_t *pLT = lt->pData;
186         float64_t *pA = a->pData;
187 
188         float64_t *lt_row;
189         float64_t *a_col;
190 
191         n = dst->numRows;
192         cols = dst->numCols;
193 
194         for(j=0; j < cols; j ++)
195         {
196             a_col = &pA[j];
197 
198             for(i=0; i < n ; i++)
199             {
200                 float64_t tmp=a_col[i * cols];
201 
202                 lt_row = &pLT[n*i];
203 
204                 for(k=0; k < i; k++)
205                 {
206                     tmp -= lt_row[k] * pX[cols*k+j];
207                 }
208 
209                 if (lt_row[i]==0.0)
210                 {
211                     return(ARM_MATH_SINGULAR);
212                 }
213                 tmp = tmp / lt_row[i];
214                 pX[i*cols+j] = tmp;
215             }
216 
217         }
218         status = ARM_MATH_SUCCESS;
219 
220     }
221 
222     /* Return to application */
223     return (status);
224 }
225 #endif
226 /**
227   @} end of MatrixInv group
228  */
229