1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_solve_upper_triangular_f32.c
4  * Description:  Solve linear system UT X = A with UT upper triangular matrix
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 
31 
32 /**
33   @ingroup groupMatrix
34  */
35 
36 
37 /**
38   @addtogroup MatrixInv
39   @{
40  */
41 
42 /**
43    * @brief Solve UT . X = A where UT is an upper triangular matrix
44    * @param[in]  ut  The upper triangular matrix
45    * @param[in]  a  The matrix a
46    * @param[out] dst The solution X of UT . X = A
47    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
48   */
49 
50 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
52 #include "arm_helium_utils.h"
53 
arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)54   arm_status arm_mat_solve_upper_triangular_f32(
55   const arm_matrix_instance_f32 * ut,
56   const arm_matrix_instance_f32 * a,
57   arm_matrix_instance_f32 * dst)
58   {
59 arm_status status;                             /* status of matrix inverse */
60 
61 
62 #ifdef ARM_MATH_MATRIX_CHECK
63 
64   /* Check for matrix mismatch condition */
65   if ((ut->numRows != ut->numCols) ||
66       (ut->numRows != a->numRows)   )
67   {
68     /* Set status as ARM_MATH_SIZE_MISMATCH */
69     status = ARM_MATH_SIZE_MISMATCH;
70   }
71   else
72 
73 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
74 
75   {
76 
77     int i,j,k,n,cols;
78 
79     n = dst->numRows;
80     cols = dst->numCols;
81 
82     float32_t *pX = dst->pData;
83     float32_t *pUT = ut->pData;
84     float32_t *pA = a->pData;
85 
86     float32_t *ut_row;
87     float32_t *a_col;
88 
89     float32_t invUT;
90 
91     f32x4_t vecA;
92     f32x4_t vecX;
93 
94     for(i=n-1; i >= 0 ; i--)
95     {
96       for(j=0; j+3 < cols; j +=4)
97       {
98             vecA = vld1q_f32(&pA[i * cols + j]);
99 
100             for(k=n-1; k > i; k--)
101             {
102                 vecX = vld1q_f32(&pX[cols*k+j]);
103                 vecA = vfmsq(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
104             }
105 
106             if (pUT[n*i + i]==0.0f)
107             {
108               return(ARM_MATH_SINGULAR);
109             }
110 
111             invUT = 1.0f / pUT[n*i + i];
112             vecA = vmulq(vecA,vdupq_n_f32(invUT));
113 
114 
115             vst1q(&pX[i*cols+j],vecA);
116       }
117 
118       for(; j < cols; j ++)
119       {
120             a_col = &pA[j];
121 
122             ut_row = &pUT[n*i];
123 
124             float32_t tmp=a_col[i * cols];
125 
126             for(k=n-1; k > i; k--)
127             {
128                 tmp -= ut_row[k] * pX[cols*k+j];
129             }
130 
131             if (ut_row[i]==0.0f)
132             {
133               return(ARM_MATH_SINGULAR);
134             }
135             tmp = tmp / ut_row[i];
136             pX[i*cols+j] = tmp;
137        }
138 
139     }
140     status = ARM_MATH_SUCCESS;
141 
142   }
143 
144 
145   /* Return to application */
146   return (status);
147 }
148 
149 #else
150 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)151   arm_status arm_mat_solve_upper_triangular_f32(
152   const arm_matrix_instance_f32 * ut,
153   const arm_matrix_instance_f32 * a,
154   arm_matrix_instance_f32 * dst)
155   {
156 arm_status status;                             /* status of matrix inverse */
157 
158 
159 #ifdef ARM_MATH_MATRIX_CHECK
160 
161   /* Check for matrix mismatch condition */
162   if ((ut->numRows != ut->numCols) ||
163       (ut->numRows != a->numRows)   )
164   {
165     /* Set status as ARM_MATH_SIZE_MISMATCH */
166     status = ARM_MATH_SIZE_MISMATCH;
167   }
168   else
169 
170 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
171 
172   {
173 
174     int i,j,k,n,cols;
175 
176     n = dst->numRows;
177     cols = dst->numCols;
178 
179     float32_t *pX = dst->pData;
180     float32_t *pUT = ut->pData;
181     float32_t *pA = a->pData;
182 
183     float32_t *ut_row;
184     float32_t *a_col;
185 
186     float32_t invUT;
187 
188     f32x4_t vecA;
189     f32x4_t vecX;
190 
191     for(i=n-1; i >= 0 ; i--)
192     {
193       for(j=0; j+3 < cols; j +=4)
194       {
195             vecA = vld1q_f32(&pA[i * cols + j]);
196 
197             for(k=n-1; k > i; k--)
198             {
199                 vecX = vld1q_f32(&pX[cols*k+j]);
200                 vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
201             }
202 
203             if (pUT[n*i + i]==0.0f)
204             {
205               return(ARM_MATH_SINGULAR);
206             }
207 
208             invUT = 1.0f / pUT[n*i + i];
209             vecA = vmulq_f32(vecA,vdupq_n_f32(invUT));
210 
211 
212             vst1q_f32(&pX[i*cols+j],vecA);
213       }
214 
215       for(; j < cols; j ++)
216       {
217             a_col = &pA[j];
218 
219             ut_row = &pUT[n*i];
220 
221             float32_t tmp=a_col[i * cols];
222 
223             for(k=n-1; k > i; k--)
224             {
225                 tmp -= ut_row[k] * pX[cols*k+j];
226             }
227 
228             if (ut_row[i]==0.0f)
229             {
230               return(ARM_MATH_SINGULAR);
231             }
232             tmp = tmp / ut_row[i];
233             pX[i*cols+j] = tmp;
234        }
235 
236     }
237     status = ARM_MATH_SUCCESS;
238 
239   }
240 
241 
242   /* Return to application */
243   return (status);
244 }
245 
246 #else
arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)247   arm_status arm_mat_solve_upper_triangular_f32(
248   const arm_matrix_instance_f32 * ut,
249   const arm_matrix_instance_f32 * a,
250   arm_matrix_instance_f32 * dst)
251   {
252 arm_status status;                             /* status of matrix inverse */
253 
254 
255 #ifdef ARM_MATH_MATRIX_CHECK
256 
257   /* Check for matrix mismatch condition */
258   if ((ut->numRows != ut->numCols) ||
259       (ut->numRows != a->numRows)   )
260   {
261     /* Set status as ARM_MATH_SIZE_MISMATCH */
262     status = ARM_MATH_SIZE_MISMATCH;
263   }
264   else
265 
266 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
267 
268   {
269 
270     int i,j,k,n,cols;
271 
272     float32_t *pX = dst->pData;
273     float32_t *pUT = ut->pData;
274     float32_t *pA = a->pData;
275 
276     float32_t *ut_row;
277     float32_t *a_col;
278 
279     n = dst->numRows;
280     cols = dst->numCols;
281 
282     for(j=0; j < cols; j ++)
283     {
284        a_col = &pA[j];
285 
286        for(i=n-1; i >= 0 ; i--)
287        {
288             float32_t tmp=a_col[i * cols];
289 
290             ut_row = &pUT[n*i];
291 
292             for(k=n-1; k > i; k--)
293             {
294                 tmp -= ut_row[k] * pX[cols*k+j];
295             }
296 
297             if (ut_row[i]==0.0f)
298             {
299               return(ARM_MATH_SINGULAR);
300             }
301             tmp = tmp / ut_row[i];
302             pX[i*cols+j] = tmp;
303        }
304 
305     }
306     status = ARM_MATH_SUCCESS;
307 
308   }
309 
310 
311   /* Return to application */
312   return (status);
313 }
314 #endif /* #if defined(ARM_MATH_NEON) */
315 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
316 
317 /**
318   @} end of MatrixInv group
319  */
320