1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_mat_solve_upper_triangular_f32.c 4 * Description: Solve linear system UT X = A with UT upper triangular matrix 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/matrix_functions.h" 30 31 32 /** 33 @ingroup groupMatrix 34 */ 35 36 37 /** 38 @addtogroup MatrixInv 39 @{ 40 */ 41 42 /** 43 * @brief Solve UT . X = A where UT is an upper triangular matrix 44 * @param[in] ut The upper triangular matrix 45 * @param[in] a The matrix a 46 * @param[out] dst The solution X of UT . X = A 47 * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved. 48 */ 49 50 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) 51 52 #include "arm_helium_utils.h" 53 arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)54 arm_status arm_mat_solve_upper_triangular_f32( 55 const arm_matrix_instance_f32 * ut, 56 const arm_matrix_instance_f32 * a, 57 arm_matrix_instance_f32 * dst) 58 { 59 arm_status status; /* status of matrix inverse */ 60 61 62 #ifdef ARM_MATH_MATRIX_CHECK 63 64 /* Check for matrix mismatch condition */ 65 if ((ut->numRows != ut->numCols) || 66 (ut->numRows != a->numRows) ) 67 { 68 /* Set status as ARM_MATH_SIZE_MISMATCH */ 69 status = ARM_MATH_SIZE_MISMATCH; 70 } 71 else 72 73 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 74 75 { 76 77 int i,j,k,n,cols; 78 79 n = dst->numRows; 80 cols = dst->numCols; 81 82 float32_t *pX = dst->pData; 83 float32_t *pUT = ut->pData; 84 float32_t *pA = a->pData; 85 86 float32_t *ut_row; 87 float32_t *a_col; 88 89 float32_t invUT; 90 91 f32x4_t vecA; 92 f32x4_t vecX; 93 94 for(i=n-1; i >= 0 ; i--) 95 { 96 for(j=0; j+3 < cols; j +=4) 97 { 98 vecA = vld1q_f32(&pA[i * cols + j]); 99 100 for(k=n-1; k > i; k--) 101 { 102 vecX = vld1q_f32(&pX[cols*k+j]); 103 vecA = vfmsq(vecA,vdupq_n_f32(pUT[n*i + k]),vecX); 104 } 105 106 if (pUT[n*i + i]==0.0f) 107 { 108 return(ARM_MATH_SINGULAR); 109 } 110 111 invUT = 1.0f / pUT[n*i + i]; 112 vecA = vmulq(vecA,vdupq_n_f32(invUT)); 113 114 115 vst1q(&pX[i*cols+j],vecA); 116 } 117 118 for(; j < cols; j ++) 119 { 120 a_col = &pA[j]; 121 122 ut_row = &pUT[n*i]; 123 124 float32_t tmp=a_col[i * cols]; 125 126 for(k=n-1; k > i; k--) 127 { 128 tmp -= ut_row[k] * pX[cols*k+j]; 129 } 130 131 if (ut_row[i]==0.0f) 132 { 133 return(ARM_MATH_SINGULAR); 134 } 135 tmp = tmp / ut_row[i]; 136 pX[i*cols+j] = tmp; 137 } 138 139 } 140 status = ARM_MATH_SUCCESS; 141 142 } 143 144 145 /* Return to application */ 146 return (status); 147 } 148 149 #else 150 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)151 arm_status arm_mat_solve_upper_triangular_f32( 152 const arm_matrix_instance_f32 * ut, 153 const arm_matrix_instance_f32 * a, 154 arm_matrix_instance_f32 * dst) 155 { 156 arm_status status; /* status of matrix inverse */ 157 158 159 #ifdef ARM_MATH_MATRIX_CHECK 160 161 /* Check for matrix mismatch condition */ 162 if ((ut->numRows != ut->numCols) || 163 (ut->numRows != a->numRows) ) 164 { 165 /* Set status as ARM_MATH_SIZE_MISMATCH */ 166 status = ARM_MATH_SIZE_MISMATCH; 167 } 168 else 169 170 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 171 172 { 173 174 int i,j,k,n,cols; 175 176 n = dst->numRows; 177 cols = dst->numCols; 178 179 float32_t *pX = dst->pData; 180 float32_t *pUT = ut->pData; 181 float32_t *pA = a->pData; 182 183 float32_t *ut_row; 184 float32_t *a_col; 185 186 float32_t invUT; 187 188 f32x4_t vecA; 189 f32x4_t vecX; 190 191 for(i=n-1; i >= 0 ; i--) 192 { 193 for(j=0; j+3 < cols; j +=4) 194 { 195 vecA = vld1q_f32(&pA[i * cols + j]); 196 197 for(k=n-1; k > i; k--) 198 { 199 vecX = vld1q_f32(&pX[cols*k+j]); 200 vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX); 201 } 202 203 if (pUT[n*i + i]==0.0f) 204 { 205 return(ARM_MATH_SINGULAR); 206 } 207 208 invUT = 1.0f / pUT[n*i + i]; 209 vecA = vmulq_f32(vecA,vdupq_n_f32(invUT)); 210 211 212 vst1q_f32(&pX[i*cols+j],vecA); 213 } 214 215 for(; j < cols; j ++) 216 { 217 a_col = &pA[j]; 218 219 ut_row = &pUT[n*i]; 220 221 float32_t tmp=a_col[i * cols]; 222 223 for(k=n-1; k > i; k--) 224 { 225 tmp -= ut_row[k] * pX[cols*k+j]; 226 } 227 228 if (ut_row[i]==0.0f) 229 { 230 return(ARM_MATH_SINGULAR); 231 } 232 tmp = tmp / ut_row[i]; 233 pX[i*cols+j] = tmp; 234 } 235 236 } 237 status = ARM_MATH_SUCCESS; 238 239 } 240 241 242 /* Return to application */ 243 return (status); 244 } 245 246 #else arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 * ut,const arm_matrix_instance_f32 * a,arm_matrix_instance_f32 * dst)247 arm_status arm_mat_solve_upper_triangular_f32( 248 const arm_matrix_instance_f32 * ut, 249 const arm_matrix_instance_f32 * a, 250 arm_matrix_instance_f32 * dst) 251 { 252 arm_status status; /* status of matrix inverse */ 253 254 255 #ifdef ARM_MATH_MATRIX_CHECK 256 257 /* Check for matrix mismatch condition */ 258 if ((ut->numRows != ut->numCols) || 259 (ut->numRows != a->numRows) ) 260 { 261 /* Set status as ARM_MATH_SIZE_MISMATCH */ 262 status = ARM_MATH_SIZE_MISMATCH; 263 } 264 else 265 266 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 267 268 { 269 270 int i,j,k,n,cols; 271 272 float32_t *pX = dst->pData; 273 float32_t *pUT = ut->pData; 274 float32_t *pA = a->pData; 275 276 float32_t *ut_row; 277 float32_t *a_col; 278 279 n = dst->numRows; 280 cols = dst->numCols; 281 282 for(j=0; j < cols; j ++) 283 { 284 a_col = &pA[j]; 285 286 for(i=n-1; i >= 0 ; i--) 287 { 288 float32_t tmp=a_col[i * cols]; 289 290 ut_row = &pUT[n*i]; 291 292 for(k=n-1; k > i; k--) 293 { 294 tmp -= ut_row[k] * pX[cols*k+j]; 295 } 296 297 if (ut_row[i]==0.0f) 298 { 299 return(ARM_MATH_SINGULAR); 300 } 301 tmp = tmp / ut_row[i]; 302 pX[i*cols+j] = tmp; 303 } 304 305 } 306 status = ARM_MATH_SUCCESS; 307 308 } 309 310 311 /* Return to application */ 312 return (status); 313 } 314 #endif /* #if defined(ARM_MATH_NEON) */ 315 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 316 317 /** 318 @} end of MatrixInv group 319 */ 320