1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_cholesky_f64.c
4 * Description: Floating-point Cholesky decomposition
5 *
6 * $Date: 10 August 2022
7 * $Revision: V1.9.1
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions.h"
30 #include "dsp/matrix_utils.h"
31
32 /**
33 @ingroup groupMatrix
34 */
35
36 /**
37 @addtogroup MatrixChol
38 @{
39 */
40
41 /**
42 * @brief Floating-point Cholesky decomposition of positive-definite matrix.
43 * @param[in] pSrc points to the instance of the input floating-point matrix structure.
44 * @param[out] pDst points to the instance of the output floating-point matrix structure.
45 * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
46 * @return execution status
47 - \ref ARM_MATH_SUCCESS : Operation successful
48 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
49 - \ref ARM_MATH_DECOMPOSITION_FAILURE : Input matrix cannot be decomposed
50 * @par
51 * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
52 * The decomposition of A is returning a lower triangular matrix L such that A = L L^t
53 *
54 * @par
55 * The destination matrix should be set to 0 before calling the functions because
56 * the function may not overwrite all output elements.
57 */
58
59 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__aarch64__)
60
arm_mat_cholesky_f64(const arm_matrix_instance_f64 * pSrc,arm_matrix_instance_f64 * pDst)61 ARM_DSP_ATTRIBUTE arm_status arm_mat_cholesky_f64(
62 const arm_matrix_instance_f64 * pSrc,
63 arm_matrix_instance_f64 * pDst)
64 {
65
66 arm_status status; /* status of matrix inverse */
67
68
69 #ifdef ARM_MATH_MATRIX_CHECK
70
71 /* Check for matrix mismatch condition */
72 if ((pSrc->numRows != pSrc->numCols) ||
73 (pDst->numRows != pDst->numCols) ||
74 (pSrc->numRows != pDst->numRows) )
75 {
76 /* Set status as ARM_MATH_SIZE_MISMATCH */
77 status = ARM_MATH_SIZE_MISMATCH;
78 }
79 else
80
81 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
82
83 {
84 int i,j,k;
85 int n = pSrc->numRows;
86 float64_t invSqrtVj;
87 float64_t *pA,*pG;
88 int kCnt;
89
90
91 float64x2_t acc, acc0, acc1, acc2, acc3;
92 float64x2_t vecGi;
93 float64x2_t vecGj,vecGj0,vecGj1,vecGj2,vecGj3;
94
95
96 float64_t sum=0.0;
97 float64_t sum0=0.0,sum1=0.0,sum2=0.0,sum3=0.0;
98
99
100 pA = pSrc->pData;
101 pG = pDst->pData;
102
103 for(i=0 ;i < n ; i++)
104 {
105 for(j=i ; j+3 < n ; j+=4)
106 {
107 pG[(j + 0) * n + i] = pA[(j + 0) * n + i];
108 pG[(j + 1) * n + i] = pA[(j + 1) * n + i];
109 pG[(j + 2) * n + i] = pA[(j + 2) * n + i];
110 pG[(j + 3) * n + i] = pA[(j + 3) * n + i];
111
112 acc0 = vdupq_n_f64(0.0);
113 acc1 = vdupq_n_f64(0.0);
114 acc2 = vdupq_n_f64(0.0);
115 acc3 = vdupq_n_f64(0.0);
116
117 kCnt = i >> 1U;
118 k=0;
119 while(kCnt > 0)
120 {
121
122 vecGi=vld1q_f64(&pG[i * n + k]);
123
124 vecGj0=vld1q_f64(&pG[(j + 0) * n + k]);
125 vecGj1=vld1q_f64(&pG[(j + 1) * n + k]);
126 vecGj2=vld1q_f64(&pG[(j + 2) * n + k]);
127 vecGj3=vld1q_f64(&pG[(j + 3) * n + k]);
128
129 acc0 = vfmaq_f64(acc0, vecGi, vecGj0);
130 acc1 = vfmaq_f64(acc1, vecGi, vecGj1);
131 acc2 = vfmaq_f64(acc2, vecGi, vecGj2);
132 acc3 = vfmaq_f64(acc3, vecGi, vecGj3);
133
134 kCnt--;
135 k+=2;
136 }
137
138
139 sum0 = vaddvq_f64(acc0);
140 sum1 = vaddvq_f64(acc1);
141 sum2 = vaddvq_f64(acc2);
142 sum3 = vaddvq_f64(acc3);
143
144
145 kCnt = i & 1;
146 while(kCnt > 0)
147 {
148
149 sum0 = sum0 + pG[i * n + k] * pG[(j + 0) * n + k];
150 sum1 = sum1 + pG[i * n + k] * pG[(j + 1) * n + k];
151 sum2 = sum2 + pG[i * n + k] * pG[(j + 2) * n + k];
152 sum3 = sum3 + pG[i * n + k] * pG[(j + 3) * n + k];
153 kCnt--;
154 k++;
155 }
156
157 pG[(j + 0) * n + i] -= sum0;
158 pG[(j + 1) * n + i] -= sum1;
159 pG[(j + 2) * n + i] -= sum2;
160 pG[(j + 3) * n + i] -= sum3;
161 }
162
163 for(; j < n ; j++)
164 {
165 pG[j * n + i] = pA[j * n + i];
166
167 acc = vdupq_n_f64(0.0);
168
169 kCnt = i >> 1U;
170 k=0;
171 while(kCnt > 0)
172 {
173
174 vecGi=vld1q_f64(&pG[i * n + k]);
175 vecGj=vld1q_f64(&pG[j * n + k]);
176
177 acc = vfmaq_f64(acc, vecGi, vecGj);
178
179 kCnt--;
180 k+=2;
181 }
182
183
184 sum = vaddvq_f64(acc);
185
186 kCnt = i & 1;
187 while(kCnt > 0)
188 {
189 sum = sum + pG[i * n + k] * pG[(j + 0) * n + k];
190
191
192 kCnt--;
193 k++;
194 }
195
196 pG[j * n + i] -= sum;
197 }
198
199 if (pG[i * n + i] <= 0.0)
200 {
201 return(ARM_MATH_DECOMPOSITION_FAILURE);
202 }
203
204 invSqrtVj = 1.0/sqrt(pG[i * n + i]);
205 SCALE_COL_F64(pDst,i,invSqrtVj,i);
206 }
207
208 status = ARM_MATH_SUCCESS;
209
210 }
211
212
213 /* Return to application */
214 return (status);
215 }
216 #else
arm_mat_cholesky_f64(const arm_matrix_instance_f64 * pSrc,arm_matrix_instance_f64 * pDst)217 ARM_DSP_ATTRIBUTE arm_status arm_mat_cholesky_f64(
218 const arm_matrix_instance_f64 * pSrc,
219 arm_matrix_instance_f64 * pDst)
220 {
221
222 arm_status status; /* status of matrix inverse */
223
224
225 #ifdef ARM_MATH_MATRIX_CHECK
226
227 /* Check for matrix mismatch condition */
228 if ((pSrc->numRows != pSrc->numCols) ||
229 (pDst->numRows != pDst->numCols) ||
230 (pSrc->numRows != pDst->numRows) )
231 {
232 /* Set status as ARM_MATH_SIZE_MISMATCH */
233 status = ARM_MATH_SIZE_MISMATCH;
234 }
235 else
236
237 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
238
239 {
240 int i,j,k;
241 int n = pSrc->numRows;
242 float64_t invSqrtVj;
243 float64_t *pA,*pG;
244
245 pA = pSrc->pData;
246 pG = pDst->pData;
247
248
249 for(i=0 ; i < n ; i++)
250 {
251 for(j=i ; j < n ; j++)
252 {
253 pG[j * n + i] = pA[j * n + i];
254
255 for(k=0; k < i ; k++)
256 {
257 pG[j * n + i] = pG[j * n + i] - pG[i * n + k] * pG[j * n + k];
258 }
259 }
260
261 if (pG[i * n + i] <= 0.0)
262 {
263 return(ARM_MATH_DECOMPOSITION_FAILURE);
264 }
265
266 invSqrtVj = 1.0/sqrt(pG[i * n + i]);
267 SCALE_COL_F64(pDst,i,invSqrtVj,i);
268
269 }
270
271 status = ARM_MATH_SUCCESS;
272
273 }
274
275
276 /* Return to application */
277 return (status);
278 }
279 #endif
280
281 /**
282 @} end of MatrixChol group
283 */
284