1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_qr_f64.c
4 * Description: Double floating-point matrix QR decomposition.
5 *
6 * $Date: 15 June 2022
7 * $Revision: V1.11.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions.h"
30 #include "dsp/matrix_utils.h"
31
32
33
34 /**
35 @ingroup groupMatrix
36 */
37
38
39 /**
40 @addtogroup MatrixQR
41 @{
42 */
43
44 /**
45 @brief QR decomposition of a m x n double floating point matrix with m >= n.
46 @param[in] pSrc points to input matrix structure. The source matrix is modified by the function.
47 @param[in] threshold norm2 threshold.
48 @param[out] pOutR points to output R matrix structure of dimension m x n
49 @param[out] pOutQ points to output Q matrix structure of dimension m x m (can be NULL)
50 @param[out] pOutTau points to Householder scaling factors of dimension n
51 @param[inout] pTmpA points to a temporary vector of dimension m.
52 @param[inout] pTmpB points to a temporary vector of dimension m.
53 @return execution status
54 - \ref ARM_MATH_SUCCESS : Operation successful
55 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
56
57 @par pOutQ is optional:
58 pOutQ can be a NULL pointer.
59 In this case, the argument will be ignored
60 and the output Q matrix won't be computed.
61
62
63 @par Norm2 threshold
64 For the meaning of this argument please
65 refer to the \ref MatrixHouseholder documentation
66
67 */
68
69
70
71
arm_mat_qr_f64(const arm_matrix_instance_f64 * pSrc,const float64_t threshold,arm_matrix_instance_f64 * pOutR,arm_matrix_instance_f64 * pOutQ,float64_t * pOutTau,float64_t * pTmpA,float64_t * pTmpB)72 ARM_DSP_ATTRIBUTE arm_status arm_mat_qr_f64(
73 const arm_matrix_instance_f64 * pSrc,
74 const float64_t threshold,
75 arm_matrix_instance_f64 * pOutR,
76 arm_matrix_instance_f64 * pOutQ,
77 float64_t * pOutTau,
78 float64_t *pTmpA,
79 float64_t *pTmpB
80 )
81
82 {
83 int32_t col=0;
84 int32_t nb,pos;
85 float64_t *pa,*pc;
86 float64_t beta;
87 float64_t *pv;
88 float64_t *pdst;
89 float64_t *p;
90
91 if (pSrc->numRows < pSrc->numCols)
92 {
93 return(ARM_MATH_SIZE_MISMATCH);
94 }
95
96 memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float64_t));
97 pOutR->numCols = pSrc->numCols;
98 pOutR->numRows = pSrc->numRows;
99
100 p = pOutR->pData;
101
102 pc = pOutTau;
103 for(col=0 ; col < pSrc->numCols; col++)
104 {
105 int32_t i,j,k,blkCnt;
106 float64_t *pa0,*pa1,*pa2,*pa3;
107 COPY_COL_F64(pOutR,col,col,pTmpA);
108
109 beta = arm_householder_f64(pTmpA,threshold,pSrc->numRows - col,pTmpA);
110 *pc++ = beta;
111
112 pdst = pTmpB;
113
114 /* v.T A(col:,col:) -> tmpb */
115 pv = pTmpA;
116 pa = p;
117 for(j=0;j<pSrc->numCols-col; j++)
118 {
119 *pdst++ = *pv * *pa++;
120 }
121 pa += col;
122 pv++;
123 pdst = pTmpB;
124
125 pa0 = pa;
126 pa1 = pa0 + pSrc->numCols;
127 pa2 = pa1 + pSrc->numCols;
128 pa3 = pa2 + pSrc->numCols;
129
130 /* Unrolled loop */
131 blkCnt = (pSrc->numRows-col - 1) >> 2;
132 k=1;
133 while(blkCnt > 0)
134 {
135 float64_t sum;
136
137 for(j=0;j<pSrc->numCols-col; j++)
138 {
139 sum = *pdst;
140
141 sum += pv[0] * *pa0++;
142 sum += pv[1] * *pa1++;
143 sum += pv[2] * *pa2++;
144 sum += pv[3] * *pa3++;
145
146 *pdst++ = sum;
147 }
148 pa0 += col + 3*pSrc->numCols;
149 pa1 += col + 3*pSrc->numCols;
150 pa2 += col + 3*pSrc->numCols;
151 pa3 += col + 3*pSrc->numCols;
152 pv += 4;
153 pdst = pTmpB;
154 k += 4;
155 blkCnt--;
156 }
157
158 pa = pa0;
159 for(;k<pSrc->numRows-col; k++)
160 {
161 for(j=0;j<pSrc->numCols-col; j++)
162 {
163 *pdst++ += *pv * *pa++;
164 }
165 pa += col;
166 pv++;
167 pdst = pTmpB;
168 }
169
170 /* A(col:,col:) - beta v tmpb */
171 pa = p;
172 for(j=0;j<pSrc->numRows-col; j++)
173 {
174 float64_t f = beta * pTmpA[j];
175
176 for(i=0;i<pSrc->numCols-col; i++)
177 {
178 *pa = *pa - f * pTmpB[i] ;
179 pa++;
180 }
181 pa += col;
182 }
183
184 /* Copy Householder reflectors into R matrix */
185 pa = p + pOutR->numCols;
186 for(k=0;k<pSrc->numRows-col-1; k++)
187 {
188 *pa = pTmpA[k+1];
189 pa += pOutR->numCols;
190 }
191
192 p += 1 + pOutR->numCols;
193 }
194
195 /* Generate Q if requested by user matrix */
196
197 if (pOutQ != NULL)
198 {
199 /* Initialize Q matrix to identity */
200 memset(pOutQ->pData,0,sizeof(float64_t)*pOutQ->numRows*pOutQ->numRows);
201
202 pa = pOutQ->pData;
203 for(col=0 ; col < pOutQ->numCols; col++)
204 {
205 *pa = 1.0;
206 pa += pOutQ->numCols+1;
207 }
208
209 nb = pOutQ->numRows - pOutQ->numCols + 1;
210
211 pc = pOutTau + pOutQ->numCols - 1;
212 for(col=0 ; col < pOutQ->numCols; col++)
213 {
214 int32_t i,j,k, blkCnt;
215 float64_t *pa0,*pa1,*pa2,*pa3;
216 pos = pSrc->numRows - nb;
217 p = pOutQ->pData + pos + pOutQ->numCols*pos ;
218
219
220 COPY_COL_F64(pOutR,pos,pos,pTmpA);
221 pTmpA[0] = 1.0;
222 pdst = pTmpB;
223
224 /* v.T A(col:,col:) -> tmpb */
225
226 pv = pTmpA;
227 pa = p;
228 for(j=0;j<pOutQ->numRows-pos; j++)
229 {
230 *pdst++ = *pv * *pa++;
231 }
232 pa += pos;
233 pv++;
234 pdst = pTmpB;
235 pa0 = pa;
236 pa1 = pa0 + pOutQ->numRows;
237 pa2 = pa1 + pOutQ->numRows;
238 pa3 = pa2 + pOutQ->numRows;
239
240 /* Unrolled loop */
241 blkCnt = (pOutQ->numRows-pos - 1) >> 2;
242 k=1;
243 while(blkCnt > 0)
244 {
245 float64_t sum;
246
247 for(j=0;j<pOutQ->numRows-pos; j++)
248 {
249 sum = *pdst;
250
251 sum += pv[0] * *pa0++;
252 sum += pv[1] * *pa1++;
253 sum += pv[2] * *pa2++;
254 sum += pv[3] * *pa3++;
255
256 *pdst++ = sum;
257 }
258 pa0 += pos + 3*pOutQ->numRows;
259 pa1 += pos + 3*pOutQ->numRows;
260 pa2 += pos + 3*pOutQ->numRows;
261 pa3 += pos + 3*pOutQ->numRows;
262 pv += 4;
263 pdst = pTmpB;
264 k += 4;
265 blkCnt--;
266 }
267
268 pa = pa0;
269 for(;k<pOutQ->numRows-pos; k++)
270 {
271 for(j=0;j<pOutQ->numRows-pos; j++)
272 {
273 *pdst++ += *pv * *pa++;
274 }
275 pa += pos;
276 pv++;
277 pdst = pTmpB;
278 }
279
280 pa = p;
281 beta = *pc--;
282 for(j=0;j<pOutQ->numRows-pos; j++)
283 {
284 float64_t f = beta * pTmpA[j];
285
286 for(i=0;i<pOutQ->numCols-pos; i++)
287 {
288 *pa = *pa - f * pTmpB[i] ;
289 pa++;
290 }
291 pa += pos;
292 }
293
294
295 nb++;
296 }
297 }
298
299 arm_status status = ARM_MATH_SUCCESS;
300 /* Return to application */
301 return (status);
302 }
303
304
305 /**
306 @} end of MatrixQR group
307 */
308