1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mat_qr_f64.c
4  * Description:  Double floating-point matrix QR decomposition.
5  *
6  * $Date:        15 June 2022
7  * $Revision:    V1.11.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/matrix_functions.h"
30 #include "dsp/matrix_utils.h"
31 
32 
33 
34 /**
35   @ingroup groupMatrix
36  */
37 
38 
39 /**
40   @addtogroup MatrixQR
41   @{
42  */
43 
44 /**
45   @brief         QR decomposition of a m x n double floating point matrix with m >= n.
46   @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
47   @param[in]     threshold norm2 threshold.
48   @param[out]    pOutR     points to output R matrix structure of dimension m x n
49   @param[out]    pOutQ     points to output Q matrix structure of dimension m x m (can be NULL)
50   @param[out]    pOutTau   points to Householder scaling factors of dimension n
51   @param[inout]  pTmpA     points to a temporary vector of dimension m.
52   @param[inout]  pTmpB     points to a temporary vector of dimension m.
53   @return        execution status
54                    - \ref ARM_MATH_SUCCESS       : Operation successful
55                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
56 
57   @par           pOutQ is optional:
58                  pOutQ can be a NULL pointer.
59                  In this case, the argument will be ignored
60                  and the output Q matrix won't be computed.
61 
62 
63   @par           Norm2 threshold
64                  For the meaning of this argument please
65                  refer to the \ref MatrixHouseholder documentation
66 
67  */
68 
69 
70 
71 
arm_mat_qr_f64(const arm_matrix_instance_f64 * pSrc,const float64_t threshold,arm_matrix_instance_f64 * pOutR,arm_matrix_instance_f64 * pOutQ,float64_t * pOutTau,float64_t * pTmpA,float64_t * pTmpB)72 ARM_DSP_ATTRIBUTE arm_status arm_mat_qr_f64(
73     const arm_matrix_instance_f64 * pSrc,
74     const float64_t threshold,
75     arm_matrix_instance_f64 * pOutR,
76     arm_matrix_instance_f64 * pOutQ,
77     float64_t * pOutTau,
78     float64_t *pTmpA,
79     float64_t *pTmpB
80     )
81 
82 {
83   int32_t col=0;
84   int32_t nb,pos;
85   float64_t *pa,*pc;
86   float64_t beta;
87   float64_t *pv;
88   float64_t *pdst;
89   float64_t *p;
90 
91   if (pSrc->numRows < pSrc->numCols)
92   {
93     return(ARM_MATH_SIZE_MISMATCH);
94   }
95 
96   memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float64_t));
97   pOutR->numCols = pSrc->numCols;
98   pOutR->numRows = pSrc->numRows;
99 
100   p = pOutR->pData;
101 
102   pc = pOutTau;
103   for(col=0 ; col < pSrc->numCols; col++)
104   {
105       int32_t i,j,k,blkCnt;
106       float64_t *pa0,*pa1,*pa2,*pa3;
107       COPY_COL_F64(pOutR,col,col,pTmpA);
108 
109       beta = arm_householder_f64(pTmpA,threshold,pSrc->numRows - col,pTmpA);
110       *pc++ = beta;
111 
112       pdst = pTmpB;
113 
114       /* v.T A(col:,col:) -> tmpb */
115       pv = pTmpA;
116       pa = p;
117       for(j=0;j<pSrc->numCols-col; j++)
118       {
119               *pdst++ = *pv * *pa++;
120       }
121       pa += col;
122       pv++;
123       pdst = pTmpB;
124 
125       pa0 = pa;
126       pa1 = pa0 + pSrc->numCols;
127       pa2 = pa1 + pSrc->numCols;
128       pa3 = pa2 + pSrc->numCols;
129 
130       /* Unrolled loop */
131       blkCnt = (pSrc->numRows-col - 1) >> 2;
132       k=1;
133       while(blkCnt > 0)
134       {
135           float64_t sum;
136 
137           for(j=0;j<pSrc->numCols-col; j++)
138           {
139               sum = *pdst;
140 
141               sum += pv[0] * *pa0++;
142               sum += pv[1] * *pa1++;
143               sum += pv[2] * *pa2++;
144               sum += pv[3] * *pa3++;
145 
146               *pdst++ = sum;
147           }
148           pa0 += col + 3*pSrc->numCols;
149           pa1 += col + 3*pSrc->numCols;
150           pa2 += col + 3*pSrc->numCols;
151           pa3 += col + 3*pSrc->numCols;
152           pv  += 4;
153           pdst = pTmpB;
154           k += 4;
155           blkCnt--;
156       }
157 
158       pa = pa0;
159       for(;k<pSrc->numRows-col; k++)
160       {
161           for(j=0;j<pSrc->numCols-col; j++)
162           {
163               *pdst++ += *pv * *pa++;
164           }
165           pa += col;
166           pv++;
167           pdst = pTmpB;
168       }
169 
170       /* A(col:,col:) - beta v tmpb */
171       pa = p;
172       for(j=0;j<pSrc->numRows-col; j++)
173       {
174         float64_t f = beta * pTmpA[j];
175 
176         for(i=0;i<pSrc->numCols-col; i++)
177         {
178           *pa = *pa - f * pTmpB[i] ;
179           pa++;
180         }
181         pa += col;
182       }
183 
184       /* Copy Householder reflectors into R matrix */
185       pa = p + pOutR->numCols;
186       for(k=0;k<pSrc->numRows-col-1; k++)
187       {
188          *pa = pTmpA[k+1];
189          pa += pOutR->numCols;
190       }
191 
192       p += 1 + pOutR->numCols;
193   }
194 
195   /* Generate Q if requested by user matrix */
196 
197   if (pOutQ != NULL)
198   {
199      /* Initialize Q matrix to identity */
200      memset(pOutQ->pData,0,sizeof(float64_t)*pOutQ->numRows*pOutQ->numRows);
201 
202      pa = pOutQ->pData;
203      for(col=0 ; col < pOutQ->numCols; col++)
204      {
205         *pa = 1.0;
206         pa += pOutQ->numCols+1;
207      }
208 
209      nb = pOutQ->numRows - pOutQ->numCols + 1;
210 
211      pc = pOutTau + pOutQ->numCols - 1;
212      for(col=0 ; col < pOutQ->numCols; col++)
213      {
214        int32_t i,j,k, blkCnt;
215        float64_t *pa0,*pa1,*pa2,*pa3;
216        pos = pSrc->numRows - nb;
217        p = pOutQ->pData + pos + pOutQ->numCols*pos ;
218 
219 
220        COPY_COL_F64(pOutR,pos,pos,pTmpA);
221        pTmpA[0] = 1.0;
222        pdst = pTmpB;
223 
224        /* v.T A(col:,col:) -> tmpb */
225 
226        pv = pTmpA;
227        pa = p;
228        for(j=0;j<pOutQ->numRows-pos; j++)
229        {
230                *pdst++ = *pv * *pa++;
231        }
232        pa += pos;
233        pv++;
234        pdst = pTmpB;
235        pa0 = pa;
236        pa1 = pa0 + pOutQ->numRows;
237        pa2 = pa1 + pOutQ->numRows;
238        pa3 = pa2 + pOutQ->numRows;
239 
240        /* Unrolled loop */
241        blkCnt = (pOutQ->numRows-pos - 1) >> 2;
242        k=1;
243        while(blkCnt > 0)
244        {
245            float64_t sum;
246 
247            for(j=0;j<pOutQ->numRows-pos; j++)
248            {
249               sum = *pdst;
250 
251               sum += pv[0] * *pa0++;
252               sum += pv[1] * *pa1++;
253               sum += pv[2] * *pa2++;
254               sum += pv[3] * *pa3++;
255 
256               *pdst++ = sum;
257            }
258            pa0 += pos + 3*pOutQ->numRows;
259            pa1 += pos + 3*pOutQ->numRows;
260            pa2 += pos + 3*pOutQ->numRows;
261            pa3 += pos + 3*pOutQ->numRows;
262            pv  += 4;
263            pdst = pTmpB;
264            k += 4;
265            blkCnt--;
266        }
267 
268        pa = pa0;
269        for(;k<pOutQ->numRows-pos; k++)
270        {
271            for(j=0;j<pOutQ->numRows-pos; j++)
272            {
273                *pdst++ += *pv * *pa++;
274            }
275            pa += pos;
276            pv++;
277            pdst = pTmpB;
278        }
279 
280        pa = p;
281        beta = *pc--;
282        for(j=0;j<pOutQ->numRows-pos; j++)
283        {
284            float64_t f = beta * pTmpA[j];
285 
286            for(i=0;i<pOutQ->numCols-pos; i++)
287            {
288              *pa = *pa - f * pTmpB[i] ;
289              pa++;
290            }
291            pa += pos;
292        }
293 
294 
295        nb++;
296      }
297   }
298 
299   arm_status status = ARM_MATH_SUCCESS;
300   /* Return to application */
301   return (status);
302 }
303 
304 
305 /**
306   @} end of MatrixQR group
307  */
308