1 extern "C" {
2     extern void matrix_test();
3 }
4 
5 #include "allocator.h"
6 
7 #include <dsppp/arch.hpp>
8 #include <dsppp/fixed_point.hpp>
9 #include <dsppp/matrix.hpp>
10 #include <dsppp/unroll.hpp>
11 
12 #include <iostream>
13 
14 #include <cmsis_tests.h>
15 #include "boost/mp11.hpp"
16 using namespace boost::mp11;
17 
18 
19 extern "C" {
20 #include "dsp/matrix_functions.h"
21 #include "dsp/matrix_utils.h"
22 }
23 
24 template<typename T>
25 struct MatTestConstant;
26 
27 template<>
28 struct MatTestConstant<double>
29 {
30     constexpr static double value = 0.001;
31     constexpr static double half = 0.5;
32 };
33 
34 template<>
35 struct MatTestConstant<float32_t>
36 {
37     constexpr static float value = 0.001f;
38     constexpr static float half = 0.5f;
39 };
40 
41 #if !defined(DISABLEFLOAT16)
42 template<>
43 struct MatTestConstant<float16_t>
44 {
45     constexpr static float16_t value = (float16_t)0.001f;
46     constexpr static float16_t half = (float16_t)0.5f;
47 
48 };
49 #endif
50 
51 template<>
52 struct MatTestConstant<Q7>
53 {
54     constexpr static Q7 value = 0.001_q7;
55     constexpr static Q7 half = 0.5_q7;
56 };
57 
58 template<>
59 struct MatTestConstant<Q15>
60 {
61     constexpr static Q15 value = 0.001_q15;
62     constexpr static Q15 half = 0.5_q15;
63 };
64 
65 template<>
66 struct MatTestConstant<Q31>
67 {
68     constexpr static Q31 value = 0.001_q31;
69     constexpr static Q31 half = 0.5_q31;
70 };
71 
72 
73 template<typename T>
74 struct ErrThreshold
75 {
76    constexpr static float abserr = 0;
77    constexpr static float relerr = 0;
78    constexpr static float abserr_cholesky = 0;
79    constexpr static float relerr_cholesky = 0;
80    constexpr static float abserr_householder = 0;
81    constexpr static float relerr_householder = 0;
82    constexpr static float abserr_qr = 0;
83    constexpr static float relerr_qr = 0;
84    constexpr static float abserr_inv = 0;
85    constexpr static float relerr_inv = 0;
86 };
87 
88 // Should be more accurate than F32 but right know
89 // we only check there is no regression compared to f32
90 template<>
91 struct ErrThreshold<double>
92 {
93    constexpr static float abserr = ABS_ERROR;
94    constexpr static float relerr = REL_ERROR;
95    constexpr static float abserr_cholesky = 3e-4;
96    constexpr static float relerr_cholesky = 1e-4;
97 
98    constexpr static float abserr_householder = ABS_ERROR;
99    constexpr static float relerr_householder = REL_ERROR;
100    constexpr static float abserr_qr = ABS_ERROR;
101    constexpr static float relerr_qr = REL_ERROR;
102 
103    constexpr static float abserr_inv = ABS_ERROR;
104    constexpr static float relerr_inv = REL_ERROR;
105 };
106 
107 template<>
108 struct ErrThreshold<float32_t>
109 {
110    constexpr static float abserr = ABS_ERROR;
111    constexpr static float relerr = REL_ERROR;
112    constexpr static float abserr_cholesky = 3e-4;
113    constexpr static float relerr_cholesky = 1e-4;
114 
115    constexpr static float abserr_householder = ABS_ERROR;
116    constexpr static float relerr_householder = REL_ERROR;
117    constexpr static float abserr_qr = ABS_ERROR;
118    constexpr static float relerr_qr = REL_ERROR;
119 
120    constexpr static float abserr_inv = 4.0e-6;
121    constexpr static float relerr_inv = 5.0e-6;
122 };
123 
124 #if !defined(DISABLEFLOAT16)
125 template<>
126 struct ErrThreshold<float16_t>
127 {
128    constexpr static float abserr = ABS_ERROR;
129    constexpr static float relerr = REL_ERROR;
130    constexpr static float abserr_cholesky = 2e-1;
131    constexpr static float relerr_cholesky = 2e-1;
132 
133    constexpr static float abserr_householder = 2e-4;
134    constexpr static float relerr_householder = 2e-3;
135    // 32x32 is not numerically behaving well with
136    // the matrix used as input
137    constexpr static float abserr_qr = 2.0;
138    constexpr static float relerr_qr = 1e-2;
139 
140    constexpr static float abserr_inv = 3e-2;
141    constexpr static float relerr_inv = 3e-2;
142 };
143 #endif
144 
cmsisdsp_mat_inv(float64_t * amod,float64_t * b,uint32_t r,uint32_t c)145 void cmsisdsp_mat_inv(float64_t *amod,
146                       float64_t* b,
147                       uint32_t r,uint32_t c)
148 {
149    arm_matrix_instance_f64 src;
150    arm_matrix_instance_f64 dst;
151 
152 
153    src.numRows = r;
154    src.numCols = c;
155    src.pData = amod;
156 
157    dst.numRows = r;
158    dst.numCols = c;
159    dst.pData = b;
160 
161    arm_status status = arm_mat_inverse_f64(&src,&dst);
162    (void)status;
163 };
164 
cmsisdsp_mat_inv(float32_t * amod,float32_t * b,uint32_t r,uint32_t c)165 void cmsisdsp_mat_inv(float32_t *amod,
166                       float32_t* b,
167                       uint32_t r,uint32_t c)
168 {
169    arm_matrix_instance_f32 src;
170    arm_matrix_instance_f32 dst;
171 
172 
173    src.numRows = r;
174    src.numCols = c;
175    src.pData = amod;
176 
177    dst.numRows = r;
178    dst.numCols = c;
179    dst.pData = b;
180 
181    arm_status status = arm_mat_inverse_f32(&src,&dst);
182    (void)status;
183 };
184 
185 #if !defined(DISABLEFLOAT16)
cmsisdsp_mat_inv(float16_t * amod,float16_t * b,uint32_t r,uint32_t c)186 void cmsisdsp_mat_inv(float16_t *amod,
187                       float16_t* b,
188                       uint32_t r,uint32_t c)
189 {
190    arm_matrix_instance_f16 src;
191    arm_matrix_instance_f16 dst;
192 
193 
194    src.numRows = r;
195    src.numCols = c;
196    src.pData = amod;
197 
198    dst.numRows = r;
199    dst.numCols = c;
200    dst.pData = b;
201 
202    arm_status status = arm_mat_inverse_f16(&src,&dst);
203    (void)status;
204 };
205 #endif
206 
207 const float32_t mat64[64] = {0.395744, 0.623798, 0.885422, 0.95415, 0.310384, 0.257541,
208         0.631426, 0.424491, 0.130945, 0.799959, 0.133693, 0.479455,
209         0.519254, 0.381039, 0.617455, 0.748273, 0.146944, 0.928945,
210         0.430936, 0.508207, 0.829023, 0.358027, 0.999501, 0.851953,
211         0.273895, 0.685898, 0.0436612, 0.295212, 0.467651, 0.0515567,
212         0.21037, 0.607475, 0.570295, 0.281109, 0.979219, 0.0947969,
213         0.319016, 0.398405, 0.349953, 0.710002, 0.431597, 0.447659,
214         0.0747669, 0.057063, 0.165648, 0.773106, 0.135765, 0.709327,
215         0.873836, 0.292361, 0.00202529, 0.392942, 0.520183, 0.0528055,
216         0.797982, 0.613497, 0.509682, 0.0435791, 0.780526, 0.960582,
217         0.535914, 0.216113, 0.134108, 0.225859};
218 
219 const float32_t mat16[16] = {1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 6.0,
220          3.0, 5.0, 9.0, 10.0, 4.0, 6.0, 10.0, 16.0};
221 
222 const float32_t mat256[256] = {0.97936, 0.498105, 0.452618, 0.299761, 0.688624, 0.247212, \
223         0.228337, 0.22905, 0.563815, 0.251998, 0.5238, 0.141223, 0.0980689, \
224         0.79112, 0.771182, 0.890995, 0.0256181, 0.0377277, 0.575629, \
225         0.648138, 0.926218, 0.803878, 0.620333, 0.325635, 0.587355, 0.041795, \
226         0.934271, 0.0690131, 0.0240136, 0.800828, 0.522999, 0.374706, \
227         0.266977, 0.208028, 0.112878, 0.0389899, 0.658311, 0.205067, \
228         0.244172, 0.0762778, 0.190575, 0.677312, 0.0682093, 0.367328, \
229         0.0191464, 0.988968, 0.437477, 0.130622, 0.907823, 0.0116559, \
230         0.614526, 0.447443, 0.0126975, 0.995496, 0.947676, 0.659996, \
231         0.321547, 0.725415, 0.658426, 0.0243924, 0.0843519, 0.351748, \
232         0.974332, 0.673381, 0.375012, 0.719626, 0.721219, 0.766905, \
233         0.17065, 0.648905, 0.770983, 0.360008, 0.344226, 0.179633, 0.347905, \
234         0.555561, 0.742615, 0.908389, 0.806959, 0.176078, 0.872167, \
235         0.321839, 0.098607, 0.954515, 0.627286, 0.235082, 0.746179, 0.163606, \
236         0.899323, 0.871471, 0.712448, 0.956971, 0.736687, 0.750702, 0.843348, \
237         0.302435, 0.444862, 0.0644597, 0.765519, 0.518397, 0.765541, \
238         0.900375, 0.201853, 0.490325, 0.721786, 0.893647, 0.774724, \
239         0.0983631, 0.339887, 0.526084, 0.0786152, 0.515697, 0.438801, \
240         0.226628, 0.125093, 0.886642, 0.617766, 0.71696, 0.473172, 0.640949, \
241         0.67688, 0.676214, 0.453662, 0.345796, 0.608999, 0.904448, 0.0965741, \
242         0.00461771, 0.467399, 0.292235, 0.0418646, 0.116632, 0.0766192, \
243         0.269051, 0.411649, 0.0538381, 0.973959, 0.667106, 0.301662, \
244         0.977206, 0.891751, 0.420267, 0.441334, 0.0896179, 0.249969, \
245         0.672614, 0.623966, 0.609733, 0.320772, 0.39723, 0.845196, 0.653877, \
246         0.0599186, 0.340188, 0.199787, 0.598104, 0.45664, 0.920485, 0.969439, \
247         0.446555, 0.0932837, 0.0247635, 0.747644, 0.438759, 0.639154, \
248         0.754049, 0.379433, 0.968655, 0.0452146, 0.208123, 0.252654, \
249         0.261898, 0.608665, 0.145211, 0.395368, 0.799111, 0.697823, \
250         0.382906, 0.456515, 0.262579, 0.284169, 0.881488, 0.860877, 0.155548, \
251         0.537387, 0.804235, 0.311383, 0.183216, 0.677692, 0.829542, 0.406049, \
252         0.860392, 0.467668, 0.385633, 0.654692, 0.841125, 0.178406, \
253         0.668945, 0.369609, 0.809711, 0.454593, 0.632028, 0.605791, 0.643851, \
254         0.787023, 0.285633, 0.832216, 0.30892, 0.303559, 0.704898, 0.61118, \
255         0.435547, 0.173678, 0.788689, 0.319511, 0.648378, 0.635417, 0.125127, \
256         0.310251, 0.800819, 0.4863, 0.924361, 0.308059, 0.952175, 0.449844, \
257         0.215496, 0.257826, 0.556383, 0.259735, 0.197234, 0.0509903, 0.21474, \
258         0.145085, 0.41288, 0.876758, 0.096721, 0.228955, 0.0152248, 0.126501, \
259         0.28899, 0.336668, 0.580015, 0.932761, 0.989783, 0.667379, \
260         0.798751, 0.587173, 0.445902, 0.041448, 0.311878, 0.0332857, \
261         0.401984, 0.795049, 0.8222, 0.678648, 0.807558};
262 
263 template<typename T,int R,int C, template<int> typename A>
init_mat(Matrix<T,R,C,A> & pDst,std::size_t r,std::size_t c)264 void init_mat(Matrix<T,R,C,A> &pDst,std::size_t r,std::size_t c)
265 {
266    const float32_t *p;
267    if ((r==4) && (r==c))
268    {
269       p = mat16;
270    }
271 
272    if ((r==8) && (r==c))
273    {
274       p = mat64;
275    }
276 
277    if ((r==16) && (r==c))
278    {
279       p = mat256;
280    }
281 
282 
283    for(std::size_t i=0;i<r*c;i++)
284    {
285       pDst[i] = T(p[i]);
286    }
287 }
288 
289 #if !defined(DISABLEFLOAT16)
_abs(float16_t x)290 __STATIC_FORCEINLINE float16_t _abs(float16_t x)
291 {
292    return((float16_t)fabsf((float)x));
293 };
294 #endif
295 
_abs(float32_t x)296 __STATIC_FORCEINLINE float32_t _abs(float32_t x)
297 {
298    return(fabsf(x));
299 };
300 
_abs(double x)301 __STATIC_FORCEINLINE double _abs(double x)
302 {
303    return(fabs(x));
304 };
305 
306 template<typename T,
307          int NB,
308          template<int> typename A,
309          typename M>
_matinv(const Matrix<T,NB,NB,A> & a,M && res)310 void _matinv(const Matrix<T,NB,NB,A> &a,M && res)
311 {
312 
313   Matrix<T,NB,NB,TMP_ALLOC> b = a;
314 
315   const vector_length_t nb_rows = a.rows();
316   const vector_length_t nb_cols = a.columns();
317 
318 
319   for(index_t r=0;r < nb_rows ; r++)
320   {
321      res.row(r) = T{};
322      res(r,r) = number_traits<T>::one();
323   }
324 
325 
326   for(index_t c=0;c < nb_cols ; c++)
327   {
328      T pivot = b(c,c);
329      index_t selectedRow = c;
330 
331 
332      for(index_t r=c+1;r < nb_rows ; r++)
333      {
334         T newPivot = b(r,c);
335         if (_abs(newPivot)>_abs(pivot))
336         {
337             pivot = newPivot;
338             selectedRow = r;
339         }
340      }
341 
342      if ((pivot!=T{}) && (selectedRow != c))
343      {
344          swap(b.row(c,c),b.row(selectedRow,c));
345          swap(res.row(c),res.row(selectedRow));
346      }
347      else if (pivot == T{})
348      {
349         break;
350      }
351 
352      pivot = number_traits<T>::one() / pivot;
353 
354      b.row(c,c) *= pivot;
355      res.row(c) *= pivot;
356 
357      index_t r=0;
358 
359      for(;r < c ; r++)
360      {
361          const T tmp = b(r,c);
362          b.row(r,c) -= b.row(c,c)*tmp;
363          res.row(r) -= res.row(c)*tmp;
364      }
365 
366      for(r=c+1;r < nb_rows ; r++)
367      {
368          const T tmp = b(r,c);
369          b.row(r,c) -= b.row(c,c)*tmp;
370          res.row(r) -= res.row(c)*tmp;
371      }
372 
373   }
374 
375 
376 }
377 
378 template<typename T,
379          int NB,
380          template<int> typename A,
381          typename std::enable_if<(NB>0),bool>::type = true>
matinv(const Matrix<T,NB,NB,A> & a)382 Matrix<T,NB,NB,TMP_ALLOC> matinv(const Matrix<T,NB,NB,A> &a)
383 {
384    Matrix<T,NB,NB,TMP_ALLOC> res;
385    _matinv(a,res);
386    return(res);
387 }
388 
389 template<typename T,
390          int NB,
391          template<int> typename A,
392          typename std::enable_if<(NB<0),bool>::type = true>
matinv(const Matrix<T,NB,NB,A> & a)393 Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> matinv(const Matrix<T,NB,NB,A> &a)
394 {
395    Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> res(a.rows(),a.columns());
396    return (_matinv(a,res));
397    return(res);
398 }
399 
400 template<typename T,
401          int NB,
402          template<int> typename A,
403          typename std::enable_if<(NB<0),bool>::type = true>
matinv(Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> & res,const Matrix<T,NB,NB,A> & a)404 void matinv(Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> &res, const Matrix<T,NB,NB,A> &a)
405 {
406    (void)_matinv(a,res);
407 }
408 
409 
410 template<typename T,int R,int C>
testinv()411 void testinv()
412 {
413    std::cout << "----\r\n";
414    std::cout << R << " x " << C << "\r\n";
415 
416    #if defined(STATIC_TEST)
417    PMat<T,R,C> a;
418    #else
419    PMat<T> a(R,C);
420    #endif
421 
422    init_mat(a,R,C);
423 
424    #if !defined(STATIC_TEST)
425    PMat<T> res(R,C);
426    #endif
427 
428    INIT_SYSTICK;
429    START_CYCLE_MEASUREMENT;
430    startSectionNB(1);
431    #if defined(STATIC_TEST)
432    PMat<T,R,C> res = matinv(a);
433    #else
434    matinv(res,a);
435    #endif
436    stopSectionNB(1);
437    STOP_CYCLE_MEASUREMENT;
438 
439    PMat<T> amod(a);
440    PMat<T> cmsis_res(R,C);
441 
442    INIT_SYSTICK;
443    START_CYCLE_MEASUREMENT;
444    cmsisdsp_mat_inv(amod.ptr(),cmsis_res.ptr(),R,C);
445    STOP_CYCLE_MEASUREMENT;
446 
447 
448 
449    if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
450        ErrThreshold<T>::abserr_inv,ErrThreshold<T>::relerr_inv))
451    {
452       printf("inv failed \r\n");
453 
454    }
455 
456    std::cout << "=====\r\n";
457 
458 }
459 
460 template<typename T,int R,int C>
testadd()461 void testadd()
462 {
463    std::cout << "----\r\n";
464    std::cout << R << " x " << C << "\r\n";
465 
466    #if defined(STATIC_TEST)
467    PMat<T,R,C> a;
468    PMat<T,R,C> b;
469    #else
470    PMat<T> a(R,C);
471    PMat<T> b(R,C);
472    #endif
473 
474    init_array(a,R*C);
475    init_array(b,R*C);
476 
477 
478    INIT_SYSTICK;
479    START_CYCLE_MEASUREMENT;
480    startSectionNB(1);
481    #if defined(STATIC_TEST)
482    PMat<T,R,C> res = a+b;
483    #else
484    PMat<T> res = a+b;
485    #endif
486    stopSectionNB(1);
487    STOP_CYCLE_MEASUREMENT;
488 
489    //PrintType<decltype(a+b)>();
490    //PrintType<decltype(res)>();
491 //
492    //std::cout << "a: " << IsVector<decltype(a)>::value << "\r\n";
493    //std::cout << "b: " << IsVector<decltype(b)>::value << "\r\n";
494    //std::cout << "a+b: " << IsVector<decltype(a+b)>::value << "\r\n";
495    //std::cout << "res: " << IsVector<decltype(res)>::value << "\r\n";
496    //std::cout << "same: " << SameElementType<decltype(res),decltype(a+b)>::value << "\r\n";
497 //
498    //std::cout << "vec inst: " << has_vector_inst<decltype(res)>() << "\r\n";
499    //std::cout << "vec index pair: " << vector_idx_pair<decltype(res),decltype(a+b)>() << "\r\n";
500    //std::cout << "must use mat idx: " << must_use_matrix_idx_pair<decltype(res),decltype(a+b)>() << "\r\n";
501 
502    INIT_SYSTICK;
503    START_CYCLE_MEASUREMENT;
504    #if defined(STATIC_TEST)
505    PMat<T,R,C> cmsis_res;
506    #else
507    PMat<T> cmsis_res(R,C);
508    #endif
509    cmsisdsp_mat_add(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C);
510    STOP_CYCLE_MEASUREMENT;
511 
512 
513 
514    if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
515        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
516    {
517       printf("add failed \r\n");
518    }
519 
520    std::cout << "=====\r\n";
521 
522 }
523 
524 
525 template<typename T,int R,int C>
testdiag()526 void testdiag()
527 {
528    std::cout << "----\r\n";
529    std::cout << R << " x " << C << "\r\n";
530    #if defined(STATIC_TEST)
531    PVector<T,R> a;
532    #else
533    PVector<T> a(R);
534    #endif
535    init_array(a,R);
536 
537    INIT_SYSTICK;
538    START_CYCLE_MEASUREMENT;
539    startSectionNB(1);
540    #if defined(STATIC_TEST)
541    PMat<T,R,C> res=PMat<T,R,C>::diagonal(a);
542    #else
543    PMat<T> res=PMat<T>::diagonal(a);
544    #endif
545    stopSectionNB(1);
546    STOP_CYCLE_MEASUREMENT;
547 
548 
549    const T* ap = a.const_ptr();
550 
551    INIT_SYSTICK;
552    START_CYCLE_MEASUREMENT;
553    #if defined(STATIC_TEST)
554    PMat<T,R,C> cmsis_res;
555    #else
556    PMat<T> cmsis_res(R,C);
557    #endif
558    T* refp = cmsis_res.ptr();
559 
560    UNROLL_LOOP
561    for(index_t row=0;row < R; row++)
562    {
563       UNROLL_LOOP
564       for(index_t col=0;col < C; col++)
565       {
566          if (row != col)
567          {
568             refp[row*C+col] = T{};
569          }
570          else
571          {
572              refp[row*C+col] = ap[row];
573          }
574       }
575    }
576    STOP_CYCLE_MEASUREMENT;
577 
578 
579 
580    if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
581        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
582    {
583       printf("diag failed \r\n");
584    }
585 
586    std::cout << "=====\r\n";
587 }
588 
589 
590 
591 template<typename T,int R,int C>
testouter()592 void testouter()
593 {
594    std::cout << "----\r\n";
595    std::cout << R << " x " << C << "\r\n";
596 
597    PVector<T,R> a;
598    PVector<T,C> b;
599    init_array(a,R);
600    init_array(b,C);
601 
602    b = b + b;
603 
604    INIT_SYSTICK;
605    START_CYCLE_MEASUREMENT;
606    startSectionNB(1);
607    PMat<T,R,C> res = outer(a,b);
608    stopSectionNB(1);
609    STOP_CYCLE_MEASUREMENT;
610 
611 
612 
613    INIT_SYSTICK;
614    START_CYCLE_MEASUREMENT;
615    startSectionNB(2);
616    #if defined(STATIC_TEST)
617    PMat<T,R,C> cmsis_res;
618    #else
619    PMat<T> cmsis_res(R,C);
620    #endif
621    CMSISOuter<T>::run(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C);
622    startSectionNB(2);
623    STOP_CYCLE_MEASUREMENT;
624 
625    //std::cout<<cmsis_res;
626 
627    if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
628        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
629    {
630       printf("outer failed \r\n");
631    }
632 
633    std::cout << "=====\r\n";
634 
635 }
636 
637 template<typename T,int R,int C>
testview()638 void testview()
639 {
640    std::cout << "----\r\n";
641    std::cout << R << " x " << C << "\r\n";
642 
643    #if defined(STATIC_TEST)
644    PVector<T,R> a;
645    #else
646    PVector<T> a(R);
647    #endif
648    init_array(a,R);
649 
650    #if defined(STATIC_TEST)
651    PMat<T,R,C> res=PMat<T,R,C>::diagonal(a);
652    #else
653    PMat<T> res=PMat<T>::diagonal(a);
654    #endif
655    //std::cout << res;
656    constexpr int subsize = 8;
657    constexpr int subpos = 8;
658    auto r = res.sub(Slice(subpos,subpos+subsize),Slice(subpos,subpos+subsize));
659 
660    #if defined(STATIC_TEST)
661    PMat<T,subsize,subsize> resb;
662    #else
663    PMat<T> resb(subsize,subsize);
664    #endif
665 
666    INIT_SYSTICK;
667    START_CYCLE_MEASUREMENT;
668    startSectionNB(1);
669    resb = r+r;
670    stopSectionNB(1);
671    STOP_CYCLE_MEASUREMENT;
672 
673    //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
674 
675    PMat<T,subsize,subsize> cmsis_res;
676    INIT_SYSTICK;
677    START_CYCLE_MEASUREMENT;
678    startSectionNB(2);
679    DISABLE_LOOP_UNROLL
680    for(index_t row=0;row < subsize ; row++)
681    {
682       DISABLE_LOOP_UNROLL
683       for(index_t col=0;col < subsize ; col++)
684       {
685          cmsis_res(row,col) = r(row,col)+r(row,col);
686       }
687    }
688    startSectionNB(2);
689    STOP_CYCLE_MEASUREMENT;
690 
691    //std::cout<<resb;
692    //std::cout<<cmsis_res;
693 
694    if (!validate(resb,cmsis_res,
695        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
696    {
697       printf("sub matrix failed \r\n");
698    }
699 
700    std::cout << "=====\r\n";
701 
702 }
703 
704 
705 
706 template<typename T,int R,int C>
testmatvec()707 void testmatvec()
708 {
709 
710    using STO = typename vector_traits<T>::storage_type;
711 
712    std::cout << "----\r\n";
713    std::cout << R << " x " << C << "\r\n";
714 
715    #if defined(STATIC_TEST)
716    PVector<T,C> a;
717    #else
718    PVector<T> a(C);
719    #endif
720    init_array(a,C);
721 
722    #if defined(STATIC_TEST)
723    PMat<T,R,C> m;
724    #else
725    PMat<T> m(R,C);
726    #endif
727    init_array(m,R*C);
728 
729 
730    INIT_SYSTICK;
731    START_CYCLE_MEASUREMENT;
732    startSectionNB(1);
733    #if defined(STATIC_TEST)
734    PVector<T,R> res = dot(m,a);
735    #else
736    PVector<T> res = dot(m,a);
737    #endif
738    stopSectionNB(1);
739    STOP_CYCLE_MEASUREMENT;
740 
741    //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
742 
743    INIT_SYSTICK;
744    START_CYCLE_MEASUREMENT;
745    #if defined(STATIC_TEST)
746    PVector<T,R> cmsis_res;
747    #else
748    PVector<T> cmsis_res(R);
749    #endif
750    typename CMSISMatrixType<T>::type S;
751    S.numRows = R;
752    S.numCols = C;
753    S.pData = reinterpret_cast<STO*>(const_cast<T*>(m.ptr()));
754 
755 
756    startSectionNB(2);
757    cmsis_mat_vec_mult(&S, a.const_ptr(), cmsis_res.ptr());
758    startSectionNB(2);
759    STOP_CYCLE_MEASUREMENT;
760 
761    //std::cout << cmsis_res;
762 
763    if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R,
764        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
765    {
766       printf("matrix times vector failed \r\n");
767    }
768    std::cout << "=====\r\n";
769 
770 }
771 
772 template<typename T,int R,int C>
testcomplexmatvec()773 void testcomplexmatvec()
774 {
775    const T scalar = MatTestConstant<T>::half;
776    using STO = typename vector_traits<T>::storage_type;
777 
778    std::cout << "----\r\n";
779    std::cout << R << " x " << C << "\r\n";
780 
781    #if defined(STATIC_TEST)
782    PVector<T,C> a;
783    PVector<T,C> b;
784    #else
785    PVector<T> a(C);
786    PVector<T> b(C);
787    #endif
788    init_array(a,C);
789    init_array(b,C);
790 
791    #if defined(STATIC_TEST)
792    PMat<T,R,C> m;
793    #else
794    PMat<T> m(R,C);
795    #endif
796    init_array(m,R*C);
797 
798 
799    INIT_SYSTICK;
800    START_CYCLE_MEASUREMENT;
801    startSectionNB(1);
802    #if defined(STATIC_TEST)
803    PVector<T,C> tmpv = a + b * scalar;
804    PVector<T,R> res = dot(m,tmpv);
805    #else
806    PVector<T> tmpv = a + b * scalar;
807    PVector<T> res = dot(m,tmpv);
808    #endif
809    stopSectionNB(1);
810    STOP_CYCLE_MEASUREMENT;
811 
812    //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
813 
814    INIT_SYSTICK;
815    START_CYCLE_MEASUREMENT;
816    #if defined(STATIC_TEST)
817    PVector<T,R> cmsis_res;
818    PVector<T,C> tmp;
819    #else
820    PVector<T> cmsis_res(R);
821    PVector<T> tmp(C);
822    #endif
823    typename CMSISMatrixType<T>::type S;
824    S.numRows = R;
825    S.numCols = C;
826    S.pData = reinterpret_cast<STO*>(const_cast<T*>(m.ptr()));
827 
828 
829    startSectionNB(2);
830      cmsis_complex_mat_vec(&S,
831                          a.const_ptr(),
832                          b.const_ptr(),
833                          scalar,
834                          tmp.ptr(),
835                          cmsis_res.ptr());
836    startSectionNB(2);
837    STOP_CYCLE_MEASUREMENT;
838 
839 
840 
841    //std::cout << cmsis_res;
842 
843    if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R,
844        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
845    {
846       printf("matrix times vector expression failed \r\n");
847    }
848 
849    std::cout << "=====\r\n";
850 
851 }
852 
853 
854 template<typename T,int R, int K,int C>
testmatmult()855 void testmatmult()
856 {
857    std::cout << "----\r\n";
858    std::cout << R << " x " << K << " x " << C << "\r\n";
859 
860    using S = typename CMSISMatrixType<T>::scalar;
861 
862    #if defined(STATIC_TEST)
863    PMat<T,R,K> ma;
864    #else
865    PMat<T> ma(R,K);
866    #endif
867    init_array(ma,R*K);
868 
869    #if defined(STATIC_TEST)
870    PMat<T,K,C> mb;
871    #else
872    PMat<T> mb(K,C);
873    #endif
874    init_array(mb,K*C);
875 
876 
877 
878    mb += TestConstant<T>::small;
879 
880    //std::cout << ma;
881    //std::cout << mb;
882 
883 
884    INIT_SYSTICK;
885    START_CYCLE_MEASUREMENT;
886    startSectionNB(1);
887    #if defined(STATIC_TEST)
888    PMat<T,R,C> res = dot(ma,mb);
889    #else
890    PMat<T> res = dot(ma,mb);
891    #endif
892    stopSectionNB(1);
893    STOP_CYCLE_MEASUREMENT;
894 
895    //PrintType<decltype(ma)>();
896    //PrintType<decltype(mb)>();
897    //std::cout << ma;
898    //std::cout << mb;
899    //std::cout << res;
900 
901 
902    //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
903 
904    PMat<T> tmp(C,K);
905 
906    INIT_SYSTICK;
907    START_CYCLE_MEASUREMENT;
908    #if defined(STATIC_TEST)
909    PMat<T,R,C> cmsis_res;
910    #else
911    PMat<T> cmsis_res(R,C);
912    #endif
913 
914 
915    typename CMSISMatrixType<T>::type SA;
916    SA.numRows = R;
917    SA.numCols = K;
918    SA.pData = reinterpret_cast<S*>(ma.ptr());
919 
920    typename CMSISMatrixType<T>::type SB;
921    SB.numRows = K;
922    SB.numCols = C;
923    SB.pData = reinterpret_cast<S*>(mb.ptr());
924 
925    typename CMSISMatrixType<T>::type RES;
926    RES.numRows = R;
927    RES.numCols = C;
928    RES.pData = reinterpret_cast<S*>(cmsis_res.ptr());
929 
930 
931    startSectionNB(2);
932    cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast<S*>(tmp.ptr()));
933    startSectionNB(2);
934    STOP_CYCLE_MEASUREMENT;
935 
936 
937    //std::cout << cmsis_res;
938 
939    if (!validate(res,cmsis_res,
940        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
941    {
942       printf("matrix times matrix expression failed \r\n");
943    }
944 
945    std::cout << "=====\r\n";
946 
947 }
948 
949 template<typename T,int R,int K, int C>
testsubmatmult()950 void testsubmatmult()
951 {
952    std::cout << "----\r\n";
953    std::cout << R << " x " << K << " x " << C << "\r\n";
954 
955    using S = typename CMSISMatrixType<T>::scalar;
956    constexpr int TOTALA = 4 + 2*K + 2*R + K*R;
957    constexpr int TOTALB = 4 + 2*C + 2*K + C*K;
958 
959    #if defined(STATIC_TEST)
960    PMat<T,R+2,K+2> ma;
961    #else
962    PMat<T> ma(R+2,K+2);
963    #endif
964    init_array(ma,TOTALA);
965 
966    #if defined(STATIC_TEST)
967    PMat<T,K+2,C+2> mb;
968    #else
969    PMat<T> mb(K+2,C+2);
970    #endif
971    init_array(mb,TOTALB);
972 
973 
974 
975    mb += MatTestConstant<T>::value;
976 
977    //std::cout << ma;
978    //std::cout << mb;
979 
980 
981    INIT_SYSTICK;
982    START_CYCLE_MEASUREMENT;
983    #if defined(STATIC_TEST)
984    PMat<T,R,C> res(T{});
985    #else
986    PMat<T> res(R,C,T{});
987    #endif
988    startSectionNB(1);
989    res.sub(Slice(0,R),Slice(0,C)) = copy(dot(ma.sub(Slice(0,R),Slice(0,K)),mb.sub(Slice(0,K),Slice(0,C))));
990    stopSectionNB(1);
991    STOP_CYCLE_MEASUREMENT;
992 
993    //PrintType<decltype(ma)>();
994    //PrintType<decltype(mb)>();
995    //std::cout << ma;
996    //std::cout << mb;
997    //std::cout << res;
998 
999 
1000    //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
1001 
1002    PMat<T> cmsis_res(R,C);
1003    PMat<T> cmsis_ma(R,K);
1004    PMat<T> cmsis_mb(K,C);
1005    PMat<T> tmp(C,K);
1006 
1007    typename CMSISMatrixType<T>::type SA;
1008    SA.numRows = R;
1009    SA.numCols = K;
1010    SA.pData = reinterpret_cast<S*>(cmsis_ma.ptr());
1011 
1012    typename CMSISMatrixType<T>::type SB;
1013    SB.numRows = K;
1014    SB.numCols = C;
1015    SB.pData = reinterpret_cast<S*>(cmsis_mb.ptr());
1016 
1017    typename CMSISMatrixType<T>::type RES;
1018    RES.numRows = R;
1019    RES.numCols = C;
1020    RES.pData = reinterpret_cast<S*>(cmsis_res.ptr());
1021 
1022 
1023 
1024    INIT_SYSTICK;
1025    START_CYCLE_MEASUREMENT;
1026    startSectionNB(2);
1027    cmsis_ma = copy(ma.sub(Slice(0,R),Slice(0,K)));
1028    cmsis_mb = copy(mb.sub(Slice(0,K),Slice(0,C)));
1029    cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast<S*>(tmp.ptr()));
1030    startSectionNB(2);
1031    STOP_CYCLE_MEASUREMENT;
1032 
1033 
1034    //std::cout << cmsis_res;
1035 
1036    if (!validate(res.sub(Slice(0,R),Slice(0,C)),cmsis_res,
1037        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
1038    {
1039       printf("matrix times matrix expression failed \r\n");
1040    }
1041 
1042 
1043    std::cout << "=====\r\n";
1044 }
1045 
1046 
1047 template<typename T,int R,int C>
testmattranspose()1048 void testmattranspose()
1049 {
1050    std::cout << "----\r\n";
1051    std::cout << R << " x " << C << "\r\n";
1052 
1053    #if defined(STATIC_TEST)
1054    PMat<T,R,C> ma;
1055    #else
1056    PMat<T> ma(R,C);
1057    #endif
1058    init_array(ma,R*C);
1059 
1060 
1061    //PrintType<decltype(ma)>();
1062    INIT_SYSTICK;
1063    START_CYCLE_MEASUREMENT;
1064    startSectionNB(1);
1065    #if defined(STATIC_TEST)
1066    PMat<T,C,R> res = ma.transpose();
1067    #else
1068    PMat<T> res = ma.transpose();
1069    #endif
1070    stopSectionNB(1);
1071    STOP_CYCLE_MEASUREMENT;
1072 
1073    //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
1074 
1075    INIT_SYSTICK;
1076    START_CYCLE_MEASUREMENT;
1077    #if defined(STATIC_TEST)
1078    PMat<T,C,R> cmsis_res;
1079    #else
1080    PMat<T> cmsis_res(C,R);
1081    #endif
1082 
1083    typename CMSISMatrixType<T>::type SA;
1084    SA.numRows = R;
1085    SA.numCols = C;
1086    SA.pData = reinterpret_cast<typename CMSISMatrixType<T>::scalar*>(ma.ptr());
1087 
1088    typename CMSISMatrixType<T>::type RES;
1089    RES.numRows = C;
1090    RES.numCols = R;
1091    RES.pData = reinterpret_cast<typename CMSISMatrixType<T>::scalar*>(cmsis_res.ptr());
1092 
1093 
1094    startSectionNB(2);
1095    cmsis_mat_trans(&SA, &RES);
1096    startSectionNB(2);
1097    STOP_CYCLE_MEASUREMENT;
1098 
1099    //std::cout << cmsis_res;
1100 
1101    if (!validate(res,cmsis_res,
1102        ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
1103    {
1104       printf("matrix transpose failed \r\n");
1105    }
1106 
1107 
1108    std::cout << "=====\r\n";
1109 }
1110 
1111 
1112 #if !defined(DISABLEFLOAT16)
_gen_sqrt(const float16_t v)1113 static float16_t _gen_sqrt(const float16_t v)
1114 {
1115    return((float16_t)sqrtf(v));
1116 }
1117 #endif
1118 
_gen_sqrt(const float32_t v)1119 static float32_t _gen_sqrt(const float32_t v)
1120 {
1121    return(sqrtf(v));
1122 }
1123 
_gen_sqrt(const float64_t v)1124 static float64_t _gen_sqrt(const float64_t v)
1125 {
1126    return(sqrt(v));
1127 }
1128 
1129 template<int L,template<int> typename A,
1130          typename V,typename T>
_householder(Vector<T,L,A> & res,const V & v,const T eps)1131 inline T _householder(Vector<T,L,A> &res,const V&v,const T eps)
1132 {
1133    T alpha = v[0];
1134    T tau;
1135    T beta;
1136    if (v.length()==1)
1137    {
1138       res[0] = T{};
1139       return(T{});
1140    }
1141    T xnorm2 = dot(v.sub(1),v.sub(1));
1142 
1143    //std::cout << xnorm2 << "\r\n";
1144    if (xnorm2 <= eps)
1145    {
1146       tau = T{};
1147       res = T{};
1148    }
1149    else
1150    {
1151       if (alpha<=0)
1152       {
1153          beta = _gen_sqrt(alpha*alpha+xnorm2);
1154       }
1155       else
1156       {
1157          beta = -_gen_sqrt(alpha*alpha+xnorm2);
1158       }
1159       T r = number_traits<T>::one() / (alpha - beta);
1160       res = v * r;
1161       tau = (beta - alpha)/beta;
1162       res[0] = number_traits<T>::one();
1163    }
1164    return(tau);
1165 }
1166 
1167 template<typename V,typename T,
1168          typename std::enable_if<
1169          !IsDynamic<V>::value &&
1170          SameElementType<V,T>::value &&
1171          IsVector<V>::value,bool>::type = true>
householder(const V & v,const T threshold)1172 auto householder(const V&v,const T threshold)
1173 {
1174   constexpr int NB = StaticLength<V>::value;
1175   Vector<T,NB,TMP_ALLOC> res;
1176   T beta = _householder(res,v,threshold);
1177   return std::tuple<T,Vector<T,NB,TMP_ALLOC>>(beta,res);
1178 }
1179 
1180 template<typename V,typename T,
1181          typename std::enable_if<
1182          IsDynamic<V>::value &&
1183          SameElementType<V,T>::value &&
1184          IsVector<V>::value,bool>::type = true>
householder(const V & v,const T threshold)1185 auto householder(const V&v,const T threshold)
1186 {
1187   Vector<T,DYNAMIC,TMP_ALLOC> res(v.length());
1188   T beta = _householder(res,v,threshold);
1189   return std::tuple<T,Vector<T,DYNAMIC,TMP_ALLOC>>(beta,res);
1190 }
1191 
1192 template<typename V,typename T,typename TMP,
1193          typename std::enable_if<
1194          IsDynamic<V>::value &&
1195          SameElementType<V,T>::value &&
1196          IsVector<V>::value,bool>::type = true>
householder(const V & v,const T threshold,TMP & res)1197 auto householder(const V&v,const T threshold,TMP &res)
1198 {
1199   T beta = _householder(res,v,threshold);
1200   return beta;
1201 }
1202 
1203 template<typename T>
1204 struct HouseholderThreshold;
1205 
1206 #if !defined(DISABLEFLOAT16)
1207 template<>
1208 struct HouseholderThreshold<float16_t>
1209 {
1210    static constexpr float16_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F16;
1211 };
1212 #endif
1213 
1214 template<>
1215 struct HouseholderThreshold<float64_t>
1216 {
1217    static constexpr float64_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F64;
1218 };
1219 
1220 
1221 template<>
1222 struct HouseholderThreshold<float32_t>
1223 {
1224    static constexpr float32_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F32;
1225 };
1226 
1227 
1228 template<typename T,int NB>
testHouseholder()1229 static void testHouseholder()
1230 {
1231    std::cout << "----\r\n" << "N = " << NB << "\r\n";
1232    #if defined(STATIC_TEST)
1233    PVector<T,NB> a;
1234    #else
1235    PVector<T> a(NB);
1236    #endif
1237 
1238    cmsis_init_householder(a.ptr(),NB);
1239 
1240 
1241    INIT_SYSTICK;
1242    START_CYCLE_MEASUREMENT;
1243    startSectionNB(1);
1244    auto res = householder(a,HouseholderThreshold<T>::value);
1245    //PVector<T,NB> res;// = a + b;
1246    //float res_beta=0;
1247    stopSectionNB(1);
1248    STOP_CYCLE_MEASUREMENT;
1249 
1250 
1251 
1252 
1253    INIT_SYSTICK;
1254    START_CYCLE_MEASUREMENT;
1255    #if defined(STATIC_TEST)
1256    PVector<T,NB> ref;
1257    #else
1258    PVector<T> ref(NB);
1259    #endif
1260    T ref_beta = cmsis_householder(a.const_ptr(),ref.ptr(),NB);
1261    STOP_CYCLE_MEASUREMENT;
1262 
1263    if (!validate(std::get<1>(res).const_ptr(),ref.const_ptr(),NB,
1264        ErrThreshold<T>::abserr_householder,ErrThreshold<T>::relerr_householder))
1265    {
1266       printf("householder vector failed \r\n");
1267    }
1268 
1269 
1270    if (!validate(std::get<0>(res),ref_beta,
1271        ErrThreshold<T>::abserr_householder,ErrThreshold<T>::relerr_householder))
1272    {
1273       printf("householder beta failed \r\n");
1274    }
1275    std::cout << "=====\r\n";
1276 }
1277 
1278 #include "debug_mat.h"
1279 
1280 #if 1
1281 // R >= C
1282 template<typename T,int R,int C,template<int> typename A>
QR(const Matrix<T,R,C,A> & m,const T eps,bool wantQ)1283 auto QR(const Matrix<T,R,C,A>&m,const T eps,bool wantQ)
1284 {
1285    #if defined(STATIC_TEST)
1286    Vector<T,C,TMP_ALLOC> tau;
1287    Matrix<T,R,C,TMP_ALLOC> RM = m;
1288    Matrix<T,R,R,TMP_ALLOC> Q = Matrix<T,R,R>::identity();
1289 
1290 
1291    // Temporaries
1292    Vector<T,R,TMP_ALLOC> tmpvec;
1293    Matrix<T,1,R,TMP_ALLOC> tmpmat;
1294    #else
1295    Vector<T> tau(m.columns());
1296    Matrix<T> RM = m;
1297    Matrix<T> Q = Matrix<T>::identity(m.rows());
1298 
1299 
1300    // Temporaries
1301    Vector<T> tmpvec(m.rows());
1302    Matrix<T> tmpmat(1,m.rows());
1303    #endif
1304 
1305    const int NBC = m.columns();
1306    const int NBR = m.rows();
1307 
1308 
1309    for(index_t c=0;c<NBC-1;c++)
1310    {
1311       auto beta = householder(RM.col(c,c),eps,tmpvec);
1312       tau[c] = beta;
1313 
1314       MatrixView<T> vt(tmpvec,1,NBR-c);
1315       dot(tmpmat.sub(0,1,0,NBC-c),vt,RM.sub(c,c));
1316 
1317       RM.sub(c,c) =
1318         RM.sub(c,c) - beta * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBC-c));
1319 
1320       // Copy householder reflector
1321       // Not valid when c == C-1
1322       // We don't want to use a  test since CMSIS-DSP is not using
1323       // one and introducing a test would give worse performance
1324       RM.col(c,c+1) = copy(tmpvec.sub(1,NBR-c));
1325 
1326    }
1327 
1328 
1329    auto beta = householder(RM.col(NBC-1,NBC-1),eps,tmpvec);
1330    tau[NBC-1] = beta;
1331 
1332    MatrixView<T> vt(tmpvec,1,NBR-(NBC-1));
1333    dot(tmpmat.sub(0,1,0,NBC-(NBC-1)),vt,RM.sub(NBC-1,NBC-1));
1334 
1335    RM.sub(NBC-1,NBC-1) =
1336         RM.sub(NBC-1,NBC-1) - beta * outer(tmpvec.sub(0,NBR-(NBC-1)),tmpmat.row(0,0,NBC-(NBC-1)));
1337 
1338 
1339 
1340 
1341    if (wantQ)
1342    {
1343       for(index_t c=NBC-1;c>=0;c--)
1344       {
1345          tmpvec.sub(1) = copy(RM.col(c,c+1));
1346          tmpvec[0] = number_traits<T>::one();
1347 
1348          MatrixView<T> vt(tmpvec,1,NBR-c);
1349          dot(tmpmat.sub(0,1,0,NBR-c),vt,Q.sub(c,c));
1350 
1351          Q.sub(c,c) =
1352            Q.sub(c,c) - tau[c] * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBR-c));
1353 
1354       }
1355    }
1356 
1357    return std::make_tuple(RM,Q,tau);
1358 
1359 }
1360 
1361 template<typename T,int R,int C>
testQR()1362 static void testQR()
1363 {
1364    std::cout << "----\r\n";
1365    std::cout << R << " x " << C << "\r\n";
1366    #if defined(STATIC_TEST)
1367    PMat<T,R,C> a;
1368    #else
1369    PMat<T> a(R,C);
1370    #endif
1371 
1372    cmsis_init_qr(a.ptr(),R,C);
1373 
1374 
1375    INIT_SYSTICK;
1376    START_CYCLE_MEASUREMENT;
1377    startSectionNB(1);
1378    auto res = QR(a,HouseholderThreshold<T>::value,true);
1379    stopSectionNB(1);
1380    STOP_CYCLE_MEASUREMENT;
1381 
1382    //std::cout << "next\r\n";
1383 
1384    //std::cout << std::get<0>(res);
1385    //std::cout << std::get<1>(res);
1386    //std::cout << std::get<2>(res);
1387 
1388    // For fair comparison, in dynamic mode we must take into
1389    // account the memory allocations since they are made
1390    // by the QR algorithms
1391    #if !defined(STATIC_TEST)
1392    INIT_SYSTICK;
1393    START_CYCLE_MEASUREMENT;
1394    #endif
1395 
1396    #if 0 //defined(STATIC_TEST)
1397    PMat<T,R,C> cmsis_res;
1398    PMat<T,R,C> cmsis_outRp;
1399    PMat<T,R,R> cmsis_outQp;
1400    PVector<T,C> cmsis_tau;
1401    PVector<T,R> cmsis_tmpa;
1402    PVector<T,C> cmsis_tmpb;
1403    #else
1404    PMat<T> cmsis_res(R,C);
1405    PMat<T> cmsis_outRp(R,C);
1406    PMat<T> cmsis_outQp(R,R);
1407    PVector<T> cmsis_tau(C);
1408    PVector<T> cmsis_tmpa(R);
1409    PVector<T> cmsis_tmpb(C);
1410    #endif
1411 
1412    typename CMSISMatrixType<T>::type RP;
1413    RP.numRows = R;
1414    RP.numCols = C;
1415    RP.pData = cmsis_outRp.ptr();
1416 
1417    typename CMSISMatrixType<T>::type QP;
1418    QP.numRows = R;
1419    QP.numCols = R;
1420    QP.pData = cmsis_outQp.ptr();
1421 
1422    typename CMSISMatrixType<T>::type IN;
1423    IN.numRows = R;
1424    IN.numCols = C;
1425    IN.pData = a.ptr();
1426 
1427    //std::cout << "-------\r\n";
1428 
1429    #if defined(STATIC_TEST)
1430    INIT_SYSTICK;
1431    START_CYCLE_MEASUREMENT;
1432    #endif
1433    arm_status status=cmsis_qr(&IN,HouseholderThreshold<T>::value,
1434                                     &RP,&QP,
1435                                     cmsis_tau.ptr(),
1436                                     cmsis_tmpa.ptr(),
1437                                     cmsis_tmpb.ptr());
1438    (void)status;
1439    STOP_CYCLE_MEASUREMENT;
1440 
1441    //std::cout << cmsis_outRp;
1442    //std::cout << cmsis_outQp;
1443    //std::cout << cmsis_tau;
1444 
1445    if (!validate(std::get<0>(res),cmsis_outRp,
1446        ErrThreshold<T>::abserr_qr,ErrThreshold<T>::relerr_qr))
1447    {
1448       printf("QR Rp matrix failed \r\n");
1449    }
1450 
1451 
1452    if (!validate(std::get<1>(res),cmsis_outQp,
1453        ErrThreshold<T>::abserr_qr,ErrThreshold<T>::relerr_qr))
1454    {
1455       printf("QR Qp matrix failed \r\n");
1456    }
1457 
1458    if (!validate(std::get<2>(res),cmsis_tau,
1459        ErrThreshold<T>::abserr_qr,ErrThreshold<T>::relerr_qr))
1460    {
1461       printf("QR tau failed \r\n");
1462    }
1463    std::cout << "=====\r\n";
1464 }
1465 
1466 #endif
1467 
1468 
1469 template<typename T,int R,template<int> typename A>
cholesky(const Matrix<T,R,R,A> & a)1470 auto cholesky(const Matrix<T,R,R,A>&a)
1471 {
1472      // Temporaries
1473    #if defined(STATIC_TEST)
1474    Matrix<T,R,R,TMP_ALLOC> g = a;
1475    Vector<T,R,TMP_ALLOC> tmp;
1476    #else
1477    Matrix<T> g = a;
1478    Vector<T> tmp(a.rows());
1479    #endif
1480 
1481    const int NBR = a.rows();
1482 
1483    g.col(0,0) = g.col(0,0) * (T)(number_traits<T>::one() / _gen_sqrt(g(0,0)));
1484 
1485    for(int j=1;j<NBR;j++)
1486    {
1487       dot(tmp.sub(j),g.sub(j,NBR,0,j) , g.row(j,0,j));
1488 
1489       g.col(j,j) = (g.col(j,j) - tmp.sub(j)) * (T)(number_traits<T>::one() / _gen_sqrt(g(j,j)- tmp[j]));
1490 
1491    }
1492    return(g);
1493 }
1494 
1495 
1496 template<typename T,int R>
testCholesky()1497 static void testCholesky()
1498 {
1499    std::cout << "----\r\n";
1500    std::cout << R << " x " << R << "\r\n";
1501    #if defined(STATIC_TEST)
1502    PMat<T,R,R> a;
1503    #else
1504    PMat<T> a(R,R);
1505    #endif
1506 
1507    cmsis_init_cholesky(a.ptr(),R,R);
1508 
1509    //std::cout << a;
1510 
1511    INIT_SYSTICK;
1512    START_CYCLE_MEASUREMENT;
1513    startSectionNB(1);
1514    // Not totally equivalent to CMSIS implementation
1515    // It should be possible to rewrite it to avoid use of
1516    // temporary buffer like CMSIS-DSP
1517    auto res = cholesky(a);
1518    stopSectionNB(1);
1519    STOP_CYCLE_MEASUREMENT;
1520 
1521 
1522    //std::cout << res;
1523 
1524    PMat<T,R,R> cmsis_res(T{});
1525 
1526    typename CMSISMatrixType<T>::type OUT;
1527    OUT.numRows = R;
1528    OUT.numCols = R;
1529    OUT.pData = cmsis_res.ptr();
1530 
1531 
1532    typename CMSISMatrixType<T>::type IN;
1533    IN.numRows = R;
1534    IN.numCols = R;
1535    IN.pData = a.ptr();
1536 
1537    //std::cout << "-------\r\n";
1538 
1539 
1540    INIT_SYSTICK;
1541    START_CYCLE_MEASUREMENT;
1542    arm_status status=cmsis_cholesky(&IN,&OUT);
1543    (void)status;
1544    STOP_CYCLE_MEASUREMENT;
1545 
1546    //std::cout << cmsis_res;
1547 
1548 
1549    if (!validateLT(res,cmsis_res,
1550        ErrThreshold<T>::abserr_cholesky,ErrThreshold<T>::relerr_cholesky))
1551    {
1552       printf("cholesky failed \r\n");
1553    }
1554    std::cout << "=====\r\n";
1555 }
1556 
1557 template<typename TT,typename ...T>
1558 struct TESTINV
1559 {
allTESTINV1560    static void all()
1561    {
1562       testinv<TT,T::value...>();
1563    }
1564 };
1565 
1566 template<typename TT,typename ...T>
1567 struct TESTOUTER
1568 {
allTESTOUTER1569    static void all()
1570    {
1571       testouter<TT,T::value...>();
1572    }
1573 };
1574 
1575 template<typename TT,typename ...T>
1576 struct TESTMATVEC
1577 {
allTESTMATVEC1578    static void all()
1579    {
1580       testmatvec<TT,T::value...>();
1581    }
1582 };
1583 
1584 template<typename TT,typename ...T>
1585 struct TESTCOMPLEXMATVEC
1586 {
allTESTCOMPLEXMATVEC1587    static void all()
1588    {
1589       testcomplexmatvec<TT,T::value...>();
1590    }
1591 };
1592 
1593 template<typename TT,typename ...T>
1594 struct TESTADD
1595 {
allTESTADD1596    static void all()
1597    {
1598       testadd <TT,T::value...>();
1599    }
1600 };
1601 
1602 template<typename TT,typename ...T>
1603 struct TESTMATTRANSPOSE
1604 {
allTESTMATTRANSPOSE1605    static void all()
1606    {
1607       testmattranspose<TT,T::value...>();
1608    }
1609 };
1610 
1611 template<typename TT,typename ...T>
1612 struct TESTMATMULT
1613 {
allTESTMATMULT1614    static void all()
1615    {
1616       testmatmult<TT,T::value...>();
1617    }
1618 };
1619 
1620 template<typename TT,typename ...T>
1621 struct TESTSUBMATMULT
1622 {
allTESTSUBMATMULT1623    static void all()
1624    {
1625       testsubmatmult<TT,T::value...>();
1626    }
1627 };
1628 
1629 
1630 template<typename TT,typename... T>
1631 struct TEST_CASES
1632 {
allTEST_CASES1633    static void all()
1634    {
1635       (mp_push_front<T,TT>::all(),...);
1636    }
1637 };
1638 
1639 template<template <typename, typename...> typename F,
1640          typename... A>
1641 using ALL_TESTS = mp_rename<mp_push_front<mp_product<F, A...>,float>,TEST_CASES>;
1642 
1643 
1644 template<typename T>
matrix_all_test()1645 void matrix_all_test()
1646 {
1647 
1648 #if defined(MATRIX_TEST)
1649    #if !defined(SUBTEST1) && !defined(SUBTEST2)
1650    const int nb_tails = TailForTests<T>::tail;
1651    const int nb_loops = TailForTests<T>::loop;
1652    using UNROLL = mp_rename<mp_list_v<1,2,4,8,9,11>,mp_list>;
1653    #endif
1654 
1655    #if defined(SUBTEST8) || defined(SUBTEST14)
1656    using UNROLLA = mp_rename<mp_list_v<1>,mp_list>;
1657    #endif
1658    #if defined(SUBTEST9) || defined(SUBTEST15)
1659    using UNROLLA = mp_rename<mp_list_v<2>,mp_list>;
1660    #endif
1661    #if defined(SUBTEST10) || defined(SUBTEST16)
1662    using UNROLLA = mp_rename<mp_list_v<4>,mp_list>;
1663    #endif
1664 
1665    #if defined(SUBTEST11) || defined(SUBTEST17)
1666    using UNROLLA = mp_rename<mp_list_v<8>,mp_list>;
1667    #endif
1668    #if defined(SUBTEST12) || defined(SUBTEST18)
1669    using UNROLLA = mp_rename<mp_list_v<9>,mp_list>;
1670    #endif
1671    #if defined(SUBTEST13) || defined(SUBTEST19)
1672    using UNROLLA = mp_rename<mp_list_v<11>,mp_list>;
1673    #endif
1674 
1675    #if !defined(SUBTEST1) && !defined(SUBTEST2)
1676    using VEC = mp_rename<mp_list_v<1,
1677                                    nb_tails,
1678                                    nb_loops,
1679                                    nb_loops+1,
1680                                    nb_loops+nb_tails>,mp_list>;
1681    #endif
1682 
1683 
1684    //using res = mp_rename<mp_push_front<mp_product<TEST, A, B,C>,float>,TEST_CASES>;
1685    //using res = ALL_TESTS<TEST,A,B,C>;
1686    //PrintType<test>();
1687 
1688 
1689    if constexpr (number_traits<T>::is_float)
1690    {
1691 
1692 #if defined(SUBTEST1)
1693 
1694       title<T>("Householder");
1695       testHouseholder<T,NBVEC_4>();
1696       testHouseholder<T,NBVEC_16>();
1697       testHouseholder<T,NBVEC_32>();
1698 
1699       title<T>("QR");
1700       testQR<T,NBVEC_4,NBVEC_4>();
1701       testQR<T,NBVEC_16,NBVEC_16>();
1702       testQR<T,NBVEC_32,NBVEC_32>();
1703 
1704       title<T>("Cholesky");
1705       testCholesky<T,NBVEC_4>();
1706       testCholesky<T,NBVEC_16>();
1707       testCholesky<T,NBVEC_32>();
1708 
1709 #endif
1710 
1711 #if defined(SUBTEST2)
1712       title<T>("Matrix inverse");
1713       testinv<T,NBVEC_4,NBVEC_4>();
1714       testinv<T,NBVEC_8,NBVEC_8>();
1715       testinv<T,NBVEC_16,NBVEC_16>();
1716 #endif
1717 
1718 
1719    }
1720 
1721 #if defined(SUBTEST1)
1722       title<T>("Matrix outer product");
1723 
1724       testouter<T,NBVEC_4,NBVEC_4>();
1725       testouter<T,NBVEC_8,NBVEC_8>();
1726       testouter<T,NBVEC_16,NBVEC_16>();
1727 #endif
1728 
1729 #if defined(SUBTEST3)
1730       title<T>("Matrix outer product");
1731       ALL_TESTS<TESTOUTER,UNROLL,VEC>::all();
1732 #endif
1733 
1734       if constexpr (!std::is_same<T,double>::value)
1735       {
1736 #if defined(SUBTEST1)
1737           title<T>("Matrix times vector");
1738 
1739           testmatvec<T,NBVEC_4 ,NBVEC_4>();
1740           testmatvec<T,NBVEC_16,NBVEC_16>();
1741           testmatvec<T,NBVEC_32,NBVEC_32>();
1742           testmatvec<T,NBVEC_44,NBVEC_44>();
1743           testmatvec<T,NBVEC_47,NBVEC_47>();
1744 #endif
1745 
1746 #if defined(SUBTEST4)
1747           title<T>("Matrix times vector");
1748           ALL_TESTS<TESTMATVEC,UNROLL,VEC>::all();
1749 #endif
1750 
1751 #if defined(SUBTEST1)
1752           title<T>("Matrix times vector expression");
1753 
1754           testcomplexmatvec<T,NBVEC_4 ,NBVEC_4>();
1755           testcomplexmatvec<T,NBVEC_16,NBVEC_16>();
1756           testcomplexmatvec<T,NBVEC_32,NBVEC_32>();
1757           testcomplexmatvec<T,NBVEC_44,NBVEC_44>();
1758           testcomplexmatvec<T,NBVEC_47,NBVEC_47>();
1759 #endif
1760 
1761 #if defined(SUBTEST5)
1762           title<T>("Matrix times vector expression");
1763           ALL_TESTS<TESTCOMPLEXMATVEC,UNROLL,VEC>::all();
1764 #endif
1765       }
1766 
1767    if constexpr (!std::is_same<T,Q7>::value && !std::is_same<T,double>::value)
1768    {
1769 #if defined(SUBTEST1)
1770       title<T>("Matrix add");
1771 
1772       testadd<T,NBVEC_4,NBVEC_4>();
1773       testadd<T,NBVEC_8,NBVEC_8>();
1774       testadd<T,NBVEC_16,NBVEC_16>();
1775 #endif
1776 
1777 #if defined(SUBTEST6)
1778       title<T>("Matrix add");
1779       ALL_TESTS<TESTADD,UNROLL,VEC>::all();
1780 #endif
1781     }
1782 
1783  #if defined(SUBTEST1)
1784    title<T>("Matrix diag");
1785 
1786    testdiag<T,NBVEC_4,NBVEC_4>();
1787    testdiag<T,NBVEC_8,NBVEC_8>();
1788    testdiag<T,NBVEC_16,NBVEC_16>();
1789 
1790    title<T>("Matrix submatrix");
1791 
1792    testview<T,NBVEC_16,NBVEC_16>();
1793 #endif
1794 
1795 #if defined(SUBTEST1)
1796    title<T>("Matrix multiply");
1797    testmatmult<T,NBVEC_4,NBVEC_4,NBVEC_4>();
1798    testmatmult<T,NBVEC_16,NBVEC_16,NBVEC_16>();
1799    testmatmult<T,NBVEC_32,NBVEC_32,NBVEC_32>();
1800 #endif
1801 
1802 
1803 
1804 #if defined(SUBTEST1)
1805    title<T>("Matrix transpose");
1806    testmattranspose<T,NBVEC_2,NBVEC_2>();
1807    testmattranspose<T,NBVEC_3,NBVEC_3>();
1808    testmattranspose<T,NBVEC_4,NBVEC_4>();
1809    testmattranspose<T,NBVEC_16,NBVEC_16>();
1810    testmattranspose<T,NBVEC_32,NBVEC_32>();
1811 #endif
1812 
1813 #if defined(SUBTEST7)
1814    title<T>("Matrix transpose");
1815    ALL_TESTS<TESTMATTRANSPOSE,UNROLL,VEC>::all();
1816 #endif
1817 
1818 #if defined(SUBTEST8) || defined(SUBTEST9)|| defined(SUBTEST10)|| defined(SUBTEST11)|| defined(SUBTEST12)|| defined(SUBTEST13)
1819    title<T>("Matrix multiply");
1820    ALL_TESTS<TESTMATMULT,UNROLLA,VEC,UNROLL>::all();
1821 #endif
1822 
1823 #if defined(SUBTEST1)
1824    title<T>("Submatrix multiply");
1825    testsubmatmult<T,NBVEC_4,NBVEC_4,NBVEC_4>();
1826    testsubmatmult<T,NBVEC_16,NBVEC_16,NBVEC_16>();
1827 #endif
1828 
1829 #if defined(SUBTEST14) || defined(SUBTEST15) || defined(SUBTEST16)|| defined(SUBTEST17)|| defined(SUBTEST18)|| defined(SUBTEST19)
1830    title<T>("Submatrix multiply");
1831    ALL_TESTS<TESTSUBMATMULT,UNROLLA,VEC,UNROLL>::all();
1832 #endif
1833 
1834    //testsubmatmult<T,NBVEC_32>();
1835 #endif
1836 
1837 };
1838 
1839 
1840 
matrix_test()1841 void matrix_test()
1842 {
1843 #if defined(MATRIX_TEST)
1844    #if defined(F64_DT)
1845    matrix_all_test<double>();
1846    #endif
1847    #if defined(F32_DT)
1848    matrix_all_test<float>();
1849    #endif
1850    #if defined(F16_DT) && !defined(DISABLEFLOAT16)
1851    matrix_all_test<float16_t>();
1852    #endif
1853    #if defined(Q31_DT)
1854    matrix_all_test<Q31>();
1855    #endif
1856    #if defined(Q15_DT)
1857    matrix_all_test<Q15>();
1858    #endif
1859    #if defined(Q7_DT)
1860    matrix_all_test<Q7>();
1861    #endif
1862 #endif
1863 }