1 extern "C" {
2 extern void matrix_test();
3 }
4
5 #include "allocator.h"
6
7 #include <dsppp/arch.hpp>
8 #include <dsppp/fixed_point.hpp>
9 #include <dsppp/matrix.hpp>
10 #include <dsppp/unroll.hpp>
11
12 #include <iostream>
13
14 #include <cmsis_tests.h>
15 #include "boost/mp11.hpp"
16 using namespace boost::mp11;
17
18
19 extern "C" {
20 #include "dsp/matrix_functions.h"
21 #include "dsp/matrix_utils.h"
22 }
23
24 template<typename T>
25 struct MatTestConstant;
26
27 template<>
28 struct MatTestConstant<double>
29 {
30 constexpr static double value = 0.001;
31 constexpr static double half = 0.5;
32 };
33
34 template<>
35 struct MatTestConstant<float32_t>
36 {
37 constexpr static float value = 0.001f;
38 constexpr static float half = 0.5f;
39 };
40
41 #if !defined(DISABLEFLOAT16)
42 template<>
43 struct MatTestConstant<float16_t>
44 {
45 constexpr static float16_t value = (float16_t)0.001f;
46 constexpr static float16_t half = (float16_t)0.5f;
47
48 };
49 #endif
50
51 template<>
52 struct MatTestConstant<Q7>
53 {
54 constexpr static Q7 value = 0.001_q7;
55 constexpr static Q7 half = 0.5_q7;
56 };
57
58 template<>
59 struct MatTestConstant<Q15>
60 {
61 constexpr static Q15 value = 0.001_q15;
62 constexpr static Q15 half = 0.5_q15;
63 };
64
65 template<>
66 struct MatTestConstant<Q31>
67 {
68 constexpr static Q31 value = 0.001_q31;
69 constexpr static Q31 half = 0.5_q31;
70 };
71
72
73 template<typename T>
74 struct ErrThreshold
75 {
76 constexpr static float abserr = 0;
77 constexpr static float relerr = 0;
78 constexpr static float abserr_cholesky = 0;
79 constexpr static float relerr_cholesky = 0;
80 constexpr static float abserr_householder = 0;
81 constexpr static float relerr_householder = 0;
82 constexpr static float abserr_qr = 0;
83 constexpr static float relerr_qr = 0;
84 constexpr static float abserr_inv = 0;
85 constexpr static float relerr_inv = 0;
86 };
87
88 // Should be more accurate than F32 but right know
89 // we only check there is no regression compared to f32
90 template<>
91 struct ErrThreshold<double>
92 {
93 constexpr static float abserr = ABS_ERROR;
94 constexpr static float relerr = REL_ERROR;
95 constexpr static float abserr_cholesky = 3e-4;
96 constexpr static float relerr_cholesky = 1e-4;
97
98 constexpr static float abserr_householder = ABS_ERROR;
99 constexpr static float relerr_householder = REL_ERROR;
100 constexpr static float abserr_qr = ABS_ERROR;
101 constexpr static float relerr_qr = REL_ERROR;
102
103 constexpr static float abserr_inv = ABS_ERROR;
104 constexpr static float relerr_inv = REL_ERROR;
105 };
106
107 template<>
108 struct ErrThreshold<float32_t>
109 {
110 constexpr static float abserr = ABS_ERROR;
111 constexpr static float relerr = REL_ERROR;
112 constexpr static float abserr_cholesky = 3e-4;
113 constexpr static float relerr_cholesky = 1e-4;
114
115 constexpr static float abserr_householder = ABS_ERROR;
116 constexpr static float relerr_householder = REL_ERROR;
117 constexpr static float abserr_qr = ABS_ERROR;
118 constexpr static float relerr_qr = REL_ERROR;
119
120 constexpr static float abserr_inv = 4.0e-6;
121 constexpr static float relerr_inv = 5.0e-6;
122 };
123
124 #if !defined(DISABLEFLOAT16)
125 template<>
126 struct ErrThreshold<float16_t>
127 {
128 constexpr static float abserr = ABS_ERROR;
129 constexpr static float relerr = REL_ERROR;
130 constexpr static float abserr_cholesky = 2e-1;
131 constexpr static float relerr_cholesky = 2e-1;
132
133 constexpr static float abserr_householder = 2e-4;
134 constexpr static float relerr_householder = 2e-3;
135 // 32x32 is not numerically behaving well with
136 // the matrix used as input
137 constexpr static float abserr_qr = 2.0;
138 constexpr static float relerr_qr = 1e-2;
139
140 constexpr static float abserr_inv = 3e-2;
141 constexpr static float relerr_inv = 3e-2;
142 };
143 #endif
144
cmsisdsp_mat_inv(float64_t * amod,float64_t * b,uint32_t r,uint32_t c)145 void cmsisdsp_mat_inv(float64_t *amod,
146 float64_t* b,
147 uint32_t r,uint32_t c)
148 {
149 arm_matrix_instance_f64 src;
150 arm_matrix_instance_f64 dst;
151
152
153 src.numRows = r;
154 src.numCols = c;
155 src.pData = amod;
156
157 dst.numRows = r;
158 dst.numCols = c;
159 dst.pData = b;
160
161 arm_status status = arm_mat_inverse_f64(&src,&dst);
162 (void)status;
163 };
164
cmsisdsp_mat_inv(float32_t * amod,float32_t * b,uint32_t r,uint32_t c)165 void cmsisdsp_mat_inv(float32_t *amod,
166 float32_t* b,
167 uint32_t r,uint32_t c)
168 {
169 arm_matrix_instance_f32 src;
170 arm_matrix_instance_f32 dst;
171
172
173 src.numRows = r;
174 src.numCols = c;
175 src.pData = amod;
176
177 dst.numRows = r;
178 dst.numCols = c;
179 dst.pData = b;
180
181 arm_status status = arm_mat_inverse_f32(&src,&dst);
182 (void)status;
183 };
184
185 #if !defined(DISABLEFLOAT16)
cmsisdsp_mat_inv(float16_t * amod,float16_t * b,uint32_t r,uint32_t c)186 void cmsisdsp_mat_inv(float16_t *amod,
187 float16_t* b,
188 uint32_t r,uint32_t c)
189 {
190 arm_matrix_instance_f16 src;
191 arm_matrix_instance_f16 dst;
192
193
194 src.numRows = r;
195 src.numCols = c;
196 src.pData = amod;
197
198 dst.numRows = r;
199 dst.numCols = c;
200 dst.pData = b;
201
202 arm_status status = arm_mat_inverse_f16(&src,&dst);
203 (void)status;
204 };
205 #endif
206
207 const float32_t mat64[64] = {0.395744, 0.623798, 0.885422, 0.95415, 0.310384, 0.257541,
208 0.631426, 0.424491, 0.130945, 0.799959, 0.133693, 0.479455,
209 0.519254, 0.381039, 0.617455, 0.748273, 0.146944, 0.928945,
210 0.430936, 0.508207, 0.829023, 0.358027, 0.999501, 0.851953,
211 0.273895, 0.685898, 0.0436612, 0.295212, 0.467651, 0.0515567,
212 0.21037, 0.607475, 0.570295, 0.281109, 0.979219, 0.0947969,
213 0.319016, 0.398405, 0.349953, 0.710002, 0.431597, 0.447659,
214 0.0747669, 0.057063, 0.165648, 0.773106, 0.135765, 0.709327,
215 0.873836, 0.292361, 0.00202529, 0.392942, 0.520183, 0.0528055,
216 0.797982, 0.613497, 0.509682, 0.0435791, 0.780526, 0.960582,
217 0.535914, 0.216113, 0.134108, 0.225859};
218
219 const float32_t mat16[16] = {1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 6.0,
220 3.0, 5.0, 9.0, 10.0, 4.0, 6.0, 10.0, 16.0};
221
222 const float32_t mat256[256] = {0.97936, 0.498105, 0.452618, 0.299761, 0.688624, 0.247212, \
223 0.228337, 0.22905, 0.563815, 0.251998, 0.5238, 0.141223, 0.0980689, \
224 0.79112, 0.771182, 0.890995, 0.0256181, 0.0377277, 0.575629, \
225 0.648138, 0.926218, 0.803878, 0.620333, 0.325635, 0.587355, 0.041795, \
226 0.934271, 0.0690131, 0.0240136, 0.800828, 0.522999, 0.374706, \
227 0.266977, 0.208028, 0.112878, 0.0389899, 0.658311, 0.205067, \
228 0.244172, 0.0762778, 0.190575, 0.677312, 0.0682093, 0.367328, \
229 0.0191464, 0.988968, 0.437477, 0.130622, 0.907823, 0.0116559, \
230 0.614526, 0.447443, 0.0126975, 0.995496, 0.947676, 0.659996, \
231 0.321547, 0.725415, 0.658426, 0.0243924, 0.0843519, 0.351748, \
232 0.974332, 0.673381, 0.375012, 0.719626, 0.721219, 0.766905, \
233 0.17065, 0.648905, 0.770983, 0.360008, 0.344226, 0.179633, 0.347905, \
234 0.555561, 0.742615, 0.908389, 0.806959, 0.176078, 0.872167, \
235 0.321839, 0.098607, 0.954515, 0.627286, 0.235082, 0.746179, 0.163606, \
236 0.899323, 0.871471, 0.712448, 0.956971, 0.736687, 0.750702, 0.843348, \
237 0.302435, 0.444862, 0.0644597, 0.765519, 0.518397, 0.765541, \
238 0.900375, 0.201853, 0.490325, 0.721786, 0.893647, 0.774724, \
239 0.0983631, 0.339887, 0.526084, 0.0786152, 0.515697, 0.438801, \
240 0.226628, 0.125093, 0.886642, 0.617766, 0.71696, 0.473172, 0.640949, \
241 0.67688, 0.676214, 0.453662, 0.345796, 0.608999, 0.904448, 0.0965741, \
242 0.00461771, 0.467399, 0.292235, 0.0418646, 0.116632, 0.0766192, \
243 0.269051, 0.411649, 0.0538381, 0.973959, 0.667106, 0.301662, \
244 0.977206, 0.891751, 0.420267, 0.441334, 0.0896179, 0.249969, \
245 0.672614, 0.623966, 0.609733, 0.320772, 0.39723, 0.845196, 0.653877, \
246 0.0599186, 0.340188, 0.199787, 0.598104, 0.45664, 0.920485, 0.969439, \
247 0.446555, 0.0932837, 0.0247635, 0.747644, 0.438759, 0.639154, \
248 0.754049, 0.379433, 0.968655, 0.0452146, 0.208123, 0.252654, \
249 0.261898, 0.608665, 0.145211, 0.395368, 0.799111, 0.697823, \
250 0.382906, 0.456515, 0.262579, 0.284169, 0.881488, 0.860877, 0.155548, \
251 0.537387, 0.804235, 0.311383, 0.183216, 0.677692, 0.829542, 0.406049, \
252 0.860392, 0.467668, 0.385633, 0.654692, 0.841125, 0.178406, \
253 0.668945, 0.369609, 0.809711, 0.454593, 0.632028, 0.605791, 0.643851, \
254 0.787023, 0.285633, 0.832216, 0.30892, 0.303559, 0.704898, 0.61118, \
255 0.435547, 0.173678, 0.788689, 0.319511, 0.648378, 0.635417, 0.125127, \
256 0.310251, 0.800819, 0.4863, 0.924361, 0.308059, 0.952175, 0.449844, \
257 0.215496, 0.257826, 0.556383, 0.259735, 0.197234, 0.0509903, 0.21474, \
258 0.145085, 0.41288, 0.876758, 0.096721, 0.228955, 0.0152248, 0.126501, \
259 0.28899, 0.336668, 0.580015, 0.932761, 0.989783, 0.667379, \
260 0.798751, 0.587173, 0.445902, 0.041448, 0.311878, 0.0332857, \
261 0.401984, 0.795049, 0.8222, 0.678648, 0.807558};
262
263 template<typename T,int R,int C, template<int> typename A>
init_mat(Matrix<T,R,C,A> & pDst,std::size_t r,std::size_t c)264 void init_mat(Matrix<T,R,C,A> &pDst,std::size_t r,std::size_t c)
265 {
266 const float32_t *p;
267 if ((r==4) && (r==c))
268 {
269 p = mat16;
270 }
271
272 if ((r==8) && (r==c))
273 {
274 p = mat64;
275 }
276
277 if ((r==16) && (r==c))
278 {
279 p = mat256;
280 }
281
282
283 for(std::size_t i=0;i<r*c;i++)
284 {
285 pDst[i] = T(p[i]);
286 }
287 }
288
289 #if !defined(DISABLEFLOAT16)
_abs(float16_t x)290 __STATIC_FORCEINLINE float16_t _abs(float16_t x)
291 {
292 return((float16_t)fabsf((float)x));
293 };
294 #endif
295
_abs(float32_t x)296 __STATIC_FORCEINLINE float32_t _abs(float32_t x)
297 {
298 return(fabsf(x));
299 };
300
_abs(double x)301 __STATIC_FORCEINLINE double _abs(double x)
302 {
303 return(fabs(x));
304 };
305
306 template<typename T,
307 int NB,
308 template<int> typename A,
309 typename M>
_matinv(const Matrix<T,NB,NB,A> & a,M && res)310 void _matinv(const Matrix<T,NB,NB,A> &a,M && res)
311 {
312
313 Matrix<T,NB,NB,TMP_ALLOC> b = a;
314
315 const vector_length_t nb_rows = a.rows();
316 const vector_length_t nb_cols = a.columns();
317
318
319 for(index_t r=0;r < nb_rows ; r++)
320 {
321 res.row(r) = T{};
322 res(r,r) = number_traits<T>::one();
323 }
324
325
326 for(index_t c=0;c < nb_cols ; c++)
327 {
328 T pivot = b(c,c);
329 index_t selectedRow = c;
330
331
332 for(index_t r=c+1;r < nb_rows ; r++)
333 {
334 T newPivot = b(r,c);
335 if (_abs(newPivot)>_abs(pivot))
336 {
337 pivot = newPivot;
338 selectedRow = r;
339 }
340 }
341
342 if ((pivot!=T{}) && (selectedRow != c))
343 {
344 swap(b.row(c,c),b.row(selectedRow,c));
345 swap(res.row(c),res.row(selectedRow));
346 }
347 else if (pivot == T{})
348 {
349 break;
350 }
351
352 pivot = number_traits<T>::one() / pivot;
353
354 b.row(c,c) *= pivot;
355 res.row(c) *= pivot;
356
357 index_t r=0;
358
359 for(;r < c ; r++)
360 {
361 const T tmp = b(r,c);
362 b.row(r,c) -= b.row(c,c)*tmp;
363 res.row(r) -= res.row(c)*tmp;
364 }
365
366 for(r=c+1;r < nb_rows ; r++)
367 {
368 const T tmp = b(r,c);
369 b.row(r,c) -= b.row(c,c)*tmp;
370 res.row(r) -= res.row(c)*tmp;
371 }
372
373 }
374
375
376 }
377
378 template<typename T,
379 int NB,
380 template<int> typename A,
381 typename std::enable_if<(NB>0),bool>::type = true>
matinv(const Matrix<T,NB,NB,A> & a)382 Matrix<T,NB,NB,TMP_ALLOC> matinv(const Matrix<T,NB,NB,A> &a)
383 {
384 Matrix<T,NB,NB,TMP_ALLOC> res;
385 _matinv(a,res);
386 return(res);
387 }
388
389 template<typename T,
390 int NB,
391 template<int> typename A,
392 typename std::enable_if<(NB<0),bool>::type = true>
matinv(const Matrix<T,NB,NB,A> & a)393 Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> matinv(const Matrix<T,NB,NB,A> &a)
394 {
395 Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> res(a.rows(),a.columns());
396 return (_matinv(a,res));
397 return(res);
398 }
399
400 template<typename T,
401 int NB,
402 template<int> typename A,
403 typename std::enable_if<(NB<0),bool>::type = true>
matinv(Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> & res,const Matrix<T,NB,NB,A> & a)404 void matinv(Matrix<T,DYNAMIC,DYNAMIC,TMP_ALLOC> &res, const Matrix<T,NB,NB,A> &a)
405 {
406 (void)_matinv(a,res);
407 }
408
409
410 template<typename T,int R,int C>
testinv()411 void testinv()
412 {
413 std::cout << "----\r\n";
414 std::cout << R << " x " << C << "\r\n";
415
416 #if defined(STATIC_TEST)
417 PMat<T,R,C> a;
418 #else
419 PMat<T> a(R,C);
420 #endif
421
422 init_mat(a,R,C);
423
424 #if !defined(STATIC_TEST)
425 PMat<T> res(R,C);
426 #endif
427
428 INIT_SYSTICK;
429 START_CYCLE_MEASUREMENT;
430 startSectionNB(1);
431 #if defined(STATIC_TEST)
432 PMat<T,R,C> res = matinv(a);
433 #else
434 matinv(res,a);
435 #endif
436 stopSectionNB(1);
437 STOP_CYCLE_MEASUREMENT;
438
439 PMat<T> amod(a);
440 PMat<T> cmsis_res(R,C);
441
442 INIT_SYSTICK;
443 START_CYCLE_MEASUREMENT;
444 cmsisdsp_mat_inv(amod.ptr(),cmsis_res.ptr(),R,C);
445 STOP_CYCLE_MEASUREMENT;
446
447
448
449 if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
450 ErrThreshold<T>::abserr_inv,ErrThreshold<T>::relerr_inv))
451 {
452 printf("inv failed \r\n");
453
454 }
455
456 std::cout << "=====\r\n";
457
458 }
459
460 template<typename T,int R,int C>
testadd()461 void testadd()
462 {
463 std::cout << "----\r\n";
464 std::cout << R << " x " << C << "\r\n";
465
466 #if defined(STATIC_TEST)
467 PMat<T,R,C> a;
468 PMat<T,R,C> b;
469 #else
470 PMat<T> a(R,C);
471 PMat<T> b(R,C);
472 #endif
473
474 init_array(a,R*C);
475 init_array(b,R*C);
476
477
478 INIT_SYSTICK;
479 START_CYCLE_MEASUREMENT;
480 startSectionNB(1);
481 #if defined(STATIC_TEST)
482 PMat<T,R,C> res = a+b;
483 #else
484 PMat<T> res = a+b;
485 #endif
486 stopSectionNB(1);
487 STOP_CYCLE_MEASUREMENT;
488
489 //PrintType<decltype(a+b)>();
490 //PrintType<decltype(res)>();
491 //
492 //std::cout << "a: " << IsVector<decltype(a)>::value << "\r\n";
493 //std::cout << "b: " << IsVector<decltype(b)>::value << "\r\n";
494 //std::cout << "a+b: " << IsVector<decltype(a+b)>::value << "\r\n";
495 //std::cout << "res: " << IsVector<decltype(res)>::value << "\r\n";
496 //std::cout << "same: " << SameElementType<decltype(res),decltype(a+b)>::value << "\r\n";
497 //
498 //std::cout << "vec inst: " << has_vector_inst<decltype(res)>() << "\r\n";
499 //std::cout << "vec index pair: " << vector_idx_pair<decltype(res),decltype(a+b)>() << "\r\n";
500 //std::cout << "must use mat idx: " << must_use_matrix_idx_pair<decltype(res),decltype(a+b)>() << "\r\n";
501
502 INIT_SYSTICK;
503 START_CYCLE_MEASUREMENT;
504 #if defined(STATIC_TEST)
505 PMat<T,R,C> cmsis_res;
506 #else
507 PMat<T> cmsis_res(R,C);
508 #endif
509 cmsisdsp_mat_add(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C);
510 STOP_CYCLE_MEASUREMENT;
511
512
513
514 if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
515 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
516 {
517 printf("add failed \r\n");
518 }
519
520 std::cout << "=====\r\n";
521
522 }
523
524
525 template<typename T,int R,int C>
testdiag()526 void testdiag()
527 {
528 std::cout << "----\r\n";
529 std::cout << R << " x " << C << "\r\n";
530 #if defined(STATIC_TEST)
531 PVector<T,R> a;
532 #else
533 PVector<T> a(R);
534 #endif
535 init_array(a,R);
536
537 INIT_SYSTICK;
538 START_CYCLE_MEASUREMENT;
539 startSectionNB(1);
540 #if defined(STATIC_TEST)
541 PMat<T,R,C> res=PMat<T,R,C>::diagonal(a);
542 #else
543 PMat<T> res=PMat<T>::diagonal(a);
544 #endif
545 stopSectionNB(1);
546 STOP_CYCLE_MEASUREMENT;
547
548
549 const T* ap = a.const_ptr();
550
551 INIT_SYSTICK;
552 START_CYCLE_MEASUREMENT;
553 #if defined(STATIC_TEST)
554 PMat<T,R,C> cmsis_res;
555 #else
556 PMat<T> cmsis_res(R,C);
557 #endif
558 T* refp = cmsis_res.ptr();
559
560 UNROLL_LOOP
561 for(index_t row=0;row < R; row++)
562 {
563 UNROLL_LOOP
564 for(index_t col=0;col < C; col++)
565 {
566 if (row != col)
567 {
568 refp[row*C+col] = T{};
569 }
570 else
571 {
572 refp[row*C+col] = ap[row];
573 }
574 }
575 }
576 STOP_CYCLE_MEASUREMENT;
577
578
579
580 if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
581 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
582 {
583 printf("diag failed \r\n");
584 }
585
586 std::cout << "=====\r\n";
587 }
588
589
590
591 template<typename T,int R,int C>
testouter()592 void testouter()
593 {
594 std::cout << "----\r\n";
595 std::cout << R << " x " << C << "\r\n";
596
597 PVector<T,R> a;
598 PVector<T,C> b;
599 init_array(a,R);
600 init_array(b,C);
601
602 b = b + b;
603
604 INIT_SYSTICK;
605 START_CYCLE_MEASUREMENT;
606 startSectionNB(1);
607 PMat<T,R,C> res = outer(a,b);
608 stopSectionNB(1);
609 STOP_CYCLE_MEASUREMENT;
610
611
612
613 INIT_SYSTICK;
614 START_CYCLE_MEASUREMENT;
615 startSectionNB(2);
616 #if defined(STATIC_TEST)
617 PMat<T,R,C> cmsis_res;
618 #else
619 PMat<T> cmsis_res(R,C);
620 #endif
621 CMSISOuter<T>::run(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C);
622 startSectionNB(2);
623 STOP_CYCLE_MEASUREMENT;
624
625 //std::cout<<cmsis_res;
626
627 if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C,
628 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
629 {
630 printf("outer failed \r\n");
631 }
632
633 std::cout << "=====\r\n";
634
635 }
636
637 template<typename T,int R,int C>
testview()638 void testview()
639 {
640 std::cout << "----\r\n";
641 std::cout << R << " x " << C << "\r\n";
642
643 #if defined(STATIC_TEST)
644 PVector<T,R> a;
645 #else
646 PVector<T> a(R);
647 #endif
648 init_array(a,R);
649
650 #if defined(STATIC_TEST)
651 PMat<T,R,C> res=PMat<T,R,C>::diagonal(a);
652 #else
653 PMat<T> res=PMat<T>::diagonal(a);
654 #endif
655 //std::cout << res;
656 constexpr int subsize = 8;
657 constexpr int subpos = 8;
658 auto r = res.sub(Slice(subpos,subpos+subsize),Slice(subpos,subpos+subsize));
659
660 #if defined(STATIC_TEST)
661 PMat<T,subsize,subsize> resb;
662 #else
663 PMat<T> resb(subsize,subsize);
664 #endif
665
666 INIT_SYSTICK;
667 START_CYCLE_MEASUREMENT;
668 startSectionNB(1);
669 resb = r+r;
670 stopSectionNB(1);
671 STOP_CYCLE_MEASUREMENT;
672
673 //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
674
675 PMat<T,subsize,subsize> cmsis_res;
676 INIT_SYSTICK;
677 START_CYCLE_MEASUREMENT;
678 startSectionNB(2);
679 DISABLE_LOOP_UNROLL
680 for(index_t row=0;row < subsize ; row++)
681 {
682 DISABLE_LOOP_UNROLL
683 for(index_t col=0;col < subsize ; col++)
684 {
685 cmsis_res(row,col) = r(row,col)+r(row,col);
686 }
687 }
688 startSectionNB(2);
689 STOP_CYCLE_MEASUREMENT;
690
691 //std::cout<<resb;
692 //std::cout<<cmsis_res;
693
694 if (!validate(resb,cmsis_res,
695 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
696 {
697 printf("sub matrix failed \r\n");
698 }
699
700 std::cout << "=====\r\n";
701
702 }
703
704
705
706 template<typename T,int R,int C>
testmatvec()707 void testmatvec()
708 {
709
710 using STO = typename vector_traits<T>::storage_type;
711
712 std::cout << "----\r\n";
713 std::cout << R << " x " << C << "\r\n";
714
715 #if defined(STATIC_TEST)
716 PVector<T,C> a;
717 #else
718 PVector<T> a(C);
719 #endif
720 init_array(a,C);
721
722 #if defined(STATIC_TEST)
723 PMat<T,R,C> m;
724 #else
725 PMat<T> m(R,C);
726 #endif
727 init_array(m,R*C);
728
729
730 INIT_SYSTICK;
731 START_CYCLE_MEASUREMENT;
732 startSectionNB(1);
733 #if defined(STATIC_TEST)
734 PVector<T,R> res = dot(m,a);
735 #else
736 PVector<T> res = dot(m,a);
737 #endif
738 stopSectionNB(1);
739 STOP_CYCLE_MEASUREMENT;
740
741 //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
742
743 INIT_SYSTICK;
744 START_CYCLE_MEASUREMENT;
745 #if defined(STATIC_TEST)
746 PVector<T,R> cmsis_res;
747 #else
748 PVector<T> cmsis_res(R);
749 #endif
750 typename CMSISMatrixType<T>::type S;
751 S.numRows = R;
752 S.numCols = C;
753 S.pData = reinterpret_cast<STO*>(const_cast<T*>(m.ptr()));
754
755
756 startSectionNB(2);
757 cmsis_mat_vec_mult(&S, a.const_ptr(), cmsis_res.ptr());
758 startSectionNB(2);
759 STOP_CYCLE_MEASUREMENT;
760
761 //std::cout << cmsis_res;
762
763 if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R,
764 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
765 {
766 printf("matrix times vector failed \r\n");
767 }
768 std::cout << "=====\r\n";
769
770 }
771
772 template<typename T,int R,int C>
testcomplexmatvec()773 void testcomplexmatvec()
774 {
775 const T scalar = MatTestConstant<T>::half;
776 using STO = typename vector_traits<T>::storage_type;
777
778 std::cout << "----\r\n";
779 std::cout << R << " x " << C << "\r\n";
780
781 #if defined(STATIC_TEST)
782 PVector<T,C> a;
783 PVector<T,C> b;
784 #else
785 PVector<T> a(C);
786 PVector<T> b(C);
787 #endif
788 init_array(a,C);
789 init_array(b,C);
790
791 #if defined(STATIC_TEST)
792 PMat<T,R,C> m;
793 #else
794 PMat<T> m(R,C);
795 #endif
796 init_array(m,R*C);
797
798
799 INIT_SYSTICK;
800 START_CYCLE_MEASUREMENT;
801 startSectionNB(1);
802 #if defined(STATIC_TEST)
803 PVector<T,C> tmpv = a + b * scalar;
804 PVector<T,R> res = dot(m,tmpv);
805 #else
806 PVector<T> tmpv = a + b * scalar;
807 PVector<T> res = dot(m,tmpv);
808 #endif
809 stopSectionNB(1);
810 STOP_CYCLE_MEASUREMENT;
811
812 //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
813
814 INIT_SYSTICK;
815 START_CYCLE_MEASUREMENT;
816 #if defined(STATIC_TEST)
817 PVector<T,R> cmsis_res;
818 PVector<T,C> tmp;
819 #else
820 PVector<T> cmsis_res(R);
821 PVector<T> tmp(C);
822 #endif
823 typename CMSISMatrixType<T>::type S;
824 S.numRows = R;
825 S.numCols = C;
826 S.pData = reinterpret_cast<STO*>(const_cast<T*>(m.ptr()));
827
828
829 startSectionNB(2);
830 cmsis_complex_mat_vec(&S,
831 a.const_ptr(),
832 b.const_ptr(),
833 scalar,
834 tmp.ptr(),
835 cmsis_res.ptr());
836 startSectionNB(2);
837 STOP_CYCLE_MEASUREMENT;
838
839
840
841 //std::cout << cmsis_res;
842
843 if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R,
844 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
845 {
846 printf("matrix times vector expression failed \r\n");
847 }
848
849 std::cout << "=====\r\n";
850
851 }
852
853
854 template<typename T,int R, int K,int C>
testmatmult()855 void testmatmult()
856 {
857 std::cout << "----\r\n";
858 std::cout << R << " x " << K << " x " << C << "\r\n";
859
860 using S = typename CMSISMatrixType<T>::scalar;
861
862 #if defined(STATIC_TEST)
863 PMat<T,R,K> ma;
864 #else
865 PMat<T> ma(R,K);
866 #endif
867 init_array(ma,R*K);
868
869 #if defined(STATIC_TEST)
870 PMat<T,K,C> mb;
871 #else
872 PMat<T> mb(K,C);
873 #endif
874 init_array(mb,K*C);
875
876
877
878 mb += TestConstant<T>::small;
879
880 //std::cout << ma;
881 //std::cout << mb;
882
883
884 INIT_SYSTICK;
885 START_CYCLE_MEASUREMENT;
886 startSectionNB(1);
887 #if defined(STATIC_TEST)
888 PMat<T,R,C> res = dot(ma,mb);
889 #else
890 PMat<T> res = dot(ma,mb);
891 #endif
892 stopSectionNB(1);
893 STOP_CYCLE_MEASUREMENT;
894
895 //PrintType<decltype(ma)>();
896 //PrintType<decltype(mb)>();
897 //std::cout << ma;
898 //std::cout << mb;
899 //std::cout << res;
900
901
902 //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
903
904 PMat<T> tmp(C,K);
905
906 INIT_SYSTICK;
907 START_CYCLE_MEASUREMENT;
908 #if defined(STATIC_TEST)
909 PMat<T,R,C> cmsis_res;
910 #else
911 PMat<T> cmsis_res(R,C);
912 #endif
913
914
915 typename CMSISMatrixType<T>::type SA;
916 SA.numRows = R;
917 SA.numCols = K;
918 SA.pData = reinterpret_cast<S*>(ma.ptr());
919
920 typename CMSISMatrixType<T>::type SB;
921 SB.numRows = K;
922 SB.numCols = C;
923 SB.pData = reinterpret_cast<S*>(mb.ptr());
924
925 typename CMSISMatrixType<T>::type RES;
926 RES.numRows = R;
927 RES.numCols = C;
928 RES.pData = reinterpret_cast<S*>(cmsis_res.ptr());
929
930
931 startSectionNB(2);
932 cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast<S*>(tmp.ptr()));
933 startSectionNB(2);
934 STOP_CYCLE_MEASUREMENT;
935
936
937 //std::cout << cmsis_res;
938
939 if (!validate(res,cmsis_res,
940 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
941 {
942 printf("matrix times matrix expression failed \r\n");
943 }
944
945 std::cout << "=====\r\n";
946
947 }
948
949 template<typename T,int R,int K, int C>
testsubmatmult()950 void testsubmatmult()
951 {
952 std::cout << "----\r\n";
953 std::cout << R << " x " << K << " x " << C << "\r\n";
954
955 using S = typename CMSISMatrixType<T>::scalar;
956 constexpr int TOTALA = 4 + 2*K + 2*R + K*R;
957 constexpr int TOTALB = 4 + 2*C + 2*K + C*K;
958
959 #if defined(STATIC_TEST)
960 PMat<T,R+2,K+2> ma;
961 #else
962 PMat<T> ma(R+2,K+2);
963 #endif
964 init_array(ma,TOTALA);
965
966 #if defined(STATIC_TEST)
967 PMat<T,K+2,C+2> mb;
968 #else
969 PMat<T> mb(K+2,C+2);
970 #endif
971 init_array(mb,TOTALB);
972
973
974
975 mb += MatTestConstant<T>::value;
976
977 //std::cout << ma;
978 //std::cout << mb;
979
980
981 INIT_SYSTICK;
982 START_CYCLE_MEASUREMENT;
983 #if defined(STATIC_TEST)
984 PMat<T,R,C> res(T{});
985 #else
986 PMat<T> res(R,C,T{});
987 #endif
988 startSectionNB(1);
989 res.sub(Slice(0,R),Slice(0,C)) = copy(dot(ma.sub(Slice(0,R),Slice(0,K)),mb.sub(Slice(0,K),Slice(0,C))));
990 stopSectionNB(1);
991 STOP_CYCLE_MEASUREMENT;
992
993 //PrintType<decltype(ma)>();
994 //PrintType<decltype(mb)>();
995 //std::cout << ma;
996 //std::cout << mb;
997 //std::cout << res;
998
999
1000 //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
1001
1002 PMat<T> cmsis_res(R,C);
1003 PMat<T> cmsis_ma(R,K);
1004 PMat<T> cmsis_mb(K,C);
1005 PMat<T> tmp(C,K);
1006
1007 typename CMSISMatrixType<T>::type SA;
1008 SA.numRows = R;
1009 SA.numCols = K;
1010 SA.pData = reinterpret_cast<S*>(cmsis_ma.ptr());
1011
1012 typename CMSISMatrixType<T>::type SB;
1013 SB.numRows = K;
1014 SB.numCols = C;
1015 SB.pData = reinterpret_cast<S*>(cmsis_mb.ptr());
1016
1017 typename CMSISMatrixType<T>::type RES;
1018 RES.numRows = R;
1019 RES.numCols = C;
1020 RES.pData = reinterpret_cast<S*>(cmsis_res.ptr());
1021
1022
1023
1024 INIT_SYSTICK;
1025 START_CYCLE_MEASUREMENT;
1026 startSectionNB(2);
1027 cmsis_ma = copy(ma.sub(Slice(0,R),Slice(0,K)));
1028 cmsis_mb = copy(mb.sub(Slice(0,K),Slice(0,C)));
1029 cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast<S*>(tmp.ptr()));
1030 startSectionNB(2);
1031 STOP_CYCLE_MEASUREMENT;
1032
1033
1034 //std::cout << cmsis_res;
1035
1036 if (!validate(res.sub(Slice(0,R),Slice(0,C)),cmsis_res,
1037 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
1038 {
1039 printf("matrix times matrix expression failed \r\n");
1040 }
1041
1042
1043 std::cout << "=====\r\n";
1044 }
1045
1046
1047 template<typename T,int R,int C>
testmattranspose()1048 void testmattranspose()
1049 {
1050 std::cout << "----\r\n";
1051 std::cout << R << " x " << C << "\r\n";
1052
1053 #if defined(STATIC_TEST)
1054 PMat<T,R,C> ma;
1055 #else
1056 PMat<T> ma(R,C);
1057 #endif
1058 init_array(ma,R*C);
1059
1060
1061 //PrintType<decltype(ma)>();
1062 INIT_SYSTICK;
1063 START_CYCLE_MEASUREMENT;
1064 startSectionNB(1);
1065 #if defined(STATIC_TEST)
1066 PMat<T,C,R> res = ma.transpose();
1067 #else
1068 PMat<T> res = ma.transpose();
1069 #endif
1070 stopSectionNB(1);
1071 STOP_CYCLE_MEASUREMENT;
1072
1073 //std::cout << IsMatrix<decltype(r+r)>::value << "\r\n";
1074
1075 INIT_SYSTICK;
1076 START_CYCLE_MEASUREMENT;
1077 #if defined(STATIC_TEST)
1078 PMat<T,C,R> cmsis_res;
1079 #else
1080 PMat<T> cmsis_res(C,R);
1081 #endif
1082
1083 typename CMSISMatrixType<T>::type SA;
1084 SA.numRows = R;
1085 SA.numCols = C;
1086 SA.pData = reinterpret_cast<typename CMSISMatrixType<T>::scalar*>(ma.ptr());
1087
1088 typename CMSISMatrixType<T>::type RES;
1089 RES.numRows = C;
1090 RES.numCols = R;
1091 RES.pData = reinterpret_cast<typename CMSISMatrixType<T>::scalar*>(cmsis_res.ptr());
1092
1093
1094 startSectionNB(2);
1095 cmsis_mat_trans(&SA, &RES);
1096 startSectionNB(2);
1097 STOP_CYCLE_MEASUREMENT;
1098
1099 //std::cout << cmsis_res;
1100
1101 if (!validate(res,cmsis_res,
1102 ErrThreshold<T>::abserr,ErrThreshold<T>::relerr))
1103 {
1104 printf("matrix transpose failed \r\n");
1105 }
1106
1107
1108 std::cout << "=====\r\n";
1109 }
1110
1111
1112 #if !defined(DISABLEFLOAT16)
_gen_sqrt(const float16_t v)1113 static float16_t _gen_sqrt(const float16_t v)
1114 {
1115 return((float16_t)sqrtf(v));
1116 }
1117 #endif
1118
_gen_sqrt(const float32_t v)1119 static float32_t _gen_sqrt(const float32_t v)
1120 {
1121 return(sqrtf(v));
1122 }
1123
_gen_sqrt(const float64_t v)1124 static float64_t _gen_sqrt(const float64_t v)
1125 {
1126 return(sqrt(v));
1127 }
1128
1129 template<int L,template<int> typename A,
1130 typename V,typename T>
_householder(Vector<T,L,A> & res,const V & v,const T eps)1131 inline T _householder(Vector<T,L,A> &res,const V&v,const T eps)
1132 {
1133 T alpha = v[0];
1134 T tau;
1135 T beta;
1136 if (v.length()==1)
1137 {
1138 res[0] = T{};
1139 return(T{});
1140 }
1141 T xnorm2 = dot(v.sub(1),v.sub(1));
1142
1143 //std::cout << xnorm2 << "\r\n";
1144 if (xnorm2 <= eps)
1145 {
1146 tau = T{};
1147 res = T{};
1148 }
1149 else
1150 {
1151 if (alpha<=0)
1152 {
1153 beta = _gen_sqrt(alpha*alpha+xnorm2);
1154 }
1155 else
1156 {
1157 beta = -_gen_sqrt(alpha*alpha+xnorm2);
1158 }
1159 T r = number_traits<T>::one() / (alpha - beta);
1160 res = v * r;
1161 tau = (beta - alpha)/beta;
1162 res[0] = number_traits<T>::one();
1163 }
1164 return(tau);
1165 }
1166
1167 template<typename V,typename T,
1168 typename std::enable_if<
1169 !IsDynamic<V>::value &&
1170 SameElementType<V,T>::value &&
1171 IsVector<V>::value,bool>::type = true>
householder(const V & v,const T threshold)1172 auto householder(const V&v,const T threshold)
1173 {
1174 constexpr int NB = StaticLength<V>::value;
1175 Vector<T,NB,TMP_ALLOC> res;
1176 T beta = _householder(res,v,threshold);
1177 return std::tuple<T,Vector<T,NB,TMP_ALLOC>>(beta,res);
1178 }
1179
1180 template<typename V,typename T,
1181 typename std::enable_if<
1182 IsDynamic<V>::value &&
1183 SameElementType<V,T>::value &&
1184 IsVector<V>::value,bool>::type = true>
householder(const V & v,const T threshold)1185 auto householder(const V&v,const T threshold)
1186 {
1187 Vector<T,DYNAMIC,TMP_ALLOC> res(v.length());
1188 T beta = _householder(res,v,threshold);
1189 return std::tuple<T,Vector<T,DYNAMIC,TMP_ALLOC>>(beta,res);
1190 }
1191
1192 template<typename V,typename T,typename TMP,
1193 typename std::enable_if<
1194 IsDynamic<V>::value &&
1195 SameElementType<V,T>::value &&
1196 IsVector<V>::value,bool>::type = true>
householder(const V & v,const T threshold,TMP & res)1197 auto householder(const V&v,const T threshold,TMP &res)
1198 {
1199 T beta = _householder(res,v,threshold);
1200 return beta;
1201 }
1202
1203 template<typename T>
1204 struct HouseholderThreshold;
1205
1206 #if !defined(DISABLEFLOAT16)
1207 template<>
1208 struct HouseholderThreshold<float16_t>
1209 {
1210 static constexpr float16_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F16;
1211 };
1212 #endif
1213
1214 template<>
1215 struct HouseholderThreshold<float64_t>
1216 {
1217 static constexpr float64_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F64;
1218 };
1219
1220
1221 template<>
1222 struct HouseholderThreshold<float32_t>
1223 {
1224 static constexpr float32_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F32;
1225 };
1226
1227
1228 template<typename T,int NB>
testHouseholder()1229 static void testHouseholder()
1230 {
1231 std::cout << "----\r\n" << "N = " << NB << "\r\n";
1232 #if defined(STATIC_TEST)
1233 PVector<T,NB> a;
1234 #else
1235 PVector<T> a(NB);
1236 #endif
1237
1238 cmsis_init_householder(a.ptr(),NB);
1239
1240
1241 INIT_SYSTICK;
1242 START_CYCLE_MEASUREMENT;
1243 startSectionNB(1);
1244 auto res = householder(a,HouseholderThreshold<T>::value);
1245 //PVector<T,NB> res;// = a + b;
1246 //float res_beta=0;
1247 stopSectionNB(1);
1248 STOP_CYCLE_MEASUREMENT;
1249
1250
1251
1252
1253 INIT_SYSTICK;
1254 START_CYCLE_MEASUREMENT;
1255 #if defined(STATIC_TEST)
1256 PVector<T,NB> ref;
1257 #else
1258 PVector<T> ref(NB);
1259 #endif
1260 T ref_beta = cmsis_householder(a.const_ptr(),ref.ptr(),NB);
1261 STOP_CYCLE_MEASUREMENT;
1262
1263 if (!validate(std::get<1>(res).const_ptr(),ref.const_ptr(),NB,
1264 ErrThreshold<T>::abserr_householder,ErrThreshold<T>::relerr_householder))
1265 {
1266 printf("householder vector failed \r\n");
1267 }
1268
1269
1270 if (!validate(std::get<0>(res),ref_beta,
1271 ErrThreshold<T>::abserr_householder,ErrThreshold<T>::relerr_householder))
1272 {
1273 printf("householder beta failed \r\n");
1274 }
1275 std::cout << "=====\r\n";
1276 }
1277
1278 #include "debug_mat.h"
1279
1280 #if 1
1281 // R >= C
1282 template<typename T,int R,int C,template<int> typename A>
QR(const Matrix<T,R,C,A> & m,const T eps,bool wantQ)1283 auto QR(const Matrix<T,R,C,A>&m,const T eps,bool wantQ)
1284 {
1285 #if defined(STATIC_TEST)
1286 Vector<T,C,TMP_ALLOC> tau;
1287 Matrix<T,R,C,TMP_ALLOC> RM = m;
1288 Matrix<T,R,R,TMP_ALLOC> Q = Matrix<T,R,R>::identity();
1289
1290
1291 // Temporaries
1292 Vector<T,R,TMP_ALLOC> tmpvec;
1293 Matrix<T,1,R,TMP_ALLOC> tmpmat;
1294 #else
1295 Vector<T> tau(m.columns());
1296 Matrix<T> RM = m;
1297 Matrix<T> Q = Matrix<T>::identity(m.rows());
1298
1299
1300 // Temporaries
1301 Vector<T> tmpvec(m.rows());
1302 Matrix<T> tmpmat(1,m.rows());
1303 #endif
1304
1305 const int NBC = m.columns();
1306 const int NBR = m.rows();
1307
1308
1309 for(index_t c=0;c<NBC-1;c++)
1310 {
1311 auto beta = householder(RM.col(c,c),eps,tmpvec);
1312 tau[c] = beta;
1313
1314 MatrixView<T> vt(tmpvec,1,NBR-c);
1315 dot(tmpmat.sub(0,1,0,NBC-c),vt,RM.sub(c,c));
1316
1317 RM.sub(c,c) =
1318 RM.sub(c,c) - beta * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBC-c));
1319
1320 // Copy householder reflector
1321 // Not valid when c == C-1
1322 // We don't want to use a test since CMSIS-DSP is not using
1323 // one and introducing a test would give worse performance
1324 RM.col(c,c+1) = copy(tmpvec.sub(1,NBR-c));
1325
1326 }
1327
1328
1329 auto beta = householder(RM.col(NBC-1,NBC-1),eps,tmpvec);
1330 tau[NBC-1] = beta;
1331
1332 MatrixView<T> vt(tmpvec,1,NBR-(NBC-1));
1333 dot(tmpmat.sub(0,1,0,NBC-(NBC-1)),vt,RM.sub(NBC-1,NBC-1));
1334
1335 RM.sub(NBC-1,NBC-1) =
1336 RM.sub(NBC-1,NBC-1) - beta * outer(tmpvec.sub(0,NBR-(NBC-1)),tmpmat.row(0,0,NBC-(NBC-1)));
1337
1338
1339
1340
1341 if (wantQ)
1342 {
1343 for(index_t c=NBC-1;c>=0;c--)
1344 {
1345 tmpvec.sub(1) = copy(RM.col(c,c+1));
1346 tmpvec[0] = number_traits<T>::one();
1347
1348 MatrixView<T> vt(tmpvec,1,NBR-c);
1349 dot(tmpmat.sub(0,1,0,NBR-c),vt,Q.sub(c,c));
1350
1351 Q.sub(c,c) =
1352 Q.sub(c,c) - tau[c] * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBR-c));
1353
1354 }
1355 }
1356
1357 return std::make_tuple(RM,Q,tau);
1358
1359 }
1360
1361 template<typename T,int R,int C>
testQR()1362 static void testQR()
1363 {
1364 std::cout << "----\r\n";
1365 std::cout << R << " x " << C << "\r\n";
1366 #if defined(STATIC_TEST)
1367 PMat<T,R,C> a;
1368 #else
1369 PMat<T> a(R,C);
1370 #endif
1371
1372 cmsis_init_qr(a.ptr(),R,C);
1373
1374
1375 INIT_SYSTICK;
1376 START_CYCLE_MEASUREMENT;
1377 startSectionNB(1);
1378 auto res = QR(a,HouseholderThreshold<T>::value,true);
1379 stopSectionNB(1);
1380 STOP_CYCLE_MEASUREMENT;
1381
1382 //std::cout << "next\r\n";
1383
1384 //std::cout << std::get<0>(res);
1385 //std::cout << std::get<1>(res);
1386 //std::cout << std::get<2>(res);
1387
1388 // For fair comparison, in dynamic mode we must take into
1389 // account the memory allocations since they are made
1390 // by the QR algorithms
1391 #if !defined(STATIC_TEST)
1392 INIT_SYSTICK;
1393 START_CYCLE_MEASUREMENT;
1394 #endif
1395
1396 #if 0 //defined(STATIC_TEST)
1397 PMat<T,R,C> cmsis_res;
1398 PMat<T,R,C> cmsis_outRp;
1399 PMat<T,R,R> cmsis_outQp;
1400 PVector<T,C> cmsis_tau;
1401 PVector<T,R> cmsis_tmpa;
1402 PVector<T,C> cmsis_tmpb;
1403 #else
1404 PMat<T> cmsis_res(R,C);
1405 PMat<T> cmsis_outRp(R,C);
1406 PMat<T> cmsis_outQp(R,R);
1407 PVector<T> cmsis_tau(C);
1408 PVector<T> cmsis_tmpa(R);
1409 PVector<T> cmsis_tmpb(C);
1410 #endif
1411
1412 typename CMSISMatrixType<T>::type RP;
1413 RP.numRows = R;
1414 RP.numCols = C;
1415 RP.pData = cmsis_outRp.ptr();
1416
1417 typename CMSISMatrixType<T>::type QP;
1418 QP.numRows = R;
1419 QP.numCols = R;
1420 QP.pData = cmsis_outQp.ptr();
1421
1422 typename CMSISMatrixType<T>::type IN;
1423 IN.numRows = R;
1424 IN.numCols = C;
1425 IN.pData = a.ptr();
1426
1427 //std::cout << "-------\r\n";
1428
1429 #if defined(STATIC_TEST)
1430 INIT_SYSTICK;
1431 START_CYCLE_MEASUREMENT;
1432 #endif
1433 arm_status status=cmsis_qr(&IN,HouseholderThreshold<T>::value,
1434 &RP,&QP,
1435 cmsis_tau.ptr(),
1436 cmsis_tmpa.ptr(),
1437 cmsis_tmpb.ptr());
1438 (void)status;
1439 STOP_CYCLE_MEASUREMENT;
1440
1441 //std::cout << cmsis_outRp;
1442 //std::cout << cmsis_outQp;
1443 //std::cout << cmsis_tau;
1444
1445 if (!validate(std::get<0>(res),cmsis_outRp,
1446 ErrThreshold<T>::abserr_qr,ErrThreshold<T>::relerr_qr))
1447 {
1448 printf("QR Rp matrix failed \r\n");
1449 }
1450
1451
1452 if (!validate(std::get<1>(res),cmsis_outQp,
1453 ErrThreshold<T>::abserr_qr,ErrThreshold<T>::relerr_qr))
1454 {
1455 printf("QR Qp matrix failed \r\n");
1456 }
1457
1458 if (!validate(std::get<2>(res),cmsis_tau,
1459 ErrThreshold<T>::abserr_qr,ErrThreshold<T>::relerr_qr))
1460 {
1461 printf("QR tau failed \r\n");
1462 }
1463 std::cout << "=====\r\n";
1464 }
1465
1466 #endif
1467
1468
1469 template<typename T,int R,template<int> typename A>
cholesky(const Matrix<T,R,R,A> & a)1470 auto cholesky(const Matrix<T,R,R,A>&a)
1471 {
1472 // Temporaries
1473 #if defined(STATIC_TEST)
1474 Matrix<T,R,R,TMP_ALLOC> g = a;
1475 Vector<T,R,TMP_ALLOC> tmp;
1476 #else
1477 Matrix<T> g = a;
1478 Vector<T> tmp(a.rows());
1479 #endif
1480
1481 const int NBR = a.rows();
1482
1483 g.col(0,0) = g.col(0,0) * (T)(number_traits<T>::one() / _gen_sqrt(g(0,0)));
1484
1485 for(int j=1;j<NBR;j++)
1486 {
1487 dot(tmp.sub(j),g.sub(j,NBR,0,j) , g.row(j,0,j));
1488
1489 g.col(j,j) = (g.col(j,j) - tmp.sub(j)) * (T)(number_traits<T>::one() / _gen_sqrt(g(j,j)- tmp[j]));
1490
1491 }
1492 return(g);
1493 }
1494
1495
1496 template<typename T,int R>
testCholesky()1497 static void testCholesky()
1498 {
1499 std::cout << "----\r\n";
1500 std::cout << R << " x " << R << "\r\n";
1501 #if defined(STATIC_TEST)
1502 PMat<T,R,R> a;
1503 #else
1504 PMat<T> a(R,R);
1505 #endif
1506
1507 cmsis_init_cholesky(a.ptr(),R,R);
1508
1509 //std::cout << a;
1510
1511 INIT_SYSTICK;
1512 START_CYCLE_MEASUREMENT;
1513 startSectionNB(1);
1514 // Not totally equivalent to CMSIS implementation
1515 // It should be possible to rewrite it to avoid use of
1516 // temporary buffer like CMSIS-DSP
1517 auto res = cholesky(a);
1518 stopSectionNB(1);
1519 STOP_CYCLE_MEASUREMENT;
1520
1521
1522 //std::cout << res;
1523
1524 PMat<T,R,R> cmsis_res(T{});
1525
1526 typename CMSISMatrixType<T>::type OUT;
1527 OUT.numRows = R;
1528 OUT.numCols = R;
1529 OUT.pData = cmsis_res.ptr();
1530
1531
1532 typename CMSISMatrixType<T>::type IN;
1533 IN.numRows = R;
1534 IN.numCols = R;
1535 IN.pData = a.ptr();
1536
1537 //std::cout << "-------\r\n";
1538
1539
1540 INIT_SYSTICK;
1541 START_CYCLE_MEASUREMENT;
1542 arm_status status=cmsis_cholesky(&IN,&OUT);
1543 (void)status;
1544 STOP_CYCLE_MEASUREMENT;
1545
1546 //std::cout << cmsis_res;
1547
1548
1549 if (!validateLT(res,cmsis_res,
1550 ErrThreshold<T>::abserr_cholesky,ErrThreshold<T>::relerr_cholesky))
1551 {
1552 printf("cholesky failed \r\n");
1553 }
1554 std::cout << "=====\r\n";
1555 }
1556
1557 template<typename TT,typename ...T>
1558 struct TESTINV
1559 {
allTESTINV1560 static void all()
1561 {
1562 testinv<TT,T::value...>();
1563 }
1564 };
1565
1566 template<typename TT,typename ...T>
1567 struct TESTOUTER
1568 {
allTESTOUTER1569 static void all()
1570 {
1571 testouter<TT,T::value...>();
1572 }
1573 };
1574
1575 template<typename TT,typename ...T>
1576 struct TESTMATVEC
1577 {
allTESTMATVEC1578 static void all()
1579 {
1580 testmatvec<TT,T::value...>();
1581 }
1582 };
1583
1584 template<typename TT,typename ...T>
1585 struct TESTCOMPLEXMATVEC
1586 {
allTESTCOMPLEXMATVEC1587 static void all()
1588 {
1589 testcomplexmatvec<TT,T::value...>();
1590 }
1591 };
1592
1593 template<typename TT,typename ...T>
1594 struct TESTADD
1595 {
allTESTADD1596 static void all()
1597 {
1598 testadd <TT,T::value...>();
1599 }
1600 };
1601
1602 template<typename TT,typename ...T>
1603 struct TESTMATTRANSPOSE
1604 {
allTESTMATTRANSPOSE1605 static void all()
1606 {
1607 testmattranspose<TT,T::value...>();
1608 }
1609 };
1610
1611 template<typename TT,typename ...T>
1612 struct TESTMATMULT
1613 {
allTESTMATMULT1614 static void all()
1615 {
1616 testmatmult<TT,T::value...>();
1617 }
1618 };
1619
1620 template<typename TT,typename ...T>
1621 struct TESTSUBMATMULT
1622 {
allTESTSUBMATMULT1623 static void all()
1624 {
1625 testsubmatmult<TT,T::value...>();
1626 }
1627 };
1628
1629
1630 template<typename TT,typename... T>
1631 struct TEST_CASES
1632 {
allTEST_CASES1633 static void all()
1634 {
1635 (mp_push_front<T,TT>::all(),...);
1636 }
1637 };
1638
1639 template<template <typename, typename...> typename F,
1640 typename... A>
1641 using ALL_TESTS = mp_rename<mp_push_front<mp_product<F, A...>,float>,TEST_CASES>;
1642
1643
1644 template<typename T>
matrix_all_test()1645 void matrix_all_test()
1646 {
1647
1648 #if defined(MATRIX_TEST)
1649 #if !defined(SUBTEST1) && !defined(SUBTEST2)
1650 const int nb_tails = TailForTests<T>::tail;
1651 const int nb_loops = TailForTests<T>::loop;
1652 using UNROLL = mp_rename<mp_list_v<1,2,4,8,9,11>,mp_list>;
1653 #endif
1654
1655 #if defined(SUBTEST8) || defined(SUBTEST14)
1656 using UNROLLA = mp_rename<mp_list_v<1>,mp_list>;
1657 #endif
1658 #if defined(SUBTEST9) || defined(SUBTEST15)
1659 using UNROLLA = mp_rename<mp_list_v<2>,mp_list>;
1660 #endif
1661 #if defined(SUBTEST10) || defined(SUBTEST16)
1662 using UNROLLA = mp_rename<mp_list_v<4>,mp_list>;
1663 #endif
1664
1665 #if defined(SUBTEST11) || defined(SUBTEST17)
1666 using UNROLLA = mp_rename<mp_list_v<8>,mp_list>;
1667 #endif
1668 #if defined(SUBTEST12) || defined(SUBTEST18)
1669 using UNROLLA = mp_rename<mp_list_v<9>,mp_list>;
1670 #endif
1671 #if defined(SUBTEST13) || defined(SUBTEST19)
1672 using UNROLLA = mp_rename<mp_list_v<11>,mp_list>;
1673 #endif
1674
1675 #if !defined(SUBTEST1) && !defined(SUBTEST2)
1676 using VEC = mp_rename<mp_list_v<1,
1677 nb_tails,
1678 nb_loops,
1679 nb_loops+1,
1680 nb_loops+nb_tails>,mp_list>;
1681 #endif
1682
1683
1684 //using res = mp_rename<mp_push_front<mp_product<TEST, A, B,C>,float>,TEST_CASES>;
1685 //using res = ALL_TESTS<TEST,A,B,C>;
1686 //PrintType<test>();
1687
1688
1689 if constexpr (number_traits<T>::is_float)
1690 {
1691
1692 #if defined(SUBTEST1)
1693
1694 title<T>("Householder");
1695 testHouseholder<T,NBVEC_4>();
1696 testHouseholder<T,NBVEC_16>();
1697 testHouseholder<T,NBVEC_32>();
1698
1699 title<T>("QR");
1700 testQR<T,NBVEC_4,NBVEC_4>();
1701 testQR<T,NBVEC_16,NBVEC_16>();
1702 testQR<T,NBVEC_32,NBVEC_32>();
1703
1704 title<T>("Cholesky");
1705 testCholesky<T,NBVEC_4>();
1706 testCholesky<T,NBVEC_16>();
1707 testCholesky<T,NBVEC_32>();
1708
1709 #endif
1710
1711 #if defined(SUBTEST2)
1712 title<T>("Matrix inverse");
1713 testinv<T,NBVEC_4,NBVEC_4>();
1714 testinv<T,NBVEC_8,NBVEC_8>();
1715 testinv<T,NBVEC_16,NBVEC_16>();
1716 #endif
1717
1718
1719 }
1720
1721 #if defined(SUBTEST1)
1722 title<T>("Matrix outer product");
1723
1724 testouter<T,NBVEC_4,NBVEC_4>();
1725 testouter<T,NBVEC_8,NBVEC_8>();
1726 testouter<T,NBVEC_16,NBVEC_16>();
1727 #endif
1728
1729 #if defined(SUBTEST3)
1730 title<T>("Matrix outer product");
1731 ALL_TESTS<TESTOUTER,UNROLL,VEC>::all();
1732 #endif
1733
1734 if constexpr (!std::is_same<T,double>::value)
1735 {
1736 #if defined(SUBTEST1)
1737 title<T>("Matrix times vector");
1738
1739 testmatvec<T,NBVEC_4 ,NBVEC_4>();
1740 testmatvec<T,NBVEC_16,NBVEC_16>();
1741 testmatvec<T,NBVEC_32,NBVEC_32>();
1742 testmatvec<T,NBVEC_44,NBVEC_44>();
1743 testmatvec<T,NBVEC_47,NBVEC_47>();
1744 #endif
1745
1746 #if defined(SUBTEST4)
1747 title<T>("Matrix times vector");
1748 ALL_TESTS<TESTMATVEC,UNROLL,VEC>::all();
1749 #endif
1750
1751 #if defined(SUBTEST1)
1752 title<T>("Matrix times vector expression");
1753
1754 testcomplexmatvec<T,NBVEC_4 ,NBVEC_4>();
1755 testcomplexmatvec<T,NBVEC_16,NBVEC_16>();
1756 testcomplexmatvec<T,NBVEC_32,NBVEC_32>();
1757 testcomplexmatvec<T,NBVEC_44,NBVEC_44>();
1758 testcomplexmatvec<T,NBVEC_47,NBVEC_47>();
1759 #endif
1760
1761 #if defined(SUBTEST5)
1762 title<T>("Matrix times vector expression");
1763 ALL_TESTS<TESTCOMPLEXMATVEC,UNROLL,VEC>::all();
1764 #endif
1765 }
1766
1767 if constexpr (!std::is_same<T,Q7>::value && !std::is_same<T,double>::value)
1768 {
1769 #if defined(SUBTEST1)
1770 title<T>("Matrix add");
1771
1772 testadd<T,NBVEC_4,NBVEC_4>();
1773 testadd<T,NBVEC_8,NBVEC_8>();
1774 testadd<T,NBVEC_16,NBVEC_16>();
1775 #endif
1776
1777 #if defined(SUBTEST6)
1778 title<T>("Matrix add");
1779 ALL_TESTS<TESTADD,UNROLL,VEC>::all();
1780 #endif
1781 }
1782
1783 #if defined(SUBTEST1)
1784 title<T>("Matrix diag");
1785
1786 testdiag<T,NBVEC_4,NBVEC_4>();
1787 testdiag<T,NBVEC_8,NBVEC_8>();
1788 testdiag<T,NBVEC_16,NBVEC_16>();
1789
1790 title<T>("Matrix submatrix");
1791
1792 testview<T,NBVEC_16,NBVEC_16>();
1793 #endif
1794
1795 #if defined(SUBTEST1)
1796 title<T>("Matrix multiply");
1797 testmatmult<T,NBVEC_4,NBVEC_4,NBVEC_4>();
1798 testmatmult<T,NBVEC_16,NBVEC_16,NBVEC_16>();
1799 testmatmult<T,NBVEC_32,NBVEC_32,NBVEC_32>();
1800 #endif
1801
1802
1803
1804 #if defined(SUBTEST1)
1805 title<T>("Matrix transpose");
1806 testmattranspose<T,NBVEC_2,NBVEC_2>();
1807 testmattranspose<T,NBVEC_3,NBVEC_3>();
1808 testmattranspose<T,NBVEC_4,NBVEC_4>();
1809 testmattranspose<T,NBVEC_16,NBVEC_16>();
1810 testmattranspose<T,NBVEC_32,NBVEC_32>();
1811 #endif
1812
1813 #if defined(SUBTEST7)
1814 title<T>("Matrix transpose");
1815 ALL_TESTS<TESTMATTRANSPOSE,UNROLL,VEC>::all();
1816 #endif
1817
1818 #if defined(SUBTEST8) || defined(SUBTEST9)|| defined(SUBTEST10)|| defined(SUBTEST11)|| defined(SUBTEST12)|| defined(SUBTEST13)
1819 title<T>("Matrix multiply");
1820 ALL_TESTS<TESTMATMULT,UNROLLA,VEC,UNROLL>::all();
1821 #endif
1822
1823 #if defined(SUBTEST1)
1824 title<T>("Submatrix multiply");
1825 testsubmatmult<T,NBVEC_4,NBVEC_4,NBVEC_4>();
1826 testsubmatmult<T,NBVEC_16,NBVEC_16,NBVEC_16>();
1827 #endif
1828
1829 #if defined(SUBTEST14) || defined(SUBTEST15) || defined(SUBTEST16)|| defined(SUBTEST17)|| defined(SUBTEST18)|| defined(SUBTEST19)
1830 title<T>("Submatrix multiply");
1831 ALL_TESTS<TESTSUBMATMULT,UNROLLA,VEC,UNROLL>::all();
1832 #endif
1833
1834 //testsubmatmult<T,NBVEC_32>();
1835 #endif
1836
1837 };
1838
1839
1840
matrix_test()1841 void matrix_test()
1842 {
1843 #if defined(MATRIX_TEST)
1844 #if defined(F64_DT)
1845 matrix_all_test<double>();
1846 #endif
1847 #if defined(F32_DT)
1848 matrix_all_test<float>();
1849 #endif
1850 #if defined(F16_DT) && !defined(DISABLEFLOAT16)
1851 matrix_all_test<float16_t>();
1852 #endif
1853 #if defined(Q31_DT)
1854 matrix_all_test<Q31>();
1855 #endif
1856 #if defined(Q15_DT)
1857 matrix_all_test<Q15>();
1858 #endif
1859 #if defined(Q7_DT)
1860 matrix_all_test<Q7>();
1861 #endif
1862 #endif
1863 }