extern "C" { extern void matrix_test(); } #include "allocator.h" #include #include #include #include #include #include #include "boost/mp11.hpp" using namespace boost::mp11; extern "C" { #include "dsp/matrix_functions.h" #include "dsp/matrix_utils.h" } template struct MatTestConstant; template<> struct MatTestConstant { constexpr static double value = 0.001; constexpr static double half = 0.5; }; template<> struct MatTestConstant { constexpr static float value = 0.001f; constexpr static float half = 0.5f; }; #if !defined(DISABLEFLOAT16) template<> struct MatTestConstant { constexpr static float16_t value = (float16_t)0.001f; constexpr static float16_t half = (float16_t)0.5f; }; #endif template<> struct MatTestConstant { constexpr static Q7 value = 0.001_q7; constexpr static Q7 half = 0.5_q7; }; template<> struct MatTestConstant { constexpr static Q15 value = 0.001_q15; constexpr static Q15 half = 0.5_q15; }; template<> struct MatTestConstant { constexpr static Q31 value = 0.001_q31; constexpr static Q31 half = 0.5_q31; }; template struct ErrThreshold { constexpr static float abserr = 0; constexpr static float relerr = 0; constexpr static float abserr_cholesky = 0; constexpr static float relerr_cholesky = 0; constexpr static float abserr_householder = 0; constexpr static float relerr_householder = 0; constexpr static float abserr_qr = 0; constexpr static float relerr_qr = 0; constexpr static float abserr_inv = 0; constexpr static float relerr_inv = 0; }; // Should be more accurate than F32 but right know // we only check there is no regression compared to f32 template<> struct ErrThreshold { constexpr static float abserr = ABS_ERROR; constexpr static float relerr = REL_ERROR; constexpr static float abserr_cholesky = 3e-4; constexpr static float relerr_cholesky = 1e-4; constexpr static float abserr_householder = ABS_ERROR; constexpr static float relerr_householder = REL_ERROR; constexpr static float abserr_qr = ABS_ERROR; constexpr static float relerr_qr = REL_ERROR; constexpr static float abserr_inv = ABS_ERROR; constexpr static float relerr_inv = REL_ERROR; }; template<> struct ErrThreshold { constexpr static float abserr = ABS_ERROR; constexpr static float relerr = REL_ERROR; constexpr static float abserr_cholesky = 3e-4; constexpr static float relerr_cholesky = 1e-4; constexpr static float abserr_householder = ABS_ERROR; constexpr static float relerr_householder = REL_ERROR; constexpr static float abserr_qr = ABS_ERROR; constexpr static float relerr_qr = REL_ERROR; constexpr static float abserr_inv = 4.0e-6; constexpr static float relerr_inv = 5.0e-6; }; #if !defined(DISABLEFLOAT16) template<> struct ErrThreshold { constexpr static float abserr = ABS_ERROR; constexpr static float relerr = REL_ERROR; constexpr static float abserr_cholesky = 2e-1; constexpr static float relerr_cholesky = 2e-1; constexpr static float abserr_householder = 2e-4; constexpr static float relerr_householder = 2e-3; // 32x32 is not numerically behaving well with // the matrix used as input constexpr static float abserr_qr = 2.0; constexpr static float relerr_qr = 1e-2; constexpr static float abserr_inv = 3e-2; constexpr static float relerr_inv = 3e-2; }; #endif void cmsisdsp_mat_inv(float64_t *amod, float64_t* b, uint32_t r,uint32_t c) { arm_matrix_instance_f64 src; arm_matrix_instance_f64 dst; src.numRows = r; src.numCols = c; src.pData = amod; dst.numRows = r; dst.numCols = c; dst.pData = b; arm_status status = arm_mat_inverse_f64(&src,&dst); (void)status; }; void cmsisdsp_mat_inv(float32_t *amod, float32_t* b, uint32_t r,uint32_t c) { arm_matrix_instance_f32 src; arm_matrix_instance_f32 dst; src.numRows = r; src.numCols = c; src.pData = amod; dst.numRows = r; dst.numCols = c; dst.pData = b; arm_status status = arm_mat_inverse_f32(&src,&dst); (void)status; }; #if !defined(DISABLEFLOAT16) void cmsisdsp_mat_inv(float16_t *amod, float16_t* b, uint32_t r,uint32_t c) { arm_matrix_instance_f16 src; arm_matrix_instance_f16 dst; src.numRows = r; src.numCols = c; src.pData = amod; dst.numRows = r; dst.numCols = c; dst.pData = b; arm_status status = arm_mat_inverse_f16(&src,&dst); (void)status; }; #endif const float32_t mat64[64] = {0.395744, 0.623798, 0.885422, 0.95415, 0.310384, 0.257541, 0.631426, 0.424491, 0.130945, 0.799959, 0.133693, 0.479455, 0.519254, 0.381039, 0.617455, 0.748273, 0.146944, 0.928945, 0.430936, 0.508207, 0.829023, 0.358027, 0.999501, 0.851953, 0.273895, 0.685898, 0.0436612, 0.295212, 0.467651, 0.0515567, 0.21037, 0.607475, 0.570295, 0.281109, 0.979219, 0.0947969, 0.319016, 0.398405, 0.349953, 0.710002, 0.431597, 0.447659, 0.0747669, 0.057063, 0.165648, 0.773106, 0.135765, 0.709327, 0.873836, 0.292361, 0.00202529, 0.392942, 0.520183, 0.0528055, 0.797982, 0.613497, 0.509682, 0.0435791, 0.780526, 0.960582, 0.535914, 0.216113, 0.134108, 0.225859}; const float32_t mat16[16] = {1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 6.0, 3.0, 5.0, 9.0, 10.0, 4.0, 6.0, 10.0, 16.0}; const float32_t mat256[256] = {0.97936, 0.498105, 0.452618, 0.299761, 0.688624, 0.247212, \ 0.228337, 0.22905, 0.563815, 0.251998, 0.5238, 0.141223, 0.0980689, \ 0.79112, 0.771182, 0.890995, 0.0256181, 0.0377277, 0.575629, \ 0.648138, 0.926218, 0.803878, 0.620333, 0.325635, 0.587355, 0.041795, \ 0.934271, 0.0690131, 0.0240136, 0.800828, 0.522999, 0.374706, \ 0.266977, 0.208028, 0.112878, 0.0389899, 0.658311, 0.205067, \ 0.244172, 0.0762778, 0.190575, 0.677312, 0.0682093, 0.367328, \ 0.0191464, 0.988968, 0.437477, 0.130622, 0.907823, 0.0116559, \ 0.614526, 0.447443, 0.0126975, 0.995496, 0.947676, 0.659996, \ 0.321547, 0.725415, 0.658426, 0.0243924, 0.0843519, 0.351748, \ 0.974332, 0.673381, 0.375012, 0.719626, 0.721219, 0.766905, \ 0.17065, 0.648905, 0.770983, 0.360008, 0.344226, 0.179633, 0.347905, \ 0.555561, 0.742615, 0.908389, 0.806959, 0.176078, 0.872167, \ 0.321839, 0.098607, 0.954515, 0.627286, 0.235082, 0.746179, 0.163606, \ 0.899323, 0.871471, 0.712448, 0.956971, 0.736687, 0.750702, 0.843348, \ 0.302435, 0.444862, 0.0644597, 0.765519, 0.518397, 0.765541, \ 0.900375, 0.201853, 0.490325, 0.721786, 0.893647, 0.774724, \ 0.0983631, 0.339887, 0.526084, 0.0786152, 0.515697, 0.438801, \ 0.226628, 0.125093, 0.886642, 0.617766, 0.71696, 0.473172, 0.640949, \ 0.67688, 0.676214, 0.453662, 0.345796, 0.608999, 0.904448, 0.0965741, \ 0.00461771, 0.467399, 0.292235, 0.0418646, 0.116632, 0.0766192, \ 0.269051, 0.411649, 0.0538381, 0.973959, 0.667106, 0.301662, \ 0.977206, 0.891751, 0.420267, 0.441334, 0.0896179, 0.249969, \ 0.672614, 0.623966, 0.609733, 0.320772, 0.39723, 0.845196, 0.653877, \ 0.0599186, 0.340188, 0.199787, 0.598104, 0.45664, 0.920485, 0.969439, \ 0.446555, 0.0932837, 0.0247635, 0.747644, 0.438759, 0.639154, \ 0.754049, 0.379433, 0.968655, 0.0452146, 0.208123, 0.252654, \ 0.261898, 0.608665, 0.145211, 0.395368, 0.799111, 0.697823, \ 0.382906, 0.456515, 0.262579, 0.284169, 0.881488, 0.860877, 0.155548, \ 0.537387, 0.804235, 0.311383, 0.183216, 0.677692, 0.829542, 0.406049, \ 0.860392, 0.467668, 0.385633, 0.654692, 0.841125, 0.178406, \ 0.668945, 0.369609, 0.809711, 0.454593, 0.632028, 0.605791, 0.643851, \ 0.787023, 0.285633, 0.832216, 0.30892, 0.303559, 0.704898, 0.61118, \ 0.435547, 0.173678, 0.788689, 0.319511, 0.648378, 0.635417, 0.125127, \ 0.310251, 0.800819, 0.4863, 0.924361, 0.308059, 0.952175, 0.449844, \ 0.215496, 0.257826, 0.556383, 0.259735, 0.197234, 0.0509903, 0.21474, \ 0.145085, 0.41288, 0.876758, 0.096721, 0.228955, 0.0152248, 0.126501, \ 0.28899, 0.336668, 0.580015, 0.932761, 0.989783, 0.667379, \ 0.798751, 0.587173, 0.445902, 0.041448, 0.311878, 0.0332857, \ 0.401984, 0.795049, 0.8222, 0.678648, 0.807558}; template typename A> void init_mat(Matrix &pDst,std::size_t r,std::size_t c) { const float32_t *p; if ((r==4) && (r==c)) { p = mat16; } if ((r==8) && (r==c)) { p = mat64; } if ((r==16) && (r==c)) { p = mat256; } for(std::size_t i=0;i typename A, typename M> void _matinv(const Matrix &a,M && res) { Matrix b = a; const vector_length_t nb_rows = a.rows(); const vector_length_t nb_cols = a.columns(); for(index_t r=0;r < nb_rows ; r++) { res.row(r) = T{}; res(r,r) = number_traits::one(); } for(index_t c=0;c < nb_cols ; c++) { T pivot = b(c,c); index_t selectedRow = c; for(index_t r=c+1;r < nb_rows ; r++) { T newPivot = b(r,c); if (_abs(newPivot)>_abs(pivot)) { pivot = newPivot; selectedRow = r; } } if ((pivot!=T{}) && (selectedRow != c)) { swap(b.row(c,c),b.row(selectedRow,c)); swap(res.row(c),res.row(selectedRow)); } else if (pivot == T{}) { break; } pivot = number_traits::one() / pivot; b.row(c,c) *= pivot; res.row(c) *= pivot; index_t r=0; for(;r < c ; r++) { const T tmp = b(r,c); b.row(r,c) -= b.row(c,c)*tmp; res.row(r) -= res.row(c)*tmp; } for(r=c+1;r < nb_rows ; r++) { const T tmp = b(r,c); b.row(r,c) -= b.row(c,c)*tmp; res.row(r) -= res.row(c)*tmp; } } } template typename A, typename std::enable_if<(NB>0),bool>::type = true> Matrix matinv(const Matrix &a) { Matrix res; _matinv(a,res); return(res); } template typename A, typename std::enable_if<(NB<0),bool>::type = true> Matrix matinv(const Matrix &a) { Matrix res(a.rows(),a.columns()); return (_matinv(a,res)); return(res); } template typename A, typename std::enable_if<(NB<0),bool>::type = true> void matinv(Matrix &res, const Matrix &a) { (void)_matinv(a,res); } template void testinv() { std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PMat a; #else PMat a(R,C); #endif init_mat(a,R,C); #if !defined(STATIC_TEST) PMat res(R,C); #endif INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); #if defined(STATIC_TEST) PMat res = matinv(a); #else matinv(res,a); #endif stopSectionNB(1); STOP_CYCLE_MEASUREMENT; PMat amod(a); PMat cmsis_res(R,C); INIT_SYSTICK; START_CYCLE_MEASUREMENT; cmsisdsp_mat_inv(amod.ptr(),cmsis_res.ptr(),R,C); STOP_CYCLE_MEASUREMENT; if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, ErrThreshold::abserr_inv,ErrThreshold::relerr_inv)) { printf("inv failed \r\n"); } std::cout << "=====\r\n"; } template void testadd() { std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PMat a; PMat b; #else PMat a(R,C); PMat b(R,C); #endif init_array(a,R*C); init_array(b,R*C); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); #if defined(STATIC_TEST) PMat res = a+b; #else PMat res = a+b; #endif stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //PrintType(); //PrintType(); // //std::cout << "a: " << IsVector::value << "\r\n"; //std::cout << "b: " << IsVector::value << "\r\n"; //std::cout << "a+b: " << IsVector::value << "\r\n"; //std::cout << "res: " << IsVector::value << "\r\n"; //std::cout << "same: " << SameElementType::value << "\r\n"; // //std::cout << "vec inst: " << has_vector_inst() << "\r\n"; //std::cout << "vec index pair: " << vector_idx_pair() << "\r\n"; //std::cout << "must use mat idx: " << must_use_matrix_idx_pair() << "\r\n"; INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PMat cmsis_res; #else PMat cmsis_res(R,C); #endif cmsisdsp_mat_add(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C); STOP_CYCLE_MEASUREMENT; if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, ErrThreshold::abserr,ErrThreshold::relerr)) { printf("add failed \r\n"); } std::cout << "=====\r\n"; } template void testdiag() { std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PVector a; #else PVector a(R); #endif init_array(a,R); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); #if defined(STATIC_TEST) PMat res=PMat::diagonal(a); #else PMat res=PMat::diagonal(a); #endif stopSectionNB(1); STOP_CYCLE_MEASUREMENT; const T* ap = a.const_ptr(); INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PMat cmsis_res; #else PMat cmsis_res(R,C); #endif T* refp = cmsis_res.ptr(); UNROLL_LOOP for(index_t row=0;row < R; row++) { UNROLL_LOOP for(index_t col=0;col < C; col++) { if (row != col) { refp[row*C+col] = T{}; } else { refp[row*C+col] = ap[row]; } } } STOP_CYCLE_MEASUREMENT; if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, ErrThreshold::abserr,ErrThreshold::relerr)) { printf("diag failed \r\n"); } std::cout << "=====\r\n"; } template void testouter() { std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; PVector a; PVector b; init_array(a,R); init_array(b,C); b = b + b; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); PMat res = outer(a,b); stopSectionNB(1); STOP_CYCLE_MEASUREMENT; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(2); #if defined(STATIC_TEST) PMat cmsis_res; #else PMat cmsis_res(R,C); #endif CMSISOuter::run(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C); startSectionNB(2); STOP_CYCLE_MEASUREMENT; //std::cout<::abserr,ErrThreshold::relerr)) { printf("outer failed \r\n"); } std::cout << "=====\r\n"; } template void testview() { std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PVector a; #else PVector a(R); #endif init_array(a,R); #if defined(STATIC_TEST) PMat res=PMat::diagonal(a); #else PMat res=PMat::diagonal(a); #endif //std::cout << res; constexpr int subsize = 8; constexpr int subpos = 8; auto r = res.sub(Slice(subpos,subpos+subsize),Slice(subpos,subpos+subsize)); #if defined(STATIC_TEST) PMat resb; #else PMat resb(subsize,subsize); #endif INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); resb = r+r; stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //std::cout << IsMatrix::value << "\r\n"; PMat cmsis_res; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(2); DISABLE_LOOP_UNROLL for(index_t row=0;row < subsize ; row++) { DISABLE_LOOP_UNROLL for(index_t col=0;col < subsize ; col++) { cmsis_res(row,col) = r(row,col)+r(row,col); } } startSectionNB(2); STOP_CYCLE_MEASUREMENT; //std::cout<::abserr,ErrThreshold::relerr)) { printf("sub matrix failed \r\n"); } std::cout << "=====\r\n"; } template void testmatvec() { using STO = typename vector_traits::storage_type; std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PVector a; #else PVector a(C); #endif init_array(a,C); #if defined(STATIC_TEST) PMat m; #else PMat m(R,C); #endif init_array(m,R*C); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); #if defined(STATIC_TEST) PVector res = dot(m,a); #else PVector res = dot(m,a); #endif stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //std::cout << IsMatrix::value << "\r\n"; INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PVector cmsis_res; #else PVector cmsis_res(R); #endif typename CMSISMatrixType::type S; S.numRows = R; S.numCols = C; S.pData = reinterpret_cast(const_cast(m.ptr())); startSectionNB(2); cmsis_mat_vec_mult(&S, a.const_ptr(), cmsis_res.ptr()); startSectionNB(2); STOP_CYCLE_MEASUREMENT; //std::cout << cmsis_res; if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R, ErrThreshold::abserr,ErrThreshold::relerr)) { printf("matrix times vector failed \r\n"); } std::cout << "=====\r\n"; } template void testcomplexmatvec() { const T scalar = MatTestConstant::half; using STO = typename vector_traits::storage_type; std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PVector a; PVector b; #else PVector a(C); PVector b(C); #endif init_array(a,C); init_array(b,C); #if defined(STATIC_TEST) PMat m; #else PMat m(R,C); #endif init_array(m,R*C); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); #if defined(STATIC_TEST) PVector tmpv = a + b * scalar; PVector res = dot(m,tmpv); #else PVector tmpv = a + b * scalar; PVector res = dot(m,tmpv); #endif stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //std::cout << IsMatrix::value << "\r\n"; INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PVector cmsis_res; PVector tmp; #else PVector cmsis_res(R); PVector tmp(C); #endif typename CMSISMatrixType::type S; S.numRows = R; S.numCols = C; S.pData = reinterpret_cast(const_cast(m.ptr())); startSectionNB(2); cmsis_complex_mat_vec(&S, a.const_ptr(), b.const_ptr(), scalar, tmp.ptr(), cmsis_res.ptr()); startSectionNB(2); STOP_CYCLE_MEASUREMENT; //std::cout << cmsis_res; if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R, ErrThreshold::abserr,ErrThreshold::relerr)) { printf("matrix times vector expression failed \r\n"); } std::cout << "=====\r\n"; } template void testmatmult() { std::cout << "----\r\n"; std::cout << R << " x " << K << " x " << C << "\r\n"; using S = typename CMSISMatrixType::scalar; #if defined(STATIC_TEST) PMat ma; #else PMat ma(R,K); #endif init_array(ma,R*K); #if defined(STATIC_TEST) PMat mb; #else PMat mb(K,C); #endif init_array(mb,K*C); mb += TestConstant::small; //std::cout << ma; //std::cout << mb; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); #if defined(STATIC_TEST) PMat res = dot(ma,mb); #else PMat res = dot(ma,mb); #endif stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //PrintType(); //PrintType(); //std::cout << ma; //std::cout << mb; //std::cout << res; //std::cout << IsMatrix::value << "\r\n"; PMat tmp(C,K); INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PMat cmsis_res; #else PMat cmsis_res(R,C); #endif typename CMSISMatrixType::type SA; SA.numRows = R; SA.numCols = K; SA.pData = reinterpret_cast(ma.ptr()); typename CMSISMatrixType::type SB; SB.numRows = K; SB.numCols = C; SB.pData = reinterpret_cast(mb.ptr()); typename CMSISMatrixType::type RES; RES.numRows = R; RES.numCols = C; RES.pData = reinterpret_cast(cmsis_res.ptr()); startSectionNB(2); cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast(tmp.ptr())); startSectionNB(2); STOP_CYCLE_MEASUREMENT; //std::cout << cmsis_res; if (!validate(res,cmsis_res, ErrThreshold::abserr,ErrThreshold::relerr)) { printf("matrix times matrix expression failed \r\n"); } std::cout << "=====\r\n"; } template void testsubmatmult() { std::cout << "----\r\n"; std::cout << R << " x " << K << " x " << C << "\r\n"; using S = typename CMSISMatrixType::scalar; constexpr int TOTALA = 4 + 2*K + 2*R + K*R; constexpr int TOTALB = 4 + 2*C + 2*K + C*K; #if defined(STATIC_TEST) PMat ma; #else PMat ma(R+2,K+2); #endif init_array(ma,TOTALA); #if defined(STATIC_TEST) PMat mb; #else PMat mb(K+2,C+2); #endif init_array(mb,TOTALB); mb += MatTestConstant::value; //std::cout << ma; //std::cout << mb; INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PMat res(T{}); #else PMat res(R,C,T{}); #endif startSectionNB(1); res.sub(Slice(0,R),Slice(0,C)) = copy(dot(ma.sub(Slice(0,R),Slice(0,K)),mb.sub(Slice(0,K),Slice(0,C)))); stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //PrintType(); //PrintType(); //std::cout << ma; //std::cout << mb; //std::cout << res; //std::cout << IsMatrix::value << "\r\n"; PMat cmsis_res(R,C); PMat cmsis_ma(R,K); PMat cmsis_mb(K,C); PMat tmp(C,K); typename CMSISMatrixType::type SA; SA.numRows = R; SA.numCols = K; SA.pData = reinterpret_cast(cmsis_ma.ptr()); typename CMSISMatrixType::type SB; SB.numRows = K; SB.numCols = C; SB.pData = reinterpret_cast(cmsis_mb.ptr()); typename CMSISMatrixType::type RES; RES.numRows = R; RES.numCols = C; RES.pData = reinterpret_cast(cmsis_res.ptr()); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(2); cmsis_ma = copy(ma.sub(Slice(0,R),Slice(0,K))); cmsis_mb = copy(mb.sub(Slice(0,K),Slice(0,C))); cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast(tmp.ptr())); startSectionNB(2); STOP_CYCLE_MEASUREMENT; //std::cout << cmsis_res; if (!validate(res.sub(Slice(0,R),Slice(0,C)),cmsis_res, ErrThreshold::abserr,ErrThreshold::relerr)) { printf("matrix times matrix expression failed \r\n"); } std::cout << "=====\r\n"; } template void testmattranspose() { std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PMat ma; #else PMat ma(R,C); #endif init_array(ma,R*C); //PrintType(); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); #if defined(STATIC_TEST) PMat res = ma.transpose(); #else PMat res = ma.transpose(); #endif stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //std::cout << IsMatrix::value << "\r\n"; INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PMat cmsis_res; #else PMat cmsis_res(C,R); #endif typename CMSISMatrixType::type SA; SA.numRows = R; SA.numCols = C; SA.pData = reinterpret_cast::scalar*>(ma.ptr()); typename CMSISMatrixType::type RES; RES.numRows = C; RES.numCols = R; RES.pData = reinterpret_cast::scalar*>(cmsis_res.ptr()); startSectionNB(2); cmsis_mat_trans(&SA, &RES); startSectionNB(2); STOP_CYCLE_MEASUREMENT; //std::cout << cmsis_res; if (!validate(res,cmsis_res, ErrThreshold::abserr,ErrThreshold::relerr)) { printf("matrix transpose failed \r\n"); } std::cout << "=====\r\n"; } #if !defined(DISABLEFLOAT16) static float16_t _gen_sqrt(const float16_t v) { return((float16_t)sqrtf(v)); } #endif static float32_t _gen_sqrt(const float32_t v) { return(sqrtf(v)); } static float64_t _gen_sqrt(const float64_t v) { return(sqrt(v)); } template typename A, typename V,typename T> inline T _householder(Vector &res,const V&v,const T eps) { T alpha = v[0]; T tau; T beta; if (v.length()==1) { res[0] = T{}; return(T{}); } T xnorm2 = dot(v.sub(1),v.sub(1)); //std::cout << xnorm2 << "\r\n"; if (xnorm2 <= eps) { tau = T{}; res = T{}; } else { if (alpha<=0) { beta = _gen_sqrt(alpha*alpha+xnorm2); } else { beta = -_gen_sqrt(alpha*alpha+xnorm2); } T r = number_traits::one() / (alpha - beta); res = v * r; tau = (beta - alpha)/beta; res[0] = number_traits::one(); } return(tau); } template::value && SameElementType::value && IsVector::value,bool>::type = true> auto householder(const V&v,const T threshold) { constexpr int NB = StaticLength::value; Vector res; T beta = _householder(res,v,threshold); return std::tuple>(beta,res); } template::value && SameElementType::value && IsVector::value,bool>::type = true> auto householder(const V&v,const T threshold) { Vector res(v.length()); T beta = _householder(res,v,threshold); return std::tuple>(beta,res); } template::value && SameElementType::value && IsVector::value,bool>::type = true> auto householder(const V&v,const T threshold,TMP &res) { T beta = _householder(res,v,threshold); return beta; } template struct HouseholderThreshold; #if !defined(DISABLEFLOAT16) template<> struct HouseholderThreshold { static constexpr float16_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F16; }; #endif template<> struct HouseholderThreshold { static constexpr float64_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F64; }; template<> struct HouseholderThreshold { static constexpr float32_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F32; }; template static void testHouseholder() { std::cout << "----\r\n" << "N = " << NB << "\r\n"; #if defined(STATIC_TEST) PVector a; #else PVector a(NB); #endif cmsis_init_householder(a.ptr(),NB); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); auto res = householder(a,HouseholderThreshold::value); //PVector res;// = a + b; //float res_beta=0; stopSectionNB(1); STOP_CYCLE_MEASUREMENT; INIT_SYSTICK; START_CYCLE_MEASUREMENT; #if defined(STATIC_TEST) PVector ref; #else PVector ref(NB); #endif T ref_beta = cmsis_householder(a.const_ptr(),ref.ptr(),NB); STOP_CYCLE_MEASUREMENT; if (!validate(std::get<1>(res).const_ptr(),ref.const_ptr(),NB, ErrThreshold::abserr_householder,ErrThreshold::relerr_householder)) { printf("householder vector failed \r\n"); } if (!validate(std::get<0>(res),ref_beta, ErrThreshold::abserr_householder,ErrThreshold::relerr_householder)) { printf("householder beta failed \r\n"); } std::cout << "=====\r\n"; } #include "debug_mat.h" #if 1 // R >= C template typename A> auto QR(const Matrix&m,const T eps,bool wantQ) { #if defined(STATIC_TEST) Vector tau; Matrix RM = m; Matrix Q = Matrix::identity(); // Temporaries Vector tmpvec; Matrix tmpmat; #else Vector tau(m.columns()); Matrix RM = m; Matrix Q = Matrix::identity(m.rows()); // Temporaries Vector tmpvec(m.rows()); Matrix tmpmat(1,m.rows()); #endif const int NBC = m.columns(); const int NBR = m.rows(); for(index_t c=0;c vt(tmpvec,1,NBR-c); dot(tmpmat.sub(0,1,0,NBC-c),vt,RM.sub(c,c)); RM.sub(c,c) = RM.sub(c,c) - beta * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBC-c)); // Copy householder reflector // Not valid when c == C-1 // We don't want to use a test since CMSIS-DSP is not using // one and introducing a test would give worse performance RM.col(c,c+1) = copy(tmpvec.sub(1,NBR-c)); } auto beta = householder(RM.col(NBC-1,NBC-1),eps,tmpvec); tau[NBC-1] = beta; MatrixView vt(tmpvec,1,NBR-(NBC-1)); dot(tmpmat.sub(0,1,0,NBC-(NBC-1)),vt,RM.sub(NBC-1,NBC-1)); RM.sub(NBC-1,NBC-1) = RM.sub(NBC-1,NBC-1) - beta * outer(tmpvec.sub(0,NBR-(NBC-1)),tmpmat.row(0,0,NBC-(NBC-1))); if (wantQ) { for(index_t c=NBC-1;c>=0;c--) { tmpvec.sub(1) = copy(RM.col(c,c+1)); tmpvec[0] = number_traits::one(); MatrixView vt(tmpvec,1,NBR-c); dot(tmpmat.sub(0,1,0,NBR-c),vt,Q.sub(c,c)); Q.sub(c,c) = Q.sub(c,c) - tau[c] * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBR-c)); } } return std::make_tuple(RM,Q,tau); } template static void testQR() { std::cout << "----\r\n"; std::cout << R << " x " << C << "\r\n"; #if defined(STATIC_TEST) PMat a; #else PMat a(R,C); #endif cmsis_init_qr(a.ptr(),R,C); INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); auto res = QR(a,HouseholderThreshold::value,true); stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //std::cout << "next\r\n"; //std::cout << std::get<0>(res); //std::cout << std::get<1>(res); //std::cout << std::get<2>(res); // For fair comparison, in dynamic mode we must take into // account the memory allocations since they are made // by the QR algorithms #if !defined(STATIC_TEST) INIT_SYSTICK; START_CYCLE_MEASUREMENT; #endif #if 0 //defined(STATIC_TEST) PMat cmsis_res; PMat cmsis_outRp; PMat cmsis_outQp; PVector cmsis_tau; PVector cmsis_tmpa; PVector cmsis_tmpb; #else PMat cmsis_res(R,C); PMat cmsis_outRp(R,C); PMat cmsis_outQp(R,R); PVector cmsis_tau(C); PVector cmsis_tmpa(R); PVector cmsis_tmpb(C); #endif typename CMSISMatrixType::type RP; RP.numRows = R; RP.numCols = C; RP.pData = cmsis_outRp.ptr(); typename CMSISMatrixType::type QP; QP.numRows = R; QP.numCols = R; QP.pData = cmsis_outQp.ptr(); typename CMSISMatrixType::type IN; IN.numRows = R; IN.numCols = C; IN.pData = a.ptr(); //std::cout << "-------\r\n"; #if defined(STATIC_TEST) INIT_SYSTICK; START_CYCLE_MEASUREMENT; #endif arm_status status=cmsis_qr(&IN,HouseholderThreshold::value, &RP,&QP, cmsis_tau.ptr(), cmsis_tmpa.ptr(), cmsis_tmpb.ptr()); (void)status; STOP_CYCLE_MEASUREMENT; //std::cout << cmsis_outRp; //std::cout << cmsis_outQp; //std::cout << cmsis_tau; if (!validate(std::get<0>(res),cmsis_outRp, ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) { printf("QR Rp matrix failed \r\n"); } if (!validate(std::get<1>(res),cmsis_outQp, ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) { printf("QR Qp matrix failed \r\n"); } if (!validate(std::get<2>(res),cmsis_tau, ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) { printf("QR tau failed \r\n"); } std::cout << "=====\r\n"; } #endif template typename A> auto cholesky(const Matrix&a) { // Temporaries #if defined(STATIC_TEST) Matrix g = a; Vector tmp; #else Matrix g = a; Vector tmp(a.rows()); #endif const int NBR = a.rows(); g.col(0,0) = g.col(0,0) * (T)(number_traits::one() / _gen_sqrt(g(0,0))); for(int j=1;j::one() / _gen_sqrt(g(j,j)- tmp[j])); } return(g); } template static void testCholesky() { std::cout << "----\r\n"; std::cout << R << " x " << R << "\r\n"; #if defined(STATIC_TEST) PMat a; #else PMat a(R,R); #endif cmsis_init_cholesky(a.ptr(),R,R); //std::cout << a; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); // Not totally equivalent to CMSIS implementation // It should be possible to rewrite it to avoid use of // temporary buffer like CMSIS-DSP auto res = cholesky(a); stopSectionNB(1); STOP_CYCLE_MEASUREMENT; //std::cout << res; PMat cmsis_res(T{}); typename CMSISMatrixType::type OUT; OUT.numRows = R; OUT.numCols = R; OUT.pData = cmsis_res.ptr(); typename CMSISMatrixType::type IN; IN.numRows = R; IN.numCols = R; IN.pData = a.ptr(); //std::cout << "-------\r\n"; INIT_SYSTICK; START_CYCLE_MEASUREMENT; arm_status status=cmsis_cholesky(&IN,&OUT); (void)status; STOP_CYCLE_MEASUREMENT; //std::cout << cmsis_res; if (!validateLT(res,cmsis_res, ErrThreshold::abserr_cholesky,ErrThreshold::relerr_cholesky)) { printf("cholesky failed \r\n"); } std::cout << "=====\r\n"; } template struct TESTINV { static void all() { testinv(); } }; template struct TESTOUTER { static void all() { testouter(); } }; template struct TESTMATVEC { static void all() { testmatvec(); } }; template struct TESTCOMPLEXMATVEC { static void all() { testcomplexmatvec(); } }; template struct TESTADD { static void all() { testadd (); } }; template struct TESTMATTRANSPOSE { static void all() { testmattranspose(); } }; template struct TESTMATMULT { static void all() { testmatmult(); } }; template struct TESTSUBMATMULT { static void all() { testsubmatmult(); } }; template struct TEST_CASES { static void all() { (mp_push_front::all(),...); } }; template