/*
 * Fusion tests: compares fused expressions against separate reference calls.
 *
 * NOTE(review): this copy of the file appears to have lost every piece of
 * text that was written between '<' and '>' (the #include directives carry
 * no header name, the `template` keywords carry no parameter list, PVector
 * carries no template arguments, and two `for(int i=0;i ...` loops in test3
 * are cut off in the middle). It will not compile as-is; restore it from the
 * upstream source before building. The notes below describe only what is
 * still visible.
 *
 * fusion_test()
 *   Entry point, declared with C linkage at the top of the file. Its body is
 *   compiled only when GCC_COMPILER is NOT defined and FUSION_TEST is
 *   defined. It then calls all_fusion_test() once for each enabled data type
 *   macro: F64_DT, F32_DT, F16_DT (only if DISABLEFLOAT16 is not defined),
 *   Q31_DT, Q15_DT, Q7_DT.
 *
 * all_fusion_test()
 *   Reads TailForTests::tail and TailForTests::loop into nb_tails / nb_loops,
 *   then prints three titles ("Vector Fusion", "Dot Product Fusion",
 *   "Unroll Fusion") and calls test(), test2() and test3() six times each.
 *   NOTE(review): the template arguments of those calls are missing here, so
 *   the sizes actually exercised cannot be determined from this text.
 *
 * test()
 *   Prints "N = NB", builds vectors a, b, c (default-constructed when
 *   STATIC_TEST is defined, otherwise constructed with NB) and fills them
 *   with init_array(). Evaluates `results(resa,resb) = Merged{a + b,a + c}`
 *   inside section 1, between START_CYCLE_MEASUREMENT and
 *   STOP_CYCLE_MEASUREMENT (presumably a cycle counter - confirm against the
 *   macro definitions). The references refa / refb are computed with two
 *   cmsisdsp_add() calls and compared with validate(); a mismatch prints
 *   "add a failed" or "add b failed".
 *
 * test2()
 *   Same set-up as test(), but the fused operation is
 *   `dot(Merged{expr(a),expr(a)}, Merged{expr(b),expr(c)})`, unpacked with
 *   std::tie into two accumulators of type number_traits::accumulator.
 *   References come from two cmsisdsp_dot() calls; a mismatch prints
 *   "dot a failed" or "dot b failed".
 *
 * test3()
 *   Uses two arrays of U = 2 vectors (a[U], b[U]) and evaluates
 *   `results(res) = dot(unroll(...), unroll(...))` inside section 3, where
 *   each lambda returns expr(a[k]) or expr(b[k]) for index k.
 *   NOTE(review): the initialisation loop, the declaration of `res`, the
 *   reference loop and the validation code are truncated in this copy.
 */
extern "C" { extern void fusion_test(); } #include "allocator.h" #include #include #include #include #include #include #include #include template static void test() { std::cout << "----\r\n" << "N = " << NB << "\r\n"; #if defined(STATIC_TEST) PVector a; PVector b; PVector c; #else PVector a(NB); PVector b(NB); PVector c(NB); #endif init_array(a,NB); init_array(b,NB); init_array(c,NB); #if defined(STATIC_TEST) PVector resa; PVector resb; #else PVector resa(NB); PVector resb(NB); #endif INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); results(resa,resb) = Merged{a + b,a + c}; stopSectionNB(1); STOP_CYCLE_MEASUREMENT; PVector refa; PVector refb; INIT_SYSTICK; START_CYCLE_MEASUREMENT; cmsisdsp_add(a.const_ptr(),b.const_ptr(),refa.ptr(),NB); cmsisdsp_add(a.const_ptr(),c.const_ptr(),refb.ptr(),NB); STOP_CYCLE_MEASUREMENT; if (!validate(resa.const_ptr(),refa.const_ptr(),NB)) { printf("add a failed \r\n"); } if (!validate(resb.const_ptr(),refb.const_ptr(),NB)) { printf("add b failed \r\n"); } std::cout << "=====\r\n"; } template static void test2() { std::cout << "----\r\n" << "N = " << NB << "\r\n"; #if defined(STATIC_TEST) PVector a; PVector b; PVector c; #else PVector a(NB); PVector b(NB); PVector c(NB); #endif using Acc = typename number_traits::accumulator; init_array(a,NB); init_array(b,NB); init_array(c,NB); Acc resa,resb,refa,refb; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(2); std::tie(resa,resb) = dot(Merged{expr(a),expr(a)}, Merged{expr(b),expr(c)}); stopSectionNB(2); STOP_CYCLE_MEASUREMENT; INIT_SYSTICK; START_CYCLE_MEASUREMENT; cmsisdsp_dot(a.const_ptr(),b.const_ptr(),refa,NB); cmsisdsp_dot(a.const_ptr(),c.const_ptr(),refb,NB); STOP_CYCLE_MEASUREMENT; if (!validate(resa,refa)) { printf("dot a failed \r\n"); } if (!validate(resb,refb)) { printf("dot b failed \r\n"); } std::cout << "=====\r\n"; } template static void test3() { std::cout << "----\r\n" << "N = " << NB << "\r\n"; constexpr int U = 2; #if defined(STATIC_TEST) PVector a[U]; 
PVector b[U]; #else PVector a[U]={PVector(NB),PVector(NB)}; PVector b[U]={PVector(NB),PVector(NB)}; #endif using Acc = typename number_traits::accumulator; for(int i=0;i res; Acc ref[U]; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(3); results(res) = dot(unroll( [&a](index_t k){return expr(a[k]);}), unroll( [&b](index_t k){return expr(b[k]);}) ); stopSectionNB(3); STOP_CYCLE_MEASUREMENT; INIT_SYSTICK; START_CYCLE_MEASUREMENT; for(int i=0;i void all_fusion_test() { const int nb_tails = TailForTests::tail; const int nb_loops = TailForTests::loop; title("Vector Fusion"); test(); test(); test(); test(); test(); test(); title("Dot Product Fusion"); test2(); test2(); test2(); test2(); test2(); test2(); title("Unroll Fusion"); test3(); test3(); test3(); test3(); test3(); test3(); } void fusion_test() { /* gcc has some issues with this code. FVP is freezing when trying to run it. Since these kinds of fusion are not really used in the library (because performance is not good), we can disable these tests to at least be able to test other parts of the library with gcc. */ #if !defined(GCC_COMPILER) #if defined(FUSION_TEST) #if defined(F64_DT) all_fusion_test(); #endif #if defined(F32_DT) all_fusion_test(); #endif #if defined(F16_DT) && !defined(DISABLEFLOAT16) all_fusion_test(); #endif #if defined(Q31_DT) all_fusion_test(); #endif #if defined(Q15_DT) all_fusion_test(); #endif #if defined(Q7_DT) all_fusion_test(); #endif #endif #endif }