// Overloaded wrappers around CMSIS-DSP kernels, one overload per element type
// (float16_t / float32_t / float64_t or double, and the fixed-point Q7 / Q15 /
// Q31 types). Each wrapper only forwards its arguments to the matching arm_
// function; the fixed-point overloads reinterpret_cast the Q pointers first.
//
//   cmsisdsp_add(a, b, c, l)
//       Calls arm_add_ with (a, b, c, l).
//   cmsisdsp_dot(a, b, c, l)
//       Calls arm_dot_prod_ with (a, b, l, address of c); c is a reference
//       parameter, so the result lands in the caller's variable.
//   cmsisdsp_dot_expr(a, b, c, d, tmp1, tmp2, scale, r, l)
//       Calls, in order: arm_add_ (a, b, tmp1), arm_scale_ (tmp1, scale, tmp1),
//       arm_mult_ (c, d, tmp2), arm_dot_prod_ (tmp1, tmp2, l, address of r or
//       of r.v). tmp1 and tmp2 are passed as destination buffers.
//   cmsisdsp_fir(S, pSrc, pDst, blockSize)
//       Calls arm_fir_ with the same four arguments.
//   cmsisdsp_mat_add(a, b, c, row, col)
//       Fills three arm_matrix_instance_ structs (numRows = row, numCols = col,
//       pData = a / b / c) and calls arm_mat_add_ on them. The const source
//       pointers are cast to non-const to be stored in pData. The Q7 overload
//       deliberately does nothing: its parameters are voided and the body is
//       commented out (see the comment inside it).
//   cmsis_mat_vec_mult(pSrcMat, pVec, pDst)
//       Calls arm_mat_vec_mult_ with the same three arguments.
//   cmsis_complex_mat_vec(src, a, b, scalar, tmp, dst)
//       Calls, in order: arm_scale_ (b, scalar, tmp), arm_add_ (a, tmp, tmp),
//       arm_mat_vec_mult_ (src, tmp, dst). Both vector calls use src->numCols
//       as their length argument; tmp is used as scratch storage.
//
// The fixed-point arm_scale_ calls pass the raw `.v` member of the Q value
// followed by a literal 0 (presumably the shift argument of the CMSIS-DSP
// signature -- TODO confirm against the CMSIS-DSP header).
// The float16_t overloads are compiled only when DISABLEFLOAT16 is not defined.
// The _cmsis_outer helpers are guarded by ARM_MATH_MVEI / ARM_MATH_MVEF.
//
// NOTE(review): this copy of the file looks whitespace-collapsed and appears to
// have lost its angle-bracket spans: the targets of three #include directives,
// the template arguments of reinterpret_cast / const_cast, and parts of the
// _cmsis_outer and first cmsis_mat_vec_mult definitions. The line comments in
// the Q7 cmsisdsp_mat_add overload also now run to the end of their physical
// line. Confirm against the upstream file before changing any code here.
#include "allocator.h" #include #include #include using namespace arm_cmsis_dsp; #include "dsp/basic_math_functions.h" #include "dsp/basic_math_functions_f16.h" #include "dsp/filtering_functions.h" #include "dsp/matrix_functions.h" #include "dsp/matrix_functions_f16.h" #include "bench.h" #if !defined(DISABLEFLOAT16) void cmsisdsp_add(const float16_t* a, const float16_t* b, float16_t* c, uint32_t l) { arm_add_f16(a,b,c,l); }; #endif void cmsisdsp_add(const float64_t* a, const float64_t* b, float64_t* c, uint32_t l) { arm_add_f64(a,b,c,l); }; void cmsisdsp_add(const float32_t* a, const float32_t* b, float32_t* c, uint32_t l) { arm_add_f32(a,b,c,l); }; void cmsisdsp_add(const Q31* a, const Q31* b, Q31* c, uint32_t l) { arm_add_q31(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(c),l); }; void cmsisdsp_add(const Q15* a, const Q15* b, Q15* c, uint32_t l) { arm_add_q15(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(c),l); }; void cmsisdsp_add(const Q7* a, const Q7* b, Q7* c, uint32_t l) { arm_add_q7(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(c),l); }; #if !defined(DISABLEFLOAT16) void cmsisdsp_dot(const float16_t* a, const float16_t* b, float16_t &c, uint32_t l) { arm_dot_prod_f16(a,b,l,&c); }; #endif void cmsisdsp_dot(const float64_t* a, const float64_t* b, float64_t &c, uint32_t l) { arm_dot_prod_f64(a,b,l,&c); }; void cmsisdsp_dot(const float32_t* a, const float32_t* b, float32_t &c, uint32_t l) { arm_dot_prod_f32(a,b,l,&c); }; void cmsisdsp_dot(const Q31* a, const Q31* b, Q<15,48> &c, uint32_t l) { arm_dot_prod_q31(reinterpret_cast(a), reinterpret_cast(b),l, reinterpret_cast(&c)); }; void cmsisdsp_dot(const Q15* a, const Q15* b, Q<33,30> &c, uint32_t l) { arm_dot_prod_q15(reinterpret_cast(a), reinterpret_cast(b),l, reinterpret_cast(&c)); }; void cmsisdsp_dot(const Q7* a, const Q7* b, Q<17,14> &c, uint32_t l) { arm_dot_prod_q7(reinterpret_cast(a), reinterpret_cast(b),l, reinterpret_cast(&c)); }; void cmsisdsp_dot_expr(const 
double* a, const double* b, const double* c, const double* d, double* tmp1, double* tmp2, const double scale, double &r, uint32_t l) { arm_add_f64(a,b,tmp1,l); arm_scale_f64(tmp1,scale,tmp1,l); arm_mult_f64(c,d,tmp2,l); arm_dot_prod_f64(tmp1,tmp2,l,&r); }; void cmsisdsp_dot_expr(const float32_t* a, const float32_t* b, const float32_t* c, const float32_t* d, float32_t* tmp1, float32_t* tmp2, const float32_t scale, float32_t &r, uint32_t l) { arm_add_f32(a,b,tmp1,l); arm_scale_f32(tmp1,scale,tmp1,l); arm_mult_f32(c,d,tmp2,l); arm_dot_prod_f32(tmp1,tmp2,l,&r); }; #if !defined(DISABLEFLOAT16) void cmsisdsp_dot_expr(const float16_t* a, const float16_t* b, const float16_t* c, const float16_t* d, float16_t* tmp1, float16_t* tmp2, const float16_t scale, float16_t &r, uint32_t l) { arm_add_f16(a,b,tmp1,l); arm_scale_f16(tmp1,scale,tmp1,l); arm_mult_f16(c,d,tmp2,l); arm_dot_prod_f16(tmp1,tmp2,l,&r); }; #endif void cmsisdsp_fir(const arm_fir_instance_f32 * S, const float32_t * pSrc, float32_t * pDst, uint32_t blockSize) { arm_fir_f32(S,pSrc,pDst,blockSize); }; void cmsisdsp_fir(const arm_fir_instance_q7 * S, const Q7 * pSrc, Q7 * pDst, uint32_t blockSize) { arm_fir_q7(S,reinterpret_cast(pSrc), reinterpret_cast(pDst),blockSize); }; void cmsisdsp_fir(const arm_fir_instance_q15 * S, const Q15 * pSrc, Q15 * pDst, uint32_t blockSize) { arm_fir_q15(S,reinterpret_cast(pSrc), reinterpret_cast(pDst),blockSize); }; void cmsisdsp_fir(const arm_fir_instance_q31 * S, const Q31 * pSrc, Q31 * pDst, uint32_t blockSize) { arm_fir_q31(S,reinterpret_cast(pSrc), reinterpret_cast(pDst),blockSize); }; void cmsisdsp_dot_expr(const Q7* a, const Q7* b, const Q7* c, const Q7* d, Q7* tmp1, Q7* tmp2, const Q7 scale, Q<17,14> &r, uint32_t l) { arm_add_q7(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(tmp1),l); arm_scale_q7(reinterpret_cast(tmp1),scale.v,0, reinterpret_cast(tmp1),l); arm_mult_q7(reinterpret_cast(c), reinterpret_cast(d), reinterpret_cast(tmp2),l); 
arm_dot_prod_q7(reinterpret_cast(tmp1), reinterpret_cast(tmp2),l,&r.v); }; void cmsisdsp_dot_expr(const Q15* a, const Q15* b, const Q15* c, const Q15* d, Q15* tmp1, Q15* tmp2, const Q15 scale, Q<33,30> &r, uint32_t l) { arm_add_q15(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(tmp1),l); arm_scale_q15(reinterpret_cast(tmp1),scale.v,0, reinterpret_cast(tmp1),l); arm_mult_q15(reinterpret_cast(c), reinterpret_cast(d), reinterpret_cast(tmp2),l); arm_dot_prod_q15(reinterpret_cast(tmp1), reinterpret_cast(tmp2),l,&r.v); }; void cmsisdsp_dot_expr(const Q31* a, const Q31* b, const Q31* c, const Q31* d, Q31* tmp1, Q31* tmp2, const Q31 scale, Q<15,48> &r, uint32_t l) { arm_add_q31(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(tmp1),l); arm_scale_q31(reinterpret_cast(tmp1),scale.v,0, reinterpret_cast(tmp1),l); arm_mult_q31(reinterpret_cast(c), reinterpret_cast(d), reinterpret_cast(tmp2),l); arm_dot_prod_q31(reinterpret_cast(tmp1), reinterpret_cast(tmp2),l,&r.v); }; void cmsisdsp_mat_add(const float32_t* a, const float32_t* b, float32_t* c, uint32_t row,uint32_t col) { arm_matrix_instance_f32 srca; arm_matrix_instance_f32 srcb; arm_matrix_instance_f32 dst; srca.numRows = row; srca.numCols = col; srca.pData = (float32_t*)a; srcb.numRows = row; srcb.numCols = col; srcb.pData = (float32_t*)b; dst.numRows = row; dst.numCols = col; dst.pData = c; arm_mat_add_f32(&srca,&srcb,&dst); } #if !defined(DISABLEFLOAT16) void cmsisdsp_mat_add(const float16_t* a, const float16_t* b, float16_t* c, uint32_t row,uint32_t col) { arm_matrix_instance_f16 srca; arm_matrix_instance_f16 srcb; arm_matrix_instance_f16 dst; srca.numRows = row; srca.numCols = col; srca.pData = (float16_t*)a; srcb.numRows = row; srcb.numCols = col; srcb.pData = (float16_t*)b; dst.numRows = row; dst.numCols = col; dst.pData = c; arm_mat_add_f16(&srca,&srcb,&dst); } #endif void cmsisdsp_mat_add(const Q31* a, const Q31* b, Q31* c, uint32_t row,uint32_t col) { arm_matrix_instance_q31 srca; 
arm_matrix_instance_q31 srcb; arm_matrix_instance_q31 dst; srca.numRows = row; srca.numCols = col; srca.pData = reinterpret_cast(const_cast(a)); srcb.numRows = row; srcb.numCols = col; srcb.pData = reinterpret_cast(const_cast(b)); dst.numRows = row; dst.numCols = col; dst.pData = reinterpret_cast(c); arm_mat_add_q31(&srca,&srcb,&dst); } void cmsisdsp_mat_add(const Q15* a, const Q15* b, Q15* c, uint32_t row,uint32_t col) { arm_matrix_instance_q15 srca; arm_matrix_instance_q15 srcb; arm_matrix_instance_q15 dst; srca.numRows = row; srca.numCols = col; srca.pData = reinterpret_cast(const_cast(a)); srcb.numRows = row; srcb.numCols = col; srcb.pData = reinterpret_cast(const_cast(b)); dst.numRows = row; dst.numCols = col; dst.pData = reinterpret_cast(c); arm_mat_add_q15(&srca,&srcb,&dst); } void cmsisdsp_mat_add(const Q7* a, const Q7* b, Q7* c, uint32_t row,uint32_t col) { (void)a; (void)b; (void)c; (void)row; (void)col; // Doing nothing since there is no equivalent CMSIS-DSP // function // Required to enable the build /* arm_matrix_instance_q7 srca; arm_matrix_instance_q7 srcb; arm_matrix_instance_q7 dst; srca.numRows = row; srca.numCols = col; srca.pData = reinterpret_cast(const_cast(a)); srcb.numRows = row; srcb.numCols = col; srcb.pData = reinterpret_cast(const_cast(b)); dst.numRows = row; dst.numCols = col; dst.pData = reinterpret_cast(c); arm_mat_add_q7(&srca,&srcb,&dst); */ } #if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) void _cmsis_outer(const float32_t *a, const float32_t *b, float32_t *res, const uint32_t r,const uint32_t c) { for(unsigned int row=0; row(a); const q31_t *pb = reinterpret_cast(b); q31_t *pr = reinterpret_cast(res); for(unsigned int row=0; row(a); const q15_t *pb = reinterpret_cast(b); q15_t *pr = reinterpret_cast(res); for(unsigned int row=0; row(a); const q7_t *pb = reinterpret_cast(b); q7_t *pr = reinterpret_cast(res); for(unsigned int row=0; row(pVec), reinterpret_cast(pDst)); } void cmsis_mat_vec_mult( const arm_matrix_instance_q15 
*pSrcMat, const Q15 *pVec, Q15 *pDst) { arm_mat_vec_mult_q15(pSrcMat, reinterpret_cast(pVec), reinterpret_cast(pDst)); } void cmsis_mat_vec_mult( const arm_matrix_instance_q7 *pSrcMat, const Q7 *pVec, Q7 *pDst) { arm_mat_vec_mult_q7(pSrcMat, reinterpret_cast(pVec), reinterpret_cast(pDst)); } extern void cmsis_complex_mat_vec( const arm_matrix_instance_f32 * src, const float32_t * a, const float32_t * b, const float32_t scalar, float32_t * tmp, float32_t * dst) { arm_scale_f32(b,scalar,tmp,src->numCols); arm_add_f32(a,tmp,tmp,src->numCols); arm_mat_vec_mult_f32(src, tmp, dst); } #if !defined(DISABLEFLOAT16) extern void cmsis_complex_mat_vec( const arm_matrix_instance_f16 * src, const float16_t * a, const float16_t * b, const float16_t scalar, float16_t * tmp, float16_t * dst) { arm_scale_f16(b,scalar,tmp,src->numCols); arm_add_f16(a,tmp,tmp,src->numCols); arm_mat_vec_mult_f16(src, tmp, dst); } #endif extern void cmsis_complex_mat_vec( const arm_matrix_instance_q31 * src, const Q31 * a, const Q31 * b, const Q31 scalar, Q31 * tmp, Q31 * dst) { arm_scale_q31(reinterpret_cast(b), scalar.v,0, reinterpret_cast(tmp),src->numCols); arm_add_q31(reinterpret_cast(a), reinterpret_cast(tmp), reinterpret_cast(tmp),src->numCols); arm_mat_vec_mult_q31(src, reinterpret_cast(tmp), reinterpret_cast(dst)); } extern void cmsis_complex_mat_vec( const arm_matrix_instance_q15 * src, const Q15 * a, const Q15 * b, const Q15 scalar, Q15 * tmp, Q15 * dst) { arm_scale_q15(reinterpret_cast(b), scalar.v,0, reinterpret_cast(tmp),src->numCols); arm_add_q15(reinterpret_cast(a), reinterpret_cast(tmp), reinterpret_cast(tmp),src->numCols); arm_mat_vec_mult_q15(src, reinterpret_cast(tmp), reinterpret_cast(dst)); } extern void cmsis_complex_mat_vec( const arm_matrix_instance_q7 * src, const Q7 * a, const Q7 * b, const Q7 scalar, Q7 * tmp, Q7 * dst) { arm_scale_q7(reinterpret_cast(b), scalar.v,0, reinterpret_cast(tmp),src->numCols); arm_add_q7(reinterpret_cast(a), reinterpret_cast(tmp), 
reinterpret_cast(tmp),src->numCols); arm_mat_vec_mult_q7(src, reinterpret_cast(tmp), reinterpret_cast(dst)); }