/cmsis-dsp-latest/Source/StatisticsFunctions/ |
D | arm_mse_f32.c | 61 float32x4_t vecSum; in arm_mse_f32() local 64 vecSum = vdupq_n_f32(0.0f); in arm_mse_f32() 78 vecSum = vfmaq(vecSum, vecA, vecA); in arm_mse_f32() 94 vecSum = vfmaq_m(vecSum, vecA, vecA, p0); in arm_mse_f32() 97 sum = vecAddAcrossF32Mve(vecSum); in arm_mse_f32() 115 float32x4_t vecSum; in arm_mse_f32() local 119 vecSum = vdupq_n_f32(0.0f); in arm_mse_f32() 136 vecSum = vfmaq_f32(vecSum, vecA, vecA); in arm_mse_f32() 144 sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum))); in arm_mse_f32() 146 tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)); in arm_mse_f32()
|
D | arm_accumulate_f32.c | 58 f32x4_t vecSum; in arm_accumulate_f32() local 61 vecSum = vdupq_n_f32(0.0f); in arm_accumulate_f32() 75 vecSum = vaddq_f32(vecSum, vecA); in arm_accumulate_f32() 90 vecSum = vaddq_m(vecSum,vecSum, vecA, p0); in arm_accumulate_f32() 93 sum = vecAddAcrossF32Mve(vecSum); in arm_accumulate_f32()
|
D | arm_mse_f16.c | 61 float16x8_t vecSum; in arm_mse_f16() local 64 vecSum = vdupq_n_f16(0.0f16); in arm_mse_f16() 77 vecSum = vfmaq(vecSum, vecA, vecA); in arm_mse_f16() 93 vecSum = vfmaq_m(vecSum, vecA, vecA, p0); in arm_mse_f16() 96 sum = vecAddAcrossF16Mve(vecSum); in arm_mse_f16()
|
/cmsis-dsp-latest/Source/BasicMathFunctions/ |
D | arm_dot_prod_f16.c | 61 f16x8_t vecSum; in arm_dot_prod_f16() local 64 vecSum = vdupq_n_f16(0.0f); in arm_dot_prod_f16() 81 vecSum = vfmaq(vecSum, vecA, vecB); in arm_dot_prod_f16() 97 vecSum = vfmaq_m(vecSum, vecA, vecB, p0); in arm_dot_prod_f16() 100 sum = vecAddAcrossF16Mve(vecSum); in arm_dot_prod_f16()
|
D | arm_dot_prod_f32.c | 73 f32x4_t vecSum; in arm_dot_prod_f32() local 76 vecSum = vdupq_n_f32(0.0f); in arm_dot_prod_f32() 93 vecSum = vfmaq(vecSum, vecA, vecB); in arm_dot_prod_f32() 109 vecSum = vfmaq_m(vecSum, vecA, vecB, p0); in arm_dot_prod_f32() 112 sum = vecAddAcrossF32Mve(vecSum); in arm_dot_prod_f32()
|
/cmsis-dsp-latest/Source/TransformFunctions/ |
D | arm_cfft_f16.c | 260 f16x8_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_f16_mve() local 277 vecSum = vaddq(vecIn0, vecIn1); in arm_cfft_radix4by2_f16_mve() 282 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_f16_mve() 473 f16x8_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_inverse_f16_mve() local 490 vecSum = vaddq(vecIn0, vecIn1); in arm_cfft_radix4by2_inverse_f16_mve() 495 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_inverse_f16_mve()
|
D | arm_cfft_q31.c | 229 q31x4_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_q31_mve() local 245 vecSum = vhaddq(vecIn0, vecIn1); in arm_cfft_radix4by2_q31_mve() 246 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_q31_mve() 481 q31x4_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_inverse_q31_mve() local 499 vecSum = vhaddq(vecIn0, vecIn1); in arm_cfft_radix4by2_inverse_q31_mve() 500 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_inverse_q31_mve()
|
D | arm_cfft_q15.c | 211 q15x8_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_q15_mve() local 230 vecSum = vhaddq(vecIn0, vecIn1); in arm_cfft_radix4by2_q15_mve() 231 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_q15_mve() 448 q15x8_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_inverse_q15_mve() local 467 vecSum = vhaddq(vecIn0, vecIn1); in arm_cfft_radix4by2_inverse_q15_mve() 468 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_inverse_q15_mve()
|
D | arm_cfft_f32.c | 263 f32x4_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_f32_mve() local 280 vecSum = vecIn0 + vecIn1; in arm_cfft_radix4by2_f32_mve() 285 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_f32_mve() 476 f32x4_t vecIn0, vecIn1, vecSum, vecDiff; in arm_cfft_radix4by2_inverse_f32_mve() local 493 vecSum = vecIn0 + vecIn1; in arm_cfft_radix4by2_inverse_f32_mve() 498 vst1q(pIn0, vecSum); in arm_cfft_radix4by2_inverse_f32_mve()
|