| /cmsis-dsp-latest/Source/MatrixFunctions/ |
| D | arm_mat_cmplx_mult_f32.c | 83 f32x4_t vecB, vecA; in arm_mat_cmplx_mult_f32_2x2_mve() local 93 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_f32_2x2_mve() 96 acc0 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 97 acc0 = vcmlaq_rot90(acc0, vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 100 acc1 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 101 acc1 = vcmlaq_rot90(acc1, vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 114 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_f32_2x2_mve() 117 acc0 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 118 acc0 = vcmlaq_rot90(acc0, vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 121 acc1 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() [all …]
|
| D | arm_mat_cmplx_mult_f16.c | 77 f16x8_t vecB, vecA0, vecA1; in arm_mat_cmplx_mult_f16_2x2_mve() local 102 vecB = vldrhq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_f16_2x2_mve() 104 acc0 = vcmulq(vecA0, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 105 acc0 = vcmlaq_rot90(acc0, vecA0, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 107 acc1 = vcmulq(vecA1, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 108 acc1 = vcmlaq_rot90(acc1, vecA1, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 153 f16x8_t vecB, vecA0, vecA1, vecA2; in arm_mat_cmplx_mult_f16_3x3_mve() local 172 vecB = vldrhq_gather_shifted_offset_z(pInB, vecColBOffs0, p0); in arm_mat_cmplx_mult_f16_3x3_mve() 174 acc0 = vcmulq(vecA0, vecB); in arm_mat_cmplx_mult_f16_3x3_mve() 175 acc0 = vcmlaq_rot90(acc0, vecA0, vecB); in arm_mat_cmplx_mult_f16_3x3_mve() [all …]
|
| D | arm_mat_mult_q7.c | 72 q7x16_t vecB, vecA0, vecA1; in arm_mat_mult_q7_2x2_mve() local 79 vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); in arm_mat_mult_q7_2x2_mve() 84 acc0 = vmladavq_s8(vecA0, vecB); in arm_mat_mult_q7_2x2_mve() 85 acc1 = vmladavq_s8(vecA1, vecB); in arm_mat_mult_q7_2x2_mve() 94 vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); in arm_mat_mult_q7_2x2_mve() 96 acc0 = vmladavq_s8(vecA0, vecB); in arm_mat_mult_q7_2x2_mve() 97 acc1 = vmladavq_s8(vecA1, vecB); in arm_mat_mult_q7_2x2_mve() 122 q7x16_t vecB, vecA0, vecA1, vecA2; in arm_mat_mult_q7_3x3_mve() local 130 vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); in arm_mat_mult_q7_3x3_mve() 136 acc0 = vmladavq_s8(vecA0, vecB); in arm_mat_mult_q7_3x3_mve() [all …]
|
| D | arm_mat_mult_q31.c | 79 q31x4_t vecB, vecA0, vecA1; in arm_mat_mult_q31_2x2_mve() local 89 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_q31_2x2_mve() 95 acc0 = vrmlaldavhq(vecA0, vecB); in arm_mat_mult_q31_2x2_mve() 96 acc1 = vrmlaldavhq(vecA1, vecB); in arm_mat_mult_q31_2x2_mve() 108 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_q31_2x2_mve() 110 acc0 = vrmlaldavhq(vecA0, vecB); in arm_mat_mult_q31_2x2_mve() 111 acc1 = vrmlaldavhq(vecA1, vecB); in arm_mat_mult_q31_2x2_mve() 139 q31x4_t vecB, vecA; in arm_mat_mult_q31_3x3_mve() local 148 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_q31_3x3_mve() 151 acc0 = vrmlaldavhq(vecA, vecB); in arm_mat_mult_q31_3x3_mve() [all …]
|
| D | arm_mat_cmplx_mult_q31.c | 79 q31x4_t vecB, vecA; in arm_mat_cmplx_mult_q31_2x2_mve() local 90 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_q31_2x2_mve() 92 acc0 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 93 acc1 = vmlaldavxq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 96 acc2 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 97 acc3 = vmlaldavxq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 108 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_q31_2x2_mve() 110 acc0 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 111 acc1 = vmlaldavxq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 114 acc2 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() [all …]
|
| D | arm_mat_mult_opt_q31.c | 83 q31x4_t vecB, vecA0, vecA1; in arm_mat_mult_opt_q31_2x2_mve() local 93 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_opt_q31_2x2_mve() 99 acc0 = vrmlaldavhq(vecA0, vecB); in arm_mat_mult_opt_q31_2x2_mve() 100 acc1 = vrmlaldavhq(vecA1, vecB); in arm_mat_mult_opt_q31_2x2_mve() 112 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_opt_q31_2x2_mve() 114 acc0 = vrmlaldavhq(vecA0, vecB); in arm_mat_mult_opt_q31_2x2_mve() 115 acc1 = vrmlaldavhq(vecA1, vecB); in arm_mat_mult_opt_q31_2x2_mve() 143 q31x4_t vecB, vecA; in arm_mat_mult_opt_q31_3x3_mve() local 152 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_opt_q31_3x3_mve() 155 acc0 = vrmlaldavhq(vecA, vecB); in arm_mat_mult_opt_q31_3x3_mve() [all …]
|
| D | arm_mat_mult_q15.c | 83 q15x8_t vecB, vecA0, vecA1; in arm_mat_mult_q15_2x2_mve() local 90 vecB = vldrhq_gather_shifted_offset_z_s16((q15_t const *)pInB, vecColBOffs, p0); in arm_mat_mult_q15_2x2_mve() 95 acc0 = vmlaldavq(vecA0, vecB); in arm_mat_mult_q15_2x2_mve() 96 acc1 = vmlaldavq(vecA1, vecB); in arm_mat_mult_q15_2x2_mve() 108 vecB = vldrhq_gather_shifted_offset_z_s16(pInB, vecColBOffs, p0); in arm_mat_mult_q15_2x2_mve() 110 acc0 = vmlaldavq(vecA0, vecB); in arm_mat_mult_q15_2x2_mve() 111 acc1 = vmlaldavq(vecA1, vecB); in arm_mat_mult_q15_2x2_mve() 140 q15x8_t vecB, vecA0, vecA1, vecA2; in arm_mat_mult_q15_3x3_mve() local 148 vecB = vldrhq_gather_shifted_offset_z_s16((q15_t const *)pInB, vecColBOffs, p0); in arm_mat_mult_q15_3x3_mve() 154 acc0 = vmlaldavq(vecA0, vecB); in arm_mat_mult_q15_3x3_mve() [all …]
|
| D | arm_mat_cmplx_mult_q15.c | 136 q15x8_t vecA, vecB, vecB2; in arm_mat_cmplx_mult_q15() local 172 vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); in arm_mat_cmplx_mult_q15() 174 acc0 = vmlsldavaq_s16(acc0, vecA, vecB); in arm_mat_cmplx_mult_q15() 175 acc1 = vmlaldavaxq_s16(acc1, vecA, vecB); in arm_mat_cmplx_mult_q15() 195 vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); in arm_mat_cmplx_mult_q15() 199 acc0 = vmlsldavaq_s16(acc0, vecA, vecB); in arm_mat_cmplx_mult_q15() 200 acc1 = vmlaldavaxq_s16(acc1, vecA, vecB); in arm_mat_cmplx_mult_q15() 230 q15x8_t vecA, vecB; in arm_mat_cmplx_mult_q15() local 265 vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); in arm_mat_cmplx_mult_q15() 267 acc0 = vmlsldavaq_s16(acc0, vecA, vecB); in arm_mat_cmplx_mult_q15() [all …]
|
| /cmsis-dsp-latest/dsppp/Include/dsppp/Helium/ |
| D | matrix_multiply_fixed.hpp | 43 VEC vecB, vecA0, vecA1; in arm_mat_mult_2x2_mve() local 50 vecB = inner::vload1_z<StaticStride<MB>::value>(pInB,MATRIX_DIM2,p0); in arm_mat_mult_2x2_mve() 54 vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM2,p0); in arm_mat_mult_2x2_mve() 61 acc0 = inner::vmacc(vecA0, vecB,p0); in arm_mat_mult_2x2_mve() 62 acc1 = inner::vmacc(vecA1, vecB,p0); in arm_mat_mult_2x2_mve() 73 vecB = inner::vload1_z<StaticStride<MB>::value>(pInB,MATRIX_DIM2,p0); in arm_mat_mult_2x2_mve() 77 vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM2,p0); in arm_mat_mult_2x2_mve() 80 acc0 = inner::vmacc(vecA0, vecB,p0); in arm_mat_mult_2x2_mve() 81 acc1 = inner::vmacc(vecA1, vecB,p0); in arm_mat_mult_2x2_mve() 111 VEC vecB, vecA0, vecA1, vecA2; in arm_mat_mult_3x3_mve() local [all …]
|
| /cmsis-dsp-latest/Source/StatisticsFunctions/ |
| D | arm_mse_f32.c | 60 float32x4_t vecA, vecB; in arm_mse_f32() local 73 vecB = vld1q(pSrcB); in arm_mse_f32() 76 vecA = vsubq(vecA, vecB); in arm_mse_f32() 91 vecB = vld1q(pSrcB); in arm_mse_f32() 93 vecA = vsubq(vecA, vecB); in arm_mse_f32() 114 float32x4_t vecA, vecB; in arm_mse_f32() local 131 vecB = vld1q_f32(pSrcB); in arm_mse_f32() 134 vecA = vsubq_f32(vecA, vecB); in arm_mse_f32()
|
| /cmsis-dsp-latest/Source/BasicMathFunctions/ |
| D | arm_add_q31.c | 64 q31x4_t vecB; in arm_add_q31() local 75 vecB = vld1q(pSrcB); in arm_add_q31() 76 vst1q(pDst, vqaddq(vecA, vecB)); in arm_add_q31() 96 vecB = vld1q(pSrcB); in arm_add_q31() 97 vstrwq_p(pDst, vqaddq(vecA, vecB), p0); in arm_add_q31()
|
| D | arm_sub_q31.c | 64 q31x4_t vecB; in arm_sub_q31() local 75 vecB = vld1q(pSrcB); in arm_sub_q31() 76 vst1q(pDst, vqsubq(vecA, vecB)); in arm_sub_q31() 96 vecB = vld1q(pSrcB); in arm_sub_q31() 97 vstrwq_p(pDst, vqsubq(vecA, vecB), p0); in arm_sub_q31()
|
| D | arm_dot_prod_q31.c | 68 q31x4_t vecB; in arm_dot_prod_q31() local 80 vecB = vld1q(pSrcB); in arm_dot_prod_q31() 81 sum = vrmlaldavhaq(sum, vecA, vecB); in arm_dot_prod_q31() 100 vecB = vld1q(pSrcB); in arm_dot_prod_q31() 101 sum = vrmlaldavhaq_p(sum, vecA, vecB, p0); in arm_dot_prod_q31()
|
| D | arm_add_q7.c | 65 q7x16_t vecB; in arm_add_q7() local 76 vecB = vld1q(pSrcB); in arm_add_q7() 77 vst1q(pDst, vqaddq(vecA, vecB)); in arm_add_q7() 97 vecB = vld1q(pSrcB); in arm_add_q7() 98 vstrbq_p(pDst, vqaddq(vecA, vecB), p0); in arm_add_q7()
|
| D | arm_dot_prod_f16.c | 60 f16x8_t vecA, vecB; in arm_dot_prod_f16() local 78 vecB = vld1q(pSrcB); in arm_dot_prod_f16() 81 vecSum = vfmaq(vecSum, vecA, vecB); in arm_dot_prod_f16() 96 vecB = vld1q(pSrcB); in arm_dot_prod_f16() 97 vecSum = vfmaq_m(vecSum, vecA, vecB, p0); in arm_dot_prod_f16()
|
| D | arm_sub_q7.c | 63 q7x16_t vecB; in arm_sub_q7() local 74 vecB = vld1q(pSrcB); in arm_sub_q7() 75 vst1q(pDst, vqsubq(vecA, vecB)); in arm_sub_q7() 95 vecB = vld1q(pSrcB); in arm_sub_q7() 96 vstrbq_p(pDst, vqsubq(vecA, vecB), p0); in arm_sub_q7()
|
| D | arm_dot_prod_q15.c | 66 q15x8_t vecB; in arm_dot_prod_q15() local 78 vecB = vld1q(pSrcB); in arm_dot_prod_q15() 79 sum = vmlaldavaq(sum, vecA, vecB); in arm_dot_prod_q15() 98 vecB = vld1q(pSrcB); in arm_dot_prod_q15() 99 sum = vmlaldavaq_p(sum, vecA, vecB, p0); in arm_dot_prod_q15()
|
| D | arm_mult_q31.c | 62 q31x4_t vecA, vecB; in arm_mult_q31() local 73 vecB = vld1q(pSrcB); in arm_mult_q31() 74 vst1q(pDst, vqdmulhq(vecA, vecB)); in arm_mult_q31() 94 vecB = vld1q(pSrcB); in arm_mult_q31() 95 vstrwq_p(pDst, vqdmulhq(vecA, vecB), p0); in arm_mult_q31()
|
| D | arm_add_q15.c | 64 q15x8_t vecB; in arm_add_q15() local 75 vecB = vld1q(pSrcB); in arm_add_q15() 76 vst1q(pDst, vqaddq(vecA, vecB)); in arm_add_q15() 96 vecB = vld1q(pSrcB); in arm_add_q15() 97 vstrhq_p(pDst, vqaddq(vecA, vecB), p0); in arm_add_q15()
|
| D | arm_mult_q7.c | 62 q7x16_t vecA, vecB; in arm_mult_q7() local 73 vecB = vld1q(pSrcB); in arm_mult_q7() 74 vst1q(pDst, vqdmulhq(vecA, vecB)); in arm_mult_q7() 94 vecB = vld1q(pSrcB); in arm_mult_q7() 95 vstrbq_p(pDst, vqdmulhq(vecA, vecB), p0); in arm_mult_q7()
|
| D | arm_sub_q15.c | 64 q15x8_t vecB; in arm_sub_q15() local 75 vecB = vld1q(pSrcB); in arm_sub_q15() 76 vst1q(pDst, vqsubq(vecA, vecB)); in arm_sub_q15() 96 vecB = vld1q(pSrcB); in arm_sub_q15() 97 vstrhq_p(pDst, vqsubq(vecA, vecB), p0); in arm_sub_q15()
|
| D | arm_dot_prod_q7.c | 67 q7x16_t vecB; in arm_dot_prod_q7() local 79 vecB = vld1q(pSrcB); in arm_dot_prod_q7() 80 sum = vmladavaq(sum, vecA, vecB); in arm_dot_prod_q7() 99 vecB = vld1q(pSrcB); in arm_dot_prod_q7() 100 sum = vmladavaq_p(sum, vecA, vecB, p0); in arm_dot_prod_q7()
|
| /cmsis-dsp-latest/Source/DistanceFunctions/ |
| D | arm_chebyshev_distance_f16.c | 70 f16x8_t vecA, vecB; in arm_chebyshev_distance_f16() local 79 vecB = vld1q(pB); in arm_chebyshev_distance_f16() 84 vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff); in arm_chebyshev_distance_f16() 99 vecB = vldrhq_z_f16(pB, p0); in arm_chebyshev_distance_f16() 105 vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0); in arm_chebyshev_distance_f16()
|
| /cmsis-dsp-latest/Source/ComplexMathFunctions/ |
| D | arm_cmplx_mag_squared_f32.c | 135 float32x4x2_t vecB; in arm_cmplx_mag_squared_f32() local 154 vecB = vld2q_f32(pSrc); in arm_cmplx_mag_squared_f32() 157 vRealB = vmulq_f32(vecB.val[0], vecB.val[0]); in arm_cmplx_mag_squared_f32() 158 vImagB = vmulq_f32(vecB.val[1], vecB.val[1]); in arm_cmplx_mag_squared_f32()
|
| /cmsis-dsp-latest/Source/TransformFunctions/ |
| D | arm_cfft_f16.c | 95 f16x8_t vecA, vecB, vecC, vecD; in _arm_radix4_butterfly_f16_mve() local 140 vecB = vldrhq_f16(inB); in _arm_radix4_butterfly_f16_mve() 146 vecSum1 = vecB + vecD; in _arm_radix4_butterfly_f16_mve() 147 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_f16_mve() 223 vecB = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 4); in _arm_radix4_butterfly_f16_mve() 226 vecSum1 = vecB + vecD; in _arm_radix4_butterfly_f16_mve() 227 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_f16_mve() 301 f16x8_t vecA, vecB, vecC, vecD; in _arm_radix4_butterfly_inverse_f16_mve() local 348 vecB = vldrhq_f16(inB); in _arm_radix4_butterfly_inverse_f16_mve() 354 vecSum1 = vecB + vecD; in _arm_radix4_butterfly_inverse_f16_mve() [all …]
|