/hal_nxp-3.6.0/mcux/mcux-sdk/CMSIS/DSP/Source/MatrixFunctions/ |
D | arm_mat_cmplx_mult_f32.c | 83 f32x4_t vecB, vecA; in arm_mat_cmplx_mult_f32_2x2_mve() local 93 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_f32_2x2_mve() 96 acc0 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 97 acc0 = vcmlaq_rot90(acc0, vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 100 acc1 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 101 acc1 = vcmlaq_rot90(acc1, vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 114 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_f32_2x2_mve() 117 acc0 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 118 acc0 = vcmlaq_rot90(acc0, vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() 121 acc1 = vcmulq(vecA, vecB); in arm_mat_cmplx_mult_f32_2x2_mve() [all …]
|
D | arm_mat_cmplx_mult_f16.c | 77 f16x8_t vecB, vecA0, vecA1; in arm_mat_cmplx_mult_f16_2x2_mve() local 102 vecB = vldrhq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_f16_2x2_mve() 104 acc0 = vcmulq(vecA0, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 105 acc0 = vcmlaq_rot90(acc0, vecA0, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 107 acc1 = vcmulq(vecA1, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 108 acc1 = vcmlaq_rot90(acc1, vecA1, vecB); in arm_mat_cmplx_mult_f16_2x2_mve() 152 f16x8_t vecB, vecA0, vecA1, vecA2; in arm_mat_cmplx_mult_f16_3x3_mve() local 171 vecB = vldrhq_gather_shifted_offset_z(pInB, vecColBOffs0, p0); in arm_mat_cmplx_mult_f16_3x3_mve() 173 acc0 = vcmulq(vecA0, vecB); in arm_mat_cmplx_mult_f16_3x3_mve() 174 acc0 = vcmlaq_rot90(acc0, vecA0, vecB); in arm_mat_cmplx_mult_f16_3x3_mve() [all …]
|
D | arm_mat_mult_q7.c | 72 q7x16_t vecB, vecA0, vecA1; in arm_mat_mult_q7_2x2_mve() local 79 vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); in arm_mat_mult_q7_2x2_mve() 84 acc0 = vmladavq_s8(vecA0, vecB); in arm_mat_mult_q7_2x2_mve() 85 acc1 = vmladavq_s8(vecA1, vecB); in arm_mat_mult_q7_2x2_mve() 94 vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); in arm_mat_mult_q7_2x2_mve() 96 acc0 = vmladavq_s8(vecA0, vecB); in arm_mat_mult_q7_2x2_mve() 97 acc1 = vmladavq_s8(vecA1, vecB); in arm_mat_mult_q7_2x2_mve() 122 q7x16_t vecB, vecA0, vecA1, vecA2; in arm_mat_mult_q7_3x3_mve() local 130 vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); in arm_mat_mult_q7_3x3_mve() 136 acc0 = vmladavq_s8(vecA0, vecB); in arm_mat_mult_q7_3x3_mve() [all …]
|
D | arm_mat_cmplx_mult_q31.c | 79 q31x4_t vecB, vecA; in arm_mat_cmplx_mult_q31_2x2_mve() local 90 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_q31_2x2_mve() 92 acc0 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 93 acc1 = vmlaldavxq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 96 acc2 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 97 acc3 = vmlaldavxq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 108 vecB = vldrwq_gather_shifted_offset(pInB, vecColBOffs0); in arm_mat_cmplx_mult_q31_2x2_mve() 110 acc0 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 111 acc1 = vmlaldavxq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() 114 acc2 = vmlsldavq_s32(vecA, vecB); in arm_mat_cmplx_mult_q31_2x2_mve() [all …]
|
D | arm_mat_mult_q31.c | 79 q31x4_t vecB, vecA0, vecA1; in arm_mat_mult_q31_2x2_mve() local 89 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_q31_2x2_mve() 95 acc0 = vrmlaldavhq(vecA0, vecB); in arm_mat_mult_q31_2x2_mve() 96 acc1 = vrmlaldavhq(vecA1, vecB); in arm_mat_mult_q31_2x2_mve() 108 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_q31_2x2_mve() 110 acc0 = vrmlaldavhq(vecA0, vecB); in arm_mat_mult_q31_2x2_mve() 111 acc1 = vrmlaldavhq(vecA1, vecB); in arm_mat_mult_q31_2x2_mve() 139 q31x4_t vecB, vecA; in arm_mat_mult_q31_3x3_mve() local 148 vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0); in arm_mat_mult_q31_3x3_mve() 151 acc0 = vrmlaldavhq(vecA, vecB); in arm_mat_mult_q31_3x3_mve() [all …]
|
D | arm_mat_mult_q15.c | 80 q15x8_t vecB, vecA0, vecA1; in arm_mat_mult_q15_2x2_mve() local 87 vecB = vldrhq_gather_shifted_offset_z_s16((q15_t const *)pInB, vecColBOffs, p0); in arm_mat_mult_q15_2x2_mve() 92 acc0 = vmlaldavq(vecA0, vecB); in arm_mat_mult_q15_2x2_mve() 93 acc1 = vmlaldavq(vecA1, vecB); in arm_mat_mult_q15_2x2_mve() 105 vecB = vldrhq_gather_shifted_offset_z_s16(pInB, vecColBOffs, p0); in arm_mat_mult_q15_2x2_mve() 107 acc0 = vmlaldavq(vecA0, vecB); in arm_mat_mult_q15_2x2_mve() 108 acc1 = vmlaldavq(vecA1, vecB); in arm_mat_mult_q15_2x2_mve() 137 q15x8_t vecB, vecA0, vecA1, vecA2; in arm_mat_mult_q15_3x3_mve() local 145 vecB = vldrhq_gather_shifted_offset_z_s16((q15_t const *)pInB, vecColBOffs, p0); in arm_mat_mult_q15_3x3_mve() 151 acc0 = vmlaldavq(vecA0, vecB); in arm_mat_mult_q15_3x3_mve() [all …]
|
D | arm_mat_cmplx_mult_q15.c | 136 q15x8_t vecA, vecB, vecB2; in arm_mat_cmplx_mult_q15() local 172 vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); in arm_mat_cmplx_mult_q15() 174 acc0 = vmlsldavaq_s16(acc0, vecA, vecB); in arm_mat_cmplx_mult_q15() 175 acc1 = vmlaldavaxq_s16(acc1, vecA, vecB); in arm_mat_cmplx_mult_q15() 195 vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); in arm_mat_cmplx_mult_q15() 199 acc0 = vmlsldavaq_s16(acc0, vecA, vecB); in arm_mat_cmplx_mult_q15() 200 acc1 = vmlaldavaxq_s16(acc1, vecA, vecB); in arm_mat_cmplx_mult_q15() 230 q15x8_t vecA, vecB; in arm_mat_cmplx_mult_q15() local 265 vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); in arm_mat_cmplx_mult_q15() 267 acc0 = vmlsldavaq_s16(acc0, vecA, vecB); in arm_mat_cmplx_mult_q15() [all …]
|
/hal_nxp-3.6.0/mcux/mcux-sdk/CMSIS/DSP/Source/TransformFunctions/ |
D | arm_cfft_f16.c | 95 f16x8_t vecA, vecB, vecC, vecD; in _arm_radix4_butterfly_f16_mve() local 142 vecB = vldrhq_f16(inB); in _arm_radix4_butterfly_f16_mve() 148 vecSum1 = vecB + vecD; in _arm_radix4_butterfly_f16_mve() 149 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_f16_mve() 224 vecB = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 4); in _arm_radix4_butterfly_f16_mve() 227 vecSum1 = vecB + vecD; in _arm_radix4_butterfly_f16_mve() 228 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_f16_mve() 302 f16x8_t vecA, vecB, vecC, vecD; in _arm_radix4_butterfly_inverse_f16_mve() local 349 vecB = vldrhq_f16(inB); in _arm_radix4_butterfly_inverse_f16_mve() 355 vecSum1 = vecB + vecD; in _arm_radix4_butterfly_inverse_f16_mve() [all …]
|
D | arm_cfft_q31.c | 45 q31x4_t vecA, vecB, vecC, vecD; in _arm_radix4_butterfly_q31_mve() local 95 vecB = vldrwq_s32(inB); in _arm_radix4_butterfly_q31_mve() 101 vecSum1 = vhaddq(vecB, vecD); in _arm_radix4_butterfly_q31_mve() 102 vecDiff1 = vhsubq(vecB, vecD); in _arm_radix4_butterfly_q31_mve() 185 vecB = vldrwq_gather_base_s32(vecScGathAddr, 8); in _arm_radix4_butterfly_q31_mve() 188 vecSum1 = vhaddq(vecB, vecD); in _arm_radix4_butterfly_q31_mve() 189 vecDiff1 = vhsubq(vecB, vecD); in _arm_radix4_butterfly_q31_mve() 295 q31x4_t vecA, vecB, vecC, vecD; in _arm_radix4_butterfly_inverse_q31_mve() local 345 vecB = vldrwq_s32(inB); in _arm_radix4_butterfly_inverse_q31_mve() 351 vecSum1 = vhaddq(vecB, vecD); in _arm_radix4_butterfly_inverse_q31_mve() [all …]
|
/hal_nxp-3.6.0/mcux/mcux-sdk/CMSIS/DSP/Source/ComplexMathFunctions/ |
D | arm_cmplx_mag_squared_f32.c | 136 float32x4x2_t vecB; in arm_cmplx_mag_squared_f32() local 155 vecB = vld2q_f32(pSrc); in arm_cmplx_mag_squared_f32() 158 vRealB = vmulq_f32(vecB.val[0], vecB.val[0]); in arm_cmplx_mag_squared_f32() 159 vImagB = vmulq_f32(vecB.val[1], vecB.val[1]); in arm_cmplx_mag_squared_f32()
|
D | arm_cmplx_mag_f32.c | 171 float32x4x2_t vecB; in arm_cmplx_mag_f32() local 186 vecB = vld2q_f32(pSrc); in arm_cmplx_mag_f32() 193 vRealB = vmulq_f32(vecB.val[0], vecB.val[0]); in arm_cmplx_mag_f32() 194 vImagB = vmulq_f32(vecB.val[1], vecB.val[1]); in arm_cmplx_mag_f32()
|
/hal_nxp-3.6.0/mcux/mcux-sdk/CMSIS/DSP/Source/BasicMathFunctions/ |
D | arm_add_q31.c | 65 q31x4_t vecB; in arm_add_q31() local 76 vecB = vld1q(pSrcB); in arm_add_q31() 77 vst1q(pDst, vqaddq(vecA, vecB)); in arm_add_q31() 97 vecB = vld1q(pSrcB); in arm_add_q31() 98 vstrwq_p(pDst, vqaddq(vecA, vecB), p0); in arm_add_q31()
|
D | arm_sub_q31.c | 65 q31x4_t vecB; in arm_sub_q31() local 76 vecB = vld1q(pSrcB); in arm_sub_q31() 77 vst1q(pDst, vqsubq(vecA, vecB)); in arm_sub_q31() 97 vecB = vld1q(pSrcB); in arm_sub_q31() 98 vstrwq_p(pDst, vqsubq(vecA, vecB), p0); in arm_sub_q31()
|
D | arm_dot_prod_q31.c | 69 q31x4_t vecB; in arm_dot_prod_q31() local 81 vecB = vld1q(pSrcB); in arm_dot_prod_q31() 82 sum = vrmlaldavhaq(sum, vecA, vecB); in arm_dot_prod_q31() 101 vecB = vld1q(pSrcB); in arm_dot_prod_q31() 102 sum = vrmlaldavhaq_p(sum, vecA, vecB, p0); in arm_dot_prod_q31()
|
D | arm_add_q7.c | 66 q7x16_t vecB; in arm_add_q7() local 77 vecB = vld1q(pSrcB); in arm_add_q7() 78 vst1q(pDst, vqaddq(vecA, vecB)); in arm_add_q7() 98 vecB = vld1q(pSrcB); in arm_add_q7() 99 vstrbq_p(pDst, vqaddq(vecA, vecB), p0); in arm_add_q7()
|
D | arm_dot_prod_f16.c | 73 f16x8_t vecA, vecB; in arm_dot_prod_f16() local 91 vecB = vld1q(pSrcB); in arm_dot_prod_f16() 94 vecSum = vfmaq(vecSum, vecA, vecB); in arm_dot_prod_f16() 109 vecB = vld1q(pSrcB); in arm_dot_prod_f16() 110 vecSum = vfmaq_m(vecSum, vecA, vecB, p0); in arm_dot_prod_f16()
|
D | arm_sub_q7.c | 64 q7x16_t vecB; in arm_sub_q7() local 75 vecB = vld1q(pSrcB); in arm_sub_q7() 76 vst1q(pDst, vqsubq(vecA, vecB)); in arm_sub_q7() 96 vecB = vld1q(pSrcB); in arm_sub_q7() 97 vstrbq_p(pDst, vqsubq(vecA, vecB), p0); in arm_sub_q7()
|
D | arm_mult_q31.c | 63 q31x4_t vecA, vecB; in arm_mult_q31() local 74 vecB = vld1q(pSrcB); in arm_mult_q31() 75 vst1q(pDst, vqdmulhq(vecA, vecB)); in arm_mult_q31() 95 vecB = vld1q(pSrcB); in arm_mult_q31() 96 vstrwq_p(pDst, vqdmulhq(vecA, vecB), p0); in arm_mult_q31()
|
D | arm_dot_prod_q15.c | 67 q15x8_t vecB; in arm_dot_prod_q15() local 79 vecB = vld1q(pSrcB); in arm_dot_prod_q15() 80 sum = vmlaldavaq(sum, vecA, vecB); in arm_dot_prod_q15() 99 vecB = vld1q(pSrcB); in arm_dot_prod_q15() 100 sum = vmlaldavaq_p(sum, vecA, vecB, p0); in arm_dot_prod_q15()
|
D | arm_add_q15.c | 65 q15x8_t vecB; in arm_add_q15() local 76 vecB = vld1q(pSrcB); in arm_add_q15() 77 vst1q(pDst, vqaddq(vecA, vecB)); in arm_add_q15() 97 vecB = vld1q(pSrcB); in arm_add_q15() 98 vstrhq_p(pDst, vqaddq(vecA, vecB), p0); in arm_add_q15()
|
D | arm_sub_q15.c | 65 q15x8_t vecB; in arm_sub_q15() local 76 vecB = vld1q(pSrcB); in arm_sub_q15() 77 vst1q(pDst, vqsubq(vecA, vecB)); in arm_sub_q15() 97 vecB = vld1q(pSrcB); in arm_sub_q15() 98 vstrhq_p(pDst, vqsubq(vecA, vecB), p0); in arm_sub_q15()
|
D | arm_mult_q7.c | 63 q7x16_t vecA, vecB; in arm_mult_q7() local 74 vecB = vld1q(pSrcB); in arm_mult_q7() 75 vst1q(pDst, vqdmulhq(vecA, vecB)); in arm_mult_q7() 95 vecB = vld1q(pSrcB); in arm_mult_q7() 96 vstrbq_p(pDst, vqdmulhq(vecA, vecB), p0); in arm_mult_q7()
|
D | arm_dot_prod_f32.c | 73 f32x4_t vecA, vecB; in arm_dot_prod_f32() local 91 vecB = vld1q(pSrcB); in arm_dot_prod_f32() 94 vecSum = vfmaq(vecSum, vecA, vecB); in arm_dot_prod_f32() 109 vecB = vld1q(pSrcB); in arm_dot_prod_f32() 110 vecSum = vfmaq_m(vecSum, vecA, vecB, p0); in arm_dot_prod_f32()
|
D | arm_dot_prod_q7.c | 68 q7x16_t vecB; in arm_dot_prod_q7() local 80 vecB = vld1q(pSrcB); in arm_dot_prod_q7() 81 sum = vmladavaq(sum, vecA, vecB); in arm_dot_prod_q7() 100 vecB = vld1q(pSrcB); in arm_dot_prod_q7() 101 sum = vmladavaq_p(sum, vecA, vecB, p0); in arm_dot_prod_q7()
|
/hal_nxp-3.6.0/mcux/mcux-sdk/CMSIS/DSP/Source/DistanceFunctions/ |
D | arm_chebyshev_distance_f16.c | 70 f16x8_t vecA, vecB; in arm_chebyshev_distance_f16() local 79 vecB = vld1q(pB); in arm_chebyshev_distance_f16() 84 vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff); in arm_chebyshev_distance_f16() 99 vecB = vldrhq_z_f16(pB, p0); in arm_chebyshev_distance_f16() 105 vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0); in arm_chebyshev_distance_f16()
|