/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/ConvolutionFunctions/ |
D | arm_nn_mat_mult_kernel_q7_q15.c | 67 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local 84 sum2 = __SMLAD(inA11, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15() 92 sum2 = __SMLAD(inA12, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15() 107 sum2 += inA1 * inB2; in arm_nn_mat_mult_kernel_q7_q15() 114 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15() 131 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local 144 sum2 = __SMLAD(inA11, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15() 150 sum2 = __SMLAD(inA12, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15() 162 sum2 += inA1 * inB2; in arm_nn_mat_mult_kernel_q7_q15() 167 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15()
|
D | arm_nn_mat_mult_kernel_q7_q15_reordered.c | 67 q31_t sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() local 84 sum2 = __SMLAD(inA11, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15_reordered() 92 sum2 = __SMLAD(inA12, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15_reordered() 107 sum2 += inA1 * inB2; in arm_nn_mat_mult_kernel_q7_q15_reordered() 114 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered()
|
D | arm_depthwise_separable_conv_HWC_q7.c | 149 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() local 183 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7() 212 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7() 267 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7() 311 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7() 332 sum2 += inA.bytes[1] * inB.bytes[1]; in arm_depthwise_separable_conv_HWC_q7() 339 *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7()
|
D | arm_depthwise_separable_conv_HWC_q7_nonsquare.c | 159 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() local 193 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 222 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 274 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7_nonsquare() 316 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7_nonsquare() 336 sum2 += inA.bytes[1] * inB.bytes[1]; in arm_depthwise_separable_conv_HWC_q7_nonsquare() 343 *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
|
D | arm_convolve_HWC_q15_fast.c | 158 q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() local 172 sum2 = __SMLAD(inA1, inB2, sum2); in arm_convolve_HWC_q15_fast() 187 sum2 += inA1 * inB2; in arm_convolve_HWC_q15_fast() 194 *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); in arm_convolve_HWC_q15_fast()
|
D | arm_convolve_HWC_q15_fast_nonsquare.c | 170 q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() local 184 sum2 = __SMLAD(inA1, inB2, sum2); in arm_convolve_HWC_q15_fast_nonsquare() 199 sum2 += inA1 * inB2; in arm_convolve_HWC_q15_fast_nonsquare() 206 *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare()
|
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/FullyConnectedFunctions/ |
D | arm_fully_connected_q7_opt.c | 154 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() local 175 sum2 = __SMLAD(inM12, inV, sum2); in arm_fully_connected_q7_opt() 187 sum2 = __SMLAD(inM12, inV, sum2); in arm_fully_connected_q7_opt() 206 sum2 = __SMLAD(inM11, inV, sum2); in arm_fully_connected_q7_opt() 218 sum2 = __SMLAD(inM11, inV, sum2); in arm_fully_connected_q7_opt() 270 [ sum2 ] "+r"(sum2), in arm_fully_connected_q7_opt() 308 [ sum2 ] "+r"(sum2), in arm_fully_connected_q7_opt() 329 sum2 += inV * inM2; in arm_fully_connected_q7_opt() 335 *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); in arm_fully_connected_q7_opt() 394 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() [all …]
|
D | arm_fully_connected_mat_q7_vec_q15_opt.c | 141 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local 163 sum2 = __SMLAD(inM12, inV, sum2); in arm_fully_connected_mat_q7_vec_q15_opt() 184 sum2 = __SMLAD(inM11, inV, sum2); in arm_fully_connected_mat_q7_vec_q15_opt() 224 [ sum2 ] "+r"(sum2), in arm_fully_connected_mat_q7_vec_q15_opt() 249 [ sum2 ] "+r"(sum2), in arm_fully_connected_mat_q7_vec_q15_opt() 270 sum2 += inV * inM2; in arm_fully_connected_mat_q7_vec_q15_opt() 276 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt() 336 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local 354 sum2 += inA1 * inB3 + inA2 * inB4; in arm_fully_connected_mat_q7_vec_q15_opt() 374 sum2 += inA * inB; in arm_fully_connected_mat_q7_vec_q15_opt() [all …]
|
D | arm_fully_connected_q15_opt.c | 120 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local 139 sum2 = __SMLAD(inV, inM12, sum2); in arm_fully_connected_q15_opt() 171 [ sum2 ] "+r"(sum2), in arm_fully_connected_q15_opt() 192 sum2 += inV * inM2; in arm_fully_connected_q15_opt() 198 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_q15_opt() 258 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local 276 sum2 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_q15_opt() 295 sum2 += inA * inB; in arm_fully_connected_q15_opt() 303 *pO++ = (q15_t)__SSAT((sum2 >> out_shift), 16); in arm_fully_connected_q15_opt()
|
D | arm_fully_connected_mat_q7_vec_q15.c | 93 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15() local 108 sum2 = __SMLAD(inV, inM21, sum2); in arm_fully_connected_mat_q7_vec_q15() 113 sum2 = __SMLAD(inV, inM22, sum2); in arm_fully_connected_mat_q7_vec_q15() 125 sum2 += inV * inM2; in arm_fully_connected_mat_q7_vec_q15() 129 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15()
|
D | arm_fully_connected_q15.c | 90 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15() local 104 sum2 = __SMLAD(inV1, inM2, sum2); in arm_fully_connected_q15() 110 sum2 = __SMLAD(inV1, inM2, sum2); in arm_fully_connected_q15() 122 sum2 += inV * inM2; in arm_fully_connected_q15() 126 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_q15()
|
D | arm_fully_connected_q7.c | 94 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7() local 109 sum2 = __SMLAD(inV, inM21, sum2); in arm_fully_connected_q7() 114 sum2 = __SMLAD(inV, inM22, sum2); in arm_fully_connected_q7() 126 sum2 += inV * inM2; in arm_fully_connected_q7() 130 *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); in arm_fully_connected_q7()
|
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/DSP/Source/MatrixFunctions/ |
D | arm_mat_mult_fast_q31.c | 76 q31_t sum1, sum2, sum3, sum4; /* Accumulator */ in arm_mat_mult_fast_q31() local 124 sum2 = 0; in arm_mat_mult_fast_q31() 148 sum2 = __SMMLA(inA1, inB2, sum2); in arm_mat_mult_fast_q31() 153 sum2 = (q31_t) ((((q63_t) sum2 << 32) + ((q63_t) inA1 * inB2)) >> 32); in arm_mat_mult_fast_q31() 164 *px++ = sum2 << 1; in arm_mat_mult_fast_q31()
|
D | arm_mat_mult_fast_q15.c | 88 q31_t sum2, sum3, sum4; in arm_mat_mult_fast_q15() local 254 sum2 = 0; in arm_mat_mult_fast_q15() 282 sum2 = __SMLAD(inA1, inB2, sum2); in arm_mat_mult_fast_q15() 317 sum2 += inA1 * inB2; in arm_mat_mult_fast_q15() 338 *px++ = (q15_t) (sum2 >> 15); in arm_mat_mult_fast_q15()
|
D | arm_mat_vec_mult_q7.c | 310 q31_t sum2 = 0; in arm_mat_vec_mult_q7() local 340 sum2 = __SMLAD(matData, vecData, sum2); in arm_mat_vec_mult_q7() 341 sum2 = __SMLAD(matData2, vecData2, sum2); in arm_mat_vec_mult_q7() 364 sum2 += *pInA2++ * vecData; in arm_mat_vec_mult_q7() 372 *px++ = (q7_t)(__SSAT((sum2 >> 7), 8)); in arm_mat_vec_mult_q7()
|
D | arm_mat_vec_mult_q15.c | 298 q63_t sum2 = 0; in arm_mat_vec_mult_q15() local 320 sum2 = __SMLALD(matData, vecData, sum2); in arm_mat_vec_mult_q15() 335 sum2 += (q63_t)*pInA2++ * vecData; in arm_mat_vec_mult_q15() 342 *px++ = (q15_t)(__SSAT((sum2 >> 15), 16)); in arm_mat_vec_mult_q15()
|
D | arm_mat_cholesky_f32.c | 226 float32_t sum0=0.0f,sum1=0.0f,sum2=0.0f,sum3=0.0f; in arm_mat_cholesky_f32() local 270 sum2 = vpadds_f32(vpadd_f32(vget_low_f32(acc2), vget_high_f32(acc2))); in arm_mat_cholesky_f32() 281 sum2 = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1); in arm_mat_cholesky_f32() 293 sum2 = sum2 + pG[i * n + k] * pG[(j + 2) * n + k]; in arm_mat_cholesky_f32() 301 pG[(j + 2) * n + i] -= sum2; in arm_mat_cholesky_f32()
|
D | arm_mat_vec_mult_f16.c | 314 float16_t sum2 = 0.0f; in arm_mat_vec_mult_f16() local 336 sum2 += matData * vecData; in arm_mat_vec_mult_f16() 348 *px++ = sum2; in arm_mat_vec_mult_f16()
|
D | arm_mat_vec_mult_f32.c | 319 float32_t sum2 = 0.0f; in arm_mat_vec_mult_f32() local 341 sum2 += matData * vecData; in arm_mat_vec_mult_f32() 353 *px++ = sum2; in arm_mat_vec_mult_f32()
|
D | arm_mat_vec_mult_q31.c | 295 q63_t sum2 = 0; in arm_mat_vec_mult_q31() local 318 sum2 += (q63_t)matData * vecData; in arm_mat_vec_mult_q31() 330 *px++ = (q31_t)(sum2 >> 31); in arm_mat_vec_mult_q31()
|
D | arm_mat_mult_f32.c | 549 float32_t sum0,sum1, sum2,sum3, sum4, sum5 , sum6, sum7; in arm_mat_mult_f32() local 594 sum2 = 0.0f; in arm_mat_mult_f32() 674 sum2 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); in arm_mat_mult_f32() 700 sum2 += *pIn1C++ * (*pIn2); in arm_mat_mult_f32() 715 *pxC++ = sum2; in arm_mat_mult_f32()
|