/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/FullyConnectedFunctions/ |
D | arm_fully_connected_q7_opt.c | 155 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() local 179 sum3 = __SMLAD(inM13, inV, sum3); in arm_fully_connected_q7_opt() 191 sum3 = __SMLAD(inM13, inV, sum3); in arm_fully_connected_q7_opt() 210 sum3 = __SMLAD(inM14, inV, sum3); in arm_fully_connected_q7_opt() 222 sum3 = __SMLAD(inM14, inV, sum3); in arm_fully_connected_q7_opt() 271 [ sum3 ] "+r"(sum3), in arm_fully_connected_q7_opt() 309 [ sum3 ] "+r"(sum3), in arm_fully_connected_q7_opt() 330 sum3 += inV * inM3; in arm_fully_connected_q7_opt() 336 *pO++ = (q7_t)(__SSAT((sum3 >> out_shift), 8)); in arm_fully_connected_q7_opt() 395 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() [all …]
|
D | arm_fully_connected_mat_q7_vec_q15_opt.c | 142 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local 167 sum3 = __SMLAD(inM13, inV, sum3); in arm_fully_connected_mat_q7_vec_q15_opt() 188 sum3 = __SMLAD(inM14, inV, sum3); in arm_fully_connected_mat_q7_vec_q15_opt() 225 [ sum3 ] "+r"(sum3), in arm_fully_connected_mat_q7_vec_q15_opt() 250 [ sum3 ] "+r"(sum3), in arm_fully_connected_mat_q7_vec_q15_opt() 271 sum3 += inV * inM3; in arm_fully_connected_mat_q7_vec_q15_opt() 277 *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt() 337 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local 361 sum3 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_mat_q7_vec_q15_opt() 376 sum3 += inA * inB; in arm_fully_connected_mat_q7_vec_q15_opt() [all …]
|
D | arm_fully_connected_q15_opt.c | 121 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local 141 sum3 = __SMLAD(inV, inM13, sum3); in arm_fully_connected_q15_opt() 172 [ sum3 ] "+r"(sum3), in arm_fully_connected_q15_opt() 193 sum3 += inV * inM3; in arm_fully_connected_q15_opt() 199 *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); in arm_fully_connected_q15_opt() 259 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local 280 sum3 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_q15_opt() 297 sum3 += inA * inB; in arm_fully_connected_q15_opt() 304 *pO++ = (q15_t)__SSAT((sum3 >> out_shift), 16); in arm_fully_connected_q15_opt()
|
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/ConvolutionFunctions/ |
D | arm_nn_mat_mult_kernel_q7_q15_reordered.c | 68 q31_t sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() local 85 sum3 = __SMLAD(inA21, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15_reordered() 93 sum3 = __SMLAD(inA22, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15_reordered() 108 sum3 += inA2 * inB1; in arm_nn_mat_mult_kernel_q7_q15_reordered() 113 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered()
|
D | arm_depthwise_separable_conv_HWC_q7.c | 150 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() local 186 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7() 221 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7() 268 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7() 312 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7() 333 sum3 += inA.bytes[2] * inB.bytes[2]; in arm_depthwise_separable_conv_HWC_q7() 340 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7()
|
D | arm_depthwise_separable_conv_HWC_q7_nonsquare.c | 160 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() local 196 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 231 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 275 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7_nonsquare() 317 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7_nonsquare() 337 sum3 += inA.bytes[2] * inB.bytes[2]; in arm_depthwise_separable_conv_HWC_q7_nonsquare() 344 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
|
D | arm_nn_mat_mult_kernel_q7_q15.c | 68 q31_t sum3 = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local 85 sum3 = __SMLAD(inA21, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15() 93 sum3 = __SMLAD(inA22, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15() 108 sum3 += inA2 * inB1; in arm_nn_mat_mult_kernel_q7_q15() 113 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15()
|
D | arm_convolve_HWC_q15_fast.c | 159 q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() local 173 sum3 = __SMLAD(inA2, inB1, sum3); in arm_convolve_HWC_q15_fast() 188 sum3 += inA2 * inB1; in arm_convolve_HWC_q15_fast() 193 *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); in arm_convolve_HWC_q15_fast()
|
D | arm_convolve_HWC_q15_fast_nonsquare.c | 171 q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() local 185 sum3 = __SMLAD(inA2, inB1, sum3); in arm_convolve_HWC_q15_fast_nonsquare() 200 sum3 += inA2 * inB1; in arm_convolve_HWC_q15_fast_nonsquare() 205 *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare()
|
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/DSP/Source/MatrixFunctions/ |
D | arm_mat_mult_fast_q31.c | 76 q31_t sum1, sum2, sum3, sum4; /* Accumulator */ in arm_mat_mult_fast_q31() local 125 sum3 = 0; in arm_mat_mult_fast_q31() 149 sum3 = __SMMLA(inA2, inB1, sum3); in arm_mat_mult_fast_q31() 154 sum3 = (q31_t) ((((q63_t) sum3 << 32) + ((q63_t) inA2 * inB1)) >> 32); in arm_mat_mult_fast_q31() 165 *px2++ = sum3 << 1; in arm_mat_mult_fast_q31()
|
D | arm_mat_mult_fast_q15.c | 88 q31_t sum2, sum3, sum4; in arm_mat_mult_fast_q15() local 255 sum3 = 0; in arm_mat_mult_fast_q15() 283 sum3 = __SMLAD(inA2, inB1, sum3); in arm_mat_mult_fast_q15() 318 sum3 += inA2 * inB1; in arm_mat_mult_fast_q15() 339 *px2++ = (q15_t) (sum3 >> 15); in arm_mat_mult_fast_q15()
|
D | arm_mat_vec_mult_q7.c | 311 q31_t sum3 = 0; in arm_mat_vec_mult_q7() local 345 sum3 = __SMLAD(matData, vecData, sum3); in arm_mat_vec_mult_q7() 346 sum3 = __SMLAD(matData2, vecData2, sum3); in arm_mat_vec_mult_q7() 365 sum3 += *pInA3++ * vecData; in arm_mat_vec_mult_q7() 373 *px++ = (q7_t)(__SSAT((sum3 >> 7), 8)); in arm_mat_vec_mult_q7()
|
D | arm_mat_vec_mult_q15.c | 299 q63_t sum3 = 0; in arm_mat_vec_mult_q15() local 322 sum3 = __SMLALD(matData, vecData, sum3); in arm_mat_vec_mult_q15() 336 sum3 += (q63_t)*pInA3++ * vecData; in arm_mat_vec_mult_q15() 343 *px++ = (q15_t)(__SSAT((sum3 >> 15), 16)); in arm_mat_vec_mult_q15()
|
D | arm_mat_cholesky_f32.c | 226 float32_t sum0=0.0f,sum1=0.0f,sum2=0.0f,sum3=0.0f; in arm_mat_cholesky_f32() local 271 sum3 = vpadds_f32(vpadd_f32(vget_low_f32(acc3), vget_high_f32(acc3))); in arm_mat_cholesky_f32() 284 sum3 = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1); in arm_mat_cholesky_f32() 294 sum3 = sum3 + pG[i * n + k] * pG[(j + 3) * n + k]; in arm_mat_cholesky_f32() 302 pG[(j + 3) * n + i] -= sum3; in arm_mat_cholesky_f32()
|
D | arm_mat_vec_mult_f16.c | 315 float16_t sum3 = 0.0f; in arm_mat_vec_mult_f16() local 338 sum3 += matData * vecData; in arm_mat_vec_mult_f16() 349 *px++ = sum3; in arm_mat_vec_mult_f16()
|
D | arm_mat_vec_mult_f32.c | 320 float32_t sum3 = 0.0f; in arm_mat_vec_mult_f32() local 343 sum3 += matData * vecData; in arm_mat_vec_mult_f32() 354 *px++ = sum3; in arm_mat_vec_mult_f32()
|
D | arm_mat_vec_mult_q31.c | 296 q63_t sum3 = 0; in arm_mat_vec_mult_q31() local 320 sum3 += (q63_t)matData * vecData; in arm_mat_vec_mult_q31() 331 *px++ = (q31_t)(sum3 >> 31); in arm_mat_vec_mult_q31()
|
D | arm_mat_mult_f32.c | 549 float32_t sum0,sum1, sum2,sum3, sum4, sum5 , sum6, sum7; in arm_mat_mult_f32() local 595 sum3 = 0.0f; in arm_mat_mult_f32() 677 sum3 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); in arm_mat_mult_f32() 701 sum3 += *pIn1D++ * (*pIn2); in arm_mat_mult_f32() 716 *pxD++ = sum3; in arm_mat_mult_f32()
|