/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/FullyConnectedFunctions/ |
D | arm_fully_connected_q7_opt.c | 156 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() local 180 sum4 = __SMLAD(inM14, inV, sum4); in arm_fully_connected_q7_opt() 192 sum4 = __SMLAD(inM14, inV, sum4); in arm_fully_connected_q7_opt() 211 sum4 = __SMLAD(inM13, inV, sum4); in arm_fully_connected_q7_opt() 223 sum4 = __SMLAD(inM13, inV, sum4); in arm_fully_connected_q7_opt() 272 [ sum4 ] "+r"(sum4), in arm_fully_connected_q7_opt() 310 [ sum4 ] "+r"(sum4), in arm_fully_connected_q7_opt() 331 sum4 += inV * inM4; in arm_fully_connected_q7_opt() 337 *pO++ = (q7_t)(__SSAT((sum4 >> out_shift), 8)); in arm_fully_connected_q7_opt() 396 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() [all …]
|
D | arm_fully_connected_mat_q7_vec_q15_opt.c | 143 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local 168 sum4 = __SMLAD(inM14, inV, sum4); in arm_fully_connected_mat_q7_vec_q15_opt() 189 sum4 = __SMLAD(inM13, inV, sum4); in arm_fully_connected_mat_q7_vec_q15_opt() 226 [ sum4 ] "+r"(sum4), in arm_fully_connected_mat_q7_vec_q15_opt() 251 [ sum4 ] "+r"(sum4), in arm_fully_connected_mat_q7_vec_q15_opt() 272 sum4 += inV * inM4; in arm_fully_connected_mat_q7_vec_q15_opt() 278 *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt() 338 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local 362 sum4 += inA1 * inB3 + inA2 * inB4; in arm_fully_connected_mat_q7_vec_q15_opt() 378 sum4 += inA * inB; in arm_fully_connected_mat_q7_vec_q15_opt() [all …]
|
D | arm_fully_connected_q15_opt.c | 122 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local 143 sum4 = __SMLAD(inV, inM14, sum4); in arm_fully_connected_q15_opt() 173 [ sum4 ] "+r"(sum4), in arm_fully_connected_q15_opt() 194 sum4 += inV * inM4; in arm_fully_connected_q15_opt() 200 *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); in arm_fully_connected_q15_opt() 260 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local 284 sum4 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_q15_opt() 299 sum4 += inA * inB; in arm_fully_connected_q15_opt() 305 *pO++ = (q15_t)__SSAT((sum4 >> out_shift), 16); in arm_fully_connected_q15_opt()
|
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/ConvolutionFunctions/ |
D | arm_nn_mat_mult_kernel_q7_q15_reordered.c | 69 q31_t sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() local 86 sum4 = __SMLAD(inA21, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15_reordered() 94 sum4 = __SMLAD(inA22, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15_reordered() 109 sum4 += inA2 * inB2; in arm_nn_mat_mult_kernel_q7_q15_reordered() 115 *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered()
|
D | arm_depthwise_separable_conv_HWC_q7.c | 151 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() local 189 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7() 218 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7() 269 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7() 313 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7() 334 sum4 += inA.bytes[3] * inB.bytes[3]; in arm_depthwise_separable_conv_HWC_q7() 341 *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7()
|
D | arm_depthwise_separable_conv_HWC_q7_nonsquare.c | 161 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() local 199 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 228 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 276 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7_nonsquare() 318 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7_nonsquare() 338 sum4 += inA.bytes[3] * inB.bytes[3]; in arm_depthwise_separable_conv_HWC_q7_nonsquare() 345 *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
|
D | arm_nn_mat_mult_kernel_q7_q15.c | 69 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local 86 sum4 = __SMLAD(inA21, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15() 94 sum4 = __SMLAD(inA22, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15() 109 sum4 += inA2 * inB2; in arm_nn_mat_mult_kernel_q7_q15() 115 *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15()
|
D | arm_convolve_HWC_q15_fast.c | 160 q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() local 174 sum4 = __SMLAD(inA2, inB2, sum4); in arm_convolve_HWC_q15_fast() 189 sum4 += inA2 * inB2; in arm_convolve_HWC_q15_fast() 195 *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); in arm_convolve_HWC_q15_fast()
|
D | arm_convolve_HWC_q15_fast_nonsquare.c | 172 q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() local 186 sum4 = __SMLAD(inA2, inB2, sum4); in arm_convolve_HWC_q15_fast_nonsquare() 201 sum4 += inA2 * inB2; in arm_convolve_HWC_q15_fast_nonsquare() 207 *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare()
|
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/DSP/Source/MatrixFunctions/ |
D | arm_mat_mult_fast_q31.c | 76 q31_t sum1, sum2, sum3, sum4; /* Accumulator */ in arm_mat_mult_fast_q31() local 126 sum4 = 0; in arm_mat_mult_fast_q31() 150 sum4 = __SMMLA(inA2, inB2, sum4); in arm_mat_mult_fast_q31() 155 sum4 = (q31_t) ((((q63_t) sum4 << 32) + ((q63_t) inA2 * inB2)) >> 32); in arm_mat_mult_fast_q31() 166 *px2++ = sum4 << 1; in arm_mat_mult_fast_q31()
|
D | arm_mat_mult_fast_q15.c | 88 q31_t sum2, sum3, sum4; in arm_mat_mult_fast_q15() local 256 sum4 = 0; in arm_mat_mult_fast_q15() 284 sum4 = __SMLAD(inA2, inB2, sum4); in arm_mat_mult_fast_q15() 319 sum4 += inA2 * inB2; in arm_mat_mult_fast_q15() 340 *px2++ = (q15_t) (sum4 >> 15); in arm_mat_mult_fast_q15()
|
D | arm_mat_vec_mult_q7.c | 312 q31_t sum4 = 0; in arm_mat_vec_mult_q7() local 350 sum4 = __SMLAD(matData, vecData, sum4); in arm_mat_vec_mult_q7() 351 sum4 = __SMLAD(matData2, vecData2, sum4); in arm_mat_vec_mult_q7() 366 sum4 += *pInA4++ * vecData; in arm_mat_vec_mult_q7() 374 *px++ = (q7_t)(__SSAT((sum4 >> 7), 8)); in arm_mat_vec_mult_q7()
|
D | arm_mat_vec_mult_q15.c | 300 q63_t sum4 = 0; in arm_mat_vec_mult_q15() local 324 sum4 = __SMLALD(matData, vecData, sum4); in arm_mat_vec_mult_q15() 337 sum4 += (q63_t)*pInA4++ * vecData; in arm_mat_vec_mult_q15() 344 *px++ = (q15_t)(__SSAT((sum4 >> 15), 16)); in arm_mat_vec_mult_q15()
|
D | arm_mat_vec_mult_f16.c | 316 float16_t sum4 = 0.0f; in arm_mat_vec_mult_f16() local 340 sum4 += matData * vecData; in arm_mat_vec_mult_f16() 350 *px++ = sum4; in arm_mat_vec_mult_f16()
|
D | arm_mat_vec_mult_f32.c | 321 float32_t sum4 = 0.0f; in arm_mat_vec_mult_f32() local 345 sum4 += matData * vecData; in arm_mat_vec_mult_f32() 355 *px++ = sum4; in arm_mat_vec_mult_f32()
|
D | arm_mat_vec_mult_q31.c | 297 q63_t sum4 = 0; in arm_mat_vec_mult_q31() local 322 sum4 += (q63_t)matData * vecData; in arm_mat_vec_mult_q31() 332 *px++ = (q31_t)(sum4 >> 31); in arm_mat_vec_mult_q31()
|
D | arm_mat_mult_f32.c | 549 float32_t sum0,sum1, sum2,sum3, sum4, sum5 , sum6, sum7; in arm_mat_mult_f32() local 596 sum4 = 0.0f; in arm_mat_mult_f32() 680 sum4 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); in arm_mat_mult_f32() 702 sum4 += *pIn1E++ * (*pIn2); in arm_mat_mult_f32() 717 *pxE++ = sum4; in arm_mat_mult_f32()
|