Home
last modified time | relevance | path

Searched refs:sum4 (Results 1 – 17 of 17) sorted by relevance

/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/FullyConnectedFunctions/
Darm_fully_connected_q7_opt.c156 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() local
180 sum4 = __SMLAD(inM14, inV, sum4); in arm_fully_connected_q7_opt()
192 sum4 = __SMLAD(inM14, inV, sum4); in arm_fully_connected_q7_opt()
211 sum4 = __SMLAD(inM13, inV, sum4); in arm_fully_connected_q7_opt()
223 sum4 = __SMLAD(inM13, inV, sum4); in arm_fully_connected_q7_opt()
272 [ sum4 ] "+r"(sum4), in arm_fully_connected_q7_opt()
310 [ sum4 ] "+r"(sum4), in arm_fully_connected_q7_opt()
331 sum4 += inV * inM4; in arm_fully_connected_q7_opt()
337 *pO++ = (q7_t)(__SSAT((sum4 >> out_shift), 8)); in arm_fully_connected_q7_opt()
396 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt()
[all …]
Darm_fully_connected_mat_q7_vec_q15_opt.c143 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local
168 sum4 = __SMLAD(inM14, inV, sum4); in arm_fully_connected_mat_q7_vec_q15_opt()
189 sum4 = __SMLAD(inM13, inV, sum4); in arm_fully_connected_mat_q7_vec_q15_opt()
226 [ sum4 ] "+r"(sum4), in arm_fully_connected_mat_q7_vec_q15_opt()
251 [ sum4 ] "+r"(sum4), in arm_fully_connected_mat_q7_vec_q15_opt()
272 sum4 += inV * inM4; in arm_fully_connected_mat_q7_vec_q15_opt()
278 *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt()
338 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local
362 sum4 += inA1 * inB3 + inA2 * inB4; in arm_fully_connected_mat_q7_vec_q15_opt()
378 sum4 += inA * inB; in arm_fully_connected_mat_q7_vec_q15_opt()
[all …]
Darm_fully_connected_q15_opt.c122 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local
143 sum4 = __SMLAD(inV, inM14, sum4); in arm_fully_connected_q15_opt()
173 [ sum4 ] "+r"(sum4), in arm_fully_connected_q15_opt()
194 sum4 += inV * inM4; in arm_fully_connected_q15_opt()
200 *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); in arm_fully_connected_q15_opt()
260 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local
284 sum4 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_q15_opt()
299 sum4 += inA * inB; in arm_fully_connected_q15_opt()
305 *pO++ = (q15_t)__SSAT((sum4 >> out_shift), 16); in arm_fully_connected_q15_opt()
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/ConvolutionFunctions/
Darm_nn_mat_mult_kernel_q7_q15_reordered.c69 q31_t sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() local
86 sum4 = __SMLAD(inA21, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15_reordered()
94 sum4 = __SMLAD(inA22, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15_reordered()
109 sum4 += inA2 * inB2; in arm_nn_mat_mult_kernel_q7_q15_reordered()
115 *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered()
Darm_depthwise_separable_conv_HWC_q7.c151 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() local
189 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7()
218 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7()
269 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7()
313 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7()
334 sum4 += inA.bytes[3] * inB.bytes[3]; in arm_depthwise_separable_conv_HWC_q7()
341 *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7()
Darm_depthwise_separable_conv_HWC_q7_nonsquare.c161 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() local
199 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
228 sum4 = __SMLAD(opA, opB, sum4); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
276 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7_nonsquare()
318 [ sum4 ] "+r"(sum4), in arm_depthwise_separable_conv_HWC_q7_nonsquare()
338 sum4 += inA.bytes[3] * inB.bytes[3]; in arm_depthwise_separable_conv_HWC_q7_nonsquare()
345 *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
Darm_nn_mat_mult_kernel_q7_q15.c69 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local
86 sum4 = __SMLAD(inA21, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15()
94 sum4 = __SMLAD(inA22, inB2, sum4); in arm_nn_mat_mult_kernel_q7_q15()
109 sum4 += inA2 * inB2; in arm_nn_mat_mult_kernel_q7_q15()
115 *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15()
Darm_convolve_HWC_q15_fast.c160 q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() local
174 sum4 = __SMLAD(inA2, inB2, sum4); in arm_convolve_HWC_q15_fast()
189 sum4 += inA2 * inB2; in arm_convolve_HWC_q15_fast()
195 *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); in arm_convolve_HWC_q15_fast()
Darm_convolve_HWC_q15_fast_nonsquare.c172 q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() local
186 sum4 = __SMLAD(inA2, inB2, sum4); in arm_convolve_HWC_q15_fast_nonsquare()
201 sum4 += inA2 * inB2; in arm_convolve_HWC_q15_fast_nonsquare()
207 *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare()
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/DSP/Source/MatrixFunctions/
Darm_mat_mult_fast_q31.c76 q31_t sum1, sum2, sum3, sum4; /* Accumulator */ in arm_mat_mult_fast_q31() local
126 sum4 = 0; in arm_mat_mult_fast_q31()
150 sum4 = __SMMLA(inA2, inB2, sum4); in arm_mat_mult_fast_q31()
155 sum4 = (q31_t) ((((q63_t) sum4 << 32) + ((q63_t) inA2 * inB2)) >> 32); in arm_mat_mult_fast_q31()
166 *px2++ = sum4 << 1; in arm_mat_mult_fast_q31()
Darm_mat_mult_fast_q15.c88 q31_t sum2, sum3, sum4; in arm_mat_mult_fast_q15() local
256 sum4 = 0; in arm_mat_mult_fast_q15()
284 sum4 = __SMLAD(inA2, inB2, sum4); in arm_mat_mult_fast_q15()
319 sum4 += inA2 * inB2; in arm_mat_mult_fast_q15()
340 *px2++ = (q15_t) (sum4 >> 15); in arm_mat_mult_fast_q15()
Darm_mat_vec_mult_q7.c312 q31_t sum4 = 0; in arm_mat_vec_mult_q7() local
350 sum4 = __SMLAD(matData, vecData, sum4); in arm_mat_vec_mult_q7()
351 sum4 = __SMLAD(matData2, vecData2, sum4); in arm_mat_vec_mult_q7()
366 sum4 += *pInA4++ * vecData; in arm_mat_vec_mult_q7()
374 *px++ = (q7_t)(__SSAT((sum4 >> 7), 8)); in arm_mat_vec_mult_q7()
Darm_mat_vec_mult_q15.c300 q63_t sum4 = 0; in arm_mat_vec_mult_q15() local
324 sum4 = __SMLALD(matData, vecData, sum4); in arm_mat_vec_mult_q15()
337 sum4 += (q63_t)*pInA4++ * vecData; in arm_mat_vec_mult_q15()
344 *px++ = (q15_t)(__SSAT((sum4 >> 15), 16)); in arm_mat_vec_mult_q15()
Darm_mat_vec_mult_f16.c316 float16_t sum4 = 0.0f; in arm_mat_vec_mult_f16() local
340 sum4 += matData * vecData; in arm_mat_vec_mult_f16()
350 *px++ = sum4; in arm_mat_vec_mult_f16()
Darm_mat_vec_mult_f32.c321 float32_t sum4 = 0.0f; in arm_mat_vec_mult_f32() local
345 sum4 += matData * vecData; in arm_mat_vec_mult_f32()
355 *px++ = sum4; in arm_mat_vec_mult_f32()
Darm_mat_vec_mult_q31.c297 q63_t sum4 = 0; in arm_mat_vec_mult_q31() local
322 sum4 += (q63_t)matData * vecData; in arm_mat_vec_mult_q31()
332 *px++ = (q31_t)(sum4 >> 31); in arm_mat_vec_mult_q31()
Darm_mat_mult_f32.c549 float32_t sum0,sum1, sum2,sum3, sum4, sum5 , sum6, sum7; in arm_mat_mult_f32() local
596 sum4 = 0.0f; in arm_mat_mult_f32()
680 sum4 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); in arm_mat_mult_f32()
702 sum4 += *pIn1E++ * (*pIn2); in arm_mat_mult_f32()
717 *pxE++ = sum4; in arm_mat_mult_f32()