Home
last modified time | relevance | path

Searched refs:sum3 (Results 1 – 18 of 18) sorted by relevance

/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/FullyConnectedFunctions/
Darm_fully_connected_q7_opt.c155 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() local
179 sum3 = __SMLAD(inM13, inV, sum3); in arm_fully_connected_q7_opt()
191 sum3 = __SMLAD(inM13, inV, sum3); in arm_fully_connected_q7_opt()
210 sum3 = __SMLAD(inM14, inV, sum3); in arm_fully_connected_q7_opt()
222 sum3 = __SMLAD(inM14, inV, sum3); in arm_fully_connected_q7_opt()
271 [ sum3 ] "+r"(sum3), in arm_fully_connected_q7_opt()
309 [ sum3 ] "+r"(sum3), in arm_fully_connected_q7_opt()
330 sum3 += inV * inM3; in arm_fully_connected_q7_opt()
336 *pO++ = (q7_t)(__SSAT((sum3 >> out_shift), 8)); in arm_fully_connected_q7_opt()
395 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt()
[all …]
Darm_fully_connected_mat_q7_vec_q15_opt.c142 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local
167 sum3 = __SMLAD(inM13, inV, sum3); in arm_fully_connected_mat_q7_vec_q15_opt()
188 sum3 = __SMLAD(inM14, inV, sum3); in arm_fully_connected_mat_q7_vec_q15_opt()
225 [ sum3 ] "+r"(sum3), in arm_fully_connected_mat_q7_vec_q15_opt()
250 [ sum3 ] "+r"(sum3), in arm_fully_connected_mat_q7_vec_q15_opt()
271 sum3 += inV * inM3; in arm_fully_connected_mat_q7_vec_q15_opt()
277 *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt()
337 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local
361 sum3 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_mat_q7_vec_q15_opt()
376 sum3 += inA * inB; in arm_fully_connected_mat_q7_vec_q15_opt()
[all …]
Darm_fully_connected_q15_opt.c121 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local
141 sum3 = __SMLAD(inV, inM13, sum3); in arm_fully_connected_q15_opt()
172 [ sum3 ] "+r"(sum3), in arm_fully_connected_q15_opt()
193 sum3 += inV * inM3; in arm_fully_connected_q15_opt()
199 *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); in arm_fully_connected_q15_opt()
259 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local
280 sum3 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_q15_opt()
297 sum3 += inA * inB; in arm_fully_connected_q15_opt()
304 *pO++ = (q15_t)__SSAT((sum3 >> out_shift), 16); in arm_fully_connected_q15_opt()
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/ConvolutionFunctions/
Darm_nn_mat_mult_kernel_q7_q15_reordered.c68 q31_t sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() local
85 sum3 = __SMLAD(inA21, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15_reordered()
93 sum3 = __SMLAD(inA22, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15_reordered()
108 sum3 += inA2 * inB1; in arm_nn_mat_mult_kernel_q7_q15_reordered()
113 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered()
Darm_depthwise_separable_conv_HWC_q7.c150 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() local
186 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7()
221 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7()
268 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7()
312 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7()
333 sum3 += inA.bytes[2] * inB.bytes[2]; in arm_depthwise_separable_conv_HWC_q7()
340 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7()
Darm_depthwise_separable_conv_HWC_q7_nonsquare.c160 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() local
196 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
231 sum3 = __SMLAD(opA, opB, sum3); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
275 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7_nonsquare()
317 [ sum3 ] "+r"(sum3), in arm_depthwise_separable_conv_HWC_q7_nonsquare()
337 sum3 += inA.bytes[2] * inB.bytes[2]; in arm_depthwise_separable_conv_HWC_q7_nonsquare()
344 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
Darm_nn_mat_mult_kernel_q7_q15.c68 q31_t sum3 = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local
85 sum3 = __SMLAD(inA21, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15()
93 sum3 = __SMLAD(inA22, inB1, sum3); in arm_nn_mat_mult_kernel_q7_q15()
108 sum3 += inA2 * inB1; in arm_nn_mat_mult_kernel_q7_q15()
113 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15()
Darm_convolve_HWC_q15_fast.c159 q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() local
173 sum3 = __SMLAD(inA2, inB1, sum3); in arm_convolve_HWC_q15_fast()
188 sum3 += inA2 * inB1; in arm_convolve_HWC_q15_fast()
193 *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); in arm_convolve_HWC_q15_fast()
Darm_convolve_HWC_q15_fast_nonsquare.c171 q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() local
185 sum3 = __SMLAD(inA2, inB1, sum3); in arm_convolve_HWC_q15_fast_nonsquare()
200 sum3 += inA2 * inB1; in arm_convolve_HWC_q15_fast_nonsquare()
205 *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare()
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/DSP/Source/MatrixFunctions/
Darm_mat_mult_fast_q31.c76 q31_t sum1, sum2, sum3, sum4; /* Accumulator */ in arm_mat_mult_fast_q31() local
125 sum3 = 0; in arm_mat_mult_fast_q31()
149 sum3 = __SMMLA(inA2, inB1, sum3); in arm_mat_mult_fast_q31()
154 sum3 = (q31_t) ((((q63_t) sum3 << 32) + ((q63_t) inA2 * inB1)) >> 32); in arm_mat_mult_fast_q31()
165 *px2++ = sum3 << 1; in arm_mat_mult_fast_q31()
Darm_mat_mult_fast_q15.c88 q31_t sum2, sum3, sum4; in arm_mat_mult_fast_q15() local
255 sum3 = 0; in arm_mat_mult_fast_q15()
283 sum3 = __SMLAD(inA2, inB1, sum3); in arm_mat_mult_fast_q15()
318 sum3 += inA2 * inB1; in arm_mat_mult_fast_q15()
339 *px2++ = (q15_t) (sum3 >> 15); in arm_mat_mult_fast_q15()
Darm_mat_vec_mult_q7.c311 q31_t sum3 = 0; in arm_mat_vec_mult_q7() local
345 sum3 = __SMLAD(matData, vecData, sum3); in arm_mat_vec_mult_q7()
346 sum3 = __SMLAD(matData2, vecData2, sum3); in arm_mat_vec_mult_q7()
365 sum3 += *pInA3++ * vecData; in arm_mat_vec_mult_q7()
373 *px++ = (q7_t)(__SSAT((sum3 >> 7), 8)); in arm_mat_vec_mult_q7()
Darm_mat_vec_mult_q15.c299 q63_t sum3 = 0; in arm_mat_vec_mult_q15() local
322 sum3 = __SMLALD(matData, vecData, sum3); in arm_mat_vec_mult_q15()
336 sum3 += (q63_t)*pInA3++ * vecData; in arm_mat_vec_mult_q15()
343 *px++ = (q15_t)(__SSAT((sum3 >> 15), 16)); in arm_mat_vec_mult_q15()
Darm_mat_cholesky_f32.c226 float32_t sum0=0.0f,sum1=0.0f,sum2=0.0f,sum3=0.0f; in arm_mat_cholesky_f32() local
271 sum3 = vpadds_f32(vpadd_f32(vget_low_f32(acc3), vget_high_f32(acc3))); in arm_mat_cholesky_f32()
284 sum3 = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1); in arm_mat_cholesky_f32()
294 sum3 = sum3 + pG[i * n + k] * pG[(j + 3) * n + k]; in arm_mat_cholesky_f32()
302 pG[(j + 3) * n + i] -= sum3; in arm_mat_cholesky_f32()
Darm_mat_vec_mult_f16.c315 float16_t sum3 = 0.0f; in arm_mat_vec_mult_f16() local
338 sum3 += matData * vecData; in arm_mat_vec_mult_f16()
349 *px++ = sum3; in arm_mat_vec_mult_f16()
Darm_mat_vec_mult_f32.c320 float32_t sum3 = 0.0f; in arm_mat_vec_mult_f32() local
343 sum3 += matData * vecData; in arm_mat_vec_mult_f32()
354 *px++ = sum3; in arm_mat_vec_mult_f32()
Darm_mat_vec_mult_q31.c296 q63_t sum3 = 0; in arm_mat_vec_mult_q31() local
320 sum3 += (q63_t)matData * vecData; in arm_mat_vec_mult_q31()
331 *px++ = (q31_t)(sum3 >> 31); in arm_mat_vec_mult_q31()
Darm_mat_mult_f32.c549 float32_t sum0,sum1, sum2,sum3, sum4, sum5 , sum6, sum7; in arm_mat_mult_f32() local
595 sum3 = 0.0f; in arm_mat_mult_f32()
677 sum3 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); in arm_mat_mult_f32()
701 sum3 += *pIn1D++ * (*pIn2); in arm_mat_mult_f32()
716 *pxD++ = sum3; in arm_mat_mult_f32()