Home
last modified time | relevance | path

Searched refs:sum2 (Results 1 – 21 of 21) sorted by relevance

/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/ConvolutionFunctions/
Darm_nn_mat_mult_kernel_q7_q15.c67 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local
84 sum2 = __SMLAD(inA11, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15()
92 sum2 = __SMLAD(inA12, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15()
107 sum2 += inA1 * inB2; in arm_nn_mat_mult_kernel_q7_q15()
114 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15()
131 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() local
144 sum2 = __SMLAD(inA11, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15()
150 sum2 = __SMLAD(inA12, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15()
162 sum2 += inA1 * inB2; in arm_nn_mat_mult_kernel_q7_q15()
167 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15()
Darm_nn_mat_mult_kernel_q7_q15_reordered.c67 q31_t sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() local
84 sum2 = __SMLAD(inA11, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15_reordered()
92 sum2 = __SMLAD(inA12, inB2, sum2); in arm_nn_mat_mult_kernel_q7_q15_reordered()
107 sum2 += inA1 * inB2; in arm_nn_mat_mult_kernel_q7_q15_reordered()
114 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered()
Darm_depthwise_separable_conv_HWC_q7.c149 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() local
183 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7()
212 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7()
267 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7()
311 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7()
332 sum2 += inA.bytes[1] * inB.bytes[1]; in arm_depthwise_separable_conv_HWC_q7()
339 *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7()
Darm_depthwise_separable_conv_HWC_q7_nonsquare.c159 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() local
193 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
222 sum2 = __SMLAD(opA, opB, sum2); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
274 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7_nonsquare()
316 [ sum2 ] "+r"(sum2), in arm_depthwise_separable_conv_HWC_q7_nonsquare()
336 sum2 += inA.bytes[1] * inB.bytes[1]; in arm_depthwise_separable_conv_HWC_q7_nonsquare()
343 *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare()
Darm_convolve_HWC_q15_fast.c158 q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() local
172 sum2 = __SMLAD(inA1, inB2, sum2); in arm_convolve_HWC_q15_fast()
187 sum2 += inA1 * inB2; in arm_convolve_HWC_q15_fast()
194 *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); in arm_convolve_HWC_q15_fast()
Darm_convolve_HWC_q15_fast_nonsquare.c170 q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() local
184 sum2 = __SMLAD(inA1, inB2, sum2); in arm_convolve_HWC_q15_fast_nonsquare()
199 sum2 += inA1 * inB2; in arm_convolve_HWC_q15_fast_nonsquare()
206 *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare()
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/NN/Source/FullyConnectedFunctions/
Darm_fully_connected_q7_opt.c154 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() local
175 sum2 = __SMLAD(inM12, inV, sum2); in arm_fully_connected_q7_opt()
187 sum2 = __SMLAD(inM12, inV, sum2); in arm_fully_connected_q7_opt()
206 sum2 = __SMLAD(inM11, inV, sum2); in arm_fully_connected_q7_opt()
218 sum2 = __SMLAD(inM11, inV, sum2); in arm_fully_connected_q7_opt()
270 [ sum2 ] "+r"(sum2), in arm_fully_connected_q7_opt()
308 [ sum2 ] "+r"(sum2), in arm_fully_connected_q7_opt()
329 sum2 += inV * inM2; in arm_fully_connected_q7_opt()
335 *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); in arm_fully_connected_q7_opt()
394 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt()
[all …]
Darm_fully_connected_mat_q7_vec_q15_opt.c141 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local
163 sum2 = __SMLAD(inM12, inV, sum2); in arm_fully_connected_mat_q7_vec_q15_opt()
184 sum2 = __SMLAD(inM11, inV, sum2); in arm_fully_connected_mat_q7_vec_q15_opt()
224 [ sum2 ] "+r"(sum2), in arm_fully_connected_mat_q7_vec_q15_opt()
249 [ sum2 ] "+r"(sum2), in arm_fully_connected_mat_q7_vec_q15_opt()
270 sum2 += inV * inM2; in arm_fully_connected_mat_q7_vec_q15_opt()
276 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt()
336 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() local
354 sum2 += inA1 * inB3 + inA2 * inB4; in arm_fully_connected_mat_q7_vec_q15_opt()
374 sum2 += inA * inB; in arm_fully_connected_mat_q7_vec_q15_opt()
[all …]
Darm_fully_connected_q15_opt.c120 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local
139 sum2 = __SMLAD(inV, inM12, sum2); in arm_fully_connected_q15_opt()
171 [ sum2 ] "+r"(sum2), in arm_fully_connected_q15_opt()
192 sum2 += inV * inM2; in arm_fully_connected_q15_opt()
198 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_q15_opt()
258 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() local
276 sum2 += inA1 * inB1 + inA2 * inB2; in arm_fully_connected_q15_opt()
295 sum2 += inA * inB; in arm_fully_connected_q15_opt()
303 *pO++ = (q15_t)__SSAT((sum2 >> out_shift), 16); in arm_fully_connected_q15_opt()
Darm_fully_connected_mat_q7_vec_q15.c93 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15() local
108 sum2 = __SMLAD(inV, inM21, sum2); in arm_fully_connected_mat_q7_vec_q15()
113 sum2 = __SMLAD(inV, inM22, sum2); in arm_fully_connected_mat_q7_vec_q15()
125 sum2 += inV * inM2; in arm_fully_connected_mat_q7_vec_q15()
129 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15()
Darm_fully_connected_q15.c90 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15() local
104 sum2 = __SMLAD(inV1, inM2, sum2); in arm_fully_connected_q15()
110 sum2 = __SMLAD(inV1, inM2, sum2); in arm_fully_connected_q15()
122 sum2 += inV * inM2; in arm_fully_connected_q15()
126 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_q15()
Darm_fully_connected_q7.c94 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7() local
109 sum2 = __SMLAD(inV, inM21, sum2); in arm_fully_connected_q7()
114 sum2 = __SMLAD(inV, inM22, sum2); in arm_fully_connected_q7()
126 sum2 += inV * inM2; in arm_fully_connected_q7()
130 *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); in arm_fully_connected_q7()
/hal_nxp-3.5.0/mcux/mcux-sdk/CMSIS/DSP/Source/MatrixFunctions/
Darm_mat_mult_fast_q31.c76 q31_t sum1, sum2, sum3, sum4; /* Accumulator */ in arm_mat_mult_fast_q31() local
124 sum2 = 0; in arm_mat_mult_fast_q31()
148 sum2 = __SMMLA(inA1, inB2, sum2); in arm_mat_mult_fast_q31()
153 sum2 = (q31_t) ((((q63_t) sum2 << 32) + ((q63_t) inA1 * inB2)) >> 32); in arm_mat_mult_fast_q31()
164 *px++ = sum2 << 1; in arm_mat_mult_fast_q31()
Darm_mat_mult_fast_q15.c88 q31_t sum2, sum3, sum4; in arm_mat_mult_fast_q15() local
254 sum2 = 0; in arm_mat_mult_fast_q15()
282 sum2 = __SMLAD(inA1, inB2, sum2); in arm_mat_mult_fast_q15()
317 sum2 += inA1 * inB2; in arm_mat_mult_fast_q15()
338 *px++ = (q15_t) (sum2 >> 15); in arm_mat_mult_fast_q15()
Darm_mat_vec_mult_q7.c310 q31_t sum2 = 0; in arm_mat_vec_mult_q7() local
340 sum2 = __SMLAD(matData, vecData, sum2); in arm_mat_vec_mult_q7()
341 sum2 = __SMLAD(matData2, vecData2, sum2); in arm_mat_vec_mult_q7()
364 sum2 += *pInA2++ * vecData; in arm_mat_vec_mult_q7()
372 *px++ = (q7_t)(__SSAT((sum2 >> 7), 8)); in arm_mat_vec_mult_q7()
Darm_mat_vec_mult_q15.c298 q63_t sum2 = 0; in arm_mat_vec_mult_q15() local
320 sum2 = __SMLALD(matData, vecData, sum2); in arm_mat_vec_mult_q15()
335 sum2 += (q63_t)*pInA2++ * vecData; in arm_mat_vec_mult_q15()
342 *px++ = (q15_t)(__SSAT((sum2 >> 15), 16)); in arm_mat_vec_mult_q15()
Darm_mat_cholesky_f32.c226 float32_t sum0=0.0f,sum1=0.0f,sum2=0.0f,sum3=0.0f; in arm_mat_cholesky_f32() local
270 sum2 = vpadds_f32(vpadd_f32(vget_low_f32(acc2), vget_high_f32(acc2))); in arm_mat_cholesky_f32()
281 sum2 = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1); in arm_mat_cholesky_f32()
293 sum2 = sum2 + pG[i * n + k] * pG[(j + 2) * n + k]; in arm_mat_cholesky_f32()
301 pG[(j + 2) * n + i] -= sum2; in arm_mat_cholesky_f32()
Darm_mat_vec_mult_f16.c314 float16_t sum2 = 0.0f; in arm_mat_vec_mult_f16() local
336 sum2 += matData * vecData; in arm_mat_vec_mult_f16()
348 *px++ = sum2; in arm_mat_vec_mult_f16()
Darm_mat_vec_mult_f32.c319 float32_t sum2 = 0.0f; in arm_mat_vec_mult_f32() local
341 sum2 += matData * vecData; in arm_mat_vec_mult_f32()
353 *px++ = sum2; in arm_mat_vec_mult_f32()
Darm_mat_vec_mult_q31.c295 q63_t sum2 = 0; in arm_mat_vec_mult_q31() local
318 sum2 += (q63_t)matData * vecData; in arm_mat_vec_mult_q31()
330 *px++ = (q31_t)(sum2 >> 31); in arm_mat_vec_mult_q31()
Darm_mat_mult_f32.c549 float32_t sum0,sum1, sum2,sum3, sum4, sum5 , sum6, sum7; in arm_mat_mult_f32() local
594 sum2 = 0.0f; in arm_mat_mult_f32()
674 sum2 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); in arm_mat_mult_f32()
700 sum2 += *pIn1C++ * (*pIn2); in arm_mat_mult_f32()
715 *pxC++ = sum2; in arm_mat_mult_f32()