/cmsis-nn-latest/Source/NNSupportFunctions/ |
D | arm_nn_mat_mul_core_1x_s8.c | 65 int32_t acc[4]; in arm_nn_mat_mul_core_1x_s8() local 100 acc[index] = acc_n0; in arm_nn_mat_mul_core_1x_s8() 104 int32x4_t res = vldrwq_s32(acc); in arm_nn_mat_mul_core_1x_s8() 124 int32_t acc_n0 = acc[i]; in arm_nn_mat_mul_core_1x_s8()
|
D | arm_nn_mat_mul_core_1x_s4.c | 68 int32_t acc[4]; in arm_nn_mat_mul_core_1x_s4() local 95 acc[index] = acc_n0; in arm_nn_mat_mul_core_1x_s4() 99 int32x4_t res = vldrwq_s32(acc); in arm_nn_mat_mul_core_1x_s4() 120 int32_t acc_n0 = acc[i]; in arm_nn_mat_mul_core_1x_s4()
|
D | arm_nn_vec_mat_mult_t_s8.c | 135 int32x4_t acc = {acc_0, acc_1, acc_2, acc_3}; in arm_nn_vec_mat_mult_t_s8() local 138 acc += vdupq_n_s32(lhs_offset) * rhs_sum; in arm_nn_vec_mat_mult_t_s8() 141 acc += vdupq_n_s32(rhs_offset) * vdupq_n_s32(lhs_sum); in arm_nn_vec_mat_mult_t_s8() 142 acc += vdupq_n_s32(rhs_offset * lhs_offset) * vdupq_n_s32(rhs_cols); in arm_nn_vec_mat_mult_t_s8() 144 acc = arm_requantize_mve(acc, dst_multiplier, dst_shift); in arm_nn_vec_mat_mult_t_s8() 145 acc = vaddq_s32(acc, vdupq_n_s32(dst_offset)); in arm_nn_vec_mat_mult_t_s8() 146 acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); in arm_nn_vec_mat_mult_t_s8() 147 acc = vminq_s32(acc, vdupq_n_s32(activation_max)); in arm_nn_vec_mat_mult_t_s8() 149 vstrbq_scatter_offset_s32(dst, address_offset_array, acc); in arm_nn_vec_mat_mult_t_s8() 482 int32x4_t acc = {acc_0, acc_1, acc_2, acc_3}; in arm_nn_vec_mat_mult_t_s8() local [all …]
|
D | arm_nn_vec_mat_mul_result_acc_s8_s16.c | 109 int32x4_t acc = {acc_0, acc_1, acc_2, acc_3}; in arm_nn_vec_mat_mul_result_acc_s8_s16() local 111 acc = arm_requantize_mve(acc, dst_multiplier, dst_shift); in arm_nn_vec_mat_mul_result_acc_s8_s16() 112 acc = vaddq_s32(acc, vldrhq_s32(dst)); in arm_nn_vec_mat_mul_result_acc_s8_s16() 114 acc = vmaxq_s32(acc, vdupq_n_s32(NN_Q15_MIN)); in arm_nn_vec_mat_mul_result_acc_s8_s16() 115 acc = vminq_s32(acc, vdupq_n_s32(NN_Q15_MAX)); in arm_nn_vec_mat_mul_result_acc_s8_s16() 117 vstrhq_s32(dst, acc); in arm_nn_vec_mat_mul_result_acc_s8_s16()
|
D | arm_nn_vec_mat_mult_t_svdf_s8.c | 114 int32x4_t acc = {acc_0, acc_1, acc_2, 0}; in arm_nn_vec_mat_mult_t_svdf_s8() local 116 acc += vdupq_n_s32(lhs_offset) * rhs_sum; in arm_nn_vec_mat_mult_t_svdf_s8() 118 acc = arm_requantize_mve(acc, dst_multiplier, dst_shift); in arm_nn_vec_mat_mult_t_svdf_s8() 119 acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); in arm_nn_vec_mat_mult_t_svdf_s8() 120 acc = vminq_s32(acc, vdupq_n_s32(activation_max)); in arm_nn_vec_mat_mult_t_svdf_s8() 121 *(dst) = (int16_t)acc[0]; in arm_nn_vec_mat_mult_t_svdf_s8() 122 *(dst + dst_offset) = (int16_t)acc[1]; in arm_nn_vec_mat_mult_t_svdf_s8() 123 *(dst + 2 * dst_offset) = (int16_t)acc[2]; in arm_nn_vec_mat_mult_t_svdf_s8()
|
D | arm_nn_vec_mat_mult_t_s4.c | 287 int32x4_t acc = {acc0, acc1, acc2, acc3}; in arm_nn_vec_mat_mult_t_s4() local 290 acc += vdupq_n_s32(lhs_offset) * rhs_sum; in arm_nn_vec_mat_mult_t_s4() 292 acc = arm_requantize_mve(acc, dst_multiplier, dst_shift); in arm_nn_vec_mat_mult_t_s4() 293 acc = vaddq_s32(acc, vdupq_n_s32(dst_offset)); in arm_nn_vec_mat_mult_t_s4() 294 acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); in arm_nn_vec_mat_mult_t_s4() 295 acc = vminq_s32(acc, vdupq_n_s32(activation_max)); in arm_nn_vec_mat_mult_t_s4() 297 vstrbq_scatter_offset_s32(dst, scatter_offset, acc); in arm_nn_vec_mat_mult_t_s4()
|
D | arm_nn_mat_mult_nt_t_s16.c | 213 int32_t acc[4]; in arm_nn_mat_mult_nt_t_s16() local 248 acc[index] = acc_n0; in arm_nn_mat_mult_nt_t_s16() 252 int32x4_t res = vldrwq_s32(acc); in arm_nn_mat_mult_nt_t_s16() 267 int32_t acc_n0 = acc[i]; in arm_nn_mat_mult_nt_t_s16()
|
D | arm_nn_mat_mult_nt_t_s8.c | 155 int32_t acc[4]; in arm_nn_mat_mult_nt_t_s8() local 196 acc[index] = sum_tmp; in arm_nn_mat_mult_nt_t_s8() 200 int32x4_t res = vldrwq_s32(acc); in arm_nn_mat_mult_nt_t_s8() 215 int32_t acc_n0 = acc[i]; in arm_nn_mat_mult_nt_t_s8()
|
D | arm_nn_mat_mult_nt_t_s4.c | 276 int32_t acc[4]; in arm_nn_mat_mult_nt_t_s4() local 335 acc[index] = sum_tmp; in arm_nn_mat_mult_nt_t_s4() 339 int32x4_t res = vldrwq_s32(acc); in arm_nn_mat_mult_nt_t_s4() 354 int32_t acc_n0 = acc[i]; in arm_nn_mat_mult_nt_t_s4()
|
/cmsis-nn-latest/Source/ConvolutionFunctions/ |
D | arm_depthwise_conv_fast_s16.c | 408 int64_t acc = sum; in arm_depthwise_conv_fast_s16() local 411 acc += *bias++; in arm_depthwise_conv_fast_s16() 413 … result = arm_nn_requantize_s64(acc, REDUCE_MULTIPLIER(*output_mult), *output_shift++); in arm_depthwise_conv_fast_s16()
|