/cmsis-nn-3.7.0/Source/NNSupportFunctions/ |
D | arm_nn_mat_mul_core_1x_s8.c | 68 int32_t acc_n0 = 0; in arm_nn_mat_mul_core_1x_s8() local 78 acc_n0 += row_base[j] * col; in arm_nn_mat_mul_core_1x_s8() 91 … : [col] "+r"(col_base), [sum] "+Te"(sum_tmp), [row0] "+r"(row_base), [out0] "+Te"(acc_n0) in arm_nn_mat_mul_core_1x_s8() 97 acc_n0 += sum_tmp; in arm_nn_mat_mul_core_1x_s8() 100 acc[index] = acc_n0; in arm_nn_mat_mul_core_1x_s8() 124 int32_t acc_n0 = acc[i]; in arm_nn_mat_mul_core_1x_s8() local 127 acc_n0 += bias[i]; in arm_nn_mat_mul_core_1x_s8() 129 acc_n0 = arm_nn_requantize(acc_n0, output_mult[i], output_shift[i]); in arm_nn_mat_mul_core_1x_s8() 130 acc_n0 += conv_params->output_offset; in arm_nn_mat_mul_core_1x_s8() 131 acc_n0 = MAX(acc_n0, conv_params->activation.min); in arm_nn_mat_mul_core_1x_s8() [all …]
|
D | arm_nn_mat_mul_core_1x_s4.c | 71 int32_t acc_n0 = 0; in arm_nn_mat_mul_core_1x_s4() local 88 acc_n0 = vmladavaq_p_s8(acc_n0, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mul_core_1x_s4() 92 acc_n0 += sum_tmp; in arm_nn_mat_mul_core_1x_s4() 95 acc[index] = acc_n0; in arm_nn_mat_mul_core_1x_s4() 120 int32_t acc_n0 = acc[i]; in arm_nn_mat_mul_core_1x_s4() local 123 acc_n0 += bias[i]; in arm_nn_mat_mul_core_1x_s4() 125 acc_n0 = arm_nn_requantize(acc_n0, output_mult[i], output_shift[i]); in arm_nn_mat_mul_core_1x_s4() 126 acc_n0 += conv_params->output_offset; in arm_nn_mat_mul_core_1x_s4() 127 acc_n0 = MAX(acc_n0, conv_params->activation.min); in arm_nn_mat_mul_core_1x_s4() 128 acc_n0 = MIN(acc_n0, conv_params->activation.max); in arm_nn_mat_mul_core_1x_s4() [all …]
|
D | arm_nn_mat_mult_nt_t_s16.c | 78 int32_t acc_n0 = 0; in arm_nn_mat_mult_nt_t_s16() local 93 acc_n0 += ip_row_0[j] * col; in arm_nn_mat_mult_nt_t_s16() 125 [out0] "=Te"(acc_n0), in arm_nn_mat_mult_nt_t_s16() 139 acc_n0 += bias_s32[i]; in arm_nn_mat_mult_nt_t_s16() 145 int32x4_t res = {acc_n0, acc_n1, acc_n2, acc_n3}; in arm_nn_mat_mult_nt_t_s16() 156 int64_t acc_n0_s64 = acc_n0; in arm_nn_mat_mult_nt_t_s16() 190 acc_n0 = arm_nn_requantize_s64(acc_n0_s64, reduced_multiplier, shift); in arm_nn_mat_mult_nt_t_s16() 194 int32x4_t res = {acc_n0, acc_n1, acc_n2, acc_n3}; in arm_nn_mat_mult_nt_t_s16() 218 int32_t acc_n0 = 0; in arm_nn_mat_mult_nt_t_s16() local 226 acc_n0 += ip_row_0[j] * col; in arm_nn_mat_mult_nt_t_s16() [all …]
|
D | arm_nn_mat_mul_core_4x_s8.c | 61 int32_t acc_n0 = 0; in arm_nn_mat_mul_core_4x_s8() local 78 acc_n0 += ip_row_0[j] * col; in arm_nn_mat_mul_core_4x_s8() 106 [out0] "+Te"(acc_n0), in arm_nn_mat_mul_core_4x_s8() 114 int32x4_t res = {acc_n0, acc_n1, acc_n2, acc_n3}; in arm_nn_mat_mul_core_4x_s8()
|
D | arm_nn_mat_mult_nt_t_s8.c | 71 int32_t acc_n0 = 0; in arm_nn_mat_mult_nt_t_s8() local 88 acc_n0 += lhs_vec[j] * col; in arm_nn_mat_mult_nt_t_s8() 123 [out0] "=Te"(acc_n0), in arm_nn_mat_mult_nt_t_s8() 130 int32x4_t res = {acc_n0, acc_n1, acc_n2, acc_n3}; in arm_nn_mat_mult_nt_t_s8() 160 int32_t acc_n0 = 0; in arm_nn_mat_mult_nt_t_s8() local 170 acc_n0 += lhs_vec[j] * col; in arm_nn_mat_mult_nt_t_s8() 185 … : [col] "+r"(col_base), [sum] "=Te"(sum_tmp), [row0] "+r"(lhs_vec), [out0] "=Te"(acc_n0) in arm_nn_mat_mult_nt_t_s8() 190 sum_tmp += acc_n0; in arm_nn_mat_mult_nt_t_s8() 215 int32_t acc_n0 = acc[i]; in arm_nn_mat_mult_nt_t_s8() local 216 acc_n0 = arm_nn_requantize(acc_n0, multipliers[i], shifts[i]); in arm_nn_mat_mult_nt_t_s8() [all …]
|
D | arm_nn_mat_mult_nt_t_s4.c | 77 int32_t acc_n0 = 0; in arm_nn_mat_mult_nt_t_s4() local 95 acc_n0 += ip_row_0[0] * col; in arm_nn_mat_mult_nt_t_s4() 119 acc_n0 = vmladavaq_s8(acc_n0, col_vec, lhs_vec); in arm_nn_mat_mult_nt_t_s4() 144 acc_n0 = vmladavaq_p_s8(acc_n0, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 156 int32x4_t res = {acc_n0, acc_n1, acc_n2, acc_n3}; in arm_nn_mat_mult_nt_t_s4() 183 int32_t acc_n0 = 0; in arm_nn_mat_mult_nt_t_s4() local 199 acc_n0 += ip_row_0[0] * col; in arm_nn_mat_mult_nt_t_s4() 221 acc_n0 = vmladavaq_s8(acc_n0, col_vec, lhs_vec); in arm_nn_mat_mult_nt_t_s4() 242 acc_n0 = vmladavaq_p_s8(acc_n0, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 251 int32x4_t res = {acc_n0, acc_n1, acc_n2, 0}; in arm_nn_mat_mult_nt_t_s4() [all …]
|