/cmsis-nn-3.7.0/Source/NNSupportFunctions/ |
D | arm_nn_vec_mat_mult_t_svdf_s8.c |
     95  const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p);  in arm_nn_vec_mat_mult_t_svdf_s8() local
     96  rhs_sum_0 = vaddvaq_s8(rhs_sum_0, ker_0);  in arm_nn_vec_mat_mult_t_svdf_s8()
     97  acc_0 = vmladavaq_s8(acc_0, ker_0, input);  in arm_nn_vec_mat_mult_t_svdf_s8()
    143  const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p);  in arm_nn_vec_mat_mult_t_svdf_s8() local
    144  rhs_sum_0 = vaddvaq_s8(rhs_sum_0, ker_0);  in arm_nn_vec_mat_mult_t_svdf_s8()
    145  acc_0 = vmladavaq_s8(acc_0, ker_0, input);  in arm_nn_vec_mat_mult_t_svdf_s8()
    180  int32_t vec_0, vec_1, ker_0, ker_1;  in arm_nn_vec_mat_mult_t_svdf_s8() local
    191  ker_0 = arm_nn_read_s8x4_ia(&rhs_0);  in arm_nn_vec_mat_mult_t_svdf_s8()
    192  ker_1 = SXTB16_RORn((uint32_t)ker_0, 8);  in arm_nn_vec_mat_mult_t_svdf_s8()
    193  ker_0 = SXTB16(ker_0);  in arm_nn_vec_mat_mult_t_svdf_s8()
    [all …]
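The MVE occurrences above load 16 weights per step with a tail-predicated load (vldrbq_z_s8) and feed the same register into a byte-sum accumulator (vaddvaq_s8) and a dot-product accumulator (vmladavaq_s8). A minimal standalone sketch of that pattern, with hypothetical names (s8_dot_and_sum, lhs, rhs, len are not from the file; assumes a target built with MVE support):

    #include <arm_mve.h>
    #include <stdint.h>

    /* Illustrative only: accumulate dot(lhs, rhs) and sum(rhs) over 'len' int8
       elements with MVE tail predication, mirroring the loop structure
       suggested by the lines above. */
    static void s8_dot_and_sum(const int8_t *lhs, const int8_t *rhs, int32_t len,
                               int32_t *dot, int32_t *rhs_sum)
    {
        int32_t acc = 0;
        int32_t sum = 0;
        for (int32_t i = 0; i < len; i += 16)
        {
            /* Lanes past the end are disabled; the zeroing load leaves 0 there,
               so they do not disturb either accumulator. */
            mve_pred16_t p = vctp8q((uint32_t)(len - i));
            int8x16_t vec = vldrbq_z_s8(lhs + i, p);
            int8x16_t ker = vldrbq_z_s8(rhs + i, p);
            sum = vaddvaq_s8(sum, ker);        /* running sum of the weight bytes */
            acc = vmladavaq_s8(acc, ker, vec); /* running dot product             */
        }
        *dot = acc;
        *rhs_sum = sum;
    }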
D | arm_nn_depthwise_conv_nt_t_s4.c |
     95  int32x4_t ker_0;  in arm_nn_depthwise_conv_nt_t_s4() local
     98  ker_0 = vldrbq_gather_offset_s32(rhs_0, gather_offset);  in arm_nn_depthwise_conv_nt_t_s4()
    100  ker_0 = vrshlq_m_n_s32(ker_0, 28, lower_nibble_mask);  in arm_nn_depthwise_conv_nt_t_s4()
    101  ker_0 = vshrq_m_n_s32(ker_0, ker_0, 24, lower_nibble_mask);  in arm_nn_depthwise_conv_nt_t_s4()
    103  ker_0 = vshrq_n_s32(ker_0, 4);  in arm_nn_depthwise_conv_nt_t_s4()
    109  ker_0 = vldrbq_s32(temp);  in arm_nn_depthwise_conv_nt_t_s4()
    112  ker_sum = vaddq_s32(ker_sum, ker_0);  in arm_nn_depthwise_conv_nt_t_s4()
    115  out_0 += vmulq_s32(ip_0, ker_0);  in arm_nn_depthwise_conv_nt_t_s4()
    118  out_1 += vmulq_s32(ip_1, ker_0);  in arm_nn_depthwise_conv_nt_t_s4()
    121  out_2 += vmulq_s32(ip_2, ker_0);  in arm_nn_depthwise_conv_nt_t_s4()
    [all …]
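The s4 lines above recover signed 4-bit weights from packed bytes: lanes holding a low nibble are shifted left by 28 and arithmetically shifted right by 24 under a mask, after which a common right shift by 4 leaves every lane with a sign-extended nibble. A hypothetical scalar equivalent of that arithmetic (unpack_s4 is illustrative, not from the file; assumes the usual arithmetic >> on signed values):

    #include <stdint.h>

    /* Illustrative only: extract the two signed 4-bit values packed in one byte.
       Left shift then arithmetic right shift sign-extends the low nibble;
       a single arithmetic right shift sign-extends the high nibble. */
    static inline void unpack_s4(int8_t packed, int8_t *low, int8_t *high)
    {
        *low  = (int8_t)((int8_t)(packed << 4) >> 4); /* bits [3:0] */
        *high = (int8_t)(packed >> 4);                /* bits [7:4] */
    }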
D | arm_nn_vec_mat_mult_t_s4.c |
    112  const int8x16_t ker_0 = vldrbq_s8(rhs_ptr);  in arm_nn_vec_mat_mult_t_s4() local
    114  int8x16_t ker_low_0 = vrshlq_n_s8(ker_0, 4);  in arm_nn_vec_mat_mult_t_s4()
    116  int8x16_t ker_high_0 = vshrq_n_s8(ker_0, 4);  in arm_nn_vec_mat_mult_t_s4()
    141  int8x16_t ker_0 = vldrbq_gather_offset_s8(rhs_ptr, gather_offset);  in arm_nn_vec_mat_mult_t_s4() local
    142  ker_0 = vrshlq_m_n_s8(ker_0, 4, lower_nibble_mask);  in arm_nn_vec_mat_mult_t_s4()
    143  ker_0 = vshrq_n_s8(ker_0, 4);  in arm_nn_vec_mat_mult_t_s4()
    145  rhs_sum_0 = vaddvaq_s8(rhs_sum_0, ker_0);  in arm_nn_vec_mat_mult_t_s4()
    146  acc0 = vmladavaq_s8(acc0, ker_0, input);  in arm_nn_vec_mat_mult_t_s4()
    163  int8x16_t ker_0 = vldrbq_gather_offset_z_s8(rhs_ptr, gather_offset, rmdr_mask);  in arm_nn_vec_mat_mult_t_s4() local
    164  ker_0 = vrshlq_m_n_s8(ker_0, 4, lower_nibble_mask);  in arm_nn_vec_mat_mult_t_s4()
    [all …]
D | arm_nn_vec_mat_mul_result_acc_s8_s16.c |
     90  const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p);  in arm_nn_vec_mat_mul_result_acc_s8_s16() local
     91  acc_0 = vmladavaq_s8(acc_0, ker_0, input);  in arm_nn_vec_mat_mul_result_acc_s8_s16()
    136  const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p);  in arm_nn_vec_mat_mul_result_acc_s8_s16() local
    137  acc_0 = vmladavaq_s8(acc_0, ker_0, input);  in arm_nn_vec_mat_mul_result_acc_s8_s16()
    174  int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_0);  in arm_nn_vec_mat_mul_result_acc_s8_s16() local
    175  int32_t ker_1 = SXTB16_RORn((uint32_t)ker_0, 8);  in arm_nn_vec_mat_mul_result_acc_s8_s16()
    176  ker_0 = SXTB16(ker_0);  in arm_nn_vec_mat_mul_result_acc_s8_s16()
    179  acc_0 = SMLAD(ker_0, vec_0, acc_0);  in arm_nn_vec_mat_mul_result_acc_s8_s16()
    181  ker_0 = arm_nn_read_s8x4_ia(&rhs_1);  in arm_nn_vec_mat_mul_result_acc_s8_s16()
    182  ker_1 = SXTB16_RORn((uint32_t)ker_0, 8);  in arm_nn_vec_mat_mul_result_acc_s8_s16()
    [all …]
D | arm_nn_vec_mat_mult_t_s8.c |
    115  const int8x16_t ker_0 = vldrbq_z_s8(rhs_0_ptr, p);  in arm_nn_vec_mat_mult_t_s8() local
    116  acc_0 = vmladavaq_s8(acc_0, ker_0, input);  in arm_nn_vec_mat_mult_t_s8()
    171  const int8x16_t ker_0 = vldrbq_z_s8(rhs_ptr, p);  in arm_nn_vec_mat_mult_t_s8() local
    172  acc_0 = vmladavaq_s8(acc_0, ker_0, input);  in arm_nn_vec_mat_mult_t_s8()
    232  int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_0_ptr);  in arm_nn_vec_mat_mult_t_s8()
    233  int32_t ker_1 = SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8);  in arm_nn_vec_mat_mult_t_s8()
    234  ker_0 = SXTAB16(rhs_offset_s16x2, ker_0);  in arm_nn_vec_mat_mult_t_s8()
    237  acc_0 = SMLAD(ker_0, vec_0, acc_0);  in arm_nn_vec_mat_mult_t_s8()
    239  ker_0 = arm_nn_read_s8x4_ia(&rhs_1_ptr);  in arm_nn_vec_mat_mult_t_s8()
    240  ker_1 = SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8);  in arm_nn_vec_mat_mult_t_s8()
    [all …]
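On the DSP-extension path shown above, four s8 weights are read as one 32-bit word; SXTAB16 sign-extends bytes 0 and 2 into two 16-bit lanes while adding the packed rhs offset, SXTAB16_RORn does the same for bytes 1 and 3, and each SMLAD then performs two 16x16 multiply-accumulates. A scalar model of the SMLAD step (smlad_model is illustrative, not the CMSIS-NN macro):

    #include <stdint.h>

    /* Illustrative only: SMLAD treats each 32-bit operand as two signed
       16-bit lanes and adds both lane products to the accumulator. */
    static inline int32_t smlad_model(int32_t x, int32_t y, int32_t acc)
    {
        const int16_t x_lo = (int16_t)(x & 0xFFFF), x_hi = (int16_t)(x >> 16);
        const int16_t y_lo = (int16_t)(y & 0xFFFF), y_hi = (int16_t)(y >> 16);
        return acc + (int32_t)x_lo * y_lo + (int32_t)x_hi * y_hi;
    }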
D | arm_nn_depthwise_conv_nt_t_s8.c |
     88  const int32x4_t ker_0 = vldrbq_s32(rhs_0);  in arm_nn_depthwise_conv_nt_t_s8() local
     89  ker_sum = vaddq_s32(ker_sum, ker_0);  in arm_nn_depthwise_conv_nt_t_s8()
     92  out_0 += vmulq_s32(ip_0, ker_0);  in arm_nn_depthwise_conv_nt_t_s8()
     95  out_1 += vmulq_s32(ip_1, ker_0);  in arm_nn_depthwise_conv_nt_t_s8()
     98  out_2 += vmulq_s32(ip_2, ker_0);  in arm_nn_depthwise_conv_nt_t_s8()
    101  out_3 += vmulq_s32(ip_3, ker_0);  in arm_nn_depthwise_conv_nt_t_s8()
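In the depthwise entries here and below, one 4-channel slice of the kernel is widened to 32-bit lanes once (vldrbq_s32) and then multiplied against several input positions, so the weight load is amortised over four output accumulators. A reduced sketch of that reuse pattern (dw_mac_4x, ip, ker, stride and out are illustrative, not from the file; assumes an MVE target):

    #include <arm_mve.h>
    #include <stdint.h>

    /* Illustrative only: reuse one widened 4-channel kernel vector across
       four input positions, each feeding its own accumulator. */
    static void dw_mac_4x(const int8_t *ip, const int8_t *ker, int32_t stride,
                          int32x4_t out[4])
    {
        const int32x4_t ker_v = vldrbq_s32(ker); /* 4 s8 weights -> 4 s32 lanes */
        for (int i = 0; i < 4; i++)
        {
            const int32x4_t ip_v = vldrbq_s32(ip + (int32_t)i * stride);
            out[i] = vaddq_s32(out[i], vmulq_s32(ip_v, ker_v));
        }
    }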
D | arm_nn_depthwise_conv_nt_t_padded_s8.c |
     96  const int32x4_t ker_0 = vldrbq_s32(rhs_0);  in arm_nn_depthwise_conv_nt_t_padded_s8() local
    100  out_0 += vmulq_s32(ip_0, ker_0);  in arm_nn_depthwise_conv_nt_t_padded_s8()
    104  out_1 += vmulq_s32(ip_1, ker_0);  in arm_nn_depthwise_conv_nt_t_padded_s8()
    108  out_2 += vmulq_s32(ip_2, ker_0);  in arm_nn_depthwise_conv_nt_t_padded_s8()
    113  out_3 += vmulq_s32(ip_3, ker_0);  in arm_nn_depthwise_conv_nt_t_padded_s8()
D | arm_nn_depthwise_conv_nt_t_s16.c |
     80  const int32x4_t ker_0 = vldrbq_s32(rhs_0);  in arm_nn_depthwise_conv_nt_t_s16() local
     83  out_0 += vmulq_s32(ip_0, ker_0);  in arm_nn_depthwise_conv_nt_t_s16()
     86  out_1 += vmulq_s32(ip_1, ker_0);  in arm_nn_depthwise_conv_nt_t_s16()
     89  out_2 += vmulq_s32(ip_2, ker_0);  in arm_nn_depthwise_conv_nt_t_s16()
     92  out_3 += vmulq_s32(ip_3, ker_0);  in arm_nn_depthwise_conv_nt_t_s16()
D | arm_nn_vec_mat_mult_t_s16.c |
    237  int32_t ker_0, ker_1, vec_part_0, vec_part_1;  in arm_nn_vec_mat_mult_t_s16() local
    242  rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1);  in arm_nn_vec_mat_mult_t_s16()
    244  acc_0 = SMLAD(ker_0, vec_part_0, acc_0);  in arm_nn_vec_mat_mult_t_s16()
    247  rhs_1 = read_and_pad(rhs_1, &ker_0, &ker_1);  in arm_nn_vec_mat_mult_t_s16()
    249  acc_1 = SMLAD(ker_0, vec_part_0, acc_1);  in arm_nn_vec_mat_mult_t_s16()
    295  int32_t ker_0, ker_1, vec;  in arm_nn_vec_mat_mult_t_s16() local
    296  rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1);  in arm_nn_vec_mat_mult_t_s16()
    299  acc_0 = SMLAD(ker_0, vec, acc_0);  in arm_nn_vec_mat_mult_t_s16()
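In these s16 variants, read_and_pad widens four consecutive int8 weights into two 32-bit words of two sign-extended 16-bit lanes each, so every SMLAD call multiplies them against a pair of packed int16 activations. A hypothetical scalar stand-in under that assumption (read_and_pad_model is illustrative; the real helper may pair the lanes differently, which does not change the result since SMLAD sums both lane products as long as weights and activations use the same packing):

    #include <stdint.h>

    /* Illustrative only: widen 4 int8 weights into two words of two s16
       lanes each, the shape consumed by the SMLAD-based loops above. */
    static const int8_t *read_and_pad_model(const int8_t *src,
                                            int32_t *out_01, int32_t *out_23)
    {
        const uint32_t w0 = (uint16_t)(int16_t)src[0];
        const uint32_t w1 = (uint16_t)(int16_t)src[1];
        const uint32_t w2 = (uint16_t)(int16_t)src[2];
        const uint32_t w3 = (uint16_t)(int16_t)src[3];
        *out_01 = (int32_t)(w0 | (w1 << 16)); /* weights 0 and 1 */
        *out_23 = (int32_t)(w2 | (w3 << 16)); /* weights 2 and 3 */
        return src + 4;
    }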
D | arm_nn_vec_mat_mul_result_acc_s16.c |
    237  int32_t ker_0, ker_1, vec_part_0, vec_part_1;  in arm_nn_vec_mat_mul_result_acc_s16() local
    242  rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1);  in arm_nn_vec_mat_mul_result_acc_s16()
    244  acc_0 = SMLAD(ker_0, vec_part_0, acc_0);  in arm_nn_vec_mat_mul_result_acc_s16()
    247  rhs_1 = read_and_pad(rhs_1, &ker_0, &ker_1);  in arm_nn_vec_mat_mul_result_acc_s16()
    249  acc_1 = SMLAD(ker_0, vec_part_0, acc_1);  in arm_nn_vec_mat_mul_result_acc_s16()
    294  int32_t ker_0, ker_1, vec;  in arm_nn_vec_mat_mul_result_acc_s16() local
    295  rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1);  in arm_nn_vec_mat_mul_result_acc_s16()
    298  acc_0 = SMLAD(ker_0, vec, acc_0);  in arm_nn_vec_mat_mul_result_acc_s16()
/cmsis-nn-3.7.0/Source/FullyConnectedFunctions/ |
D | arm_vector_sum_s8.c |
     88  const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p);  in arm_vector_sum_s8() local
     89  vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0);  in arm_vector_sum_s8()
    130  const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p);  in arm_vector_sum_s8() local
    131  vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0);  in arm_vector_sum_s8()
D | arm_vector_sum_s8_s64.c |
     87  const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p);  in arm_vector_sum_s8_s64() local
     88  vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0);  in arm_vector_sum_s8_s64()
    129  const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p);  in arm_vector_sum_s8_s64() local
    130  vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0);  in arm_vector_sum_s8_s64()
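The plain byte sums computed in these two files (and the rhs_sum_* accumulators in the NNSupportFunctions entries) are typically used to fold an input offset into the bias, since sum_i w_i*(x_i + x_off) = sum_i w_i*x_i + x_off*sum_i w_i. A small illustrative helper (not from the files) applying such a precomputed sum:

    #include <stdint.h>

    /* Illustrative only: with a precomputed per-row weight sum, handling the
       input offset costs one multiply-add instead of per-element work. */
    static inline int32_t apply_input_offset(int32_t dot, int32_t weight_sum,
                                             int32_t input_offset)
    {
        return dot + input_offset * weight_sum;
    }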
/cmsis-nn-3.7.0/Source/ConvolutionFunctions/ |
D | arm_nn_depthwise_conv_s8_core.c |
     74  int32x4_t ker_0 = vldrbq_s32(row_0);  in arm_nn_depthwise_conv_s8_core() local
     88  out_0 += vmulq_s32(ip_0, ker_0);  in arm_nn_depthwise_conv_s8_core()
     89  out_1 += vmulq_s32(ip_1, ker_0);  in arm_nn_depthwise_conv_s8_core()
    108  ker_0 = vldrbq_s32(row_0);  in arm_nn_depthwise_conv_s8_core()
    120  out_0 += vmulq_s32(ip_0, ker_0);  in arm_nn_depthwise_conv_s8_core()
    121  out_1 += vmulq_s32(ip_1, ker_0);  in arm_nn_depthwise_conv_s8_core()
    130  ker_0 = vldrbq_s32(row_0);  in arm_nn_depthwise_conv_s8_core()
D | arm_depthwise_conv_s4_opt.c |
    187  int32x4_t ker_0;  in arm_depthwise_conv_s4_opt() local
    190  ker_0 = vldrbq_gather_offset_s32(row_0, gather_offset);  in arm_depthwise_conv_s4_opt()
    191  ker_0 = vrshlq_m_n_s32(ker_0, 28, lower_nibble_mask);  in arm_depthwise_conv_s4_opt()
    192  ker_0 = vshrq_m_n_s32(ker_0, ker_0, 24, lower_nibble_mask);  in arm_depthwise_conv_s4_opt()
    194  ker_0 = vshrq_n_s32(ker_0, 4);  in arm_depthwise_conv_s4_opt()
    202  ker_0 = vldrbq_s32(temp);  in arm_depthwise_conv_s4_opt()
    207  out_0 += vmulq_s32(ip_0, ker_0);  in arm_depthwise_conv_s4_opt()
    218  int32x4_t ker_0 = vldrbq_gather_offset_s32(row_0, gather_offset);  in arm_depthwise_conv_s4_opt() local
    219  ker_0 = vrshlq_m_n_s32(ker_0, 28, lower_nibble_mask);  in arm_depthwise_conv_s4_opt()
    220  ker_0 = vshrq_m_n_s32(ker_0, ker_0, 24, lower_nibble_mask);  in arm_depthwise_conv_s4_opt()
    [all …]
D | arm_depthwise_conv_s8_opt.c |
    182  const int32x4_t ker_0 = vldrbq_s32(row_0);  in arm_depthwise_conv_s8_opt() local
    185  out_0 += vmulq_s32(ip_0, ker_0);  in arm_depthwise_conv_s8_opt()
D | arm_depthwise_conv_fast_s16.c |
    169  const int32x4_t ker_0 = vldrbq_s32(row_0);  in arm_depthwise_conv_fast_s16() local
    172  out_0 += vmulq_s32(ip_0, ker_0);  in arm_depthwise_conv_fast_s16()