Searched refs:rmdr_mask (Results 1 – 3 of 3) sorted by relevance
/cmsis-nn-latest/Source/NNSupportFunctions/ |
D | arm_nn_mat_mul_core_1x_s4.c | 77 mve_pred16_t rmdr_mask = vctp8q((uint32_t)j); in arm_nn_mat_mul_core_1x_s4() local 78 int8x16_t col_vec = vldrbq_gather_offset_z_s8(col_base, gather_offset, rmdr_mask); in arm_nn_mat_mul_core_1x_s4() 80 col_vec = vrshlq_m_n_s8(col_vec, 4, (lower_nibble_mask & rmdr_mask)); in arm_nn_mat_mul_core_1x_s4() 83 sum_tmp = vaddvaq_p_s8(sum_tmp, col_vec, rmdr_mask); in arm_nn_mat_mul_core_1x_s4() 85 int8x16_t lhs_vec = vldrbq_z_s8(row_base, rmdr_mask); in arm_nn_mat_mul_core_1x_s4() 88 acc_n0 = vmladavaq_p_s8(acc_n0, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mul_core_1x_s4()
|
D | arm_nn_mat_mult_nt_t_s4.c | 88 mve_pred16_t rmdr_mask = vctp8q(rhs_cols_offset); in arm_nn_mat_mult_nt_t_s4() local 92 rmdr_mask >>= 1; in arm_nn_mat_mult_nt_t_s4() 134 if (rmdr_mask) in arm_nn_mat_mult_nt_t_s4() 136 int8x16_t col_vec = vldrbq_gather_offset_z_s8(col_base, gather_offset, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 141 sum_tmp = vaddvaq_p_s8(sum_tmp, col_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 143 int8x16_t lhs_vec = vldrbq_z_s8(ip_row_0, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 144 acc_n0 = vmladavaq_p_s8(acc_n0, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 146 lhs_vec = vldrbq_z_s8(ip_row_1, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 147 acc_n1 = vmladavaq_p_s8(acc_n1, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() 149 lhs_vec = vldrbq_z_s8(ip_row_2, rmdr_mask); in arm_nn_mat_mult_nt_t_s4() [all …]
|
D | arm_nn_vec_mat_mult_t_s4.c | 94 mve_pred16_t rmdr_mask = vctp8q(rhs_cols_offset); in arm_nn_vec_mat_mult_t_s4() local 159 if (rmdr_mask) in arm_nn_vec_mat_mult_t_s4() 161 const int8x16_t input = vldrbq_z_s8(lhs_ptr, rmdr_mask); in arm_nn_vec_mat_mult_t_s4() 163 int8x16_t ker_0 = vldrbq_gather_offset_z_s8(rhs_ptr, gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4() 170 … int8x16_t ker_1 = vldrbq_gather_offset_z_s8(&rhs_ptr[rhs_offset], gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4() 191 rmdr_mask >>= 1; in arm_nn_vec_mat_mult_t_s4() 266 if (rmdr_mask) in arm_nn_vec_mat_mult_t_s4() 268 const int8x16_t input = vldrbq_z_s8(lhs_ptr, rmdr_mask); in arm_nn_vec_mat_mult_t_s4() 270 int8x16_t ker_0 = vldrbq_gather_offset_z_s8(rhs_ptr, gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4() 277 … int8x16_t ker_1 = vldrbq_gather_offset_z_s8(&rhs_ptr[rhs_offset], gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4()
|