Home
last modified time | relevance | path

Searched refs:rmdr_mask (Results 1 – 3 of 3) sorted by relevance

/cmsis-nn-latest/Source/NNSupportFunctions/
arm_nn_mat_mul_core_1x_s4.c
77 mve_pred16_t rmdr_mask = vctp8q((uint32_t)j); in arm_nn_mat_mul_core_1x_s4() local
78 int8x16_t col_vec = vldrbq_gather_offset_z_s8(col_base, gather_offset, rmdr_mask); in arm_nn_mat_mul_core_1x_s4()
80 col_vec = vrshlq_m_n_s8(col_vec, 4, (lower_nibble_mask & rmdr_mask)); in arm_nn_mat_mul_core_1x_s4()
83 sum_tmp = vaddvaq_p_s8(sum_tmp, col_vec, rmdr_mask); in arm_nn_mat_mul_core_1x_s4()
85 int8x16_t lhs_vec = vldrbq_z_s8(row_base, rmdr_mask); in arm_nn_mat_mul_core_1x_s4()
88 acc_n0 = vmladavaq_p_s8(acc_n0, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mul_core_1x_s4()
arm_nn_mat_mult_nt_t_s4.c
88 mve_pred16_t rmdr_mask = vctp8q(rhs_cols_offset); in arm_nn_mat_mult_nt_t_s4() local
92 rmdr_mask >>= 1; in arm_nn_mat_mult_nt_t_s4()
134 if (rmdr_mask) in arm_nn_mat_mult_nt_t_s4()
136 int8x16_t col_vec = vldrbq_gather_offset_z_s8(col_base, gather_offset, rmdr_mask); in arm_nn_mat_mult_nt_t_s4()
141 sum_tmp = vaddvaq_p_s8(sum_tmp, col_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4()
143 int8x16_t lhs_vec = vldrbq_z_s8(ip_row_0, rmdr_mask); in arm_nn_mat_mult_nt_t_s4()
144 acc_n0 = vmladavaq_p_s8(acc_n0, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4()
146 lhs_vec = vldrbq_z_s8(ip_row_1, rmdr_mask); in arm_nn_mat_mult_nt_t_s4()
147 acc_n1 = vmladavaq_p_s8(acc_n1, col_vec, lhs_vec, rmdr_mask); in arm_nn_mat_mult_nt_t_s4()
149 lhs_vec = vldrbq_z_s8(ip_row_2, rmdr_mask); in arm_nn_mat_mult_nt_t_s4()
[all …]
arm_nn_vec_mat_mult_t_s4.c
94 mve_pred16_t rmdr_mask = vctp8q(rhs_cols_offset); in arm_nn_vec_mat_mult_t_s4() local
159 if (rmdr_mask) in arm_nn_vec_mat_mult_t_s4()
161 const int8x16_t input = vldrbq_z_s8(lhs_ptr, rmdr_mask); in arm_nn_vec_mat_mult_t_s4()
163 int8x16_t ker_0 = vldrbq_gather_offset_z_s8(rhs_ptr, gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4()
170 … int8x16_t ker_1 = vldrbq_gather_offset_z_s8(&rhs_ptr[rhs_offset], gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4()
191 rmdr_mask >>= 1; in arm_nn_vec_mat_mult_t_s4()
266 if (rmdr_mask) in arm_nn_vec_mat_mult_t_s4()
268 const int8x16_t input = vldrbq_z_s8(lhs_ptr, rmdr_mask); in arm_nn_vec_mat_mult_t_s4()
270 int8x16_t ker_0 = vldrbq_gather_offset_z_s8(rhs_ptr, gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4()
277 … int8x16_t ker_1 = vldrbq_gather_offset_z_s8(&rhs_ptr[rhs_offset], gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4()