Home
last modified time | relevance | path

Searched refs:ker_0 (Results 1 – 16 of 16) sorted by relevance

/cmsis-nn-3.7.0/Source/NNSupportFunctions/
Darm_nn_vec_mat_mult_t_svdf_s8.c95 const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); in arm_nn_vec_mat_mult_t_svdf_s8() local
96 rhs_sum_0 = vaddvaq_s8(rhs_sum_0, ker_0); in arm_nn_vec_mat_mult_t_svdf_s8()
97 acc_0 = vmladavaq_s8(acc_0, ker_0, input); in arm_nn_vec_mat_mult_t_svdf_s8()
143 const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); in arm_nn_vec_mat_mult_t_svdf_s8() local
144 rhs_sum_0 = vaddvaq_s8(rhs_sum_0, ker_0); in arm_nn_vec_mat_mult_t_svdf_s8()
145 acc_0 = vmladavaq_s8(acc_0, ker_0, input); in arm_nn_vec_mat_mult_t_svdf_s8()
180 int32_t vec_0, vec_1, ker_0, ker_1; in arm_nn_vec_mat_mult_t_svdf_s8() local
191 ker_0 = arm_nn_read_s8x4_ia(&rhs_0); in arm_nn_vec_mat_mult_t_svdf_s8()
192 ker_1 = SXTB16_RORn((uint32_t)ker_0, 8); in arm_nn_vec_mat_mult_t_svdf_s8()
193 ker_0 = SXTB16(ker_0); in arm_nn_vec_mat_mult_t_svdf_s8()
[all …]
Darm_nn_depthwise_conv_nt_t_s4.c95 int32x4_t ker_0; in arm_nn_depthwise_conv_nt_t_s4() local
98 ker_0 = vldrbq_gather_offset_s32(rhs_0, gather_offset); in arm_nn_depthwise_conv_nt_t_s4()
100 ker_0 = vrshlq_m_n_s32(ker_0, 28, lower_nibble_mask); in arm_nn_depthwise_conv_nt_t_s4()
101 ker_0 = vshrq_m_n_s32(ker_0, ker_0, 24, lower_nibble_mask); in arm_nn_depthwise_conv_nt_t_s4()
103 ker_0 = vshrq_n_s32(ker_0, 4); in arm_nn_depthwise_conv_nt_t_s4()
109 ker_0 = vldrbq_s32(temp); in arm_nn_depthwise_conv_nt_t_s4()
112 ker_sum = vaddq_s32(ker_sum, ker_0); in arm_nn_depthwise_conv_nt_t_s4()
115 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_s4()
118 out_1 += vmulq_s32(ip_1, ker_0); in arm_nn_depthwise_conv_nt_t_s4()
121 out_2 += vmulq_s32(ip_2, ker_0); in arm_nn_depthwise_conv_nt_t_s4()
[all …]
Darm_nn_vec_mat_mult_t_s4.c112 const int8x16_t ker_0 = vldrbq_s8(rhs_ptr); in arm_nn_vec_mat_mult_t_s4() local
114 int8x16_t ker_low_0 = vrshlq_n_s8(ker_0, 4); in arm_nn_vec_mat_mult_t_s4()
116 int8x16_t ker_high_0 = vshrq_n_s8(ker_0, 4); in arm_nn_vec_mat_mult_t_s4()
141 int8x16_t ker_0 = vldrbq_gather_offset_s8(rhs_ptr, gather_offset); in arm_nn_vec_mat_mult_t_s4() local
142 ker_0 = vrshlq_m_n_s8(ker_0, 4, lower_nibble_mask); in arm_nn_vec_mat_mult_t_s4()
143 ker_0 = vshrq_n_s8(ker_0, 4); in arm_nn_vec_mat_mult_t_s4()
145 rhs_sum_0 = vaddvaq_s8(rhs_sum_0, ker_0); in arm_nn_vec_mat_mult_t_s4()
146 acc0 = vmladavaq_s8(acc0, ker_0, input); in arm_nn_vec_mat_mult_t_s4()
163 int8x16_t ker_0 = vldrbq_gather_offset_z_s8(rhs_ptr, gather_offset, rmdr_mask); in arm_nn_vec_mat_mult_t_s4() local
164 ker_0 = vrshlq_m_n_s8(ker_0, 4, lower_nibble_mask); in arm_nn_vec_mat_mult_t_s4()
[all …]
Darm_nn_vec_mat_mul_result_acc_s8_s16.c90 const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); in arm_nn_vec_mat_mul_result_acc_s8_s16() local
91 acc_0 = vmladavaq_s8(acc_0, ker_0, input); in arm_nn_vec_mat_mul_result_acc_s8_s16()
136 const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); in arm_nn_vec_mat_mul_result_acc_s8_s16() local
137 acc_0 = vmladavaq_s8(acc_0, ker_0, input); in arm_nn_vec_mat_mul_result_acc_s8_s16()
174 int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_0); in arm_nn_vec_mat_mul_result_acc_s8_s16() local
175 int32_t ker_1 = SXTB16_RORn((uint32_t)ker_0, 8); in arm_nn_vec_mat_mul_result_acc_s8_s16()
176 ker_0 = SXTB16(ker_0); in arm_nn_vec_mat_mul_result_acc_s8_s16()
179 acc_0 = SMLAD(ker_0, vec_0, acc_0); in arm_nn_vec_mat_mul_result_acc_s8_s16()
181 ker_0 = arm_nn_read_s8x4_ia(&rhs_1); in arm_nn_vec_mat_mul_result_acc_s8_s16()
182 ker_1 = SXTB16_RORn((uint32_t)ker_0, 8); in arm_nn_vec_mat_mul_result_acc_s8_s16()
[all …]
Darm_nn_vec_mat_mult_t_s8.c115 const int8x16_t ker_0 = vldrbq_z_s8(rhs_0_ptr, p); in arm_nn_vec_mat_mult_t_s8() local
116 acc_0 = vmladavaq_s8(acc_0, ker_0, input); in arm_nn_vec_mat_mult_t_s8()
171 const int8x16_t ker_0 = vldrbq_z_s8(rhs_ptr, p); in arm_nn_vec_mat_mult_t_s8() local
172 acc_0 = vmladavaq_s8(acc_0, ker_0, input); in arm_nn_vec_mat_mult_t_s8()
232 int32_t ker_0 = arm_nn_read_s8x4_ia(&rhs_0_ptr); in arm_nn_vec_mat_mult_t_s8()
233 int32_t ker_1 = SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8); in arm_nn_vec_mat_mult_t_s8()
234 ker_0 = SXTAB16(rhs_offset_s16x2, ker_0); in arm_nn_vec_mat_mult_t_s8()
237 acc_0 = SMLAD(ker_0, vec_0, acc_0); in arm_nn_vec_mat_mult_t_s8()
239 ker_0 = arm_nn_read_s8x4_ia(&rhs_1_ptr); in arm_nn_vec_mat_mult_t_s8()
240 ker_1 = SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8); in arm_nn_vec_mat_mult_t_s8()
[all …]
Darm_nn_depthwise_conv_nt_t_s8.c88 const int32x4_t ker_0 = vldrbq_s32(rhs_0); in arm_nn_depthwise_conv_nt_t_s8() local
89 ker_sum = vaddq_s32(ker_sum, ker_0); in arm_nn_depthwise_conv_nt_t_s8()
92 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_s8()
95 out_1 += vmulq_s32(ip_1, ker_0); in arm_nn_depthwise_conv_nt_t_s8()
98 out_2 += vmulq_s32(ip_2, ker_0); in arm_nn_depthwise_conv_nt_t_s8()
101 out_3 += vmulq_s32(ip_3, ker_0); in arm_nn_depthwise_conv_nt_t_s8()
Darm_nn_depthwise_conv_nt_t_padded_s8.c96 const int32x4_t ker_0 = vldrbq_s32(rhs_0); in arm_nn_depthwise_conv_nt_t_padded_s8() local
100 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_padded_s8()
104 out_1 += vmulq_s32(ip_1, ker_0); in arm_nn_depthwise_conv_nt_t_padded_s8()
108 out_2 += vmulq_s32(ip_2, ker_0); in arm_nn_depthwise_conv_nt_t_padded_s8()
113 out_3 += vmulq_s32(ip_3, ker_0); in arm_nn_depthwise_conv_nt_t_padded_s8()
Darm_nn_depthwise_conv_nt_t_s16.c80 const int32x4_t ker_0 = vldrbq_s32(rhs_0); in arm_nn_depthwise_conv_nt_t_s16() local
83 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_s16()
86 out_1 += vmulq_s32(ip_1, ker_0); in arm_nn_depthwise_conv_nt_t_s16()
89 out_2 += vmulq_s32(ip_2, ker_0); in arm_nn_depthwise_conv_nt_t_s16()
92 out_3 += vmulq_s32(ip_3, ker_0); in arm_nn_depthwise_conv_nt_t_s16()
Darm_nn_vec_mat_mult_t_s16.c237 int32_t ker_0, ker_1, vec_part_0, vec_part_1; in arm_nn_vec_mat_mult_t_s16() local
242 rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); in arm_nn_vec_mat_mult_t_s16()
244 acc_0 = SMLAD(ker_0, vec_part_0, acc_0); in arm_nn_vec_mat_mult_t_s16()
247 rhs_1 = read_and_pad(rhs_1, &ker_0, &ker_1); in arm_nn_vec_mat_mult_t_s16()
249 acc_1 = SMLAD(ker_0, vec_part_0, acc_1); in arm_nn_vec_mat_mult_t_s16()
295 int32_t ker_0, ker_1, vec; in arm_nn_vec_mat_mult_t_s16() local
296 rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); in arm_nn_vec_mat_mult_t_s16()
299 acc_0 = SMLAD(ker_0, vec, acc_0); in arm_nn_vec_mat_mult_t_s16()
Darm_nn_vec_mat_mul_result_acc_s16.c237 int32_t ker_0, ker_1, vec_part_0, vec_part_1; in arm_nn_vec_mat_mul_result_acc_s16() local
242 rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); in arm_nn_vec_mat_mul_result_acc_s16()
244 acc_0 = SMLAD(ker_0, vec_part_0, acc_0); in arm_nn_vec_mat_mul_result_acc_s16()
247 rhs_1 = read_and_pad(rhs_1, &ker_0, &ker_1); in arm_nn_vec_mat_mul_result_acc_s16()
249 acc_1 = SMLAD(ker_0, vec_part_0, acc_1); in arm_nn_vec_mat_mul_result_acc_s16()
294 int32_t ker_0, ker_1, vec; in arm_nn_vec_mat_mul_result_acc_s16() local
295 rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); in arm_nn_vec_mat_mul_result_acc_s16()
298 acc_0 = SMLAD(ker_0, vec, acc_0); in arm_nn_vec_mat_mul_result_acc_s16()
/cmsis-nn-3.7.0/Source/FullyConnectedFunctions/
Darm_vector_sum_s8.c88 const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p); in arm_vector_sum_s8() local
89 vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0); in arm_vector_sum_s8()
130 const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p); in arm_vector_sum_s8() local
131 vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0); in arm_vector_sum_s8()
Darm_vector_sum_s8_s64.c87 const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p); in arm_vector_sum_s8_s64() local
88 vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0); in arm_vector_sum_s8_s64()
129 const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p); in arm_vector_sum_s8_s64() local
130 vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0); in arm_vector_sum_s8_s64()
/cmsis-nn-3.7.0/Source/ConvolutionFunctions/
Darm_nn_depthwise_conv_s8_core.c74 int32x4_t ker_0 = vldrbq_s32(row_0); in arm_nn_depthwise_conv_s8_core() local
88 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_s8_core()
89 out_1 += vmulq_s32(ip_1, ker_0); in arm_nn_depthwise_conv_s8_core()
108 ker_0 = vldrbq_s32(row_0); in arm_nn_depthwise_conv_s8_core()
120 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_s8_core()
121 out_1 += vmulq_s32(ip_1, ker_0); in arm_nn_depthwise_conv_s8_core()
130 ker_0 = vldrbq_s32(row_0); in arm_nn_depthwise_conv_s8_core()
Darm_depthwise_conv_s4_opt.c187 int32x4_t ker_0; in arm_depthwise_conv_s4_opt() local
190 ker_0 = vldrbq_gather_offset_s32(row_0, gather_offset); in arm_depthwise_conv_s4_opt()
191 ker_0 = vrshlq_m_n_s32(ker_0, 28, lower_nibble_mask); in arm_depthwise_conv_s4_opt()
192 ker_0 = vshrq_m_n_s32(ker_0, ker_0, 24, lower_nibble_mask); in arm_depthwise_conv_s4_opt()
194 ker_0 = vshrq_n_s32(ker_0, 4); in arm_depthwise_conv_s4_opt()
202 ker_0 = vldrbq_s32(temp); in arm_depthwise_conv_s4_opt()
207 out_0 += vmulq_s32(ip_0, ker_0); in arm_depthwise_conv_s4_opt()
218 int32x4_t ker_0 = vldrbq_gather_offset_s32(row_0, gather_offset); in arm_depthwise_conv_s4_opt() local
219 ker_0 = vrshlq_m_n_s32(ker_0, 28, lower_nibble_mask); in arm_depthwise_conv_s4_opt()
220 ker_0 = vshrq_m_n_s32(ker_0, ker_0, 24, lower_nibble_mask); in arm_depthwise_conv_s4_opt()
[all …]
Darm_depthwise_conv_s8_opt.c182 const int32x4_t ker_0 = vldrbq_s32(row_0); in arm_depthwise_conv_s8_opt() local
185 out_0 += vmulq_s32(ip_0, ker_0); in arm_depthwise_conv_s8_opt()
Darm_depthwise_conv_fast_s16.c169 const int32x4_t ker_0 = vldrbq_s32(row_0); in arm_depthwise_conv_fast_s16() local
172 out_0 += vmulq_s32(ip_0, ker_0); in arm_depthwise_conv_fast_s16()