Home
last modified time | relevance | path

Searched refs:rhs_cols (Results 1 – 22 of 22) sorted by relevance

/cmsis-nn-3.7.0/Source/NNSupportFunctions/
Darm_nn_mat_mult_nt_t_s16.c56 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s16() argument
70 …onst int32_t rhs_cols_fast = is_int32_bias ? rhs_cols : (rhs_cols > MAX_COL_COUNT ? MAX_COL_COUNT … in arm_nn_mat_mult_nt_t_s16()
71 const int32_t rhs_cols_slow = rhs_cols - MAX_COL_COUNT; in arm_nn_mat_mult_nt_t_s16()
84 const int16_t *ip_row_1 = lhs + rhs_cols; in arm_nn_mat_mult_nt_t_s16()
85 const int16_t *ip_row_2 = lhs + (2 * rhs_cols); in arm_nn_mat_mult_nt_t_s16()
86 const int16_t *ip_row_3 = lhs + (3 * rhs_cols); in arm_nn_mat_mult_nt_t_s16()
87 const int8_t *col_base = rhs + i * rhs_cols; in arm_nn_mat_mult_nt_t_s16()
161 if (rhs_cols > MAX_COL_COUNT) in arm_nn_mat_mult_nt_t_s16()
164 ip_row_1 = lhs + rhs_cols + MAX_COL_COUNT; in arm_nn_mat_mult_nt_t_s16()
165 ip_row_2 = lhs + (2 * rhs_cols) + MAX_COL_COUNT; in arm_nn_mat_mult_nt_t_s16()
[all …]
Darm_nn_vec_mat_mul_result_acc_s8_s16.c49 const int32_t rhs_cols, in arm_nn_vec_mat_mul_result_acc_s8_s16() argument
64 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mul_result_acc_s8_s16()
68 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16()
70 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16()
72 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16()
74 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16()
81 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16()
125 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mul_result_acc_s8_s16()
128 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16()
142 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16()
[all …]
Darm_nn_vec_mat_mult_t_s16.c54 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_s16() argument
61 int32_t rhs_cols_fast = rhs_cols; in arm_nn_vec_mat_mult_t_s16()
63 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mult_t_s16()
78 const int8_t *rhs_ptr_1 = rhs + rhs_cols; in arm_nn_vec_mat_mult_t_s16()
79 const int8_t *rhs_ptr_2 = rhs + rhs_cols * 2; in arm_nn_vec_mat_mult_t_s16()
80 const int8_t *rhs_ptr_3 = rhs + rhs_cols * 3; in arm_nn_vec_mat_mult_t_s16()
117 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mult_t_s16()
119 for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++) in arm_nn_vec_mat_mult_t_s16()
162 rhs += 4 * rhs_cols; in arm_nn_vec_mat_mult_t_s16()
197 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mult_t_s16()
[all …]
Darm_nn_vec_mat_mult_t_s8.c67 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_s8() argument
87 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_s8()
91 const int8_t *rhs_1_ptr = rhs + rhs_cols; in arm_nn_vec_mat_mult_t_s8()
92 const int8_t *rhs_2_ptr = rhs + 2 * rhs_cols; in arm_nn_vec_mat_mult_t_s8()
93 const int8_t *rhs_3_ptr = rhs + 3 * rhs_cols; in arm_nn_vec_mat_mult_t_s8()
105 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_s8()
133 rhs += 4 * rhs_cols; in arm_nn_vec_mat_mult_t_s8()
142 acc += vdupq_n_s32(rhs_offset * lhs_offset) * vdupq_n_s32(rhs_cols); in arm_nn_vec_mat_mult_t_s8()
158 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_s8()
162 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_s8()
[all …]
Darm_nn_vec_mat_mul_result_acc_s16.c54 const int32_t rhs_cols, in arm_nn_vec_mat_mul_result_acc_s16() argument
70 int32_t rhs_cols_fast = rhs_cols; in arm_nn_vec_mat_mul_result_acc_s16()
72 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mul_result_acc_s16()
87 const int8_t *rhs_ptr_1 = rhs_ptr + rhs_cols; in arm_nn_vec_mat_mul_result_acc_s16()
88 const int8_t *rhs_ptr_2 = rhs_ptr + rhs_cols * 2; in arm_nn_vec_mat_mul_result_acc_s16()
89 const int8_t *rhs_ptr_3 = rhs_ptr + rhs_cols * 3; in arm_nn_vec_mat_mul_result_acc_s16()
126 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mul_result_acc_s16()
128 for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++) in arm_nn_vec_mat_mul_result_acc_s16()
167 rhs_ptr += 4 * rhs_cols; in arm_nn_vec_mat_mul_result_acc_s16()
195 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mul_result_acc_s16()
[all …]
Darm_nn_vec_mat_mult_t_svdf_s8.c56 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_svdf_s8() argument
61 if (rhs_cols < 0 || (NN_Q31_MAX - rhs_cols) < 16 || dst_offset < 0) in arm_nn_vec_mat_mult_t_svdf_s8()
75 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_svdf_s8()
79 const int8_t *rhs_1 = rhs + rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8()
80 const int8_t *rhs_2 = rhs + 2 * rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8()
86 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8()
112 rhs += 3 * rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8()
131 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_svdf_s8()
135 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8()
150 rhs += rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8()
[all …]
Darm_nn_mat_mult_nt_t_s4.c56 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s4() argument
65 const int rhs_cols_offset = rhs_cols % 16; in arm_nn_mat_mult_nt_t_s4()
66 const int32_t blk_cnt = rhs_cols >> 4; in arm_nn_mat_mult_nt_t_s4()
90 if ((rhs_cols & 0x1) & (i & 0x1)) in arm_nn_mat_mult_nt_t_s4()
194 if ((rhs_cols & 0x1) & (i & 0x1)) in arm_nn_mat_mult_nt_t_s4()
289 if ((rhs_cols & 0x1) & (i & 0x1)) in arm_nn_mat_mult_nt_t_s4()
367 const int32_t rhs_cols_int4 = rhs_cols >> 1; in arm_nn_mat_mult_nt_t_s4()
406 for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) in arm_nn_mat_mult_nt_t_s4()
409 read_and_pad_s4((const int8_t *)&packed_rhs_ptr[rhs_cols], &rhs_low1, &rhs_high1); in arm_nn_mat_mult_nt_t_s4()
433 read_and_pad_s4((const int8_t *)&packed_rhs_ptr[rhs_cols], &rhs_low1, &rhs_high1); in arm_nn_mat_mult_nt_t_s4()
[all …]
Darm_nn_mat_mult_nt_t_s8.c56 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s8() argument
80 const int8_t *col_base = rhs + i * rhs_cols; in arm_nn_mat_mult_nt_t_s8()
84 for (int j = 0; j < rhs_cols; j++) in arm_nn_mat_mult_nt_t_s8()
127 : [cnt] "r"(rhs_cols) in arm_nn_mat_mult_nt_t_s8()
162 const int8_t *col_base = rhs + i * rhs_cols; in arm_nn_mat_mult_nt_t_s8()
166 for (int j = 0; j < rhs_cols; j++) in arm_nn_mat_mult_nt_t_s8()
186 : [cnt] "r"(rhs_cols) in arm_nn_mat_mult_nt_t_s8()
227 const int32_t rhs_off0 = rhs_cols - 4; in arm_nn_mat_mult_nt_t_s8()
238 for (int32_t x = 0; x < rhs_cols; ++x) in arm_nn_mat_mult_nt_t_s8()
241 lhs_offset_contribution1 += rhs[x + rhs_cols]; in arm_nn_mat_mult_nt_t_s8()
[all …]
Darm_nn_mat_mult_nt_t_s8_s32.c53 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s8_s32() argument
58 const int32_t dst_idx_col_offset = dst_idx_offset * rhs_cols; in arm_nn_mat_mult_nt_t_s8_s32()
78 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
112 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
149 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
183 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
218 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
252 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
286 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
311 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32()
[all …]
Darm_nn_vec_mat_mult_t_s4.c61 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_s4() argument
67 const int rhs_offset = rhs_cols * row_loop_cnt; in arm_nn_vec_mat_mult_t_s4()
71 const int rhs_cols_offset = rhs_cols % 16; in arm_nn_vec_mat_mult_t_s4()
73 const int rhs_cols_offset = rhs_cols; in arm_nn_vec_mat_mult_t_s4()
86 const int32_t col_loop_cnt = rhs_cols >> 5; in arm_nn_vec_mat_mult_t_s4()
87 const int I6_elements_spill = rhs_cols & 0x10; in arm_nn_vec_mat_mult_t_s4()
180 if (rhs_cols & 1) in arm_nn_vec_mat_mult_t_s4()
317 for (int rhs_cols_idx = 0; rhs_cols_idx < (rhs_cols / 4); ++rhs_cols_idx) in arm_nn_vec_mat_mult_t_s4()
335 if (((rhs_cols % 4) == 2) || ((rhs_cols % 4) == 3)) in arm_nn_vec_mat_mult_t_s4()
357 if (rhs_cols % 2 == 1) in arm_nn_vec_mat_mult_t_s4()
[all …]
/cmsis-nn-3.7.0/Source/ConvolutionFunctions/
Darm_convolve_s8.c92 const int32_t rhs_cols = kernel_x * kernel_y * kernel_ch; in arm_convolve_s8() local
103 const int32_t remainder = rhs_cols % 4; in arm_convolve_s8()
104 const int32_t aligned_rhs_cols = remainder != 0 ? rhs_cols + 4 - remainder : rhs_cols; in arm_convolve_s8()
110 const int32_t aligned_rhs_cols_offset = aligned_rhs_cols - rhs_cols; in arm_convolve_s8()
173 rhs_cols, in arm_convolve_s8()
190 … im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); in arm_convolve_s8()
194 … im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); in arm_convolve_s8()
211 rhs_cols, in arm_convolve_s8()
227 rhs_cols, in arm_convolve_s8()
259 rhs_cols, in arm_convolve_s8()
[all …]
Darm_convolve_1x1_s4.c72 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s4() local
74 const int32_t input_inc = input_dims->w * conv_params->stride.h * rhs_cols; in arm_convolve_1x1_s4()
82 input_data = input_data_ref + (i_batch * rhs_cols * input_dims->w * input_dims->h); in arm_convolve_1x1_s4()
94 rhs_cols, in arm_convolve_1x1_s4()
99 rhs_cols * stride_w); in arm_convolve_1x1_s4()
Darm_convolve_1x1_s8.c72 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s8() local
74 const int32_t input_inc = input_dims->w * conv_params->stride.h * rhs_cols; in arm_convolve_1x1_s8()
82 input_data = input_data_ref + (i_batch * rhs_cols * input_dims->w * input_dims->h); in arm_convolve_1x1_s8()
94 rhs_cols, in arm_convolve_1x1_s8()
100 rhs_cols * stride_w); in arm_convolve_1x1_s8()
Darm_convolve_1x1_s4_fast.c74 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s4_fast() local
84 rhs_cols, in arm_convolve_1x1_s4_fast()
89 rhs_cols); in arm_convolve_1x1_s4_fast()
Darm_convolve_1x1_s8_fast.c74 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s8_fast() local
84 rhs_cols, in arm_convolve_1x1_s8_fast()
90 rhs_cols); in arm_convolve_1x1_s8_fast()
Darm_convolve_s4.c89 const int32_t rhs_cols = kernel_x * kernel_y * input_ch; in arm_convolve_s4() local
144 rhs_cols, in arm_convolve_s4()
149 rhs_cols); in arm_convolve_s4()
174 rhs_cols, in arm_convolve_s4()
179 rhs_cols); in arm_convolve_s4()
230 rhs_cols, in arm_convolve_s4()
263 if (rhs_cols % 2 && (i % 2)) in arm_convolve_s4()
271 uint16_t col_count = rhs_cols / 4; in arm_convolve_s4()
286 col_count = (rhs_cols & 0x3) >> 1; in arm_convolve_s4()
288 uint16_t col_count = rhs_cols >> 1; in arm_convolve_s4()
[all …]
Darm_convolve_s16.c79 const int32_t rhs_cols = input_ch * kernel_y * kernel_x; in arm_convolve_s16() local
147 rhs_cols, in arm_convolve_s16()
166 rhs_cols, in arm_convolve_s16()
195 rhs_cols, in arm_convolve_s16()
219 int32_t col_count = rhs_cols >> 2; in arm_convolve_s16()
236 col_count = rhs_cols & 0x3; in arm_convolve_s16()
238 uint16_t col_count = rhs_cols; in arm_convolve_s16()
Darm_convolve_get_buffer_sizes_s8.c91 const int32_t rhs_cols = filter_dims->w * filter_dims->h * input_dims->c; in arm_convolve_s8_get_buffer_size()
92 const int32_t remainder = rhs_cols % 4; in arm_convolve_s8_get_buffer_size()
93 const int32_t aligned_rhs_cols = remainder != 0 ? rhs_cols + 4 - remainder : rhs_cols; in arm_convolve_s8_get_buffer_size()
Darm_convolve_1_x_n_s8.c95 const int32_t rhs_cols = kernel_x * input_ch; in arm_convolve_1_x_n_s8() local
135 rhs_cols, in arm_convolve_1_x_n_s8()
165 rhs_cols, in arm_convolve_1_x_n_s8()
199 rhs_cols, in arm_convolve_1_x_n_s8()
Darm_convolve_get_buffer_sizes_s4.c82 const int32_t rhs_cols = filter_dims->w * filter_dims->h * input_dims->c; in arm_convolve_s4_get_buffer_size() local
83 return (2 * rhs_cols) * (int32_t)sizeof(int16_t); in arm_convolve_s4_get_buffer_size()
Darm_convolve_1_x_n_s4.c103 const int32_t rhs_cols = kernel_x * input_dims->c; in arm_convolve_1_x_n_s4() local
144 rhs_cols, in arm_convolve_1_x_n_s4()
/cmsis-nn-3.7.0/Include/
Darm_nnsupportfunctions.h457 const int32_t rhs_cols,
503 const int32_t rhs_cols,
550 const int32_t rhs_cols,
579 const int32_t rhs_cols,
611 const int32_t rhs_cols,
650 const int32_t rhs_cols,
680 const int32_t rhs_cols,
712 const int32_t rhs_cols,
1842 const int32_t rhs_cols,
1872 const int32_t rhs_cols,