/cmsis-nn-3.7.0/Source/NNSupportFunctions/ |
D | arm_nn_mat_mult_nt_t_s16.c | 56 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s16() argument 70 …onst int32_t rhs_cols_fast = is_int32_bias ? rhs_cols : (rhs_cols > MAX_COL_COUNT ? MAX_COL_COUNT … in arm_nn_mat_mult_nt_t_s16() 71 const int32_t rhs_cols_slow = rhs_cols - MAX_COL_COUNT; in arm_nn_mat_mult_nt_t_s16() 84 const int16_t *ip_row_1 = lhs + rhs_cols; in arm_nn_mat_mult_nt_t_s16() 85 const int16_t *ip_row_2 = lhs + (2 * rhs_cols); in arm_nn_mat_mult_nt_t_s16() 86 const int16_t *ip_row_3 = lhs + (3 * rhs_cols); in arm_nn_mat_mult_nt_t_s16() 87 const int8_t *col_base = rhs + i * rhs_cols; in arm_nn_mat_mult_nt_t_s16() 161 if (rhs_cols > MAX_COL_COUNT) in arm_nn_mat_mult_nt_t_s16() 164 ip_row_1 = lhs + rhs_cols + MAX_COL_COUNT; in arm_nn_mat_mult_nt_t_s16() 165 ip_row_2 = lhs + (2 * rhs_cols) + MAX_COL_COUNT; in arm_nn_mat_mult_nt_t_s16() [all …]
|
D | arm_nn_vec_mat_mul_result_acc_s8_s16.c | 49 const int32_t rhs_cols, in arm_nn_vec_mat_mul_result_acc_s8_s16() argument 64 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mul_result_acc_s8_s16() 68 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16() 70 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16() 72 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16() 74 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16() 81 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16() 125 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mul_result_acc_s8_s16() 128 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16() 142 rhs_ptr += rhs_cols; in arm_nn_vec_mat_mul_result_acc_s8_s16() [all …]
|
D | arm_nn_vec_mat_mult_t_s16.c | 54 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_s16() argument 61 int32_t rhs_cols_fast = rhs_cols; in arm_nn_vec_mat_mult_t_s16() 63 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mult_t_s16() 78 const int8_t *rhs_ptr_1 = rhs + rhs_cols; in arm_nn_vec_mat_mult_t_s16() 79 const int8_t *rhs_ptr_2 = rhs + rhs_cols * 2; in arm_nn_vec_mat_mult_t_s16() 80 const int8_t *rhs_ptr_3 = rhs + rhs_cols * 3; in arm_nn_vec_mat_mult_t_s16() 117 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mult_t_s16() 119 for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++) in arm_nn_vec_mat_mult_t_s16() 162 rhs += 4 * rhs_cols; in arm_nn_vec_mat_mult_t_s16() 197 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mult_t_s16() [all …]
|
D | arm_nn_vec_mat_mult_t_s8.c | 67 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_s8() argument 87 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_s8() 91 const int8_t *rhs_1_ptr = rhs + rhs_cols; in arm_nn_vec_mat_mult_t_s8() 92 const int8_t *rhs_2_ptr = rhs + 2 * rhs_cols; in arm_nn_vec_mat_mult_t_s8() 93 const int8_t *rhs_3_ptr = rhs + 3 * rhs_cols; in arm_nn_vec_mat_mult_t_s8() 105 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_s8() 133 rhs += 4 * rhs_cols; in arm_nn_vec_mat_mult_t_s8() 142 acc += vdupq_n_s32(rhs_offset * lhs_offset) * vdupq_n_s32(rhs_cols); in arm_nn_vec_mat_mult_t_s8() 158 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_s8() 162 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_s8() [all …]
|
D | arm_nn_vec_mat_mul_result_acc_s16.c | 54 const int32_t rhs_cols, in arm_nn_vec_mat_mul_result_acc_s16() argument 70 int32_t rhs_cols_fast = rhs_cols; in arm_nn_vec_mat_mul_result_acc_s16() 72 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mul_result_acc_s16() 87 const int8_t *rhs_ptr_1 = rhs_ptr + rhs_cols; in arm_nn_vec_mat_mul_result_acc_s16() 88 const int8_t *rhs_ptr_2 = rhs_ptr + rhs_cols * 2; in arm_nn_vec_mat_mul_result_acc_s16() 89 const int8_t *rhs_ptr_3 = rhs_ptr + rhs_cols * 3; in arm_nn_vec_mat_mul_result_acc_s16() 126 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mul_result_acc_s16() 128 for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++) in arm_nn_vec_mat_mul_result_acc_s16() 167 rhs_ptr += 4 * rhs_cols; in arm_nn_vec_mat_mul_result_acc_s16() 195 if (rhs_cols > MAX_COL_COUNT) in arm_nn_vec_mat_mul_result_acc_s16() [all …]
|
D | arm_nn_vec_mat_mult_t_svdf_s8.c | 56 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_svdf_s8() argument 61 if (rhs_cols < 0 || (NN_Q31_MAX - rhs_cols) < 16 || dst_offset < 0) in arm_nn_vec_mat_mult_t_svdf_s8() 75 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_svdf_s8() 79 const int8_t *rhs_1 = rhs + rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8() 80 const int8_t *rhs_2 = rhs + 2 * rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8() 86 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8() 112 rhs += 3 * rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8() 131 const int32_t col_loop_cnt = (rhs_cols + 15) / 16; in arm_nn_vec_mat_mult_t_svdf_s8() 135 uint32_t col_cnt = (uint32_t)rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8() 150 rhs += rhs_cols; in arm_nn_vec_mat_mult_t_svdf_s8() [all …]
|
D | arm_nn_mat_mult_nt_t_s4.c | 56 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s4() argument 65 const int rhs_cols_offset = rhs_cols % 16; in arm_nn_mat_mult_nt_t_s4() 66 const int32_t blk_cnt = rhs_cols >> 4; in arm_nn_mat_mult_nt_t_s4() 90 if ((rhs_cols & 0x1) & (i & 0x1)) in arm_nn_mat_mult_nt_t_s4() 194 if ((rhs_cols & 0x1) & (i & 0x1)) in arm_nn_mat_mult_nt_t_s4() 289 if ((rhs_cols & 0x1) & (i & 0x1)) in arm_nn_mat_mult_nt_t_s4() 367 const int32_t rhs_cols_int4 = rhs_cols >> 1; in arm_nn_mat_mult_nt_t_s4() 406 for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) in arm_nn_mat_mult_nt_t_s4() 409 read_and_pad_s4((const int8_t *)&packed_rhs_ptr[rhs_cols], &rhs_low1, &rhs_high1); in arm_nn_mat_mult_nt_t_s4() 433 read_and_pad_s4((const int8_t *)&packed_rhs_ptr[rhs_cols], &rhs_low1, &rhs_high1); in arm_nn_mat_mult_nt_t_s4() [all …]
|
D | arm_nn_mat_mult_nt_t_s8.c | 56 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s8() argument 80 const int8_t *col_base = rhs + i * rhs_cols; in arm_nn_mat_mult_nt_t_s8() 84 for (int j = 0; j < rhs_cols; j++) in arm_nn_mat_mult_nt_t_s8() 127 : [cnt] "r"(rhs_cols) in arm_nn_mat_mult_nt_t_s8() 162 const int8_t *col_base = rhs + i * rhs_cols; in arm_nn_mat_mult_nt_t_s8() 166 for (int j = 0; j < rhs_cols; j++) in arm_nn_mat_mult_nt_t_s8() 186 : [cnt] "r"(rhs_cols) in arm_nn_mat_mult_nt_t_s8() 227 const int32_t rhs_off0 = rhs_cols - 4; in arm_nn_mat_mult_nt_t_s8() 238 for (int32_t x = 0; x < rhs_cols; ++x) in arm_nn_mat_mult_nt_t_s8() 241 lhs_offset_contribution1 += rhs[x + rhs_cols]; in arm_nn_mat_mult_nt_t_s8() [all …]
|
D | arm_nn_mat_mult_nt_t_s8_s32.c | 53 const int32_t rhs_cols, in arm_nn_mat_mult_nt_t_s8_s32() argument 58 const int32_t dst_idx_col_offset = dst_idx_offset * rhs_cols; in arm_nn_mat_mult_nt_t_s8_s32() 78 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() 112 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() 149 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() 183 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() 218 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() 252 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() 286 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() 311 for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) in arm_nn_mat_mult_nt_t_s8_s32() [all …]
|
D | arm_nn_vec_mat_mult_t_s4.c | 61 const int32_t rhs_cols, in arm_nn_vec_mat_mult_t_s4() argument 67 const int rhs_offset = rhs_cols * row_loop_cnt; in arm_nn_vec_mat_mult_t_s4() 71 const int rhs_cols_offset = rhs_cols % 16; in arm_nn_vec_mat_mult_t_s4() 73 const int rhs_cols_offset = rhs_cols; in arm_nn_vec_mat_mult_t_s4() 86 const int32_t col_loop_cnt = rhs_cols >> 5; in arm_nn_vec_mat_mult_t_s4() 87 const int I6_elements_spill = rhs_cols & 0x10; in arm_nn_vec_mat_mult_t_s4() 180 if (rhs_cols & 1) in arm_nn_vec_mat_mult_t_s4() 317 for (int rhs_cols_idx = 0; rhs_cols_idx < (rhs_cols / 4); ++rhs_cols_idx) in arm_nn_vec_mat_mult_t_s4() 335 if (((rhs_cols % 4) == 2) || ((rhs_cols % 4) == 3)) in arm_nn_vec_mat_mult_t_s4() 357 if (rhs_cols % 2 == 1) in arm_nn_vec_mat_mult_t_s4() [all …]
|
/cmsis-nn-3.7.0/Source/ConvolutionFunctions/ |
D | arm_convolve_s8.c | 92 const int32_t rhs_cols = kernel_x * kernel_y * kernel_ch; in arm_convolve_s8() local 103 const int32_t remainder = rhs_cols % 4; in arm_convolve_s8() 104 const int32_t aligned_rhs_cols = remainder != 0 ? rhs_cols + 4 - remainder : rhs_cols; in arm_convolve_s8() 110 const int32_t aligned_rhs_cols_offset = aligned_rhs_cols - rhs_cols; in arm_convolve_s8() 173 rhs_cols, in arm_convolve_s8() 190 … im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); in arm_convolve_s8() 194 … im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); in arm_convolve_s8() 211 rhs_cols, in arm_convolve_s8() 227 rhs_cols, in arm_convolve_s8() 259 rhs_cols, in arm_convolve_s8() [all …]
|
D | arm_convolve_1x1_s4.c | 72 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s4() local 74 const int32_t input_inc = input_dims->w * conv_params->stride.h * rhs_cols; in arm_convolve_1x1_s4() 82 input_data = input_data_ref + (i_batch * rhs_cols * input_dims->w * input_dims->h); in arm_convolve_1x1_s4() 94 rhs_cols, in arm_convolve_1x1_s4() 99 rhs_cols * stride_w); in arm_convolve_1x1_s4()
|
D | arm_convolve_1x1_s8.c | 72 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s8() local 74 const int32_t input_inc = input_dims->w * conv_params->stride.h * rhs_cols; in arm_convolve_1x1_s8() 82 input_data = input_data_ref + (i_batch * rhs_cols * input_dims->w * input_dims->h); in arm_convolve_1x1_s8() 94 rhs_cols, in arm_convolve_1x1_s8() 100 rhs_cols * stride_w); in arm_convolve_1x1_s8()
|
D | arm_convolve_1x1_s4_fast.c | 74 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s4_fast() local 84 rhs_cols, in arm_convolve_1x1_s4_fast() 89 rhs_cols); in arm_convolve_1x1_s4_fast()
|
D | arm_convolve_1x1_s8_fast.c | 74 const int32_t rhs_cols = input_dims->c; in arm_convolve_1x1_s8_fast() local 84 rhs_cols, in arm_convolve_1x1_s8_fast() 90 rhs_cols); in arm_convolve_1x1_s8_fast()
|
D | arm_convolve_s4.c | 89 const int32_t rhs_cols = kernel_x * kernel_y * input_ch; in arm_convolve_s4() local 144 rhs_cols, in arm_convolve_s4() 149 rhs_cols); in arm_convolve_s4() 174 rhs_cols, in arm_convolve_s4() 179 rhs_cols); in arm_convolve_s4() 230 rhs_cols, in arm_convolve_s4() 263 if (rhs_cols % 2 && (i % 2)) in arm_convolve_s4() 271 uint16_t col_count = rhs_cols / 4; in arm_convolve_s4() 286 col_count = (rhs_cols & 0x3) >> 1; in arm_convolve_s4() 288 uint16_t col_count = rhs_cols >> 1; in arm_convolve_s4() [all …]
|
D | arm_convolve_s16.c | 79 const int32_t rhs_cols = input_ch * kernel_y * kernel_x; in arm_convolve_s16() local 147 rhs_cols, in arm_convolve_s16() 166 rhs_cols, in arm_convolve_s16() 195 rhs_cols, in arm_convolve_s16() 219 int32_t col_count = rhs_cols >> 2; in arm_convolve_s16() 236 col_count = rhs_cols & 0x3; in arm_convolve_s16() 238 uint16_t col_count = rhs_cols; in arm_convolve_s16()
|
D | arm_convolve_get_buffer_sizes_s8.c | 91 const int32_t rhs_cols = filter_dims->w * filter_dims->h * input_dims->c; in arm_convolve_s8_get_buffer_size() 92 const int32_t remainder = rhs_cols % 4; in arm_convolve_s8_get_buffer_size() 93 const int32_t aligned_rhs_cols = remainder != 0 ? rhs_cols + 4 - remainder : rhs_cols; in arm_convolve_s8_get_buffer_size()
|
D | arm_convolve_1_x_n_s8.c | 95 const int32_t rhs_cols = kernel_x * input_ch; in arm_convolve_1_x_n_s8() local 135 rhs_cols, in arm_convolve_1_x_n_s8() 165 rhs_cols, in arm_convolve_1_x_n_s8() 199 rhs_cols, in arm_convolve_1_x_n_s8()
|
D | arm_convolve_get_buffer_sizes_s4.c | 82 const int32_t rhs_cols = filter_dims->w * filter_dims->h * input_dims->c; in arm_convolve_s4_get_buffer_size() local 83 return (2 * rhs_cols) * (int32_t)sizeof(int16_t); in arm_convolve_s4_get_buffer_size()
|
D | arm_convolve_1_x_n_s4.c | 103 const int32_t rhs_cols = kernel_x * input_dims->c; in arm_convolve_1_x_n_s4() local 144 rhs_cols, in arm_convolve_1_x_n_s4()
|
/cmsis-nn-3.7.0/Include/ |
D | arm_nnsupportfunctions.h | 457 const int32_t rhs_cols, 503 const int32_t rhs_cols, 550 const int32_t rhs_cols, 579 const int32_t rhs_cols, 611 const int32_t rhs_cols, 650 const int32_t rhs_cols, 680 const int32_t rhs_cols, 712 const int32_t rhs_cols, 1842 const int32_t rhs_cols, 1872 const int32_t rhs_cols,
|