/cmsis-nn-latest/Source/ConvolutionFunctions/ |
D | arm_depthwise_conv_3x3_s8.c | 69 const int32_t input_ch = input_dims->c; in arm_depthwise_conv_3x3_s8() local 85 if (input_ch != output_ch) in arm_depthwise_conv_3x3_s8() 103 for (; in_ch <= (input_ch - 4); in_ch += 4) in arm_depthwise_conv_3x3_s8() 117 …const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + … in arm_depthwise_conv_3x3_s8() 118 const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch; in arm_depthwise_conv_3x3_s8() 145 in_val = arm_nn_read_s8x4(input_ptr + input_ch); in arm_depthwise_conv_3x3_s8() 146 ker_val = arm_nn_read_s8x4(kernel_ptr + input_ch); in arm_depthwise_conv_3x3_s8() 159 in_val = arm_nn_read_s8x4(input_ptr + (input_ch << 1)); in arm_depthwise_conv_3x3_s8() 160 ker_val = arm_nn_read_s8x4(kernel_ptr + (input_ch << 1)); in arm_depthwise_conv_3x3_s8() 172 input_ptr += (input_ch * input_x); in arm_depthwise_conv_3x3_s8() [all …]
|
D | arm_depthwise_conv_fast_s16.c | 63 const int32_t input_ch = input_dims->c; in arm_depthwise_conv_fast_s16() local 67 if (input_ch != output_ch) in arm_depthwise_conv_fast_s16() 120 memset(lhs_buffer, (int16_t)0, (uint32_t)(input_ch * sizeof(int16_t))); in arm_depthwise_conv_fast_s16() 125 … (int16_t *)(input + (i_ker_y * input_x + i_ker_x) * input_ch), in arm_depthwise_conv_fast_s16() 126 (uint32_t)(input_ch * sizeof(int16_t))); in arm_depthwise_conv_fast_s16() 128 lhs_buffer += input_ch; in arm_depthwise_conv_fast_s16() 138 input_ch, in arm_depthwise_conv_fast_s16() 150 input += input_x * input_y * input_ch; in arm_depthwise_conv_fast_s16() 157 int32_t loop_count = (input_ch + 3) / 4; in arm_depthwise_conv_fast_s16() 158 int32_t num_ch_to_process = input_ch; in arm_depthwise_conv_fast_s16() [all …]
|
D | arm_depthwise_conv_s4_opt.c | 65 const int32_t input_ch = input_dims->c; in arm_depthwise_conv_s4_opt() local 69 if (input_ch != output_ch) in arm_depthwise_conv_s4_opt() 104 const int32_t ch_loop = (input_ch + (S4_CH_IN_BLOCK_MVE - 1)) / S4_CH_IN_BLOCK_MVE; in arm_depthwise_conv_s4_opt() 129 input_slice + (i_ker_y * input_x + i_ker_x) * input_ch, in arm_depthwise_conv_s4_opt() 145 input_ch, in arm_depthwise_conv_s4_opt() 155 out += (4 * input_ch); in arm_depthwise_conv_s4_opt() 170 out = out_base + (i_buf * input_ch); in arm_depthwise_conv_s4_opt() 182 if (input_ch % 2) in arm_depthwise_conv_s4_opt() 211 row_0 += (input_ch >> 1) + get_low_nibble; in arm_depthwise_conv_s4_opt() 229 row_0 += input_ch >> 1; in arm_depthwise_conv_s4_opt() [all …]
|
D | arm_convolve_1_x_n_s4.c | 77 const uint16_t input_ch = input_dims->c; in arm_convolve_1_x_n_s4() local 105 const int32_t lhs_offset = input_ch * stride_x; in arm_convolve_1_x_n_s4() 114 status = arm_nn_mat_mul_core_1x_s4(actual_kernel_len * input_ch, in arm_convolve_1_x_n_s4() 115 ker_begin_idx * input_ch, in arm_convolve_1_x_n_s4() 117 filter_data + ((ker_begin_idx * input_ch) >> 1), in arm_convolve_1_x_n_s4() 134 input_start *= input_ch; in arm_convolve_1_x_n_s4() 165 status = arm_nn_mat_mul_core_1x_s4(ker_end_idx * input_ch, in arm_convolve_1_x_n_s4() 166 (kernel_x - ker_end_idx) * input_ch, in arm_convolve_1_x_n_s4() 167 input_data + est_input_x_idx * input_ch, in arm_convolve_1_x_n_s4() 177 input_data += (input_x * input_ch); in arm_convolve_1_x_n_s4()
|
D | arm_depthwise_conv_s8_opt.c | 63 const int32_t input_ch = input_dims->c; in arm_depthwise_conv_s8_opt() local 67 if (input_ch != output_ch) in arm_depthwise_conv_s8_opt() 103 const int32_t ch_loop = (input_ch + (CH_IN_BLOCK_MVE - 1)) / CH_IN_BLOCK_MVE; in arm_depthwise_conv_s8_opt() 128 input_slice + (i_ker_y * input_x + i_ker_x) * input_ch, in arm_depthwise_conv_s8_opt() 145 input_ch, in arm_depthwise_conv_s8_opt() 155 out += (4 * input_ch); in arm_depthwise_conv_s8_opt() 168 out = out_base + (i_buf * input_ch); in arm_depthwise_conv_s8_opt() 188 row_0 += input_ch; in arm_depthwise_conv_s8_opt() 236 … memset(&col_buffer[index], 0, (kernel_x * input_ch) * ker_y_start * sizeof(int16_t)); in arm_depthwise_conv_s8_opt() 237 index += (kernel_x * input_ch) * ker_y_start; in arm_depthwise_conv_s8_opt() [all …]
|
D | arm_convolve_1_x_n_s8.c | 75 const int32_t input_ch = input_dims->c; in arm_convolve_1_x_n_s8() local 92 const int32_t pad_size_left = pad_x * input_ch; in arm_convolve_1_x_n_s8() 93 const int32_t pad_size_right = asym_pad ? right_pad_num * input_ch : pad_size_left; in arm_convolve_1_x_n_s8() 95 const int32_t rhs_cols = kernel_x * input_ch; in arm_convolve_1_x_n_s8() 97 const int32_t lhs_offset = input_ch * stride_x; in arm_convolve_1_x_n_s8() 114 const uint32_t num_elem_left = kernel_x * input_ch; in arm_convolve_1_x_n_s8() 115 const uint32_t num_elem_right = num_elem_left - input_ch; in arm_convolve_1_x_n_s8() 154 input_start *= input_ch; in arm_convolve_1_x_n_s8() 185 input_start = (stride_x * (left_pad_num + no_pad_num) - pad_x) * input_ch; in arm_convolve_1_x_n_s8() 210 input_data += (input_x * input_ch); in arm_convolve_1_x_n_s8()
|
D | arm_convolve_s4.c | 73 const uint16_t input_ch = input_dims->c; in arm_convolve_s4() local 89 const int32_t rhs_cols = kernel_x * kernel_y * input_ch; in arm_convolve_s4() 122 … arm_memset_s8(im2col_buf, (int8_t)-input_offset, sizeof(int8_t) * input_ch); in arm_convolve_s4() 126 … arm_memcpy_s8(im2col_buf, input_data + (k_y * input_x + k_x) * input_ch, input_ch); in arm_convolve_s4() 128 im2col_buf += input_ch; in arm_convolve_s4() 207 memset(two_column_buf, 0, sizeof(int16_t) * input_ch); in arm_convolve_s4() 213 … input_data + (k_y * input_x + k_x) * input_ch, two_column_buf, input_ch, input_offset); in arm_convolve_s4() 215 two_column_buf += input_ch; in arm_convolve_s4() 325 input_data += (input_x * input_y * input_ch); in arm_convolve_s4()
|
D | arm_depthwise_conv_s4.c | 47 const int32_t input_ch, in depthwise_conv_s4_generic() argument 75 const int32_t kernel_index_offset = input_ch >> 1; in depthwise_conv_s4_generic() 76 if (!(input_ch % 2)) in depthwise_conv_s4_generic() 92 … for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch += 2, idx_out_ch_s4++) in depthwise_conv_s4_generic() 138 int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; in depthwise_conv_s4_generic() 153 idx_0 += dilation_x * input_ch; in depthwise_conv_s4_generic() 177 for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) in depthwise_conv_s4_generic() 229 … int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; in depthwise_conv_s4_generic() 250 idx_0 += dilation_x * input_ch; in depthwise_conv_s4_generic() 270 for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) in depthwise_conv_s4_generic() [all …]
|
D | arm_convolve_s16.c | 73 const int32_t input_ch = input_dims->c; in arm_convolve_s16() local 79 const int32_t rhs_cols = input_ch * kernel_y * kernel_x; in arm_convolve_s16() 122 … arm_memset_s8((int8_t *)im2col, 0, sizeof(int16_t) * (uint32_t)input_ch); in arm_convolve_s16() 127 … (const int8_t *)(input_data + (k_y * input_x + k_x) * input_ch), in arm_convolve_s16() 128 (uint32_t)input_ch * sizeof(int16_t)); in arm_convolve_s16() 130 im2col += input_ch; in arm_convolve_s16() 282 input_data += (input_x * input_y * input_ch); in arm_convolve_s16()
|
D | arm_depthwise_conv_s16.c | 46 const int32_t input_ch, in depthwise_conv_s16_mult_4_s16() argument 92 … int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch; in depthwise_conv_s16_mult_4_s16() 101 int32_t in_val = input[in_idx + ker_w * input_ch]; in depthwise_conv_s16_mult_4_s16() 137 const uint16_t input_ch, in depthwise_conv_s16_generic_s16() argument 166 for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) in depthwise_conv_s16_generic_s16() 217 int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; in depthwise_conv_s16_generic_s16() 218 … int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch; in depthwise_conv_s16_generic_s16() 234 input += (input_x * input_y * input_ch); in depthwise_conv_s16_generic_s16()
|
D | arm_depthwise_conv_s8.c | 50 const int32_t input_ch, in depthwise_conv_s8_mult_4() argument 101 … int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch; in depthwise_conv_s8_mult_4() 108 int32_t in_val = input[in_idx + ker_w * input_ch] + input_offset; in depthwise_conv_s8_mult_4() 158 const uint16_t input_ch, in depthwise_conv_s8_generic() argument 194 for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) in depthwise_conv_s8_generic() 243 int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; in depthwise_conv_s8_generic() 244 … int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch; in depthwise_conv_s8_generic() 262 input += (input_x * input_y * input_ch); in depthwise_conv_s8_generic()
|
D | arm_convolve_s8.c | 72 const uint16_t input_ch = input_dims->c; in arm_convolve_s8() local 91 const int32_t groups = input_ch / kernel_ch; in arm_convolve_s8() 98 if (input_ch % groups != 0 || output_ch % groups != 0) in arm_convolve_s8() 151 … input_data + (k_y * input_x + k_x) * input_ch + i_group * kernel_ch, in arm_convolve_s8() 336 input_data += (input_x * input_y * input_ch); in arm_convolve_s8()
|
D | arm_transpose_conv_s8.c | 72 const int32_t input_ch = input_dims->c; in arm_transpose_conv_s8() local 136 input_ch, in arm_transpose_conv_s8() 141 filter_data_ptr += (input_ch * filter_size); in arm_transpose_conv_s8() 210 input_data_ptr += (input_size * input_ch); in arm_transpose_conv_s8()
|
/cmsis-nn-latest/Tests/UnitTest/TestCases/Common/ |
D | conv2d_s4_weights_template.json | 18 input_ch 39 input_ch
|
D | dw_s4_weights_template.json | 18 input_ch
|
/cmsis-nn-latest/Tests/UnitTest/ |
D | fully_connected_settings.py | 125 …f.write("#define {}_ACCUMULATION_DEPTH {}\n".format(prefix, self.input_ch * self.x_input * self.y_… 149 fc_input_format = [self.batches, self.input_ch * self.x_input * self.y_input] 165 fc_weights_format = [self.input_ch * self.y_input * self.x_input * self.output_ch] 189 fc_weights_format = [self.input_ch * self.y_input * self.x_input * self.output_ch] 227 fc_weights_format = [self.input_ch * self.y_input * self.x_input, self.output_ch] 241 keras.layers.InputLayer(input_shape=(self.y_input * self.x_input * self.input_ch, ),
|
D | model_extractor.py | 96 f.write("#define {}_IN_CH {}\n".format(prefix, self.input_ch)) 124 … f.write("#define {}_INPUT_SIZE {}\n".format(prefix, self.x_input * self.y_input * self.input_ch)) 140 … self.input_ch * self.x_input * self.y_input)) 149 [self.batches, self.y_input, self.x_input, self.input_ch] = input_shape 150 [output_ch, self.filter_y, self.filter_x, self.input_ch] = filter_shape 153 [self.batches, self.input_ch] = input_shape 154 [self.input_ch, self.output_ch] = filter_shape
|
D | conv_settings.py | 105 self.channel_multiplier = self.output_ch // self.input_ch 106 if self.output_ch % self.input_ch != 0: 224 w_shape = [self.filter_y * self.filter_x * self.input_ch * out_channel] 275 "input_ch": self.input_ch, 319 weight_shape = [self.filter_y, self.filter_x, out_channel, self.input_ch] 335 input_shape = (self.batches, self.y_input, self.x_input, self.input_ch)
|
D | add_mul_settings.py | 75 input_shape = (1, self.y_input, self.x_input, self.input_ch) 148 … self.batches * self.y_input * self.x_input * self.input_ch))
|
D | test_settings.py | 146 self.input_ch = in_ch 275 input_shape = [self.batches, self.y_input, self.x_input, self.input_ch] 350 f.write("#define {}_IN_CH {}\n".format(prefix, self.input_ch)) 355 … f.write("#define {}_INPUT_SIZE {}\n".format(prefix, self.x_input * self.y_input * self.input_ch)) 464 representative_dataset_shape = (self.batches, self.y_input, self.x_input, self.input_ch)
|
D | pooling_settings.py | 89 input_shape = (self.batches, self.y_input, self.x_input, self.input_ch)
|