/cmsis-nn-3.7.0/Source/NNSupportFunctions/
D | arm_nn_depthwise_conv_nt_t_s8.c (all in arm_nn_depthwise_conv_nt_t_s8())
     69  int32x4_t out_0 = vdupq_n_s32(0);                          (local)
     72  out_0 = vldrwq_s32(bias);
     75  int32x4_t out_1 = out_0;
     76  int32x4_t out_2 = out_0;
     77  int32x4_t out_3 = out_0;
     92  out_0 += vmulq_s32(ip_0, ker_0);
    112  out_0 = ker_sum + out_0;
    123  out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
    124  out_0 = vaddq_n_s32(out_0, out_offset);
    125  out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min));
    [all …]

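Read in order, these occurrences trace the standard MVE depthwise pipeline: seed the accumulator from the bias (or zero), accumulate input-times-kernel products, fold in the precomputed kernel-sum correction for the input offset, requantize, add the output offset, and clamp to the activation range. Below is a minimal scalar sketch of that per-lane arithmetic; requantize() is a simplified, non-saturating stand-in for arm_requantize_mve_32x4(), and the single multiplier/shift pair stands in for the per-channel vectors the real kernel loads.

    #include <stdint.h>

    /* Simplified stand-in for arm_requantize_mve_32x4(): computes
     * round(acc * multiplier / 2^(31 - shift)) without the saturation
     * the real helper performs. */
    static int32_t requantize(int64_t acc, int32_t multiplier, int32_t shift)
    {
        const int32_t total_shift = 31 - shift;
        const int64_t round = (int64_t)1 << (total_shift - 1);
        return (int32_t)((acc * multiplier + round) >> total_shift);
    }

    /* One 4-lane output vector of the depthwise kernel, lane by lane. */
    static void depthwise_lane_math(const int32_t *bias, /* per-channel bias, or NULL */
                                    const int32_t *ip,   /* one input sample per lane */
                                    const int32_t *ker,  /* one kernel weight per lane */
                                    int32_t ker_sum,     /* precomputed sum(ker) * input_offset */
                                    int32_t multiplier, int32_t shift,
                                    int32_t out_offset,
                                    int32_t act_min, int32_t act_max,
                                    int8_t *out)
    {
        for (int lane = 0; lane < 4; lane++)
        {
            int32_t acc = bias ? bias[lane] : 0;      /* lines 69/72 */
            acc += ip[lane] * ker[lane];              /* line 92: vmulq_s32 accumulate */
            acc += ker_sum;                           /* line 112: input-offset correction */
            acc = requantize(acc, multiplier, shift); /* line 123 */
            acc += out_offset;                        /* line 124 */
            acc = acc < act_min ? act_min : acc;      /* line 125: vmaxq_s32 */
            acc = acc > act_max ? act_max : acc;      /* vminq_s32 (truncated above) */
            out[lane] = (int8_t)acc;
        }
    }
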
D | arm_nn_depthwise_conv_nt_t_padded_s8.c (all in arm_nn_depthwise_conv_nt_t_padded_s8())
     78  int32x4_t out_0 = vdupq_n_s32(0);                          (local)
     81  out_0 = vldrwq_s32(bias);
     84  int32x4_t out_1 = out_0;
     85  int32x4_t out_2 = out_0;
     86  int32x4_t out_3 = out_0;
    100  out_0 += vmulq_s32(ip_0, ker_0);
    128  out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
    129  out_0 = vaddq_n_s32(out_0, out_offset);
    130  out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min));
    131  out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max));
    [all …]

D | arm_nn_depthwise_conv_nt_t_s4.c (all in arm_nn_depthwise_conv_nt_t_s4())
     73  int32x4_t out_0 = vdupq_n_s32(0);                          (local)
     76  out_0 = vldrwq_s32(bias);
     79  int32x4_t out_1 = out_0;
     80  int32x4_t out_2 = out_0;
     81  int32x4_t out_3 = out_0;
    115  out_0 += vmulq_s32(ip_0, ker_0);
    149  out_0 += vmulq_s32(ip_0, ker_0);
    170  out_0 = ker_sum + out_0;
    181  out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
    182  out_0 = vaddq_n_s32(out_0, out_offset);
    [all …]

D | arm_nn_depthwise_conv_nt_t_s16.c (all in arm_nn_depthwise_conv_nt_t_s16())
     73  int32x4_t out_0 = vdupq_n_s32(0);                          (local)
     83  out_0 += vmulq_s32(ip_0, ker_0);
    106  int64_t in_requantize_0 = (int64_t)out_0[i_requantize];
    120  out_0[i_requantize] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier, shift);
    128  out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min));
    129  out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max));
    130  vstrhq_p_s32(out, out_0, p);

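Unlike the s8 variants, the s16 kernel cannot requantize all four lanes with one vector helper: each 32-bit lane is widened to 64 bits (line 106) and passed through arm_nn_requantize_s64() individually (line 120), with a multiplier reduced from Q31 to roughly 15 bits so the 64-bit product cannot overflow. The same lane-by-lane pattern appears in arm_depthwise_conv_fast_s16.c further down. A scalar sketch, assuming the reduced-multiplier convention of CMSIS-NN's REDUCE_MULTIPLIER macro:

    #include <stdint.h>

    /* Reduce a Q31 multiplier to ~15 bits so that a 64-bit accumulator
     * times the multiplier still fits in 64 bits. */
    static int32_t reduce_multiplier(int32_t mult_q31)
    {
        return (mult_q31 < 0x7FFF0000) ? ((mult_q31 + (1 << 15)) >> 16) : 0x7FFF;
    }

    /* Per-lane s16 requantize, mirroring arm_nn_requantize_s64(): the Q31
     * scaling minus the 16 bits already dropped from the multiplier leaves
     * a right shift of (15 - shift), done as (14 - shift) plus a final
     * rounding shift of one. */
    static int32_t requantize_s64(int64_t acc, int32_t reduced_mult, int32_t shift)
    {
        const int64_t prod = acc * reduced_mult;
        int32_t result = (int32_t)(prod >> (14 - shift));
        return (result + 1) >> 1; /* last shift inserts the rounding bit */
    }
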
/cmsis-nn-3.7.0/Source/ConvolutionFunctions/
D | arm_nn_depthwise_conv_s8_core.c (all in arm_nn_depthwise_conv_s8_core())
     65  int32x4_t out_0 = vldrwq_s32(bias);                        (local)
     66  int32x4_t out_1 = out_0;
     88  out_0 += vmulq_s32(ip_0, ker_0);
     96  out_0 += vmulq_s32(ip_0, ker_1);
    104  out_0 += vmulq_s32(ip_0, ker_2);
    120  out_0 += vmulq_s32(ip_0, ker_0);
    138  out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
    141  out_0 = vaddq_n_s32(out_0, out_offset);
    142  out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min));
    143  out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max));
    [all …]

D | arm_nn_mat_mult_kernel_row_offset_s8_s16.c (all in arm_nn_mat_mult_kernel_row_offset_s8_s16())
     52  int8_t *out_0)                                             (argument)
     58  int8_t *out_1 = out_0 + row_address_offset;
    137  *out_0++ = (int8_t)ch_0_out_0;
    151  *out_0++ = (int8_t)ch_1_out_0;
    223  *out_0++ = (int8_t)ch_0_out_0;
    234  out_0 += 2 * row_address_offset - output_ch;
    237  return out_0;
    250  (void)out_0;

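The occurrences in this kernel are all output-pointer bookkeeping: out_0 and out_1 walk two output rows in parallel, separated by row_address_offset (a row pitch that may differ from output_ch), and line 234 advances out_0 past both rows before the pointer is returned. A sketch of just that bookkeeping, with hypothetical ch_out_0/ch_out_1 arrays standing in for the per-channel results the real kernel computes in place:

    #include <stdint.h>

    /* Two-row output write, as in lines 58/137/151/234/237. */
    static int8_t *write_two_rows(int8_t *out_0,
                                  int32_t output_ch,
                                  int32_t row_address_offset, /* row pitch >= output_ch */
                                  const int8_t *ch_out_0,     /* hypothetical results, row 0 */
                                  const int8_t *ch_out_1)     /* hypothetical results, row 1 */
    {
        int8_t *out_1 = out_0 + row_address_offset; /* line 58 */

        for (int32_t ch = 0; ch < output_ch; ch++)
        {
            *out_0++ = ch_out_0[ch]; /* first row, e.g. lines 137/223 */
            *out_1++ = ch_out_1[ch]; /* second row */
        }

        /* line 234: jump over both rows, compensating for the output_ch
         * increments already applied to out_0 */
        out_0 += 2 * row_address_offset - output_ch;
        return out_0; /* line 237 */
    }
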
D | arm_nn_mat_mult_kernel_s8_s16.c (all in arm_nn_mat_mult_kernel_s8_s16())
     51  int8_t *out_0)                                             (argument)
     55  int8_t *out_1 = out_0 + output_ch;
    132  *out_0++ = (int8_t)ch_0_out_0;
    146  *out_0++ = (int8_t)ch_1_out_0;
    216  *out_0++ = (int8_t)ch_0_out_0;
    227  out_0 += output_ch;
    230  return out_0;
    242  (void)out_0;

D | arm_nn_mat_mult_kernel_s16.c (all in arm_nn_mat_mult_kernel_s16())
     58  int16_t *out_0)                                            (argument)
     68  int16_t *out_1 = out_0 + output_ch;
    152  *out_0++ = (int16_t)ch_0_out_0;
    165  *out_0++ = (int16_t)ch_1_out_0;
    216  *out_0++ = (int16_t)ch_0_out_0;
    224  *out_0++ = (int16_t)ch_1_out_0;
    299  *out_0++ = (int16_t)ch_0_out_0;
    337  *out_0++ = (int16_t)ch_0_out_0;
    347  out_0 += output_ch;
    350  return out_0;
    [all …]

D | arm_depthwise_conv_fast_s16.c (all in arm_depthwise_conv_fast_s16())
    165  int32x4_t out_0 = vdupq_n_s32(0);                          (local)
    172  out_0 += vmulq_s32(ip_0, ker_0);
    178  int64_t in_requantize_0 = (int64_t)out_0[0];
    179  int64_t in_requantize_1 = (int64_t)out_0[1];
    180  int64_t in_requantize_2 = (int64_t)out_0[2];
    181  int64_t in_requantize_3 = (int64_t)out_0[3];
    196  out_0[0] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier_0, output_shift[offset]);
    197  out_0[1] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier_1, output_shift[offset + 1]);
    198  out_0[2] = arm_nn_requantize_s64(in_requantize_2, reduced_multiplier_2, output_shift[offset + 2]);
    199  out_0[3] = arm_nn_requantize_s64(in_requantize_3, reduced_multiplier_3, output_shift[offset + 3]);
    [all …]

D | arm_depthwise_conv_s8_opt.c (all in arm_depthwise_conv_s8_opt())
    174  int32x4_t out_0 = vdupq_n_s32(0);                          (local)
    177  out_0 = vldrwq_s32(&bias[offset]);
    185  out_0 += vmulq_s32(ip_0, ker_0);
    194  out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
    195  out_0 = vaddq_n_s32(out_0, output_offset);
    196  out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min));
    197  out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max));
    199  vstrbq_p_s32(out, out_0, p);

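The tail handling here is worth noting: line 199 stores the four 32-bit lanes as bytes through a predicated vstrbq_p_s32(), so a channel count that is not a multiple of four needs no scalar cleanup loop. A scalar sketch of what that store does, assuming the predicate p was produced by vctp32q() over the number of channels still remaining:

    #include <stdint.h>

    /* Scalar model of vstrbq_p_s32(out, out_0, p): narrow each 32-bit lane
     * to a byte and write only the lanes enabled by the tail predicate.
     * The values were already clamped to the s8 activation range upstream
     * (lines 196/197), so the narrowing cast cannot overflow. */
    static void store_s32_to_s8_predicated(int8_t *out, const int32_t acc[4], int32_t active_lanes)
    {
        for (int32_t lane = 0; lane < 4 && lane < active_lanes; lane++)
        {
            out[lane] = (int8_t)acc[lane];
        }
    }
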
D | arm_nn_mat_mult_kernel_s4_s16.c (all in arm_nn_mat_mult_kernel_s4_s16())
     49  int8_t *out_0)                                             (argument)
     53  int8_t *out_1 = out_0 + output_ch;
    173  *out_0 = (int8_t)ch_0_out_0;
    174  out_0 += 2;
    189  *out_0-- = (int8_t)ch_1_out_0;
    300  *out_0 = (int8_t)ch_0_out_0;
    301  out_0 += 2;
    316  *out_0++ = (int8_t)ch_1_out_0;
    422  *out_0++ = (int8_t)ch_0_out_0;
    435  out_0 += output_ch;
    [all …]

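The irregular pointer moves in this kernel (write, += 2, then -- or ++) come from the s4 weight packing: two values share each packed weight byte, so the kernel produces two channel results per pass and writes them at the interleaved positions the packing implies. A minimal sketch of the underlying nibble unpacking, assuming the low-nibble-first layout CMSIS-NN uses for packed s4 weights:

    #include <stdint.h>

    /* Unpack one byte of packed s4 weights into two signed 4-bit values.
     * Low nibble first (assumed layout); both results land in [-8, 7]. */
    static void unpack_s4(uint8_t packed, int8_t *lo, int8_t *hi)
    {
        *lo = (int8_t)((int8_t)(packed << 4) >> 4); /* sign-extend low nibble */
        *hi = (int8_t)((int8_t)packed >> 4);        /* arithmetic shift sign-extends */
    }
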
D | arm_depthwise_conv_s4_opt.c (all in arm_depthwise_conv_s4_opt())
    176  int32x4_t out_0 = vdupq_n_s32(0);                          (local)
    179  out_0 = vldrwq_s32(&bias[offset]);
    207  out_0 += vmulq_s32(ip_0, ker_0);
    226  out_0 += vmulq_s32(ip_0, ker_0);
    236  out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
    237  out_0 = vaddq_n_s32(out_0, output_offset);
    238  out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min));
    239  out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max));
    241  vstrbq_p_s32(out, out_0, p);

/cmsis-nn-3.7.0/Include/
D | arm_nnsupportfunctions.h
    313  int16_t *out_0);
   1162  int8_t *out_0);
   1197  int8_t *out_0);
   1238  int8_t *out_0);