/cmsis-nn-latest/Source/NNSupportFunctions/
D | arm_nn_depthwise_conv_nt_t_s8.c | all references in arm_nn_depthwise_conv_nt_t_s8():
     75  int32x4_t out_1 = out_0;   (local definition)
     95  out_1 += vmulq_s32(ip_1, ker_0);
    113  out_1 = ker_sum + out_1;
    129  out_1 = arm_requantize_mve_32x4(out_1, mult, shift);
    130  out_1 = vaddq_n_s32(out_1, out_offset);
    131  out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
    132  out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
    133  vstrbq_p_s32(out + total_ch, out_1, p);
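Read top to bottom, these references trace the per-channel pipeline of the s8 kernel: multiply-accumulate into a 32-bit vector lane (line 95), fold in the precomputed kernel-sum/bias term (line 113), requantize with a per-channel multiplier and shift (line 129), add the output offset, clamp to the activation range, and narrow to int8 with a predicated store (lines 130-133). A scalar model of one lane follows; it is a sketch only, with the rounding simplified relative to the library's saturating-doubling arithmetic, and requantize_lane is an illustrative name, not a CMSIS-NN function:

    #include <stdint.h>

    /* Scalar sketch of one lane of the s8 epilogue (simplified rounding;
     * assumes arithmetic right shift of negative values). */
    static int8_t requantize_lane(int32_t acc, int32_t ker_sum, int32_t mult,
                                  int32_t shift, int32_t out_offset,
                                  int32_t act_min, int32_t act_max)
    {
        acc += ker_sum;                                    /* line 113 */
        int64_t prod = (int64_t)acc * mult;                /* line 129, approximate */
        int32_t res = (int32_t)((prod + (1LL << 30)) >> 31);
        res = (shift >= 0) ? (res << shift) : (res >> -shift);
        res += out_offset;                                 /* line 130 */
        if (res < act_min) { res = act_min; }              /* line 131 */
        if (res > act_max) { res = act_max; }              /* line 132 */
        return (int8_t)res;                                /* line 133 narrows to s8 */
    }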
D | arm_nn_depthwise_conv_nt_t_padded_s8.c | all references in arm_nn_depthwise_conv_nt_t_padded_s8():
     84  int32x4_t out_1 = out_0;   (local definition)
    104  out_1 += vmulq_s32(ip_1, ker_0);
    135  out_1 = arm_requantize_mve_32x4(out_1, mult, shift);
    136  out_1 = vaddq_n_s32(out_1, out_offset);
    137  out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
    138  out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
    139  vstrbq_p_s32(out + total_ch, out_1, p);
D | arm_nn_depthwise_conv_nt_t_s4.c | all references in arm_nn_depthwise_conv_nt_t_s4():
     79  int32x4_t out_1 = out_0;   (local definition)
    118  out_1 += vmulq_s32(ip_1, ker_0);
    152  out_1 += vmulq_s32(ip_1, ker_0);
    171  out_1 = ker_sum + out_1;
    187  out_1 = arm_requantize_mve_32x4(out_1, mult, shift);
    188  out_1 = vaddq_n_s32(out_1, out_offset);
    189  out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
    190  out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
    191  vstrbq_p_s32(out + total_ch, out_1, p);
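Compared with the s8 kernel, the s4 variant accumulates in two separate passes (lines 118 and 152), consistent with int4 weights packed two per byte: each byte must be split into a sign-extended low and high nibble before the multiplies. A minimal sketch of that split, assuming low-nibble-first packing; the helper name is hypothetical:

    #include <stdint.h>

    /* Split one byte into two sign-extended int4 values (hypothetical helper;
     * assumes the low nibble holds the first weight). */
    static void unpack_s4_pair(int8_t packed, int32_t *lo, int32_t *hi)
    {
        uint8_t u = (uint8_t)packed;
        int32_t lo_nibble = u & 0x0F;
        int32_t hi_nibble = u >> 4;
        *lo = (lo_nibble > 7) ? lo_nibble - 16 : lo_nibble; /* sign-extend 4-bit */
        *hi = (hi_nibble > 7) ? hi_nibble - 16 : hi_nibble;
    }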
D | arm_nn_depthwise_conv_nt_t_s16.c | all references in arm_nn_depthwise_conv_nt_t_s16():
     74  int32x4_t out_1 = vdupq_n_s32(0);   (local definition)
     86  out_1 += vmulq_s32(ip_1, ker_0);
    107  int64_t in_requantize_1 = (int64_t)out_1[i_requantize];
    121  out_1[i_requantize] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier, shift);
    132  out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
    133  out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
    134  vstrhq_p_s32(out + num_ch, out_1, p);
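The s16 kernel requantizes differently from the s8 kernels: instead of a whole-vector arm_requantize_mve_32x4, each lane is widened to int64 (line 107) and requantized individually through arm_nn_requantize_s64 with a reduced multiplier (line 121), since the s16 path needs 64-bit intermediates to avoid overflow; note also that no output-offset add appears in this listing, consistent with a symmetric s16 quantization scheme. A scalar sketch of the per-lane step, with the rounding simplified; only arm_nn_requantize_s64 is the library's name, the rest is illustrative:

    #include <stdint.h>

    /* Per-lane model of the s16 requantization (lines 107/121): widen to
     * 64 bits, scale by the reduced multiplier, shift back down. Rounding is
     * simplified relative to the real arm_nn_requantize_s64. */
    static int32_t requantize_s16_lane(int32_t lane, int32_t reduced_multiplier,
                                       int32_t shift)
    {
        int64_t acc = (int64_t)lane * reduced_multiplier;
        if (shift >= 0)
        {
            return (int32_t)(acc * (1LL << shift));
        }
        return (int32_t)((acc + (1LL << (-shift - 1))) >> -shift);
    }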
/cmsis-nn-latest/Source/ConvolutionFunctions/
D | arm_nn_depthwise_conv_s8_core.c | all references in arm_nn_depthwise_conv_s8_core():
     66  int32x4_t out_1 = out_0;   (local definition)
     89  out_1 += vmulq_s32(ip_1, ker_0);
     97  out_1 += vmulq_s32(ip_1, ker_1);
    105  out_1 += vmulq_s32(ip_1, ker_2);
    121  out_1 += vmulq_s32(ip_1, ker_0);
    139  out_1 = arm_requantize_mve_32x4(out_1, mult, shift);
    146  out_1 = vaddq_n_s32(out_1, out_offset);
    147  out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
    148  out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
    149  vstrbq_s32(out_tmp + num_ch, out_1);
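This is the only listing above that accumulates against three kernel rows per iteration (ker_0, ker_1, ker_2 at lines 89, 97, and 105) before a single-row tail (line 121), and its store at line 149 is unpredicated (vstrbq_s32), unlike the predicated tails of the _nt_t_ kernels. A scalar sketch of a three-way unroll with a remainder loop; the shape and names are hypothetical, inferred only from which lines reference out_1:

    #include <stdint.h>

    /* Three-row unrolled multiply-accumulate with a scalar remainder
     * (hypothetical shape of lines 89-121). */
    static int32_t mac_unrolled_by_3(const int32_t *ip, const int32_t *ker, int n)
    {
        int32_t acc = 0;
        int i = 0;
        for (; i + 3 <= n; i += 3)   /* lines 89/97/105: ker_0..ker_2 */
        {
            acc += ip[i] * ker[i];
            acc += ip[i + 1] * ker[i + 1];
            acc += ip[i + 2] * ker[i + 2];
        }
        for (; i < n; i++)           /* line 121: leftover rows */
        {
            acc += ip[i] * ker[i];
        }
        return acc;
    }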
D | arm_nn_mat_mult_kernel_s16.c | all references in arm_nn_mat_mult_kernel_s16():
     68  int16_t *out_1 = out_0 + output_ch;   (local definition)
    156  *out_1++ = (int16_t)ch_0_out_1;
    169  *out_1++ = (int16_t)ch_1_out_1;
    220  *out_1++ = (int16_t)ch_0_out_1;
    228  *out_1++ = (int16_t)ch_1_out_1;
    303  *out_1++ = (int16_t)ch_0_out_1;
    341  *out_1++ = (int16_t)ch_0_out_1;
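All four matrix-multiplication kernels below share the two-output-row scheme visible in their definitions: out_1 starts one full row ahead of out_0 (out_0 + output_ch at line 68, or out_0 + row_address_offset in the row-offset variant), so each inner iteration can write its second-row result (ch_0_out_1, ch_1_out_1) without recomputing addresses. A minimal sketch of that addressing, with hypothetical names:

    #include <stdint.h>

    /* Write one result per iteration into each of two adjacent output rows
     * (hypothetical shape of the out_0/out_1 pattern, line 68). */
    static void write_two_rows(int16_t *out_0, int output_ch,
                               const int32_t *row0, const int32_t *row1, int n)
    {
        int16_t *out_1 = out_0 + output_ch; /* second row, one row ahead */
        for (int i = 0; i < n; i++)
        {
            *out_0++ = (int16_t)row0[i];    /* ch_x_out_0 stores */
            *out_1++ = (int16_t)row1[i];    /* ch_x_out_1 stores, e.g. line 156 */
        }
    }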
D | arm_nn_mat_mult_kernel_s4_s16.c | all references in arm_nn_mat_mult_kernel_s4_s16():
     53  int8_t *out_1 = out_0 + output_ch;   (local definition)
    180  *out_1 = (int8_t)ch_0_out_1;
    181  out_1 += 2;
    195  *out_1-- = (int8_t)ch_1_out_1;
    307  *out_1 = (int8_t)ch_0_out_1;
    308  out_1 += 2;
    322  *out_1++ = (int8_t)ch_1_out_1;
    428  *out_1++ = (int8_t)ch_0_out_1;
D | arm_nn_mat_mult_kernel_row_offset_s8_s16.c | all references in arm_nn_mat_mult_kernel_row_offset_s8_s16():
     58  int8_t *out_1 = out_0 + row_address_offset;   (local definition)
    143  *out_1++ = (int8_t)ch_0_out_1;
    157  *out_1++ = (int8_t)ch_1_out_1;
    229  *out_1++ = (int8_t)ch_0_out_1;
D | arm_nn_mat_mult_kernel_s8_s16.c | all references in arm_nn_mat_mult_kernel_s8_s16():
     55  int8_t *out_1 = out_0 + output_ch;   (local definition)
    138  *out_1++ = (int8_t)ch_0_out_1;
    152  *out_1++ = (int8_t)ch_1_out_1;
    222  *out_1++ = (int8_t)ch_0_out_1;