Home
last modified time | relevance | path

Searched refs:out_0 (Results 1 – 13 of 13) sorted by relevance

/cmsis-nn-3.7.0/Source/NNSupportFunctions/
D arm_nn_depthwise_conv_nt_t_s8.c 69 int32x4_t out_0 = vdupq_n_s32(0); in arm_nn_depthwise_conv_nt_t_s8() local
72 out_0 = vldrwq_s32(bias); in arm_nn_depthwise_conv_nt_t_s8()
75 int32x4_t out_1 = out_0; in arm_nn_depthwise_conv_nt_t_s8()
76 int32x4_t out_2 = out_0; in arm_nn_depthwise_conv_nt_t_s8()
77 int32x4_t out_3 = out_0; in arm_nn_depthwise_conv_nt_t_s8()
92 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_s8()
112 out_0 = ker_sum + out_0; in arm_nn_depthwise_conv_nt_t_s8()
123 out_0 = arm_requantize_mve_32x4(out_0, mult, shift); in arm_nn_depthwise_conv_nt_t_s8()
124 out_0 = vaddq_n_s32(out_0, out_offset); in arm_nn_depthwise_conv_nt_t_s8()
125 out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); in arm_nn_depthwise_conv_nt_t_s8()
[all …]
D arm_nn_depthwise_conv_nt_t_padded_s8.c 78 int32x4_t out_0 = vdupq_n_s32(0); in arm_nn_depthwise_conv_nt_t_padded_s8() local
81 out_0 = vldrwq_s32(bias); in arm_nn_depthwise_conv_nt_t_padded_s8()
84 int32x4_t out_1 = out_0; in arm_nn_depthwise_conv_nt_t_padded_s8()
85 int32x4_t out_2 = out_0; in arm_nn_depthwise_conv_nt_t_padded_s8()
86 int32x4_t out_3 = out_0; in arm_nn_depthwise_conv_nt_t_padded_s8()
100 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_padded_s8()
128 out_0 = arm_requantize_mve_32x4(out_0, mult, shift); in arm_nn_depthwise_conv_nt_t_padded_s8()
129 out_0 = vaddq_n_s32(out_0, out_offset); in arm_nn_depthwise_conv_nt_t_padded_s8()
130 out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); in arm_nn_depthwise_conv_nt_t_padded_s8()
131 out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); in arm_nn_depthwise_conv_nt_t_padded_s8()
[all …]
D arm_nn_depthwise_conv_nt_t_s4.c 73 int32x4_t out_0 = vdupq_n_s32(0); in arm_nn_depthwise_conv_nt_t_s4() local
76 out_0 = vldrwq_s32(bias); in arm_nn_depthwise_conv_nt_t_s4()
79 int32x4_t out_1 = out_0; in arm_nn_depthwise_conv_nt_t_s4()
80 int32x4_t out_2 = out_0; in arm_nn_depthwise_conv_nt_t_s4()
81 int32x4_t out_3 = out_0; in arm_nn_depthwise_conv_nt_t_s4()
115 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_s4()
149 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_s4()
170 out_0 = ker_sum + out_0; in arm_nn_depthwise_conv_nt_t_s4()
181 out_0 = arm_requantize_mve_32x4(out_0, mult, shift); in arm_nn_depthwise_conv_nt_t_s4()
182 out_0 = vaddq_n_s32(out_0, out_offset); in arm_nn_depthwise_conv_nt_t_s4()
[all …]
D arm_nn_depthwise_conv_nt_t_s16.c 73 int32x4_t out_0 = vdupq_n_s32(0); in arm_nn_depthwise_conv_nt_t_s16() local
83 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_nt_t_s16()
106 int64_t in_requantize_0 = (int64_t)out_0[i_requantize]; in arm_nn_depthwise_conv_nt_t_s16()
120 out_0[i_requantize] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier, shift); in arm_nn_depthwise_conv_nt_t_s16()
128 out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); in arm_nn_depthwise_conv_nt_t_s16()
129 out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); in arm_nn_depthwise_conv_nt_t_s16()
130 vstrhq_p_s32(out, out_0, p); in arm_nn_depthwise_conv_nt_t_s16()
/cmsis-nn-3.7.0/Source/ConvolutionFunctions/
D arm_nn_depthwise_conv_s8_core.c 65 int32x4_t out_0 = vldrwq_s32(bias); in arm_nn_depthwise_conv_s8_core() local
66 int32x4_t out_1 = out_0; in arm_nn_depthwise_conv_s8_core()
88 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_s8_core()
96 out_0 += vmulq_s32(ip_0, ker_1); in arm_nn_depthwise_conv_s8_core()
104 out_0 += vmulq_s32(ip_0, ker_2); in arm_nn_depthwise_conv_s8_core()
120 out_0 += vmulq_s32(ip_0, ker_0); in arm_nn_depthwise_conv_s8_core()
138 out_0 = arm_requantize_mve_32x4(out_0, mult, shift); in arm_nn_depthwise_conv_s8_core()
141 out_0 = vaddq_n_s32(out_0, out_offset); in arm_nn_depthwise_conv_s8_core()
142 out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); in arm_nn_depthwise_conv_s8_core()
143 out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); in arm_nn_depthwise_conv_s8_core()
[all …]
D arm_nn_mat_mult_kernel_row_offset_s8_s16.c 52 int8_t *out_0) in arm_nn_mat_mult_kernel_row_offset_s8_s16() argument
58 int8_t *out_1 = out_0 + row_address_offset; in arm_nn_mat_mult_kernel_row_offset_s8_s16()
137 *out_0++ = (int8_t)ch_0_out_0; in arm_nn_mat_mult_kernel_row_offset_s8_s16()
151 *out_0++ = (int8_t)ch_1_out_0; in arm_nn_mat_mult_kernel_row_offset_s8_s16()
223 *out_0++ = (int8_t)ch_0_out_0; in arm_nn_mat_mult_kernel_row_offset_s8_s16()
234 out_0 += 2 * row_address_offset - output_ch; in arm_nn_mat_mult_kernel_row_offset_s8_s16()
237 return out_0; in arm_nn_mat_mult_kernel_row_offset_s8_s16()
250 (void)out_0; in arm_nn_mat_mult_kernel_row_offset_s8_s16()
D arm_nn_mat_mult_kernel_s8_s16.c 51 int8_t *out_0) in arm_nn_mat_mult_kernel_s8_s16() argument
55 int8_t *out_1 = out_0 + output_ch; in arm_nn_mat_mult_kernel_s8_s16()
132 *out_0++ = (int8_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s8_s16()
146 *out_0++ = (int8_t)ch_1_out_0; in arm_nn_mat_mult_kernel_s8_s16()
216 *out_0++ = (int8_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s8_s16()
227 out_0 += output_ch; in arm_nn_mat_mult_kernel_s8_s16()
230 return out_0; in arm_nn_mat_mult_kernel_s8_s16()
242 (void)out_0; in arm_nn_mat_mult_kernel_s8_s16()
D arm_nn_mat_mult_kernel_s16.c 58 int16_t *out_0) in arm_nn_mat_mult_kernel_s16() argument
68 int16_t *out_1 = out_0 + output_ch; in arm_nn_mat_mult_kernel_s16()
152 *out_0++ = (int16_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s16()
165 *out_0++ = (int16_t)ch_1_out_0; in arm_nn_mat_mult_kernel_s16()
216 *out_0++ = (int16_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s16()
224 *out_0++ = (int16_t)ch_1_out_0; in arm_nn_mat_mult_kernel_s16()
299 *out_0++ = (int16_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s16()
337 *out_0++ = (int16_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s16()
347 out_0 += output_ch; in arm_nn_mat_mult_kernel_s16()
350 return out_0; in arm_nn_mat_mult_kernel_s16()
[all …]
D arm_depthwise_conv_fast_s16.c 165 int32x4_t out_0 = vdupq_n_s32(0); in arm_depthwise_conv_fast_s16() local
172 out_0 += vmulq_s32(ip_0, ker_0); in arm_depthwise_conv_fast_s16()
178 int64_t in_requantize_0 = (int64_t)out_0[0]; in arm_depthwise_conv_fast_s16()
179 int64_t in_requantize_1 = (int64_t)out_0[1]; in arm_depthwise_conv_fast_s16()
180 int64_t in_requantize_2 = (int64_t)out_0[2]; in arm_depthwise_conv_fast_s16()
181 int64_t in_requantize_3 = (int64_t)out_0[3]; in arm_depthwise_conv_fast_s16()
196 out_0[0] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier_0, output_shift[offset]); in arm_depthwise_conv_fast_s16()
197 out_0[1] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier_1, output_shift[offset + 1]); in arm_depthwise_conv_fast_s16()
198 out_0[2] = arm_nn_requantize_s64(in_requantize_2, reduced_multiplier_2, output_shift[offset + 2]); in arm_depthwise_conv_fast_s16()
199 out_0[3] = arm_nn_requantize_s64(in_requantize_3, reduced_multiplier_3, output_shift[offset + 3]); in arm_depthwise_conv_fast_s16()
[all …]
D arm_depthwise_conv_s8_opt.c 174 int32x4_t out_0 = vdupq_n_s32(0); in arm_depthwise_conv_s8_opt() local
177 out_0 = vldrwq_s32(&bias[offset]); in arm_depthwise_conv_s8_opt()
185 out_0 += vmulq_s32(ip_0, ker_0); in arm_depthwise_conv_s8_opt()
194 out_0 = arm_requantize_mve_32x4(out_0, mult, shift); in arm_depthwise_conv_s8_opt()
195 out_0 = vaddq_n_s32(out_0, output_offset); in arm_depthwise_conv_s8_opt()
196 out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min)); in arm_depthwise_conv_s8_opt()
197 out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max)); in arm_depthwise_conv_s8_opt()
199 vstrbq_p_s32(out, out_0, p); in arm_depthwise_conv_s8_opt()
D arm_nn_mat_mult_kernel_s4_s16.c 49 int8_t *out_0) in arm_nn_mat_mult_kernel_s4_s16() argument
53 int8_t *out_1 = out_0 + output_ch; in arm_nn_mat_mult_kernel_s4_s16()
173 *out_0 = (int8_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s4_s16()
174 out_0 += 2; in arm_nn_mat_mult_kernel_s4_s16()
189 *out_0-- = (int8_t)ch_1_out_0; in arm_nn_mat_mult_kernel_s4_s16()
300 *out_0 = (int8_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s4_s16()
301 out_0 += 2; in arm_nn_mat_mult_kernel_s4_s16()
316 *out_0++ = (int8_t)ch_1_out_0; in arm_nn_mat_mult_kernel_s4_s16()
422 *out_0++ = (int8_t)ch_0_out_0; in arm_nn_mat_mult_kernel_s4_s16()
435 out_0 += output_ch; in arm_nn_mat_mult_kernel_s4_s16()
[all …]
D arm_depthwise_conv_s4_opt.c 176 int32x4_t out_0 = vdupq_n_s32(0); in arm_depthwise_conv_s4_opt() local
179 out_0 = vldrwq_s32(&bias[offset]); in arm_depthwise_conv_s4_opt()
207 out_0 += vmulq_s32(ip_0, ker_0); in arm_depthwise_conv_s4_opt()
226 out_0 += vmulq_s32(ip_0, ker_0); in arm_depthwise_conv_s4_opt()
236 out_0 = arm_requantize_mve_32x4(out_0, mult, shift); in arm_depthwise_conv_s4_opt()
237 out_0 = vaddq_n_s32(out_0, output_offset); in arm_depthwise_conv_s4_opt()
238 out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min)); in arm_depthwise_conv_s4_opt()
239 out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max)); in arm_depthwise_conv_s4_opt()
241 vstrbq_p_s32(out, out_0, p); in arm_depthwise_conv_s4_opt()
/cmsis-nn-3.7.0/Include/
D arm_nnsupportfunctions.h 313 int16_t *out_0);
1162 int8_t *out_0);
1197 int8_t *out_0);
1238 int8_t *out_0);