/cmsis-nn-latest/Source/NNSupportFunctions/ |
D | arm_nn_mat_mul_core_1x_s8.c | 60 int32_t *output_shift = quant_params->shift; in arm_nn_mat_mul_core_1x_s8() local 110 res = arm_requantize_mve_32x4(res, vldrwq_s32(output_mult), vldrwq_s32(output_shift)); in arm_nn_mat_mul_core_1x_s8() 112 output_shift += 4; in arm_nn_mat_mul_core_1x_s8() 129 acc_n0 = arm_nn_requantize(acc_n0, output_mult[i], output_shift[i]); in arm_nn_mat_mul_core_1x_s8()
|
D | arm_nn_mat_mul_core_1x_s4.c | 60 int32_t *output_shift = quant_params->shift; in arm_nn_mat_mul_core_1x_s4() local 105 res = arm_requantize_mve_32x4(res, vldrwq_s32(output_mult), vldrwq_s32(output_shift)); in arm_nn_mat_mul_core_1x_s4() 107 output_shift += 4; in arm_nn_mat_mul_core_1x_s4() 125 acc_n0 = arm_nn_requantize(acc_n0, output_mult[i], output_shift[i]); in arm_nn_mat_mul_core_1x_s4()
|
D | arm_nn_lstm_step_s16.c | 103 params->output_shift, in arm_nn_lstm_step_s16()
|
D | arm_nn_lstm_step_s8.c | 101 params->output_shift, in arm_nn_lstm_step_s8()
|
/cmsis-nn-latest/Source/ConvolutionFunctions/ |
D | arm_depthwise_conv_s8.c | 62 const int32_t *output_shift, in depthwise_conv_s8_mult_4() argument 73 const int32_t *shift_base = output_shift; in depthwise_conv_s8_mult_4() 82 output_shift = shift_base; in depthwise_conv_s8_mult_4() 117 … res = arm_requantize_mve_32x4(res, vldrwq_s32(output_mult), vldrwq_s32(output_shift)); in depthwise_conv_s8_mult_4() 119 output_shift += 4; in depthwise_conv_s8_mult_4() 127 out_buff[0] = arm_nn_requantize(out_buff[0], *output_mult++, *output_shift++); in depthwise_conv_s8_mult_4() 128 out_buff[1] = arm_nn_requantize(out_buff[1], *output_mult++, *output_shift++); in depthwise_conv_s8_mult_4() 129 out_buff[2] = arm_nn_requantize(out_buff[2], *output_mult++, *output_shift++); in depthwise_conv_s8_mult_4() 130 out_buff[3] = arm_nn_requantize(out_buff[3], *output_mult++, *output_shift++); in depthwise_conv_s8_mult_4() 170 const int32_t *output_shift, in depthwise_conv_s8_generic() argument [all …]
|
D | arm_depthwise_conv_fast_s16.c | 93 const int32_t *output_shift = quant_params->shift; in arm_depthwise_conv_fast_s16() local 139 output_shift, in arm_depthwise_conv_fast_s16() 196 … out_0[0] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier_0, output_shift[offset]); in arm_depthwise_conv_fast_s16() 197 … out_0[1] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier_1, output_shift[offset + 1]); in arm_depthwise_conv_fast_s16() 198 … out_0[2] = arm_nn_requantize_s64(in_requantize_2, reduced_multiplier_2, output_shift[offset + 2]); in arm_depthwise_conv_fast_s16() 199 … out_0[3] = arm_nn_requantize_s64(in_requantize_3, reduced_multiplier_3, output_shift[offset + 3]); in arm_depthwise_conv_fast_s16() 224 const int32_t *const out_shift_start_pos = output_shift; in arm_depthwise_conv_fast_s16() 283 output_shift = out_shift_start_pos; in arm_depthwise_conv_fast_s16() 372 result = arm_nn_requantize_s64(acc_1, output_mult_1, *output_shift++); in arm_depthwise_conv_fast_s16() 377 result = arm_nn_requantize_s64(acc_2, output_mult_2, *output_shift++); in arm_depthwise_conv_fast_s16() [all …]
|
D | arm_convolve_s16.c | 91 int32_t *output_shift = quant_params->shift; in arm_convolve_s16() local 144 output_shift, in arm_convolve_s16() 162 output_shift, in arm_convolve_s16() 192 output_shift, in arm_convolve_s16() 257 sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]); in arm_convolve_s16() 269 sum = arm_nn_requantize_s64(acc_64, reduced_multiplier, output_shift[i]); in arm_convolve_s16()
|
D | arm_depthwise_conv_s4_opt.c | 87 const int32_t *output_shift = quant_params->shift; in arm_depthwise_conv_s4_opt() local 146 output_shift + block_offset, in arm_depthwise_conv_s4_opt() 234 const int32x4_t shift = vldrwq_s32(&output_shift[offset]); in arm_depthwise_conv_s4_opt() 256 const int32_t *const out_shift_start_pos = output_shift; in arm_depthwise_conv_s4_opt() 315 output_shift = out_shift_start_pos; in arm_depthwise_conv_s4_opt() 436 sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); in arm_depthwise_conv_s4_opt() 442 sum_2 = arm_nn_requantize(sum_2, *output_mult++, *output_shift++); in arm_depthwise_conv_s4_opt() 447 sum_3 = arm_nn_requantize(sum_3, *output_mult++, *output_shift++); in arm_depthwise_conv_s4_opt() 453 sum_4 = arm_nn_requantize(sum_4, *output_mult++, *output_shift++); in arm_depthwise_conv_s4_opt() 504 sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); in arm_depthwise_conv_s4_opt() [all …]
|
D | arm_depthwise_conv_s8_opt.c | 86 const int32_t *output_shift = quant_params->shift; in arm_depthwise_conv_s8_opt() local 146 output_shift + block_offset, in arm_depthwise_conv_s8_opt() 192 const int32x4_t shift = vldrwq_s32(&output_shift[offset]); in arm_depthwise_conv_s8_opt() 216 const int32_t *const out_shift_start_pos = output_shift; in arm_depthwise_conv_s8_opt() 272 output_shift = out_shift_start_pos; in arm_depthwise_conv_s8_opt() 348 sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); in arm_depthwise_conv_s8_opt() 354 sum_2 = arm_nn_requantize(sum_2, *output_mult++, *output_shift++); in arm_depthwise_conv_s8_opt() 359 sum_3 = arm_nn_requantize(sum_3, *output_mult++, *output_shift++); in arm_depthwise_conv_s8_opt() 365 sum_4 = arm_nn_requantize(sum_4, *output_mult++, *output_shift++); in arm_depthwise_conv_s8_opt() 391 sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); in arm_depthwise_conv_s8_opt()
|
D | arm_nn_mat_mult_s8.c | 43 const int32_t *output_shift, in arm_nn_mat_mult_s8() argument 109 res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]); in arm_nn_mat_mult_s8() 152 acc_0 = arm_nn_requantize(acc_0, output_mult[i_out_ch], output_shift[i_out_ch]); in arm_nn_mat_mult_s8() 168 (void)output_shift; in arm_nn_mat_mult_s8()
|
D | arm_depthwise_conv_s16.c | 58 const int32_t *output_shift, in depthwise_conv_s16_mult_4_s16() argument 110 … arm_nn_requantize_s64(out_buff[0], out_buff32[0], output_shift[out_ch + 0 + mult_tile]); in depthwise_conv_s16_mult_4_s16() 112 … arm_nn_requantize_s64(out_buff[1], out_buff32[1], output_shift[out_ch + 1 + mult_tile]); in depthwise_conv_s16_mult_4_s16() 114 … arm_nn_requantize_s64(out_buff[2], out_buff32[2], output_shift[out_ch + 2 + mult_tile]); in depthwise_conv_s16_mult_4_s16() 116 … arm_nn_requantize_s64(out_buff[3], out_buff32[3], output_shift[out_ch + 3 + mult_tile]); in depthwise_conv_s16_mult_4_s16() 148 const int32_t *output_shift, in depthwise_conv_s16_generic_s16() argument 225 … int32_t result = arm_nn_requantize_s64(acc_0, reduced_multiplier, output_shift[idx_out_ch]); in depthwise_conv_s16_generic_s16()
|
D | arm_convolve_s4.c | 93 int32_t *output_shift = quant_params->shift; in arm_convolve_s4() local 141 output_shift, in arm_convolve_s4() 171 output_shift, in arm_convolve_s4() 225 output_shift, in arm_convolve_s4() 316 sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]); in arm_convolve_s4()
|
D | arm_depthwise_conv_3x3_s8.c | 75 const int32_t *output_shift = quant_params->shift; in arm_depthwise_conv_3x3_s8() local 217 … out_buff0 = arm_nn_requantize(out_buff0, output_mult[in_ch + 0], output_shift[in_ch + 0]); in arm_depthwise_conv_3x3_s8() 218 … out_buff1 = arm_nn_requantize(out_buff1, output_mult[in_ch + 1], output_shift[in_ch + 1]); in arm_depthwise_conv_3x3_s8() 219 … out_buff2 = arm_nn_requantize(out_buff2, output_mult[in_ch + 2], output_shift[in_ch + 2]); in arm_depthwise_conv_3x3_s8() 220 … out_buff3 = arm_nn_requantize(out_buff3, output_mult[in_ch + 3], output_shift[in_ch + 3]); in arm_depthwise_conv_3x3_s8() 268 out_buff = arm_nn_requantize(out_buff, output_mult[in_ch], output_shift[in_ch]); in arm_depthwise_conv_3x3_s8()
|
D | arm_depthwise_conv_s4.c | 59 const int32_t *output_shift, in depthwise_conv_s4_generic() argument 161 … acc_0 = arm_nn_requantize(acc_0, output_mult[i_input_ch], output_shift[i_input_ch]); in depthwise_conv_s4_generic() 167 … acc_1 = arm_nn_requantize(acc_1, output_mult[i_input_ch + 1], output_shift[i_input_ch + 1]); in depthwise_conv_s4_generic() 259 … acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]); in depthwise_conv_s4_generic() 346 … acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]); in depthwise_conv_s4_generic() 353 … arm_nn_requantize(acc_1, output_mult[idx_out_ch + 1], output_shift[idx_out_ch + 1]); in depthwise_conv_s4_generic() 468 … acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]); in depthwise_conv_s4_generic()
|
D | arm_transpose_conv_s8.c | 92 const int32_t *output_shift = quant_params->shift; in arm_transpose_conv_s8() local 187 vldrwq_z_s32(&output_shift[output_ch_idx], p)); in arm_transpose_conv_s8() 201 …rm_nn_requantize(img_data[i_output_ch], output_multiplier[i_output_ch], output_shift[i_output_ch]); in arm_transpose_conv_s8()
|
D | arm_convolve_s8.c | 96 int32_t *output_shift = quant_params->shift; in arm_convolve_s8() local 124 const int32_t *output_shift_ptr = &output_shift[0]; in arm_convolve_s8()
|
/cmsis-nn-latest/Include/ |
D | arm_nn_types.h | 234 int32_t output_shift; member
|
D | arm_nnsupportfunctions.h | 271 const int32_t *output_shift,
|
/cmsis-nn-latest/Tests/UnitTest/ |
D | add_mul_settings.py | 124 (self.output_mult, self.output_shift) = self.quantize_scale(actual_output_scale) 154 f.write("#define {}_OUTPUT_SHIFT {}\n".format(prefix, self.output_shift))
|