| /hal_nxp-latest/mcux/mcux-sdk/CMSIS/NN/Source/FullyConnectedFunctions/ |
| D | arm_fully_connected_q15_opt.c | 101 const uint16_t out_shift, in arm_fully_connected_q15_opt() argument 119 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() 120 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() 121 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() 122 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() 197 *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); in arm_fully_connected_q15_opt() 198 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_q15_opt() 199 *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); in arm_fully_connected_q15_opt() 200 *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); in arm_fully_connected_q15_opt() 211 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15_opt() [all …]
|
| D | arm_fully_connected_mat_q7_vec_q15_opt.c | 121 const uint16_t out_shift, in arm_fully_connected_mat_q7_vec_q15_opt() argument 140 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() 141 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() 142 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() 143 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() 275 *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt() 276 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt() 277 *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt() 278 *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15_opt() 289 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15_opt() [all …]
|
| D | arm_fully_connected_q7_opt.c | 133 const uint16_t out_shift, in arm_fully_connected_q7_opt() argument 153 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() 154 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() 155 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() 156 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() 334 *pO++ = (q7_t)(__SSAT((sum >> out_shift), 8)); in arm_fully_connected_q7_opt() 335 *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); in arm_fully_connected_q7_opt() 336 *pO++ = (q7_t)(__SSAT((sum3 >> out_shift), 8)); in arm_fully_connected_q7_opt() 337 *pO++ = (q7_t)(__SSAT((sum4 >> out_shift), 8)); in arm_fully_connected_q7_opt() 348 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7_opt() [all …]
|
| D | arm_fully_connected_mat_q7_vec_q15.c | 73 const uint16_t out_shift, in arm_fully_connected_mat_q7_vec_q15() argument 92 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15() 93 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15() 128 *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15() 129 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15() 141 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15() 171 *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); in arm_fully_connected_mat_q7_vec_q15() 181 int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_mat_q7_vec_q15() 186 pOut[i] = (q15_t)__SSAT((ip_out >> out_shift), 16); in arm_fully_connected_mat_q7_vec_q15()
|
| D | arm_fully_connected_q7.c | 72 const uint16_t out_shift, in arm_fully_connected_q7() argument 93 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7() 94 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7() 129 *pO++ = (q7_t)(__SSAT((sum >> out_shift), 8)); in arm_fully_connected_q7() 130 *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); in arm_fully_connected_q7() 143 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7() 172 *pO++ = (q7_t)(__SSAT((sum >> out_shift), 8)); in arm_fully_connected_q7() 184 int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q7() 189 pOut[i] = (q7_t)__SSAT((ip_out >> out_shift), 8); in arm_fully_connected_q7()
|
| D | arm_fully_connected_q15.c | 70 const uint16_t out_shift, in arm_fully_connected_q15() argument 89 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15() 90 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15() 125 *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); in arm_fully_connected_q15() 126 *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); in arm_fully_connected_q15() 137 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15() 169 *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); in arm_fully_connected_q15() 179 int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_fully_connected_q15() 184 pOut[i] = (q15_t)__SSAT((ip_out >> out_shift), 16); in arm_fully_connected_q15()
|
| /hal_nxp-latest/mcux/mcux-sdk/CMSIS/NN/Source/ConvolutionFunctions/ |
| D | arm_nn_mat_mult_kernel_q7_q15.c | 45 const uint16_t out_shift, in arm_nn_mat_mult_kernel_q7_q15() argument 66 q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() 67 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() 68 q31_t sum3 = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() 69 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() 112 *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15() 113 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15() 114 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15() 115 *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15() 130 q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15() [all …]
|
| D | arm_nn_mat_mult_kernel_q7_q15_reordered.c | 45 const uint16_t out_shift, in arm_nn_mat_mult_kernel_q7_q15_reordered() argument 66 q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() 67 q31_t sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() 68 q31_t sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() 69 q31_t sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); in arm_nn_mat_mult_kernel_q7_q15_reordered() 112 *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered() 113 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered() 114 *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered() 115 *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_nn_mat_mult_kernel_q7_q15_reordered() 131 (void)out_shift; in arm_nn_mat_mult_kernel_q7_q15_reordered()
|
| D | arm_convolve_HWC_q15_fast.c | 89 const uint16_t out_shift, in arm_convolve_HWC_q15_fast() argument 157 q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() 158 q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() 159 q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() 160 q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() 192 *pOut++ = (q15_t)__SSAT(sum >> out_shift, 16); in arm_convolve_HWC_q15_fast() 193 *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); in arm_convolve_HWC_q15_fast() 194 *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); in arm_convolve_HWC_q15_fast() 195 *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); in arm_convolve_HWC_q15_fast() 227 conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast() [all …]
|
| D | arm_convolve_HWC_q15_fast_nonsquare.c | 98 const uint16_t out_shift, in arm_convolve_HWC_q15_fast_nonsquare() argument 169 q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() 170 q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() 171 q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() 172 q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() 204 *pOut++ = (q15_t)__SSAT(sum >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare() 205 *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare() 206 *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare() 207 *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); in arm_convolve_HWC_q15_fast_nonsquare() 239 conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q15_fast_nonsquare() [all …]
|
| D | arm_depthwise_separable_conv_HWC_q7.c | 92 const uint16_t out_shift, in arm_depthwise_separable_conv_HWC_q7() argument 148 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() 149 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() 150 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() 151 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() 338 *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7() 339 *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7() 340 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7() 341 *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7() 351 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7() [all …]
|
| D | arm_depthwise_separable_conv_HWC_q7_nonsquare.c | 88 const uint16_t out_shift, in arm_depthwise_separable_conv_HWC_q7_nonsquare() argument 158 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 159 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 160 q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 161 q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 342 *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 343 *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 344 *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 345 *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); in arm_depthwise_separable_conv_HWC_q7_nonsquare() 355 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); in arm_depthwise_separable_conv_HWC_q7_nonsquare() [all …]
|
| D | arm_nn_mat_mult_kernel_s8_s16_reordered.c | 46 const int32_t *out_shift, in arm_nn_mat_mult_kernel_s8_s16_reordered() argument 105 ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16_reordered() 111 ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16_reordered() 117 out_shift++; in arm_nn_mat_mult_kernel_s8_s16_reordered() 119 ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16_reordered() 125 ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16_reordered() 131 out_shift++; in arm_nn_mat_mult_kernel_s8_s16_reordered() 169 ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16_reordered() 175 ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16_reordered() 190 (void)out_shift; in arm_nn_mat_mult_kernel_s8_s16_reordered()
|
| D | arm_nn_mat_mult_kernel_s8_s16.c | 43 const int32_t *out_shift, in arm_nn_mat_mult_kernel_s8_s16() argument 149 int32x4_t shift = vldrwq_s32(out_shift); in arm_nn_mat_mult_kernel_s8_s16() 151 out_shift += ROW_PER_LOOP; in arm_nn_mat_mult_kernel_s8_s16() 199 shift_tail[i_ch] = out_shift[i_ch]; in arm_nn_mat_mult_kernel_s8_s16() 283 ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16() 289 ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16() 295 out_shift++; in arm_nn_mat_mult_kernel_s8_s16() 297 ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16() 303 ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); in arm_nn_mat_mult_kernel_s8_s16() 309 out_shift++; in arm_nn_mat_mult_kernel_s8_s16() [all …]
|
| D | arm_convolve_HWC_q7_fast.c | 101 const uint16_t out_shift, in arm_convolve_HWC_q7_fast() argument 161 … wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast() 197 … wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast() 219 … wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast() 250 … wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast() 283 … wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast() 298 q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_fast() 326 *pOut = (q7_t)__SSAT((sum >> out_shift), 8); in arm_convolve_HWC_q7_fast() 349 conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_fast() 367 … Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); in arm_convolve_HWC_q7_fast()
|
| D | arm_convolve_HWC_q7_fast_nonsquare.c | 88 const uint16_t out_shift, in arm_convolve_HWC_q7_fast_nonsquare() argument 151 …wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast_nonsquare() 189 …wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast_nonsquare() 212 …wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast_nonsquare() 245 …wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast_nonsquare() 280 …wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_fast_nonsquare() 294 q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_fast_nonsquare() 322 *pOut = (q7_t)__SSAT((sum >> out_shift), 8); in arm_convolve_HWC_q7_fast_nonsquare() 346 conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_fast_nonsquare() 365 … Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); in arm_convolve_HWC_q7_fast_nonsquare()
|
| D | arm_convolve_1x1_HWC_q7_fast_nonsquare.c | 95 const uint16_t out_shift, in arm_convolve_1x1_HWC_q7_fast_nonsquare() argument 136 wt, bufferA, ch_im_out, ch_im_in, bias_shift, out_shift, bias, pOut); in arm_convolve_1x1_HWC_q7_fast_nonsquare() 149 q31_t sum = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift); in arm_convolve_1x1_HWC_q7_fast_nonsquare() 178 *pOut = (q7_t)__SSAT((sum >> out_shift), 8); in arm_convolve_1x1_HWC_q7_fast_nonsquare() 203 conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); in arm_convolve_1x1_HWC_q7_fast_nonsquare() 222 … Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); in arm_convolve_1x1_HWC_q7_fast_nonsquare()
|
| D | arm_convolve_HWC_q7_basic.c | 84 const uint16_t out_shift, in arm_convolve_HWC_q7_basic() argument 132 … wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_basic() 149 q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_basic() 180 *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); in arm_convolve_HWC_q7_basic() 196 conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_basic() 214 … Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); in arm_convolve_HWC_q7_basic()
|
| D | arm_convolve_HWC_q7_basic_nonsquare.c | 82 const uint16_t out_shift, in arm_convolve_HWC_q7_basic_nonsquare() argument 133 …wt, bufferA, ch_im_out, ch_im_in * dim_kernel_y * dim_kernel_x, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_basic_nonsquare() 150 q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_basic_nonsquare() 181 *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); in arm_convolve_HWC_q7_basic_nonsquare() 197 conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_basic_nonsquare() 216 … Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); in arm_convolve_HWC_q7_basic_nonsquare()
|
| D | arm_convolve_HWC_q7_RGB.c | 90 const uint16_t out_shift, in arm_convolve_HWC_q7_RGB() argument 182 … wt, bufferA, ch_im_out, 3 * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); in arm_convolve_HWC_q7_RGB() 198 q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_RGB() 226 *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); in arm_convolve_HWC_q7_RGB() 248 conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift); in arm_convolve_HWC_q7_RGB() 266 … Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); in arm_convolve_HWC_q7_RGB()
|
| /hal_nxp-latest/mcux/mcux-sdk/CMSIS/NN/Source/NNSupportFunctions/ |
| D | arm_nn_mult_q7.c | 56 void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockS… in arm_nn_mult_q7() argument 74 …t1 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); in arm_nn_mult_q7() 75 …t2 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); in arm_nn_mult_q7() 76 …t3 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); in arm_nn_mult_q7() 77 …t4 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); in arm_nn_mult_q7() 103 …++ = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); in arm_nn_mult_q7()
|
| D | arm_nn_mult_q15.c | 56 void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t bl… in arm_nn_mult_q15() argument 90 out1 = (q15_t)__SSAT((q31_t)(mul1 + NN_ROUND(out_shift)) >> out_shift, 16); in arm_nn_mult_q15() 91 out2 = (q15_t)__SSAT((q31_t)(mul2 + NN_ROUND(out_shift)) >> out_shift, 16); in arm_nn_mult_q15() 92 out3 = (q15_t)__SSAT((q31_t)(mul3 + NN_ROUND(out_shift)) >> out_shift, 16); in arm_nn_mult_q15() 93 out4 = (q15_t)__SSAT((q31_t)(mul4 + NN_ROUND(out_shift)) >> out_shift, 16); in arm_nn_mult_q15() 129 … = (q15_t)__SSAT(((q31_t)((q31_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 16); in arm_nn_mult_q15()
|
| /hal_nxp-latest/mcux/mcux-sdk/CMSIS/NN/Include/ |
| D | arm_nnfunctions.h | 305 const uint16_t out_shift, 349 const uint16_t out_shift, 386 const uint16_t out_shift, 427 const uint16_t out_shift, 478 const uint16_t out_shift, 534 const uint16_t out_shift, 687 const uint16_t out_shift, 729 const uint16_t out_shift, 790 const uint16_t out_shift, 833 const uint16_t out_shift, [all …]
|
| /hal_nxp-latest/mcux/mcux-sdk/CMSIS/NN/Source/BasicMathFunctions/ |
| D | arm_elementwise_mul_s8.c | 57 const int32_t out_shift, in arm_elementwise_mul_s8() argument 81 res_0 = arm_requantize_mve_32x4(res_0, vdupq_n_s32(out_mult), vdupq_n_s32(out_shift)); in arm_elementwise_mul_s8() 130 mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; in arm_elementwise_mul_s8() 141 mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; in arm_elementwise_mul_s8() 151 mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; in arm_elementwise_mul_s8() 161 mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; in arm_elementwise_mul_s8() 184 mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; in arm_elementwise_mul_s8()
|
| D | arm_elementwise_add_s8.c | 82 const int32_t out_shift, in arm_elementwise_add_s8() argument 110 SAT_INPUT_VECT(vect_1, out_mult, out_shift); in arm_elementwise_add_s8() 164 SAT_INPUT(sum, out_mult, out_shift); in arm_elementwise_add_s8() 178 SAT_INPUT(sum, out_mult, out_shift); in arm_elementwise_add_s8() 192 SAT_INPUT(sum, out_mult, out_shift); in arm_elementwise_add_s8() 206 SAT_INPUT(sum, out_mult, out_shift); in arm_elementwise_add_s8() 236 SAT_INPUT(sum, out_mult, out_shift); in arm_elementwise_add_s8()
|