1 /* 2 * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <open-source-office@arm.com> 3 * 4 * SPDX-License-Identifier: Apache-2.0 5 * 6 * Licensed under the Apache License, Version 2.0 (the License); you may 7 * not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 /* ---------------------------------------------------------------------- 20 * Project: CMSIS NN Library 21 * Title: arm_nnfunctions.h 22 * Description: Public header file for CMSIS NN Library 23 * 24 * $Date: 23 April 2024 25 * $Revision: V.16.0.0 26 * 27 * Target : Arm(R) M-Profile Architecture 28 * -------------------------------------------------------------------- */ 29 30 /** 31 * @defgroup Public Public 32 * A collection of functions to perform basic operations for neural network layers. Functions with a _s8 suffix support 33 * TensorFlow Lite framework. 34 */ 35 36 #ifndef ARM_NNFUNCTIONS_H 37 #define ARM_NNFUNCTIONS_H 38 39 #include "arm_nn_math_types.h" 40 #include "arm_nn_types.h" 41 42 #define USE_INTRINSIC 43 44 #ifdef __cplusplus 45 extern "C" { 46 #endif 47 48 /** 49 * @defgroup NNConv Convolution Functions 50 * 51 * Collection of convolution, depthwise convolution functions and their variants. 52 * 53 * The convolution is implemented in 2 steps: im2col and General Matrix Multiplication(GEMM) 54 * 55 * im2col is a process of converting each patch of image data into 56 * a column. After im2col, the convolution is computed as matrix-matrix 57 * multiplication. 
58 * 59 * To reduce the memory footprint, the im2col is performed partially. 60 * Each iteration, only a few columns (i.e., patches) are generated followed 61 * by GEMM. 62 * 63 */ 64 65 /** 66 * @brief s4 convolution layer wrapper function with the main purpose to call the optimal kernel available in 67 * cmsis-nn to perform the convolution. 68 * 69 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 70 * arm_convolve_wrapper_s4_get_buffer_size will return the buffer_size if required. 71 * The caller is expected to clear the buffer, if applicable, for security reasons. 72 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 73 * Range of conv_params->input_offset : [-127, 128] 74 * Range of conv_params->output_offset : [-128, 127] 75 * @param[in] quant_params Per-channel quantization info. 76 * It contains the multiplier and shift values to be applied to each output channel 77 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 78 * @param[in] input_data Input (activation) data pointer. Data type: int8 79 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the 80 * spatial filter dimensions 81 * @param[in] filter_data Filter data pointer. Data type: int8 packed with 2x int4 82 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 83 * @param[in] bias_data Bias data pointer. Data type: int32 84 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 85 * @param[out] output_data Output data pointer. Data type: int8 86 * 87 * @return The function returns either 88 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 89 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 
90 * 91 */ 92 arm_cmsis_nn_status arm_convolve_wrapper_s4(const cmsis_nn_context *ctx, 93 const cmsis_nn_conv_params *conv_params, 94 const cmsis_nn_per_channel_quant_params *quant_params, 95 const cmsis_nn_dims *input_dims, 96 const int8_t *input_data, 97 const cmsis_nn_dims *filter_dims, 98 const int8_t *filter_data, 99 const cmsis_nn_dims *bias_dims, 100 const int32_t *bias_data, 101 const cmsis_nn_dims *output_dims, 102 int8_t *output_data); 103 104 /** 105 * @brief Get the required buffer size for arm_convolve_wrapper_s4 106 * 107 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 108 * Range of conv_params->input_offset : [-127, 128] 109 * Range of conv_params->output_offset : [-128, 127] 110 * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN] 111 * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial 112 * filter dimensions 113 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 114 * 115 * @return The function returns required buffer size(bytes) 116 * 117 */ 118 int32_t arm_convolve_wrapper_s4_get_buffer_size(const cmsis_nn_conv_params *conv_params, 119 const cmsis_nn_dims *input_dims, 120 const cmsis_nn_dims *filter_dims, 121 const cmsis_nn_dims *output_dims); 122 123 /** 124 * @brief Get the required buffer size for arm_convolve_wrapper_s4 for Arm(R) Helium Architecture case. 125 * Refer to arm_convolve_wrapper_s4_get_buffer_size() for function argument details. 126 * 127 * @note Intended for compilation on Host. If compiling for an Arm target, use 128 * arm_convolve_wrapper_s4_get_buffer_size(). Currently this operator does not have an 129 * mve implementation, so dsp will be used. 
130 * 131 */ 132 int32_t arm_convolve_wrapper_s4_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params, 133 const cmsis_nn_dims *input_dims, 134 const cmsis_nn_dims *filter_dims, 135 const cmsis_nn_dims *output_dims); 136 137 /** 138 * @brief Get the required buffer size for arm_convolve_wrapper_s4 for processors with DSP extension. 139 * Refer to arm_convolve_wrapper_s4_get_buffer_size() for function argument details. 140 * 141 * @note Intended for compilation on Host. If compiling for an Arm target, use 142 * arm_convolve_wrapper_s4_get_buffer_size(). 143 * 144 */ 145 int32_t arm_convolve_wrapper_s4_get_buffer_size_dsp(const cmsis_nn_conv_params *conv_params, 146 const cmsis_nn_dims *input_dims, 147 const cmsis_nn_dims *filter_dims, 148 const cmsis_nn_dims *output_dims); 149 150 /** 151 * @brief s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in 152 * cmsis-nn to perform the convolution. 153 * 154 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 155 * arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required. 156 * The caller is expected to clear the buffer, if applicable, for security reasons. 157 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 158 * Range of conv_params->input_offset : [-127, 128] 159 * Range of conv_params->output_offset : [-128, 127] 160 * @param[in] quant_params Per-channel quantization info. 161 * It contains the multiplier and shift values to be applied to each output channel 162 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 163 * @param[in] input_data Input (activation) data pointer. Data type: int8 164 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the 165 * spatial filter dimensions 166 * @param[in] filter_data Filter data pointer. 
Data type: int8 167 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 168 * @param[in] bias_data Bias data pointer. Data type: int32 169 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 170 * @param[out] output_data Output data pointer. Data type: int8 171 * 172 * @return The function returns either 173 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 174 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 175 * 176 */ 177 arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx, 178 const cmsis_nn_conv_params *conv_params, 179 const cmsis_nn_per_channel_quant_params *quant_params, 180 const cmsis_nn_dims *input_dims, 181 const int8_t *input_data, 182 const cmsis_nn_dims *filter_dims, 183 const int8_t *filter_data, 184 const cmsis_nn_dims *bias_dims, 185 const int32_t *bias_data, 186 const cmsis_nn_dims *output_dims, 187 int8_t *output_data); 188 189 /** 190 * @brief Get the required buffer size for arm_convolve_wrapper_s8 191 * 192 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 193 * Range of conv_params->input_offset : [-127, 128] 194 * Range of conv_params->output_offset : [-128, 127] 195 * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN] 196 * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial 197 * filter dimensions 198 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 199 * 200 * @return The function returns required buffer size(bytes) 201 * 202 */ 203 int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params, 204 const cmsis_nn_dims *input_dims, 205 const cmsis_nn_dims *filter_dims, 206 const cmsis_nn_dims *output_dims); 207 208 /** 209 * @brief Get the required buffer size for arm_convolve_wrapper_s8 for Arm(R) Helium Architecture case. 
210 * Refer to arm_convolve_wrapper_s8_get_buffer_size() for function argument details. 211 * 212 * @note Intended for compilation on Host. If compiling for an Arm target, use 213 * arm_convolve_wrapper_s8_get_buffer_size(). 214 * 215 */ 216 int32_t arm_convolve_wrapper_s8_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params, 217 const cmsis_nn_dims *input_dims, 218 const cmsis_nn_dims *filter_dims, 219 const cmsis_nn_dims *output_dims); 220 221 /** 222 * @brief Get the required buffer size for arm_convolve_wrapper_s8 for processors with DSP extension. 223 * Refer to arm_convolve_wrapper_s8_get_buffer_size() for function argument details. 224 * 225 * @note Intended for compilation on Host. If compiling for an Arm target, use 226 * arm_convolve_wrapper_s8_get_buffer_size(). 227 * 228 */ 229 int32_t arm_convolve_wrapper_s8_get_buffer_size_dsp(const cmsis_nn_conv_params *conv_params, 230 const cmsis_nn_dims *input_dims, 231 const cmsis_nn_dims *filter_dims, 232 const cmsis_nn_dims *output_dims); 233 234 /** 235 * @brief s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in 236 * cmsis-nn to perform the convolution. 237 * 238 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 239 * arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required 240 * The caller is expected to clear the buffer, if applicable, for security reasons. 241 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 242 * conv_params->input_offset : Not used 243 * conv_params->output_offset : Not used 244 * @param[in] quant_params Per-channel quantization info. 245 * It contains the multiplier and shift values to be applied to each output channel 246 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 247 * @param[in] input_data Input (activation) data pointer. 
Data type: int16 248 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the 249 * spatial filter dimensions 250 * @param[in] filter_data Filter data pointer. Data type: int8 251 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 252 * @param[in] bias_data Struct with optional bias data pointer. Bias data type can be int64 or int32 depending 253 * on flag in struct. 254 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 255 * @param[out] output_data Output data pointer. Data type: int16 256 * 257 * @return The function returns either 258 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 259 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 260 * 261 */ 262 arm_cmsis_nn_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx, 263 const cmsis_nn_conv_params *conv_params, 264 const cmsis_nn_per_channel_quant_params *quant_params, 265 const cmsis_nn_dims *input_dims, 266 const int16_t *input_data, 267 const cmsis_nn_dims *filter_dims, 268 const int8_t *filter_data, 269 const cmsis_nn_dims *bias_dims, 270 const cmsis_nn_bias_data *bias_data, 271 const cmsis_nn_dims *output_dims, 272 int16_t *output_data); 273 274 /** 275 * @brief Get the required buffer size for arm_convolve_wrapper_s16. 276 * 277 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 278 * conv_params->input_offset : Not used 279 * conv_params->output_offset : Not used 280 * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN] 281 * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial 282 * filter dimensions 283 * @param[in] output_dims Output tensor dimensions. 
Format: [N, H, W, C_OUT] 284 * 285 * @return The function returns required buffer size(bytes) 286 * 287 */ 288 int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params, 289 const cmsis_nn_dims *input_dims, 290 const cmsis_nn_dims *filter_dims, 291 const cmsis_nn_dims *output_dims); 292 293 /** 294 * @brief Get the required buffer size for arm_convolve_wrapper_s16 for processors with DSP extension. 295 * Refer to arm_convolve_wrapper_s16_get_buffer_size() for function argument details. 296 * 297 * @note Intended for compilation on Host. If compiling for an Arm target, use 298 * arm_convolve_wrapper_s16_get_buffer_size(). 299 * 300 */ 301 int32_t arm_convolve_wrapper_s16_get_buffer_size_dsp(const cmsis_nn_conv_params *conv_params, 302 const cmsis_nn_dims *input_dims, 303 const cmsis_nn_dims *filter_dims, 304 const cmsis_nn_dims *output_dims); 305 306 /** 307 * @brief Get the required buffer size for arm_convolve_wrapper_s16 for Arm(R) Helium Architecture case. 308 * Refer to arm_convolve_wrapper_s16_get_buffer_size() for function argument details. 309 * 310 * @note Intended for compilation on Host. If compiling for an Arm target, use 311 * arm_convolve_wrapper_s16_get_buffer_size(). 312 * 313 */ 314 int32_t arm_convolve_wrapper_s16_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params, 315 const cmsis_nn_dims *input_dims, 316 const cmsis_nn_dims *filter_dims, 317 const cmsis_nn_dims *output_dims); 318 319 /** 320 * @brief Basic s4 convolution function 321 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 322 * arm_convolve_s4_get_buffer_size will return the buffer_size if required. 323 * The caller is expected to clear the buffer, if applicable, for security reasons. 324 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 
325 * Range of conv_params->input_offset : [-127, 128] 326 * Range of conv_params->output_offset : [-128, 127] 327 * @param[in] quant_params Per-channel quantization info. 328 * It contains the multiplier and shift values to be applied to each output channel 329 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 330 * @param[in] input_data Input (activation) data pointer. Data type: int8 331 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the 332 * spatial filter dimensions 333 * @param[in] filter_data Packed Filter data pointer. Data type: int8 packed with 2x int4 334 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 335 * @param[in] bias_data Optional bias data pointer. Data type: int32 336 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 337 * @param[out] output_data Output data pointer. Data type: int8 338 * 339 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 340 * 341 * @details 342 * 1. Supported framework: TensorFlow Lite micro 343 * 2. Additional memory is required for optimization. Refer to argument 'ctx' for details. 344 * 345 */ 346 arm_cmsis_nn_status arm_convolve_s4(const cmsis_nn_context *ctx, 347 const cmsis_nn_conv_params *conv_params, 348 const cmsis_nn_per_channel_quant_params *quant_params, 349 const cmsis_nn_dims *input_dims, 350 const int8_t *input_data, 351 const cmsis_nn_dims *filter_dims, 352 const int8_t *filter_data, 353 const cmsis_nn_dims *bias_dims, 354 const int32_t *bias_data, 355 const cmsis_nn_dims *output_dims, 356 int8_t *output_data); 357 /** 358 * @brief Basic s8 convolution function 359 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 360 * arm_convolve_s8_get_buffer_size will return the buffer_size if required. 361 * The caller is expected to clear the buffer, if applicable, for security reasons. 
362 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 363 * Range of conv_params->input_offset : [-127, 128] 364 * Range of conv_params->output_offset : [-128, 127] 365 * @param[in] quant_params Per-channel quantization info. 366 * It contains the multiplier and shift values to be applied to each output channel 367 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 368 * @param[in] input_data Input (activation) data pointer. Data type: int8 369 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, CK] where HK, WK and CK are the 370 * spatial filter dimensions. CK != C_IN is used for grouped convolution, in which 371 * case the required conditions are C_IN = N * CK and C_OUT = N * M for N groups of 372 * size M. 373 * @param[in] filter_data Filter data pointer. Data type: int8 374 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 375 * @param[in] bias_data Optional bias data pointer. Data type: int32 376 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 377 * @param[out] output_data Output data pointer. Data type: int8 378 * 379 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> if successful or 380 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if incorrect arguments or 381 * <code>ARM_CMSIS_NN_NO_IMPL_ERROR</code> 382 * 383 * @details 384 * 1. Supported framework: TensorFlow Lite micro 385 * 2. Additional memory is required for optimization. Refer to argument 'ctx' for details. 
386 * 387 */ 388 arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx, 389 const cmsis_nn_conv_params *conv_params, 390 const cmsis_nn_per_channel_quant_params *quant_params, 391 const cmsis_nn_dims *input_dims, 392 const int8_t *input_data, 393 const cmsis_nn_dims *filter_dims, 394 const int8_t *filter_data, 395 const cmsis_nn_dims *bias_dims, 396 const int32_t *bias_data, 397 const cmsis_nn_dims *output_dims, 398 int8_t *output_data); 399 400 /** 401 * @brief Get the required buffer size for s4 convolution function 402 * 403 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 404 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK 405 * are the spatial filter dimensions 406 * @return The function returns required buffer size(bytes) 407 * 408 */ 409 int32_t arm_convolve_s4_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); 410 411 /** 412 * @brief Get the required buffer size for s8 convolution function 413 * 414 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 415 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK 416 * are the spatial filter dimensions 417 * @return The function returns required buffer size(bytes) 418 * 419 */ 420 int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); 421 422 /** 423 * @brief Basic s8 transpose convolution function 424 * @param[in, out] ctx Function context that contains the additional buffer if required by the 425 * function. 426 * arm_transpose_conv_s8_get_buffer_size will return the buffer_size if required. 427 * The caller is expected to clear the buffer, if applicable, for security 428 * reasons. 429 * @param[in, out] output_ctx Temporary scratch buffer. 
430 * The required size is: output width * output height * output channel * 4 431 * The caller is expected to clear the buffer, if applicable, for security 432 * reasons. 433 * @param[in] transpose_conv_params Convolution parameters (e.g. strides, dilations, pads,...). 434 * Range of transpose_conv_params->input_offset : [-127, 128] 435 * Range of transpose_conv_params->output_offset : [-128, 127] 436 * @param[in] quant_params Per-channel quantization info. 437 * It contains the multiplier and shift values to be applied to each out channel. 438 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 439 * @param[in] input_data Input (activation) data pointer. Data type: int8 440 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the 441 * spatial filter dimensions 442 * @param[in] filter_data Filter data pointer. Data type: int8 443 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 444 * @param[in] bias_data Optional bias data pointer. Data type: int32 445 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 446 * @param[out] output_data Output data pointer. Data type: int8 447 * 448 * @return The function returns either 449 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 450 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 451 * 452 * @details 453 * 1. Supported framework: TensorFlow Lite micro 454 * 2. Additional memory is required for optimization. Refer to arguments 'ctx' and 'output_ctx' for details. 
455 * 456 */ 457 arm_cmsis_nn_status arm_transpose_conv_s8(const cmsis_nn_context *ctx, 458 const cmsis_nn_context *output_ctx, 459 const cmsis_nn_transpose_conv_params *transpose_conv_params, 460 const cmsis_nn_per_channel_quant_params *quant_params, 461 const cmsis_nn_dims *input_dims, 462 const int8_t *input_data, 463 const cmsis_nn_dims *filter_dims, 464 const int8_t *filter_data, 465 const cmsis_nn_dims *bias_dims, 466 const int32_t *bias_data, 467 const cmsis_nn_dims *output_dims, 468 int8_t *output_data); 469 470 /** 471 * @brief Get the required buffer size for s8 transpose conv function 472 * 473 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 474 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK 475 * are the spatial filter dimensions 476 * @param[in] out_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 477 * @return The function returns required buffer size(bytes) 478 * 479 */ 480 int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_dims *input_dims, 481 const cmsis_nn_dims *filter_dims, 482 const cmsis_nn_dims *out_dims); 483 484 /** 485 * @brief Get size of additional buffer required by arm_transpose_conv_s8() for processors with DSP extension. 486 * Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details. 487 * 488 * @note Intended for compilation on Host. If compiling for an Arm target, use 489 * arm_transpose_conv_s8_get_buffer_size(). 490 * 491 */ 492 int32_t arm_transpose_conv_s8_get_buffer_size_dsp(const cmsis_nn_dims *input_dims, 493 const cmsis_nn_dims *filter_dims, 494 const cmsis_nn_dims *out_dims); 495 496 /** 497 * @brief Get size of additional buffer required by arm_transpose_conv_s8() for Arm(R) Helium Architecture case. 498 * Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details. 499 * 500 * @note Intended for compilation on Host. 
If compiling for an Arm target, use 501 * arm_transpose_conv_s8_get_buffer_size(). 502 * 503 */ 504 int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims, 505 const cmsis_nn_dims *filter_dims, 506 const cmsis_nn_dims *out_dims); 507 508 /** 509 * @brief Basic s16 convolution function 510 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 511 * arm_convolve_s16_get_buffer_size will return the buffer_size if required. 512 * The caller is expected to clear the buffer, if applicable, for security reasons. 513 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 514 * conv_params->input_offset : Not used 515 * conv_params->output_offset : Not used 516 * @param[in] quant_params Per-channel quantization info. 517 * It contains the multiplier and shift values to be applied to each output channel 518 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 519 * @param[in] input_data Input (activation) data pointer. Data type: int16 520 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the 521 * spatial filter dimensions 522 * @param[in] filter_data Filter data pointer. Data type: int8 523 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 524 * @param[in] bias_data Struct with optional bias data pointer. Bias data type can be int64 or int32 depending 525 * on flag in struct. 526 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 527 * @param[out] output_data Output data pointer. Data type: int16 528 * 529 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> if successful or 530 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if incorrect arguments or 531 * <code>ARM_CMSIS_NN_NO_IMPL_ERROR</code> 532 * 533 * @details 534 * 1. Supported framework: TensorFlow Lite micro 535 * 2. Additional memory is required for optimization. 
Refer to argument 'ctx' for details. 536 * 537 */ 538 arm_cmsis_nn_status arm_convolve_s16(const cmsis_nn_context *ctx, 539 const cmsis_nn_conv_params *conv_params, 540 const cmsis_nn_per_channel_quant_params *quant_params, 541 const cmsis_nn_dims *input_dims, 542 const int16_t *input_data, 543 const cmsis_nn_dims *filter_dims, 544 const int8_t *filter_data, 545 const cmsis_nn_dims *bias_dims, 546 const cmsis_nn_bias_data *bias_data, 547 const cmsis_nn_dims *output_dims, 548 int16_t *output_data); 549 550 /** 551 * @brief Get the required buffer size for s16 convolution function 552 * 553 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 554 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK 555 * are the spatial filter dimensions 556 * @return The function returns required buffer size(bytes) 557 * 558 */ 559 int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); 560 561 /** 562 * @brief Fast s4 version for 1x1 convolution (non-square shape) 563 * 564 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 565 * arm_convolve_1x1_s4_fast_get_buffer_size will return the buffer_size if required. 566 * The caller is expected to clear the buffer, if applicable, for security reasons. 567 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 568 * Range of conv_params->input_offset : [-127, 128] 569 * Range of conv_params->output_offset : [-128, 127] 570 * @param[in] quant_params Per-channel quantization info. 571 * It contains the multiplier and shift values to be applied to each output channel 572 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 573 * @param[in] input_data Input (activation) data pointer. Data type: int8 574 * @param[in] filter_dims Filter tensor dimensions. 
Format: [C_OUT, 1, 1, C_IN] 575 * @param[in] filter_data Filter data pointer. Data type: int8 packed with 2x int4 576 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 577 * @param[in] bias_data Optional bias data pointer. Data type: int32 578 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 579 * @param[out] output_data Output data pointer. Data type: int8 580 * 581 * @return The function returns either 582 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 583 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 584 * 585 * @details 586 * - Supported framework : TensorFlow Lite Micro 587 * - The following constraints on the arguments apply 588 * -# conv_params->padding.w = conv_params->padding.h = 0 589 * -# conv_params->stride.w = conv_params->stride.h = 1 590 * 591 */ 592 arm_cmsis_nn_status arm_convolve_1x1_s4_fast(const cmsis_nn_context *ctx, 593 const cmsis_nn_conv_params *conv_params, 594 const cmsis_nn_per_channel_quant_params *quant_params, 595 const cmsis_nn_dims *input_dims, 596 const int8_t *input_data, 597 const cmsis_nn_dims *filter_dims, 598 const int8_t *filter_data, 599 const cmsis_nn_dims *bias_dims, 600 const int32_t *bias_data, 601 const cmsis_nn_dims *output_dims, 602 int8_t *output_data); 603 604 /** 605 * @brief s4 version for 1x1 convolution with support for non-unity stride values 606 * 607 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 608 * None is required by this function. 609 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 610 * Range of conv_params->input_offset : [-127, 128] 611 * Range of conv_params->output_offset : [-128, 127] 612 * @param[in] quant_params Per-channel quantization info. 613 * It contains the multiplier and shift values to be applied to each output channel 614 * @param[in] input_dims Input (activation) tensor dimensions. 
Format: [N, H, W, C_IN] 615 * @param[in] input_data Input (activation) data pointer. Data type: int8 616 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 617 * @param[in] filter_data Filter data pointer. Data type: int8 packed with 2x int4 618 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 619 * @param[in] bias_data Optional bias data pointer. Data type: int32 620 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 621 * @param[out] output_data Output data pointer. Data type: int8 622 * 623 * @return The function returns either 624 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 625 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 626 * @details 627 * - Supported framework : TensorFlow Lite Micro 628 * - The following constraints on the arguments apply 629 * -# conv_params->padding.w = conv_params->padding.h = 0 630 * 631 */ 632 arm_cmsis_nn_status arm_convolve_1x1_s4(const cmsis_nn_context *ctx, 633 const cmsis_nn_conv_params *conv_params, 634 const cmsis_nn_per_channel_quant_params *quant_params, 635 const cmsis_nn_dims *input_dims, 636 const int8_t *input_data, 637 const cmsis_nn_dims *filter_dims, 638 const int8_t *filter_data, 639 const cmsis_nn_dims *bias_dims, 640 const int32_t *bias_data, 641 const cmsis_nn_dims *output_dims, 642 int8_t *output_data); 643 644 /** 645 * @brief Fast s8 version for 1x1 convolution (non-square shape) 646 * 647 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 648 * arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required. 649 * The caller is expected to clear the buffer, if applicable, for security reasons. 650 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 
651 * Range of conv_params->input_offset : [-127, 128] 652 * Range of conv_params->output_offset : [-128, 127] 653 * @param[in] quant_params Per-channel quantization info. 654 * It contains the multiplier and shift values to be applied to each output channel 655 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 656 * @param[in] input_data Input (activation) data pointer. Data type: int8 657 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 658 * @param[in] filter_data Filter data pointer. Data type: int8 659 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 660 * @param[in] bias_data Optional bias data pointer. Data type: int32 661 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 662 * @param[out] output_data Output data pointer. Data type: int8 663 * 664 * @return The function returns either 665 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 666 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 
667 * 668 * @details 669 * - Supported framework : TensorFlow Lite Micro 670 * - The following constrains on the arguments apply 671 * -# conv_params->padding.w = conv_params->padding.h = 0 672 * -# conv_params->stride.w = conv_params->stride.h = 1 673 * 674 */ 675 arm_cmsis_nn_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx, 676 const cmsis_nn_conv_params *conv_params, 677 const cmsis_nn_per_channel_quant_params *quant_params, 678 const cmsis_nn_dims *input_dims, 679 const int8_t *input_data, 680 const cmsis_nn_dims *filter_dims, 681 const int8_t *filter_data, 682 const cmsis_nn_dims *bias_dims, 683 const int32_t *bias_data, 684 const cmsis_nn_dims *output_dims, 685 int8_t *output_data); 686 687 /** 688 * @brief Get the required buffer size for arm_convolve_1x1_s4_fast 689 * 690 * @param[in] input_dims Input (activation) dimensions 691 * @return The function returns the required buffer size in bytes 692 * 693 */ 694 int32_t arm_convolve_1x1_s4_fast_get_buffer_size(const cmsis_nn_dims *input_dims); 695 696 /** 697 * @brief Get the required buffer size for arm_convolve_1x1_s8_fast 698 * 699 * @param[in] input_dims Input (activation) dimensions 700 * @return The function returns the required buffer size in bytes 701 * 702 */ 703 int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims); 704 705 /** 706 * @brief s8 version for 1x1 convolution with support for non-unity stride values 707 * 708 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 709 * None is required by this function. 710 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 711 * Range of conv_params->input_offset : [-127, 128] 712 * Range of conv_params->output_offset : [-128, 127] 713 * @param[in] quant_params Per-channel quantization info. 
714 * It contains the multiplier and shift values to be applied to each output channel 715 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 716 * @param[in] input_data Input (activation) data pointer. Data type: int8 717 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 718 * @param[in] filter_data Filter data pointer. Data type: int8 719 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 720 * @param[in] bias_data Optional bias data pointer. Data type: int32 721 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 722 * @param[out] output_data Output data pointer. Data type: int8 723 * 724 * @return The function returns either 725 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 726 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 727 * @details 728 * - Supported framework : TensorFlow Lite Micro 729 * - The following constrains on the arguments apply 730 * -# conv_params->padding.w = conv_params->padding.h = 0 731 * 732 */ 733 arm_cmsis_nn_status arm_convolve_1x1_s8(const cmsis_nn_context *ctx, 734 const cmsis_nn_conv_params *conv_params, 735 const cmsis_nn_per_channel_quant_params *quant_params, 736 const cmsis_nn_dims *input_dims, 737 const int8_t *input_data, 738 const cmsis_nn_dims *filter_dims, 739 const int8_t *filter_data, 740 const cmsis_nn_dims *bias_dims, 741 const int32_t *bias_data, 742 const cmsis_nn_dims *output_dims, 743 int8_t *output_data); 744 745 /** 746 * @brief 1xn convolution 747 * 748 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 749 * arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required 750 * The caller is expected to clear the buffer, if applicable, for security reasons. 751 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 
752 * Range of conv_params->input_offset : [-127, 128] 753 * Range of conv_params->output_offset : [-128, 127] 754 * @param[in] quant_params Per-channel quantization info. 755 * It contains the multiplier and shift values to be applied to each output channel 756 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 757 * @param[in] input_data Input (activation) data pointer. Data type: int8 758 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal 759 * spatial filter dimension 760 * @param[in] filter_data Filter data pointer. Data type: int8 761 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 762 * @param[in] bias_data Optional bias data pointer. Data type: int32 763 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 764 * @param[out] output_data Output data pointer. Data type: int8 765 * 766 * @return The function returns either 767 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 768 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 769 * 770 * @details 771 * - Supported framework : TensorFlow Lite Micro 772 * - The following constraints on the arguments apply 773 * -# input_dims->n equals 1 774 * -# output_dims->w is a multiple of 4 775 * -# Explicit constraints(since it is for 1xN convolution) 776 * -## input_dims->h equals 1 777 * -## output_dims->h equals 1 778 * -## filter_dims->h equals 1 779 * @todo Remove constraint on output_dims->w to make the function generic. 
780 * 781 */ 782 arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx, 783 const cmsis_nn_conv_params *conv_params, 784 const cmsis_nn_per_channel_quant_params *quant_params, 785 const cmsis_nn_dims *input_dims, 786 const int8_t *input_data, 787 const cmsis_nn_dims *filter_dims, 788 const int8_t *filter_data, 789 const cmsis_nn_dims *bias_dims, 790 const int32_t *bias_data, 791 const cmsis_nn_dims *output_dims, 792 int8_t *output_data); 793 794 /** 795 * @brief 1xn convolution for s4 weights 796 * 797 * @param[in, out] ctx Function context that contains the additional buffer if required by the function. 798 * arm_convolve_1_x_n_s4_get_buffer_size will return the buffer_size if required 799 * The caller is expected to clear the buffer, if applicable, for security reasons. 800 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 801 * Range of conv_params->input_offset : [-127, 128] 802 * Range of conv_params->output_offset : [-128, 127] 803 * @param[in] quant_params Per-channel quantization info. 804 * It contains the multiplier and shift values to be applied to each output channel 805 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 806 * @param[in] input_data Input (activation) data pointer. Data type: int8 807 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal 808 * spatial filter dimension 809 * @param[in] filter_data Filter data pointer. Data type: int8 as packed int4 810 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 811 * @param[in] bias_data Optional bias data pointer. Data type: int32 812 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 813 * @param[out] output_data Output data pointer. Data type: int8 814 * 815 * @return The function returns either 816 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. 
or, 817 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 818 * 819 * @details 820 * - Supported framework : TensorFlow Lite Micro 821 * - The following constrains on the arguments apply 822 * -# stride.w * input_dims->c is a multiple of 4 823 * -# Explicit constraints(since it is for 1xN convolution) 824 * -## input_dims->h equals 1 825 * -## output_dims->h equals 1 826 * -## filter_dims->h equals 1 827 *@todo Remove constraint on output_dims->w to make the function generic. 828 * 829 */ 830 arm_cmsis_nn_status arm_convolve_1_x_n_s4(const cmsis_nn_context *ctx, 831 const cmsis_nn_conv_params *conv_params, 832 const cmsis_nn_per_channel_quant_params *quant_params, 833 const cmsis_nn_dims *input_dims, 834 const int8_t *input_data, 835 const cmsis_nn_dims *filter_dims, 836 const int8_t *filter_data, 837 const cmsis_nn_dims *bias_dims, 838 const int32_t *bias_data, 839 const cmsis_nn_dims *output_dims, 840 int8_t *output_data); 841 842 /** 843 * @brief Get the required additional buffer size for 1xn convolution 844 * 845 * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 846 * Range of conv_params->input_offset : [-127, 128] 847 * Range of conv_params->output_offset : [-128, 127] 848 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 849 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the 850 * horizontal spatial filter dimension 851 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 852 * 853 * @return The function returns required buffer size(bytes) 854 * 855 */ 856 int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params, 857 const cmsis_nn_dims *input_dims, 858 const cmsis_nn_dims *filter_dims, 859 const cmsis_nn_dims *output_dims); 860 861 /** 862 * @brief Get the required additional buffer size for 1xn convolution 863 * 864 * @param[in] conv_params Convolution parameters (e.g. 
strides, dilations, pads,...). 865 * Range of conv_params->input_offset : [-127, 128] 866 * Range of conv_params->output_offset : [-128, 127] 867 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 868 * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the 869 * horizontal spatial filter dimension 870 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 871 * 872 * @return The function returns required buffer size(bytes) 873 * 874 */ 875 int32_t arm_convolve_1_x_n_s4_get_buffer_size(const cmsis_nn_conv_params *conv_params, 876 const cmsis_nn_dims *input_dims, 877 const cmsis_nn_dims *filter_dims, 878 const cmsis_nn_dims *output_dims); 879 880 /** 881 * @brief Wrapper function to pick the right optimized s8 depthwise convolution function 882 * 883 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 884 * definition file to see if an additional buffer is required. 885 * Optional function {API}_get_buffer_size() provides the buffer 886 * size if required. 887 * The caller is expected to clear the buffer, if applicable, for security reasons. 888 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 889 * dw_conv_params->dilation is not used. 890 * Range of dw_conv_params->input_offset : [-127, 128] 891 * Range of dw_conv_params->output_offset : [-128, 127] 892 * @param[in] quant_params Per-channel quantization info. 893 * It contains the multiplier and shift values to be applied to each 894 * output channel 895 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 896 * Batch argument N is not used and assumed to be 1. 897 * @param[in] input_data Input (activation) data pointer. Data type: int8 898 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 899 * @param[in] filter_data Filter data pointer. 
Data type: int8 900 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 901 * @param[in] bias_data Bias data pointer. Data type: int32 902 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] 903 * @param[in, out] output_data Output data pointer. Data type: int8 904 * @return The function returns 905 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful completion. 906 * 907 * @details 908 * - Supported framework: TensorFlow Lite 909 * - Picks one of the following functions 910 * -# arm_depthwise_conv_s8() 911 * -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only 912 * -# arm_depthwise_conv_s8_opt() 913 * - Check details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the 914 * boundary. 915 */ 916 arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx, 917 const cmsis_nn_dw_conv_params *dw_conv_params, 918 const cmsis_nn_per_channel_quant_params *quant_params, 919 const cmsis_nn_dims *input_dims, 920 const int8_t *input_data, 921 const cmsis_nn_dims *filter_dims, 922 const int8_t *filter_data, 923 const cmsis_nn_dims *bias_dims, 924 const int32_t *bias_data, 925 const cmsis_nn_dims *output_dims, 926 int8_t *output_data); 927 928 /** 929 * @brief Wrapper function to pick the right optimized s4 depthwise convolution function 930 * 931 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 932 * definition file to see if an additional buffer is required. 933 * Optional function {API}_get_buffer_size() provides the buffer 934 * size if required. 935 * The caller is expected to clear the buffer, if applicable, for security reasons. 936 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 937 * dw_conv_params->dilation is not used. 
938 * Range of dw_conv_params->input_offset : [-127, 128] 939 * Range of dw_conv_params->output_offset : [-128, 127] 940 * @param[in] quant_params Per-channel quantization info. 941 * It contains the multiplier and shift values to be applied to each 942 * output channel 943 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 944 * Batch argument N is not used and assumed to be 1. 945 * @param[in] input_data Input (activation) data pointer. Data type: int8 946 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 947 * @param[in] filter_data Filter data pointer. Data type: int8_t packed 4-bit weights, e.g four sequential 948 * weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43]. 949 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 950 * @param[in] bias_data Bias data pointer. Data type: int32 951 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] 952 * @param[in, out] output_data Output data pointer. Data type: int8 953 * @return The function returns 954 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful completion. 955 * 956 * @details 957 * - Supported framework: TensorFlow Lite 958 */ 959 arm_cmsis_nn_status arm_depthwise_conv_wrapper_s4(const cmsis_nn_context *ctx, 960 const cmsis_nn_dw_conv_params *dw_conv_params, 961 const cmsis_nn_per_channel_quant_params *quant_params, 962 const cmsis_nn_dims *input_dims, 963 const int8_t *input_data, 964 const cmsis_nn_dims *filter_dims, 965 const int8_t *filter_data, 966 const cmsis_nn_dims *bias_dims, 967 const int32_t *bias_data, 968 const cmsis_nn_dims *output_dims, 969 int8_t *output_data); 970 971 /** 972 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() 973 * 974 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 
975 * Range of dw_conv_params->input_offset : [-127, 128] 976 * Range of dw_conv_params->output_offset : [-128, 127] 977 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 978 * Batch argument N is not used and assumed to be 1. 979 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 980 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] 981 * @return Size of additional memory required for optimizations in bytes. 982 * 983 */ 984 int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params, 985 const cmsis_nn_dims *input_dims, 986 const cmsis_nn_dims *filter_dims, 987 const cmsis_nn_dims *output_dims); 988 989 /** 990 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() for processors with DSP extension. 991 * Refer to arm_depthwise_conv_wrapper_s8_get_buffer_size() for function argument details. 992 * 993 * @note Intended for compilation on Host. If compiling for an Arm target, use 994 * arm_depthwise_conv_wrapper_s8_get_buffer_size(). 995 * 996 */ 997 int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size_dsp(const cmsis_nn_dw_conv_params *dw_conv_params, 998 const cmsis_nn_dims *input_dims, 999 const cmsis_nn_dims *filter_dims, 1000 const cmsis_nn_dims *output_dims); 1001 1002 /** 1003 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() for Arm(R) Helium Architecture case. 1004 * Refer to arm_depthwise_conv_wrapper_s8_get_buffer_size() for function argument details. 1005 * 1006 * @note Intended for compilation on Host. If compiling for an Arm target, use 1007 * arm_depthwise_conv_wrapper_s8_get_buffer_size(). 
1008 * 1009 */ 1010 int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size_mve(const cmsis_nn_dw_conv_params *dw_conv_params, 1011 const cmsis_nn_dims *input_dims, 1012 const cmsis_nn_dims *filter_dims, 1013 const cmsis_nn_dims *output_dims); 1014 1015 /** 1016 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s4() 1017 * 1018 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 1019 * Range of dw_conv_params->input_offset : [-127, 128] 1020 * Range of dw_conv_params->output_offset : [-128, 127] 1021 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 1022 * Batch argument N is not used and assumed to be 1. 1023 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1024 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] 1025 * @return Size of additional memory required for optimizations in bytes. 1026 * 1027 */ 1028 int32_t arm_depthwise_conv_wrapper_s4_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params, 1029 const cmsis_nn_dims *input_dims, 1030 const cmsis_nn_dims *filter_dims, 1031 const cmsis_nn_dims *output_dims); 1032 1033 /** 1034 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s4() for processors with DSP extension. 1035 * Refer to arm_depthwise_conv_wrapper_s4_get_buffer_size() for function argument details. 1036 * 1037 * @note Intended for compilation on Host. If compiling for an Arm target, use 1038 * arm_depthwise_conv_wrapper_s4_get_buffer_size(). 1039 * 1040 */ 1041 int32_t arm_depthwise_conv_wrapper_s4_get_buffer_size_dsp(const cmsis_nn_dw_conv_params *dw_conv_params, 1042 const cmsis_nn_dims *input_dims, 1043 const cmsis_nn_dims *filter_dims, 1044 const cmsis_nn_dims *output_dims); 1045 1046 /** 1047 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s4() for Arm(R) Helium Architecture case. 
1048 * Refer to arm_depthwise_conv_wrapper_s4_get_buffer_size() for function argument details. 1049 * 1050 * @note Intended for compilation on Host. If compiling for an Arm target, use 1051 * arm_depthwise_conv_wrapper_s4_get_buffer_size(). 1052 * 1053 */ 1054 int32_t arm_depthwise_conv_wrapper_s4_get_buffer_size_mve(const cmsis_nn_dw_conv_params *dw_conv_params, 1055 const cmsis_nn_dims *input_dims, 1056 const cmsis_nn_dims *filter_dims, 1057 const cmsis_nn_dims *output_dims); 1058 1059 /** 1060 * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. 1061 * 1062 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1063 * definition file to see if an additional buffer is required. 1064 * Optional function {API}_get_buffer_size() provides the buffer 1065 * size if an additional buffer is required. 1066 * The caller is expected to clear the buffer, if applicable, for security reasons. 1067 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 1068 * dw_conv_params->dilation is not used. 1069 * Range of dw_conv_params->input_offset : [-127, 128] 1070 * Range of dw_conv_params->output_offset : [-128, 127] 1071 * @param[in] quant_params Per-channel quantization info. 1072 * It contains the multiplier and shift values to be applied to each 1073 * output channel 1074 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 1075 * Batch argument N is not used. 1076 * @param[in] input_data Input (activation) data pointer. Data type: int8 1077 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1078 * @param[in] filter_data Filter data pointer. Data type: int8 1079 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 1080 * @param[in] bias_data Bias data pointer. Data type: int32 1081 * @param[in] output_dims Output tensor dimensions. 
Format: [N, H, W, C_OUT] 1082 * @param[in, out] output_data Output data pointer. Data type: int8 1083 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 1084 * 1085 * @details 1086 * - Supported framework: TensorFlow Lite 1087 */ 1088 arm_cmsis_nn_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx, 1089 const cmsis_nn_dw_conv_params *dw_conv_params, 1090 const cmsis_nn_per_channel_quant_params *quant_params, 1091 const cmsis_nn_dims *input_dims, 1092 const int8_t *input_data, 1093 const cmsis_nn_dims *filter_dims, 1094 const int8_t *filter_data, 1095 const cmsis_nn_dims *bias_dims, 1096 const int32_t *bias_data, 1097 const cmsis_nn_dims *output_dims, 1098 int8_t *output_data); 1099 1100 /** 1101 * @brief Basic s4 depthwise convolution function that doesn't have any constraints on the input dimensions. 1102 * 1103 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1104 * definition file to see if an additional buffer is required. 1105 * Optional function {API}_get_buffer_size() provides the buffer 1106 * size if an additional buffer is required. 1107 * The caller is expected to clear the buffer, if applicable, for security reasons. 1108 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 1109 * dw_conv_params->dilation is not used. 1110 * Range of dw_conv_params->input_offset : [-127, 128] 1111 * Range of dw_conv_params->output_offset : [-128, 127] 1112 * @param[in] quant_params Per-channel quantization info. 1113 * It contains the multiplier and shift values to be applied to each 1114 * output channel 1115 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 1116 * Batch argument N is not used. 1117 * @param[in] input Input (activation) data pointer. Data type: int8 1118 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1119 * @param[in] kernel Filter data pointer. 
Data type: int8_t packed 4-bit weights, e.g. four sequential 1120 * weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43]. 1121 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 1122 * @param[in] bias Bias data pointer. Data type: int32 1123 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 1124 * @param[in, out] output Output data pointer. Data type: int8 1125 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 1126 * 1127 * @details 1128 * - Supported framework: TensorFlow Lite 1129 */ 1130 arm_cmsis_nn_status arm_depthwise_conv_s4(const cmsis_nn_context *ctx, 1131 const cmsis_nn_dw_conv_params *dw_conv_params, 1132 const cmsis_nn_per_channel_quant_params *quant_params, 1133 const cmsis_nn_dims *input_dims, 1134 const int8_t *input, 1135 const cmsis_nn_dims *filter_dims, 1136 const int8_t *kernel, 1137 const cmsis_nn_dims *bias_dims, 1138 const int32_t *bias, 1139 const cmsis_nn_dims *output_dims, 1140 int8_t *output); 1141 1142 /** 1143 * @brief Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions. 1144 * 1145 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1146 * definition file to see if an additional buffer is required. 1147 * Optional function {API}_get_buffer_size() provides the buffer 1148 * size if an additional buffer is required. 1149 * 1150 * The caller is expected to clear the buffer, if applicable, for security reasons. 1151 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 1152 * conv_params->input_offset : Not used 1153 * conv_params->output_offset : Not used 1154 * @param[in] quant_params Per-channel quantization info. 1155 * It contains the multiplier and shift values to be applied to each 1156 * output channel 1157 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 1158 * Batch argument N is not used. 
1159 * @param[in] input_data Input (activation) data pointer. Data type: int8 1160 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1161 * @param[in] filter_data Filter data pointer. Data type: int8 1162 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 1163 * @param[in] bias_data Bias data pointer. Data type: int64 1164 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] 1165 * @param[in, out] output_data Output data pointer. Data type: int16 1166 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 1167 * 1168 * @details 1169 * - Supported framework: TensorFlow Lite 1170 */ 1171 arm_cmsis_nn_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx, 1172 const cmsis_nn_dw_conv_params *dw_conv_params, 1173 const cmsis_nn_per_channel_quant_params *quant_params, 1174 const cmsis_nn_dims *input_dims, 1175 const int16_t *input_data, 1176 const cmsis_nn_dims *filter_dims, 1177 const int8_t *filter_data, 1178 const cmsis_nn_dims *bias_dims, 1179 const int64_t *bias_data, 1180 const cmsis_nn_dims *output_dims, 1181 int16_t *output_data); 1182 1183 /** 1184 * @brief Wrapper function to pick the right optimized s16 depthwise convolution function 1185 * 1186 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1187 * definition file to see if an additional buffer is required. 1188 * Optional function {API}_get_buffer_size() provides the buffer 1189 * size if required. 1190 * The caller is expected to clear the buffer, if applicable, for security reasons. 1191 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 1192 * dw_conv_params->dilation is not used. 1193 * Range of dw_conv_params->input_offset : Not used 1194 * Range of dw_conv_params->output_offset : Not used 1195 * @param[in] quant_params Per-channel quantization info. 
1196 * It contains the multiplier and shift values to be applied to each 1197 * output channel 1198 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 1199 * Batch argument N is not used and assumed to be 1. 1200 * @param[in] input_data Input (activation) data pointer. Data type: int16 1201 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1202 * @param[in] filter_data Filter data pointer. Data type: int8 1203 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 1204 * @param[in] bias_data Bias data pointer. Data type: int64 1205 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] 1206 * @param[in, out] output_data Output data pointer. Data type: int16 1207 * @return The function returns 1208 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful completion. 1209 * 1210 * @details 1211 * - Supported framework: TensorFlow Lite 1212 * - Picks one of the following functions 1213 * -# arm_depthwise_conv_s16() 1214 * -# arm_depthwise_conv_fast_s16() - Cortex-M CPUs with DSP extension only 1215 */ 1216 arm_cmsis_nn_status arm_depthwise_conv_wrapper_s16(const cmsis_nn_context *ctx, 1217 const cmsis_nn_dw_conv_params *dw_conv_params, 1218 const cmsis_nn_per_channel_quant_params *quant_params, 1219 const cmsis_nn_dims *input_dims, 1220 const int16_t *input_data, 1221 const cmsis_nn_dims *filter_dims, 1222 const int8_t *filter_data, 1223 const cmsis_nn_dims *bias_dims, 1224 const int64_t *bias_data, 1225 const cmsis_nn_dims *output_dims, 1226 int16_t *output_data); 1227 1228 /** 1229 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s16() 1230 * 1231 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) 1232 * Range of dw_conv_params->input_offset : Not used 1233 * Range of dw_conv_params->output_offset : Not used 1234 * @param[in] input_dims Input (activation) tensor dimensions. 
Format: [H, W, C_IN] 1235 * Batch argument N is not used and assumed to be 1. 1236 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1237 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] 1238 * @return Size of additional memory required for optimizations in bytes. 1239 * 1240 */ 1241 int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params, 1242 const cmsis_nn_dims *input_dims, 1243 const cmsis_nn_dims *filter_dims, 1244 const cmsis_nn_dims *output_dims); 1245 1246 /** 1247 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s16() for processors with DSP extension. 1248 * Refer to arm_depthwise_conv_wrapper_s16_get_buffer_size() for function argument details. 1249 * 1250 * @note Intended for compilation on Host. If compiling for an Arm target, use 1251 * arm_depthwise_conv_wrapper_s16_get_buffer_size(). 1252 * 1253 */ 1254 int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size_dsp(const cmsis_nn_dw_conv_params *dw_conv_params, 1255 const cmsis_nn_dims *input_dims, 1256 const cmsis_nn_dims *filter_dims, 1257 const cmsis_nn_dims *output_dims); 1258 1259 /** 1260 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s16() for Arm(R) Helium Architecture 1261 * case. Refer to arm_depthwise_conv_wrapper_s16_get_buffer_size() for function argument details. 1262 * 1263 * @note Intended for compilation on Host. If compiling for an Arm target, use 1264 * arm_depthwise_conv_wrapper_s16_get_buffer_size(). 1265 * 1266 */ 1267 int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size_mve(const cmsis_nn_dw_conv_params *dw_conv_params, 1268 const cmsis_nn_dims *input_dims, 1269 const cmsis_nn_dims *filter_dims, 1270 const cmsis_nn_dims *output_dims); 1271 1272 /** 1273 * @brief Optimized s16 depthwise convolution function with constraint that in_channel equals out_channel. 
1274 * Refer arm_depthwise_conv_s16() for function argument details. 1275 * 1276 * @return The function returns one of the following 1277 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - ctx->buf == NULL and 1278 * arm_depthwise_conv_fast_s16_get_buffer_size() > 0 or 1279 * input channel != output channel or 1280 * ch_mult != 1 1281 * 1282 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 1283 * 1284 * @details 1285 * - Supported framework: TensorFlow Lite 1286 * - The following constraints on the arguments apply 1287 * -# Number of input channel equals number of output channels or ch_mult equals 1 1288 * - Recommended when number of channels is 4 or greater. 1289 * 1290 */ 1291 arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx, 1292 const cmsis_nn_dw_conv_params *dw_conv_params, 1293 const cmsis_nn_per_channel_quant_params *quant_params, 1294 const cmsis_nn_dims *input_dims, 1295 const int16_t *input_data, 1296 const cmsis_nn_dims *filter_dims, 1297 const int8_t *filter_data, 1298 const cmsis_nn_dims *bias_dims, 1299 const int64_t *bias_data, 1300 const cmsis_nn_dims *output_dims, 1301 int16_t *output_data); 1302 1303 /** 1304 * @brief Get the required buffer size for optimized s16 depthwise convolution 1305 * function with constraint that in_channel equals out_channel. 1306 * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN] 1307 * Batch argument N is not used. 1308 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1309 * @return The function returns required buffer size in bytes 1310 * 1311 */ 1312 int32_t arm_depthwise_conv_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); 1313 1314 /** 1315 * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on 1316 * the input arguments(documented below). Refer arm_depthwise_conv_s8() for function 1317 * argument details. 
1318 * 1319 * @return The function returns one of the following 1320 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - Unsupported dimension of tensors 1321 * - Unsupported pad size along the x axis 1322 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 1323 * 1324 * @details 1325 * - Supported framework : TensorFlow Lite Micro 1326 * - The following constraints on the arguments apply 1327 * -# Number of input channel equals number of output channels 1328 * -# Filter height and width equals 3 1329 * -# Padding along x is either 0 or 1. 1330 * 1331 */ 1332 arm_cmsis_nn_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx, 1333 const cmsis_nn_dw_conv_params *dw_conv_params, 1334 const cmsis_nn_per_channel_quant_params *quant_params, 1335 const cmsis_nn_dims *input_dims, 1336 const int8_t *input_data, 1337 const cmsis_nn_dims *filter_dims, 1338 const int8_t *filter_data, 1339 const cmsis_nn_dims *bias_dims, 1340 const int32_t *bias_data, 1341 const cmsis_nn_dims *output_dims, 1342 int8_t *output_data); 1343 1344 /** 1345 * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. 1346 * Refer to arm_depthwise_conv_s8() for function argument details. 1347 * 1348 * @return The function returns one of the following 1349 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - input channel != output channel or 1350 * ch_mult != 1 1351 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 1352 * 1353 * @note If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read out 1354 * for the following if MVE optimizations (Arm Helium Technology) are used. 1355 * - Output shift 1356 * - Output multiplier 1357 * - Output bias 1358 * - kernel 1359 * @details 1360 * - Supported framework: TensorFlow Lite 1361 * - The following constraints on the arguments apply 1362 * -# Number of input channel equals number of output channels or ch_mult equals 1 1363 * - Recommended when number of channels is 4 or greater. 
1364 * 1365 */ 1366 arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx, 1367 const cmsis_nn_dw_conv_params *dw_conv_params, 1368 const cmsis_nn_per_channel_quant_params *quant_params, 1369 const cmsis_nn_dims *input_dims, 1370 const int8_t *input_data, 1371 const cmsis_nn_dims *filter_dims, 1372 const int8_t *filter_data, 1373 const cmsis_nn_dims *bias_dims, 1374 const int32_t *bias_data, 1375 const cmsis_nn_dims *output_dims, 1376 int8_t *output_data); 1377 1378 /** 1379 * @brief Optimized s4 depthwise convolution function with constraint that in_channel equals out_channel. 1380 * Refer to arm_depthwise_conv_s4() for function argument details. 1381 * 1382 * @return The function returns one of the following 1383 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - input channel != output channel or 1384 * ch_mult != 1 1385 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 1386 * 1387 * @note If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read out 1388 * for the following if MVE optimizations (Arm Helium Technology) are used. 1389 * - Output shift 1390 * - Output multiplier 1391 * - Output bias 1392 * - kernel 1393 * @details 1394 * - Supported framework: TensorFlow Lite 1395 * - The following constraints on the arguments apply 1396 * -# Number of input channel equals number of output channels or ch_mult equals 1 1397 * - Recommended when number of channels is 4 or greater. 
1398 * 1399 */ 1400 arm_cmsis_nn_status arm_depthwise_conv_s4_opt(const cmsis_nn_context *ctx, 1401 const cmsis_nn_dw_conv_params *dw_conv_params, 1402 const cmsis_nn_per_channel_quant_params *quant_params, 1403 const cmsis_nn_dims *input_dims, 1404 const int8_t *input_data, 1405 const cmsis_nn_dims *filter_dims, 1406 const int8_t *filter_data, 1407 const cmsis_nn_dims *bias_dims, 1408 const int32_t *bias_data, 1409 const cmsis_nn_dims *output_dims, 1410 int8_t *output_data); 1411 1412 /** 1413 * @brief Get the required buffer size for optimized s8 depthwise convolution 1414 * function with constraint that in_channel equals out_channel. 1415 * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN] 1416 * Batch argument N is not used. 1417 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1418 * @return The function returns required buffer size in bytes 1419 * 1420 */ 1421 int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); 1422 1423 /** 1424 * @brief Get the required buffer size for optimized s4 depthwise convolution 1425 * function with constraint that in_channel equals out_channel. 1426 * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN] 1427 * Batch argument N is not used. 1428 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] 1429 * @return The function returns required buffer size in bytes 1430 * 1431 */ 1432 int32_t arm_depthwise_conv_s4_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); 1433 1434 /** 1435 * @defgroup FC Fully-connected Layer Functions 1436 * 1437 * Collection of fully-connected and matrix multiplication functions. 1438 * 1439 * Fully-connected layer is basically a matrix-vector multiplication 1440 * with bias. The matrix is the weights and the input/output vectors 1441 * are the activation values. 
Supported {weight, activation} precisions 1442 * include {8-bit, 8-bit} and {8-bit, 16-bit} 1443 * 1444 * 1445 */ 1446 1447 /** 1448 * @brief Basic s4 Fully Connected function. 1449 * 1450 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1451 * definition file to see if an additional buffer is required. 1452 * Optional function {API}_get_buffer_size() provides the buffer 1453 * size if an additional buffer is required. 1454 * The caller is expected to clear the buffer ,if applicable, for security reasons. 1455 * @param[in] fc_params Fully Connected layer parameters. 1456 * Range of fc_params->input_offset : [-127, 128] 1457 * fc_params->filter_offset : 0 1458 * Range of fc_params->output_offset : [-128, 127] 1459 * @param[in] quant_params Per-tensor quantization info. 1460 * It contains the multiplier and shift values to be applied to the output tensor. 1461 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 1462 * Input dimension is taken as Nx(H * W * C_IN) 1463 * @param[in] input_data Input (activation) data pointer. Data type: int8 1464 * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] 1465 * N : accumulation depth and equals (H * W * C_IN) from input_dims 1466 * C : output depth and equals C_OUT in output_dims 1467 * H & W : Not used 1468 * @param[in] filter_data Filter data pointer. Data type: int8_t packed 4-bit weights, e.g four sequential 1469 * weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43]. 1470 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 1471 * N, H, W : Not used 1472 * @param[in] bias_data Bias data pointer. Data type: int32 1473 * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] 1474 * N : Batches 1475 * C_OUT : Output depth 1476 * H & W : Not used. 1477 * @param[in, out] output_data Output data pointer. 
Data type: int8 1478 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 1479 * 1480 * @details 1481 * - Supported framework: TensorFlow Lite 1482 */ 1483 arm_cmsis_nn_status arm_fully_connected_s4(const cmsis_nn_context *ctx, 1484 const cmsis_nn_fc_params *fc_params, 1485 const cmsis_nn_per_tensor_quant_params *quant_params, 1486 const cmsis_nn_dims *input_dims, 1487 const int8_t *input_data, 1488 const cmsis_nn_dims *filter_dims, 1489 const int8_t *filter_data, 1490 const cmsis_nn_dims *bias_dims, 1491 const int32_t *bias_data, 1492 const cmsis_nn_dims *output_dims, 1493 int8_t *output_data); 1494 1495 /** 1496 * @brief Basic s8 Fully Connected function. 1497 * 1498 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1499 * definition file to see if an additional buffer is required. 1500 * Optional function {API}_get_buffer_size() provides the buffer 1501 * size if an additional buffer is required. 1502 * The caller is expected to clear the buffer, if applicable, for security reasons. 1503 * @param[in] fc_params Fully Connected layer parameters. 1504 * Range of fc_params->input_offset : [-127, 128] 1505 * fc_params->filter_offset : 0 1506 * Range of fc_params->output_offset : [-128, 127] 1507 * @param[in] quant_params Per-tensor quantization info. 1508 * It contains the multiplier and shift values to be applied to the output tensor. 1509 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 1510 * Input dimension is taken as Nx(H * W * C_IN) 1511 * @param[in] input_data Input (activation) data pointer. Data type: int8 1512 * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] 1513 * N : accumulation depth and equals (H * W * C_IN) from input_dims 1514 * C : output depth and equals C_OUT in output_dims 1515 * H & W : Not used 1516 * @param[in] filter_data Filter data pointer. Data type: int8 1517 * @param[in] bias_dims Bias tensor dimensions. 
Format: [C_OUT] 1518 * N, H, W : Not used 1519 * @param[in] bias_data Bias data pointer. Data type: int32 1520 * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] 1521 * N : Batches 1522 * C_OUT : Output depth 1523 * H & W : Not used. 1524 * @param[in, out] output_data Output data pointer. Data type: int8 1525 * 1526 * @return The function returns either 1527 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 1528 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 1529 * 1530 * @details 1531 * - Supported framework: TensorFlow Lite 1532 */ 1533 arm_cmsis_nn_status arm_fully_connected_s8(const cmsis_nn_context *ctx, 1534 const cmsis_nn_fc_params *fc_params, 1535 const cmsis_nn_per_tensor_quant_params *quant_params, 1536 const cmsis_nn_dims *input_dims, 1537 const int8_t *input_data, 1538 const cmsis_nn_dims *filter_dims, 1539 const int8_t *filter_data, 1540 const cmsis_nn_dims *bias_dims, 1541 const int32_t *bias_data, 1542 const cmsis_nn_dims *output_dims, 1543 int8_t *output_data); 1544 1545 /** 1546 * @brief Calculate the sum of each row in vector_data, multiply by lhs_offset and optionally add s32 bias_data. 1547 * @param[in, out] vector_sum_buf Buffer for vector sums 1548 * @param[in] vector_cols Number of vector columns 1549 * @param[in] vector_rows Number of vector rows 1550 * @param[in] vector_data Vector of weights data 1551 * @param[in] lhs_offset Constant multiplied with each sum 1552 * @param[in] bias_data Vector of bias data, added to each sum. 1553 * @return The function returns 1554 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 1555 */ 1556 arm_cmsis_nn_status arm_vector_sum_s8(int32_t *vector_sum_buf, 1557 const int32_t vector_cols, 1558 const int32_t vector_rows, 1559 const int8_t *vector_data, 1560 const int32_t lhs_offset, 1561 const int32_t *bias_data); 1562 1563 /** 1564 * @brief Calculate the sum of each row in vector_data, multiply by lhs_offset and optionally add s64 bias_data. 
1565 * @param[in, out] vector_sum_buf Buffer for vector sums 1566 * @param[in] vector_cols Number of vector columns 1567 * @param[in] vector_rows Number of vector rows 1568 * @param[in] vector_data Vector of weights data 1569 * @param[in] lhs_offset Constant multiplied with each sum 1570 * @param[in] bias_data Vector of bias data, added to each sum. 1571 * @return The function returns 1572 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 1573 */ 1574 arm_cmsis_nn_status arm_vector_sum_s8_s64(int64_t *vector_sum_buf, 1575 const int32_t vector_cols, 1576 const int32_t vector_rows, 1577 const int8_t *vector_data, 1578 const int32_t lhs_offset, 1579 const int64_t *bias_data); 1580 1581 /** 1582 * @brief Get size of additional buffer required by arm_fully_connected_s8(). 1583 * See also arm_vector_sum_s8, which is required if buffer size is > 0. 1584 * @param[in] filter_dims dimension of filter 1585 * @return The function returns required buffer size in bytes 1586 * 1587 */ 1588 int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims); 1589 1590 /** 1591 * @brief Get size of additional buffer required by arm_fully_connected_s8() for processors with DSP extension. 1592 * Refer to arm_fully_connected_s8_get_buffer_size() for function argument details. 1593 * 1594 * @note Intended for compilation on Host. If compiling for an Arm target, use 1595 * arm_fully_connected_s8_get_buffer_size(). 1596 * 1597 */ 1598 int32_t arm_fully_connected_s8_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims); 1599 1600 /** 1601 * @brief Get size of additional buffer required by arm_fully_connected_s8() for Arm(R) Helium Architecture case. 1602 * Refer to arm_fully_connected_s8_get_buffer_size() for function argument details. 1603 * 1604 * @note Intended for compilation on Host. If compiling for an Arm target, use 1605 * arm_fully_connected_s8_get_buffer_size(). 
1606 * 1607 */ 1608 int32_t arm_fully_connected_s8_get_buffer_size_mve(const cmsis_nn_dims *filter_dims); 1609 1610 /** 1611 * @brief Basic s16 Fully Connected function. 1612 * 1613 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1614 * definition file to see if an additional buffer is required. 1615 * Optional function {API}_get_buffer_size() provides the buffer 1616 * size if an additional buffer is required. 1617 * The caller is expected to clear the buffer, if applicable, for security reasons. 1618 * @param[in] fc_params Fully Connected layer parameters. 1619 * fc_params->input_offset : 0 1620 * fc_params->filter_offset : 0 1621 * fc_params->output_offset : 0 1622 * @param[in] quant_params Per-tensor quantization info. 1623 * It contains the multiplier and shift values to be applied to the output tensor. 1624 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 1625 * Input dimension is taken as Nx(H * W * C_IN) 1626 * @param[in] input_data Input (activation) data pointer. Data type: int16 1627 * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] 1628 * N : accumulation depth and equals (H * W * C_IN) from input_dims 1629 * C : output depth and equals C_OUT in output_dims 1630 * H & W : Not used 1631 * @param[in] filter_data Filter data pointer. Data type: int8 1632 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] 1633 * N, H, W : Not used 1634 * @param[in] bias_data Bias data pointer. Data type: int64 1635 * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] 1636 * N : Batches 1637 * C_OUT : Output depth 1638 * H & W : Not used. 1639 * @param[in, out] output_data Output data pointer. 
Data type: int16 1640 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 1641 * 1642 * @details 1643 * - Supported framework: TensorFlow Lite 1644 */ 1645 arm_cmsis_nn_status arm_fully_connected_s16(const cmsis_nn_context *ctx, 1646 const cmsis_nn_fc_params *fc_params, 1647 const cmsis_nn_per_tensor_quant_params *quant_params, 1648 const cmsis_nn_dims *input_dims, 1649 const int16_t *input_data, 1650 const cmsis_nn_dims *filter_dims, 1651 const int8_t *filter_data, 1652 const cmsis_nn_dims *bias_dims, 1653 const int64_t *bias_data, 1654 const cmsis_nn_dims *output_dims, 1655 int16_t *output_data); 1656 1657 /** 1658 * @brief Get size of additional buffer required by arm_fully_connected_s16(). 1659 * @param[in] filter_dims dimension of filter 1660 * @return The function returns required buffer size in bytes 1661 * 1662 */ 1663 int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims); 1664 1665 /** 1666 * @brief Get size of additional buffer required by arm_fully_connected_s16() for processors with DSP extension. 1667 * Refer to arm_fully_connected_s16_get_buffer_size() for function argument details. 1668 * 1669 * @note Intended for compilation on Host. If compiling for an Arm target, use 1670 * arm_fully_connected_s16_get_buffer_size(). 1671 * 1672 */ 1673 int32_t arm_fully_connected_s16_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims); 1674 1675 /** 1676 * @brief Get size of additional buffer required by arm_fully_connected_s16() for Arm(R) Helium Architecture case. 1677 * Refer to arm_fully_connected_s16_get_buffer_size() for function argument details. 1678 * 1679 * @note Intended for compilation on Host. If compiling for an Arm target, use 1680 * arm_fully_connected_s16_get_buffer_size(). 
1681 * 1682 */ 1683 int32_t arm_fully_connected_s16_get_buffer_size_mve(const cmsis_nn_dims *filter_dims); 1684 1685 /** 1686 * @defgroup groupElementwise Elementwise Functions 1687 * 1688 * Elementwise add and multiplication functions. 1689 * 1690 */ 1691 1692 /** 1693 * @brief s8 elementwise add of two vectors 1694 * @param[in] input_1_vect pointer to input vector 1 1695 * @param[in] input_2_vect pointer to input vector 2 1696 * @param[in] input_1_offset offset for input 1. Range: -127 to 128 1697 * @param[in] input_1_mult multiplier for input 1 1698 * @param[in] input_1_shift shift for input 1 1699 * @param[in] input_2_offset offset for input 2. Range: -127 to 128 1700 * @param[in] input_2_mult multiplier for input 2 1701 * @param[in] input_2_shift shift for input 2 1702 * @param[in] left_shift input left shift 1703 * @param[in,out] output pointer to output vector 1704 * @param[in] out_offset output offset. Range: -128 to 127 1705 * @param[in] out_mult output multiplier 1706 * @param[in] out_shift output shift 1707 * @param[in] out_activation_min minimum value to clamp output to. Min: -128 1708 * @param[in] out_activation_max maximum value to clamp output to. 
Max: 127 1709 * @param[in] block_size number of samples 1710 * @return The function returns ARM_CMSIS_NN_SUCCESS 1711 */ 1712 arm_cmsis_nn_status arm_elementwise_add_s8(const int8_t *input_1_vect, 1713 const int8_t *input_2_vect, 1714 const int32_t input_1_offset, 1715 const int32_t input_1_mult, 1716 const int32_t input_1_shift, 1717 const int32_t input_2_offset, 1718 const int32_t input_2_mult, 1719 const int32_t input_2_shift, 1720 const int32_t left_shift, 1721 int8_t *output, 1722 const int32_t out_offset, 1723 const int32_t out_mult, 1724 const int32_t out_shift, 1725 const int32_t out_activation_min, 1726 const int32_t out_activation_max, 1727 const int32_t block_size); 1728 1729 /** 1730 * @brief s16 elementwise add of two vectors 1731 * @param[in] input_1_vect pointer to input vector 1 1732 * @param[in] input_2_vect pointer to input vector 2 1733 * @param[in] input_1_offset offset for input 1. Not used. 1734 * @param[in] input_1_mult multiplier for input 1 1735 * @param[in] input_1_shift shift for input 1 1736 * @param[in] input_2_offset offset for input 2. Not used. 1737 * @param[in] input_2_mult multiplier for input 2 1738 * @param[in] input_2_shift shift for input 2 1739 * @param[in] left_shift input left shift 1740 * @param[in,out] output pointer to output vector 1741 * @param[in] out_offset output offset. Not used. 1742 * @param[in] out_mult output multiplier 1743 * @param[in] out_shift output shift 1744 * @param[in] out_activation_min minimum value to clamp output to. Min: -32768 1745 * @param[in] out_activation_max maximum value to clamp output to. 
Max: 32767 1746 * @param[in] block_size number of samples 1747 * @return The function returns ARM_CMSIS_NN_SUCCESS 1748 */ 1749 arm_cmsis_nn_status arm_elementwise_add_s16(const int16_t *input_1_vect, 1750 const int16_t *input_2_vect, 1751 const int32_t input_1_offset, 1752 const int32_t input_1_mult, 1753 const int32_t input_1_shift, 1754 const int32_t input_2_offset, 1755 const int32_t input_2_mult, 1756 const int32_t input_2_shift, 1757 const int32_t left_shift, 1758 int16_t *output, 1759 const int32_t out_offset, 1760 const int32_t out_mult, 1761 const int32_t out_shift, 1762 const int32_t out_activation_min, 1763 const int32_t out_activation_max, 1764 const int32_t block_size); 1765 1766 /** 1767 * @brief s8 elementwise multiplication 1768 * @param[in] input_1_vect pointer to input vector 1 1769 * @param[in] input_2_vect pointer to input vector 2 1770 * @param[in] input_1_offset offset for input 1. Range: -127 to 128 1771 * @param[in] input_2_offset offset for input 2. Range: -127 to 128 1772 * @param[in,out] output pointer to output vector 1773 * @param[in] out_offset output offset. Range: -128 to 127 1774 * @param[in] out_mult output multiplier 1775 * @param[in] out_shift output shift 1776 * @param[in] out_activation_min minimum value to clamp output to. Min: -128 1777 * @param[in] out_activation_max maximum value to clamp output to. 
Max: 127 1778 * @param[in] block_size number of samples 1779 * @return The function returns ARM_CMSIS_NN_SUCCESS 1780 * 1781 * @details Supported framework: TensorFlow Lite micro 1782 */ 1783 arm_cmsis_nn_status arm_elementwise_mul_s8(const int8_t *input_1_vect, 1784 const int8_t *input_2_vect, 1785 const int32_t input_1_offset, 1786 const int32_t input_2_offset, 1787 int8_t *output, 1788 const int32_t out_offset, 1789 const int32_t out_mult, 1790 const int32_t out_shift, 1791 const int32_t out_activation_min, 1792 const int32_t out_activation_max, 1793 const int32_t block_size); 1794 1795 /** 1796 * @brief s16 elementwise multiplication 1797 * @param[in] input_1_vect pointer to input vector 1 1798 * @param[in] input_2_vect pointer to input vector 2 1799 * @param[in] input_1_offset offset for input 1. Not used. 1800 * @param[in] input_2_offset offset for input 2. Not used. 1801 * @param[in,out] output pointer to output vector 1802 * @param[in] out_offset output offset. Not used. 1803 * @param[in] out_mult output multiplier 1804 * @param[in] out_shift output shift 1805 * @param[in] out_activation_min minimum value to clamp output to. Min: -32768 1806 * @param[in] out_activation_max maximum value to clamp output to. 
Max: 32767 1807 * @param[in] block_size number of samples 1808 * @return The function returns ARM_CMSIS_NN_SUCCESS 1809 * 1810 * @details Supported framework: TensorFlow Lite micro 1811 */ 1812 arm_cmsis_nn_status arm_elementwise_mul_s16(const int16_t *input_1_vect, 1813 const int16_t *input_2_vect, 1814 const int32_t input_1_offset, 1815 const int32_t input_2_offset, 1816 int16_t *output, 1817 const int32_t out_offset, 1818 const int32_t out_mult, 1819 const int32_t out_shift, 1820 const int32_t out_activation_min, 1821 const int32_t out_activation_max, 1822 const int32_t block_size); 1823 1824 /** 1825 * @defgroup Acti Activation Functions 1826 * 1827 * Perform activation layers, including ReLU (Rectified Linear Unit), 1828 * sigmoid and tanh 1829 * 1830 */ 1831 1832 /** 1833 * @brief Q7 RELU function 1834 * @param[in,out] data pointer to input 1835 * @param[in] size number of elements 1836 */ 1837 void arm_relu_q7(int8_t *data, uint16_t size); 1838 1839 /** 1840 * @brief s8 ReLU6 function 1841 * @param[in,out] data pointer to input 1842 * @param[in] size number of elements 1843 */ 1844 void arm_relu6_s8(int8_t *data, uint16_t size); 1845 1846 /** 1847 * @brief Q15 RELU function 1848 * @param[in,out] data pointer to input 1849 * @param[in] size number of elements 1850 */ 1851 void arm_relu_q15(int16_t *data, uint16_t size); 1852 1853 /** 1854 * @brief s16 neural network activation function using direct table look-up 1855 * @param[in] input pointer to input data 1856 * @param[out] output pointer to output 1857 * @param[in] size number of elements 1858 * @param[in] left_shift bit-width of the integer part, assumed to be smaller than 3. 1859 * @param[in] type type of activation functions 1860 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 1861 1862 * 1863 * @details Supported framework: TensorFlow Lite for Microcontrollers. 
1864 * This activation function must be bit precise congruent with the corresponding TFLM tanh and sigmoid activation 1865 * functions 1866 */ 1867 arm_cmsis_nn_status arm_nn_activation_s16(const int16_t *input, 1868 int16_t *output, 1869 const int32_t size, 1870 const int32_t left_shift, 1871 const arm_nn_activation_type type); 1872 1873 /** 1874 * @defgroup Pooling Pooling Functions 1875 * 1876 * Perform max and average pooling operations 1877 * 1878 */ 1879 1880 /** 1881 * @brief s8 average pooling function. 1882 * 1883 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1884 * definition file to see if an additional buffer is required. 1885 * Optional function {API}_get_buffer_size() provides the buffer 1886 * size if an additional buffer is required. 1887 * The caller is expected to clear the buffer, if applicable, for security reasons. 1888 * @param[in] pool_params Pooling parameters 1889 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 1890 * @param[in] input_data Input (activation) data pointer. Data type: int8 1891 * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] 1892 * Argument N and C are not used. 1893 * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] 1894 * Argument N is not used. 1895 * C_OUT equals C_IN. 1896 * @param[in, out] output_data Output data pointer. Data type: int8 1897 * 1898 * @return The function returns either 1899 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 1900 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 
1901 * 1902 * @details 1903 * - Supported Framework: TensorFlow Lite 1904 * 1905 */ 1906 arm_cmsis_nn_status arm_avgpool_s8(const cmsis_nn_context *ctx, 1907 const cmsis_nn_pool_params *pool_params, 1908 const cmsis_nn_dims *input_dims, 1909 const int8_t *input_data, 1910 const cmsis_nn_dims *filter_dims, 1911 const cmsis_nn_dims *output_dims, 1912 int8_t *output_data); 1913 1914 /** 1915 * @brief Get the required buffer size for S8 average pooling function 1916 * @param[in] dim_dst_width output tensor dimension 1917 * @param[in] ch_src number of input tensor channels 1918 * @return The function returns required buffer size in bytes 1919 * 1920 */ 1921 int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, const int ch_src); 1922 1923 /** 1924 * @brief Get the required buffer size for S8 average pooling function for processors with DSP extension. 1925 * Refer to arm_avgpool_s8_get_buffer_size() for function argument details. 1926 * 1927 * @note Intended for compilation on Host. If compiling for an Arm target, use 1928 * arm_avgpool_s8_get_buffer_size(). 1929 * 1930 */ 1931 int32_t arm_avgpool_s8_get_buffer_size_dsp(const int dim_dst_width, const int ch_src); 1932 1933 /** 1934 * @brief Get the required buffer size for S8 average pooling function for Arm(R) Helium Architecture case. 1935 * Refer to arm_avgpool_s8_get_buffer_size() for function argument details. 1936 * 1937 * @note Intended for compilation on Host. If compiling for an Arm target, use 1938 * arm_avgpool_s8_get_buffer_size(). 1939 * 1940 */ 1941 int32_t arm_avgpool_s8_get_buffer_size_mve(const int dim_dst_width, const int ch_src); 1942 1943 /** 1944 * @brief s16 average pooling function. 1945 * 1946 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 1947 * definition file to see if an additional buffer is required. 1948 * Optional function {API}_get_buffer_size() provides the buffer 1949 * size if an additional buffer is required. 
1950 * The caller is expected to clear the buffer, if applicable, for security reasons. 1951 * @param[in] pool_params Pooling parameters 1952 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 1953 * @param[in] input_data Input (activation) data pointer. Data type: int16 1954 * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] 1955 * Argument N and C are not used. 1956 * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] 1957 * Argument N is not used. 1958 * C_OUT equals C_IN. 1959 * @param[in, out] output_data Output data pointer. Data type: int16 1960 * 1961 * @return The function returns 1962 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 1963 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - In case of invalid arguments 1964 * 1965 * @details 1966 * - Supported Framework: TensorFlow Lite 1967 * 1968 */ 1969 arm_cmsis_nn_status arm_avgpool_s16(const cmsis_nn_context *ctx, 1970 const cmsis_nn_pool_params *pool_params, 1971 const cmsis_nn_dims *input_dims, 1972 const int16_t *input_data, 1973 const cmsis_nn_dims *filter_dims, 1974 const cmsis_nn_dims *output_dims, 1975 int16_t *output_data); 1976 1977 /** 1978 * @brief Get the required buffer size for S16 average pooling function 1979 * @param[in] dim_dst_width output tensor dimension 1980 * @param[in] ch_src number of input tensor channels 1981 * @return The function returns required buffer size in bytes 1982 * 1983 */ 1984 int32_t arm_avgpool_s16_get_buffer_size(const int dim_dst_width, const int ch_src); 1985 1986 /** 1987 * @brief Get the required buffer size for S16 average pooling function for processors with DSP extension. 1988 * Refer to arm_avgpool_s16_get_buffer_size() for function argument details. 1989 * 1990 * @note Intended for compilation on Host. If compiling for an Arm target, use 1991 * arm_avgpool_s16_get_buffer_size(). 
1992 * 1993 */ 1994 int32_t arm_avgpool_s16_get_buffer_size_dsp(const int dim_dst_width, const int ch_src); 1995 1996 /** 1997 * @brief Get the required buffer size for S16 average pooling function for Arm(R) Helium Architecture case. 1998 * Refer to arm_avgpool_s16_get_buffer_size() for function argument details. 1999 * 2000 * @note Intended for compilation on Host. If compiling for an Arm target, use 2001 * arm_avgpool_s16_get_buffer_size(). 2002 * 2003 */ 2004 int32_t arm_avgpool_s16_get_buffer_size_mve(const int dim_dst_width, const int ch_src); 2005 2006 /** 2007 * @brief s8 max pooling function. 2008 * 2009 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 2010 * definition file to see if an additional buffer is required. 2011 * Optional function {API}_get_buffer_size() provides the buffer 2012 * size if an additional buffer is required. 2013 * The caller is expected to clear the buffer, if applicable, for security reasons. 2014 * @param[in] pool_params Pooling parameters 2015 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 2016 * @param[in] input_data Input (activation) data pointer. The input tensor must not 2017 * overlap with the output tensor. Data type: int8 2018 * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] 2019 * Argument N and C are not used. 2020 * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] 2021 * Argument N is not used. 2022 * C_OUT equals C_IN. 2023 * @param[in, out] output_data Output data pointer. Data type: int8 2024 * 2025 * @return The function returns either 2026 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 2027 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 
2028 * 2029 * @details 2030 * - Supported Framework: TensorFlow Lite 2031 * 2032 */ 2033 arm_cmsis_nn_status arm_max_pool_s8(const cmsis_nn_context *ctx, 2034 const cmsis_nn_pool_params *pool_params, 2035 const cmsis_nn_dims *input_dims, 2036 const int8_t *input_data, 2037 const cmsis_nn_dims *filter_dims, 2038 const cmsis_nn_dims *output_dims, 2039 int8_t *output_data); 2040 2041 /** 2042 * @brief s16 max pooling function. 2043 * 2044 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 2045 * definition file to see if an additional buffer is required. 2046 * Optional function {API}_get_buffer_size() provides the buffer 2047 * size if an additional buffer is required. 2048 * The caller is expected to clear the buffer, if applicable, for security reasons. 2049 * @param[in] pool_params Pooling parameters 2050 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] 2051 * @param[in] src Input (activation) data pointer. The input tensor must not 2052 * overlap with the output tensor. Data type: int16 2053 * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] 2054 * Argument N and C are not used. 2055 * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] 2056 * Argument N is not used. 2057 * C_OUT equals C_IN. 2058 * @param[in, out] dst Output data pointer. Data type: int16 2059 * 2060 * @return The function returns either 2061 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 2062 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 
2063 * 2064 * @details 2065 * - Supported Framework: TensorFlow Lite 2066 * 2067 */ 2068 arm_cmsis_nn_status arm_max_pool_s16(const cmsis_nn_context *ctx, 2069 const cmsis_nn_pool_params *pool_params, 2070 const cmsis_nn_dims *input_dims, 2071 const int16_t *src, 2072 const cmsis_nn_dims *filter_dims, 2073 const cmsis_nn_dims *output_dims, 2074 int16_t *dst); 2075 2076 /** 2077 * @defgroup Softmax Softmax Functions 2078 * 2079 * 2080 */ 2081 2082 /** 2083 * @brief S8 softmax function 2084 * @param[in] input Pointer to the input tensor 2085 * @param[in] num_rows Number of rows in the input tensor 2086 * @param[in] row_size Number of elements in each input row 2087 * @param[in] mult Input quantization multiplier 2088 * @param[in] shift Input quantization shift within the range [0, 31] 2089 * @param[in] diff_min Minimum difference with max in row. Used to check if 2090 * the quantized exponential operation can be performed 2091 * @param[out] output Pointer to the output tensor 2092 * 2093 * @note Supported framework: TensorFlow Lite micro (bit-accurate) 2094 * 2095 */ 2096 void arm_softmax_s8(const int8_t *input, 2097 const int32_t num_rows, 2098 const int32_t row_size, 2099 const int32_t mult, 2100 const int32_t shift, 2101 const int32_t diff_min, 2102 int8_t *output); 2103 2104 /** 2105 * @brief S8 to s16 softmax function 2106 * @param[in] input Pointer to the input tensor 2107 * @param[in] num_rows Number of rows in the input tensor 2108 * @param[in] row_size Number of elements in each input row 2109 * @param[in] mult Input quantization multiplier 2110 * @param[in] shift Input quantization shift within the range [0, 31] 2111 * @param[in] diff_min Minimum difference with max in row. 
Used to check if 2112 * the quantized exponential operation can be performed 2113 * @param[out] output Pointer to the output tensor 2114 * 2115 * @note Supported framework: TensorFlow Lite micro (bit-accurate) 2116 * 2117 */ 2118 void arm_softmax_s8_s16(const int8_t *input, 2119 const int32_t num_rows, 2120 const int32_t row_size, 2121 const int32_t mult, 2122 const int32_t shift, 2123 const int32_t diff_min, 2124 int16_t *output); 2125 2126 /** 2127 * @brief S16 softmax function 2128 * @param[in] input Pointer to the input tensor 2129 * @param[in] num_rows Number of rows in the input tensor 2130 * @param[in] row_size Number of elements in each input row 2131 * @param[in] mult Input quantization multiplier 2132 * @param[in] shift Input quantization shift within the range [0, 31] 2133 * @param[in] softmax_params Softmax s16 layer parameters with two pointers to LUTs specified below. 2134 * For indexing the high 9 bits are used and 7 remaining for interpolation. 2135 * That means 512 entries for the 9-bit indexing and 1 extra for interpolation, i.e. 513 2136 * values for each LUT. 
2137 * - Lookup table for exp(x), where x uniformly distributed between [-10.0 , 0.0] 2138 * - Lookup table for 1 / (1 + x), where x uniformly distributed between [0.0 , 1.0] 2139 * @param[out] output Pointer to the output tensor 2140 * @return The function returns 2141 * <code>ARM_CMSIS_NN_ARG_ERROR</code> Argument error check failed 2142 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation 2143 * 2144 * @note Supported framework: TensorFlow Lite micro (bit-accurate) 2145 * 2146 */ 2147 arm_cmsis_nn_status arm_softmax_s16(const int16_t *input, 2148 const int32_t num_rows, 2149 const int32_t row_size, 2150 const int32_t mult, 2151 const int32_t shift, 2152 const cmsis_nn_softmax_lut_s16 *softmax_params, 2153 int16_t *output); 2154 2155 /** 2156 * @brief U8 softmax function 2157 * @param[in] input Pointer to the input tensor 2158 * @param[in] num_rows Number of rows in the input tensor 2159 * @param[in] row_size Number of elements in each input row 2160 * @param[in] mult Input quantization multiplier 2161 * @param[in] shift Input quantization shift within the range [0, 31] 2162 * @param[in] diff_min Minimum difference with max in row. 
Used to check if 2163 * the quantized exponential operation can be performed 2164 * @param[out] output Pointer to the output tensor 2165 * 2166 * @note Supported framework: TensorFlow Lite micro (bit-accurate) 2167 * 2168 */ 2169 2170 void arm_softmax_u8(const uint8_t *input, 2171 const int32_t num_rows, 2172 const int32_t row_size, 2173 const int32_t mult, 2174 const int32_t shift, 2175 const int32_t diff_min, 2176 uint8_t *output); 2177 2178 /** 2179 * @defgroup Reshape Reshape Functions 2180 * 2181 */ 2182 2183 /** 2184 * @brief Reshape a s8 vector into another with different shape 2185 * @param[in] input points to the s8 input vector 2186 * @param[out] output points to the s8 output vector 2187 * @param[in] total_size total size of the input and output vectors in bytes 2188 * 2189 * @note The output is expected to be in a memory area that does not overlap with the input's 2190 * 2191 */ 2192 void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size); 2193 2194 /** 2195 * @defgroup Concatenation Concatenation Functions 2196 * 2197 */ 2198 2199 /** 2200 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis 2201 * This function should be called for each input tensor to concatenate. The argument offset_x 2202 * will be used to store the input tensor in the correct position in the output tensor 2203 * 2204 * i.e. offset_x = 0 2205 * for(i = 0 i < num_input_tensors; ++i) 2206 * { 2207 * arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x) 2208 * offset_x += input_x[i] 2209 * } 2210 * 2211 * This function assumes that the output tensor has: 2212 * -# The same height of the input tensor 2213 * -# The same number of channels of the input tensor 2214 * -# The same batch size of the input tensor 2215 * 2216 * Unless specified otherwise, arguments are mandatory. 
2217 * 2218 * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it 2219 * does not involve any arithmetic operation 2220 * 2221 * @param[in] input Pointer to input tensor. Input tensor must not overlap with the output tensor. 2222 * @param[in] input_x Width of input tensor 2223 * @param[in] input_y Height of input tensor 2224 * @param[in] input_z Channels in input tensor 2225 * @param[in] input_w Batch size in input tensor 2226 * @param[out] output Pointer to output tensor. Expected to be at least 2227 * (input_x * input_y * input_z * input_w) + offset_x 2228 * bytes. 2229 * @param[in] output_x Width of output tensor 2230 * @param[in] offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor 2231 * It is user responsibility to provide the correct value 2232 * 2233 * <b> Input constraints</b> 2234 * offset_x is less than output_x 2235 * 2236 */ 2237 void arm_concatenation_s8_x(const int8_t *input, 2238 const uint16_t input_x, 2239 const uint16_t input_y, 2240 const uint16_t input_z, 2241 const uint16_t input_w, 2242 int8_t *output, 2243 const uint16_t output_x, 2244 const uint32_t offset_x); 2245 2246 /** 2247 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis 2248 * This function should be called for each input tensor to concatenate. The argument offset_y 2249 * will be used to store the input tensor in the correct position in the output tensor 2250 * 2251 * i.e. offset_y = 0 2252 * for(i = 0 i < num_input_tensors; ++i) 2253 * { 2254 * arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y) 2255 * offset_y += input_y[i] 2256 * } 2257 * 2258 * This function assumes that the output tensor has: 2259 * -# The same width of the input tensor 2260 * -# The same number of channels of the input tensor 2261 * -# The same batch size of the input tensor 2262 * 2263 * Unless specified otherwise, arguments are mandatory. 
2264 * 2265 * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it 2266 * does not involve any arithmetic operation 2267 * 2268 * @param[in] input Pointer to input tensor. Input tensor must not overlap with the output tensor. 2269 * @param[in] input_x Width of input tensor 2270 * @param[in] input_y Height of input tensor 2271 * @param[in] input_z Channels in input tensor 2272 * @param[in] input_w Batch size in input tensor 2273 * @param[out] output Pointer to output tensor. Expected to be at least 2274 * (input_z * input_w * input_x * input_y) + offset_y 2275 * bytes. 2276 * @param[in] output_y Height of output tensor 2277 * @param[in] offset_y The offset on the Y axis to start concatenating the input tensor 2278 * It is user responsibility to provide the correct value 2279 * 2280 * <b> Input constraints</b> 2281 * offset_y is less than output_y 2282 * 2283 */ 2284 void arm_concatenation_s8_y(const int8_t *input, 2285 const uint16_t input_x, 2286 const uint16_t input_y, 2287 const uint16_t input_z, 2288 const uint16_t input_w, 2289 int8_t *output, 2290 const uint16_t output_y, 2291 const uint32_t offset_y); 2292 2293 /** 2294 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis 2295 * This function should be called for each input tensor to concatenate. The argument offset_z 2296 * will be used to store the input tensor in the correct position in the output tensor 2297 * 2298 * i.e. offset_z = 0 2299 * for(i = 0 i < num_input_tensors; ++i) 2300 * { 2301 * arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z) 2302 * offset_z += input_z[i] 2303 * } 2304 * 2305 * This function assumes that the output tensor has: 2306 * -# The same width of the input tensor 2307 * -# The same height of the input tensor 2308 * -# The same batch size of the input tensor 2309 * 2310 * Unless specified otherwise, arguments are mandatory. 
2311 * 2312 * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it 2313 * does not involve any arithmetic operation 2314 * 2315 * @param[in] input Pointer to input tensor. Input tensor must not overlap with output tensor. 2316 * @param[in] input_x Width of input tensor 2317 * @param[in] input_y Height of input tensor 2318 * @param[in] input_z Channels in input tensor 2319 * @param[in] input_w Batch size in input tensor 2320 * @param[out] output Pointer to output tensor. Expected to be at least 2321 * (input_x * input_y * input_z * input_w) + offset_z 2322 * bytes. 2323 * @param[in] output_z Channels in output tensor 2324 * @param[in] offset_z The offset on the Z axis to start concatenating the input tensor 2325 * It is user responsibility to provide the correct value 2326 * 2327 * <b> Input constraints</b> 2328 * offset_z is less than output_z 2329 * 2330 */ 2331 void arm_concatenation_s8_z(const int8_t *input, 2332 const uint16_t input_x, 2333 const uint16_t input_y, 2334 const uint16_t input_z, 2335 const uint16_t input_w, 2336 int8_t *output, 2337 const uint16_t output_z, 2338 const uint32_t offset_z); 2339 2340 /** 2341 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size) 2342 * This function should be called for each input tensor to concatenate. The argument offset_w 2343 * will be used to store the input tensor in the correct position in the output tensor 2344 * 2345 * i.e. offset_w = 0 2346 * for(i = 0 i < num_input_tensors; ++i) 2347 * { 2348 * arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w) 2349 * offset_w += input_w[i] 2350 * } 2351 * 2352 * This function assumes that the output tensor has: 2353 * -# The same width of the input tensor 2354 * -# The same height of the input tensor 2355 * -# The same number of channels of the input tensor 2356 * 2357 * Unless specified otherwise, arguments are mandatory. 
2358 * 2359 * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it 2360 * does not involve any arithmetic operation 2361 * 2362 * @param[in] input Pointer to input tensor 2363 * @param[in] input_x Width of input tensor 2364 * @param[in] input_y Height of input tensor 2365 * @param[in] input_z Channels in input tensor 2366 * @param[in] input_w Batch size in input tensor 2367 * @param[out] output Pointer to output tensor. Expected to be at least 2368 * input_x * input_y * input_z * input_w 2369 * bytes. 2370 * @param[in] offset_w The offset on the W axis to start concatenating the input tensor 2371 * It is user responsibility to provide the correct value 2372 * 2373 */ 2374 void arm_concatenation_s8_w(const int8_t *input, 2375 const uint16_t input_x, 2376 const uint16_t input_y, 2377 const uint16_t input_z, 2378 const uint16_t input_w, 2379 int8_t *output, 2380 const uint32_t offset_w); 2381 /** 2382 * @defgroup SVDF SVDF Functions 2383 * 2384 */ 2385 2386 /** 2387 * @brief s8 SVDF function with 8 bit state tensor and 8 bit time weights 2388 * 2389 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function 2390 * definition file to see if an additional buffer is required. 2391 * Optional function arm_fully_connected_s8_get_buffer_size() provides the buffer 2392 * size if an additional buffer is required. 2393 * The caller is expected to clear the buffer, if applicable, for security reasons. 2394 * @param[in] input_ctx Temporary scratch buffer 2395 * The caller is expected to clear the buffer, if applicable, for security reasons. 2396 * @param[in] output_ctx Temporary output scratch buffer 2397 * The caller is expected to clear the buffer, if applicable, for security reasons. 
2398 * @param[in] svdf_params SVDF Parameters 2399 * Range of svdf_params->input_offset : [-128, 127] 2400 * Range of svdf_params->output_offset : [-128, 127] 2401 * @param[in] input_quant_params Input quantization parameters 2402 * @param[in] output_quant_params Output quantization parameters 2403 * @param[in] input_dims Input tensor dimensions 2404 * @param[in] input_data Pointer to input tensor 2405 * @param[in] state_dims State tensor dimensions 2406 * @param[in] state_data Pointer to state tensor 2407 * @param[in] weights_feature_dims Weights (feature) tensor dimensions 2408 * @param[in] weights_feature_data Pointer to the weights (feature) tensor 2409 * @param[in] weights_time_dims Weights (time) tensor dimensions 2410 * @param[in] weights_time_data Pointer to the weights (time) tensor 2411 * @param[in] bias_dims Bias tensor dimensions 2412 * @param[in] bias_data Pointer to bias tensor 2413 * @param[in] output_dims Output tensor dimensions 2414 * @param[out] output_data Pointer to the output tensor 2415 * 2416 * @return The function returns either 2417 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or, 2418 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion. 2419 * 2420 * @details 2421 * 1. 
Supported framework: TensorFlow Lite micro 2422 */ 2423 arm_cmsis_nn_status arm_svdf_s8(const cmsis_nn_context *ctx, 2424 const cmsis_nn_context *input_ctx, 2425 const cmsis_nn_context *output_ctx, 2426 const cmsis_nn_svdf_params *svdf_params, 2427 const cmsis_nn_per_tensor_quant_params *input_quant_params, 2428 const cmsis_nn_per_tensor_quant_params *output_quant_params, 2429 const cmsis_nn_dims *input_dims, 2430 const int8_t *input_data, 2431 const cmsis_nn_dims *state_dims, 2432 int8_t *state_data, 2433 const cmsis_nn_dims *weights_feature_dims, 2434 const int8_t *weights_feature_data, 2435 const cmsis_nn_dims *weights_time_dims, 2436 const int8_t *weights_time_data, 2437 const cmsis_nn_dims *bias_dims, 2438 const int32_t *bias_data, 2439 const cmsis_nn_dims *output_dims, 2440 int8_t *output_data); 2441 2442 /** 2443 * @brief s8 SVDF function with 16 bit state tensor and 16 bit time weights 2444 * 2445 * @param[in] input_ctx Temporary scratch buffer 2446 * The caller is expected to clear the buffer, if applicable, for security reasons. 2447 * @param[in] output_ctx Temporary output scratch buffer 2448 * The caller is expected to clear the buffer, if applicable, for security reasons. 
2449 * @param[in] svdf_params SVDF Parameters 2450 * Range of svdf_params->input_offset : [-128, 127] 2451 * Range of svdf_params->output_offset : [-128, 127] 2452 * @param[in] input_quant_params Input quantization parameters 2453 * @param[in] output_quant_params Output quantization parameters 2454 * @param[in] input_dims Input tensor dimensions 2455 * @param[in] input_data Pointer to input tensor 2456 * @param[in] state_dims State tensor dimensions 2457 * @param[in] state_data Pointer to state tensor 2458 * @param[in] weights_feature_dims Weights (feature) tensor dimensions 2459 * @param[in] weights_feature_data Pointer to the weights (feature) tensor 2460 * @param[in] weights_time_dims Weights (time) tensor dimensions 2461 * @param[in] weights_time_data Pointer to the weights (time) tensor 2462 * @param[in] bias_dims Bias tensor dimensions 2463 * @param[in] bias_data Pointer to bias tensor 2464 * @param[in] output_dims Output tensor dimensions 2465 * @param[out] output_data Pointer to the output tensor 2466 * 2467 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 2468 * 2469 * @details 2470 * 1. 
Supported framework: TensorFlow Lite micro 2471 */ 2472 arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx, 2473 const cmsis_nn_context *output_ctx, 2474 const cmsis_nn_svdf_params *svdf_params, 2475 const cmsis_nn_per_tensor_quant_params *input_quant_params, 2476 const cmsis_nn_per_tensor_quant_params *output_quant_params, 2477 const cmsis_nn_dims *input_dims, 2478 const int8_t *input_data, 2479 const cmsis_nn_dims *state_dims, 2480 int16_t *state_data, 2481 const cmsis_nn_dims *weights_feature_dims, 2482 const int8_t *weights_feature_data, 2483 const cmsis_nn_dims *weights_time_dims, 2484 const int16_t *weights_time_data, 2485 const cmsis_nn_dims *bias_dims, 2486 const int32_t *bias_data, 2487 const cmsis_nn_dims *output_dims, 2488 int8_t *output_data); 2489 2490 /** 2491 * @brief Get size of additional buffer required by arm_svdf_s8(). 2492 * @param[in] filter_dims dimension of filter 2493 * @return The function returns required buffer size in bytes 2494 * 2495 */ 2496 int32_t arm_svdf_s8_get_buffer_size(const cmsis_nn_dims *filter_dims); 2497 2498 /** 2499 * @brief Get size of additional buffer required by arm_svdf_s8() for processors with DSP extension. 2500 * Refer to arm_svdf_s8_get_buffer_size() for function argument details. 2501 * 2502 * @note Intended for compilation on Host. If compiling for an Arm target, use 2503 * arm_svdf_s8_get_buffer_size(). 2504 * 2505 */ 2506 int32_t arm_svdf_s8_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims); 2507 2508 /** 2509 * @brief Get size of additional buffer required by arm_svdf_s8() for Arm(R) Helium Architecture case. 2510 * Refer to arm_svdf_s8_get_buffer_size() for function argument details. 2511 * 2512 * @note Intended for compilation on Host. If compiling for an Arm target, use 2513 * arm_svdf_s8_get_buffer_size(). 
2514 * 2515 */ 2516 int32_t arm_svdf_s8_get_buffer_size_mve(const cmsis_nn_dims *filter_dims); 2517 2518 /** 2519 * @defgroup LSTM LSTM Layer Functions 2520 * 2521 */ 2522 2523 /** 2524 * @brief LSTM unidirectional function with 8 bit input and output and 16 bit gate output, 32 bit bias. 2525 * 2526 * @param[in] input Pointer to input data 2527 * @param[out] output Pointer to output data 2528 * @param[in] params Struct containing all information about the lstm operator, see arm_nn_types. 2529 * @param[in] buffers Struct containing pointers to all temporary scratch buffers needed for the 2530 * lstm operator, see arm_nn_types. 2531 * 2532 * 2533 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 2534 * 2535 * @details 2536 * 1. Supported framework: TensorFlow Lite Micro 2537 * 2538 */ 2539 arm_cmsis_nn_status arm_lstm_unidirectional_s8(const int8_t *input, 2540 int8_t *output, 2541 const cmsis_nn_lstm_params *params, 2542 cmsis_nn_lstm_context *buffers); 2543 2544 /** 2545 * @brief LSTM unidirectional function with 16 bit input and output and 16 bit gate output, 64 bit bias. 2546 * 2547 * @param[in] input Pointer to input data 2548 * @param[out] output Pointer to output data 2549 * @param[in] params Struct containing all information about the lstm operator, see arm_nn_types. 2550 * @param[in] buffers Struct containing pointers to all temporary scratch buffers needed for the 2551 * lstm operator, see arm_nn_types. 2552 * 2553 * 2554 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> 2555 * 2556 * @details 2557 * 1. Supported framework: TensorFlow Lite Micro 2558 * 2559 */ 2560 arm_cmsis_nn_status arm_lstm_unidirectional_s16(const int16_t *input, 2561 int16_t *output, 2562 const cmsis_nn_lstm_params *params, 2563 cmsis_nn_lstm_context *buffers); 2564 2565 #ifdef __cplusplus 2566 } 2567 #endif 2568 2569 #endif /* ARM_NNFUNCTIONS_H */ 2570