1/* ###################### CMSIS Support for Cortex-M4/7/33/35P/55 SIMD Instructions ####################### */ 2/** 3 4\defgroup intrinsic_SIMD_gr Intrinsic Functions for SIMD Instructions 5\brief Access to dedicated SIMD instructions available on Armv7E-M (Cortex-M4/M7), Armv8-M Mainline 6 (Cortex-M33/M35P), and Armv8.1-M (Cortex-M55/M85). 7 8\details 9 10<b>Single Instruction Multiple Data (SIMD)</b> extensions are provided <b>for the Armv7E-M, Armv8-M Mainline, and Armv8.1-M cores listed above</b> 11to simplify development of application software. SIMD extensions increase the processing capability 12without materially increasing the power consumption. The SIMD extensions are completely transparent 13to the operating system (OS), allowing existing OS ports to be used. 14 15<b>SIMD Features:</b> 16 17 - Simultaneous computation of 2x16-bit or 4x8-bit operands 18 - Fractional arithmetic 19 - User definable saturation modes (arbitrary word-width) 20 - Dual 16x16 multiply-add/subtract 32x32 fractional MAC 21 - Simultaneous 8/16-bit select operations 22 - Performance up to 3.2 GOPS at 800MHz 23 - Performance is achieved with a "near zero" increase in power consumption on a typical implementation 24 25\b Examples: 26 27\b Addition: Add two values using SIMD function 28 29\code 30uint32_t add_halfwords(uint32_t val1, uint32_t val2) 31{ 32 return __SADD16(val1, val2); 33} 34\endcode 35 36 37 38\b Subtraction: Subtract two values using SIMD function 39 40\code 41uint32_t sub_halfwords(uint32_t val1, uint32_t val2) 42{ 43 return __SSUB16(val1, val2); 44} 45\endcode 46 47 48\b Multiplication: Performing a multiplication using SIMD function 49 50\code 51uint32_t dual_mul_add_products(uint32_t val1, uint32_t val2) 52{ 53 return __SMUAD(val1, val2); 54} 55\endcode 56 57 @{ 58*/ 59 60 61/**************************************************************************************************/ 62/** 63 \brief GE setting quad 8-bit signed addition 64 \details This function performs four 8-bit signed integer 
additions. 65 The GE bits of the APSR are set according to the results of the additions. 66 \param val1 first four 8-bit summands. 67 \param val2 second four 8-bit summands. 68 69 \returns 70 \li the addition of the first bytes from each operand, in the first byte of the return value. 71 \li the addition of the second bytes of each operand, in the second byte of the return value. 72 \li the addition of the third bytes of each operand, in the third byte of the return value. 73 \li the addition of the fourth bytes of each operand, in the fourth byte of the return value. 74 \par 75 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 76 the results of the operation. 77 \par 78 If \em res is the return value, then: 79 \li if res[7:0] \>= 0 then APSR.GE[0] = 1 else 0 80 \li if res[15:8] \>= 0 then APSR.GE[1] = 1 else 0 81 \li if res[23:16] \>= 0 then APSR.GE[2] = 1 else 0 82 \li if res[31:24] \>= 0 then APSR.GE[3] = 1 else 0 83 84 \par Operation: 85 \code 86 res[7:0] = val1[7:0] + val2[7:0] 87 res[15:8] = val1[15:8] + val2[15:8] 88 res[23:16] = val1[23:16] + val2[23:16] 89 res[31:24] = val1[31:24] + val2[31:24] 90 \endcode 91*/ 92uint32_t __SADD8(uint32_t val1, uint32_t val2); 93 94 95/**************************************************************************************************/ 96/** \ingroup intrinsic_SIMD_gr 97 \brief Q setting quad 8-bit saturating addition 98 \details This function enables you to perform four 8-bit integer additions, saturating the results to 99 the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1. 100 \param val1 first four 8-bit summands. 101 \param val2 second four 8-bit summands. 102 103 \returns 104 \li the saturated addition of the first byte of each operand in the first byte of the return value. 105 \li the saturated addition of the second byte of each operand in the second byte of the return value. 
106 \li the saturated addition of the third byte of each operand in the third byte of the return value. 107 \li the saturated addition of the fourth byte of each operand in the fourth byte of the return value. 108 \par 109 The returned results are saturated to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1. 110 111 \par Operation: 112 \code 113 res[7:0] = val1[7:0] + val2[7:0] 114 res[15:8] = val1[15:8] + val2[15:8] 115 res[23:16] = val1[23:16] + val2[23:16] 116 res[31:24] = val1[31:24] + val2[31:24] 117 \endcode 118*/ 119uint32_t __QADD8(uint32_t val1, uint32_t val2); 120 121 122/**************************************************************************************************/ 123/** 124 \brief Quad 8-bit signed addition with halved results 125 \details This function enables you to perform four signed 8-bit integer additions, halving the results. 126 \param val1 first four 8-bit summands. 127 \param val2 second four 8-bit summands. 128 129 \returns 130 \li the halved addition of the first bytes from each operand, in the first byte of the return value. 131 \li the halved addition of the second bytes from each operand, in the second byte of the return value. 132 \li the halved addition of the third bytes from each operand, in the third byte of the return value. 133 \li the halved addition of the fourth bytes from each operand, in the fourth byte of the return value. 134 135 \par Operation: 136 \code 137 res[7:0] = val1[7:0] + val2[7:0] >> 1 138 res[15:8] = val1[15:8] + val2[15:8] >> 1 139 res[23:16] = val1[23:16] + val2[23:16] >> 1 140 res[31:24] = val1[31:24] + val2[31:24] >> 1 141 \endcode 142*/ 143uint32_t __SHADD8(uint32_t val1, uint32_t val2); 144 145 146/**************************************************************************************************/ 147/** \ingroup intrinsic_SIMD_gr 148 \brief GE setting quad 8-bit unsigned addition 149 150 \details This function enables you to perform four unsigned 8-bit integer additions. 
151 The GE bits of the APSR are set according to the results. 152 153 \param val1 first four 8-bit summands for each addition. 154 \param val2 second four 8-bit summands for each addition. 155 156 \returns 157 \li the addition of the first bytes from each operand, in the first byte of the return value. 158 \li the addition of the second bytes from each operand, in the second byte of the return value. 159 \li the addition of the third bytes from each operand, in the third byte of the return value. 160 \li the addition of the fourth bytes from each operand, in the fourth byte of the return value. 161 162 \par 163 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on the results of the operation. 164 165 \par 166 If \em res is the return value, then: 167 \li if res[7:0] \>= 0x100 then APSR.GE[0] = 1 else 0 168 \li if res[15:8] \>= 0x100 then APSR.GE[1] = 1 else 0 169 \li if res[23:16] \>= 0x100 then APSR.GE[2] = 1 else 0 170 \li if res[31:24] \>= 0x100 then APSR.GE[3] = 1 else 0 171 172 \par Operation: 173 \code 174 res[7:0] = val1[7:0] + val2[7:0] 175 res[15:8] = val1[15:8] + val2[15:8] 176 res[23:16] = val1[23:16] + val2[23:16] 177 res[31:24] = val1[31:24] + val2[31:24] 178 \endcode 179*/ 180uint32_t __UADD8(uint32_t val1, uint32_t val2); 181 182 183/**************************************************************************************************/ 184/** 185 \brief Quad 8-bit unsigned saturating addition 186 187 \details This function enables you to perform four unsigned 8-bit integer additions, saturating the 188 results to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1. 189 190 \param val1 first four 8-bit summands. 191 \param val2 second four 8-bit summands. 192 193 \returns 194 \li the saturated addition of the first bytes in each operand, in the first byte of the return value. 195 \li the saturated addition of the second bytes in each operand, in the second byte of the return value. 
196 \li the saturated addition of the third bytes in each operand, in the third byte of the return value. 197 \li the saturated addition of the fourth bytes in each operand, in the fourth byte of the return value. 198 199 \par 200 The results are saturated to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1. 201 202 \par Operation: 203 \code 204 res[7:0] = val1[7:0] + val2[7:0] 205 res[15:8] = val1[15:8] + val2[15:8] 206 res[23:16] = val1[23:16] + val2[23:16] 207 res[31:24] = val1[31:24] + val2[31:24] 208 \endcode 209*/ 210uint32_t __UQADD8(uint32_t val1, uint32_t val2); 211 212 213/**************************************************************************************************/ 214/** 215 \brief Quad 8-bit unsigned addition with halved results 216 217 \details This function enables you to perform four unsigned 8-bit integer additions, halving the results. 218 219 \param val1 first four 8-bit summands. 220 \param val2 second four 8-bit summands. 221 222 \returns 223 \li the halved addition of the first bytes in each operand, in the first byte of the return value. 224 \li the halved addition of the second bytes in each operand, in the second byte of the return value. 225 \li the halved addition of the third bytes in each operand, in the third byte of the return value. 226 \li the halved addition of the fourth bytes in each operand, in the fourth byte of the return value. 
227 228 \par Operation: 229 \code 230 res[7:0] = val1[7:0] + val2[7:0] >> 1 231 res[15:8] = val1[15:8] + val2[15:8] >> 1 232 res[23:16] = val1[23:16] + val2[23:16] >> 1 233 res[31:24] = val1[31:24] + val2[31:24] >> 1 234 \endcode 235*/ 236uint32_t __UHADD8(uint32_t val1, uint32_t val2); 237 238 239/**************************************************************************************************/ 240/** 241 \brief GE setting quad 8-bit signed subtraction 242 243 \details This function enables you to perform four 8-bit signed integer subtractions.<br> 244 The GE bits in the APSR are set according to the results. 245 246 \param val1 first four 8-bit operands of each subtraction. 247 \param val2 second four 8-bit operands of each subtraction. 248 249 \returns 250 \li the subtraction of the first byte in the second operand from the first byte in the 251 first operand, in the first bytes of the return value. 252 \li the subtraction of the second byte in the second operand from the second byte in 253 the first operand, in the second byte of the return value. 254 \li the subtraction of the third byte in the second operand from the third byte in the 255 first operand, in the third byte of the return value. 256 \li the subtraction of the fourth byte in the second operand from the fourth byte in 257 the first operand, in the fourth byte of the return value. 258 259 \par Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 260 the results of the operation. 
261 262 \par 263 If \em res is the return value, then: 264 \li if res[7:0] \>= 0 then APSR.GE[0] = 1 else 0 265 \li if res[15:8] \>= 0 then APSR.GE[1] = 1 else 0 266 \li if res[23:16] \>= 0 then APSR.GE[2] = 1 else 0 267 \li if res[31:24] \>= 0 then APSR.GE[3] = 1 else 0 268 269 270 \par Operation: 271 \code 272 res[7:0] = val1[7:0] - val2[7:0] 273 res[15:8] = val1[15:8] - val2[15:8] 274 res[23:16] = val1[23:16] - val2[23:16] 275 res[31:24] = val1[31:24] - val2[31:24] 276 \endcode 277*/ 278uint32_t __SSUB8(uint32_t val1, uint32_t val2); 279 280 281/**************************************************************************************************/ 282/** 283 \brief Q setting quad 8-bit saturating subtract 284 285 \details This function enables you to perform four 8-bit integer subtractions, saturating the results 286 to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1. 287 288 \param val1 first four 8-bit operands. 289 \param val2 second four 8-bit operands. 290 291 \returns 292 \li the subtraction of the first byte in the second operand from the first byte in the 293 first operand, in the first byte of the return value. 294 \li the subtraction of the second byte in the second operand from the second byte in 295 the first operand, in the second byte of the return value. 296 \li the subtraction of the third byte in the second operand from the third byte in the 297 first operand, in the third byte of the return value. 298 \li the subtraction of the fourth byte in the second operand from the fourth byte in 299 the first operand, in the fourth byte of the return value. 300 301 \par 302 The returned results are saturated to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1. 
303 304 305 \par Operation: 306 \code 307 res[7:0] = val1[7:0] - val2[7:0] 308 res[15:8] = val1[15:8] - val2[15:8] 309 res[23:16] = val1[23:16] - val2[23:16] 310 res[31:24] = val1[31:24] - val2[31:24] 311 \endcode 312*/ 313uint32_t __QSUB8(uint32_t val1, uint32_t val2); 314 315 316/**************************************************************************************************/ 317/** 318 \brief Quad 8-bit signed subtraction with halved results 319 320 \details This function enables you to perform four signed 8-bit integer subtractions, halving the 321 results. 322 323 \param val1 first four 8-bit operands. 324 \param val2 second four 8-bit operands. 325 326 \returns 327 \li the halved subtraction of the first byte in the second operand from the first byte in the 328 first operand, in the first bytes of the return value. 329 \li the halved subtraction of the second byte in the second operand from the second byte in 330 the first operand, in the second byte of the return value. 331 \li the halved subtraction of the third byte in the second operand from the third byte in the 332 first operand, in the third byte of the return value. 333 \li the halved subtraction of the fourth byte in the second operand from the fourth byte in 334 the first operand, in the fourth byte of the return value. 335 336 \par Operation: 337 \code 338 res[7:0] = val1[7:0] - val2[7:0] >> 1 339 res[15:8] = val1[15:8] - val2[15:8] >> 1 340 res[23:16] = val1[23:16] - val2[23:16] >> 1 341 res[31:24] = val1[31:24] - val2[31:24] >> 1 342 \endcode 343*/ 344uint32_t __SHSUB8(uint32_t val1, uint32_t val2); 345 346 347/**************************************************************************************************/ 348/** 349 \brief GE setting quad 8-bit unsigned subtract 350 351 \details This function enables you to perform four 8-bit unsigned integer subtractions. 352 The GE bits in the APSR are set according to the results. 353 354 \param val1 first four 8-bit operands. 
355 \param val2 second four 8-bit operands. 356 357 \returns 358 \li the subtraction of the first byte in the second operand from the first byte in the 359 first operand, in the first byte of the return value. 360 \li the subtraction of the second byte in the second operand from the second byte in 361 the first operand, in the second byte of the return value. 362 \li the subtraction of the third byte in the second operand from the third byte in the 363 first operand, in the third byte of the return value. 364 \li the subtraction of the fourth byte in the second operand from the fourth byte in 365 the first operand, in the fourth byte of the return value. 366 367 \par 368 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 369 the results of the operation. 370 371 \par 372 If \em res is the return value, then: 373 \li if res[7:0] \>= 0 then APSR.GE[0] = 1 else 0 374 \li if res[15:8] \>= 0 then APSR.GE[1] = 1 else 0 375 \li if res[23:16] \>= 0 then APSR.GE[2] = 1 else 0 376 \li if res[31:24] \>= 0 then APSR.GE[3] = 1 else 0 377 378 379 \par Operation: 380 \code 381 res[7:0] = val1[7:0] - val2[7:0] 382 res[15:8] = val1[15:8] - val2[15:8] 383 res[23:16] = val1[23:16] - val2[23:16] 384 res[31:24] = val1[31:24] - val2[31:24] 385 \endcode 386*/ 387uint32_t __USUB8(uint32_t val1, uint32_t val2); 388 389 390/**************************************************************************************************/ 391/** 392 \brief Quad 8-bit unsigned saturating subtraction 393 394 \details This function enables you to perform four unsigned 8-bit integer subtractions, saturating 395 the results to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1. 396 397 \param val1 first four 8-bit operands. 398 \param val2 second four 8-bit operands. 399 400 \returns 401 \li the subtraction of the first byte in the second operand from the first byte in the 402 first operand, in the first byte of the return value. 
403 \li the subtraction of the second byte in the second operand from the second byte in 404 the first operand, in the second byte of the return value. 405 \li the subtraction of the third byte in the second operand from the third byte in the 406 first operand, in the third byte of the return value. 407 \li the subtraction of the fourth byte in the second operand from the fourth byte in 408 the first operand, in the fourth byte of the return value. 409 410 \par 411 The results are saturated to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1. 412 413 414 \par Operation: 415 \code 416 res[7:0] = val1[7:0] - val2[7:0] 417 res[15:8] = val1[15:8] - val2[15:8] 418 res[23:16] = val1[23:16] - val2[23:16] 419 res[31:24] = val1[31:24] - val2[31:24] 420 \endcode 421*/ 422uint32_t __UQSUB8(uint32_t val1, uint32_t val2); 423 424 425/**************************************************************************************************/ 426/** 427 \brief Quad 8-bit unsigned subtraction with halved results 428 429 \details This function enables you to perform four unsigned 8-bit integer subtractions, halving the 430 results. 431 432 \param val1 first four 8-bit operands. 433 \param val2 second four 8-bit operands. 434 435 \returns 436 \li the halved subtraction of the first byte in the second operand from the first byte in the 437 first operand, in the first byte of the return value. 438 \li the halved subtraction of the second byte in the second operand from the second byte in 439 the first operand, in the second byte of the return value. 440 \li the halved subtraction of the third byte in the second operand from the third byte in the 441 first operand, in the third byte of the return value. 442 \li the halved subtraction of the fourth byte in the second operand from the fourth byte in 443 the first operand, in the fourth byte of the return value. 
444 445 \par Operation: 446 \code 447 res[7:0] = val1[7:0] - val2[7:0] >> 1 448 res[15:8] = val1[15:8] - val2[15:8] >> 1 449 res[23:16] = val1[23:16] - val2[23:16] >> 1 450 res[31:24] = val1[31:24] - val2[31:24] >> 1 451 \endcode 452*/ 453uint32_t __UHSUB8(uint32_t val1, uint32_t val2); 454 455 456/**************************************************************************************************/ 457/** 458 \brief GE setting dual 16-bit signed addition 459 460 \details This function enables you to perform two 16-bit signed integer additions.<br> 461 The GE bits in the APSR are set according to the results of the additions. 462 463 \param val1 first two 16-bit summands. 464 \param val2 second two 16-bit summands. 465 466 \returns 467 \li the addition of the low halfwords in the low halfword of the return value. 468 \li the addition of the high halfwords in the high halfword of the return value. 469 470 \par 471 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 472 the results of the operation. 473 \par 474 If \em res is the return value, then: 475 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00 476 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00 477 478 479 \par Operation: 480 \code 481 res[15:0] = val1[15:0] + val2[15:0] 482 res[31:16] = val1[31:16] + val2[31:16] 483 \endcode 484*/ 485uint32_t __SADD16(uint32_t val1, uint32_t val2); 486 487 488/**************************************************************************************************/ 489/** 490 \brief Q setting dual 16-bit saturating addition 491 492 \details This function enables you to perform two 16-bit integer arithmetic additions in parallel, 493 saturating the results to the 16-bit signed integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1. 494 495 \param val1 first two 16-bit summands. 496 \param val2 second two 16-bit summands. 497 498 \returns 499 \li the saturated addition of the low halfwords, in the low halfword of the return value. 
500 \li the saturated addition of the high halfwords, in the high halfword of the return value. 501 502 \par 503 The returned results are saturated to the 16-bit signed integer 504 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1 505 506 \par Operation: 507 \code 508 res[15:0] = val1[15:0] + val2[15:0] 509 res[31:16] = val1[31:16] + val2[31:16] 510 \endcode 511*/ 512uint32_t __QADD16(uint32_t val1, uint32_t val2); 513 514 515/**************************************************************************************************/ 516/** 517 \brief Dual 16-bit signed addition with halved results 518 519 \details This function enables you to perform two signed 16-bit integer additions, halving the 520 results. 521 522 \param val1 first two 16-bit summands. 523 \param val2 second two 16-bit summands. 524 525 \returns 526 \li the halved addition of the low halfwords, in the low halfword of the return value. 527 \li the halved addition of the high halfwords, in the high halfword of the return value. 528 529 \par Operation: 530 \code 531 res[15:0] = val1[15:0] + val2[15:0] >> 1 532 res[31:16] = val1[31:16] + val2[31:16] >> 1 533 \endcode 534*/ 535uint32_t __SHADD16(uint32_t val1, uint32_t val2); 536 537 538/**************************************************************************************************/ 539/** 540 \brief GE setting dual 16-bit unsigned addition 541 542 \details This function enables you to perform two 16-bit unsigned integer additions.<br> 543 The GE bits in the APSR are set according to the results. 544 545 \param val1 first two 16-bit summands for each addition. 546 \param val2 second two 16-bit summands for each addition. 547 548 \returns 549 \li the addition of the low halfwords in each operand, in the low halfword of the 550 return value. 551 \li the addition of the high halfwords in each operand, in the high halfword of the 552 return value. 
553 554 \par 555 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 556 the results of the operation. 557 \par 558 If \em res is the return value, then: 559 \li if res[15:0] \>= 0x10000 then APSR.GE[1:0] = 11 else 00 560 \li if res[31:16] \>= 0x10000 then APSR.GE[3:2] = 11 else 00 561 562 \par Operation: 563 \code 564 res[15:0] = val1[15:0] + val2[15:0] 565 res[31:16] = val1[31:16] + val2[31:16] 566 \endcode 567*/ 568uint32_t __UADD16(uint32_t val1, uint32_t val2); 569 570 571/**************************************************************************************************/ 572/** 573 \brief Dual 16-bit unsigned saturating addition 574 575 \details This function enables you to perform two unsigned 16-bit integer additions, saturating the 576 results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1. 577 578 \param val1 first two 16-bit summands. 579 \param val2 second two 16-bit summands. 580 581 \returns 582 \li the addition of the low halfword in the first operand and the low halfword in the 583 second operand, in the low halfword of the return value. 584 \li the addition of the high halfword in the first operand and the high halfword in the 585 second operand, in the high halfword of the return value. 586 587 \par 588 The results are saturated to the 16-bit unsigned integer 589 range 0 \<= x \<= 2<sup>16</sup> - 1. 590 591 \par Operation: 592 \code 593 res[15:0] = val1[15:0] + val2[15:0] 594 res[31:16] = val1[31:16] + val2[31:16] 595 \endcode 596*/ 597uint32_t __UQADD16(uint32_t val1, uint32_t val2); 598 599 600/**************************************************************************************************/ 601/** 602 \brief Dual 16-bit unsigned addition with halved results 603 604 \details This function enables you to perform two unsigned 16-bit integer additions, halving the 605 results. 606 607 \param val1 first two 16-bit summands. 608 \param val2 second two 16-bit summands. 
609 610 \returns 611 \li the halved addition of the low halfwords in each operand, in the low halfword of 612 the return value. 613 \li the halved addition of the high halfwords in each operand, in the high halfword 614 of the return value. 615 616 \par Operation: 617 \code 618 res[15:0] = val1[15:0] + val2[15:0] >> 1 619 res[31:16] = val1[31:16] + val2[31:16] >> 1 620 \endcode 621*/ 622uint32_t __UHADD16(uint32_t val1, uint32_t val2); 623 624 625/**************************************************************************************************/ 626/** 627 \brief GE setting dual 16-bit signed subtraction 628 629 \details This function enables you to perform two 16-bit signed integer subtractions.<br> 630 The GE bits in the APSR are set according to the results. 631 632 \param val1 first two 16-bit operands of each subtraction. 633 \param val2 second two 16-bit operands of each subtraction. 634 635 \returns 636 \li the subtraction of the low halfword in the second operand from the low halfword 637 in the first operand, in the low halfword of the return value. 638 \li the subtraction of the high halfword in the second operand from the high halfword 639 in the first operand, in the high halfword of the return value. 640 641 \par 642 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 643 the results of the operation. 
644 \par 645 If \em res is the return value, then: 646 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00 647 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00 648 649 650 \par Operation: 651 \code 652 res[15:0] = val1[15:0] - val2[15:0] 653 res[31:16] = val1[31:16] - val2[31:16] 654 \endcode 655*/ 656uint32_t __SSUB16(uint32_t val1, uint32_t val2); 657 658 659/**************************************************************************************************/ 660/** 661 \brief Q setting dual 16-bit saturating subtract 662 663 \details This function enables you to perform two 16-bit integer subtractions, saturating the 664 results to the 16-bit signed integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1. 665 666 \param val1 first two 16-bit operands. 667 \param val2 second two 16-bit operands. 668 669 \returns 670 \li the saturated subtraction of the low halfword in the second operand from the low 671 halfword in the first operand, in the low halfword of the returned result. 672 \li the saturated subtraction of the high halfword in the second operand from the high 673 halfword in the first operand, in the high halfword of the returned result. 674 675 \par 676 The returned results are saturated to the 16-bit signed integer 677 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1. 678 679 \par Operation: 680 \code 681 res[15:0] = val1[15:0] - val2[15:0] 682 res[31:16] = val1[31:16] - val2[31:16] 683 \endcode 684*/ 685uint32_t __QSUB16(uint32_t val1, uint32_t val2); 686 687 688/**************************************************************************************************/ 689/** 690 \brief Dual 16-bit signed subtraction with halved results 691 692 \details This function enables you to perform two signed 16-bit integer subtractions, halving the 693 results. 694 695 \param val1 first two 16-bit operands. 696 \param val2 second two 16-bit operands. 
697 698 \returns 699 \li the halved subtraction of the low halfword in the second operand from the low 700 halfword in the first operand, in the low halfword of the returned result. 701 \li the halved subtraction of the high halfword in the second operand from the high 702 halfword in the first operand, in the high halfword of the returned result. 703 704 705 \par Operation: 706 \code 707 res[15:0] = val1[15:0] - val2[15:0] >> 1 708 res[31:16] = val1[31:16] - val2[31:16] >> 1 709 \endcode 710*/ 711uint32_t __SHSUB16(uint32_t val1, uint32_t val2); 712 713 714/**************************************************************************************************/ 715/** 716 \brief GE setting dual 16-bit unsigned subtract 717 718 \details This function enables you to perform two 16-bit unsigned integer subtractions.<br> 719 The GE bits in the APSR are set according to the results. 720 721 \param val1 first two 16-bit operands. 722 \param val2 second two 16-bit operands. 723 724 \returns 725 \li the subtraction of the low halfword in the second operand from the low halfword 726 in the first operand, in the low halfword of the return value. 727 \li the subtraction of the high halfword in the second operand from the high halfword 728 in the first operand, in the high halfword of the return value. 729 730 \par 731 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 732 the results of the operation. 
733 734 \par 735 If \em res is the return value, then: 736 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00 737 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00 738 739 \par Operation: 740 \code 741 res[15:0] = val1[15:0] - val2[15:0] 742 res[31:16] = val1[31:16] - val2[31:16] 743 \endcode 744*/ 745uint32_t __USUB16(uint32_t val1, uint32_t val2); 746 747 748/**************************************************************************************************/ 749/** 750 \brief Dual 16-bit unsigned saturating subtraction 751 752 \details This function enables you to perform two unsigned 16-bit integer subtractions, saturating 753 the results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1. 754 755 \param val1 first two 16-bit operands for each subtraction. 756 \param val2 second two 16-bit operands for each subtraction. 757 758 \returns 759 \li the subtraction of the low halfword in the second operand from the low halfword 760 in the first operand, in the low halfword of the return value. 761 \li the subtraction of the high halfword in the second operand from the high halfword 762 in the first operand, in the high halfword of the return value. 763 764 \par 765 The results are saturated to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1. 766 767 768 \par Operation: 769 \code 770 res[15:0] = val1[15:0] - val2[15:0] 771 res[31:16] = val1[31:16] - val2[31:16] 772 \endcode 773*/ 774uint32_t __UQSUB16(uint32_t val1, uint32_t val2); 775 776 777/**************************************************************************************************/ 778/** 779 \brief Dual 16-bit unsigned subtraction with halved results 780 781 \details This function enables you to perform two unsigned 16-bit integer subtractions, halving 782 the results. 783 784 \param val1 first two 16-bit operands. 785 \param val2 second two 16-bit operands. 
786 787 \returns 788 \li the halved subtraction of the low halfword in the second operand from the low halfword 789 in the first operand, in the low halfword of the return value. 790 \li the halved subtraction of the high halfword in the second operand from the high halfword 791 in the first operand, in the high halfword of the return value. 792 793 794 \par Operation: 795 \code 796 res[15:0] = val1[15:0] - val2[15:0] >> 1 797 res[31:16] = val1[31:16] - val2[31:16] >> 1 798 \endcode 799*/ 800uint32_t __UHSUB16(uint32_t val1, uint32_t val2); 801 802 803/**************************************************************************************************/ 804/** 805 \brief GE setting dual 16-bit addition and subtraction with exchange 806 807 \details This function inserts an SASX instruction into the instruction stream generated by the 808 compiler. It enables you to exchange the halfwords of the second operand, add the high 809 halfwords and subtract the low halfwords.<br> 810 The GE bits in the APSR are set according to the results. 811 812 \param val1 first operand for the subtraction in the low halfword, and the 813 first operand for the addition in the high halfword. 814 \param val2 second operand for the subtraction in the high halfword, and the 815 second operand for the addition in the low halfword. 816 817 \returns 818 \li the subtraction of the high halfword in the second operand from the low halfword 819 in the first operand, in the low halfword of the return value. 820 \li the addition of the high halfword in the first operand and the low halfword in the 821 second operand, in the high halfword of the return value. 822 823 \par 824 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 825 the results of the operation. 
826 \par 827 If \em res is the return value, then: 828 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00 829 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00 830 831 \par Operation: 832 \code 833 res[15:0] = val1[15:0] - val2[31:16] 834 res[31:16] = val1[31:16] + val2[15:0] 835 \endcode 836*/ 837uint32_t __SASX(uint32_t val1, uint32_t val2); 838 839 840/**************************************************************************************************/ 841/** 842 \brief Q setting dual 16-bit add and subtract with exchange 843 844 \details This function enables you to exchange the halfwords of the one operand, then add the high 845 halfwords and subtract the low halfwords, saturating the results to the 16-bit signed 846 integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1. 847 848 \param val1 first operand for the subtraction in the low halfword, and the 849 first operand for the addition in the high halfword. 850 \param val2 second operand for the subtraction in the high halfword, and the 851 second operand for the addition in the low halfword. 852 853 \returns 854 \li the saturated subtraction of the high halfword in the second operand from the low 855 halfword in the first operand, in the low halfword of the return value. 856 \li the saturated addition of the high halfword in the first operand and the low 857 halfword in the second operand, in the high halfword of the return value. 858 859 \par 860 The returned results are saturated to the 16-bit signed integer 861 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1. 
862 863 \par Operation: 864 \code 865 res[15:0] = val1[15:0] - val2[31:16] 866 res[31:16] = val1[31:16] + val2[15:0] 867 \endcode 868*/ 869uint32_t __QASX(uint32_t val1, uint32_t val2); 870 871 872/**************************************************************************************************/ 873/** 874 \brief Dual 16-bit signed addition and subtraction with halved results 875 876 \details This function enables you to exchange the two halfwords of one operand, perform one 877 signed 16-bit integer addition and one signed 16-bit subtraction, and halve the results. 878 879 \param val1 first 16-bit operands. 880 \param val2 second 16-bit operands. 881 882 \returns 883 \li the halved subtraction of the high halfword in the second operand from the low 884 halfword in the first operand, in the low halfword of the return value. 885 \li the halved addition of the low halfword in the second operand and the high 886 halfword in the first operand, in the high halfword of the return value. 887 888 \par Operation: 889 \code 890 res[15:0] = (val1[15:0] - val2[31:16]) >> 1 891 res[31:16] = (val1[31:16] + val2[15:0] ) >> 1 892 \endcode 893*/ 894uint32_t __SHASX(uint32_t val1, uint32_t val2); 895 896 897/**************************************************************************************************/ 898/** 899 \brief GE setting dual 16-bit unsigned addition and subtraction with exchange 900 901 \details This function enables you to exchange the two halfwords of the second operand, add the 902 high halfwords and subtract the low halfwords.<br> 903 The GE bits in the APSR are set according to the results. 904 905 \param val1 first operand for the subtraction in the low halfword, and the 906 first operand for the addition in the high halfword. 907 \param val2 second operand for the subtraction in the high halfword and the 908 second operand for the addition in the low halfword. 
909 910 \returns 911 \li the subtraction of the high halfword in the second operand from the low halfword 912 in the first operand, in the low halfword of the return value. 913 \li the addition of the high halfword in the first operand and the low halfword in the 914 second operand, in the high halfword of the return value. 915 916 \par 917 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 918 the results of the operation. 919 920 \par If \em res is the return value, then: 921 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00 922 \li if res[31:16] \>= 0x10000 then APSR.GE[3:2] = 11 else 00 923 924 \par Operation: 925 \code 926 res[15:0] = val1[15:0] - val2[31:16] 927 res[31:16] = val1[31:16] + val2[15:0] 928 \endcode 929*/ 930uint32_t __UASX(uint32_t val1, uint32_t val2); 931 932 933/**************************************************************************************************/ 934/** 935 \brief Dual 16-bit unsigned saturating addition and subtraction with exchange 936 937 \details This function enables you to exchange the halfwords of the second operand and perform 938 one unsigned 16-bit integer addition and one unsigned 16-bit subtraction, saturating the 939 results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1. 940 941 \param val1 first two 16-bit operands. 942 \param val2 second two 16-bit operands. 943 944 \returns 945 \li the subtraction of the high halfword in the second operand from the low halfword 946 in the first operand, in the low halfword of the return value. 947 \li the addition of the high halfword in the first operand and the low halfword in the 948 second operand, in the high halfword of the return value. 949 950 \par 951 The results are saturated to the 16-bit unsigned integer 952 range 0 \<= x \<= 2<sup>16</sup> - 1. 
953 954 \par Operation: 955 \code 956 res[15:0] = val1[15:0] - val2[31:16] 957 res[31:16] = val1[31:16] + val2[15:0] 958 \endcode 959*/ 960uint32_t __UQASX(uint32_t val1, uint32_t val2); 961 962 963/**************************************************************************************************/ 964/** 965 \brief Dual 16-bit unsigned addition and subtraction with halved results and exchange 966 967 \details This function enables you to exchange the halfwords of the second operand, add the high 968 halfwords and subtract the low halfwords, halving the results. 969 970 \param val1 first operand for the subtraction in the low halfword, and the 971 first operand for the addition in the high halfword. 972 \param val2 second operand for the subtraction in the high halfword, and the 973 second operand for the addition in the low halfword. 974 975 \returns 976 \li the halved subtraction of the high halfword in the second operand from the low 977 halfword in the first operand. 978 \li the halved addition of the high halfword in the first operand and the low halfword 979 in the second operand. 980 981 982 \par Operation: 983 \code 984 res[15:0] = (val1[15:0] - val2[31:16]) >> 1 985 res[31:16] = (val1[31:16] + val2[15:0] ) >> 1 986 \endcode 987*/ 988uint32_t __UHASX(uint32_t val1, uint32_t val2); 989 990 991/**************************************************************************************************/ 992/** 993 \brief GE setting dual 16-bit signed subtraction and addition with exchange 994 995 \details This function enables you to exchange the two halfwords of one operand and perform one 996 16-bit integer subtraction and one 16-bit addition.<br> 997 The GE bits in the APSR are set according to the results. 998 999 \param val1 first operand for the addition in the low halfword, and the first 1000 operand for the subtraction in the high halfword. 
1001 \param val2 second operand for the addition in the high halfword, and the 1002 second operand for the subtraction in the low halfword. 1003 1004 \returns 1005 \li the addition of the low halfword in the first operand and the high halfword in the 1006 second operand, in the low halfword of the return value. 1007 \li the subtraction of the low halfword in the second operand from the high halfword 1008 in the first operand, in the high halfword of the return value. 1009 \par 1010 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 1011 the results of the operation. 1012 \par 1013 If \em res is the return value, then: 1014 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00 1015 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00 1016 1017 \par Operation: 1018 \code 1019 res[15:0] = val1[15:0] + val2[31:16] 1020 res[31:16] = val1[31:16] - val2[15:0] 1021 \endcode 1022*/ 1023uint32_t __SSAX(uint32_t val1, uint32_t val2); 1024 1025 1026/**************************************************************************************************/ 1027/** 1028 \brief Q setting dual 16-bit subtract and add with exchange 1029 1030 \details This function enables you to exchange the halfwords of one operand, then subtract the 1031 high halfwords and add the low halfwords, saturating the results to the 16-bit signed 1032 integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1. 1033 1034 \param val1 first operand for the addition in the low halfword, and the first 1035 operand for the subtraction in the high halfword. 1036 \param val2 second operand for the addition in the high halfword, and the 1037 second operand for the subtraction in the low halfword. 1038 1039 \returns 1040 \li the saturated addition of the low halfword of the first operand and the high 1041 halfword of the second operand, in the low halfword of the return value. 
1042 \li the saturated subtraction of the low halfword of the second operand from the high 1043 halfword of the first operand, in the high halfword of the return value. 1044 \par 1045 The returned results are saturated to the 16-bit signed integer 1046 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1. 1047 1048 \par Operation: 1049 \code 1050 res[15:0] = val1[15:0] + val2[31:16] 1051 res[31:16] = val1[31:16] - val2[15:0] 1052 \endcode 1053*/ 1054uint32_t __QSAX(uint32_t val1, uint32_t val2); 1055 1056 1057/**************************************************************************************************/ 1058/** 1059 \brief Dual 16-bit signed subtraction and addition with halved results 1060 1061 \details This function enables you to exchange the two halfwords of one operand, perform one 1062 signed 16-bit integer subtraction and one signed 16-bit addition, and halve the results. 1063 1064 \param val1 first 16-bit operands. 1065 \param val2 second 16-bit operands. 1066 1067 \returns 1068 \li the halved addition of the low halfword in the first operand and the high halfword 1069 in the second operand, in the low halfword of the return value. 1070 \li the halved subtraction of the low halfword in the second operand from the high 1071 halfword in the first operand, in the high halfword of the return value. 1072 1073 \par Operation: 1074 \code 1075 res[15:0] = (val1[15:0] + val2[31:16]) >> 1 1076 res[31:16] = (val1[31:16] - val2[15:0] ) >> 1 1077 \endcode 1078*/ 1079uint32_t __SHSAX(uint32_t val1, uint32_t val2); 1080 1081 1082/**************************************************************************************************/ 1083/** 1084 \brief GE setting dual 16-bit unsigned subtract and add with exchange 1085 1086 \details This function enables you to exchange the halfwords of the second operand, subtract the 1087 high halfwords and add the low halfwords.<br> 1088 The GE bits in the APSR are set according to the results. 
1089 1090 \param val1 first operand for the addition in the low halfword, and the first 1091 operand for the subtraction in the high halfword. 1092 \param val2 second operand for the addition in the high halfword, and the 1093 second operand for the subtraction in the low halfword. 1094 1095 \returns 1096 \li the addition of the low halfword in the first operand and the high halfword in the 1097 second operand, in the low halfword of the return value. 1098 \li the subtraction of the low halfword in the second operand from the high halfword 1099 in the first operand, in the high halfword of the return value. 1100 \par 1101 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 1102 the results of the operation. 1103 \par 1104 If \em res is the return value, then: 1105 \li if res[15:0] \>= 0x10000 then APSR.GE[1:0] = 11 else 00 1106 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00 1107 1108 \par Operation: 1109 \code 1110 res[15:0] = val1[15:0] + val2[31:16] 1111 res[31:16] = val1[31:16] - val2[15:0] 1112 \endcode 1113*/ 1114uint32_t __USAX(uint32_t val1, uint32_t val2); 1115 1116 1117/**************************************************************************************************/ 1118/** 1119 \brief Dual 16-bit unsigned saturating subtraction and addition with exchange 1120 1121 \details This function enables you to exchange the halfwords of the second operand and perform 1122 one unsigned 16-bit integer subtraction and one unsigned 16-bit addition, saturating the 1123 results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1. 1124 1125 \param val1 first 16-bit operand for the addition in the low halfword, and the 1126 first 16-bit operand for the subtraction in the high halfword. 1127 \param val2 second 16-bit halfword for the addition in the high halfword, 1128 and the second 16-bit halfword for the subtraction in the low halfword. 
1129 1130 \returns 1131 \li the addition of the low halfword in the first operand and the high halfword in the 1132 second operand, in the low halfword of the return value. 1133 \li the subtraction of the low halfword in the second operand from the high halfword 1134 in the first operand, in the high halfword of the return value. 1135 \par 1136 The results are saturated to the 16-bit unsigned integer 1137 range 0 \<= x \<= 2<sup>16</sup> - 1. 1138 1139 \par Operation: 1140 \code 1141 res[15:0] = val1[15:0] + val2[31:16] 1142 res[31:16] = val1[31:16] - val2[15:0] 1143 \endcode 1144*/ 1145uint32_t __UQSAX(uint32_t val1, uint32_t val2); 1146 1147 1148/**************************************************************************************************/ 1149/** 1150 \brief Dual 16-bit unsigned subtraction and addition with halved results and exchange 1151 1152 \details This function enables you to exchange the halfwords of the second operand, subtract the 1153 high halfwords and add the low halfwords, halving the results. 1154 1155 \param val1 first operand for the addition in the low halfword, and the first 1156 operand for the subtraction in the high halfword. 1157 \param val2 second operand for the addition in the high halfword, and the 1158 second operand for the subtraction in the low halfword. 1159 1160 \returns 1161 \li the halved addition of the high halfword in the second operand and the low 1162 halfword in the first operand, in the low halfword of the return value. 1163 \li the halved subtraction of the low halfword in the second operand from the high 1164 halfword in the first operand, in the high halfword of the return value. 
1165 1166 \par Operation: 1167 \code 1168 res[15:0] = (val1[15:0] + val2[31:16]) >> 1 1169 res[31:16] = (val1[31:16] - val2[15:0] ) >> 1 1170 \endcode 1171*/ 1172uint32_t __UHSAX(uint32_t val1, uint32_t val2); 1173 1174 1175/**************************************************************************************************/ 1176/** 1177 \brief Unsigned sum of quad 8-bit unsigned absolute difference 1178 1179 \details This function enables you to perform four unsigned 8-bit subtractions, and add the 1180 absolute values of the differences together, returning the result as a single unsigned 1181 integer. 1182 1183 \param val1 first four 8-bit operands for the subtractions. 1184 \param val2 second four 8-bit operands for the subtractions. 1185 1186 \returns 1187 \li the subtraction of the first byte in the second operand from the first byte in the 1188 first operand. 1189 \li the subtraction of the second byte in the second operand from the second byte in 1190 the first operand. 1191 \li the subtraction of the third byte in the second operand from the third byte in the 1192 first operand. 1193 \li the subtraction of the fourth byte in the second operand from the fourth byte in 1194 the first operand. 1195 \par 1196 The sum is returned as a single unsigned integer. 
1197 1198 1199 \par Operation: 1200 \code 1201 absdiff1 = |val1[7:0] - val2[7:0] | 1202 absdiff2 = |val1[15:8] - val2[15:8] | 1203 absdiff3 = |val1[23:16] - val2[23:16]| 1204 absdiff4 = |val1[31:24] - val2[31:24]| 1205 res[31:0] = absdiff1 + absdiff2 + absdiff3 + absdiff4 1206 \endcode 1207*/ 1208uint32_t __USAD8(uint32_t val1, uint32_t val2); 1209 1210 1211/**************************************************************************************************/ 1212/** 1213 \brief Unsigned sum of quad 8-bit unsigned absolute difference with 32-bit accumulate 1214 1215 \details This function enables you to perform four unsigned 8-bit subtractions, and add the 1216 absolute values of the differences to a 32-bit accumulate operand. 1217 1218 \param val1 first four 8-bit operands for the subtractions. 1219 \param val2 second four 8-bit operands for the subtractions. 1220 \param val3 accumulation value. 1221 1222 \returns 1223 the sum of the absolute differences of the following 1224 bytes, added to the accumulation value: 1225 \li the subtraction of the first byte in the second operand from the first byte in the 1226 first operand. 1227 \li the subtraction of the second byte in the second operand from the second byte in 1228 the first operand. 1229 \li the subtraction of the third byte in the second operand from the third byte in the 1230 first operand. 1231 \li the subtraction of the fourth byte in the second operand from the fourth byte in 1232 the first operand. 
1233 1234 1235 \par Operation: 1236 \code 1237 absdiff1 = |val1[7:0] - val2[7:0] | 1238 absdiff2 = |val1[15:8] - val2[15:8] | 1239 absdiff3 = |val1[23:16] - val2[23:16]| 1240 absdiff4 = |val1[31:24] - val2[31:24]| 1241 sum = absdiff1 + absdiff2 + absdiff3 + absdiff4 1242 res[31:0] = sum[31:0] + val3[31:0] 1243 \endcode 1244*/ 1245uint32_t __USADA8(uint32_t val1, uint32_t val2, uint32_t val3); 1246 1247 1248/**************************************************************************************************/ 1249/** 1250 \brief Q setting dual 16-bit saturate 1251 1252 \details This function enables you to saturate two signed 16-bit values to a selected signed range.<br> 1253 The Q bit is set if either operation saturates. 1254 1255 \param val1 two signed 16-bit values to be saturated. 1256 \param val2 bit position for saturation, an integral constant expression in the 1257 range 1 to 16. 1258 1259 1260 \returns 1261 the two signed 16-bit values of \em val1, each saturated to the selected 1262 signed range: 1263 \li the signed saturation of the low halfword in \em val1, saturated to the bit position 1264 specified in \em val2 and returned in the low halfword of the return value. 1265 \li the signed saturation of the high halfword in <i>val1</i>, saturated to the bit position 1266 specified in <i>val2</i> and returned in the high halfword of the return value. 1267 1268 1269 \par Operation: 1270 \code 1271 Saturate halfwords in val1 to the signed range specified by the bit position in val2 1272 \endcode 1273*/ 1274uint32_t __SSAT16(uint32_t val1, const uint32_t val2); 1275 1276 1277/**************************************************************************************************/ 1278/** 1279 \brief Q setting dual 16-bit unsigned saturate 1280 1281 \details This function enables you to saturate two signed 16-bit values to a selected unsigned 1282 range.<br> 1283 The Q bit is set if either operation saturates. 1284 1285 \param val1 two 16-bit values that are to be saturated. 
1286 \param val2 bit position for saturation, an integral constant 1287 expression in the range 0 to 15. 1288 1289 1290 \returns 1291 the saturation of the two signed 16-bit values, as non-negative values. 1292 \li the saturation of the low halfword in \em val1, saturated to the bit position 1293 specified in \em val2 and returned in the low halfword of the return value. 1294 \li the saturation of the high halfword in \em val1, saturated to the bit position 1295 specified in \em val2 and returned in the high halfword of the return value. 1296 1297 1298 \par Operation: 1299 \code 1300 Saturate halfwords in val1 to the unsigned range specified by the bit position in val2 1301 \endcode 1302*/ 1303uint32_t __USAT16(uint32_t val1, const uint32_t val2); 1304 1305 1306/**************************************************************************************************/ 1307/** 1308 \brief Dual extract 8-bits and zero-extend to 16-bits 1309 1310 \details This function enables you to extract two 8-bit values from an operand and zero-extend 1311 them to 16 bits each. 1312 1313 \param val two 8-bit values in val[7:0] and val[23:16] to be zero-extended. 1314 1315 1316 \returns 1317 the 8-bit values zero-extended to 16-bit values. 1318 \li zero-extended value of val[7:0] in the low halfword of the return value. 1319 \li zero-extended value of val[23:16] in the high halfword of the return value. 1320 1321 1322 \par Operation: 1323 \code 1324 res[15:0] = ZeroExtended(val[7:0] ) 1325 res[31:16] = ZeroExtended(val[23:16]) 1326 \endcode 1327*/ 1328uint32_t __UXTB16(uint32_t val); 1329 1330 1331/**************************************************************************************************/ 1332/** 1333 \brief Dual extracted 8-bit to 16-bit unsigned addition 1334 1335 \details This function enables you to extract two 8-bit values from one operand, zero-extend them 1336 to 16 bits each, and add the results to two 16-bit values from another operand. 
1337 1338 \param val1 two 16-bit values added to the zero-extended 8-bit values. 1339 \param val2 two 8-bit values to be extracted and zero-extended. 1340 1341 1342 \returns 1343 the 8-bit values in \em val2, zero-extended to 16-bit values 1344 and added to \em val1. 1345 1346 1347 \par Operation: 1348 \code 1349 res[15:0] = ZeroExt(val2[7:0] to 16 bits) + val1[15:0] 1350 res[31:16] = ZeroExt(val2[23:16] to 16 bits) + val1[31:16] 1351 \endcode 1352*/ 1353uint32_t __UXTAB16(uint32_t val1, uint32_t val2); 1354 1355 1356/**************************************************************************************************/ 1357/** 1358 \brief Dual extract 8-bits and sign extend each to 16-bits 1359 1360 \details This function enables you to extract two 8-bit values from an operand and sign-extend 1361 them to 16 bits each. 1362 1363 \param val two 8-bit values in val[7:0] and val[23:16] to be sign-extended. 1364 1365 1366 1367 \returns 1368 the 8-bit values sign-extended to 16-bit values. 1369 \li sign-extended value of val[7:0] in the low halfword of the return value. 1370 \li sign-extended value of val[23:16] in the high halfword of the return value. 1371 1372 1373 \par Operation: 1374 \code 1375 res[15:0] = SignExtended(val[7:0] ) 1376 res[31:16] = SignExtended(val[23:16]) 1377 \endcode 1378*/ 1379uint32_t __SXTB16(uint32_t val); 1380 1381 1382/**************************************************************************************************/ 1383/** 1384 \brief Rotate right, dual extract 8-bits and sign extend each to 16-bits 1385 1386 \details This function enables you to rotate an operand by 8/16/24 bit, extract two 8-bit values and sign-extend 1387 them to 16 bits each. 1388 1389 \param val two 8-bit values in val[7:0] and val[23:16] to be sign-extended. 1390 \param rotate number of bits to rotate val. Constant rotate value of 8, 16 and 24 can be 1391 optimally used with a single __SXTB16 instruction.
Any other valid constant rotate 1392 value will result in use of two instructions, __ROR and __SXTB16 1393 1394 1395 \returns 1396 the 8-bit values sign-extended to 16-bit values. 1397 \li sign-extended value of val[7:0] in the low halfword of the return value. 1398 \li sign-extended value of val[23:16] in the high halfword of the return value. 1399 1400 1401 \par Operation: 1402 \code 1403 val = Rotate(val, rotate) 1404 res[15:0] = SignExtended(val[7:0]) 1405 res[31:16] = SignExtended(val[23:16]) 1406 \endcode 1407*/ 1408uint32_t __SXTB16_RORn(uint32_t val, uint32_t rotate); 1409 1410 1411/**************************************************************************************************/ 1412/** 1413 \brief Dual extracted 8-bit to 16-bit signed addition 1414 1415 \details This function enables you to extract two 8-bit values from the second operand (at bit 1416 positions [7:0] and [23:16]), sign-extend them to 16-bits each, and add the results to the 1417 first operand. 1418 1419 \param val1 two 16-bit values added to the sign-extended 8-bit values. 1420 \param val2 two 8-bit values to be extracted and sign-extended. 1421 1422 1423 1424 \returns 1425 the addition of \em val1 and \em val2, where the 8-bit values in 1426 val2[7:0] and val2[23:16] have been extracted and sign-extended prior to the addition. 
1427 1428 1429 \par Operation: 1430 \code 1431 res[15:0] = val1[15:0] + SignExtended(val2[7:0]) 1432 res[31:16] = val1[31:16] + SignExtended(val2[23:16]) 1433 \endcode 1434*/ 1435uint32_t __SXTAB16(uint32_t val1, uint32_t val2); 1436 1437 1438/**************************************************************************************************/ 1439/** 1440 \brief Rotate right, followed by sign extension of two 8-bits with add to 16-bits 1441 1442 \details This function enables you to rotate the second operand by 8/16/24 bit as specified by the third 1443 operand, extract two 8-bit values from the rotated result (at bit positions [7:0] and [23:16]), 1444 sign-extend them to 16-bits each, and add the results to the first operand. 1445 1446 \param val1 two 16-bit values in val1[15:0] and val1[31:16] 1447 \param val2 two 8-bit values in val2[7:0] and val2[23:16] to be sign-extended post rotation 1448 \param rotate number of bits to rotate val2. Constant rotate value of 8, 16 and 24 can be 1449 optimally used with a single __SXTAB16 instruction. Any other valid constant rotate 1450 value will result in use of two instructions, __ROR and __SXTAB16 1451 1452 1453 \returns 1454 the addition of \em val1 and \em val2, where the rotated 8-bit values in 1455 val2[7:0] and val2[23:16] have been extracted and sign-extended prior to the addition. 1456 1457 1458 \par Operation: 1459 \code 1460 val2 = Rotate(val2, rotate) 1461 res[15:0] = val1[15:0] + SignExtended(val2[7:0]) 1462 res[31:16] = val1[31:16] + SignExtended(val2[23:16]) 1463 \endcode 1464*/ 1465uint32_t __SXTAB16_RORn(uint32_t val1, uint32_t val2, uint32_t rotate); 1466 1467 1468/**************************************************************************************************/ 1469/** 1470 \brief Q setting sum of dual 16-bit signed multiply 1471 1472 \details This function enables you to perform two 16-bit signed multiplications, adding the 1473 products together.<br> 1474 The Q bit is set if the addition overflows. 
1475 1476 \param val1 first 16-bit operands for each multiplication. 1477 \param val2 second 16-bit operands for each multiplication. 1478 1479 1480 1481 \returns 1482 the sum of the products of the two 16-bit signed multiplications. 1483 1484 1485 \par Operation: 1486 \code 1487 p1 = val1[15:0] * val2[15:0] 1488 p2 = val1[31:16] * val2[31:16] 1489 res[31:0] = p1 + p2 1490 \endcode 1491*/ 1492uint32_t __SMUAD(uint32_t val1, uint32_t val2); 1493 1494 1495/**************************************************************************************************/ 1496/** 1497 \brief Q setting sum of dual 16-bit signed multiply with exchange 1498 1499 \details This function enables you to perform two 16-bit signed multiplications with exchanged 1500 halfwords of the second operand, adding the products together.<br> 1501 The Q bit is set if the addition overflows. 1502 1503 \param val1 first 16-bit operands for each multiplication. 1504 \param val2 second 16-bit operands for each multiplication. 1505 1506 1507 1508 \returns 1509 the sum of the products of the two 16-bit signed multiplications with exchanged 1510 halfwords of the second operand. 1511 1512 1513 \par Operation: 1514 \code 1515 p1 = val1[15:0] * val2[31:16] 1516 p2 = val1[31:16] * val2[15:0] 1517 res[31:0] = p1 + p2 1518 \endcode 1519*/ 1520uint32_t __SMUADX(uint32_t val1, uint32_t val2); 1521 1522 1523/**************************************************************************************************/ 1524/** 1525 \brief 32-bit signed multiply with 32-bit truncated accumulator. 1526 1527 \details This function enables you to perform a signed 32-bit multiplications, adding the most significant 32 bits 1528 of the 64-bit result to a 32-bit accumulate operand.<br> 1529 1530 \param val1 first operand for multiplication. 1531 \param val2 second operand for multiplication. 1532 \param val3 accumulate value. 
1533 1534 1535 \returns the most significant 32 bits of the product, added to the accumulate 1536 value, as a 32-bit integer. 1537 1538 \par Operation: 1539 \code 1540 p = val1 * val2 1541 res[31:0] = p[63:32] + val3[31:0] 1542 \endcode 1543*/ 1544uint32_t __SMMLA (int32_t val1, int32_t val2, int32_t val3); 1545 1546 1547/**************************************************************************************************/ 1548/** 1549 \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator 1550 1551 \details This function enables you to perform two signed 16-bit multiplications, adding both 1552 results to a 32-bit accumulate operand.<br> 1553 The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications. 1554 1555 \param val1 first 16-bit operands for each multiplication. 1556 \param val2 second 16-bit operands for each multiplication. 1557 \param val3 accumulate value. 1558 1559 1560 \returns 1561 the product of each multiplication added to the accumulate 1562 value, as a 32-bit integer. 1563 1564 1565 \par Operation: 1566 \code 1567 p1 = val1[15:0] * val2[15:0] 1568 p2 = val1[31:16] * val2[31:16] 1569 res[31:0] = p1 + p2 + val3[31:0] 1570 \endcode 1571*/ 1572uint32_t __SMLAD(uint32_t val1, uint32_t val2, uint32_t val3); 1573 1574 1575/**************************************************************************************************/ 1576/** 1577 \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator 1578 1579 \details This function enables you to perform two signed 16-bit multiplications with exchanged 1580 halfwords of the second operand, adding both results to a 32-bit accumulate operand.<br> 1581 The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications. 1582 1583 \param val1 first 16-bit operands for each multiplication. 1584 \param val2 second 16-bit operands for each multiplication. 1585 \param val3 accumulate value. 
1586 1587 1588 \returns 1589 the product of each multiplication with exchanged 1590 halfwords of the second operand added to the accumulate value, as a 32-bit integer. 1591 1592 1593 \par Operation: 1594 \code 1595 p1 = val1[15:0] * val2[31:16] 1596 p2 = val1[31:16] * val2[15:0] 1597 res[31:0] = p1 + p2 + val3[31:0] 1598 \endcode 1599*/ 1600uint32_t __SMLADX(uint32_t val1, uint32_t val2, uint32_t val3); 1601 1602 1603/**************************************************************************************************/ 1604/** 1605 \brief Dual 16-bit signed multiply with single 64-bit accumulator 1606 1607 \details This function enables you to perform two signed 16-bit multiplications, adding both 1608 results to a 64-bit accumulate operand. Overflow is only possible as a result of the 64-bit 1609 addition. This overflow is not detected if it occurs. Instead, the result wraps around 1610 modulo 2<sup>64</sup>. 1611 1612 \param val1 first 16-bit operands for each multiplication. 1613 \param val2 second 16-bit operands for each multiplication. 1614 \param val3 accumulate value. 1615 1616 1617 \returns 1618 the product of each multiplication added to the accumulate value. 1619 1620 1621 \par Operation: 1622 \code 1623 p1 = val1[15:0] * val2[15:0] 1624 p2 = val1[31:16] * val2[31:16] 1625 sum = p1 + p2 + val3[63:0] 1626 res[63:32] = sum[63:32] 1627 res[31:0] = sum[31:0] 1628 \endcode 1629*/ 1630uint64_t __SMLALD(uint32_t val1, uint32_t val2, uint64_t val3); 1631 1632 1633/**************************************************************************************************/ 1634/** 1635 \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator 1636 1637 \details This function enables you to exchange the halfwords of the second operand, and perform 1638 two signed 16-bit multiplications, adding both results to a 64-bit accumulate operand. 1639 Overflow is only possible as a result of the 64-bit addition.
This overflow is not detected 1640 if it occurs. Instead, the result wraps around modulo 2<sup>64</sup>. 1641 1642 \param val1 first 16-bit operands for each multiplication. 1643 \param val2 second 16-bit operands for each multiplication. 1644 \param val3 accumulate value. 1645 1646 1647 \returns 1648 the product of each multiplication added to the accumulate value. 1649 1650 1651 \par Operation: 1652 \code 1653 p1 = val1[15:0] * val2[31:16] 1654 p2 = val1[31:16] * val2[15:0] 1655 sum = p1 + p2 + val3[63:0] 1656 res[63:32] = sum[63:32] 1657 res[31:0] = sum[31:0] 1658 \endcode 1659*/ 1660unsigned long long __SMLALDX(uint32_t val1, uint32_t val2, unsigned long long val3); 1661 1662 1663/**************************************************************************************************/ 1664/** 1665 \brief Dual 16-bit signed multiply returning difference 1666 1667 \details This function enables you to perform two 16-bit signed multiplications, taking the 1668 difference of the products by subtracting the high halfword product from the low 1669 halfword product. 1670 1671 \param val1 first 16-bit operands for each multiplication. 1672 \param val2 second 16-bit operands for each multiplication. 1673 1674 1675 \returns 1676 the difference of the products of the two 16-bit signed multiplications. 1677 1678 1679 \par Operation: 1680 \code 1681 p1 = val1[15:0] * val2[15:0] 1682 p2 = val1[31:16] * val2[31:16] 1683 res[31:0] = p1 - p2 1684 \endcode 1685*/ 1686uint32_t __SMUSD(uint32_t val1, uint32_t val2); 1687 1688 1689/**************************************************************************************************/ 1690/** 1691 \brief Dual 16-bit signed multiply with exchange returning difference 1692 1693 \details This function enables you to perform two 16-bit signed multiplications, subtracting one 1694 of the products from the other. The halfwords of the second operand are exchanged 1695 before performing the arithmetic.
This produces top * bottom and bottom * top 1696 multiplication. 1697 1698 \param val1 first 16-bit operands for each multiplication. 1699 \param val2 second 16-bit operands for each multiplication. 1700 1701 1702 \returns 1703 the difference of the products of the two 16-bit signed multiplications. 1704 1705 1706 \par Operation: 1707 \code 1708 p1 = val1[15:0] * val2[31:16] 1709 p2 = val1[31:16] * val2[15:0] 1710 res[31:0] = p1 - p2 1711 \endcode 1712*/ 1713uint32_t __SMUSDX(uint32_t val1, uint32_t val2); 1714 1715 1716/**************************************************************************************************/ 1717/** 1718 \brief Q setting dual 16-bit signed multiply subtract with 32-bit accumulate 1719 1720 \details This function enables you to perform two 16-bit signed multiplications, take the 1721 difference of the products, subtracting the high halfword product from the low halfword 1722 product, and add the difference to a 32-bit accumulate operand.<br> 1723 The Q bit is set if the accumulation overflows. Overflow cannot occur during the multiplications or the 1724 subtraction. 1725 1726 \param val1 first 16-bit operands for each multiplication. 1727 \param val2 second 16-bit operands for each multiplication. 1728 \param val3 accumulate value. 1729 1730 1731 \returns 1732 the difference of the product of each multiplication, added 1733 to the accumulate value. 
1734 1735 1736 \par Operation: 1737 \code 1738 p1 = val1[15:0] * val2[15:0] 1739 p2 = val1[31:16] * val2[31:16] 1740 res[31:0] = p1 - p2 + val3[31:0] 1741 \endcode 1742*/ 1743uint32_t __SMLSD(uint32_t val1, uint32_t val2, uint32_t val3); 1744 1745 1746/**************************************************************************************************/ 1747/** 1748 \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate 1749 1750 \details This function enables you to exchange the halfwords in the second operand, then perform 1751 two 16-bit signed multiplications. The difference of the products is added to a 32-bit 1752 accumulate operand.<br> 1753 The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications or the subtraction. 1754 1755 \param val1 first 16-bit operands for each multiplication. 1756 \param val2 second 16-bit operands for each multiplication. 1757 \param val3 accumulate value. 1758 1759 1760 \returns 1761 the difference of the product of each multiplication, added 1762 to the accumulate value. 1763 1764 1765 \par Operation: 1766 \code 1767 p1 = val1[15:0] * val2[31:16] 1768 p2 = val1[31:16] * val2[15:0] 1769 res[31:0] = p1 - p2 + val3[31:0] 1770 \endcode 1771*/ 1772uint32_t __SMLSDX(uint32_t val1, uint32_t val2, uint32_t val3); 1773 1774 1775/**************************************************************************************************/ 1776/** 1777 \brief Dual 16-bit signed multiply subtract with 64-bit accumulate 1778 1779 \details This function enables you to perform two 16-bit signed multiplications, take the 1780 difference of the products, subtracting the high halfword product from the low halfword 1781 product, and add the difference to a 64-bit accumulate operand. Overflow cannot occur 1782 during the multiplications or the subtraction. Overflow can occur as a result of the 64-bit 1783 addition, and this overflow is not detected.
Instead, the result wraps around 1784 modulo 2<sup>64</sup>. 1785 1786 \param val1 first 16-bit operands for each multiplication. 1787 \param val2 second 16-bit operands for each multiplication. 1788 \param val3 accumulate value. 1789 1790 1791 \returns 1792 the difference of the product of each multiplication, 1793 added to the accumulate value. 1794 1795 1796 \par Operation: 1797 \code 1798 p1 = val1[15:0] * val2[15:0] 1799 p2 = val1[31:16] * val2[31:16] 1800 res[63:0] = p1 - p2 + val3[63:0] 1801 \endcode 1802*/ 1803uint64_t __SMLSLD(uint32_t val1, uint32_t val2, uint64_t val3); 1804 1805 1806/**************************************************************************************************/ 1807/** 1808 \brief Dual 16-bit signed multiply with exchange subtract with 64-bit accumulate 1809 1810 \details This function enables you to exchange the halfwords of the second operand, perform two 1811 16-bit multiplications, adding the difference of the products to a 64-bit accumulate 1812 operand. Overflow cannot occur during the multiplications or the subtraction. Overflow 1813 can occur as a result of the 64-bit addition, and this overflow is not detected. Instead, 1814 the result wraps around modulo 2<sup>64</sup>. 1815 1816 \param val1 first 16-bit operands for each multiplication. 1817 \param val2 second 16-bit operands for each multiplication. 1818 \param val3 accumulate value. 1819 1820 1821 \returns 1822 the difference of the product of each multiplication, 1823 added to the accumulate value.
1824 1825 1826 \par Operation: 1827 \code 1828 p1 = val1[15:0] * val2[31:16] 1829 p2 = val1[31:16] * val2[15:0] 1830 res[63:0] = p1 - p2 + val3[63:0] 1831 \endcode 1832*/ 1833unsigned long long __SMLSLDX(uint32_t val1, uint32_t val2, unsigned long long val3); 1834 1835 1836/**************************************************************************************************/ 1837/** 1838 \brief Select bytes based on GE bits 1839 1840 \details This function inserts a SEL instruction into the instruction stream generated by the 1841 compiler. It enables you to select bytes from the input parameters, whereby the bytes 1842 that are selected depend upon the results of previous SIMD instruction function. The 1843 results of previous SIMD instruction function are represented by the Greater than or 1844 Equal flags in the Application Program Status Register (APSR). 1845 The __SEL function works equally well on both halfword and byte operand function 1846 results. This is because halfword operand operations set two (duplicate) GE bits per 1847 value. 1848 1849 \param val1 four selectable 8-bit values. 1850 \param val2 four selectable 8-bit values. 
1851 1852 1853 \returns 1854 The function selects bytes from the input parameters and returns them in the 1855 return value, res, according to the following criteria: 1856 \li if APSR.GE[0] == 1 then res[7:0] = val1[7:0] else res[7:0] = val2[7:0] 1857 \li if APSR.GE[1] == 1 then res[15:8] = val1[15:8] else res[15:8] = val2[15:8] 1858 \li if APSR.GE[2] == 1 then res[23:16] = val1[23:16] else res[23:16] = val2[23:16] 1859 \li if APSR.GE[3] == 1 then res[31:24] = val1[31:24] else res[31:24] = val2[31:24] 1860 1861*/ 1862uint32_t __SEL(uint32_t val1, uint32_t val2); 1863 1864 1865/**************************************************************************************************/ 1866/** 1867 \brief Q setting saturating add 1868 1869 \details This function enables you to obtain the saturating add of two integers.<br> 1870 The Q bit is set if the operation saturates. 1871 1872 \param val1 first summand of the saturating add operation. 1873 \param val2 second summand of the saturating add operation. 1874 1875 1876 \returns 1877 the saturating addition of val1 and val2. 1878 1879 \par Operation: 1880 \code 1881 res[31:0] = SAT(val1 + val2) 1882 \endcode 1883*/ 1884uint32_t __QADD(uint32_t val1, uint32_t val2); 1885 1886 1887/**************************************************************************************************/ 1888/** 1889 \brief Q setting saturating subtract 1890 1891 \details This function enables you to obtain the saturating subtraction of two integers.<br> 1892 The Q bit is set if the operation saturates. 1893 1894 \param val1 minuend of the saturating subtraction operation. 1895 \param val2 subtrahend of the saturating subtraction operation. 1896 1897 1898 \returns 1899 the saturating subtraction of val1 and val2.
1900 1901 \par Operation: 1902 \code 1903 res[31:0] = SAT(val1 - SAT(val2)) 1904 \endcode 1905*/ 1906uint32_t __QSUB(uint32_t val1, uint32_t val2); 1907 1908 1909/**************************************************************************************************/ 1910/** 1911 \brief Halfword packing instruction. Combines bits[15:0] of <i>val1</i> 1912 with bits[31:16] of <i>val2</i> levitated with the <i>val3</i>. 1913 1914 \details Combine a halfword from one register with a halfword from another register. 1915 The second argument can be left-shifted before extraction of the halfword. The registers 1916 PC and SP are not allowed as arguments. This instruction does not change the flags. 1917 1918 \param val1 first 16-bit operands 1919 \param val2 second 16-bit operands 1920 \param val3 value for left-shifting <i>val2</i>. Value range [0..31]. 1921 1922 1923 \returns 1924 the combination of halfwords. 1925 1926 \par Operation: 1927 \code 1928 res[15:0] = val1[15:0] 1929 res[31:16] = val2[31:16]<<val3 1930 \endcode 1931*/ 1932uint32_t __PKHBT(uint32_t val1, uint32_t val2, uint32_t val3); 1933 1934 1935/**************************************************************************************************/ 1936/** 1937 \brief Halfword packing instruction. Combines bits[31:16] of <i>val1</i> 1938 with bits[15:0] of <i>val2</i> right-shifted with the <i>val3</i>. 1939 1940 \details Combines a halfword from one register with a halfword from another register. 1941 The second argument can be right-shifted before extraction of the halfword. The registers 1942 PC and SP are not allowed as arguments. This instruction does not change the flags. 1943 1944 \param val1 second 16-bit operands 1945 \param val2 first 16-bit operands 1946 \param val3 value for right-shifting <i>val2</i>. Value range [1..32]. 1947 1948 1949 \returns 1950 the combination of halfwords. 
1951 1952 \par Operation: 1953 \code 1954 res[15:0] = (val2 >> val3)[15:0] 1955 res[31:16] = val1[31:16] 1956 \endcode 1957*/ 1958uint32_t __PKHTB(uint32_t val1, uint32_t val2, uint32_t val3); 1959 1960/** @} */ /* end group intrinsic_SIMD_gr */ 1961