/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
 * Note: I added some stuff for use with gnupg
 *
 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
 *	2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this file; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA. */

#include <linux/count_zeros.h>

/* You have to define the following before including this file:
 *
 * UWtype -- An unsigned type, default type for operations (typically a "word")
 * UHWtype -- An unsigned type, at least half the size of UWtype.
 * UDWtype -- An unsigned type, at least twice as large a UWtype
 * W_TYPE_SIZE -- size in bits of UWtype
 *
 * SItype, USItype -- Signed and unsigned 32 bit types.
 * DItype, UDItype -- Signed and unsigned 64 bit types.
 *
 * On a 32 bit machine UWtype should typically be USItype;
 * on a 64 bit machine, UWtype should typically be UDItype.
 */

/* Helpers for splitting a full word into half-words:
 * __ll_B is 2^(W_TYPE_SIZE/2), i.e. the weight of the high half. */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

/* This is used to make sure no undesirable sharing between different libraries
 * that use this file takes place.
 */
#ifndef __MPN
#define __MPN(x) __##x
#endif

/* Define auxiliary asm macros.
 *
 * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
 * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
 * word product in HIGH_PROD and LOW_PROD.
 *
 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
 * UDWtype product.  This is just a variant of umul_ppmm.

 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 * denominator) divides a UDWtype, composed by the UWtype integers
 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
 * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
 * than DENOMINATOR for correct operation.  If, in addition, the most
 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
 * UDIV_NEEDS_NORMALIZATION is defined to 1.
 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
 * is rounded towards 0.
 *
 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
 * msb to the first non-zero bit in the UWtype X.  This is the number of
 * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
 *
 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
 * from the least significant end.
 *
 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
 * high_addend_2, low_addend_2) adds two UWtype integers, composed by
 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
 * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
 * (i.e. carry out) is not stored anywhere, and is lost.
 *
 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
 * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
 * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
 * and is lost.
 *
 * If any of these macros are left undefined for a particular CPU,
 * C macros are used. */

/* The CPUs come in alphabetical order below.
 *
 * Please add support for more CPUs here, or improve the current support
 * for the CPUs below! */

#if defined(__GNUC__) && !defined(NO_ASM)

/* We sometimes need to clobber "cc" with gcc2, but that would not be
 * understood by gcc1.  Use cpp to avoid major code duplication. */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */

/***************************************
	**************  A29K  *****************
	***************************************/
#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
/* Two-word add: low add sets carry, "addc" consumes it for the high word. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %1,%4,%5\n" \
		"addc %0,%2,%3" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%r" ((USItype)(ah)), \
	  "rI" ((USItype)(bh)), \
	  "%r" ((USItype)(al)), \
	  "rI" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %1,%4,%5\n" \
		"subc %0,%2,%3" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "r" ((USItype)(ah)), \
	  "rI" ((USItype)(bh)), \
	  "r" ((USItype)(al)), \
	  "rI" ((USItype)(bl)))
/* multiplu/multmu produce the low and high halves of the 64-bit product. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("multiplu %0,%1,%2" \
	: "=r" ((USItype)(xl)) \
	: "r" (__m0), \
	  "r" (__m1)); \
	__asm__ ("multmu %0,%1,%2" \
	: "=r" ((USItype)(xh)) \
	: "r" (__m0), \
	  "r" (__m1)); \
} while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("dividu %0,%3,%4" \
	: "=r" ((USItype)(q)), \
	  "=q" ((USItype)(r)) \
	: "1" ((USItype)(n1)), \
	  "r" ((USItype)(n0)), \
	  "r" ((USItype)(d)))
#endif /* __a29k__ */

#if defined(__alpha) && W_TYPE_SIZE == 64
/* Alpha: the compiler builtin provides the high 64 bits of the product. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	UDItype __m0 = (m0), __m1 = (m1); \
	(ph) = __builtin_alpha_umulh(__m0, __m1); \
	(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 46
#ifndef LONGLONG_STANDALONE
/* 128/64 division is done out of line (__udiv_qrnnd, defined elsewhere). */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UDItype __r; \
	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
	(r) = __r; \
} while (0)
extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#endif /* __alpha */

/***************************************
	**************  ARM  ******************
	***************************************/
#if defined(__arm__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("adds %1, %4, %5\n" \
		"adc %0, %2, %3" \
	: "=r" (sh), \
	  "=&r" (sl) \
	: "%r" ((USItype)(ah)), \
	  "rI" ((USItype)(bh)), \
	  "%r" ((USItype)(al)), \
	  "rI" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subs %1, %4, %5\n" \
		"sbc %0, %2, %3" \
	: "=r" (sh), \
	  "=&r" (sl) \
	: "r" ((USItype)(ah)), \
	  "rI" ((USItype)(bh)), \
	  "r" ((USItype)(al)), \
	  "rI" ((USItype)(bl)))
#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
/* Pre-v4 ARM has no umull; build the 32x32->64 product from four
 * 16x16 partial products with explicit carry fixup. */
#define umul_ppmm(xh, xl, a, b) \
	__asm__ ("@ Inlined umul_ppmm\n" \
		"mov %|r0, %2, lsr #16 @ AAAA\n" \
		"mov %|r2, %3, lsr #16 @ BBBB\n" \
		"bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \
		"bic %0, %3, %|r2, lsl #16 @ bbbb\n" \
		"mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \
		"mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \
		"mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \
		"mul %0, %|r0, %0 @ AAAA * bbbb\n" \
		"adds %|r0, %1, %0 @ central sum\n" \
		"addcs %|r2, %|r2, #65536\n" \
		"adds %1, %|r1, %|r0, lsl #16\n" \
		"adc %0, %|r2, %|r0, lsr #16" \
	: "=&r" (xh), \
	  "=r" (xl) \
	: "r" ((USItype)(a)), \
	  "r" ((USItype)(b)) \
	: "r0", "r1", "r2")
#else
#define umul_ppmm(xh, xl, a, b) \
	__asm__ ("@ Inlined umul_ppmm\n" \
		"umull %1, %0, %2, %3" \
	: "=&r" (xh), \
	  "=&r" (xl) \
	: "r" ((USItype)(a)), \
	  "r" ((USItype)(b)) \
	: "r0", "r1")
#endif
#define UMUL_TIME 20
#define UDIV_TIME 100
#endif /* __arm__ */

/***************************************
	**************  CLIPPER  **************
	***************************************/
#if defined(__clipper__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("mulwux %2,%0" \
	: "=r" (__xx.__ll) \
	: "%0" ((USItype)(u)), \
	  "r" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define smul_ppmm(w1, w0, u, v) \
	({union {DItype __ll; \
		struct {SItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("mulwx %2,%0" \
	: "=r" (__xx.__ll) \
	: "%0" ((SItype)(u)), \
	  "r" ((SItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define __umulsidi3(u, v) \
	({UDItype __w; \
	__asm__ ("mulwux %2,%0" \
	: "=r" (__w) \
	: "%0" ((USItype)(u)), \
	  "r" ((USItype)(v))); \
	__w; })
#endif /* __clipper__ */

/***************************************
	**************  GMICRO  ***************
	***************************************/
#if defined(__gmicro__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add.w %5,%1\n" \
		"addx %3,%0" \
	: "=g" ((USItype)(sh)), \
	  "=&g" ((USItype)(sl)) \
	: "%0" \
	  ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "%1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub.w %5,%1\n" \
		"subx %3,%0" \
	: "=g" ((USItype)(sh)), \
	  "=&g" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
#define umul_ppmm(ph, pl, m0, m1) \
	__asm__ ("mulx %3,%0,%1" \
	: "=g" ((USItype)(ph)), \
	  "=r" ((USItype)(pl)) \
	: "%0" ((USItype)(m0)), \
	  "g" ((USItype)(m1)))
#define udiv_qrnnd(q, r, nh, nl, d) \
	__asm__ ("divx %4,%0,%1" \
	: "=g" ((USItype)(q)), \
	  "=r" ((USItype)(r)) \
	: "1" ((USItype)(nh)), \
	  "0" ((USItype)(nl)), \
	  "g" ((USItype)(d)))
#endif

/***************************************
	**************  HPPA  *****************
	***************************************/
#if defined(__hppa) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %4,%5,%1\n" \
		"addc %2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%rM" ((USItype)(ah)), \
	  "rM" ((USItype)(bh)), \
	  "%rM" ((USItype)(al)), \
	  "rM" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %4,%5,%1\n" \
		"subb %2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "rM" ((USItype)(ah)), \
	  "rM" ((USItype)(bh)), \
	  "rM" ((USItype)(al)), \
	  "rM" ((USItype)(bl)))
#if 0 && defined(_PA_RISC1_1)
/* xmpyu uses floating point register which is not allowed in Linux kernel
 * (hence the "#if 0" — kept for reference only). */
#define umul_ppmm(wh, wl, u, v) \
do { \
	union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("xmpyu %1,%2,%0" \
	: "=*f" (__xx.__ll) \
	: "*f" ((USItype)(u)), \
	  "*f" ((USItype)(v))); \
	(wh) = __xx.__i.__h; \
	(wl) = __xx.__i.__l; \
} while (0)
#define UMUL_TIME 8
#define UDIV_TIME 60
#else
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif
#if 0 /* #ifndef LONGLONG_STANDALONE */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { USItype __r; \
	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
	(r) = __r; \
} while (0)
extern USItype __udiv_qrnnd();
#endif /* LONGLONG_STANDALONE */
#endif /* hppa */

/***************************************
	**************  I370  *****************
	***************************************/
#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
/* "mr" is a signed multiply; the two shift-and-mask corrections below
 * convert the signed result into the unsigned product. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mr %0,%3" \
	: "=r" (__xx.__i.__h), \
	  "=r" (__xx.__i.__l) \
	: "%1" (__m0), \
	  "r" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
		+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define smul_ppmm(xh, xl, m0, m1) \
do { \
	union {DItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("mr %0,%3" \
	: "=r" (__xx.__i.__h), \
	  "=r" (__xx.__i.__l) \
	: "%1" (m0), \
	  "r" (m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
} while (0)
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
	union {DItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__xx.__i.__h = n1; __xx.__i.__l = n0; \
	__asm__ ("dr %0,%2" \
	: "=r" (__xx.__ll) \
	: "0" (__xx.__ll), "r" (d)); \
	(q) = __xx.__i.__l; (r) = __xx.__i.__h; \
} while (0)
#endif

/***************************************
	**************  I386  *****************
	***************************************/
/* NOTE(review): this #undef disables all of the i386 inline asm below, so
 * x86-32 falls back to the generic C implementations.  It appears to be
 * intentional (it is present in the upstream kernel copy of this file);
 * confirm against upstream history before removing it. */
#undef __i386__
#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl %5,%1\n" \
		"adcl %3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "%1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl %5,%1\n" \
		"sbbl %3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("mull %3" \
	: "=a" ((USItype)(w0)), \
	  "=d" ((USItype)(w1)) \
	: "%0" ((USItype)(u)), \
	  "rm" ((USItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("divl %4" \
	: "=a" ((USItype)(q)), \
	  "=d" ((USItype)(r)) \
	: "0" ((USItype)(n0)), \
	  "1" ((USItype)(n1)), \
	  "rm" ((USItype)(d)))
#ifndef UMUL_TIME
#define UMUL_TIME 40
#endif
#ifndef UDIV_TIME
#define UDIV_TIME 40
#endif
#endif /* 80x86 */

/***************************************
	**************  I860  *****************
	***************************************/
#if defined(__i860__) && W_TYPE_SIZE == 32
/* NOTE(review): there is no ':' before the "=r" output constraint below, so
 * as written "=r" string-concatenates into the asm template.  Inherited from
 * the original longlong.h; dead code unless i860 is ever built. */
#define rshift_rhlc(r, h, l, c) \
	__asm__ ("shr %3,r0,r0\n" \
		"shrd %1,%2,%0" \
		"=r" (r) : "r" (h), "r" (l), "rn" (c))
#endif /* i860 */

/***************************************
	**************  I960  *****************
	***************************************/
#if defined(__i960__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("cmpo 1,0\n" \
		"addc %5,%4,%1\n" \
		"addc %3,%2,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%dI" ((USItype)(ah)), \
	  "dI" ((USItype)(bh)), \
	  "%dI" ((USItype)(al)), \
	  "dI" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("cmpo 0,0\n" \
		"subc %5,%4,%1\n" \
		"subc %3,%2,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "dI" ((USItype)(ah)), \
	  "dI" ((USItype)(bh)), \
	  "dI" ((USItype)(al)), \
	  "dI" ((USItype)(bl)))
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("emul %2,%1,%0" \
	: "=d" (__xx.__ll) \
	: "%dI" ((USItype)(u)), \
	  "dI" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define __umulsidi3(u, v) \
	({UDItype __w; \
	__asm__ ("emul %2,%1,%0" \
	: "=d" (__w) \
	: "%dI" ((USItype)(u)), \
	  "dI" ((USItype)(v))); \
	__w; })
/* NOTE(review): __rq is used but never declared in this macro (inherited
 * from the original longlong.h) — it cannot compile as written.  Dead code
 * unless i960 support is ever resurrected. */
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __nn; \
	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
	__asm__ ("ediv %d,%n,%0" \
	: "=d" (__rq.__ll) \
	: "dI" (__nn.__ll), \
	  "dI" ((USItype)(d))); \
	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
} while (0)
#if defined(__i960mx) /* what is the proper symbol to test??? */
/* NOTE(review): the do { ... } below has no closing "while (0)" — also
 * inherited as-is; dead code for i960mx. */
#define rshift_rhlc(r, h, l, c) \
do { \
	union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __nn; \
	__nn.__i.__h = (h); __nn.__i.__l = (l); \
	__asm__ ("shre %2,%1,%0" \
	: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
}
#endif /* i960mx */
#endif /* i960 */

/***************************************
	**************  68000  ****************
	***************************************/
#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add%.l %5,%1\n" \
		"addx%.l %3,%0" \
	: "=d" ((USItype)(sh)), \
	  "=&d" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	  "d" ((USItype)(bh)), \
	  "%1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub%.l %5,%1\n" \
		"subx%.l %3,%0" \
	: "=d" ((USItype)(sh)), \
	  "=&d" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	  "d" ((USItype)(bh)), \
	  "1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
/* 68020+ has 32x32->64 multiply and 64/32 divide instructions. */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("mulu%.l %3,%1:%0" \
	: "=d" ((USItype)(w0)), \
	  "=d" ((USItype)(w1)) \
	: "%0" ((USItype)(u)), \
	  "dmi" ((USItype)(v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("divu%.l %4,%1:%0" \
	: "=d" ((USItype)(q)), \
	  "=d" ((USItype)(r)) \
	: "0" ((USItype)(n0)), \
	  "1" ((USItype)(n1)), \
	  "dmi" ((USItype)(d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("divs%.l %4,%1:%0" \
	: "=d" ((USItype)(q)), \
	  "=d" ((USItype)(r)) \
	: "0" ((USItype)(n0)), \
	  "1" ((USItype)(n1)), \
	  "dmi" ((USItype)(d)))
#else /* not mc68020 */
/* Plain 68000: synthesize the 64-bit product from 16x16 "mulu" pieces. */
#define umul_ppmm(xh, xl, a, b) \
do { USItype __umul_tmp1, __umul_tmp2; \
	__asm__ ("| Inlined umul_ppmm\n" \
		"move%.l %5,%3\n" \
		"move%.l %2,%0\n" \
		"move%.w %3,%1\n" \
		"swap %3\n" \
		"swap %0\n" \
		"mulu %2,%1\n" \
		"mulu %3,%0\n" \
		"mulu %2,%3\n" \
		"swap %2\n" \
		"mulu %5,%2\n" \
		"add%.l %3,%2\n" \
		"jcc 1f\n" \
		"add%.l %#0x10000,%0\n" \
		"1: move%.l %2,%3\n" \
		"clr%.w %2\n" \
		"swap %2\n" \
		"swap %3\n" \
		"clr%.w %3\n" \
		"add%.l %3,%1\n" \
		"addx%.l %2,%0\n" \
		"| End inlined umul_ppmm" \
	: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
	  "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
	: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
} while (0)
#define UMUL_TIME 100
#define UDIV_TIME 400
#endif /* not mc68020 */
#endif /* mc68000 */

/***************************************
	**************  88000  ****************
	***************************************/
#if defined(__m88000__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addu.co %1,%r4,%r5\n" \
		"addu.ci %0,%r2,%r3" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%rJ" ((USItype)(ah)), \
	  "rJ" ((USItype)(bh)), \
	  "%rJ" ((USItype)(al)), \
	  "rJ" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subu.co %1,%r4,%r5\n" \
		"subu.ci %0,%r2,%r3" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "rJ" ((USItype)(ah)), \
	  "rJ" ((USItype)(bh)), \
	  "rJ" ((USItype)(al)), \
	  "rJ" ((USItype)(bl)))
#if defined(__m88110__)
#define umul_ppmm(wh, wl, u, v) \
do { \
	union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __x; \
	__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
	(wh) = __x.__i.__h; \
	(wl) = __x.__i.__l; \
} while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
	({union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __x, __q; \
	__x.__i.__h = (n1); __x.__i.__l = (n0); \
	__asm__ ("divu.d %0,%1,%2" \
	: "=r" \
	  (__q.__ll) : "r" (__x.__ll), "r" (d)); \
	/* NOTE(review): __q.__l should presumably be __q.__i.__l (the union has
	 * no direct __l member) — inherited as-is; dead code for m88110. */ \
	(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
#define UMUL_TIME 5
#define UDIV_TIME 25
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
#endif /* __m88110__ */
#endif /* __m88000__ */

/***************************************
	**************  MIPS  *****************
	***************************************/
#if defined(__mips__) && W_TYPE_SIZE == 32
#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
/* Modern GCC generates optimal code from the plain C widening multiply. */
#define umul_ppmm(w1, w0, u, v) \
do { \
	UDItype __ll = (UDItype)(u) * (v); \
	w1 = __ll >> 32; \
	w0 = __ll; \
} while (0)
#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("multu %2,%3" \
	: "=l" ((USItype)(w0)), \
	  "=h" ((USItype)(w1)) \
	: "d" ((USItype)(u)), \
	  "d" ((USItype)(v)))
#else
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("multu %2,%3\n" \
		"mflo %0\n" \
		"mfhi %1" \
	: "=d" ((USItype)(w0)), \
	  "=d" ((USItype)(w1)) \
	: "d" ((USItype)(u)), \
	  "d" ((USItype)(v)))
#endif
#define UMUL_TIME 10
#define UDIV_TIME 100
#endif /* __mips__ */

/***************************************
	**************  MIPS/64  **************
	***************************************/
#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
/*
 * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
 * code below, so we special case MIPS64r6 until the compiler can do better.
 */
#define umul_ppmm(w1, w0, u, v) \
do { \
	__asm__ ("dmulu %0,%1,%2" \
	: "=d" ((UDItype)(w0)) \
	: "d" ((UDItype)(u)), \
	  "d" ((UDItype)(v))); \
	__asm__ ("dmuhu %0,%1,%2" \
	: "=d" ((UDItype)(w1)) \
	: "d" ((UDItype)(u)), \
	  "d" ((UDItype)(v))); \
} while (0)
#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
/* 128-bit type via mode(TI) lets the compiler emit the widening multiply. */
#define umul_ppmm(w1, w0, u, v) \
do { \
	typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
	__ll_UTItype __ll = (__ll_UTItype)(u) * (v); \
	w1 = __ll >> 64; \
	w0 = __ll; \
} while (0)
#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("dmultu %2,%3" \
	: "=l" ((UDItype)(w0)), \
	  "=h" ((UDItype)(w1)) \
	: "d" ((UDItype)(u)), \
	  "d" ((UDItype)(v)))
#else
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("dmultu %2,%3\n" \
		"mflo %0\n" \
		"mfhi %1" \
	: "=d" ((UDItype)(w0)), \
	  "=d" ((UDItype)(w1)) \
	: "d" ((UDItype)(u)), \
	  "d" ((UDItype)(v)))
#endif
#define UMUL_TIME 20
#define UDIV_TIME 140
#endif /* __mips__ */

/***************************************
	**************  32000  ****************
	***************************************/
#if defined(__ns32000__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__asm__ ("meid %2,%0" \
	: "=g" (__xx.__ll) \
	: "%0" ((USItype)(u)), \
	  "g" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#define __umulsidi3(u, v) \
	({UDItype __w; \
	__asm__ ("meid %2,%0" \
	: "=g" (__w) \
	: "%0" ((USItype)(u)), \
	  "g" ((USItype)(v))); \
	__w; })
#define udiv_qrnnd(q, r, n1, n0, d) \
	({union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
	__asm__ ("deid %2,%0" \
	: "=g" (__xx.__ll) \
	: "0" (__xx.__ll), \
	  "g" ((USItype)(d))); \
	(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
#endif /* __ns32000__ */

/***************************************
	**************  PPC  ******************
	***************************************/
#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
/* Constant-folded special cases: bh == 0 uses addze, bh == ~0 uses addme,
 * otherwise the generic carrying add (adde) is emitted. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
	if (__builtin_constant_p(bh) && (bh) == 0) \
		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		  "%r" ((USItype)(al)), \
		  "rI" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		  "%r" ((USItype)(al)), \
		  "rI" ((USItype)(bl))); \
	else \
		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "%r" ((USItype)(ah)), \
		  "r" ((USItype)(bh)), \
		  "%r" ((USItype)(al)), \
		  "rI" ((USItype)(bl))); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
	if (__builtin_constant_p(ah) && (ah) == 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "r" ((USItype)(bh)), \
		  "rI" ((USItype)(al)), \
		  "r" ((USItype)(bl))); \
	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "r" ((USItype)(bh)), \
		  "rI" ((USItype)(al)), \
		  "r" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		  "rI" ((USItype)(al)), \
		  "r" ((USItype)(bl))); \
	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		  "rI" ((USItype)(al)), \
		  "r" ((USItype)(bl))); \
	else \
		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
		: "=r" ((USItype)(sh)), \
		  "=&r" ((USItype)(sl)) \
		: "r" ((USItype)(ah)), \
		  "r" ((USItype)(bh)), \
		  "rI" ((USItype)(al)), \
		  "r" ((USItype)(bl))); \
} while (0)
#if defined(_ARCH_PPC)
/* PowerPC: mulhwu gives the high word; the low word is plain C multiply. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mulhwu %0,%1,%2" \
	: "=r" ((USItype) ph) \
	: "%r" (__m0), \
	  "r" (__m1)); \
	(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
do { \
	SItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mulhw %0,%1,%2" \
	: "=r" ((SItype) ph) \
	: "%r" (__m0), \
	  "r" (__m1)); \
	(pl) = __m0 * __m1; \
} while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#else
/* POWER (pre-PPC): "mul" is signed; correct to unsigned as on I370. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("mul %0,%2,%3" \
	: "=r" ((USItype)(xh)), \
	  "=q" ((USItype)(xl)) \
	: "r" (__m0), \
	  "r" (__m1)); \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
		+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define UMUL_TIME 8
#define smul_ppmm(xh, xl, m0, m1) \
	__asm__ ("mul %0,%2,%3" \
	: "=r" ((SItype)(xh)), \
	  "=q" ((SItype)(xl)) \
	: "r" (m0), \
	  "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
	__asm__ ("div %0,%2,%4" \
	: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
	: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
#define UDIV_TIME 100
#endif
#endif /* Power architecture variants.  */

/***************************************
	**************  PYR  ******************
	***************************************/
#if defined(__pyr__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addw %5,%1\n" \
		"addwc %3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "%1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subw %5,%1\n" \
		"subwb %3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
#define umul_ppmm(w1, w0, u, v) \
	({union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __xx; \
	__asm__ ("movw %1,%R0\n" \
		"uemul %2,%0" \
	: "=&r" (__xx.__ll) \
	: "g" ((USItype) (u)), \
	  "g" ((USItype)(v))); \
	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
#endif /* __pyr__ */

/***************************************
	**************  RT/ROMP  **************
	***************************************/
#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("a %1,%5\n" \
		"ae %0,%3" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	  "r" ((USItype)(bh)), \
	  "%1" ((USItype)(al)), \
	  "r" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("s %1,%5\n" \
		"se %0,%3" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	  "r" ((USItype)(bh)), \
	  "1" ((USItype)(al)), \
	  "r" ((USItype)(bl)))
/* ROMP has a 2-bits-per-step multiply ("m"); 16 steps cover 32 bits, and
 * the final correction converts the signed result to unsigned. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ( \
		"s r2,r2\n" \
		"mts r10,%2\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"m r2,%3\n" \
		"cas %0,r2,r0\n" \
		"mfs r10,%1" \
	: "=r" ((USItype)(ph)), \
	  "=r" ((USItype)(pl)) \
	: "%r" (__m0), \
	  "r" (__m1) \
	: "r2"); \
	(ph) += ((((SItype) __m0 >> 31) & __m1) \
		+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
#endif /* RT/ROMP */

/***************************************
	**************  SH2  ******************
	***************************************/
#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
	&& W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ( \
		"dmulu.l %2,%3\n" \
		"sts macl,%1\n" \
		"sts mach,%0" \
	: "=r" ((USItype)(w1)), \
	  "=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	  "r" ((USItype)(v)) \
	: "macl", "mach")
#define UMUL_TIME 5
#endif

/***************************************
	**************  SPARC  ****************
	***************************************/
#if defined(__sparc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addcc %r4,%5,%1\n" \
		"addx %r2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "%rJ" ((USItype)(ah)), \
	  "rI" ((USItype)(bh)), \
	  "%rJ" ((USItype)(al)), \
	  "rI" ((USItype)(bl)) \
	  __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subcc %r4,%5,%1\n" \
		"subx %r2,%3,%0" \
	: "=r" ((USItype)(sh)), \
	  "=&r" ((USItype)(sl)) \
	: "rJ" ((USItype)(ah)), \
	  "rI" ((USItype)(bh)), \
	  "rJ" ((USItype)(al)), \
	  "rI" ((USItype)(bl)) \
	  __CLOBBER_CC)
#if defined(__sparc_v8__)
/* Don't match immediate range because, 1) it is not
often useful, 1006 2) the 'I' flag thinks of the range as a 13 bit signed interval, 1007 while we want to match a 13 bit interval, sign extended to 32 bits, 1008 but INTERPRETED AS UNSIGNED. */ 1009 #define umul_ppmm(w1, w0, u, v) \ 1010 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1011 : "=r" ((USItype)(w1)), \ 1012 "=r" ((USItype)(w0)) \ 1013 : "r" ((USItype)(u)), \ 1014 "r" ((USItype)(v))) 1015 #define UMUL_TIME 5 1016 #ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ 1017 #define udiv_qrnnd(q, r, n1, n0, d) \ 1018 do { \ 1019 USItype __q; \ 1020 __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ 1021 : "=r" ((USItype)(__q)) \ 1022 : "r" ((USItype)(n1)), \ 1023 "r" ((USItype)(n0)), \ 1024 "r" ((USItype)(d))); \ 1025 (r) = (n0) - __q * (d); \ 1026 (q) = __q; \ 1027 } while (0) 1028 #define UDIV_TIME 25 1029 #endif /* SUPERSPARC */ 1030 #else /* ! __sparc_v8__ */ 1031 #if defined(__sparclite__) 1032 /* This has hardware multiply but not divide. It also has two additional 1033 instructions scan (ffs from high bit) and divscc. */ 1034 #define umul_ppmm(w1, w0, u, v) \ 1035 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1036 : "=r" ((USItype)(w1)), \ 1037 "=r" ((USItype)(w0)) \ 1038 : "r" ((USItype)(u)), \ 1039 "r" ((USItype)(v))) 1040 #define UMUL_TIME 5 1041 #define udiv_qrnnd(q, r, n1, n0, d) \ 1042 __asm__ ("! Inlined udiv_qrnnd\n" \ 1043 "wr %%g0,%2,%%y ! 
Not a delayed write for sparclite\n" \ 1044 "tst %%g0\n" \ 1045 "divscc %3,%4,%%g1\n" \ 1046 "divscc %%g1,%4,%%g1\n" \ 1047 "divscc %%g1,%4,%%g1\n" \ 1048 "divscc %%g1,%4,%%g1\n" \ 1049 "divscc %%g1,%4,%%g1\n" \ 1050 "divscc %%g1,%4,%%g1\n" \ 1051 "divscc %%g1,%4,%%g1\n" \ 1052 "divscc %%g1,%4,%%g1\n" \ 1053 "divscc %%g1,%4,%%g1\n" \ 1054 "divscc %%g1,%4,%%g1\n" \ 1055 "divscc %%g1,%4,%%g1\n" \ 1056 "divscc %%g1,%4,%%g1\n" \ 1057 "divscc %%g1,%4,%%g1\n" \ 1058 "divscc %%g1,%4,%%g1\n" \ 1059 "divscc %%g1,%4,%%g1\n" \ 1060 "divscc %%g1,%4,%%g1\n" \ 1061 "divscc %%g1,%4,%%g1\n" \ 1062 "divscc %%g1,%4,%%g1\n" \ 1063 "divscc %%g1,%4,%%g1\n" \ 1064 "divscc %%g1,%4,%%g1\n" \ 1065 "divscc %%g1,%4,%%g1\n" \ 1066 "divscc %%g1,%4,%%g1\n" \ 1067 "divscc %%g1,%4,%%g1\n" \ 1068 "divscc %%g1,%4,%%g1\n" \ 1069 "divscc %%g1,%4,%%g1\n" \ 1070 "divscc %%g1,%4,%%g1\n" \ 1071 "divscc %%g1,%4,%%g1\n" \ 1072 "divscc %%g1,%4,%%g1\n" \ 1073 "divscc %%g1,%4,%%g1\n" \ 1074 "divscc %%g1,%4,%%g1\n" \ 1075 "divscc %%g1,%4,%%g1\n" \ 1076 "divscc %%g1,%4,%0\n" \ 1077 "rd %%y,%1\n" \ 1078 "bl,a 1f\n" \ 1079 "add %1,%4,%1\n" \ 1080 "1: ! End of inline udiv_qrnnd" \ 1081 : "=r" ((USItype)(q)), \ 1082 "=r" ((USItype)(r)) \ 1083 : "r" ((USItype)(n1)), \ 1084 "r" ((USItype)(n0)), \ 1085 "rI" ((USItype)(d)) \ 1086 : "%g1" __AND_CLOBBER_CC) 1087 #define UDIV_TIME 37 1088 #endif /* __sparclite__ */ 1089 #endif /* __sparc_v8__ */ 1090 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ 1091 #ifndef umul_ppmm 1092 #define umul_ppmm(w1, w0, u, v) \ 1093 __asm__ ("! Inlined umul_ppmm\n" \ 1094 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ 1095 "sra %3,31,%%g2 ! Don't move this insn\n" \ 1096 "and %2,%%g2,%%g2 ! Don't move this insn\n" \ 1097 "andcc %%g0,0,%%g1 ! 
Don't move this insn\n" \ 1098 "mulscc %%g1,%3,%%g1\n" \ 1099 "mulscc %%g1,%3,%%g1\n" \ 1100 "mulscc %%g1,%3,%%g1\n" \ 1101 "mulscc %%g1,%3,%%g1\n" \ 1102 "mulscc %%g1,%3,%%g1\n" \ 1103 "mulscc %%g1,%3,%%g1\n" \ 1104 "mulscc %%g1,%3,%%g1\n" \ 1105 "mulscc %%g1,%3,%%g1\n" \ 1106 "mulscc %%g1,%3,%%g1\n" \ 1107 "mulscc %%g1,%3,%%g1\n" \ 1108 "mulscc %%g1,%3,%%g1\n" \ 1109 "mulscc %%g1,%3,%%g1\n" \ 1110 "mulscc %%g1,%3,%%g1\n" \ 1111 "mulscc %%g1,%3,%%g1\n" \ 1112 "mulscc %%g1,%3,%%g1\n" \ 1113 "mulscc %%g1,%3,%%g1\n" \ 1114 "mulscc %%g1,%3,%%g1\n" \ 1115 "mulscc %%g1,%3,%%g1\n" \ 1116 "mulscc %%g1,%3,%%g1\n" \ 1117 "mulscc %%g1,%3,%%g1\n" \ 1118 "mulscc %%g1,%3,%%g1\n" \ 1119 "mulscc %%g1,%3,%%g1\n" \ 1120 "mulscc %%g1,%3,%%g1\n" \ 1121 "mulscc %%g1,%3,%%g1\n" \ 1122 "mulscc %%g1,%3,%%g1\n" \ 1123 "mulscc %%g1,%3,%%g1\n" \ 1124 "mulscc %%g1,%3,%%g1\n" \ 1125 "mulscc %%g1,%3,%%g1\n" \ 1126 "mulscc %%g1,%3,%%g1\n" \ 1127 "mulscc %%g1,%3,%%g1\n" \ 1128 "mulscc %%g1,%3,%%g1\n" \ 1129 "mulscc %%g1,%3,%%g1\n" \ 1130 "mulscc %%g1,0,%%g1\n" \ 1131 "add %%g1,%%g2,%0\n" \ 1132 "rd %%y,%1" \ 1133 : "=r" ((USItype)(w1)), \ 1134 "=r" ((USItype)(w0)) \ 1135 : "%rI" ((USItype)(u)), \ 1136 "r" ((USItype)(v)) \ 1137 : "%g1", "%g2" __AND_CLOBBER_CC) 1138 #define UMUL_TIME 39 /* 39 instructions */ 1139 /* It's quite necessary to add this much assembler for the sparc. 1140 The default udiv_qrnnd (in C) is more than 10 times slower! */ 1141 #define udiv_qrnnd(q, r, n1, n0, d) \ 1142 __asm__ ("! Inlined udiv_qrnnd\n\t" \ 1143 "mov 32,%%g1\n\t" \ 1144 "subcc %1,%2,%%g0\n\t" \ 1145 "1: bcs 5f\n\t" \ 1146 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1147 "sub %1,%2,%1 ! this kills msb of n\n\t" \ 1148 "addx %1,%1,%1 ! so this can't give carry\n\t" \ 1149 "subcc %%g1,1,%%g1\n\t" \ 1150 "2: bne 1b\n\t" \ 1151 "subcc %1,%2,%%g0\n\t" \ 1152 "bcs 3f\n\t" \ 1153 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1154 "b 3f\n\t" \ 1155 "sub %1,%2,%1 ! 
this kills msb of n\n\t" \ 1156 "4: sub %1,%2,%1\n\t" \ 1157 "5: addxcc %1,%1,%1\n\t" \ 1158 "bcc 2b\n\t" \ 1159 "subcc %%g1,1,%%g1\n\t" \ 1160 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \ 1161 "bne 4b\n\t" \ 1162 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ 1163 "sub %1,%2,%1\n\t" \ 1164 "3: xnor %0,0,%0\n\t" \ 1165 "! End of inline udiv_qrnnd\n" \ 1166 : "=&r" ((USItype)(q)), \ 1167 "=&r" ((USItype)(r)) \ 1168 : "r" ((USItype)(d)), \ 1169 "1" ((USItype)(n1)), \ 1170 "0" ((USItype)(n0)) : "%g1", "cc") 1171 #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ 1172 #endif 1173 #endif /* __sparc__ */ 1174 1175 /*************************************** 1176 ************** VAX ****************** 1177 ***************************************/ 1178 #if defined(__vax__) && W_TYPE_SIZE == 32 1179 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1180 __asm__ ("addl2 %5,%1\n" \ 1181 "adwc %3,%0" \ 1182 : "=g" ((USItype)(sh)), \ 1183 "=&g" ((USItype)(sl)) \ 1184 : "%0" ((USItype)(ah)), \ 1185 "g" ((USItype)(bh)), \ 1186 "%1" ((USItype)(al)), \ 1187 "g" ((USItype)(bl))) 1188 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1189 __asm__ ("subl2 %5,%1\n" \ 1190 "sbwc %3,%0" \ 1191 : "=g" ((USItype)(sh)), \ 1192 "=&g" ((USItype)(sl)) \ 1193 : "0" ((USItype)(ah)), \ 1194 "g" ((USItype)(bh)), \ 1195 "1" ((USItype)(al)), \ 1196 "g" ((USItype)(bl))) 1197 #define umul_ppmm(xh, xl, m0, m1) \ 1198 do { \ 1199 union {UDItype __ll; \ 1200 struct {USItype __l, __h; } __i; \ 1201 } __xx; \ 1202 USItype __m0 = (m0), __m1 = (m1); \ 1203 __asm__ ("emul %1,%2,$0,%0" \ 1204 : "=g" (__xx.__ll) \ 1205 : "g" (__m0), \ 1206 "g" (__m1)); \ 1207 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1208 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1209 + (((SItype) __m1 >> 31) & __m0)); \ 1210 } while (0) 1211 #define sdiv_qrnnd(q, r, n1, n0, d) \ 1212 do { \ 1213 union {DItype __ll; \ 1214 struct {SItype __l, __h; } __i; \ 1215 } __xx; \ 1216 __xx.__i.__h = n1; 
__xx.__i.__l = n0; \ 1217 __asm__ ("ediv %3,%2,%0,%1" \ 1218 : "=g" (q), "=g" (r) \ 1219 : "g" (__xx.__ll), "g" (d)); \ 1220 } while (0) 1221 #endif /* __vax__ */ 1222 1223 /*************************************** 1224 ************** Z8000 **************** 1225 ***************************************/ 1226 #if defined(__z8000__) && W_TYPE_SIZE == 16 1227 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1228 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1229 : "=r" ((unsigned int)(sh)), \ 1230 "=&r" ((unsigned int)(sl)) \ 1231 : "%0" ((unsigned int)(ah)), \ 1232 "r" ((unsigned int)(bh)), \ 1233 "%1" ((unsigned int)(al)), \ 1234 "rQR" ((unsigned int)(bl))) 1235 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1236 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1237 : "=r" ((unsigned int)(sh)), \ 1238 "=&r" ((unsigned int)(sl)) \ 1239 : "0" ((unsigned int)(ah)), \ 1240 "r" ((unsigned int)(bh)), \ 1241 "1" ((unsigned int)(al)), \ 1242 "rQR" ((unsigned int)(bl))) 1243 #define umul_ppmm(xh, xl, m0, m1) \ 1244 do { \ 1245 union {long int __ll; \ 1246 struct {unsigned int __h, __l; } __i; \ 1247 } __xx; \ 1248 unsigned int __m0 = (m0), __m1 = (m1); \ 1249 __asm__ ("mult %S0,%H3" \ 1250 : "=r" (__xx.__i.__h), \ 1251 "=r" (__xx.__i.__l) \ 1252 : "%1" (__m0), \ 1253 "rQR" (__m1)); \ 1254 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1255 (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1256 + (((signed int) __m1 >> 15) & __m0)); \ 1257 } while (0) 1258 #endif /* __z8000__ */ 1259 1260 #endif /* __GNUC__ */ 1261 1262 /*************************************** 1263 *********** Generic Versions ******** 1264 ***************************************/ 1265 #if !defined(umul_ppmm) && defined(__umulsidi3) 1266 #define umul_ppmm(ph, pl, m0, m1) \ 1267 { \ 1268 UDWtype __ll = __umulsidi3(m0, m1); \ 1269 ph = (UWtype) (__ll >> W_TYPE_SIZE); \ 1270 pl = (UWtype) __ll; \ 1271 } 1272 #endif 1273 1274 #if !defined(__umulsidi3) 1275 #define __umulsidi3(u, v) \ 1276 ({UWtype __hi, __lo; \ 1277 umul_ppmm(__hi, __lo, u, 
v); \ 1278 ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) 1279 #endif 1280 1281 /* If this machine has no inline assembler, use C macros. */ 1282 1283 #if !defined(add_ssaaaa) 1284 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1285 do { \ 1286 UWtype __x; \ 1287 __x = (al) + (bl); \ 1288 (sh) = (ah) + (bh) + (__x < (al)); \ 1289 (sl) = __x; \ 1290 } while (0) 1291 #endif 1292 1293 #if !defined(sub_ddmmss) 1294 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1295 do { \ 1296 UWtype __x; \ 1297 __x = (al) - (bl); \ 1298 (sh) = (ah) - (bh) - (__x > (al)); \ 1299 (sl) = __x; \ 1300 } while (0) 1301 #endif 1302 1303 #if !defined(umul_ppmm) 1304 #define umul_ppmm(w1, w0, u, v) \ 1305 do { \ 1306 UWtype __x0, __x1, __x2, __x3; \ 1307 UHWtype __ul, __vl, __uh, __vh; \ 1308 UWtype __u = (u), __v = (v); \ 1309 \ 1310 __ul = __ll_lowpart(__u); \ 1311 __uh = __ll_highpart(__u); \ 1312 __vl = __ll_lowpart(__v); \ 1313 __vh = __ll_highpart(__v); \ 1314 \ 1315 __x0 = (UWtype) __ul * __vl; \ 1316 __x1 = (UWtype) __ul * __vh; \ 1317 __x2 = (UWtype) __uh * __vl; \ 1318 __x3 = (UWtype) __uh * __vh; \ 1319 \ 1320 __x1 += __ll_highpart(__x0);/* this can't give carry */ \ 1321 __x1 += __x2; /* but this indeed can */ \ 1322 if (__x1 < __x2) /* did we get it? */ \ 1323 __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1324 \ 1325 (w1) = __x3 + __ll_highpart(__x1); \ 1326 (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ 1327 } while (0) 1328 #endif 1329 1330 #if !defined(umul_ppmm) 1331 #define smul_ppmm(w1, w0, u, v) \ 1332 do { \ 1333 UWtype __w1; \ 1334 UWtype __m0 = (u), __m1 = (v); \ 1335 umul_ppmm(__w1, w0, __m0, __m1); \ 1336 (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ 1337 - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ 1338 } while (0) 1339 #endif 1340 1341 /* Define this unconditionally, so it can be used for debugging. 
*/
/* Generic two-word-by-one-word division in C.  Computes the quotient
 * digit-by-digit using half-word "digits": first __q1 from the high
 * part, then __q0 from the running remainder, each followed by up to
 * two correction steps (__q--, add d back).  Used with
 * UDIV_NEEDS_NORMALIZATION = 1 below, i.e. callers pre-shift d so its
 * most significant bit is set (see the udiv_qrnnd contract in the file
 * header); __d1 is then nonzero, so the / and % below are safe. */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
	do { \
		UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
		__d1 = __ll_highpart(d); \
		__d0 = __ll_lowpart(d); \
\
		__r1 = (n1) % __d1; \
		__q1 = (n1) / __d1; \
		__m = (UWtype) __q1 * __d0; \
		__r1 = __r1 * __ll_B | __ll_highpart(n0); \
		if (__r1 < __m) { \
			__q1--, __r1 += (d); \
			if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
				if (__r1 < __m) \
					__q1--, __r1 += (d); \
		} \
		__r1 -= __m; \
\
		__r0 = __r1 % __d1; \
		__q0 = __r1 / __d1; \
		__m = (UWtype) __q0 * __d0; \
		__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
		if (__r0 < __m) { \
			__q0--, __r0 += (d); \
			if (__r0 >= (d)) \
				if (__r0 < __m) \
					__q0--, __r0 += (d); \
		} \
		__r0 -= __m; \
\
		(q) = (UWtype) __q1 * __ll_B | __q0; \
		(r) = __r0; \
	} while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere). */
#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
	do { \
		UWtype __r; \
		(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
		(r) = __r; \
	} while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
 * The C fallback requires a normalized divisor, hence
 * UDIV_NEEDS_NORMALIZATION is forced to 1 here. */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif