d2s_intrinsics.h - OpenGrok cross reference for /picolibc-latest/newlib/libc/tinystdio/ryu/d2s

Lines Matching +full:lookup +full:- +full:only
6 //    (See accompanying file LICENSE-Apache or copy at
7 //     http://www.apache.org/licenses/LICENSE-2.0)
11 //    (See accompanying file LICENSE-Boost or copy at
48   // In the current implementation of the double-precision version  in shiftright128()
72 // Returns the high 64 bits of the 128-bit product of a and b.
82 // On 32-bit platforms, compilers typically generate calls to library
83 // functions for 64-bit divisions, even if the divisor is a constant.
90 // The functions here perform division-by-constant using multiplications
91 // in the same way as 64-bit compilers would do.
118   // Avoid 64-bit math as much as possible.  in mod1e9()
119   // Returning (uint32_t) (x - 1000000000 * div1e9(x)) would  in mod1e9()
120   // perform 32x64-bit multiplication and 64-bit subtraction.  in mod1e9()
127   return ((uint32_t) x) - 1000000000 * ((uint32_t) div1e9(x));  in mod1e9()
153   return (uint32_t) (x - 1000000000 * div1e9(x));  in mod1e9()
172   return (value & ((1ull << p) - 1)) == 0;  in multipleOfPowerOf2()
175 // We need a 64x128-bit multiplication and a subsequent 128-bit shift.
177 //   The 64-bit factor is variable and passed in, the 128-bit factor comes
178 //   from a lookup table. We know that the 64-bit factor only has 55
179 //   significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
180 //   factor only has 124 significant bits (i.e., the 4 topmost bits are
185 //   at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
186 //   bits. This means that we only need the topmost 64 significant bits of
187 //   the 64x128-bit multiplication.
190 // 1. Best case: the compiler exposes a 128-bit type.
191 //    We perform two 64x64-bit multiplications, add the higher 64 bits of the
192 //    lower result to the higher result, and shift by j - 64 bits.
194 //    We explicitly cast from 64-bit to 128-bit, so the compiler can tell
195 //    that these are only 64-bit inputs, and can map these to the best
198 //    64x64-bit multiplications and 128-bit shifts.
203 // 3. We only have 64x64 bit instructions that return the lower 64 bits of
207 //    b. Split both into 31-bit pieces, which guarantees no internal overflow,
208 //       but requires extra work upfront (unless we change the lookup table).
209 //    c. Split only the first factor into 31-bit pieces, which also guarantees
214 // Best case: use 128-bit type.
218   return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));  in mulShift64()
231 //  *vp = (uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));  in mulShiftAll64()
232 //  uint128_t vmLo = lo - (factor << mmShift);  in mulShiftAll64()
233 //  *vm = (uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));  in mulShiftAll64()
234 //  return (uint64_t) (hi >> (j - 64));  in mulShiftAll64()
236   *vm = mulShift64(4 * m - 1 - mmShift, mul, j);  in mulShiftAll64()
252   return shiftright128(sum, high1, j - 64);  in mulShift64()
258   *vm = mulShift64(4 * m - 1 - mmShift, mul, j);  in mulShiftAll64()
274   return shiftright128(sum, high1, j - 64);  in mulShift64()
277 // This is faster if we don't have a 64x64->128-bit multiplication.
291   *vp = shiftright128(mid2, hi2, (uint32_t) (j - 64 - 1));  in mulShiftAll64()
294     const uint64_t lo3 = lo - mul[0];  in mulShiftAll64()
295     const uint64_t mid3 = mid - mul[1] - (lo3 > lo);  in mulShiftAll64()
296     const uint64_t hi3 = hi - (mid3 > mid);  in mulShiftAll64()
297     *vm = shiftright128(mid3, hi3, (uint32_t) (j - 64 - 1));  in mulShiftAll64()
302     const uint64_t lo4 = lo3 - mul[0];  in mulShiftAll64()
303     const uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);  in mulShiftAll64()
304     const uint64_t hi4 = hi3 - (mid4 > mid3);  in mulShiftAll64()
305     *vm = shiftright128(mid4, hi4, (uint32_t) (j - 64));  in mulShiftAll64()
308   return shiftright128(mid, hi, (uint32_t) (j - 64 - 1));  in mulShiftAll64()