Lines Matching +full:lookup +full:- +full:only
6 // (See accompanying file LICENSE-Apache or copy at
7 // http://www.apache.org/licenses/LICENSE-2.0)
11 // (See accompanying file LICENSE-Boost or copy at
48 // In the current implementation of the double-precision version in shiftright128()
72 // Returns the high 64 bits of the 128-bit product of a and b.
82 // On 32-bit platforms, compilers typically generate calls to library
83 // functions for 64-bit divisions, even if the divisor is a constant.
90 // The functions here perform division-by-constant using multiplications
91 // in the same way as 64-bit compilers would do.
118 // Avoid 64-bit math as much as possible. in mod1e9()
119 // Returning (uint32_t) (x - 1000000000 * div1e9(x)) would in mod1e9()
120 // perform 32x64-bit multiplication and 64-bit subtraction. in mod1e9()
127 return ((uint32_t) x) - 1000000000 * ((uint32_t) div1e9(x)); in mod1e9()
153 return (uint32_t) (x - 1000000000 * div1e9(x)); in mod1e9()
172 return (value & ((1ull << p) - 1)) == 0; in multipleOfPowerOf2()
175 // We need a 64x128-bit multiplication and a subsequent 128-bit shift.
177 // The 64-bit factor is variable and passed in, the 128-bit factor comes
178 // from a lookup table. We know that the 64-bit factor only has 55
179 // significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
180 // factor only has 124 significant bits (i.e., the 4 topmost bits are
185 // at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
186 // bits. This means that we only need the topmost 64 significant bits of
187 // the 64x128-bit multiplication.
190 // 1. Best case: the compiler exposes a 128-bit type.
191 // We perform two 64x64-bit multiplications, add the higher 64 bits of the
192 // lower result to the higher result, and shift by j - 64 bits.
194 // We explicitly cast from 64-bit to 128-bit, so the compiler can tell
195 // that these are only 64-bit inputs, and can map these to the best
198 // 64x64-bit multiplications and 128-bit shifts.
203 // 3. We only have 64x64 bit instructions that return the lower 64 bits of
207 // b. Split both into 31-bit pieces, which guarantees no internal overflow,
208 // but requires extra work upfront (unless we change the lookup table).
209 // c. Split only the first factor into 31-bit pieces, which also guarantees
214 // Best case: use 128-bit type.
218 return (uint64_t) (((b0 >> 64) + b2) >> (j - 64)); in mulShift64()
231 // *vp = (uint64_t) ((hi + (vpLo >> 64)) >> (j - 64)); in mulShiftAll64()
232 // uint128_t vmLo = lo - (factor << mmShift); in mulShiftAll64()
233 // *vm = (uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64)); in mulShiftAll64()
234 // return (uint64_t) (hi >> (j - 64)); in mulShiftAll64()
236 *vm = mulShift64(4 * m - 1 - mmShift, mul, j); in mulShiftAll64()
252 return shiftright128(sum, high1, j - 64); in mulShift64()
258 *vm = mulShift64(4 * m - 1 - mmShift, mul, j); in mulShiftAll64()
274 return shiftright128(sum, high1, j - 64); in mulShift64()
277 // This is faster if we don't have a 64x64->128-bit multiplication.
291 *vp = shiftright128(mid2, hi2, (uint32_t) (j - 64 - 1)); in mulShiftAll64()
294 const uint64_t lo3 = lo - mul[0]; in mulShiftAll64()
295 const uint64_t mid3 = mid - mul[1] - (lo3 > lo); in mulShiftAll64()
296 const uint64_t hi3 = hi - (mid3 > mid); in mulShiftAll64()
297 *vm = shiftright128(mid3, hi3, (uint32_t) (j - 64 - 1)); in mulShiftAll64()
302 const uint64_t lo4 = lo3 - mul[0]; in mulShiftAll64()
303 const uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3); in mulShiftAll64()
304 const uint64_t hi4 = hi3 - (mid4 > mid3); in mulShiftAll64()
305 *vm = shiftright128(mid4, hi4, (uint32_t) (j - 64)); in mulShiftAll64()
308 return shiftright128(mid, hi, (uint32_t) (j - 64 - 1)); in mulShiftAll64()