// Copyright 2018 Ulf Adams
//
// The contents of this file may be used under the terms of the Apache License,
// Version 2.0.
//
//    (See accompanying file LICENSE-Apache or copy at
//     http://www.apache.org/licenses/LICENSE-2.0)
//
// Alternatively, the contents of this file may be used under the terms of
// the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE-Boost or copy at
//     https://www.boost.org/LICENSE_1_0.txt)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.

#include "ryu/f2s_intrinsics.h"

#include <assert.h>
#include <stdint.h>
19 
// It seems to be slightly faster to avoid uint128_t here, although the
// generated code for uint128_t looks slightly nicer.
// Computes the high bits of the 96-bit product m * factor shifted right by
// `shift`, i.e. floor((m * factor) / 2^shift), without losing precision to an
// intermediate 64-bit truncation.
//
// Preconditions: 32 < shift (asserted); the final result must fit in 32 bits
// (asserted on the 64-bit path).
static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) {
  assert(shift > 32);

  // The casts here help MSVC to avoid calls to the __allmul library
  // function.
  const uint32_t factorLo = (uint32_t)(factor);
  const uint32_t factorHi = (uint32_t)(factor >> 32);
  const uint64_t bits0 = (uint64_t)m * factorLo;  // low partial product
  const uint64_t bits1 = (uint64_t)m * factorHi;  // high partial product

  /*
   * This function *does* get called with shift < 32 when converting
   * strings to floats, so disable this optimization
   */
  // NOTE(review): the comment above contradicts the assert(shift > 32) at the
  // top of this function; confirm whether any string-to-float caller can
  // actually reach here with shift <= 32.
#if defined(RYU_32_BIT_PLATFORM)
  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
  // need the upper 32 bits of the result and the shift value is > 32.
  const uint32_t bits0Hi = (uint32_t)(bits0 >> 32);
  uint32_t bits1Lo = (uint32_t)(bits1);
  uint32_t bits1Hi = (uint32_t)(bits1 >> 32);
  // Add the carry from the low partial product into the high one.
  bits1Lo += bits0Hi;
  bits1Hi += (bits1Lo < bits0Hi);  // carry out of the 32-bit addition
  if (shift >= 64) {
    // s2f can call this with a shift value >= 64, which we have to handle.
    // This could now be slower than the !defined(RYU_32_BIT_PLATFORM) case.
    return (uint32_t)(bits1Hi >> (shift - 64));
  } else {
    const int32_t s = shift - 32;
    // 1 <= s <= 31 here (shift > 32 asserted, shift < 64 in this branch),
    // so both shift amounts below are in range for 32-bit operands.
    return (bits1Hi << (32 - s)) | (bits1Lo >> s);
  }
#else // RYU_32_BIT_PLATFORM
  // bits0 >> 32 discards only bits that lie entirely below 2^32 <= 2^shift,
  // so the sum still holds every bit that can survive the final shift.
  const uint64_t sum = (bits0 >> 32) + bits1;
  const uint64_t shiftedSum = sum >> (shift - 32);
  assert(shiftedSum <= UINT32_MAX);
  return (uint32_t) shiftedSum;
#endif // RYU_32_BIT_PLATFORM
}
59 
// Returns floor((m * [2^k / 5^q]) / 2^j), where [2^k / 5^q] + 1 is the
// 64-bit inverse power-of-5 multiplier reconstructed from the double-precision
// lookup table. Used by the float (f2s) shortest-output computation.
uint32_t __mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) {
  // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup
  // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the
  // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits.
  uint64_t pow5[2];
  __double_computeInvPow5(q, pow5);
  return mulShift32(m, pow5[1] + 1, j);
}
68 
// Returns floor((m * 5^i) / 2^j), taking the upper 64 bits of 5^i from the
// double-precision power-of-5 lookup table. Used by the float (f2s)
// shortest-output computation.
uint32_t __mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) {
  uint64_t pow5[2];
  __double_computePow5(i, pow5);
  // Only the high half of the 128-bit 5^i approximation is needed here;
  // mulShift32 keeps full precision on the 32x64-bit product.
  return mulShift32(m, pow5[1], j);
}