1 // Copyright 2019 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 //    (See accompanying file LICENSE-Apache or copy at
7 //     http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 //    (See accompanying file LICENSE-Boost or copy at
12 //     https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17 
18 #include "stdio_private.h"
19 
20 #ifdef RYU_DEBUG
21 #include <inttypes.h>
22 #endif
23 
24 #include "ryu/common.h"
25 #include "ryu/d2s_intrinsics.h"
26 
27 #if __SIZEOF_DOUBLE__ == 8
28 #define FLOAT64 double
29 #elif __SIZEOF_LONG_DOUBLE__ == 8
30 #define FLOAT64 long double
31 #endif
32 
33 #ifdef FLOAT64
34 
35 #define DOUBLE_MANTISSA_BITS 52
36 #define DOUBLE_EXPONENT_BITS 11
37 #define DOUBLE_EXPONENT_BIAS 1023
38 
39 #if defined(_MSC_VER)
40 #include <intrin.h>
41 
floor_log2(const uint64_t value)42 static inline uint32_t floor_log2(const uint64_t value) {
43   long index;
44   return _BitScanReverse64(&index, value) ? index : 64;
45 }
46 
47 #else
48 
floor_log2(const uint64_t value)49 static inline uint32_t floor_log2(const uint64_t value) {
50   return 63 - __builtin_clzll(value);
51 }
52 
53 #endif
54 
55 // The max function is already defined on Windows.
max32(int32_t a,int32_t b)56 static inline int32_t max32(int32_t a, int32_t b) {
57   return a < b ? b : a;
58 }
59 
int64Bits2Double(uint64_t bits)60 static inline FLOAT64 int64Bits2Double(uint64_t bits) {
61   FLOAT64 f;
62   memcpy(&f, &bits, sizeof(FLOAT64));
63   return f;
64 }
65 
66 FLOAT64
__atod_engine(uint64_t m10,int e10)67 __atod_engine(uint64_t m10, int e10)
68 {
69 #ifdef RYU_DEBUG
70     printf("m10 = %lu\n", m10);
71     printf("e10 = %d\n", e10);
72     printf("m10 * 10^e10 = %" PRIu64 " * 10^%d\n", m10, e10);
73 #endif
74 #define signedM 0
75 #define RYU_OPTIMIZE_SIZE
76 
77     // Convert to binary float m2 * 2^e2, while retaining information about whether the conversion
78     // was exact (trailingZeros).
79     int32_t e2;
80     uint64_t m2;
81     bool trailingZeros;
82     if (e10 >= 0) {
83 	// The length of m * 10^e in bits is:
84 	//   log2(m10 * 10^e10) = log2(m10) + e10 log2(10) = log2(m10) + e10 + e10 * log2(5)
85 	//
86 	// We want to compute the DOUBLE_MANTISSA_BITS + 1 top-most bits (+1 for the implicit leading
87 	// one in IEEE format). We therefore choose a binary output exponent of
88 	//   log2(m10 * 10^e10) - (DOUBLE_MANTISSA_BITS + 1).
89 	//
90 	// We use floor(log2(5^e10)) so that we get at least this many bits; better to
91 	// have an additional bit than to not have enough bits.
92 	e2 = floor_log2(m10) + e10 + log2pow5(e10) - (DOUBLE_MANTISSA_BITS + 1);
93 
94 	// We now compute [m10 * 10^e10 / 2^e2] = [m10 * 5^e10 / 2^(e2-e10)].
95 	// To that end, we use the DOUBLE_POW5_SPLIT table.
96 	int j = e2 - e10 - ceil_log2pow5(e10) + DOUBLE_POW5_BITCOUNT;
97 	assert(j >= 0);
98 #if defined(RYU_OPTIMIZE_SIZE)
99 	uint64_t pow5[2];
100 	__double_computePow5(e10, pow5);
101 	m2 = mulShift64(m10, pow5, j);
102 #else
103 	assert(e10 < DOUBLE_POW5_TABLE_SIZE);
104 	m2 = mulShift64(m10, DOUBLE_POW5_SPLIT[e10], j);
105 #endif
106 	// We also compute if the result is exact, i.e.,
107 	//   [m10 * 10^e10 / 2^e2] == m10 * 10^e10 / 2^e2.
108 	// This can only be the case if 2^e2 divides m10 * 10^e10, which in turn requires that the
109 	// largest power of 2 that divides m10 + e10 is greater than e2. If e2 is less than e10, then
110 	// the result must be exact. Otherwise we use the existing multipleOfPowerOf2 function.
111 	trailingZeros = e2 < e10 || (e2 - e10 < 64 && multipleOfPowerOf2(m10, e2 - e10));
112     } else {
113 	e2 = floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (DOUBLE_MANTISSA_BITS + 1);
114 	int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + DOUBLE_POW5_INV_BITCOUNT;
115 #if defined(RYU_OPTIMIZE_SIZE)
116 	uint64_t pow5[2];
117 	__double_computeInvPow5(-e10, pow5);
118 	m2 = mulShift64(m10, pow5, j);
119 #else
120 	assert(-e10 < DOUBLE_POW5_INV_TABLE_SIZE);
121 	m2 = mulShift64(m10, DOUBLE_POW5_INV_SPLIT[-e10], j);
122 #endif
123 	trailingZeros = multipleOfPowerOf5(m10, -e10);
124     }
125 
126 #ifdef RYU_DEBUG
127     printf("m2 * 2^e2 = 0x%" PRIx64 " * 2^%d\n", m2, e2);
128 #endif
129 
130     // Compute the final IEEE exponent.
131     uint32_t ieee_e2 = (uint32_t) max32(0, e2 + DOUBLE_EXPONENT_BIAS + floor_log2(m2));
132 
133     if (ieee_e2 > 0x7fe) {
134 	// Final IEEE exponent is larger than the maximum representable; return +/-Infinity.
135 	uint64_t ieee = (((uint64_t) signedM) << (DOUBLE_EXPONENT_BITS + DOUBLE_MANTISSA_BITS)) | (0x7ffull << DOUBLE_MANTISSA_BITS);
136 	return int64Bits2Double(ieee);
137     }
138 
139     // We need to figure out how much we need to shift m2. The tricky part is that we need to take
140     // the final IEEE exponent into account, so we need to reverse the bias and also special-case
141     // the value 0.
142     int32_t shift = (ieee_e2 == 0 ? 1 : ieee_e2) - e2 - DOUBLE_EXPONENT_BIAS - DOUBLE_MANTISSA_BITS;
143     assert(shift >= 0);
144 #ifdef RYU_DEBUG
145     printf("ieee_e2 = %d\n", ieee_e2);
146     printf("shift = %d\n", shift);
147 #endif
148 
149     // We need to round up if the exact value is more than 0.5 above the value we computed. That's
150     // equivalent to checking if the last removed bit was 1 and either the value was not just
151     // trailing zeros or the result would otherwise be odd.
152     //
153     // We need to update trailingZeros given that we have the exact output exponent ieee_e2 now.
154     trailingZeros &= (m2 & ((1ull << (shift - 1)) - 1)) == 0;
155     uint64_t lastRemovedBit = (m2 >> (shift - 1)) & 1;
156     bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0));
157 
158 #ifdef RYU_DEBUG
159     printf("roundUp = %d\n", roundUp);
160     printf("ieee_m2 = 0x%" PRIx64 "\n", (m2 >> shift) + roundUp);
161 #endif
162     uint64_t ieee_m2 = (m2 >> shift) + roundUp;
163     assert(ieee_m2 <= (1ull << (DOUBLE_MANTISSA_BITS + 1)));
164     ieee_m2 &= (1ull << DOUBLE_MANTISSA_BITS) - 1;
165     if (ieee_m2 == 0 && roundUp) {
166 	// Due to how the IEEE represents +/-Infinity, we don't need to check for overflow here.
167 	ieee_e2++;
168     }
169 
170     uint64_t ieee = (((((uint64_t) signedM) << DOUBLE_EXPONENT_BITS) | (uint64_t)ieee_e2) << DOUBLE_MANTISSA_BITS) | ieee_m2;
171     return int64Bits2Double(ieee);
172 }
173 
174 #endif /* FLOAT64 */
175