1 // Copyright 2019 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 //    (See accompanying file LICENSE-Apache or copy at
7 //     http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 //    (See accompanying file LICENSE-Boost or copy at
12 //     https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17 
18 #include "stdio_private.h"
19 
20 #ifdef RYU_DEBUG
21 #include <inttypes.h>
22 #endif
23 
24 #include "ryu/common.h"
25 #include "ryu/f2s_intrinsics.h"
26 
27 #define FLOAT_MANTISSA_BITS 23
28 #define FLOAT_EXPONENT_BITS 8
29 #define FLOAT_EXPONENT_BIAS 127
30 
31 #if defined(_MSC_VER)
32 #include <intrin.h>
33 
floor_log2(const uint32_t value)34 static inline uint32_t floor_log2(const uint32_t value) {
35   uint32_t index;
36   return _BitScanReverse(&index, value) ? index : 32;
37 }
38 
39 #else
40 
floor_log2(const uint32_t value)41 static inline uint32_t floor_log2(const uint32_t value) {
42 #if __SIZEOF_INT__ >= 4
43   return 31 - __builtin_clz(value);
44 #elif __SIZEOF_INT__ < 4 && __SIZEOF_LONG__ >= 4
45   return 31 - __builtin_clzl(value);
46 #else
47 #error no usable clz
48 #endif
49 }
50 
51 #endif
52 
53 // The max function is already defined on Windows.
max32(int32_t a,int32_t b)54 static inline int32_t max32(int32_t a, int32_t b) {
55   return a < b ? b : a;
56 }
57 
int32Bits2Float(uint32_t bits)58 static inline float int32Bits2Float(uint32_t bits) {
59   float f;
60   memcpy(&f, &bits, sizeof(float));
61   return f;
62 }
63 
64 float
__atof_engine(uint32_t m10,int e10)65 __atof_engine(uint32_t m10, int e10)
66 {
67 #ifdef RYU_DEBUG
68 	printf("m10digits = %ld\n", m10);
69 	printf("e10digits = %d\n", e10);
70 	printf("m10 * 10^e10 = %lu * 10^%d\n", m10, e10);
71 #endif
72 #define signedM 0
73 
74 	// Convert to binary float m2 * 2^e2, while retaining information about whether the conversion
75 	// was exact (trailingZeros).
76 	int32_t e2;
77 	uint32_t m2;
78 	bool trailingZeros;
79 	if (e10 >= 0) {
80 		// The length of m * 10^e in bits is:
81 		//   log2(m10 * 10^e10) = log2(m10) + e10 log2(10) = log2(m10) + e10 + e10 * log2(5)
82 		//
83 		// We want to compute the FLOAT_MANTISSA_BITS + 1 top-most bits (+1 for the implicit leading
84 		// one in IEEE format). We therefore choose a binary output exponent of
85 		//   log2(m10 * 10^e10) - (FLOAT_MANTISSA_BITS + 1).
86 		//
87 		// We use floor(log2(5^e10)) so that we get at least this many bits; better to
88 		// have an additional bit than to not have enough bits.
89 		e2 = floor_log2(m10) + e10 + log2pow5(e10) - (FLOAT_MANTISSA_BITS + 1);
90 
91 		// We now compute [m10 * 10^e10 / 2^e2] = [m10 * 5^e10 / 2^(e2-e10)].
92 		// To that end, we use the FLOAT_POW5_SPLIT table.
93 		int j = e2 - e10 - ceil_log2pow5(e10) + FLOAT_POW5_BITCOUNT;
94 		assert(j >= 0);
95 		m2 = mulPow5divPow2(m10, e10, j);
96 
97 		// We also compute if the result is exact, i.e.,
98 		//   [m10 * 10^e10 / 2^e2] == m10 * 10^e10 / 2^e2.
99 		// This can only be the case if 2^e2 divides m10 * 10^e10, which in turn requires that the
100 		// largest power of 2 that divides m10 + e10 is greater than e2. If e2 is less than e10, then
101 		// the result must be exact. Otherwise we use the existing multipleOfPowerOf2 function.
102 		trailingZeros = e2 < e10 || (e2 - e10 < 32 && multipleOfPowerOf2_32(m10, e2 - e10));
103 	} else {
104 		e2 = floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (FLOAT_MANTISSA_BITS + 1);
105 
106 		// We now compute [m10 * 10^e10 / 2^e2] = [m10 / (5^(-e10) 2^(e2-e10))].
107 		int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + FLOAT_POW5_INV_BITCOUNT;
108 		m2 = mulPow5InvDivPow2(m10, -e10, j);
109 
110 		// We also compute if the result is exact, i.e.,
111 		//   [m10 / (5^(-e10) 2^(e2-e10))] == m10 / (5^(-e10) 2^(e2-e10))
112 		//
113 		// If e2-e10 >= 0, we need to check whether (5^(-e10) 2^(e2-e10)) divides m10, which is the
114 		// case iff pow5(m10) >= -e10 AND pow2(m10) >= e2-e10.
115 		//
116 		// If e2-e10 < 0, we have actually computed [m10 * 2^(e10 e2) / 5^(-e10)] above,
117 		// and we need to check whether 5^(-e10) divides (m10 * 2^(e10-e2)), which is the case iff
118 		// pow5(m10 * 2^(e10-e2)) = pow5(m10) >= -e10.
119 		trailingZeros = (e2 < e10 || (e2 - e10 < 32 && multipleOfPowerOf2_32(m10, e2 - e10)))
120 			&& multipleOfPowerOf5_32(m10, -e10);
121 	}
122 
123 #ifdef RYU_DEBUG
124 	printf("m2 * 2^e2 = %lu * 2^%d\n", m2, e2);
125 #endif
126 
127 	// Compute the final IEEE exponent.
128 	uint32_t ieee_e2 = (uint32_t) max32(0, e2 + FLOAT_EXPONENT_BIAS + floor_log2(m2));
129 
130 	if (ieee_e2 > 0xfe) {
131 		// Final IEEE exponent is larger than the maximum representable; return +/-Infinity.
132                 uint32_t ieee = (((uint32_t) signedM) << (FLOAT_EXPONENT_BITS + FLOAT_MANTISSA_BITS)) | ((uint32_t) 0xffu << FLOAT_MANTISSA_BITS);
133 		return int32Bits2Float(ieee);
134 	}
135 
136 	// We need to figure out how much we need to shift m2. The tricky part is that we need to take
137 	// the final IEEE exponent into account, so we need to reverse the bias and also special-case
138 	// the value 0.
139 	int32_t shift = (ieee_e2 == 0 ? 1 : ieee_e2) - e2 - FLOAT_EXPONENT_BIAS - FLOAT_MANTISSA_BITS;
140 	assert(shift >= 0);
141 #ifdef RYU_DEBUG
142 	printf("ieee_e2 = %ld\n", ieee_e2);
143 	printf("shift = %ld\n", shift);
144 #endif
145 
146 	// We need to round up if the exact value is more than 0.5 above the value we computed. That's
147 	// equivalent to checking if the last removed bit was 1 and either the value was not just
148 	// trailing zeros or the result would otherwise be odd.
149 	//
150 	// We need to update trailingZeros given that we have the exact output exponent ieee_e2 now.
151 	trailingZeros &= (m2 & ((1u << (shift - 1)) - 1)) == 0;
152 	uint32_t lastRemovedBit = (m2 >> (shift - 1)) & 1;
153 	bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0));
154 
155 #ifdef RYU_DEBUG
156 	printf("roundUp = %d\n", roundUp);
157 	printf("ieee_m2 = %lu\n", (m2 >> shift) + roundUp);
158 #endif
159 	uint32_t ieee_m2 = (m2 >> shift) + roundUp;
160 	assert(ieee_m2 <= (1u << (FLOAT_MANTISSA_BITS + 1)));
161 	ieee_m2 &= ((uint32_t)1u << FLOAT_MANTISSA_BITS) - 1;
162 	if (ieee_m2 == 0 && roundUp) {
163 		// Rounding up may overflow the mantissa.
164 		// In this case we move a trailing zero of the mantissa into the exponent.
165 		// Due to how the IEEE represents +/-Infinity, we don't need to check for overflow here.
166 		ieee_e2++;
167 	}
168 	uint32_t ieee = (((((uint32_t) signedM) << FLOAT_EXPONENT_BITS) | (uint32_t)ieee_e2) << FLOAT_MANTISSA_BITS) | ieee_m2;
169 	return int32Bits2Float(ieee);
170 }
171