1 // Copyright 2018 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 //    (See accompanying file LICENSE-Apache or copy at
7 //     http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 //    (See accompanying file LICENSE-Boost or copy at
12 //     https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17 
18 // Runtime compiler options:
19 // -DRYU_DEBUG Generate verbose debugging output to stdout.
20 
21 #include "ryu/ryu.h"
22 
23 #include <stdbool.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <limits.h>
28 
29 #ifdef RYU_DEBUG
30 #include <stdio.h>
31 #endif
32 
33 #include "ryu/common.h"
34 #include "ryu/f2s_intrinsics.h"
35 #include "ryu/digit_table.h"
36 
37 #define FLOAT_MANTISSA_BITS 23
38 #define FLOAT_EXPONENT_BITS 8
39 #define FLOAT_BIAS 127
40 
41 // Returns the number of decimal digits in v, which must not contain more than 9 digits.
decimalLength9(const uint32_t v)42 static int decimalLength9(const uint32_t v) {
43 	int len = 1;
44 	uint32_t c = 10;
45 	while (c <= v) {
46 		len++;
47 		c = (c << 3) + (c << 1);
48 	}
49 	return len;
50 }
51 
52 // A floating decimal representing m * 10^e.
53 typedef struct floating_decimal_32 {
54 	uint32_t mantissa;
55 	// Decimal exponent's range is -45 to 38
56 	// inclusive, and can fit in a short if needed.
57 	int16_t exponent;
58 	int16_t olength;
59 } floating_decimal_32;
60 
61 static inline floating_decimal_32
f2d(const uint32_t ieeeMantissa,const uint32_t ieeeExponent,int max_digits,bool fmode,int max_decimals)62 f2d(const uint32_t ieeeMantissa, const uint32_t ieeeExponent, int max_digits, bool fmode, int max_decimals)
63 {
64 	int32_t e2;
65 	uint32_t m2;
66 	if (ieeeExponent == 0) {
67 		// We subtract 2 so that the bounds computation has 2 additional bits.
68 		e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
69 		m2 = ieeeMantissa;
70 	} else {
71 		e2 = (int32_t) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
72 		m2 = ((uint32_t)1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
73 	}
74 	const bool even = (m2 & 1) == 0;
75 	const bool acceptBounds = even;
76 	bool truncate_max = false;
77 
78 #ifdef RYU_DEBUG
79 	printf("-> %u * 2^%d\n", m2, e2 + 2);
80 #endif
81 
82 	// Step 2: Determine the interval of valid decimal representations.
83 	const uint32_t mv = 4 * m2;
84 	const uint32_t mp = 4 * m2 + 2;
85 	// Implicit bool -> int conversion. True is 1, false is 0.
86 	const uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
87 	const uint32_t mm = 4 * m2 - 1 - mmShift;
88 
89 	// Step 3: Convert to a decimal power base using 64-bit arithmetic.
90 	uint32_t vr, vp, vm;
91 	int32_t e10;
92 	bool vmIsTrailingZeros = false;
93 	bool vrIsTrailingZeros = false;
94 	uint8_t lastRemovedDigit = 0;
95 	if (e2 >= 0) {
96 		const uint32_t q = log10Pow2(e2);
97 		e10 = (int32_t) q;
98 		const int32_t k = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t) q) - 1;
99 		const int32_t i = -e2 + (int32_t) q + k;
100 		vr = mulPow5InvDivPow2(mv, q, i);
101 		vp = mulPow5InvDivPow2(mp, q, i);
102 		vm = mulPow5InvDivPow2(mm, q, i);
103 #ifdef RYU_DEBUG
104 		printf("%u * 2^%d / 10^%u\n", mv, e2, q);
105 		printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
106 #endif
107 		if (q != 0 && (vp - 1) / 10 <= vm / 10) {
108 			// We need to know one removed digit even if we are not going to loop below. We could use
109 			// q = X - 1 above, except that would require 33 bits for the result, and we've found that
110 			// 32-bit arithmetic is faster even on 64-bit machines.
111 			const int32_t l = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t) (q - 1)) - 1;
112 			lastRemovedDigit = (uint8_t) (mulPow5InvDivPow2(mv, q - 1, -e2 + (int32_t) q - 1 + l) % 10);
113 		}
114 		if (q <= 9) {
115 			// The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be safe as well.
116 			// Only one of mp, mv, and mm can be a multiple of 5, if any.
117 			if (mv % 5 == 0) {
118 				vrIsTrailingZeros = multipleOfPowerOf5_32(mv, q);
119 			} else if (acceptBounds) {
120 				vmIsTrailingZeros = multipleOfPowerOf5_32(mm, q);
121 			} else {
122 				vp -= multipleOfPowerOf5_32(mp, q);
123 			}
124 		}
125 	} else {
126 		const uint32_t q = log10Pow5(-e2);
127 		e10 = (int32_t) q + e2;
128 		const int32_t i = -e2 - (int32_t) q;
129 		const int32_t k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
130 		int32_t j = (int32_t) q - k;
131 		vr = mulPow5divPow2(mv, (uint32_t) i, j);
132 		vp = mulPow5divPow2(mp, (uint32_t) i, j);
133 		vm = mulPow5divPow2(mm, (uint32_t) i, j);
134 #ifdef RYU_DEBUG
135 		printf("%u * 5^%d / 10^%u\n", mv, -e2, q);
136 		printf("%u %d %d %d\n", q, i, k, j);
137 		printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
138 #endif
139 		if (q != 0 && (vp - 1) / 10 <= vm / 10) {
140 			j = (int32_t) q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
141 			lastRemovedDigit = (uint8_t) (mulPow5divPow2(mv, (uint32_t) (i + 1), j) % 10);
142 		}
143 		if (q <= 1) {
144 			// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
145 			// mv = 4 * m2, so it always has at least two trailing 0 bits.
146 			vrIsTrailingZeros = true;
147 			if (acceptBounds) {
148 				// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
149 				vmIsTrailingZeros = mmShift == 1;
150 			} else {
151 				// mp = mv + 2, so it always has at least one trailing 0 bit.
152 				--vp;
153 			}
154 		} else if (q < 31) { // TODO(ulfjack): Use a tighter bound here.
155 			vrIsTrailingZeros = multipleOfPowerOf2_32(mv, q - 1);
156 #ifdef RYU_DEBUG
157 			printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
158 #endif
159 		}
160 	}
161 #ifdef RYU_DEBUG
162 	printf("e10=%d\n", e10);
163 	printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
164 	printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
165 	printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
166 #endif
167 
168 	// Step 4: Find the shortest decimal representation in the interval of valid representations.
169 	int32_t removed = 0;
170 	uint32_t output;
171 
172 	/* If limiting decimals, then limit the max digits
173 	 * to no more than the number of digits left of the decimal
174 	 * plus the number of digits right of the decimal
175 	 *
176 	 * exp:          exponent value. If negative, there are
177 	 *		 -exp - 1 zeros left of the first non-zero
178 	 *               digit in 'f' format. If non-negative,
179 	 *               there are exp digits to the left of
180 	 *               the decimal point
181 	 *
182 	 * max_decimals: Only used in 'f' format. Round to this many
183 	 *               digits to the right of the decimal point
184 	 *               (left if negative)
185 	 *
186 	 * max_digits:	 We can't convert more than this number of digits given
187 	 *               the limits of the buffer
188 	 */
189 
190 	int save_max_digits = max_digits;
191 	if(fmode) {
192 		int exp = e10 + decimalLength9(vr) - 1;
193 		/*
194 		 * This covers two cases:
195 		 *
196 		 * When exp is < 0, there are -exp-1 zeros taking up
197 		 * space before we can display any of the real digits,
198 		 * so we have to subtract those off max_decimals before
199 		 * we round that (max_decimals - (-exp - 1)). This
200 		 * may end up less than zero, in which case we have
201 		 * no digits to display.
202 		 *
203 		 * When exp >= 0, there are exp + 1 digits left of the
204 		 * decimal point *plus* max_decimals right of the
205 		 * decimal point that need to be generated
206 		 *
207 		 * A single expression gives the right answer in both
208 		 * cases, which is kinda cool
209 		 */
210 		max_digits = min_int(max_digits, max_int(1, max_decimals + exp + 1));
211 	}
212 
213 	for (;;) {
214 		if (vp / 10 <= vm / 10) {
215 			if (decimalLength9(vr) <= max_digits || (max_digits == 0 && vr == 0))
216 				break;
217 			else
218 				truncate_max = true;
219 		}
220 #ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=23106
221 		// The compiler does not realize that vm % 10 can be computed from vm / 10
222 		// as vm - (vm / 10) * 10.
223 		vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0;
224 #else
225 		vmIsTrailingZeros &= vm % 10 == 0;
226 #endif
227 		vrIsTrailingZeros &= lastRemovedDigit == 0;
228 		lastRemovedDigit = (uint8_t) (vr % 10);
229 		vr /= 10;
230 		vp /= 10;
231 		vm /= 10;
232 		++removed;
233 	}
234 #ifdef RYU_DEBUG
235 	printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
236 	printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
237 #endif
238 	if (vmIsTrailingZeros) {
239 		while (vm % 10 == 0) {
240 			vrIsTrailingZeros &= lastRemovedDigit == 0;
241 			lastRemovedDigit = (uint8_t) (vr % 10);
242 			vr /= 10;
243 			vp /= 10;
244 			vm /= 10;
245 			++removed;
246 		}
247 	}
248 #ifdef RYU_DEBUG
249 	printf("%u %d\n", vr, lastRemovedDigit);
250 	printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
251 #endif
252 	if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
253 		// Round even if the exact number is .....50..0.
254 		lastRemovedDigit = 4;
255 	}
256 	// We need to take vr + 1 if vr is outside bounds or we need to round up.
257 	output = vr;
258 	e10 += removed;
259 
260 	uint8_t carry = ((!truncate_max && vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
261 	output += carry;
262 
263 	int len = decimalLength9(output);
264 
265 	if (carry) {
266 		/* This can only happen if output has carried out of the top digit */
267 		if (len > max_digits) {
268 
269 			/* Recompute max digits in this case */
270                         if(fmode) {
271 				int exp = e10 + len - 1;
272 				max_digits = min_int(save_max_digits, max_int(1, max_decimals + exp + 1));
273 			}
274 
275 			if (len > max_digits) {
276 				output += 5;
277 				output /= 10;
278 				e10++;
279 				len--;
280 			}
281 		}
282 	}
283 	if (len > max_digits)
284 		len = max_digits;
285 
286 
287 #ifdef RYU_DEBUG
288 	printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
289 	printf("O=%u\n", output);
290 	printf("EXP=%d\n", exp);
291 #endif
292 
293 	floating_decimal_32 fd;
294 	fd.exponent = e10;
295 	fd.olength = len;
296 	fd.mantissa = output;
297 	return fd;
298 }
299 
300 #include "ftoa_engine.h"
301 
302 int
__ftoa_engine(float x,struct ftoa * ftoa,int max_digits,bool fmode,int max_decimals)303 __ftoa_engine(float x, struct ftoa *ftoa, int max_digits, bool fmode, int max_decimals)
304 {
305 	// Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
306 	const uint32_t bits = float_to_bits(x);
307 
308 	// Decode bits into sign, mantissa, and exponent.
309 	const bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
310 	const uint64_t ieeeMantissa = bits & ((1ull << FLOAT_MANTISSA_BITS) - 1);
311 	const uint32_t ieeeExponent = (uint32_t) ((bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1));
312 
313 	uint8_t	flags = 0;
314 
315 	if (ieeeSign)
316 		flags |= FTOA_MINUS;
317 
318 	if (ieeeExponent == 0 && ieeeMantissa == 0) {
319 		flags |= FTOA_ZERO;
320 		ftoa->digits[0] = '0';
321 		ftoa->digits[1] = '\0';
322 		ftoa->flags = flags;
323 		ftoa->exp = 0;
324 		return 1;
325 	}
326 	if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u)) {
327 		if (ieeeMantissa) {
328 			flags |= FTOA_NAN;
329 		} else {
330 			flags |= FTOA_INF;
331 		}
332 		ftoa->flags = flags;
333 		return 0;
334 	}
335 
336 	floating_decimal_32 v;
337 
338 	v = f2d(ieeeMantissa, ieeeExponent, max_digits, fmode, max_decimals);
339 
340 	uint32_t mant = v.mantissa;
341 	int32_t olength = v.olength;
342 	int32_t exp = v.exponent + olength - 1;
343 
344 	int i;
345 
346 	for (i = 0; i < olength; i++) {
347 		ftoa->digits[olength - i - 1] = (mant % 10) + '0';
348 		mant /= 10;
349 	}
350 	ftoa->digits[olength] = '\0';
351 
352 	ftoa->exp = exp;
353 	ftoa->flags = flags;
354 	return olength;
355 }
356