1 // Copyright 2018 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 //    (See accompanying file LICENSE-Apache or copy at
7 //     http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 //    (See accompanying file LICENSE-Boost or copy at
12 //     https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17 
18 // Runtime compiler options:
19 // -DRYU_DEBUG Generate verbose debugging output to stdout.
20 //
21 // -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,
22 //     depending on your compiler.
23 //
24 
25 #include "ryu/ryu.h"
26 
27 #include <stdbool.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #ifdef RYU_DEBUG
33 #include <inttypes.h>
34 #include <stdio.h>
35 #endif
36 
37 #include "ryu/common.h"
38 #include "ryu/d2s_intrinsics.h"
39 
40 #define DOUBLE_MANTISSA_BITS 52
41 #define DOUBLE_EXPONENT_BITS 11
42 #define DOUBLE_BIAS 1023
43 
decimalLength17(const uint64_t v)44 static int decimalLength17(const uint64_t v) {
45 	int len = 1;
46 	uint64_t c = 10;
47 	while (c <= v) {
48 		len++;
49 		c = (c << 3) + (c << 1);
50 	}
51 	return len;
52 }
53 
54 // A floating decimal representing m * 10^e.
55 typedef struct floating_decimal_64 {
56 	uint64_t mantissa;
57 	// Decimal exponent's range is -324 to 308
58 	// inclusive, and can fit in a short if needed.
59 	int16_t exponent;
60 	int16_t olength;
61 } floating_decimal_64;
62 
63 static inline floating_decimal_64
d2d(const uint64_t ieeeMantissa,const uint32_t ieeeExponent,int max_digits,bool fmode,int max_decimals)64 d2d(const uint64_t ieeeMantissa, const uint32_t ieeeExponent, int max_digits, bool fmode, int max_decimals)
65 {
66 	int32_t e2;
67 	uint64_t m2;
68 	if (ieeeExponent == 0) {
69 		// We subtract 2 so that the bounds computation has 2 additional bits.
70 		e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
71 		m2 = ieeeMantissa;
72 	} else {
73 		e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
74 		m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
75 	}
76 	const bool even = (m2 & 1) == 0;
77 	const bool acceptBounds = even;
78 	/* Set if we're truncating more digits to meet format request */
79 	bool truncate_max = false;
80 
81 #ifdef RYU_DEBUG
82 	printf("-> %" PRIu64 " * 2^%d\n", m2, e2 + 2);
83 #endif
84 
85 	// Step 2: Determine the interval of valid decimal representations.
86 	const uint64_t mv = 4 * m2;
87 	// Implicit bool -> int conversion. True is 1, false is 0.
88 	const uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
89 	// We would compute mp and mm like this:
90 	// uint64_t mp = 4 * m2 + 2;
91 	// uint64_t mm = mv - 1 - mmShift;
92 
93 	// Step 3: Convert to a decimal power base using 128-bit arithmetic.
94 	uint64_t vr, vp, vm;
95 	int32_t e10;
96 	bool vmIsTrailingZeros = false;
97 	bool vrIsTrailingZeros = false;
98 	if (e2 >= 0) {
99 		// I tried special-casing q == 0, but there was no effect on performance.
100 		// This expression is slightly faster than max_int(0, log10Pow2(e2) - 1).
101 		const uint32_t q = log10Pow2(e2) - (e2 > 3);
102 		e10 = (int32_t) q;
103 		const int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t) q) - 1;
104 		const int32_t i = -e2 + (int32_t) q + k;
105 		uint64_t pow5[2];
106 		__double_computeInvPow5(q, pow5);
107 		vr = mulShiftAll64(m2, pow5, i, &vp, &vm, mmShift);
108 #ifdef RYU_DEBUG
109 		printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);
110 		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
111 #endif
112 		if (q <= 21) {
113 			// This should use q <= 22, but I think 21 is also safe. Smaller values
114 			// may still be safe, but it's more difficult to reason about them.
115 			// Only one of mp, mv, and mm can be a multiple of 5, if any.
116 			const uint32_t mvMod5 = ((uint32_t) mv) - 5 * ((uint32_t) div5(mv));
117 			if (mvMod5 == 0) {
118 				vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
119 			} else if (acceptBounds) {
120 				// Same as min_int(e2 + (~mm & 1), pow5Factor(mm)) >= q
121 				// <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
122 				// <=> true && pow5Factor(mm) >= q, since e2 >= q.
123 				vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
124 			} else {
125 				// Same as min_int(e2 + 1, pow5Factor(mp)) >= q.
126 				vp -= multipleOfPowerOf5(mv + 2, q);
127 			}
128 		}
129 	} else {
130 		// This expression is slightly faster than max_int(0, log10Pow5(-e2) - 1).
131 		const uint32_t q = log10Pow5(-e2) - (-e2 > 1);
132 		e10 = (int32_t) q + e2;
133 		const int32_t i = -e2 - (int32_t) q;
134 		const int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
135 		const int32_t j = (int32_t) q - k;
136 		uint64_t pow5[2];
137 		__double_computePow5(i, pow5);
138 		vr = mulShiftAll64(m2, pow5, j, &vp, &vm, mmShift);
139 
140 #ifdef RYU_DEBUG
141 		printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);
142 		printf("%u %d %d %d\n", q, i, k, j);
143 		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
144 #endif
145 		if (q <= 1) {
146 			// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
147 			// mv = 4 * m2, so it always has at least two trailing 0 bits.
148 			vrIsTrailingZeros = true;
149 			if (acceptBounds) {
150 				// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
151 				vmIsTrailingZeros = mmShift == 1;
152 			} else {
153 				// mp = mv + 2, so it always has at least one trailing 0 bit.
154 				--vp;
155 			}
156 		} else if (q < 63) { // TODO(ulfjack): Use a tighter bound here.
157 			// We want to know if the full product has at least q trailing zeros.
158 			// We need to compute min_int(p2(mv), p5(mv) - e2) >= q
159 			// <=> p2(mv) >= q && p5(mv) - e2 >= q
160 			// <=> p2(mv) >= q (because -e2 >= q)
161 			vrIsTrailingZeros = multipleOfPowerOf2(mv, q);
162 #ifdef RYU_DEBUG
163 			printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
164 #endif
165 		}
166 	}
167 #ifdef RYU_DEBUG
168 	printf("e10=%d\n", e10);
169 	printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
170 	printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
171 	printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
172 #endif
173 
174 	// Step 4: Find the shortest decimal representation in the interval of valid representations.
175 	int32_t removed = 0;
176 	uint8_t lastRemovedDigit = 0;
177 	uint64_t output;
178 	// On average, we remove ~2 digits.
179 	// General case, which happens rarely (~0.7%).
180 
181 	/* If limiting decimals, then limit the max digits
182 	 * to no more than the number of digits left of the decimal
183 	 * plus the number of digits right of the decimal
184 	 *
185 	 * exp:          exponent value. If negative, there are
186 	 *		 -exp - 1 zeros left of the first non-zero
187 	 *               digit in 'f' format. If non-negative,
188 	 *               there are exp digits to the left of
189 	 *               the decimal point
190 	 *
191 	 * max_decimals: Only used in 'f' format. Round to this many
192 	 *               digits to the right of the decimal point
193 	 *               (left if negative)
194 	 *
195 	 * max_digits:	 We can't convert more than this number of digits given
196 	 *               the limits of the buffer
197 	 */
198 
199 	int save_max_digits = max_digits;
200 	if(fmode) {
201 		int exp = e10 + decimalLength17(vr) - 1;
202 		/*
203 		 * This covers two cases:
204 		 *
205 		 * When exp is < 0, there are -exp-1 zeros taking up
206 		 * space before we can display any of the real digits,
207 		 * so we have to subtract those off max_decimals before
208 		 * we round that (max_decimals - (-exp - 1)). This
209 		 * may end up less than zero, in which case we have
210 		 * no digits to display.
211 		 *
212 		 * When exp >= 0, there are exp + 1 digits left of the
213 		 * decimal point *plus* max_decimals right of the
214 		 * decimal point that need to be generated
215 		 *
216 		 * A single expression gives the right answer in both
217 		 * cases, which is kinda cool
218 		 */
219 		max_digits = min_int(max_digits, max_int(1, max_decimals + exp + 1));
220 	}
221 
222 	for (;;) {
223 		const uint64_t vpDiv10 = div10(vp);
224 		const uint64_t vmDiv10 = div10(vm);
225 		if (vpDiv10 <= vmDiv10) {
226 			if (decimalLength17(vr) <= max_digits || (max_digits == 0 && vr == 0))
227 				break;
228 			else
229 				truncate_max = true;
230 		}
231 		const uint32_t vmMod10 = ((uint32_t) vm) - 10 * ((uint32_t) vmDiv10);
232 		const uint64_t vrDiv10 = div10(vr);
233 		const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
234 		vmIsTrailingZeros &= vmMod10 == 0;
235 		vrIsTrailingZeros &= lastRemovedDigit == 0;
236 		lastRemovedDigit = (uint8_t) vrMod10;
237 		vr = vrDiv10;
238 		vp = vpDiv10;
239 		vm = vmDiv10;
240 		++removed;
241 	}
242 #ifdef RYU_DEBUG
243 	printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
244 	printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
245 #endif
246 	if (vmIsTrailingZeros) {
247 		for (;;) {
248 			const uint64_t vmDiv10 = div10(vm);
249 			const uint32_t vmMod10 = ((uint32_t) vm) - 10 * ((uint32_t) vmDiv10);
250 			if (vmMod10 != 0) {
251 				break;
252 			}
253 			const uint64_t vpDiv10 = div10(vp);
254 			const uint64_t vrDiv10 = div10(vr);
255 			const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
256 			vrIsTrailingZeros &= lastRemovedDigit == 0;
257 			lastRemovedDigit = (uint8_t) vrMod10;
258 			vr = vrDiv10;
259 			vp = vpDiv10;
260 			vm = vmDiv10;
261 			++removed;
262 		}
263 	}
264 #ifdef RYU_DEBUG
265 	printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);
266 	printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
267 #endif
268 	if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
269 		// Round even if the exact number is .....50..0.
270 		lastRemovedDigit = 4;
271 	}
272 
273 	output = vr;
274 	e10 += removed;
275 
276 	// We need to take vr + 1 if vr is outside bounds or we need to round up.
277 	// I don't know if the 'truncate_max' case is entirely correct; need some tests
278 	uint8_t carry = ((!truncate_max && vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
279 	output += carry;
280 
281 	int len = decimalLength17(output);
282 
283 	if (carry) {
284 		/* This can only happen if output has carried out of the top digit */
285 		if (len > max_digits) {
286 
287 			/* Recompute max digits in this case */
288                         if(fmode) {
289 				int exp = e10 + len - 1;
290 				/* max_decimals comes in biased by 1 to flag the 'f' case */
291 				max_digits = min_int(save_max_digits, max_int(0, max_decimals + exp + 1));
292 			}
293 
294 			if (len > max_digits) {
295 				output += 5;
296 				output /= 10;
297 				e10++;
298 				len--;
299 			}
300 		}
301 	}
302 	if (len > max_digits)
303 		len = max_digits;
304 
305 #ifdef RYU_DEBUG
306 	printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
307 	printf("O=%" PRIu64 "\n", output);
308 	printf("EXP=%d\n", exp);
309 #endif
310 
311 	floating_decimal_64 fd;
312 	fd.exponent = e10;
313 	fd.olength = len;
314 	fd.mantissa = output;
315 	return fd;
316 }
317 
d2d_small_int(const uint64_t ieeeMantissa,const uint32_t ieeeExponent,floating_decimal_64 * const v)318 static inline bool d2d_small_int(const uint64_t ieeeMantissa, const uint32_t ieeeExponent,
319 				 floating_decimal_64* const v) {
320 	const uint64_t m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
321 	const int32_t e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
322 
323 	if (e2 > 0) {
324 		// f = m2 * 2^e2 >= 2^53 is an integer.
325 		// Ignore this case for now.
326 		return false;
327 	}
328 
329 	if (e2 < -52) {
330 		// f < 1.
331 		return false;
332 	}
333 
334 	// Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: 1 <= f = m2 / 2^-e2 < 2^53.
335 	// Test if the lower -e2 bits of the significand are 0, i.e. whether the fraction is 0.
336 	const uint64_t mask = (1ull << -e2) - 1;
337 	const uint64_t fraction = m2 & mask;
338 	if (fraction != 0) {
339 		return false;
340 	}
341 
342 	// f is an integer in the range [1, 2^53).
343 	// Note: mantissa might contain trailing (decimal) 0's.
344 	// Note: since 2^53 < 10^16, there is no need to adjust decimalLength17().
345 	v->mantissa = m2 >> -e2;
346 	v->exponent = 0;
347 	return true;
348 }
349 
350 #include "dtoa_engine.h"
351 
352 int
__dtoa_engine(double x,struct dtoa * dtoa,int max_digits,bool fmode,int max_decimals)353 __dtoa_engine(double x, struct dtoa *dtoa, int max_digits, bool fmode, int max_decimals)
354 {
355 	// Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
356 	const uint64_t bits = double_to_bits(x);
357 
358 #ifdef RYU_DEBUG
359 	printf("IN=");
360 	for (int32_t bit = 63; bit >= 0; --bit) {
361 		printf("%d", (int) ((bits >> bit) & 1));
362 	}
363 	printf("\n");
364 #endif
365 
366 	// Decode bits into sign, mantissa, and exponent.
367 	const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
368 	const uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);
369 	const uint32_t ieeeExponent = (uint32_t) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
370 
371 	uint8_t	flags = 0;
372 
373 	if (ieeeSign)
374 		flags |= DTOA_MINUS;
375 	if (ieeeExponent == 0 && ieeeMantissa == 0) {
376 		flags |= DTOA_ZERO;
377 		dtoa->digits[0] = '0';
378 		dtoa->digits[1] = '\0';
379 		dtoa->flags = flags;
380 		dtoa->exp = 0;
381 		return 1;
382 	}
383 	if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) {
384 		if (ieeeMantissa) {
385 			flags |= DTOA_NAN;
386 		} else {
387 			flags |= DTOA_INF;
388 		}
389 		dtoa->flags = flags;
390 		return 0;
391 	}
392 
393 	floating_decimal_64 v;
394 	const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);
395 	if (isSmallInt && 0) {
396 		// For small integers in the range [1, 2^53), v.mantissa might contain trailing (decimal) zeros.
397 		// For scientific notation we need to move these zeros into the exponent.
398 		// (This is not needed for fixed-point notation, so it might be beneficial to trim
399 		// trailing zeros in to_chars only if needed - once fixed-point notation output is implemented.)
400 		for (;;) {
401 			const uint64_t q = div10(v.mantissa);
402 			const uint32_t r = ((uint32_t) v.mantissa) - 10 * ((uint32_t) q);
403 			if (r != 0) {
404 				break;
405 			}
406 			v.mantissa = q;
407 			++v.exponent;
408 		}
409 	} else {
410 		v = d2d(ieeeMantissa, ieeeExponent, max_digits, fmode, max_decimals);
411 	}
412 
413 	uint64_t mant = v.mantissa;
414 	int32_t olength = v.olength;
415 	int32_t exp = v.exponent + olength - 1;
416 
417 	int i;
418 
419 	for (i = 0; i < olength; i++) {
420 		dtoa->digits[olength - i - 1] = (mant % 10) + '0';
421 		mant /= 10;
422 	}
423 	dtoa->digits[olength] = '\0';
424 
425 	dtoa->exp = exp;
426 	dtoa->flags = flags;
427 	return olength;
428 }
429