1 // Copyright 2018 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 // (See accompanying file LICENSE-Apache or copy at
7 // http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 // (See accompanying file LICENSE-Boost or copy at
12 // https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17
18 // Runtime compiler options:
19 // -DRYU_DEBUG Generate verbose debugging output to stdout.
20
21 #include "ryu/ryu.h"
22
23 #include <stdbool.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <limits.h>
28
29 #ifdef RYU_DEBUG
30 #include <stdio.h>
31 #endif
32
33 #include "ryu/common.h"
34 #include "ryu/f2s_intrinsics.h"
35 #include "ryu/digit_table.h"
36
37 #define FLOAT_MANTISSA_BITS 23
38 #define FLOAT_EXPONENT_BITS 8
39 #define FLOAT_BIAS 127
40
41 // Returns the number of decimal digits in v, which must not contain more than 9 digits.
decimalLength9(const uint32_t v)42 static int decimalLength9(const uint32_t v) {
43 int len = 1;
44 uint32_t c = 10;
45 while (c <= v) {
46 len++;
47 c = (c << 3) + (c << 1);
48 }
49 return len;
50 }
51
52 // A floating decimal representing m * 10^e.
53 typedef struct floating_decimal_32 {
54 uint32_t mantissa;
55 // Decimal exponent's range is -45 to 38
56 // inclusive, and can fit in a short if needed.
57 int16_t exponent;
58 int16_t olength;
59 } floating_decimal_32;
60
61 static inline floating_decimal_32
f2d(const uint32_t ieeeMantissa,const uint32_t ieeeExponent,int max_digits,bool fmode,int max_decimals)62 f2d(const uint32_t ieeeMantissa, const uint32_t ieeeExponent, int max_digits, bool fmode, int max_decimals)
63 {
64 int32_t e2;
65 uint32_t m2;
66 if (ieeeExponent == 0) {
67 // We subtract 2 so that the bounds computation has 2 additional bits.
68 e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
69 m2 = ieeeMantissa;
70 } else {
71 e2 = (int32_t) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
72 m2 = ((uint32_t)1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
73 }
74 const bool even = (m2 & 1) == 0;
75 const bool acceptBounds = even;
76 bool truncate_max = false;
77
78 #ifdef RYU_DEBUG
79 printf("-> %u * 2^%d\n", m2, e2 + 2);
80 #endif
81
82 // Step 2: Determine the interval of valid decimal representations.
83 const uint32_t mv = 4 * m2;
84 const uint32_t mp = 4 * m2 + 2;
85 // Implicit bool -> int conversion. True is 1, false is 0.
86 const uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
87 const uint32_t mm = 4 * m2 - 1 - mmShift;
88
89 // Step 3: Convert to a decimal power base using 64-bit arithmetic.
90 uint32_t vr, vp, vm;
91 int32_t e10;
92 bool vmIsTrailingZeros = false;
93 bool vrIsTrailingZeros = false;
94 uint8_t lastRemovedDigit = 0;
95 if (e2 >= 0) {
96 const uint32_t q = log10Pow2(e2);
97 e10 = (int32_t) q;
98 const int32_t k = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t) q) - 1;
99 const int32_t i = -e2 + (int32_t) q + k;
100 vr = mulPow5InvDivPow2(mv, q, i);
101 vp = mulPow5InvDivPow2(mp, q, i);
102 vm = mulPow5InvDivPow2(mm, q, i);
103 #ifdef RYU_DEBUG
104 printf("%u * 2^%d / 10^%u\n", mv, e2, q);
105 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
106 #endif
107 if (q != 0 && (vp - 1) / 10 <= vm / 10) {
108 // We need to know one removed digit even if we are not going to loop below. We could use
109 // q = X - 1 above, except that would require 33 bits for the result, and we've found that
110 // 32-bit arithmetic is faster even on 64-bit machines.
111 const int32_t l = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t) (q - 1)) - 1;
112 lastRemovedDigit = (uint8_t) (mulPow5InvDivPow2(mv, q - 1, -e2 + (int32_t) q - 1 + l) % 10);
113 }
114 if (q <= 9) {
115 // The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be safe as well.
116 // Only one of mp, mv, and mm can be a multiple of 5, if any.
117 if (mv % 5 == 0) {
118 vrIsTrailingZeros = multipleOfPowerOf5_32(mv, q);
119 } else if (acceptBounds) {
120 vmIsTrailingZeros = multipleOfPowerOf5_32(mm, q);
121 } else {
122 vp -= multipleOfPowerOf5_32(mp, q);
123 }
124 }
125 } else {
126 const uint32_t q = log10Pow5(-e2);
127 e10 = (int32_t) q + e2;
128 const int32_t i = -e2 - (int32_t) q;
129 const int32_t k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
130 int32_t j = (int32_t) q - k;
131 vr = mulPow5divPow2(mv, (uint32_t) i, j);
132 vp = mulPow5divPow2(mp, (uint32_t) i, j);
133 vm = mulPow5divPow2(mm, (uint32_t) i, j);
134 #ifdef RYU_DEBUG
135 printf("%u * 5^%d / 10^%u\n", mv, -e2, q);
136 printf("%u %d %d %d\n", q, i, k, j);
137 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
138 #endif
139 if (q != 0 && (vp - 1) / 10 <= vm / 10) {
140 j = (int32_t) q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
141 lastRemovedDigit = (uint8_t) (mulPow5divPow2(mv, (uint32_t) (i + 1), j) % 10);
142 }
143 if (q <= 1) {
144 // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
145 // mv = 4 * m2, so it always has at least two trailing 0 bits.
146 vrIsTrailingZeros = true;
147 if (acceptBounds) {
148 // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
149 vmIsTrailingZeros = mmShift == 1;
150 } else {
151 // mp = mv + 2, so it always has at least one trailing 0 bit.
152 --vp;
153 }
154 } else if (q < 31) { // TODO(ulfjack): Use a tighter bound here.
155 vrIsTrailingZeros = multipleOfPowerOf2_32(mv, q - 1);
156 #ifdef RYU_DEBUG
157 printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
158 #endif
159 }
160 }
161 #ifdef RYU_DEBUG
162 printf("e10=%d\n", e10);
163 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
164 printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
165 printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
166 #endif
167
168 // Step 4: Find the shortest decimal representation in the interval of valid representations.
169 int32_t removed = 0;
170 uint32_t output;
171
172 /* If limiting decimals, then limit the max digits
173 * to no more than the number of digits left of the decimal
174 * plus the number of digits right of the decimal
175 *
176 * exp: exponent value. If negative, there are
177 * -exp - 1 zeros left of the first non-zero
178 * digit in 'f' format. If non-negative,
179 * there are exp digits to the left of
180 * the decimal point
181 *
182 * max_decimals: Only used in 'f' format. Round to this many
183 * digits to the right of the decimal point
184 * (left if negative)
185 *
186 * max_digits: We can't convert more than this number of digits given
187 * the limits of the buffer
188 */
189
190 int save_max_digits = max_digits;
191 if(fmode) {
192 int exp = e10 + decimalLength9(vr) - 1;
193 /*
194 * This covers two cases:
195 *
196 * When exp is < 0, there are -exp-1 zeros taking up
197 * space before we can display any of the real digits,
198 * so we have to subtract those off max_decimals before
199 * we round that (max_decimals - (-exp - 1)). This
200 * may end up less than zero, in which case we have
201 * no digits to display.
202 *
203 * When exp >= 0, there are exp + 1 digits left of the
204 * decimal point *plus* max_decimals right of the
205 * decimal point that need to be generated
206 *
207 * A single expression gives the right answer in both
208 * cases, which is kinda cool
209 */
210 max_digits = min_int(max_digits, max_int(1, max_decimals + exp + 1));
211 }
212
213 for (;;) {
214 if (vp / 10 <= vm / 10) {
215 if (decimalLength9(vr) <= max_digits || (max_digits == 0 && vr == 0))
216 break;
217 else
218 truncate_max = true;
219 }
220 #ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=23106
221 // The compiler does not realize that vm % 10 can be computed from vm / 10
222 // as vm - (vm / 10) * 10.
223 vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0;
224 #else
225 vmIsTrailingZeros &= vm % 10 == 0;
226 #endif
227 vrIsTrailingZeros &= lastRemovedDigit == 0;
228 lastRemovedDigit = (uint8_t) (vr % 10);
229 vr /= 10;
230 vp /= 10;
231 vm /= 10;
232 ++removed;
233 }
234 #ifdef RYU_DEBUG
235 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
236 printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
237 #endif
238 if (vmIsTrailingZeros) {
239 while (vm % 10 == 0) {
240 vrIsTrailingZeros &= lastRemovedDigit == 0;
241 lastRemovedDigit = (uint8_t) (vr % 10);
242 vr /= 10;
243 vp /= 10;
244 vm /= 10;
245 ++removed;
246 }
247 }
248 #ifdef RYU_DEBUG
249 printf("%u %d\n", vr, lastRemovedDigit);
250 printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
251 #endif
252 if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
253 // Round even if the exact number is .....50..0.
254 lastRemovedDigit = 4;
255 }
256 // We need to take vr + 1 if vr is outside bounds or we need to round up.
257 output = vr;
258 e10 += removed;
259
260 uint8_t carry = ((!truncate_max && vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
261 output += carry;
262
263 int len = decimalLength9(output);
264
265 if (carry) {
266 /* This can only happen if output has carried out of the top digit */
267 if (len > max_digits) {
268
269 /* Recompute max digits in this case */
270 if(fmode) {
271 int exp = e10 + len - 1;
272 max_digits = min_int(save_max_digits, max_int(1, max_decimals + exp + 1));
273 }
274
275 if (len > max_digits) {
276 output += 5;
277 output /= 10;
278 e10++;
279 len--;
280 }
281 }
282 }
283 if (len > max_digits)
284 len = max_digits;
285
286
287 #ifdef RYU_DEBUG
288 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm);
289 printf("O=%u\n", output);
290 printf("EXP=%d\n", exp);
291 #endif
292
293 floating_decimal_32 fd;
294 fd.exponent = e10;
295 fd.olength = len;
296 fd.mantissa = output;
297 return fd;
298 }
299
300 #include "ftoa_engine.h"
301
302 int
__ftoa_engine(float x,struct ftoa * ftoa,int max_digits,bool fmode,int max_decimals)303 __ftoa_engine(float x, struct ftoa *ftoa, int max_digits, bool fmode, int max_decimals)
304 {
305 // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
306 const uint32_t bits = float_to_bits(x);
307
308 // Decode bits into sign, mantissa, and exponent.
309 const bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
310 const uint64_t ieeeMantissa = bits & ((1ull << FLOAT_MANTISSA_BITS) - 1);
311 const uint32_t ieeeExponent = (uint32_t) ((bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1));
312
313 uint8_t flags = 0;
314
315 if (ieeeSign)
316 flags |= FTOA_MINUS;
317
318 if (ieeeExponent == 0 && ieeeMantissa == 0) {
319 flags |= FTOA_ZERO;
320 ftoa->digits[0] = '0';
321 ftoa->digits[1] = '\0';
322 ftoa->flags = flags;
323 ftoa->exp = 0;
324 return 1;
325 }
326 if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u)) {
327 if (ieeeMantissa) {
328 flags |= FTOA_NAN;
329 } else {
330 flags |= FTOA_INF;
331 }
332 ftoa->flags = flags;
333 return 0;
334 }
335
336 floating_decimal_32 v;
337
338 v = f2d(ieeeMantissa, ieeeExponent, max_digits, fmode, max_decimals);
339
340 uint32_t mant = v.mantissa;
341 int32_t olength = v.olength;
342 int32_t exp = v.exponent + olength - 1;
343
344 int i;
345
346 for (i = 0; i < olength; i++) {
347 ftoa->digits[olength - i - 1] = (mant % 10) + '0';
348 mant /= 10;
349 }
350 ftoa->digits[olength] = '\0';
351
352 ftoa->exp = exp;
353 ftoa->flags = flags;
354 return olength;
355 }
356