1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 #include "clzll.h"
24 #include <limits>
25 
26 #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
27 #include <intrin.h>
28 #if !defined(_ARM64EC_)
29 #pragma intrinsic(_umul128)
30 #else
31 #pragma comment(lib,"softintrin")
32 #endif
33 #endif
34 
35 RAPIDJSON_NAMESPACE_BEGIN
36 namespace internal {
37 
38 #ifdef __GNUC__
39 RAPIDJSON_DIAG_PUSH
40 RAPIDJSON_DIAG_OFF(effc++)
41 #endif
42 
43 #ifdef __clang__
44 RAPIDJSON_DIAG_PUSH
45 RAPIDJSON_DIAG_OFF(padded)
46 #endif
47 
48 struct DiyFp {
DiyFpDiyFp49     DiyFp() : f(), e() {}
50 
DiyFpDiyFp51     DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
52 
DiyFpDiyFp53     explicit DiyFp(double d) {
54         union {
55             double d;
56             uint64_t u64;
57         } u = { d };
58 
59         int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
60         uint64_t significand = (u.u64 & kDpSignificandMask);
61         if (biased_e != 0) {
62             f = significand + kDpHiddenBit;
63             e = biased_e - kDpExponentBias;
64         }
65         else {
66             f = significand;
67             e = kDpMinExponent + 1;
68         }
69     }
70 
71     DiyFp operator-(const DiyFp& rhs) const {
72         return DiyFp(f - rhs.f, e);
73     }
74 
75     DiyFp operator*(const DiyFp& rhs) const {
76 #if defined(_MSC_VER) && defined(_M_AMD64)
77         uint64_t h;
78         uint64_t l = _umul128(f, rhs.f, &h);
79         if (l & (uint64_t(1) << 63)) // rounding
80             h++;
81         return DiyFp(h, e + rhs.e + 64);
82 #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
83         __extension__ typedef unsigned __int128 uint128;
84         uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
85         uint64_t h = static_cast<uint64_t>(p >> 64);
86         uint64_t l = static_cast<uint64_t>(p);
87         if (l & (uint64_t(1) << 63)) // rounding
88             h++;
89         return DiyFp(h, e + rhs.e + 64);
90 #else
91         const uint64_t M32 = 0xFFFFFFFF;
92         const uint64_t a = f >> 32;
93         const uint64_t b = f & M32;
94         const uint64_t c = rhs.f >> 32;
95         const uint64_t d = rhs.f & M32;
96         const uint64_t ac = a * c;
97         const uint64_t bc = b * c;
98         const uint64_t ad = a * d;
99         const uint64_t bd = b * d;
100         uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
101         tmp += 1U << 31;  /// mult_round
102         return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
103 #endif
104     }
105 
NormalizeDiyFp106     DiyFp Normalize() const {
107         int s = static_cast<int>(clzll(f));
108         return DiyFp(f << s, e - s);
109     }
110 
NormalizeBoundaryDiyFp111     DiyFp NormalizeBoundary() const {
112         DiyFp res = *this;
113         while (!(res.f & (kDpHiddenBit << 1))) {
114             res.f <<= 1;
115             res.e--;
116         }
117         res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
118         res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
119         return res;
120     }
121 
NormalizedBoundariesDiyFp122     void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
123         DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
124         DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
125         mi.f <<= mi.e - pl.e;
126         mi.e = pl.e;
127         *plus = pl;
128         *minus = mi;
129     }
130 
ToDoubleDiyFp131     double ToDouble() const {
132         union {
133             double d;
134             uint64_t u64;
135         }u;
136         RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
137         if (e < kDpDenormalExponent) {
138             // Underflow.
139             return 0.0;
140         }
141         if (e >= kDpMaxExponent) {
142             // Overflow.
143             return std::numeric_limits<double>::infinity();
144         }
145         const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
146             static_cast<uint64_t>(e + kDpExponentBias);
147         u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
148         return u.d;
149     }
150 
151     static const int kDiySignificandSize = 64;
152     static const int kDpSignificandSize = 52;
153     static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
154     static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
155     static const int kDpMinExponent = -kDpExponentBias;
156     static const int kDpDenormalExponent = -kDpExponentBias + 1;
157     static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
158     static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
159     static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
160 
161     uint64_t f;
162     int e;
163 };
164 
GetCachedPowerByIndex(size_t index)165 inline DiyFp GetCachedPowerByIndex(size_t index) {
166     // 10^-348, 10^-340, ..., 10^340
167     static const uint64_t kCachedPowers_F[] = {
168         RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
169         RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
170         RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
171         RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
172         RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
173         RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
174         RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
175         RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
176         RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
177         RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
178         RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
179         RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
180         RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
181         RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
182         RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
183         RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
184         RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
185         RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
186         RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
187         RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
188         RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
189         RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
190         RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
191         RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
192         RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
193         RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
194         RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
195         RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
196         RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
197         RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
198         RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
199         RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
200         RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
201         RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
202         RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
203         RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
204         RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
205         RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
206         RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
207         RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
208         RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
209         RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
210         RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
211         RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
212     };
213     static const int16_t kCachedPowers_E[] = {
214         -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
215         -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
216         -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
217         -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
218         -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
219         109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
220         375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
221         641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
222         907,   933,   960,   986,  1013,  1039,  1066
223     };
224     RAPIDJSON_ASSERT(index < 87);
225     return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
226 }
227 
GetCachedPower(int e,int * K)228 inline DiyFp GetCachedPower(int e, int* K) {
229 
230     //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
231     double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
232     int k = static_cast<int>(dk);
233     if (dk - k > 0.0)
234         k++;
235 
236     unsigned index = static_cast<unsigned>((k >> 3) + 1);
237     *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
238 
239     return GetCachedPowerByIndex(index);
240 }
241 
GetCachedPower10(int exp,int * outExp)242 inline DiyFp GetCachedPower10(int exp, int *outExp) {
243     RAPIDJSON_ASSERT(exp >= -348);
244     unsigned index = static_cast<unsigned>(exp + 348) / 8u;
245     *outExp = -348 + static_cast<int>(index) * 8;
246     return GetCachedPowerByIndex(index);
247 }
248 
249 #ifdef __GNUC__
250 RAPIDJSON_DIAG_POP
251 #endif
252 
253 #ifdef __clang__
254 RAPIDJSON_DIAG_POP
255 RAPIDJSON_DIAG_OFF(padded)
256 #endif
257 
258 } // namespace internal
259 RAPIDJSON_NAMESPACE_END
260 
261 #endif // RAPIDJSON_DIYFP_H_
262