1 // Copyright 2018 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 //    (See accompanying file LICENSE-Apache or copy at
7 //     http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 //    (See accompanying file LICENSE-Boost or copy at
12 //     https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17 
18 // Defines HAS_UINT128 and uint128_t if applicable.
19 #include <stdbool.h>
20 #include "ryu/common.h"
21 #include "ryu/d2s_intrinsics.h"
22 
23 static const uint64_t DOUBLE_POW5_INV_SPLIT2[15][2] = {
24   {                    1u, 2305843009213693952u },
25   {  5955668970331000884u, 1784059615882449851u },
26   {  8982663654677661702u, 1380349269358112757u },
27   {  7286864317269821294u, 2135987035920910082u },
28   {  7005857020398200553u, 1652639921975621497u },
29   { 17965325103354776697u, 1278668206209430417u },
30   {  8928596168509315048u, 1978643211784836272u },
31   { 10075671573058298858u, 1530901034580419511u },
32   {   597001226353042382u, 1184477304306571148u },
33   {  1527430471115325346u, 1832889850782397517u },
34   { 12533209867169019542u, 1418129833677084982u },
35   {  5577825024675947042u, 2194449627517475473u },
36   { 11006974540203867551u, 1697873161311732311u },
37   { 10313493231639821582u, 1313665730009899186u },
38   { 12701016819766672773u, 2032799256770390445u }
39 };
40 static const uint32_t POW5_INV_OFFSETS[21] = {
41   0x54544554, 0x04055545, 0x10041000, 0x00400414, 0x40010000, 0x41155555,
42   0x00000454, 0x00010044, 0x40000000, 0x44000041, 0x50454450, 0x55550054,
43   0x51655554, 0x40004000, 0x01000001, 0x00010500, 0x51515411, 0x05555554,
44   0x00000000, 0x00000000, 0x00000000
45 };
46 
47 static const uint64_t DOUBLE_POW5_SPLIT2[13][2] = {
48   {                    0u, 1152921504606846976u },
49   {                    0u, 1490116119384765625u },
50   {  1032610780636961552u, 1925929944387235853u },
51   {  7910200175544436838u, 1244603055572228341u },
52   { 16941905809032713930u, 1608611746708759036u },
53   { 13024893955298202172u, 2079081953128979843u },
54   {  6607496772837067824u, 1343575221513417750u },
55   { 17332926989895652603u, 1736530273035216783u },
56   { 13037379183483547984u, 2244412773384604712u },
57   {  1605989338741628675u, 1450417759929778918u },
58   {  9630225068416591280u, 1874621017369538693u },
59   {   665883850346957067u, 1211445438634777304u },
60   { 14931890668723713708u, 1565756531257009982u }
61 };
62 static const uint32_t POW5_OFFSETS[21] = {
63   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x59695995,
64   0x55545555, 0x56555515, 0x41150504, 0x40555410, 0x44555145, 0x44504540,
65   0x45555550, 0x40004000, 0x96440440, 0x55565565, 0x54454045, 0x40154151,
66   0x55559155, 0x51405555, 0x00000105
67 };
68 
69 #define POW5_TABLE_SIZE 26
70 static const uint64_t DOUBLE_POW5_TABLE[POW5_TABLE_SIZE] = {
71 1ull, 5ull, 25ull, 125ull, 625ull, 3125ull, 15625ull, 78125ull, 390625ull,
72 1953125ull, 9765625ull, 48828125ull, 244140625ull, 1220703125ull, 6103515625ull,
73 30517578125ull, 152587890625ull, 762939453125ull, 3814697265625ull,
74 19073486328125ull, 95367431640625ull, 476837158203125ull,
75 2384185791015625ull, 11920928955078125ull, 59604644775390625ull,
76 298023223876953125ull //, 1490116119384765625ull
77 };
78 
__pow5Factor(uint64_t value)79 uint32_t __pow5Factor(uint64_t value) {
80   const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64)
81   const uint64_t n_div_5 = 3689348814741910323u;  // #{ n | n = 0 (mod 2^64) } = 2^64 / 5
82   uint32_t count = 0;
83   for (;;) {
84     assert(value != 0);
85     value *= m_inv_5;
86     if (value > n_div_5)
87       break;
88     ++count;
89   }
90   return count;
91 }
92 
93 #if defined(HAS_UINT128)
94 
95 // Computes 5^i in the form required by Ryu, and stores it in the given pointer.
__double_computePow5(const uint32_t i,uint64_t * const result)96 void __double_computePow5(const uint32_t i, uint64_t* const result) {
97   const uint32_t base = i / POW5_TABLE_SIZE;
98   const uint32_t base2 = base * POW5_TABLE_SIZE;
99   const uint32_t offset = i - base2;
100   const uint64_t* const mul = DOUBLE_POW5_SPLIT2[base];
101   if (offset == 0) {
102     result[0] = mul[0];
103     result[1] = mul[1];
104     return;
105   }
106   const uint64_t m = DOUBLE_POW5_TABLE[offset];
107   const uint128_t b0 = ((uint128_t) m) * mul[0];
108   const uint128_t b2 = ((uint128_t) m) * mul[1];
109   const uint32_t delta = pow5bits(i) - pow5bits(base2);
110   const uint128_t shiftedSum = (b0 >> delta) + (b2 << (64 - delta)) + ((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
111   result[0] = (uint64_t) shiftedSum;
112   result[1] = (uint64_t) (shiftedSum >> 64);
113 }
114 
115 // Computes 5^-i in the form required by Ryu, and stores it in the given pointer.
__double_computeInvPow5(const uint32_t i,uint64_t * const result)116 void __double_computeInvPow5(const uint32_t i, uint64_t* const result) {
117   const uint32_t base = (i + POW5_TABLE_SIZE - 1) / POW5_TABLE_SIZE;
118   const uint32_t base2 = base * POW5_TABLE_SIZE;
119   const uint32_t offset = base2 - i;
120   const uint64_t* const mul = DOUBLE_POW5_INV_SPLIT2[base]; // 1/5^base2
121   if (offset == 0) {
122     result[0] = mul[0];
123     result[1] = mul[1];
124     return;
125   }
126   const uint64_t m = DOUBLE_POW5_TABLE[offset]; // 5^offset
127   const uint128_t b0 = ((uint128_t) m) * (mul[0] - 1);
128   const uint128_t b2 = ((uint128_t) m) * mul[1]; // 1/5^base2 * 5^offset = 1/5^(base2-offset) = 1/5^i
129   const uint32_t delta = pow5bits(base2) - pow5bits(i);
130   const uint128_t shiftedSum =
131     ((b0 >> delta) + (b2 << (64 - delta))) + 1 + ((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
132   result[0] = (uint64_t) shiftedSum;
133   result[1] = (uint64_t) (shiftedSum >> 64);
134 }
135 
136 #else // defined(HAS_UINT128)
137 
138 // Computes 5^i in the form required by Ryu, and stores it in the given pointer.
__double_computePow5(const uint32_t i,uint64_t * const result)139 void __double_computePow5(const uint32_t i, uint64_t* const result) {
140   const uint32_t base = i / POW5_TABLE_SIZE;
141   const uint32_t base2 = base * POW5_TABLE_SIZE;
142   const uint32_t offset = i - base2;
143   const uint64_t* const mul = DOUBLE_POW5_SPLIT2[base];
144   if (offset == 0) {
145     result[0] = mul[0];
146     result[1] = mul[1];
147     return;
148   }
149   const uint64_t m = DOUBLE_POW5_TABLE[offset];
150   uint64_t high1;
151   const uint64_t low1 = umul128(m, mul[1], &high1);
152   uint64_t high0;
153   const uint64_t low0 = umul128(m, mul[0], &high0);
154   const uint64_t sum = high0 + low1;
155   if (sum < high0) {
156     ++high1; // overflow into high1
157   }
158   // high1 | sum | low0
159   const uint32_t delta = pow5bits(i) - pow5bits(base2);
160   result[0] = shiftright128(low0, sum, delta) + ((POW5_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
161   result[1] = shiftright128(sum, high1, delta);
162 }
163 
164 // Computes 5^-i in the form required by Ryu, and stores it in the given pointer.
__double_computeInvPow5(const uint32_t i,uint64_t * const result)165 void __double_computeInvPow5(const uint32_t i, uint64_t* const result) {
166   const uint32_t base = (i + POW5_TABLE_SIZE - 1) / POW5_TABLE_SIZE;
167   const uint32_t base2 = base * POW5_TABLE_SIZE;
168   const uint32_t offset = base2 - i;
169   const uint64_t* const mul = DOUBLE_POW5_INV_SPLIT2[base]; // 1/5^base2
170   if (offset == 0) {
171     result[0] = mul[0];
172     result[1] = mul[1];
173     return;
174   }
175   const uint64_t m = DOUBLE_POW5_TABLE[offset];
176   uint64_t high1;
177   const uint64_t low1 = umul128(m, mul[1], &high1);
178   uint64_t high0;
179   const uint64_t low0 = umul128(m, mul[0] - 1, &high0);
180   const uint64_t sum = high0 + low1;
181   if (sum < high0) {
182     ++high1; // overflow into high1
183   }
184   // high1 | sum | low0
185   const uint32_t delta = pow5bits(base2) - pow5bits(i);
186   result[0] = shiftright128(low0, sum, delta) + 1 + ((POW5_INV_OFFSETS[i / 16] >> ((i % 16) << 1)) & 3);
187   result[1] = shiftright128(sum, high1, delta);
188 }
189 
190 #endif // defined(HAS_UINT128)
191 
192 
193