1 /*
2 * Copyright (c) 2024 Raspberry Pi Ltd.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
#include <fenv.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
11
// xoshiro256++ pseudorandom number generator.
13 // Adapted from: https://prng.di.unimi.it/xoshiro256plusplus.c
14 // Original copyright notice:
15
16 /* Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
17
18 To the extent possible under law, the author has dedicated all copyright
19 and related and neighboring rights to this software to the public domain
20 worldwide. This software is distributed without any warranty.
21
22 See <http://creativecommons.org/publicdomain/zero/1.0/>. */
23
24 /* This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.
25 It has excellent (sub-ns) speed, a state (256 bits) that is large
26 enough for any parallel application, and it passes all tests we are
27 aware of.
28
29 For generating just floating-point numbers, xoshiro256+ is even faster.
30
31 The state must be seeded so that it is not everywhere zero. If you have
32 a 64-bit seed, we suggest to seed a splitmix64 generator and use its
33 output to fill s. */
34
// Rotate the 64-bit value x left by k bit positions.
//
// The rotation count is reduced modulo 64 so that every k is well defined.
// A plain `x >> (64 - k)` would shift by the full width of the type when
// k == 0, which is undefined behaviour in C. For k in 1..63 the result is
// identical to the textbook formulation.
static inline uint64_t xr256_rotl(const uint64_t x, int k) {
    const unsigned int left = (unsigned int)k & 63u;
    // (64 - left) & 63 maps left == 0 to a right shift of 0 instead of 64.
    const unsigned int right = (64u - left) & 63u;
    return (x << left) | (x >> right);
}
38
// Advance the 256-bit generator state held in s[0..3] by one step and
// return the next 64-bit output.
//
// The output is derived from the state as it was on entry; the state is
// then updated in place.
uint64_t xr256_next(uint64_t s[4]) {
    uint64_t a = s[0];
    uint64_t b = s[1];
    uint64_t c = s[2];
    uint64_t d = s[3];

    // Output function: uses the pre-update values of words 0 and 3.
    const uint64_t output = xr256_rotl(a + d, 23) + a;

    // Must be captured before word 1 is modified below.
    const uint64_t shifted = b << 17;

    // Linear state transition; the order of these XORs is significant.
    c ^= a;
    d ^= b;
    b ^= c;
    a ^= d;

    s[0] = a;
    s[1] = b;
    s[2] = c ^ shifted;
    s[3] = xr256_rotl(d, 45);

    return output;
}
// Return the object representation of the float x as a 32-bit unsigned
// integer, without any numeric conversion.
//
// The value is written to one union member and read back through the
// other, which reinterprets the stored bytes (union type punning is
// permitted in C99 and later). Assumes float and uint32_t have the same size.
uint32_t bitcast_f2u(float x) {
    const union {
        float as_float;
        uint32_t as_bits;
    } pun = { .as_float = x };
    return pun.as_bits;
}
64
// Return the float whose object representation is the 32-bit pattern x,
// without any numeric conversion.
//
// The pattern is written to one union member and read back through the
// other, which reinterprets the stored bytes (union type punning is
// permitted in C99 and later). Assumes float and uint32_t have the same size.
float bitcast_u2f(uint32_t x) {
    const union {
        float as_float;
        uint32_t as_bits;
    } pun = { .as_bits = x };
    return pun.as_float;
}
73
// Return true if the 32-bit pattern x has an all-ones 8-bit exponent field
// (bits 30..23) and a nonzero 23-bit mantissa field (bits 22..0), i.e. it
// encodes a single-precision NaN. The sign bit is ignored.
bool is_nan_u(uint32_t x) {
    // With the sign bit cleared, infinity is exactly 0x7f800000; any larger
    // value has the same all-ones exponent and a nonzero mantissa.
    const uint32_t magnitude = x & UINT32_C(0x7fffffff);
    return magnitude > UINT32_C(0x7f800000);
}
77
// If the 8-bit exponent field (bits 30..23) of x is zero, clear the
// mantissa so that only the sign bit survives; otherwise return x
// unchanged. This turns subnormal bit patterns into a zero of the same sign.
uint32_t flush_to_zero_u(uint32_t x) {
    const uint32_t exponent_bits = x & UINT32_C(0x7f800000);
    if (exponent_bits != 0) {
        return x;
    }
    // Exponent is already zero here, so keeping just the sign bit is enough.
    return x & UINT32_C(0x80000000);
}
84
// Reference model of single-precision addition on raw bit patterns.
//
// x, y: operand bit patterns.
// Returns the bit pattern of the sum, with these adjustments:
//   - operands with a zero exponent field are flushed to signed zero first;
//   - the addition itself is performed by the host's float arithmetic;
//   - any NaN result is replaced by the all-ones pattern (the canonical
//     generated NaN used by this model);
//   - a result with a zero exponent field is flushed to signed zero.
uint32_t model_fadd(uint32_t x, uint32_t y) {
    const float lhs = bitcast_u2f(flush_to_zero_u(x));
    const float rhs = bitcast_u2f(flush_to_zero_u(y));

    const uint32_t sum_bits = bitcast_f2u(lhs + rhs);

    // The all-ones NaN has a nonzero exponent, so it needs no flushing.
    return is_nan_u(sum_bits) ? -1u : flush_to_zero_u(sum_bits);
}
97
// Reference model of single-precision multiplication on raw bit patterns.
//
// x, y: operand bit patterns.
// Returns the bit pattern of the product, with these adjustments:
//   - operands with a zero exponent field are flushed to signed zero first;
//   - the multiplication itself is performed by the host's float arithmetic;
//   - any NaN result is replaced by the all-ones pattern (the canonical
//     generated NaN used by this model);
//   - a result with a zero exponent field is flushed to signed zero.
uint32_t model_fmul(uint32_t x, uint32_t y) {
    const float lhs = bitcast_u2f(flush_to_zero_u(x));
    const float rhs = bitcast_u2f(flush_to_zero_u(y));

    const uint32_t product_bits = bitcast_f2u(lhs * rhs);

    // The all-ones NaN has a nonzero exponent, so it needs no flushing.
    return is_nan_u(product_bits) ? -1u : flush_to_zero_u(product_bits);
}
110
// Print a table of pseudorandom test vectors to stdout, one per line, in
// the form "{x, y, expected}," where x and y are operand bit patterns and
// expected is the model's result for them.
//
// The generator is seeded with a fixed constant, so the sequence of
// operands is the same on every run. Flip the `#if 1` below to emit
// multiplication vectors instead of addition vectors.
int main(void) {
    enum { VECTOR_COUNT = 1000 };

    // SHA-256 of a rude word
    uint64_t rand_state[4] = {
        0x5891b5b522d5df08u,
        0x6d0ff0b110fbd9d2u,
        0x1bb4fc7163af34d0u,
        0x8286a2e846f6be03u
    };

    // Keeps sign and exponent, clears the mantissa: turns a NaN into an
    // infinity of the same sign.
    const uint32_t nan_to_inf_mask = UINT32_C(0xff800000);

    for (int i = 0; i < VECTOR_COUNT; ++i) {
        // Take the low 32 bits of two consecutive generator outputs.
        uint32_t x = (uint32_t)(xr256_next(rand_state) & 0xffffffffu);
        uint32_t y = (uint32_t)(xr256_next(rand_state) & 0xffffffffu);

        // Map nan to +-inf (input nans should already be well-covered)
        if (is_nan_u(x)) {
            x &= nan_to_inf_mask;
        }
        if (is_nan_u(y)) {
            y &= nan_to_inf_mask;
        }

#if 1
        const uint32_t expected = model_fadd(x, y);
#else
        const uint32_t expected = model_fmul(x, y);
#endif
        // PRIx32 matches uint32_t on every platform; a bare %x is only
        // correct where uint32_t happens to be unsigned int.
        printf("{0x%08" PRIx32 "u, 0x%08" PRIx32 "u, 0x%08" PRIx32 "u},\n",
               x, y, expected);
    }
    return 0;
}
137