1 /*
2  * Copyright (c) 2024 Raspberry Pi Ltd.
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
7 #include <stdio.h>
8 #include <fenv.h>
9 #include <stdbool.h>
10 #include <stdint.h>
11 
// xoshiro256++ pseudorandom number generator.
13 // Adapted from: https://prng.di.unimi.it/xoshiro256plusplus.c
14 // Original copyright notice:
15 
16 /*  Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
17 
18 To the extent possible under law, the author has dedicated all copyright
19 and related and neighboring rights to this software to the public domain
20 worldwide. This software is distributed without any warranty.
21 
22 See <http://creativecommons.org/publicdomain/zero/1.0/>. */
23 
24 /* This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.
25    It has excellent (sub-ns) speed, a state (256 bits) that is large
26    enough for any parallel application, and it passes all tests we are
27    aware of.
28 
29    For generating just floating-point numbers, xoshiro256+ is even faster.
30 
31    The state must be seeded so that it is not everywhere zero. If you have
32    a 64-bit seed, we suggest to seed a splitmix64 generator and use its
33    output to fill s. */
34 
xr256_rotl(const uint64_t x,int k)35 static inline uint64_t xr256_rotl(const uint64_t x, int k) {
36 	return (x << k) | (x >> (64 - k));
37 }
38 
xr256_next(uint64_t s[4])39 uint64_t xr256_next(uint64_t s[4]) {
40 	const uint64_t result = xr256_rotl(s[0] + s[3], 23) + s[0];
41 
42 	const uint64_t t = s[1] << 17;
43 
44 	s[2] ^= s[0];
45 	s[3] ^= s[1];
46 	s[1] ^= s[2];
47 	s[0] ^= s[3];
48 
49 	s[2] ^= t;
50 
51 	s[3] = xr256_rotl(s[3], 45);
52 
53 	return result;
54 }
bitcast_f2u(float x)55 uint32_t bitcast_f2u(float x) {
56 	// This is UB but then so is every C program
57 	union {
58 		float f;
59 		uint32_t u;
60 	} un;
61 	un.f = x;
62 	return un.u;
63 }
64 
bitcast_u2f(uint32_t x)65 float bitcast_u2f(uint32_t x) {
66 	union {
67 		float f;
68 		uint32_t u;
69 	} un;
70 	un.u = x;
71 	return un.f;
72 }
73 
is_nan_u(uint32_t x)74 bool is_nan_u(uint32_t x) {
75 	return ((x >> 23) & 0xffu) == 0xffu && (x & ~(-1u << 23));
76 }
77 
flush_to_zero_u(uint32_t x)78 uint32_t flush_to_zero_u(uint32_t x) {
79 	if (!(x & (0xffu << 23))) {
80 		x &= -1u << 23;
81 	}
82 	return x;
83 }
84 
model_fadd(uint32_t x,uint32_t y)85 uint32_t model_fadd(uint32_t x, uint32_t y) {
86 	x = flush_to_zero_u(x);
87 	y = flush_to_zero_u(y);
88 	// Use local hardware implementation to perform calculation
89 	uint32_t result = bitcast_f2u(bitcast_u2f(x) + bitcast_u2f(y));
90 	// Use correct canonical generated nan
91 	if (is_nan_u(result)) {
92 		result = -1u;
93 	}
94 	result = flush_to_zero_u(result);
95 	return result;
96 }
97 
model_fmul(uint32_t x,uint32_t y)98 uint32_t model_fmul(uint32_t x, uint32_t y) {
99 	x = flush_to_zero_u(x);
100 	y = flush_to_zero_u(y);
101 	// Use local hardware implementation to perform calculation
102 	uint32_t result = bitcast_f2u(bitcast_u2f(x) * bitcast_u2f(y));
103 	// Use correct canonical generated nan
104 	if (is_nan_u(result)) {
105 		result = -1u;
106 	}
107 	result = flush_to_zero_u(result);
108 	return result;
109 }
110 
main()111 int main() {
112 	// SHA-256 of a rude word
113 	uint64_t rand_state[4] = {
114 		0x5891b5b522d5df08u,
115 		0x6d0ff0b110fbd9d2u,
116 		0x1bb4fc7163af34d0u,
117 		0x8286a2e846f6be03u
118 	};
119 	for (int i = 0; i < 1000; ++i) {
120 		uint32_t x, y;
121 		x = xr256_next(rand_state) & 0xffffffffu;
122 		y = xr256_next(rand_state) & 0xffffffffu;
123 		// Map nan to +-inf (input nans should already be well-covered)
124 		if (is_nan_u(x)) {
125 			x &= -1u << 23;
126 		}
127 		if (is_nan_u(y)) {
128 			y &= -1u << 23;
129 		}
130 #if 1
131 		printf("{0x%08xu, 0x%08xu, 0x%08xu},\n", x, y, model_fadd(x, y));
132 #else
133 		printf("{0x%08xu, 0x%08xu, 0x%08xu},\n", x, y, model_fmul(x, y));
134 #endif
135 	}
136 }
137