1 /*
2 * Copyright (c) 2022 Raspberry Pi (Trading) Ltd.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7 /* xoroshiro128ss(), rotl():
8
9 Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
10
11 To the extent possible under law, the author has dedicated all copyright
12 and related and neighboring rights to this software to the public domain
13 worldwide. This software is distributed without any warranty.
14
15 See <http://creativecommons.org/publicdomain/zero/1.0/>
16
17 splitmix64() implementation:
18
19 Written in 2015 by Sebastiano Vigna (vigna@acm.org)
20 To the extent possible under law, the author has dedicated all copyright
21 and related and neighboring rights to this software to the public domain
22 worldwide. This software is distributed without any warranty.
23
24 See <http://creativecommons.org/publicdomain/zero/1.0/>
25 */
26
27 #include "pico/rand.h"
28 #include "pico/unique_id.h"
29 #include "pico/time.h"
30 #include "hardware/clocks.h"
31 #include "hardware/structs/rosc.h"
32 #include "hardware/structs/busctrl.h"
33 #include "hardware/sync.h"
34
35 static bool rng_initialised = false;
36
37 // Note: By design, do not initialise any of the variables that hold entropy,
38 // they may have useful junk in them, either from power-up or a previous boot.
39 static rng_128_t __uninitialized_ram(rng_state);
40 #if PICO_RAND_SEED_ENTROPY_SRC_RAM_HASH
41 static uint64_t __uninitialized_ram(ram_hash);
42 #endif
43
44 #if PICO_RAND_ENTROPY_SRC_ROSC | PICO_RAND_SEED_ENTROPY_SRC_ROSC
45 static uint64_t __uninitialized_ram(rosc_samples);
46 #endif
47
48 #if PICO_RAND_ENTROPY_SRC_BUS_PERF_COUNTER
49 static uint8_t bus_counter_idx;
50 #endif
51
52 /* From the original source:
53
54 This is a fixed-increment version of Java 8's SplittableRandom generator
55 See http://dx.doi.org/10.1145/2714064.2660195 and
56 http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
57
58 It is a very fast generator passing BigCrush, and it can be useful if
59 for some reason you absolutely want 64 bits of state; otherwise, we
60 rather suggest to use a xoroshiro128+ (for moderately parallel
61 computations) or xorshift1024* (for massively parallel computations)
62 generator.
63
64 Note: This can be called with any value (i.e. including 0)
65 */
// One step of the splitmix64 generator used purely as a 64-bit mixing function:
// add the fixed increment to x, then apply the two xor-shift-multiply rounds and
// the final xor-shift. Any input (including 0) is acceptable.
static __noinline uint64_t splitmix64(uint64_t x) {
    uint64_t mixed = x + 0x9E3779B97F4A7C15ull;

    mixed ^= mixed >> 30;
    mixed *= 0xBF58476D1CE4E5B9ull;

    mixed ^= mixed >> 27;
    mixed *= 0x94D049BB133111EBull;

    mixed ^= mixed >> 31;
    return mixed;
}
72
73 /* From the original source:
74
75 This is xoroshiro128** 1.0, one of our all-purpose, rock-solid,
76 small-state generators. It is extremely (sub-ns) fast and it passes all
77 tests we are aware of, but its state space is large enough only for
78 mild parallelism.
79
80 For generating just floating-point numbers, xoroshiro128+ is even
81 faster (but it has a very mild bias, see notes in the comments).
82
83 The state must be seeded so that it is not everywhere zero. If you have
84 a 64-bit seed, we suggest to seed a splitmix64 generator and use its
85 output to fill s.
86 */
// Rotate the 64-bit value x left by k bit positions.
//
// The rotation count is reduced modulo 64, so k == 0 (and any multiple of 64)
// returns x unchanged. Both shift counts are masked to 0..63 because shifting a
// 64-bit value by 64 or more is undefined behaviour in C.
static inline uint64_t rotl(const uint64_t x, int k) {
    const unsigned int left = (unsigned int) k & 63u;
    const unsigned int right = (0u - left) & 63u;
    return (x << left) | (x >> right);
}
90
xoroshiro128ss(rng_128_t * local_rng_state)91 static __noinline uint64_t xoroshiro128ss(rng_128_t *local_rng_state) {
92 const uint64_t s0 = local_rng_state->r[0];
93 uint64_t s1 = local_rng_state->r[1];
94
95 // Because the state is *modified* outside of this function, there is a
96 // 1 in 2^128 chance that it could be all zeroes (which is not allowed).
97 while (s0 == 0 && s1 == 0) {
98 s1 = time_us_64(); // should not be 0, but loop anyway
99 }
100
101 const uint64_t result = rotl(s0 * 5, 7) * 9;
102
103 s1 ^= s0;
104 local_rng_state->r[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
105 local_rng_state->r[1] = rotl(s1, 37); // c
106
107 return result;
108 }
109
110 #if PICO_RAND_SEED_ENTROPY_SRC_RAM_HASH
sdbm_hash64_sram(uint64_t hash)111 static uint64_t sdbm_hash64_sram(uint64_t hash) {
112 // save some time by hashing a word at a time
113 for (uint i = (PICO_RAND_RAM_HASH_START + 3) & ~3; i < PICO_RAND_RAM_HASH_END; i+=4) {
114 uint32_t c = *(uint32_t *) i;
115 hash = (uint64_t) c + (hash << 6) + (hash << 16) - hash;
116 }
117 return hash;
118 }
119 #endif
120
121 #if PICO_RAND_SEED_ENTROPY_SRC_TRNG | PICO_RAND_ENTROPY_SRC_TRNG
122 #if !HAS_RP2350_TRNG
123 #error PICO_RAND_SEED_ENTROPY_SRC_TRNG and PICO_RAND_ENTROPY_SRC_TRNG are only valid on RP2350
124 #endif
125 #include "hardware/structs/trng.h"
126
127 uint32_t trng_sample_words[count_of(trng_hw->ehr_data)];
128 static_assert(count_of(trng_hw->ehr_data) >= 2 && count_of(trng_hw->ehr_data) < 255, "");
129 uint8_t trng_sample_word_count;
130
capture_additional_trng_samples(void)131 static uint64_t capture_additional_trng_samples(void) {
132 spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
133 uint32_t save = spin_lock_blocking(lock);
134 if (trng_sample_word_count < 2) {
135 // Sample one ROSC bit into EHR every cycle, subject to CPU keeping up.
136 // More temporal resolution to measure ROSC phase noise is better, if we
137 // use a high quality hash function instead of naive VN decorrelation.
138 // (Also more metastability events, which are a secondary noise source)
139 //
140 // This is out of the loop because writing to this register seems to
141 // restart the sampling, slowing things down. We don't care if this write
142 // is skipped as that would just make sampling take longer.
143 trng_hw->sample_cnt1 = 0;
144
145 // TRNG setup is inside loop in case it is skipped. Disable checks and
146 // bypass decorrelators, to stream raw TRNG ROSC samples:
147 trng_hw->trng_debug_control = -1u;
148 // Start ROSC if it is not already started
149 trng_hw->rnd_source_enable = -1u;
150 // Clear all interrupts (including EHR_VLD) -- we will check this
151 // later, after seeding RCP.
152 trng_hw->rng_icr = -1u;
153
154 // Wait for 192 ROSC samples to fill EHR, this should take constant time:
155 while (trng_hw->trng_busy);
156
157 for (uint i = 0; i < count_of(trng_sample_words); i++) {
158 trng_sample_words[i] = trng_hw->ehr_data[i];
159 }
160 trng_sample_word_count = count_of(trng_sample_words);
161
162 // TRNG is now sampling again, having started after we read the last
163 // EHR word. Grab some random bits and use them to modulate
164 // the chain length, to reduce chance of injection locking:
165 trng_hw->trng_config = rng_state.r[0];
166 }
167 trng_sample_word_count -= 2;
168 uint64_t rc = trng_sample_words[trng_sample_word_count] |
169 (((uint64_t)trng_sample_words[trng_sample_word_count + 1]) << 32);
170 spin_unlock(lock, save);
171 return rc;
172 }
173
174 #endif
175 #if PICO_RAND_SEED_ENTROPY_SRC_ROSC | PICO_RAND_ENTROPY_SRC_ROSC
176 /* gather an additional n bits of entropy, and shift them into the 64 bit entropy counter */
capture_additional_rosc_samples(uint n)177 static uint64_t capture_additional_rosc_samples(uint n) {
178 static absolute_time_t next_sample_time;
179
180 // provide an override if someone really wants it, but disabling ROSC as an entropy source makes more sense
181 #if !PICO_RAND_DISABLE_ROSC_CHECK
182 // check that the ROSC is running but that the processors are NOT running from it
183 hard_assert((rosc_hw->status & ROSC_STATUS_ENABLED_BITS) &&
184 ((clocks_hw->clk[clk_sys].ctrl & CLOCKS_CLK_SYS_CTRL_AUXSRC_BITS) != (CLOCKS_CLK_SYS_CTRL_AUXSRC_VALUE_ROSC_CLKSRC << CLOCKS_CLK_SYS_CTRL_AUXSRC_LSB)));
185 #endif
186
187 bool in_exception = __get_current_exception();
188 assert(n); // save us having to special case samples for this
189 uint64_t samples = 0;
190 for(uint i=0; i<n; i++) {
191 bool bit_done = false;
192 do {
193 // Ensure that the ROSC random bit is not sampled too quickly,
194 // ROSC may be ticking only a few times a microsecond.
195 // Note: In general (i.e. sporadic) use, very often there will be no delay here.
196
197 // note this is not read under lock, so the two 32 bit halves could be skewed, but in that
198 // case we'll fail the check later, which is fine in this rare case
199 absolute_time_t cached_next_sample_time = next_sample_time;
200 // we support being called from IRQ, so be careful about sleeping... still not
201 // ideal, but not much that can be done
202 if (in_exception) {
203 busy_wait_until(next_sample_time);
204 } else {
205 sleep_until(next_sample_time);
206 }
207 spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
208 uint32_t save = spin_lock_blocking(lock);
209 if (!absolute_time_diff_us(cached_next_sample_time, next_sample_time)) {
210 // we won the race (if any) for the bit, so we collect it locally
211 samples <<= 1;
212 samples |= rosc_hw->randombit & 1u;
213 // use of relative time to now, rather than offset from before makes things
214 // a bit less predictable at the cost of some speed.
215 next_sample_time = make_timeout_time_us(PICO_RAND_MIN_ROSC_BIT_SAMPLE_TIME_US);
216 bit_done = true;
217 if (i == n - 1) {
218 // samples has our random bits, so let's mix them in now
219 samples = rosc_samples = (rosc_samples << n) | samples;
220 }
221 }
222 spin_unlock(lock, save);
223 } while (!bit_done);
224 }
225 return samples;
226 }
227 #endif
228
229 #if PICO_RAND_SEED_ENTROPY_SRC_BOOT_RANDOM
230 #include "pico/bootrom.h"
231 #endif
232
initialise_rand(void)233 static void initialise_rand(void) {
234 rng_128_t local_rng_state = local_rng_state;
235 uint which = 0;
236 #if PICO_RAND_SEED_ENTROPY_SRC_RAM_HASH
237 ram_hash = sdbm_hash64_sram(ram_hash);
238 local_rng_state.r[which] ^= splitmix64(ram_hash);
239 which ^= 1;
240 #endif
241
242 #if PICO_RAND_SEED_ENTROPY_SRC_BOARD_ID
243 static_assert(PICO_UNIQUE_BOARD_ID_SIZE_BYTES == sizeof(uint64_t),
244 "Code below requires that 'board_id' is 64-bits in size");
245
246 // Note! The safety of the length assumption here is protected by a 'static_assert' above
247 union unique_id_u {
248 pico_unique_board_id_t board_id_native;
249 uint64_t board_id_u64;
250 } unique_id;
251 // Note! The safety of the length assumption here is protected by a 'static_assert' above
252 pico_get_unique_board_id(&unique_id.board_id_native);
253 local_rng_state.r[which] ^= splitmix64(unique_id.board_id_u64);
254 which ^= 1;
255 #endif
256
257 #if PICO_RAND_SEED_ENTROPY_SRC_ROSC
258 // this is really quite slow (10ms per iteration), and I'm not sure that it adds value over the 64 random bits
259 // uint ref_khz = clock_get_hz(clk_ref) / 100;
260 // for (int i = 0; i < 5; i++) {
261 // // Apply hash of the rosc frequency, limited but still 'extra' entropy
262 // uint measurement = frequency_count_raw(CLOCKS_FC0_SRC_VALUE_ROSC_CLKSRC, ref_khz);
263 // local_rng_state.r[which] ^= splitmix64(measurement);
264 // (void) xoroshiro128ss(&local_rng_state); //churn to mix seed sources
265 // }
266
267 // Gather a full ROSC sample array with sample bits
268 local_rng_state.r[which] ^= splitmix64(capture_additional_rosc_samples(8 * sizeof(rosc_samples)));
269 which ^= 1;
270 #endif
271
272 #if PICO_RAND_SEED_ENTROPY_SRC_BOOT_RANDOM
273 // Mix in boot random.
274 union {
275 uint64_t u64[2];
276 uint32_t u32[4];
277 } br;
278 rom_get_boot_random(br.u32);
279 local_rng_state.r[which] ^= splitmix64(br.u64[0]);
280 local_rng_state.r[which ^ 1] ^= splitmix64(br.u64[1]);
281 #endif
282
283 #if PICO_RAND_SEED_ENTROPY_SRC_TIME
284 // Mix in hashed time. This is [possibly] predictable boot-to-boot
285 // but will vary application-to-application.
286 local_rng_state.r[which] ^= splitmix64(time_us_64());
287 which ^= 1;
288 #endif
289 #if PICO_RAND_SEED_ENTROPY_SRC_TRNG
290 local_rng_state.r[which] ^= splitmix64(capture_additional_trng_samples());
291 which ^= 1;
292 #endif
293
294 spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
295 uint32_t save = spin_lock_blocking(lock);
296 if (!rng_initialised) {
297 #if PICO_RAND_SEED_ENTROPY_SRC_BUS_PERF_COUNTER
298 #if !PICO_RAND_BUS_PERF_COUNTER_INDEX
299 int idx = -1;
300 for(uint i = 0; i < count_of(busctrl_hw->counter); i++) {
301 if (busctrl_hw->counter[i].sel == BUSCTRL_PERFSEL0_RESET) {
302 idx = (int)i;
303 break;
304 }
305 }
306 hard_assert(idx != -1);
307 bus_counter_idx = (uint8_t)idx;
308 #else
309 bus_counter_idx = (uint8_t)PICO_RAND_BUS_PERF_COUNTER_INDEX;
310 #endif
311 busctrl_hw->counter[bus_counter_idx].sel = PICO_RAND_BUS_PERF_COUNTER_EVENT;
312 #endif
313 (void) xoroshiro128ss(&local_rng_state);
314 rng_state = local_rng_state;
315 rng_initialised = true;
316 }
317 spin_unlock(lock, save);
318 }
319
get_rand_64(void)320 uint64_t get_rand_64(void) {
321 if (!rng_initialised) {
322 // Do not provide 'RNs' until the system has been initialised. Note:
323 // The first initialisation can be quite time-consuming depending on
324 // the amount of RAM hashed, see RAM_HASH_START and RAM_HASH_END
325 initialise_rand();
326 }
327
328 static volatile uint8_t check_byte;
329 rng_128_t local_rng_state = rng_state;
330 uint8_t local_check_byte = check_byte;
331 // Modify PRNG state with the run-time entropy sources,
332 // hashed to reduce correlation with previous modifications.
333 uint which = 0;
334 #if PICO_RAND_ENTROPY_SRC_TIME
335 local_rng_state.r[which] ^= splitmix64(time_us_64());
336 which ^= 1;
337 #endif
338 #if PICO_RAND_ENTROPY_SRC_ROSC
339 local_rng_state.r[which] ^= splitmix64(capture_additional_rosc_samples(PICO_RAND_ROSC_BIT_SAMPLE_COUNT));
340 which ^= 1;
341 #endif
342 #if PICO_RAND_ENTROPY_SRC_TRNG
343 uint64_t foo = capture_additional_trng_samples();
344 local_rng_state.r[which] ^= splitmix64(foo);
345 which ^= 1;
346 #endif
347 #if PICO_RAND_ENTROPY_SRC_BUS_PERF_COUNTER
348 uint32_t bus_counter_value = busctrl_hw->counter[bus_counter_idx].value;
349 // counter is saturating, so clear it if it has reached saturation
350 if (bus_counter_value == BUSCTRL_PERFCTR0_BITS) {
351 busctrl_hw->counter[bus_counter_idx].value = 0;
352 }
353 local_rng_state.r[which] ^= splitmix64(bus_counter_value);
354 which ^= 1;
355 #endif
356
357 spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
358 uint32_t save = spin_lock_blocking(lock);
359 if (local_check_byte != check_byte) {
360 // someone got a random number in the interim, so mix it in
361 local_rng_state.r[0] ^= rng_state.r[0];
362 local_rng_state.r[1] ^= rng_state.r[1];
363 }
364 // Generate a 64-bit RN from the modified PRNG state.
365 // Note: This also "churns" the 128-bit state for next time.
366 uint64_t rand64 = xoroshiro128ss(&local_rng_state);
367 rng_state = local_rng_state;
368 check_byte++;
369 spin_unlock(lock, save);
370
371 return rand64;
372 }
373
// Fill *ptr128 with 128 random bits: two consecutive get_rand_64() results,
// stored to r[0] first and then r[1].
void get_rand_128(rng_128_t *ptr128) {
    for (unsigned int half = 0; half < 2; half++) {
        ptr128->r[half] = get_rand_64();
    }
}
378
// Return a 32-bit random number: the low 32 bits of a get_rand_64() result.
uint32_t get_rand_32(void) {
    const uint64_t wide = get_rand_64();
    return (uint32_t) wide;
}
382