1 /*
2  * Copyright (c) 2022 Raspberry Pi (Trading) Ltd.
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
7 /*  xoroshiro128ss(), rotl():
8 
9     Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
10 
11     To the extent possible under law, the author has dedicated all copyright
12     and related and neighboring rights to this software to the public domain
13     worldwide. This software is distributed without any warranty.
14 
15     See <http://creativecommons.org/publicdomain/zero/1.0/>
16 
17     splitmix64() implementation:
18 
19     Written in 2015 by Sebastiano Vigna (vigna@acm.org)
20     To the extent possible under law, the author has dedicated all copyright
21     and related and neighboring rights to this software to the public domain
22     worldwide. This software is distributed without any warranty.
23 
24     See <http://creativecommons.org/publicdomain/zero/1.0/>
25 */
26 
27 #include "pico/rand.h"
28 #include "pico/unique_id.h"
29 #include "pico/time.h"
30 #include "hardware/clocks.h"
31 #include "hardware/structs/rosc.h"
32 #include "hardware/structs/busctrl.h"
33 #include "hardware/sync.h"
34 
35 static bool rng_initialised = false;
36 
37 // Note: By design, do not initialise any of the variables that hold entropy,
38 // they may have useful junk in them, either from power-up or a previous boot.
39 static rng_128_t __uninitialized_ram(rng_state);
40 #if PICO_RAND_SEED_ENTROPY_SRC_RAM_HASH
41 static uint64_t __uninitialized_ram(ram_hash);
42 #endif
43 
44 #if PICO_RAND_ENTROPY_SRC_ROSC | PICO_RAND_SEED_ENTROPY_SRC_ROSC
45 static uint64_t __uninitialized_ram(rosc_samples);
46 #endif
47 
48 #if PICO_RAND_ENTROPY_SRC_BUS_PERF_COUNTER
49 static uint8_t bus_counter_idx;
50 #endif
51 
52 /* From the original source:
53 
54    This is a fixed-increment version of Java 8's SplittableRandom generator
55    See http://dx.doi.org/10.1145/2714064.2660195 and
56    http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
57 
58    It is a very fast generator passing BigCrush, and it can be useful if
59    for some reason you absolutely want 64 bits of state; otherwise, we
60    rather suggest to use a xoroshiro128+ (for moderately parallel
61    computations) or xorshift1024* (for massively parallel computations)
62    generator.
63 
64    Note:  This can be called with any value (i.e. including 0)
65 */
// Hash a 64-bit value with one SplitMix64 step: add the fixed increment, then
// run the xor-shift/multiply finaliser. Every input, including 0, is valid.
static __noinline uint64_t splitmix64(uint64_t x) {
    uint64_t mixed = x + 0x9E3779B97F4A7C15ull;
    mixed ^= mixed >> 30;
    mixed *= 0xBF58476D1CE4E5B9ull;
    mixed ^= mixed >> 27;
    mixed *= 0x94D049BB133111EBull;
    mixed ^= mixed >> 31;
    return mixed;
}
72 
73 /* From the original source:
74 
75    This is xoroshiro128** 1.0, one of our all-purpose, rock-solid,
76    small-state generators. It is extremely (sub-ns) fast and it passes all
77    tests we are aware of, but its state space is large enough only for
78    mild parallelism.
79 
80    For generating just floating-point numbers, xoroshiro128+ is even
81    faster (but it has a very mild bias, see notes in the comments).
82 
83    The state must be seeded so that it is not everywhere zero. If you have
84    a 64-bit seed, we suggest to seed a splitmix64 generator and use its
85    output to fill s.
86 */
// Rotate the 64-bit value x left by k bit positions.
//
// The rotation count is reduced modulo 64, so k == 0 (or any multiple of 64)
// returns x unchanged. Without the masking, k == 0 would evaluate x >> 64,
// and shifting by the full width of the type is undefined behaviour in C.
static inline uint64_t rotl(const uint64_t x, int k) {
    const unsigned int left = (unsigned int) k & 63u;
    const unsigned int right = (64u - left) & 63u;
    return (x << left) | (x >> right);
}
90 
xoroshiro128ss(rng_128_t * local_rng_state)91 static __noinline uint64_t xoroshiro128ss(rng_128_t *local_rng_state) {
92     const uint64_t s0 = local_rng_state->r[0];
93     uint64_t s1 = local_rng_state->r[1];
94 
95     // Because the state is *modified* outside of this function, there is a
96     // 1 in 2^128 chance that it could be all zeroes (which is not allowed).
97     while (s0 == 0 && s1 == 0) {
98         s1 = time_us_64();   // should not be 0, but loop anyway
99     }
100 
101     const uint64_t result = rotl(s0 * 5, 7) * 9;
102 
103     s1 ^= s0;
104     local_rng_state->r[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
105     local_rng_state->r[1] = rotl(s1, 37); // c
106 
107     return result;
108 }
109 
110 #if PICO_RAND_SEED_ENTROPY_SRC_RAM_HASH
sdbm_hash64_sram(uint64_t hash)111 static uint64_t sdbm_hash64_sram(uint64_t hash) {
112     // save some time by hashing a word at a time
113     for (uint i = (PICO_RAND_RAM_HASH_START + 3) & ~3; i < PICO_RAND_RAM_HASH_END; i+=4) {
114         uint32_t c = *(uint32_t *) i;
115         hash = (uint64_t) c + (hash << 6) + (hash << 16) - hash;
116     }
117     return hash;
118 }
119 #endif
120 
121 #if PICO_RAND_SEED_ENTROPY_SRC_TRNG | PICO_RAND_ENTROPY_SRC_TRNG
122 #if !HAS_RP2350_TRNG
123 #error PICO_RAND_SEED_ENTROPY_SRC_TRNG and PICO_RAND_ENTROPY_SRC_TRNG are only valid on RP2350
124 #endif
125 #include "hardware/structs/trng.h"
126 
127 uint32_t trng_sample_words[count_of(trng_hw->ehr_data)];
128 static_assert(count_of(trng_hw->ehr_data) >= 2 && count_of(trng_hw->ehr_data) < 255, "");
129 uint8_t trng_sample_word_count;
130 
capture_additional_trng_samples(void)131 static uint64_t capture_additional_trng_samples(void) {
132     spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
133     uint32_t save = spin_lock_blocking(lock);
134     if (trng_sample_word_count < 2) {
135         // Sample one ROSC bit into EHR every cycle, subject to CPU keeping up.
136         // More temporal resolution to measure ROSC phase noise is better, if we
137         // use a high quality hash function instead of naive VN decorrelation.
138         // (Also more metastability events, which are a secondary noise source)
139         //
140         // This is out of the loop because writing to this register seems to
141         // restart the sampling, slowing things down. We don't care if this write
142         // is skipped as that would just make sampling take longer.
143         trng_hw->sample_cnt1 = 0;
144 
145         // TRNG setup is inside loop in case it is skipped. Disable checks and
146         // bypass decorrelators, to stream raw TRNG ROSC samples:
147         trng_hw->trng_debug_control = -1u;
148         // Start ROSC if it is not already started
149         trng_hw->rnd_source_enable = -1u;
150         // Clear all interrupts (including EHR_VLD) -- we will check this
151         // later, after seeding RCP.
152         trng_hw->rng_icr = -1u;
153 
154         // Wait for 192 ROSC samples to fill EHR, this should take constant time:
155         while (trng_hw->trng_busy);
156 
157         for (uint i = 0; i < count_of(trng_sample_words); i++) {
158             trng_sample_words[i] = trng_hw->ehr_data[i];
159         }
160         trng_sample_word_count = count_of(trng_sample_words);
161 
162         // TRNG is now sampling again, having started after we read the last
163         // EHR word. Grab some random bits and use them to modulate
164         // the chain length, to reduce chance of injection locking:
165         trng_hw->trng_config = rng_state.r[0];
166     }
167     trng_sample_word_count -= 2;
168     uint64_t rc = trng_sample_words[trng_sample_word_count] |
169                   (((uint64_t)trng_sample_words[trng_sample_word_count + 1]) << 32);
170     spin_unlock(lock, save);
171     return rc;
172 }
173 
174 #endif
175 #if PICO_RAND_SEED_ENTROPY_SRC_ROSC | PICO_RAND_ENTROPY_SRC_ROSC
176 /* gather an additional n bits of entropy, and shift them into the 64 bit entropy counter */
// Collect n further random bits from the ROSC and shift them into the 64-bit
// rosc_samples accumulator.
//
// n must be non-zero. Each bit is read under the PICO_SPINLOCK_ID_RAND spin
// lock, and consecutive reads are spaced at least
// PICO_RAND_MIN_ROSC_BIT_SAMPLE_TIME_US apart. May be called from an
// exception handler, in which case it busy-waits rather than sleeps.
//
// Returns the updated value of rosc_samples (older bits shifted up by n, with
// the n new bits in the low positions).
static uint64_t capture_additional_rosc_samples(uint n) {
    static absolute_time_t next_sample_time;

    // provide an override if someone really wants it, but disabling ROSC as an entropy source makes more sense
#if !PICO_RAND_DISABLE_ROSC_CHECK
    // check that the ROSC is running but that the processors are NOT running from it
    hard_assert((rosc_hw->status & ROSC_STATUS_ENABLED_BITS) &&
                ((clocks_hw->clk[clk_sys].ctrl & CLOCKS_CLK_SYS_CTRL_AUXSRC_BITS) != (CLOCKS_CLK_SYS_CTRL_AUXSRC_VALUE_ROSC_CLKSRC << CLOCKS_CLK_SYS_CTRL_AUXSRC_LSB)));
#endif

    bool in_exception = __get_current_exception();
    assert(n); // save us having to special case samples for this
    uint64_t samples = 0;
    for(uint i=0; i<n; i++) {
        bool bit_done = false;
        do {
            // Ensure that the ROSC random bit is not sampled too quickly,
            // ROSC may be ticking only a few times a microsecond.
            // Note: In general (i.e. sporadic) use, very often there will be no delay here.

            // note this is not read under lock, so the two 32 bit halves could be skewed, but in that
            // case we'll fail the check later, which is fine in this rare case
            absolute_time_t cached_next_sample_time = next_sample_time;
            // we support being called from IRQ, so be careful about sleeping... still not
            // ideal, but not much that can be done
            if (in_exception) {
                busy_wait_until(next_sample_time);
            } else {
                sleep_until(next_sample_time);
            }
            spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
            uint32_t save = spin_lock_blocking(lock);
            if (!absolute_time_diff_us(cached_next_sample_time, next_sample_time)) {
                // we won the race (if any) for the bit, so we collect it locally
                samples <<= 1;
                samples |= rosc_hw->randombit & 1u;
                // use of relative time to now, rather than offset from before makes things
                // a bit less predictable at the cost of some speed.
                next_sample_time = make_timeout_time_us(PICO_RAND_MIN_ROSC_BIT_SAMPLE_TIME_US);
                bit_done = true;
                if (i == n - 1) {
                    // samples has our random bits, so let's mix them in now.
                    // When n covers the whole accumulator (the seeding path asks
                    // for 8 * sizeof(rosc_samples) bits) every old bit is pushed
                    // out; shifting a 64-bit value by 64 or more is undefined
                    // behaviour in C, so that case is handled explicitly.
                    uint64_t retained = 0;
                    if (n < 8 * sizeof(rosc_samples)) {
                        retained = rosc_samples << n;
                    }
                    samples = rosc_samples = retained | samples;
                }
            }
            spin_unlock(lock, save);
        } while (!bit_done);
    }
    return samples;
}
227 #endif
228 
229 #if PICO_RAND_SEED_ENTROPY_SRC_BOOT_RANDOM
230 #include "pico/bootrom.h"
231 #endif
232 
initialise_rand(void)233 static void initialise_rand(void) {
234     rng_128_t local_rng_state = local_rng_state;
235     uint which = 0;
236 #if PICO_RAND_SEED_ENTROPY_SRC_RAM_HASH
237     ram_hash = sdbm_hash64_sram(ram_hash);
238     local_rng_state.r[which] ^= splitmix64(ram_hash);
239     which ^= 1;
240 #endif
241 
242 #if PICO_RAND_SEED_ENTROPY_SRC_BOARD_ID
243     static_assert(PICO_UNIQUE_BOARD_ID_SIZE_BYTES == sizeof(uint64_t),
244                   "Code below requires that 'board_id' is 64-bits in size");
245 
246     // Note! The safety of the length assumption here is protected by a 'static_assert' above
247     union unique_id_u {
248         pico_unique_board_id_t board_id_native;
249         uint64_t board_id_u64;
250     } unique_id;
251     // Note! The safety of the length assumption here is protected by a 'static_assert' above
252     pico_get_unique_board_id(&unique_id.board_id_native);
253     local_rng_state.r[which] ^= splitmix64(unique_id.board_id_u64);
254     which ^= 1;
255 #endif
256 
257 #if PICO_RAND_SEED_ENTROPY_SRC_ROSC
258     // this is really quite slow (10ms per iteration), and I'm not sure that it adds value over the 64 random bits
259 //    uint ref_khz = clock_get_hz(clk_ref) / 100;
260 //    for (int i = 0; i < 5; i++) {
261 //        // Apply hash of the rosc frequency, limited but still 'extra' entropy
262 //        uint measurement = frequency_count_raw(CLOCKS_FC0_SRC_VALUE_ROSC_CLKSRC, ref_khz);
263 //        local_rng_state.r[which] ^= splitmix64(measurement);
264 //        (void) xoroshiro128ss(&local_rng_state);  //churn to mix seed sources
265 //    }
266 
267     // Gather a full ROSC sample array with sample bits
268     local_rng_state.r[which] ^= splitmix64(capture_additional_rosc_samples(8 * sizeof(rosc_samples)));
269     which ^= 1;
270 #endif
271 
272 #if PICO_RAND_SEED_ENTROPY_SRC_BOOT_RANDOM
273     // Mix in boot random.
274     union {
275         uint64_t u64[2];
276         uint32_t u32[4];
277     } br;
278     rom_get_boot_random(br.u32);
279     local_rng_state.r[which] ^= splitmix64(br.u64[0]);
280     local_rng_state.r[which ^ 1] ^= splitmix64(br.u64[1]);
281 #endif
282 
283 #if PICO_RAND_SEED_ENTROPY_SRC_TIME
284     // Mix in hashed time.  This is [possibly] predictable boot-to-boot
285     // but will vary application-to-application.
286     local_rng_state.r[which] ^= splitmix64(time_us_64());
287     which ^= 1;
288 #endif
289 #if PICO_RAND_SEED_ENTROPY_SRC_TRNG
290     local_rng_state.r[which] ^= splitmix64(capture_additional_trng_samples());
291     which ^= 1;
292 #endif
293 
294     spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
295     uint32_t save = spin_lock_blocking(lock);
296     if (!rng_initialised) {
297 #if PICO_RAND_SEED_ENTROPY_SRC_BUS_PERF_COUNTER
298 #if !PICO_RAND_BUS_PERF_COUNTER_INDEX
299         int idx = -1;
300         for(uint i = 0; i < count_of(busctrl_hw->counter); i++) {
301             if (busctrl_hw->counter[i].sel == BUSCTRL_PERFSEL0_RESET) {
302                 idx = (int)i;
303                 break;
304             }
305         }
306         hard_assert(idx != -1);
307         bus_counter_idx = (uint8_t)idx;
308 #else
309         bus_counter_idx = (uint8_t)PICO_RAND_BUS_PERF_COUNTER_INDEX;
310 #endif
311         busctrl_hw->counter[bus_counter_idx].sel = PICO_RAND_BUS_PERF_COUNTER_EVENT;
312 #endif
313         (void) xoroshiro128ss(&local_rng_state);
314         rng_state = local_rng_state;
315         rng_initialised = true;
316     }
317     spin_unlock(lock, save);
318 }
319 
get_rand_64(void)320 uint64_t get_rand_64(void) {
321     if (!rng_initialised) {
322         // Do not provide 'RNs' until the system has been initialised.  Note:
323         // The first initialisation can be quite time-consuming depending on
324         // the amount of RAM hashed, see RAM_HASH_START and RAM_HASH_END
325         initialise_rand();
326     }
327 
328     static volatile uint8_t check_byte;
329     rng_128_t local_rng_state = rng_state;
330     uint8_t local_check_byte = check_byte;
331     // Modify PRNG state with the run-time entropy sources,
332     // hashed to reduce correlation with previous modifications.
333     uint which = 0;
334 #if PICO_RAND_ENTROPY_SRC_TIME
335     local_rng_state.r[which] ^= splitmix64(time_us_64());
336     which ^= 1;
337 #endif
338 #if PICO_RAND_ENTROPY_SRC_ROSC
339     local_rng_state.r[which] ^= splitmix64(capture_additional_rosc_samples(PICO_RAND_ROSC_BIT_SAMPLE_COUNT));
340     which ^= 1;
341 #endif
342 #if PICO_RAND_ENTROPY_SRC_TRNG
343     uint64_t foo = capture_additional_trng_samples();
344     local_rng_state.r[which] ^= splitmix64(foo);
345     which ^= 1;
346 #endif
347 #if PICO_RAND_ENTROPY_SRC_BUS_PERF_COUNTER
348     uint32_t bus_counter_value = busctrl_hw->counter[bus_counter_idx].value;
349     // counter is saturating, so clear it if it has reached saturation
350     if (bus_counter_value == BUSCTRL_PERFCTR0_BITS) {
351         busctrl_hw->counter[bus_counter_idx].value = 0;
352     }
353     local_rng_state.r[which] ^= splitmix64(bus_counter_value);
354     which ^= 1;
355 #endif
356 
357     spin_lock_t *lock = spin_lock_instance(PICO_SPINLOCK_ID_RAND);
358     uint32_t save = spin_lock_blocking(lock);
359     if (local_check_byte != check_byte) {
360         // someone got a random number in the interim, so mix it in
361         local_rng_state.r[0] ^= rng_state.r[0];
362         local_rng_state.r[1] ^= rng_state.r[1];
363     }
364     // Generate a 64-bit RN from the modified PRNG state.
365     // Note: This also "churns" the 128-bit state for next time.
366     uint64_t rand64 = xoroshiro128ss(&local_rng_state);
367     rng_state = local_rng_state;
368     check_byte++;
369     spin_unlock(lock, save);
370 
371     return rand64;
372 }
373 
// Fill *ptr128 with 128 random bits, taken from two successive
// get_rand_64() calls (first result in r[0], second in r[1]).
void get_rand_128(rng_128_t *ptr128) {
    for (int half = 0; half < 2; half++) {
        ptr128->r[half] = get_rand_64();
    }
}
378 
// Return a 32-bit random number: the low 32 bits of a get_rand_64() result.
uint32_t get_rand_32(void) {
    const uint64_t wide = get_rand_64();
    return (uint32_t) wide;
}
382