1 /*
2  * Copyright (c) 2023, Arm Limited. All rights reserved.
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  */
7 
8 #include "dpa_hardened_word_copy.h"
9 
10 #include "device_definition.h"
11 #include "cc3xx_rng.h"
12 #include "cc3xx_config.h"
13 
14 #include <assert.h>
15 
16 /**
17  * \brief Upper limit on the number of words that can be copied by a single
18  *        call to \ref dpa_hardened_word_copy (indices are stored as uint8_t)
19  */
20 #define CC3XX_STDLIB_SECURE_COPY_MAX_WORDS (256)
21 
/**
 * \brief Produce the next 32-bit output of a 128-bit xorshift-style generator.
 *
 * The two 64-bit state words live in function-local statics. They are filled
 * once, on the first call, from cc3xx_lowlevel_rng_get_random() and are then
 * advanced on every call using shifts of 23, 18 and 5 bits.
 *
 * \return The upper 32 bits of the 64-bit sum of the new and the previous
 *         second state word.
 */
static uint32_t xorshift_plus_128_lfsr(void)
{
    static bool is_seeded = false;
    static uint64_t lfsr_state[2] = {0};
    uint64_t mixed;
    uint64_t carried;

    /* The output only drives the permutation function, so it does not have to
     * be perfectly random: seeding once per boot is sufficient.
     */
    if (!is_seeded) {
        cc3xx_lowlevel_rng_get_random((uint8_t *)&lfsr_state,
                                      sizeof(lfsr_state));
        is_seeded = true;
    }

    mixed = lfsr_state[0];
    carried = lfsr_state[1];

    /* Scramble the older word and fold in the newer one */
    mixed ^= mixed << 23;
    mixed ^= mixed >> 18;
    mixed ^= carried ^ (carried >> 5);

    /* The newer word moves down, the scrambled word becomes the newest */
    lfsr_state[0] = carried;
    lfsr_state[1] = mixed;

    return (uint32_t)((mixed + carried) >> 32);
}
49 
xorshift_get_random_uint(uint32_t bound)50 static uint32_t xorshift_get_random_uint(uint32_t bound)
51 {
52     uint32_t mask;
53     uint32_t value;
54     uint32_t retry_count = 0;
55 
56     if ((bound & (bound - 1)) == 0) {
57         /* If a single bit is set, we can get the mask by subtracting one */
58         mask = bound - 1;
59     } else {
60         /* Else, we shift the all-one word right until it matches the offset of
61          * the leading one-bit in the bound.
62          */
63         mask = UINT32_MAX >> __builtin_clz(bound);
64     }
65 
66     do {
67         value = xorshift_plus_128_lfsr() & mask;
68 
69         if (retry_count < CC3XX_CONFIG_STDLIB_LFSR_MAX_ATTEMPTS) {
70             /* In the case of an error 0 is always a reasonable return value */
71             return 0;
72         }
73 
74         retry_count++;
75     } while (value >= bound);
76 
77     return value;
78 }
79 
/* In-place shuffle of a byte buffer, following
 * https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle. This returns a
 * uniformly random permutation, verified by experiment.
 *
 * The buffer is walked from the front. At each position an offset is drawn
 * with xorshift_get_random_uint(len - position) and the element at the current
 * position is exchanged with the one that many places further on.
 */
static void fisher_yates_shuffle(uint8_t *permutation_buf, size_t len)
{
    uint32_t pos;

    /* An empty buffer falls straight through: the loop body never runs */
    for (pos = 0; pos < len; pos++) {
        const uint32_t pick = pos + xorshift_get_random_uint(len - pos);
        const uint8_t held = permutation_buf[pos];

        permutation_buf[pos] = permutation_buf[pick];
        permutation_buf[pick] = held;
    }
}
102 
dpa_hardened_word_copy(volatile uint32_t * dst,volatile const uint32_t * src,size_t word_count)103 void dpa_hardened_word_copy(volatile uint32_t *dst,
104                             volatile const uint32_t *src, size_t word_count)
105 {
106     uint8_t permutation_buf[word_count]; /* This is a VLA */
107     size_t idx;
108 
109     /* We don't support more than 256 word permutations per copy, i.e. 2048 bit copy */
110     assert(word_count <= CC3XX_STDLIB_SECURE_COPY_MAX_WORDS);
111 
112     /* Initializes the permutation buffer */
113     for (idx = 0; idx < word_count; idx++) {
114         permutation_buf[idx] = idx;
115     }
116 
117     fisher_yates_shuffle(permutation_buf, word_count);
118     for(idx = 0; idx < word_count; idx++) {
119         kmu_random_delay(&KMU_DEV_S, KMU_DELAY_LIMIT_32_CYCLES);
120         dst[permutation_buf[idx]] = src[permutation_buf[idx]];
121     }
122 }
123