/*
 * Copyright (c) 2023, Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 */

#include "dpa_hardened_word_copy.h"

#include "device_definition.h"
#include "cc3xx_rng.h"
#include "cc3xx_config.h"

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/**
 * \brief Maximum number of words that can be copied through a single call
 *        to \ref dpa_hardened_word_copy
 */
#define CC3XX_STDLIB_SECURE_COPY_MAX_WORDS (256)

static uint32_t xorshift_plus_128_lfsr(void)
{
    static uint64_t state[2] = {0};
    uint64_t temp0;
    uint64_t temp1;
    static bool seed_done = false;

    if (!seed_done) {
        /* This function doesn't need to be perfectly random as it is only used
         * for the permutation function, so only seed once per boot.
         */
        cc3xx_lowlevel_rng_get_random((uint8_t *)&state, sizeof(state));
        seed_done = true;
    }

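    /* xorshift128+ state update, using the (23, 18, 5) shift constants: the
     * old state[1] becomes the new state[0], and the scrambled old state[0]
     * becomes the new state[1].
     */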
    temp0 = state[0];
    temp1 = state[1];
    state[0] = state[1];

    temp0 ^= temp0 << 23;
    temp0 ^= temp0 >> 18;
    temp0 ^= temp1 ^ (temp1 >> 5);

    state[1] = temp0;

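    /* Return the top 32 bits of the 64-bit sum of the two state words */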
    return (temp0 + temp1) >> 32;
}

static uint32_t xorshift_get_random_uint(uint32_t bound)
{
    uint32_t mask;
    uint32_t value;
    uint32_t retry_count = 0;

    if ((bound & (bound - 1)) == 0) {
        /* If the bound is a power of two, the mask is simply bound - 1 */
        mask = bound - 1;
    } else {
        /* Otherwise, shift the all-ones word right by the number of leading
         * zeroes in the bound, so the mask covers every bit up to and
         * including the bound's leading one-bit (e.g. bound = 40 = 0b101000
         * gives mask = 0x3F).
         */
        mask = UINT32_MAX >> __builtin_clz(bound);
    }

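    /* Rejection-sample: masked values that still exceed the bound are
     * discarded and regenerated, so the result is uniform in [0, bound)
     * without modulo bias.
     */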
    do {
        value = xorshift_plus_128_lfsr() & mask;

        if (retry_count > CC3XX_CONFIG_STDLIB_LFSR_MAX_ATTEMPTS) {
            /* In the case of an error, 0 is always a reasonable return value */
            return 0;
        }

        retry_count++;
    } while (value >= bound);

    return value;
}

/* https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle. This returns a
 * uniformly random permutation, verified by experiment.
 */
static void fisher_yates_shuffle(uint8_t *permutation_buf, size_t len)
{
    uint32_t idx;
    uint32_t swap_idx;
    uint8_t temp_elem;

    if (len == 0) {
        return;
    }

    for (idx = 0; idx < len; idx++) {
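        /* Pick a uniformly random index in the not-yet-shuffled tail
         * [idx, len) and swap that element into position idx.
         */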
        swap_idx = xorshift_get_random_uint(len - idx);

        swap_idx += idx;
        temp_elem = permutation_buf[idx];
        permutation_buf[idx] = permutation_buf[swap_idx];
        permutation_buf[swap_idx] = temp_elem;
    }
}

void dpa_hardened_word_copy(volatile uint32_t *dst,
                            volatile const uint32_t *src, size_t word_count)
{
    uint8_t permutation_buf[word_count]; /* This is a VLA */
    size_t idx;

    /* We don't support more than 256 word permutations per copy, i.e. an
     * 8192-bit copy.
     */
    assert(word_count <= CC3XX_STDLIB_SECURE_COPY_MAX_WORDS);

    /* Initialize the permutation buffer with the identity permutation */
    for (idx = 0; idx < word_count; idx++) {
        permutation_buf[idx] = idx;
    }

    fisher_yates_shuffle(permutation_buf, word_count);
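    /* Copy the words in the randomly permuted order, inserting a random
     * KMU-generated delay before each word, so that the order and timing of
     * the transfers are decorrelated from the data being copied.
     */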
    for (idx = 0; idx < word_count; idx++) {
        kmu_random_delay(&KMU_DEV_S, KMU_DELAY_LIMIT_32_CYCLES);
        dst[permutation_buf[idx]] = src[permutation_buf[idx]];
    }
}
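
/* Illustrative usage sketch (not part of the driver): the buffer names below
 * are hypothetical caller-side variables. A 256-bit (8-word) key could be
 * copied between word-aligned buffers as:
 *
 *     uint32_t dst_key[8];
 *     extern const uint32_t src_key[8];
 *
 *     dpa_hardened_word_copy(dst_key, src_key, 8);
 */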