1 /*
2 * Multi-precision integer library
3 * ESP32 hardware accelerated parts based on mbedTLS implementation
4 *
5 * SPDX-FileCopyrightText: The Mbed TLS Contributors
6 *
7 * SPDX-License-Identifier: Apache-2.0
8 *
9 * SPDX-FileContributor: 2016-2022 Espressif Systems (Shanghai) CO LTD
10 */
11 #include "soc/hwcrypto_periph.h"
12 #include "soc/dport_reg.h"
13 #include "esp_private/periph_ctrl.h"
14 #include <mbedtls/bignum.h>
15 #include "bignum_impl.h"
16 #include <sys/param.h>
17 #include <sys/lock.h>
18
/* Lock taken in esp_mpi_enable_hardware_hw_op() and released in
   esp_mpi_disable_hardware_hw_op(), so it is held for the whole time the
   RSA peripheral is enabled by this file. It has no explicit initialiser;
   the enable function notes that newlib locks are lazily initialised on
   ESP-IDF.
*/
static _lock_t mpi_lock;
20
/* Round a word count up to a whole number of 512-bit (16 word) blocks.

   Counts that are already a multiple of 16, including zero, are
   returned unchanged.
*/
size_t esp_mpi_hardware_words(size_t words)
{
    const size_t words_per_block = 16;
    const size_t block_count = (words + (words_per_block - 1)) / words_per_block;

    return block_count * words_per_block;
}
28
esp_mpi_enable_hardware_hw_op(void)29 void esp_mpi_enable_hardware_hw_op( void )
30 {
31 /* newlib locks lazy initialize on ESP-IDF */
32 _lock_acquire(&mpi_lock);
33
34 /* Enable RSA hardware */
35 periph_module_enable(PERIPH_RSA_MODULE);
36 DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
37
38 while (DPORT_REG_READ(RSA_CLEAN_REG) != 1)
39 { }
40 // Note: from enabling RSA clock to here takes about 1.3us
41 }
42
esp_mpi_disable_hardware_hw_op(void)43 void esp_mpi_disable_hardware_hw_op( void )
44 {
45 DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
46
47 /* Disable RSA hardware */
48 periph_module_disable(PERIPH_RSA_MODULE);
49
50 _lock_release(&mpi_lock);
51 }
52
53
esp_mpi_interrupt_enable(bool enable)54 void esp_mpi_interrupt_enable( bool enable )
55 {
56 DPORT_REG_WRITE(RSA_INTERRUPT_REG, enable);
57 }
58
esp_mpi_interrupt_clear(void)59 void esp_mpi_interrupt_clear( void )
60 {
61 DPORT_REG_WRITE(RSA_CLEAR_INTERRUPT_REG, 1);
62 }
63
64 /* Copy mbedTLS MPI bignum 'mpi' to hardware memory block at 'mem_base'.
65
66 If hw_words is higher than the number of words in the bignum then
67 these additional words will be zeroed in the memory buffer.
68
69 */
70
71 /* Please see detailed note inside the function body below.
72 * Relevant: IDF-6029
73 https://github.com/espressif/esp-idf/issues/8710
74 https://github.com/espressif/esp-idf/issues/10403
75 */
mpi_to_mem_block(uint32_t mem_base,const mbedtls_mpi * mpi,size_t hw_words)76 static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t hw_words)
77 {
78 uint32_t copy_words = MIN(hw_words, mpi->MBEDTLS_PRIVATE(n));
79
80 /* Copy MPI data to memory block registers */
81 for (uint32_t i = 0; i < copy_words; i++) {
82 DPORT_REG_WRITE(mem_base + i * 4, mpi->MBEDTLS_PRIVATE(p[i]));
83 }
84
85 /* Zero any remaining memory block data */
86 for (uint32_t i = copy_words; i < hw_words; i++) {
87 DPORT_REG_WRITE(mem_base + i * 4, 0);
88 }
89
90 #if _INTERNAL_DEBUG_PURPOSE
91 /*
92 * With Xtensa GCC 11.2.0 (from ESP-IDF v5.x), it was observed that above zero initialization
93 * loop gets optimized to `memset` call from the ROM library. This was causing an issue that
94 * specific write (store) operation to the MPI peripheral block was getting lost erroneously.
95 * Following data re-verify loop could catch it during runtime.
96 *
97 * As a workaround, we are using DPORT_WRITE_REG (volatile writes) wrappers to write to
98 * the MPI peripheral.
99 *
100 */
101
102 //for (uint32_t i = copy_words; i < hw_words; i++) { assert(pbase[i] == 0); }
103 #endif
104 }
105
106 /* Read mbedTLS MPI bignum back from hardware memory block.
107
108 Reads num_words words from block.
109
110 Bignum 'x' should already be grown to at least num_words by caller (can be done while
111 calculation is in progress, to save some cycles)
112 */
mem_block_to_mpi(mbedtls_mpi * x,uint32_t mem_base,size_t num_words)113 static inline void mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, size_t num_words)
114 {
115 assert(x->MBEDTLS_PRIVATE(n) >= num_words);
116
117 /* Copy data from memory block registers */
118 esp_dport_access_read_buffer(x->MBEDTLS_PRIVATE(p), mem_base, num_words);
119
120 /* Zero any remaining limbs in the bignum, if the buffer is bigger
121 than num_words */
122 for (size_t i = num_words; i < x->MBEDTLS_PRIVATE(n); i++) {
123 x->MBEDTLS_PRIVATE(p[i]) = 0;
124 }
125 }
126
127
128 /* Begin an RSA operation. op_reg specifies which 'START' register
129 to write to.
130 */
start_op(uint32_t op_reg)131 static inline void start_op(uint32_t op_reg)
132 {
133 /* Clear interrupt status */
134 DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
135
136 /* Note: above REG_WRITE includes a memw, so we know any writes
137 to the memory blocks are also complete. */
138
139 DPORT_REG_WRITE(op_reg, 1);
140 }
141
142 /* Wait for an RSA operation to complete.
143 */
wait_op_complete(void)144 static inline void wait_op_complete(void)
145 {
146 while (DPORT_REG_READ(RSA_INTERRUPT_REG) != 1)
147 { }
148
149 /* clear the interrupt */
150 DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
151 }
152
153 /* Read result from last MPI operation */
esp_mpi_read_result_hw_op(mbedtls_mpi * Z,size_t z_words)154 void esp_mpi_read_result_hw_op(mbedtls_mpi *Z, size_t z_words)
155 {
156 wait_op_complete();
157 mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, z_words);
158 }
159
160 /* Z = (X * Y) mod M */
esp_mpi_mul_mpi_mod_hw_op(const mbedtls_mpi * X,const mbedtls_mpi * Y,const mbedtls_mpi * M,const mbedtls_mpi * Rinv,mbedtls_mpi_uint Mprime,size_t hw_words)161 void esp_mpi_mul_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t hw_words)
162 {
163 /* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
164 mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
165 mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
166 mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, hw_words);
167 DPORT_REG_WRITE(RSA_M_DASH_REG, (uint32_t)Mprime);
168
169 /* "mode" register loaded with number of 512-bit blocks, minus 1 */
170 DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hw_words / 16) - 1);
171
172 /* Execute first stage montgomery multiplication */
173 start_op(RSA_MULT_START_REG);
174
175 wait_op_complete();
176
177 /* execute second stage */
178 /* Load Y to X input memory block, rerun */
179 mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, hw_words);
180
181 start_op(RSA_MULT_START_REG);
182 }
183
184 /* Z = X * Y */
esp_mpi_mul_mpi_hw_op(const mbedtls_mpi * X,const mbedtls_mpi * Y,size_t hw_words)185 void esp_mpi_mul_mpi_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t hw_words)
186 {
187 /* Copy X (right-extended) & Y (left-extended) to memory block */
188 mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
189 mpi_to_mem_block(RSA_MEM_Z_BLOCK_BASE + hw_words * 4, Y, hw_words);
190 /* NB: as Y is left-extended, we don't zero the bottom words_mult words of Y block.
191 This is OK for now because zeroing is done by hardware when we do esp_mpi_acquire_hardware().
192 */
193
194 DPORT_REG_WRITE(RSA_M_DASH_REG, 0);
195
196 /* "mode" register loaded with number of 512-bit blocks in result,
197 plus 7 (for range 9-12). (this is ((N~ / 32) - 1) + 8))
198 */
199 DPORT_REG_WRITE(RSA_MULT_MODE_REG, ((hw_words * 2) / 16) + 7);
200
201 start_op(RSA_MULT_START_REG);
202
203 }
204
205
/* Run one hardware montgomery multiplication of X and Y with modulus M
   and store the reduced result in Z.

   Z        - result; may be the same pointer as X or Y. Grown to at least
              hw_words limbs, sign set to 1.
   X, Y     - operands, loaded into the X and RB memory blocks.
   M        - modulus.
   Mprime   - value written to RSA_M_DASH_REG.
   hw_words - operand length in 32-bit words; the mode register is
              programmed with hw_words / 16 - 1.
   again    - when false, M, Mprime and the mode register are loaded before
              the operation. When true, that setup is skipped (presumably
              because it is still in place from a previous call; verify
              against callers).

   Returns 0 on success, or the error code from mbedtls_mpi_grow() or
   mbedtls_mpi_sub_mpi().

   The hardware operation is always waited for before returning, including
   when growing Z fails, so the peripheral is never left running with an
   unconsumed completion interrupt.
*/
int esp_mont_hw_op(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M,
                   mbedtls_mpi_uint Mprime,
                   size_t hw_words,
                   bool again)
{
    // Note Z may be the same pointer as X or Y
    int ret = 0;

    // montgomery mult prepare
    if (again == false) {
        mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
        DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
        DPORT_REG_WRITE(RSA_MULT_MODE_REG, hw_words / 16 - 1);
    }

    mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
    mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Y, hw_words);

    start_op(RSA_MULT_START_REG);

    /* Prepare Z while the hardware is busy. X and Y have already been
       copied to the memory blocks, so touching Z is safe even when it
       aliases one of them. */
    Z->MBEDTLS_PRIVATE(s) = 1; // The sign of Z will be = M->s (but M->s is always 1)
    ret = mbedtls_mpi_grow(Z, hw_words);

    /* Wait unconditionally: bailing out on an allocation failure without
       waiting would leave the operation in flight and its completion
       interrupt pending for whoever uses the peripheral next. */
    wait_op_complete();

    if (ret != 0) {
        goto cleanup;
    }

    /* Read back the result */
    mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, hw_words);

    /* from HAC 14.36 - 3. If Z >= M then Z = Z - M */
    if (mbedtls_mpi_cmp_mpi(Z, M) >= 0) {
        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(Z, Z, M));
    }
cleanup:
    return ret;
}
241
242
243
244 /* Special-case of mbedtls_mpi_mult_mpi(), where we use hardware montgomery mod
245 multiplication to calculate an mbedtls_mpi_mult_mpi result where either
246 A or B are >2048 bits so can't use the standard multiplication method.
247
248 Result (z_words, based on A bits + B bits) must still be less than 4096 bits.
249
250 This case is simpler than the general case modulo multiply of
251 esp_mpi_mul_mpi_mod() because we can control the other arguments:
252
253 * Modulus is chosen with M=(2^num_bits - 1) (ie M=R-1), so output
254 isn't actually modulo anything.
255 * Mprime and Rinv are therefore predictable as follows:
256 Mprime = 1
257 Rinv = 1
258
259 (See RSA Accelerator section in Technical Reference for more about Mprime, Rinv)
260 */
esp_mpi_mult_mpi_failover_mod_mult_hw_op(const mbedtls_mpi * X,const mbedtls_mpi * Y,size_t num_words)261 void esp_mpi_mult_mpi_failover_mod_mult_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
262 {
263 size_t hw_words = num_words;
264
265 /* M = 2^num_words - 1, so block is entirely FF */
266 for (size_t i = 0; i < hw_words; i++) {
267 DPORT_REG_WRITE(RSA_MEM_M_BLOCK_BASE + i * 4, UINT32_MAX);
268 }
269 /* Mprime = 1 */
270 DPORT_REG_WRITE(RSA_M_DASH_REG, 1);
271
272 /* "mode" register loaded with number of 512-bit blocks, minus 1 */
273 DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hw_words / 16) - 1);
274
275 /* Load X */
276 mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
277
278 /* Rinv = 1, write first word */
279 DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE, 1);
280
281 /* Zero out rest of the Rinv words */
282 for (size_t i = 1; i < hw_words; i++) {
283 DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
284 }
285
286 start_op(RSA_MULT_START_REG);
287
288 wait_op_complete();
289
290 /* finish the modular multiplication */
291 /* Load Y to X input memory block, rerun */
292 mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, hw_words);
293
294 start_op(RSA_MULT_START_REG);
295
296 }
297