1 /*
2  *  Core bignum functions
3  *
4  *  Copyright The Mbed TLS Contributors
5  *  SPDX-License-Identifier: Apache-2.0
6  *
7  *  Licensed under the Apache License, Version 2.0 (the "License"); you may
8  *  not use this file except in compliance with the License.
9  *  You may obtain a copy of the License at
10  *
11  *  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  *  Unless required by applicable law or agreed to in writing, software
14  *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15  *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  *  See the License for the specific language governing permissions and
17  *  limitations under the License.
18  */
19 
20 #include "common.h"
21 
22 #if defined(MBEDTLS_BIGNUM_C)
23 
24 #include <string.h>
25 
26 #include "mbedtls/error.h"
27 #include "mbedtls/platform_util.h"
28 #include "constant_time_internal.h"
29 
30 #include "mbedtls/platform.h"
31 
32 #include "bignum_core.h"
33 #include "bn_mul.h"
34 #include "constant_time_internal.h"
35 
mbedtls_mpi_core_clz(mbedtls_mpi_uint a)36 size_t mbedtls_mpi_core_clz( mbedtls_mpi_uint a )
37 {
38     size_t j;
39     mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1);
40 
41     for( j = 0; j < biL; j++ )
42     {
43         if( a & mask ) break;
44 
45         mask >>= 1;
46     }
47 
48     return( j );
49 }
50 
mbedtls_mpi_core_bitlen(const mbedtls_mpi_uint * A,size_t A_limbs)51 size_t mbedtls_mpi_core_bitlen( const mbedtls_mpi_uint *A, size_t A_limbs )
52 {
53     size_t i, j;
54 
55     if( A_limbs == 0 )
56         return( 0 );
57 
58     for( i = A_limbs - 1; i > 0; i-- )
59         if( A[i] != 0 )
60             break;
61 
62     j = biL - mbedtls_mpi_core_clz( A[i] );
63 
64     return( ( i * biL ) + j );
65 }
66 
67 /* Convert a big-endian byte array aligned to the size of mbedtls_mpi_uint
68  * into the storage form used by mbedtls_mpi. */
mpi_bigendian_to_host_c(mbedtls_mpi_uint a)69 static mbedtls_mpi_uint mpi_bigendian_to_host_c( mbedtls_mpi_uint a )
70 {
71     uint8_t i;
72     unsigned char *a_ptr;
73     mbedtls_mpi_uint tmp = 0;
74 
75     for( i = 0, a_ptr = (unsigned char *) &a; i < ciL; i++, a_ptr++ )
76     {
77         tmp <<= CHAR_BIT;
78         tmp |= (mbedtls_mpi_uint) *a_ptr;
79     }
80 
81     return( tmp );
82 }
83 
mpi_bigendian_to_host(mbedtls_mpi_uint a)84 static mbedtls_mpi_uint mpi_bigendian_to_host( mbedtls_mpi_uint a )
85 {
86 #if defined(__BYTE_ORDER__)
87 
88 /* Nothing to do on bigendian systems. */
89 #if ( __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
90     return( a );
91 #endif /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */
92 
93 #if ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
94 
95 /* For GCC and Clang, have builtins for byte swapping. */
96 #if defined(__GNUC__) && defined(__GNUC_PREREQ)
97 #if __GNUC_PREREQ(4,3)
98 #define have_bswap
99 #endif
100 #endif
101 
102 #if defined(__clang__) && defined(__has_builtin)
103 #if __has_builtin(__builtin_bswap32)  &&                 \
104     __has_builtin(__builtin_bswap64)
105 #define have_bswap
106 #endif
107 #endif
108 
109 #if defined(have_bswap)
110     /* The compiler is hopefully able to statically evaluate this! */
111     switch( sizeof(mbedtls_mpi_uint) )
112     {
113         case 4:
114             return( __builtin_bswap32(a) );
115         case 8:
116             return( __builtin_bswap64(a) );
117     }
118 #endif
119 #endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
120 #endif /* __BYTE_ORDER__ */
121 
122     /* Fall back to C-based reordering if we don't know the byte order
123      * or we couldn't use a compiler-specific builtin. */
124     return( mpi_bigendian_to_host_c( a ) );
125 }
126 
mbedtls_mpi_core_bigendian_to_host(mbedtls_mpi_uint * A,size_t A_limbs)127 void mbedtls_mpi_core_bigendian_to_host( mbedtls_mpi_uint *A,
128                                          size_t A_limbs )
129 {
130     mbedtls_mpi_uint *cur_limb_left;
131     mbedtls_mpi_uint *cur_limb_right;
132     if( A_limbs == 0 )
133         return;
134 
135     /*
136      * Traverse limbs and
137      * - adapt byte-order in each limb
138      * - swap the limbs themselves.
139      * For that, simultaneously traverse the limbs from left to right
140      * and from right to left, as long as the left index is not bigger
141      * than the right index (it's not a problem if limbs is odd and the
142      * indices coincide in the last iteration).
143      */
144     for( cur_limb_left = A, cur_limb_right = A + ( A_limbs - 1 );
145          cur_limb_left <= cur_limb_right;
146          cur_limb_left++, cur_limb_right-- )
147     {
148         mbedtls_mpi_uint tmp;
149         /* Note that if cur_limb_left == cur_limb_right,
150          * this code effectively swaps the bytes only once. */
151         tmp             = mpi_bigendian_to_host( *cur_limb_left );
152         *cur_limb_left  = mpi_bigendian_to_host( *cur_limb_right );
153         *cur_limb_right = tmp;
154     }
155 }
156 
mbedtls_mpi_core_cond_assign(mbedtls_mpi_uint * X,const mbedtls_mpi_uint * A,size_t limbs,unsigned char assign)157 void mbedtls_mpi_core_cond_assign( mbedtls_mpi_uint *X,
158                                    const mbedtls_mpi_uint *A,
159                                    size_t limbs,
160                                    unsigned char assign )
161 {
162     if( X == A )
163         return;
164 
165     mbedtls_ct_mpi_uint_cond_assign( limbs, X, A, assign );
166 }
167 
mbedtls_mpi_core_cond_swap(mbedtls_mpi_uint * X,mbedtls_mpi_uint * Y,size_t limbs,unsigned char swap)168 void mbedtls_mpi_core_cond_swap( mbedtls_mpi_uint *X,
169                                  mbedtls_mpi_uint *Y,
170                                  size_t limbs,
171                                  unsigned char swap )
172 {
173     if( X == Y )
174         return;
175 
176     /* all-bits 1 if swap is 1, all-bits 0 if swap is 0 */
177     mbedtls_mpi_uint limb_mask = mbedtls_ct_mpi_uint_mask( swap );
178 
179     for( size_t i = 0; i < limbs; i++ )
180     {
181         mbedtls_mpi_uint tmp = X[i];
182         X[i] = ( X[i] & ~limb_mask ) | ( Y[i] & limb_mask );
183         Y[i] = ( Y[i] & ~limb_mask ) | (  tmp & limb_mask );
184     }
185 }
186 
mbedtls_mpi_core_read_le(mbedtls_mpi_uint * X,size_t X_limbs,const unsigned char * input,size_t input_length)187 int mbedtls_mpi_core_read_le( mbedtls_mpi_uint *X,
188                               size_t X_limbs,
189                               const unsigned char *input,
190                               size_t input_length )
191 {
192     const size_t limbs = CHARS_TO_LIMBS( input_length );
193 
194     if( X_limbs < limbs )
195         return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
196 
197     if( X != NULL )
198     {
199         memset( X, 0, X_limbs * ciL );
200 
201         for( size_t i = 0; i < input_length; i++ )
202         {
203             size_t offset = ( ( i % ciL ) << 3 );
204             X[i / ciL] |= ( (mbedtls_mpi_uint) input[i] ) << offset;
205         }
206     }
207 
208     return( 0 );
209 }
210 
mbedtls_mpi_core_read_be(mbedtls_mpi_uint * X,size_t X_limbs,const unsigned char * input,size_t input_length)211 int mbedtls_mpi_core_read_be( mbedtls_mpi_uint *X,
212                               size_t X_limbs,
213                               const unsigned char *input,
214                               size_t input_length )
215 {
216     const size_t limbs = CHARS_TO_LIMBS( input_length );
217 
218     if( X_limbs < limbs )
219         return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
220 
221     /* If X_limbs is 0, input_length must also be 0 (from previous test).
222      * Nothing to do. */
223     if( X_limbs == 0 )
224         return( 0 );
225 
226     memset( X, 0, X_limbs * ciL );
227 
228     /* memcpy() with (NULL, 0) is undefined behaviour */
229     if( input_length != 0 )
230     {
231         size_t overhead = ( X_limbs * ciL ) - input_length;
232         unsigned char *Xp = (unsigned char *) X;
233         memcpy( Xp + overhead, input, input_length );
234     }
235 
236     mbedtls_mpi_core_bigendian_to_host( X, X_limbs );
237 
238     return( 0 );
239 }
240 
mbedtls_mpi_core_write_le(const mbedtls_mpi_uint * A,size_t A_limbs,unsigned char * output,size_t output_length)241 int mbedtls_mpi_core_write_le( const mbedtls_mpi_uint *A,
242                                size_t A_limbs,
243                                unsigned char *output,
244                                size_t output_length )
245 {
246     size_t stored_bytes = A_limbs * ciL;
247     size_t bytes_to_copy;
248 
249     if( stored_bytes < output_length )
250     {
251         bytes_to_copy = stored_bytes;
252     }
253     else
254     {
255         bytes_to_copy = output_length;
256 
257         /* The output buffer is smaller than the allocated size of A.
258          * However A may fit if its leading bytes are zero. */
259         for( size_t i = bytes_to_copy; i < stored_bytes; i++ )
260         {
261             if( GET_BYTE( A, i ) != 0 )
262                 return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
263         }
264     }
265 
266     for( size_t i = 0; i < bytes_to_copy; i++ )
267         output[i] = GET_BYTE( A, i );
268 
269     if( stored_bytes < output_length )
270     {
271         /* Write trailing 0 bytes */
272         memset( output + stored_bytes, 0, output_length - stored_bytes );
273     }
274 
275     return( 0 );
276 }
277 
mbedtls_mpi_core_write_be(const mbedtls_mpi_uint * X,size_t X_limbs,unsigned char * output,size_t output_length)278 int mbedtls_mpi_core_write_be( const mbedtls_mpi_uint *X,
279                                size_t X_limbs,
280                                unsigned char *output,
281                                size_t output_length )
282 {
283     size_t stored_bytes;
284     size_t bytes_to_copy;
285     unsigned char *p;
286 
287     stored_bytes = X_limbs * ciL;
288 
289     if( stored_bytes < output_length )
290     {
291         /* There is enough space in the output buffer. Write initial
292          * null bytes and record the position at which to start
293          * writing the significant bytes. In this case, the execution
294          * trace of this function does not depend on the value of the
295          * number. */
296         bytes_to_copy = stored_bytes;
297         p = output + output_length - stored_bytes;
298         memset( output, 0, output_length - stored_bytes );
299     }
300     else
301     {
302         /* The output buffer is smaller than the allocated size of X.
303          * However X may fit if its leading bytes are zero. */
304         bytes_to_copy = output_length;
305         p = output;
306         for( size_t i = bytes_to_copy; i < stored_bytes; i++ )
307         {
308             if( GET_BYTE( X, i ) != 0 )
309                 return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
310         }
311     }
312 
313     for( size_t i = 0; i < bytes_to_copy; i++ )
314         p[bytes_to_copy - i - 1] = GET_BYTE( X, i );
315 
316     return( 0 );
317 }
318 
mbedtls_mpi_core_shift_r(mbedtls_mpi_uint * X,size_t limbs,size_t count)319 void mbedtls_mpi_core_shift_r( mbedtls_mpi_uint *X, size_t limbs,
320                                size_t count )
321 {
322     size_t i, v0, v1;
323     mbedtls_mpi_uint r0 = 0, r1;
324 
325     v0 = count /  biL;
326     v1 = count & (biL - 1);
327 
328     if( v0 > limbs || ( v0 == limbs && v1 > 0 ) )
329     {
330         memset( X, 0, limbs * ciL );
331         return;
332     }
333 
334     /*
335      * shift by count / limb_size
336      */
337     if( v0 > 0 )
338     {
339         for( i = 0; i < limbs - v0; i++ )
340             X[i] = X[i + v0];
341 
342         for( ; i < limbs; i++ )
343             X[i] = 0;
344     }
345 
346     /*
347      * shift by count % limb_size
348      */
349     if( v1 > 0 )
350     {
351         for( i = limbs; i > 0; i-- )
352         {
353             r1 = X[i - 1] << (biL - v1);
354             X[i - 1] >>= v1;
355             X[i - 1] |= r0;
356             r0 = r1;
357         }
358     }
359 }
360 
mbedtls_mpi_core_add(mbedtls_mpi_uint * X,const mbedtls_mpi_uint * A,const mbedtls_mpi_uint * B,size_t limbs)361 mbedtls_mpi_uint mbedtls_mpi_core_add( mbedtls_mpi_uint *X,
362                                        const mbedtls_mpi_uint *A,
363                                        const mbedtls_mpi_uint *B,
364                                        size_t limbs )
365 {
366     mbedtls_mpi_uint c = 0;
367 
368     for( size_t i = 0; i < limbs; i++ )
369     {
370         mbedtls_mpi_uint t = c + A[i];
371         c = ( t < A[i] );
372         t += B[i];
373         c += ( t < B[i] );
374         X[i] = t;
375     }
376 
377     return( c );
378 }
379 
mbedtls_mpi_core_add_if(mbedtls_mpi_uint * X,const mbedtls_mpi_uint * A,size_t limbs,unsigned cond)380 mbedtls_mpi_uint mbedtls_mpi_core_add_if( mbedtls_mpi_uint *X,
381                                           const mbedtls_mpi_uint *A,
382                                           size_t limbs,
383                                           unsigned cond )
384 {
385     mbedtls_mpi_uint c = 0;
386 
387     /* all-bits 0 if cond is 0, all-bits 1 if cond is non-0 */
388     const mbedtls_mpi_uint mask = mbedtls_ct_mpi_uint_mask( cond );
389 
390     for( size_t i = 0; i < limbs; i++ )
391     {
392         mbedtls_mpi_uint add = mask & A[i];
393         mbedtls_mpi_uint t = c + X[i];
394         c = ( t < X[i] );
395         t += add;
396         c += ( t < add );
397         X[i] = t;
398     }
399 
400     return( c );
401 }
402 
mbedtls_mpi_core_sub(mbedtls_mpi_uint * X,const mbedtls_mpi_uint * A,const mbedtls_mpi_uint * B,size_t limbs)403 mbedtls_mpi_uint mbedtls_mpi_core_sub( mbedtls_mpi_uint *X,
404                                        const mbedtls_mpi_uint *A,
405                                        const mbedtls_mpi_uint *B,
406                                        size_t limbs )
407 {
408     mbedtls_mpi_uint c = 0;
409 
410     for( size_t i = 0; i < limbs; i++ )
411     {
412         mbedtls_mpi_uint z = ( A[i] < c );
413         mbedtls_mpi_uint t = A[i] - c;
414         c = ( t < B[i] ) + z;
415         X[i] = t - B[i];
416     }
417 
418     return( c );
419 }
420 
mbedtls_mpi_core_mla(mbedtls_mpi_uint * d,size_t d_len,const mbedtls_mpi_uint * s,size_t s_len,mbedtls_mpi_uint b)421 mbedtls_mpi_uint mbedtls_mpi_core_mla( mbedtls_mpi_uint *d, size_t d_len,
422                                        const mbedtls_mpi_uint *s, size_t s_len,
423                                        mbedtls_mpi_uint b )
424 {
425     mbedtls_mpi_uint c = 0; /* carry */
426     /*
427      * It is a documented precondition of this function that d_len >= s_len.
428      * If that's not the case, we swap these round: this turns what would be
429      * a buffer overflow into an incorrect result.
430      */
431     if( d_len < s_len )
432         s_len = d_len;
433     size_t excess_len = d_len - s_len;
434     size_t steps_x8 = s_len / 8;
435     size_t steps_x1 = s_len & 7;
436 
437     while( steps_x8-- )
438     {
439         MULADDC_X8_INIT
440         MULADDC_X8_CORE
441         MULADDC_X8_STOP
442     }
443 
444     while( steps_x1-- )
445     {
446         MULADDC_X1_INIT
447         MULADDC_X1_CORE
448         MULADDC_X1_STOP
449     }
450 
451     while( excess_len-- )
452     {
453         *d += c;
454         c = ( *d < c );
455         d++;
456     }
457 
458     return( c );
459 }
460 
461 /*
462  * Fast Montgomery initialization (thanks to Tom St Denis).
463  */
mbedtls_mpi_core_montmul_init(const mbedtls_mpi_uint * N)464 mbedtls_mpi_uint mbedtls_mpi_core_montmul_init( const mbedtls_mpi_uint *N )
465 {
466     mbedtls_mpi_uint x = N[0];
467 
468     x += ( ( N[0] + 2 ) & 4 ) << 1;
469 
470     for( unsigned int i = biL; i >= 8; i /= 2 )
471         x *= ( 2 - ( N[0] * x ) );
472 
473     return( ~x + 1 );
474 }
475 
mbedtls_mpi_core_montmul(mbedtls_mpi_uint * X,const mbedtls_mpi_uint * A,const mbedtls_mpi_uint * B,size_t B_limbs,const mbedtls_mpi_uint * N,size_t AN_limbs,mbedtls_mpi_uint mm,mbedtls_mpi_uint * T)476 void mbedtls_mpi_core_montmul( mbedtls_mpi_uint *X,
477                                const mbedtls_mpi_uint *A,
478                                const mbedtls_mpi_uint *B,
479                                size_t B_limbs,
480                                const mbedtls_mpi_uint *N,
481                                size_t AN_limbs,
482                                mbedtls_mpi_uint mm,
483                                mbedtls_mpi_uint *T )
484 {
485     memset( T, 0, ( 2 * AN_limbs + 1 ) * ciL );
486 
487     for( size_t i = 0; i < AN_limbs; i++ )
488     {
489         /* T = (T + u0*B + u1*N) / 2^biL */
490         mbedtls_mpi_uint u0 = A[i];
491         mbedtls_mpi_uint u1 = ( T[0] + u0 * B[0] ) * mm;
492 
493         (void) mbedtls_mpi_core_mla( T, AN_limbs + 2, B, B_limbs, u0 );
494         (void) mbedtls_mpi_core_mla( T, AN_limbs + 2, N, AN_limbs, u1 );
495 
496         T++;
497     }
498 
499     /*
500      * The result we want is (T >= N) ? T - N : T.
501      *
502      * For better constant-time properties in this function, we always do the
503      * subtraction, with the result in X.
504      *
505      * We also look to see if there was any carry in the final additions in the
506      * loop above.
507      */
508 
509     mbedtls_mpi_uint carry  = T[AN_limbs];
510     mbedtls_mpi_uint borrow = mbedtls_mpi_core_sub( X, T, N, AN_limbs );
511 
512     /*
513      * Using R as the Montgomery radix (auxiliary modulus) i.e. 2^(biL*AN_limbs):
514      *
515      * T can be in one of 3 ranges:
516      *
517      * 1) T < N      : (carry, borrow) = (0, 1): we want T
518      * 2) N <= T < R : (carry, borrow) = (0, 0): we want X
519      * 3) T >= R     : (carry, borrow) = (1, 1): we want X
520      *
521      * and (carry, borrow) = (1, 0) can't happen.
522      *
523      * So the correct return value is already in X if (carry ^ borrow) = 0,
524      * but is in (the lower AN_limbs limbs of) T if (carry ^ borrow) = 1.
525      */
526     mbedtls_ct_mpi_uint_cond_assign( AN_limbs, X, T, (unsigned char) ( carry ^ borrow ) );
527 }
528 
mbedtls_mpi_core_get_mont_r2_unsafe(mbedtls_mpi * X,const mbedtls_mpi * N)529 int mbedtls_mpi_core_get_mont_r2_unsafe( mbedtls_mpi *X,
530                                          const mbedtls_mpi *N )
531 {
532     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
533 
534     MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 1 ) );
535     MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( X, N->n * 2 * biL ) );
536     MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( X, X, N ) );
537     MBEDTLS_MPI_CHK( mbedtls_mpi_shrink( X, N->n ) );
538 
539 cleanup:
540     return( ret );
541 }
542 
543 MBEDTLS_STATIC_TESTABLE
mbedtls_mpi_core_ct_uint_table_lookup(mbedtls_mpi_uint * dest,const mbedtls_mpi_uint * table,size_t limbs,size_t count,size_t index)544 void mbedtls_mpi_core_ct_uint_table_lookup( mbedtls_mpi_uint *dest,
545                                             const mbedtls_mpi_uint *table,
546                                             size_t limbs,
547                                             size_t count,
548                                             size_t index )
549 {
550     for( size_t i = 0; i < count; i++, table += limbs )
551     {
552         unsigned char assign = mbedtls_ct_size_bool_eq( i, index );
553         mbedtls_mpi_core_cond_assign( dest, table, limbs, assign );
554     }
555 }
556 
557 /* Fill X with n_bytes random bytes.
558  * X must already have room for those bytes.
559  * The ordering of the bytes returned from the RNG is suitable for
560  * deterministic ECDSA (see RFC 6979 §3.3 and the specification of
561  * mbedtls_mpi_core_random()).
562  */
mbedtls_mpi_core_fill_random(mbedtls_mpi_uint * X,size_t X_limbs,size_t n_bytes,int (* f_rng)(void *,unsigned char *,size_t),void * p_rng)563 int mbedtls_mpi_core_fill_random(
564     mbedtls_mpi_uint *X, size_t X_limbs,
565     size_t n_bytes,
566     int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
567 {
568     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
569     const size_t limbs = CHARS_TO_LIMBS( n_bytes );
570     const size_t overhead = ( limbs * ciL ) - n_bytes;
571 
572     if( X_limbs < limbs )
573         return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
574 
575     memset( X, 0, overhead );
576     memset( (unsigned char *) X + limbs * ciL, 0, ( X_limbs - limbs ) * ciL );
577     MBEDTLS_MPI_CHK( f_rng( p_rng, (unsigned char *) X + overhead, n_bytes ) );
578     mbedtls_mpi_core_bigendian_to_host( X, limbs );
579 
580 cleanup:
581     return( ret );
582 }
583 
584 /* BEGIN MERGE SLOT 1 */
585 
exp_mod_get_window_size(size_t Ebits)586 static size_t exp_mod_get_window_size( size_t Ebits )
587 {
588     size_t wsize = ( Ebits > 671 ) ? 6 : ( Ebits > 239 ) ? 5 :
589                    ( Ebits >  79 ) ? 4 : 1;
590 
591 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
592     if( wsize > MBEDTLS_MPI_WINDOW_SIZE )
593         wsize = MBEDTLS_MPI_WINDOW_SIZE;
594 #endif
595 
596     return( wsize );
597 }
598 
mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs,size_t E_limbs)599 size_t mbedtls_mpi_core_exp_mod_working_limbs( size_t AN_limbs, size_t E_limbs )
600 {
601     const size_t wsize = exp_mod_get_window_size( E_limbs * biL );
602     const size_t welem = ( (size_t) 1 ) << wsize;
603 
604     /* How big does each part of the working memory pool need to be? */
605     const size_t table_limbs   = welem * AN_limbs;
606     const size_t select_limbs  = AN_limbs;
607     const size_t temp_limbs    = 2 * AN_limbs + 1;
608 
609     return( table_limbs + select_limbs + temp_limbs );
610 }
611 
exp_mod_precompute_window(const mbedtls_mpi_uint * A,const mbedtls_mpi_uint * N,size_t AN_limbs,mbedtls_mpi_uint mm,const mbedtls_mpi_uint * RR,size_t welem,mbedtls_mpi_uint * Wtable,mbedtls_mpi_uint * temp)612 static void exp_mod_precompute_window( const mbedtls_mpi_uint *A,
613                                        const mbedtls_mpi_uint *N,
614                                        size_t AN_limbs,
615                                        mbedtls_mpi_uint mm,
616                                        const mbedtls_mpi_uint *RR,
617                                        size_t welem,
618                                        mbedtls_mpi_uint *Wtable,
619                                        mbedtls_mpi_uint *temp )
620 {
621     /* W[0] = 1 (in Montgomery presentation) */
622     memset( Wtable, 0, AN_limbs * ciL );
623     Wtable[0] = 1;
624     mbedtls_mpi_core_montmul( Wtable, Wtable, RR, AN_limbs, N, AN_limbs, mm, temp );
625 
626     /* W[1] = A (already in Montgomery presentation) */
627     mbedtls_mpi_uint *W1 = Wtable + AN_limbs;
628     memcpy( W1, A, AN_limbs * ciL );
629 
630     /* W[i+1] = W[i] * W[1], i >= 2 */
631     mbedtls_mpi_uint *Wprev = W1;
632     for( size_t i = 2; i < welem; i++ )
633     {
634         mbedtls_mpi_uint *Wcur = Wprev + AN_limbs;
635         mbedtls_mpi_core_montmul( Wcur, Wprev, W1, AN_limbs, N, AN_limbs, mm, temp );
636         Wprev = Wcur;
637     }
638 }
639 
640 /* Exponentiation: X := A^E mod N.
641  *
642  * A must already be in Montgomery form.
643  *
644  * As in other bignum functions, assume that AN_limbs and E_limbs are nonzero.
645  *
646  * RR must contain 2^{2*biL} mod N.
647  *
648  * The algorithm is a variant of Left-to-right k-ary exponentiation: HAC 14.82
649  * (The difference is that the body in our loop processes a single bit instead
650  * of a full window.)
651  */
mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint * X,const mbedtls_mpi_uint * A,const mbedtls_mpi_uint * N,size_t AN_limbs,const mbedtls_mpi_uint * E,size_t E_limbs,const mbedtls_mpi_uint * RR,mbedtls_mpi_uint * T)652 void mbedtls_mpi_core_exp_mod( mbedtls_mpi_uint *X,
653                                const mbedtls_mpi_uint *A,
654                                const mbedtls_mpi_uint *N,
655                                size_t AN_limbs,
656                                const mbedtls_mpi_uint *E,
657                                size_t E_limbs,
658                                const mbedtls_mpi_uint *RR,
659                                mbedtls_mpi_uint *T )
660 {
661     const size_t wsize = exp_mod_get_window_size( E_limbs * biL );
662     const size_t welem = ( (size_t) 1 ) << wsize;
663 
664     /* This is how we will use the temporary storage T, which must have space
665      * for table_limbs, select_limbs and (2 * AN_limbs + 1) for montmul. */
666     const size_t table_limbs  = welem * AN_limbs;
667     const size_t select_limbs = AN_limbs;
668 
669     /* Pointers to specific parts of the temporary working memory pool */
670     mbedtls_mpi_uint *const Wtable  = T;
671     mbedtls_mpi_uint *const Wselect = Wtable  +  table_limbs;
672     mbedtls_mpi_uint *const temp    = Wselect + select_limbs;
673 
674     /*
675      * Window precomputation
676      */
677 
678     const mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init( N );
679 
680     /* Set Wtable[i] = A^(2^i) (in Montgomery representation) */
681     exp_mod_precompute_window( A, N, AN_limbs,
682                                mm, RR,
683                                welem, Wtable, temp );
684 
685     /*
686      * Fixed window exponentiation
687      */
688 
689     /* X = 1 (in Montgomery presentation) initially */
690     memcpy( X, Wtable, AN_limbs * ciL );
691 
692     /* We'll process the bits of E from most significant
693      * (limb_index=E_limbs-1, E_bit_index=biL-1) to least significant
694      * (limb_index=0, E_bit_index=0). */
695     size_t E_limb_index = E_limbs;
696     size_t E_bit_index = 0;
697     /* At any given time, window contains window_bits bits from E.
698      * window_bits can go up to wsize. */
699     size_t window_bits = 0;
700     mbedtls_mpi_uint window = 0;
701 
702     do
703     {
704         /* Square */
705         mbedtls_mpi_core_montmul( X, X, X, AN_limbs, N, AN_limbs, mm, temp );
706 
707         /* Move to the next bit of the exponent */
708         if( E_bit_index == 0 )
709         {
710             --E_limb_index;
711             E_bit_index = biL - 1;
712         }
713         else
714         {
715             --E_bit_index;
716         }
717         /* Insert next exponent bit into window */
718         ++window_bits;
719         window <<= 1;
720         window |= ( E[E_limb_index] >> E_bit_index ) & 1;
721 
722         /* Clear window if it's full. Also clear the window at the end,
723          * when we've finished processing the exponent. */
724         if( window_bits == wsize ||
725             ( E_bit_index == 0 && E_limb_index == 0 ) )
726         {
727             /* Select Wtable[window] without leaking window through
728              * memory access patterns. */
729             mbedtls_mpi_core_ct_uint_table_lookup( Wselect, Wtable,
730                                                    AN_limbs, welem, window );
731             /* Multiply X by the selected element. */
732             mbedtls_mpi_core_montmul( X, X, Wselect, AN_limbs, N, AN_limbs, mm,
733                                       temp );
734             window = 0;
735             window_bits = 0;
736         }
737     }
738     while( ! ( E_bit_index == 0 && E_limb_index == 0 ) );
739 }
740 
741 /* END MERGE SLOT 1 */
742 
743 /* BEGIN MERGE SLOT 2 */
744 
745 /* END MERGE SLOT 2 */
746 
747 /* BEGIN MERGE SLOT 3 */
748 
mbedtls_mpi_core_sub_int(mbedtls_mpi_uint * X,const mbedtls_mpi_uint * A,mbedtls_mpi_uint c,size_t limbs)749 mbedtls_mpi_uint mbedtls_mpi_core_sub_int( mbedtls_mpi_uint *X,
750                                            const mbedtls_mpi_uint *A,
751                                            mbedtls_mpi_uint c, /* doubles as carry */
752                                            size_t limbs )
753 {
754     for( size_t i = 0; i < limbs; i++ )
755     {
756         mbedtls_mpi_uint s = A[i];
757         mbedtls_mpi_uint t = s - c;
758         c = ( t > s );
759         X[i] = t;
760     }
761 
762     return( c );
763 }
764 
765 /* END MERGE SLOT 3 */
766 
767 /* BEGIN MERGE SLOT 4 */
768 
769 /* END MERGE SLOT 4 */
770 
771 /* BEGIN MERGE SLOT 5 */
772 
773 /* END MERGE SLOT 5 */
774 
775 /* BEGIN MERGE SLOT 6 */
776 
777 /* END MERGE SLOT 6 */
778 
779 /* BEGIN MERGE SLOT 7 */
780 
781 /* END MERGE SLOT 7 */
782 
783 /* BEGIN MERGE SLOT 8 */
784 
785 /* END MERGE SLOT 8 */
786 
787 /* BEGIN MERGE SLOT 9 */
788 
789 /* END MERGE SLOT 9 */
790 
791 /* BEGIN MERGE SLOT 10 */
792 
793 /* END MERGE SLOT 10 */
794 
795 #endif /* MBEDTLS_BIGNUM_C */
796