1 /**
2  * \file poly1305.c
3  *
4  * \brief Poly1305 authentication algorithm.
5  *
6  *  Copyright The Mbed TLS Contributors
7  *  SPDX-License-Identifier: Apache-2.0
8  *
9  *  Licensed under the Apache License, Version 2.0 (the "License"); you may
10  *  not use this file except in compliance with the License.
11  *  You may obtain a copy of the License at
12  *
13  *  http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *  Unless required by applicable law or agreed to in writing, software
16  *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17  *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  *  See the License for the specific language governing permissions and
19  *  limitations under the License.
20  */
21 #include "common.h"
22 
23 #if defined(MBEDTLS_POLY1305_C)
24 
25 #include "mbedtls/poly1305.h"
26 #include "mbedtls/platform_util.h"
27 #include "mbedtls/error.h"
28 
29 #include <string.h>
30 
31 #include "mbedtls/platform.h"
32 
33 #if !defined(MBEDTLS_POLY1305_ALT)
34 
35 #define POLY1305_BLOCK_SIZE_BYTES ( 16U )
36 
/*
 * Our implementation is tuned for 32-bit platforms with a 64-bit multiplier.
 * However, we provide an alternative for platforms without such a multiplier.
 */
41 #if defined(MBEDTLS_NO_64BIT_MULTIPLICATION)
/*
 * 32x32 -> 64 bit multiplication built only from 16x16 -> 32 bit products,
 * for targets that lack a 64-bit multiplier.
 */
static uint64_t mul64( uint32_t a, uint32_t b )
{
    /* Split the operands: a = a_lo + 2**16 a_hi, b = b_lo + 2**16 b_hi */
    const uint16_t a_lo = (uint16_t) a;
    const uint16_t a_hi = a >> 16;
    const uint16_t b_lo = (uint16_t) b;
    const uint16_t b_hi = b >> 16;

    /* a*b = a_lo*b_lo + 2**16 (a_hi*b_lo + a_lo*b_hi) + 2**32 a_hi*b_hi
     *
     * The two cross terms are summed in 64 bits because their sum may not
     * fit in 32 bits. */
    const uint32_t low   = (uint32_t) a_lo * b_lo;
    const uint64_t cross = (uint64_t)( (uint32_t) a_hi * b_lo ) +
                           (uint32_t) a_lo * b_hi;
    const uint32_t high  = (uint32_t) a_hi * b_hi;

    uint64_t product = low;
    product += cross << 16;
    product += (uint64_t) high << 32;

    return( product );
}
57 #else
/* 32x32 -> 64 bit multiplication using the native 64-bit multiplier. */
static inline uint64_t mul64( uint32_t a, uint32_t b )
{
    /* Widen one operand first so the product is computed in 64 bits. */
    const uint64_t wide_a = a;

    return( wide_a * b );
}
62 #endif
63 
64 
65 /**
66  * \brief                   Process blocks with Poly1305.
67  *
68  * \param ctx               The Poly1305 context.
69  * \param nblocks           Number of blocks to process. Note that this
70  *                          function only processes full blocks.
71  * \param input             Buffer containing the input block(s).
72  * \param needs_padding     Set to 0 if the padding bit has already been
73  *                          applied to the input data before calling this
74  *                          function.  Otherwise, set this parameter to 1.
75  */
poly1305_process(mbedtls_poly1305_context * ctx,size_t nblocks,const unsigned char * input,uint32_t needs_padding)76 static void poly1305_process( mbedtls_poly1305_context *ctx,
77                               size_t nblocks,
78                               const unsigned char *input,
79                               uint32_t needs_padding )
80 {
81     uint64_t d0, d1, d2, d3;
82     uint32_t acc0, acc1, acc2, acc3, acc4;
83     uint32_t r0, r1, r2, r3;
84     uint32_t rs1, rs2, rs3;
85     size_t offset  = 0U;
86     size_t i;
87 
88     r0 = ctx->r[0];
89     r1 = ctx->r[1];
90     r2 = ctx->r[2];
91     r3 = ctx->r[3];
92 
93     rs1 = r1 + ( r1 >> 2U );
94     rs2 = r2 + ( r2 >> 2U );
95     rs3 = r3 + ( r3 >> 2U );
96 
97     acc0 = ctx->acc[0];
98     acc1 = ctx->acc[1];
99     acc2 = ctx->acc[2];
100     acc3 = ctx->acc[3];
101     acc4 = ctx->acc[4];
102 
103     /* Process full blocks */
104     for( i = 0U; i < nblocks; i++ )
105     {
106         /* The input block is treated as a 128-bit little-endian integer */
107         d0   = MBEDTLS_GET_UINT32_LE( input, offset + 0  );
108         d1   = MBEDTLS_GET_UINT32_LE( input, offset + 4  );
109         d2   = MBEDTLS_GET_UINT32_LE( input, offset + 8  );
110         d3   = MBEDTLS_GET_UINT32_LE( input, offset + 12 );
111 
112         /* Compute: acc += (padded) block as a 130-bit integer */
113         d0  += (uint64_t) acc0;
114         d1  += (uint64_t) acc1 + ( d0 >> 32U );
115         d2  += (uint64_t) acc2 + ( d1 >> 32U );
116         d3  += (uint64_t) acc3 + ( d2 >> 32U );
117         acc0 = (uint32_t) d0;
118         acc1 = (uint32_t) d1;
119         acc2 = (uint32_t) d2;
120         acc3 = (uint32_t) d3;
121         acc4 += (uint32_t) ( d3 >> 32U ) + needs_padding;
122 
123         /* Compute: acc *= r */
124         d0 = mul64( acc0, r0  ) +
125              mul64( acc1, rs3 ) +
126              mul64( acc2, rs2 ) +
127              mul64( acc3, rs1 );
128         d1 = mul64( acc0, r1  ) +
129              mul64( acc1, r0  ) +
130              mul64( acc2, rs3 ) +
131              mul64( acc3, rs2 ) +
132              mul64( acc4, rs1 );
133         d2 = mul64( acc0, r2  ) +
134              mul64( acc1, r1  ) +
135              mul64( acc2, r0  ) +
136              mul64( acc3, rs3 ) +
137              mul64( acc4, rs2 );
138         d3 = mul64( acc0, r3  ) +
139              mul64( acc1, r2  ) +
140              mul64( acc2, r1  ) +
141              mul64( acc3, r0  ) +
142              mul64( acc4, rs3 );
143         acc4 *= r0;
144 
145         /* Compute: acc %= (2^130 - 5) (partial remainder) */
146         d1 += ( d0 >> 32 );
147         d2 += ( d1 >> 32 );
148         d3 += ( d2 >> 32 );
149         acc0 = (uint32_t) d0;
150         acc1 = (uint32_t) d1;
151         acc2 = (uint32_t) d2;
152         acc3 = (uint32_t) d3;
153         acc4 = (uint32_t) ( d3 >> 32 ) + acc4;
154 
155         d0 = (uint64_t) acc0 + ( acc4 >> 2 ) + ( acc4 & 0xFFFFFFFCU );
156         acc4 &= 3U;
157         acc0 = (uint32_t) d0;
158         d0 = (uint64_t) acc1 + ( d0 >> 32U );
159         acc1 = (uint32_t) d0;
160         d0 = (uint64_t) acc2 + ( d0 >> 32U );
161         acc2 = (uint32_t) d0;
162         d0 = (uint64_t) acc3 + ( d0 >> 32U );
163         acc3 = (uint32_t) d0;
164         d0 = (uint64_t) acc4 + ( d0 >> 32U );
165         acc4 = (uint32_t) d0;
166 
167         offset    += POLY1305_BLOCK_SIZE_BYTES;
168     }
169 
170     ctx->acc[0] = acc0;
171     ctx->acc[1] = acc1;
172     ctx->acc[2] = acc2;
173     ctx->acc[3] = acc3;
174     ctx->acc[4] = acc4;
175 }
176 
177 /**
178  * \brief                   Compute the Poly1305 MAC
179  *
180  * \param ctx               The Poly1305 context.
181  * \param mac               The buffer to where the MAC is written. Must be
182  *                          big enough to contain the 16-byte MAC.
183  */
poly1305_compute_mac(const mbedtls_poly1305_context * ctx,unsigned char mac[16])184 static void poly1305_compute_mac( const mbedtls_poly1305_context *ctx,
185                                   unsigned char mac[16] )
186 {
187     uint64_t d;
188     uint32_t g0, g1, g2, g3, g4;
189     uint32_t acc0, acc1, acc2, acc3, acc4;
190     uint32_t mask;
191     uint32_t mask_inv;
192 
193     acc0 = ctx->acc[0];
194     acc1 = ctx->acc[1];
195     acc2 = ctx->acc[2];
196     acc3 = ctx->acc[3];
197     acc4 = ctx->acc[4];
198 
199     /* Before adding 's' we ensure that the accumulator is mod 2^130 - 5.
200      * We do this by calculating acc - (2^130 - 5), then checking if
201      * the 131st bit is set. If it is, then reduce: acc -= (2^130 - 5)
202      */
203 
204     /* Calculate acc + -(2^130 - 5) */
205     d  = ( (uint64_t) acc0 + 5U );
206     g0 = (uint32_t) d;
207     d  = ( (uint64_t) acc1 + ( d >> 32 ) );
208     g1 = (uint32_t) d;
209     d  = ( (uint64_t) acc2 + ( d >> 32 ) );
210     g2 = (uint32_t) d;
211     d  = ( (uint64_t) acc3 + ( d >> 32 ) );
212     g3 = (uint32_t) d;
213     g4 = acc4 + (uint32_t) ( d >> 32U );
214 
215     /* mask == 0xFFFFFFFF if 131st bit is set, otherwise mask == 0 */
216     mask = (uint32_t) 0U - ( g4 >> 2U );
217     mask_inv = ~mask;
218 
219     /* If 131st bit is set then acc=g, otherwise, acc is unmodified */
220     acc0 = ( acc0 & mask_inv ) | ( g0 & mask );
221     acc1 = ( acc1 & mask_inv ) | ( g1 & mask );
222     acc2 = ( acc2 & mask_inv ) | ( g2 & mask );
223     acc3 = ( acc3 & mask_inv ) | ( g3 & mask );
224 
225     /* Add 's' */
226     d = (uint64_t) acc0 + ctx->s[0];
227     acc0 = (uint32_t) d;
228     d = (uint64_t) acc1 + ctx->s[1] + ( d >> 32U );
229     acc1 = (uint32_t) d;
230     d = (uint64_t) acc2 + ctx->s[2] + ( d >> 32U );
231     acc2 = (uint32_t) d;
232     acc3 += ctx->s[3] + (uint32_t) ( d >> 32U );
233 
234     /* Compute MAC (128 least significant bits of the accumulator) */
235     MBEDTLS_PUT_UINT32_LE( acc0, mac,  0 );
236     MBEDTLS_PUT_UINT32_LE( acc1, mac,  4 );
237     MBEDTLS_PUT_UINT32_LE( acc2, mac,  8 );
238     MBEDTLS_PUT_UINT32_LE( acc3, mac, 12 );
239 }
240 
/* Put a context into a known all-zero state. ctx is used without a NULL
 * check. */
void mbedtls_poly1305_init( mbedtls_poly1305_context *ctx )
{
    mbedtls_platform_zeroize( ctx, sizeof( *ctx ) );
}
245 
/* Wipe the key material and state held in a context. A NULL ctx is
 * accepted and ignored. */
void mbedtls_poly1305_free( mbedtls_poly1305_context *ctx )
{
    if( ctx != NULL )
    {
        mbedtls_platform_zeroize( ctx, sizeof( *ctx ) );
    }
}
253 
/*
 * Load a 32-byte one-time key and reset the running state.
 *
 * Bytes 0..15 of the key become r (with the clamping mask applied) and
 * bytes 16..31 become s, each read as four little-endian 32-bit words.
 * The accumulator and the partial-block queue are cleared. Always returns 0.
 */
int mbedtls_poly1305_starts( mbedtls_poly1305_context *ctx,
                             const unsigned char key[32] )
{
    /* Per-word form of r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */
    static const uint32_t clamp[4] =
    {
        0x0FFFFFFFU, 0x0FFFFFFCU, 0x0FFFFFFCU, 0x0FFFFFFCU
    };
    size_t word;

    for( word = 0U; word < 4U; word++ )
    {
        ctx->r[word] = MBEDTLS_GET_UINT32_LE( key, 4U * word ) & clamp[word];
        ctx->s[word] = MBEDTLS_GET_UINT32_LE( key, 16U + 4U * word );
    }

    /* The accumulator starts at zero */
    for( word = 0U; word < 5U; word++ )
    {
        ctx->acc[word] = 0U;
    }

    /* No buffered input yet */
    mbedtls_platform_zeroize( ctx->queue, sizeof( ctx->queue ) );
    ctx->queue_len = 0U;

    return( 0 );
}
281 
/*
 * Feed ilen bytes of message into the MAC computation.
 *
 * Input is consumed in three stages: first it tops up a partially filled
 * queue (processing it once it holds a whole block), then all complete
 * blocks are processed straight from the caller's buffer, and finally any
 * trailing bytes are parked in the queue for a later call. Always returns 0.
 */
int mbedtls_poly1305_update( mbedtls_poly1305_context *ctx,
                             const unsigned char *input,
                             size_t ilen )
{
    const unsigned char *cursor = input;
    size_t left = ilen;

    /* Stage 1: complete the block that is already partly queued */
    if( ( left > 0U ) && ( ctx->queue_len > 0U ) )
    {
        const size_t room = POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len;

        if( left < room )
        {
            /* Still short of a full block: queue everything and stop */
            memcpy( ctx->queue + ctx->queue_len, cursor, left );
            ctx->queue_len += left;

            return( 0 );
        }

        memcpy( ctx->queue + ctx->queue_len, cursor, room );
        ctx->queue_len = 0U;

        /* A full 16-byte block, so the padding bit is added */
        poly1305_process( ctx, 1U, ctx->queue, 1U );

        cursor += room;
        left   -= room;
    }

    /* Stage 2: process complete blocks directly from the input buffer */
    if( left >= POLY1305_BLOCK_SIZE_BYTES )
    {
        const size_t full_blocks = left / POLY1305_BLOCK_SIZE_BYTES;

        poly1305_process( ctx, full_blocks, cursor, 1U );

        cursor += full_blocks * POLY1305_BLOCK_SIZE_BYTES;
        left   %= POLY1305_BLOCK_SIZE_BYTES;
    }

    /* Stage 3: keep the incomplete tail for the next call */
    if( left > 0U )
    {
        memcpy( ctx->queue, cursor, left );
        ctx->queue_len = left;
    }

    return( 0 );
}
343 
/*
 * Finish the computation and write the 16-byte tag to mac.
 *
 * Any bytes still waiting in the queue form the last, short block: it is
 * terminated with a 0x01 byte, zero-filled up to 16 bytes and processed
 * without the extra padding bit. Always returns 0.
 */
int mbedtls_poly1305_finish( mbedtls_poly1305_context *ctx,
                             unsigned char mac[16] )
{
    size_t used = ctx->queue_len;

    if( used > 0U )
    {
        /* The padding byte goes right after the buffered data */
        ctx->queue[used] = 1U;
        used++;
        ctx->queue_len = used;

        /* Zero the remainder of the block (possibly zero bytes) */
        memset( ctx->queue + used, 0, POLY1305_BLOCK_SIZE_BYTES - used );

        /* One block; padding is already part of the data */
        poly1305_process( ctx, 1U, ctx->queue, 0U );
    }

    poly1305_compute_mac( ctx, mac );

    return( 0 );
}
367 
mbedtls_poly1305_mac(const unsigned char key[32],const unsigned char * input,size_t ilen,unsigned char mac[16])368 int mbedtls_poly1305_mac( const unsigned char key[32],
369                           const unsigned char *input,
370                           size_t ilen,
371                           unsigned char mac[16] )
372 {
373     mbedtls_poly1305_context ctx;
374     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
375 
376     mbedtls_poly1305_init( &ctx );
377 
378     ret = mbedtls_poly1305_starts( &ctx, key );
379     if( ret != 0 )
380         goto cleanup;
381 
382     ret = mbedtls_poly1305_update( &ctx, input, ilen );
383     if( ret != 0 )
384         goto cleanup;
385 
386     ret = mbedtls_poly1305_finish( &ctx, mac );
387 
388 cleanup:
389     mbedtls_poly1305_free( &ctx );
390     return( ret );
391 }
392 
393 #endif /* MBEDTLS_POLY1305_ALT */
394 
395 #if defined(MBEDTLS_SELF_TEST)
396 
/*
 * Self-test keys, one 32-byte key per test case.
 *
 * The built-in mbedtls_poly1305_starts() in this file reads bytes 0..15 as
 * r and bytes 16..31 as s, each as four little-endian 32-bit words; the
 * rows below are grouped accordingly.
 */
static const unsigned char test_keys[2][32] =
{
    {
        /* r */
        0x85, 0xd6, 0xbe, 0x78,   0x57, 0x55, 0x6d, 0x33,
        0x7f, 0x44, 0x52, 0xfe,   0x42, 0xd5, 0x06, 0xa8,
        /* s */
        0x01, 0x03, 0x80, 0x8a,   0xfb, 0x0d, 0xb2, 0xfd,
        0x4a, 0xbf, 0xf6, 0xaf,   0x41, 0x49, 0xf5, 0x1b
    },
    {
        /* r */
        0x1c, 0x92, 0x40, 0xa5,   0xeb, 0x55, 0xd3, 0x8a,
        0xf3, 0x33, 0x88, 0x86,   0x04, 0xf6, 0xb5, 0xf0,
        /* s */
        0x47, 0x39, 0x17, 0xc1,   0x40, 0x2b, 0x80, 0x09,
        0x9d, 0xca, 0x5c, 0xbc,   0x20, 0x70, 0x75, 0xc0
    }
};
412 
/*
 * Self-test messages (ASCII text, shown next to each row).
 *
 * Both rows are 127 bytes wide; the first message is shorter, so the rest
 * of its row is implicitly zero. The number of bytes actually fed to the
 * MAC comes from test_data_len[].
 */
static const unsigned char test_data[2][127] =
{
    {
        0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72,    /* "Cryptogr" */
        0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f,    /* "aphic Fo" */
        0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65,    /* "rum Rese" */
        0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f,    /* "arch Gro" */
        0x75, 0x70                                         /* "up"       */
    },
    {
        0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72,    /* "'Twas br"  */
        0x69, 0x6c, 0x6c, 0x69, 0x67, 0x2c, 0x20, 0x61,    /* "illig, a"  */
        0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73,    /* "nd the s"  */
        0x6c, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6f,    /* "lithy to"  */
        0x76, 0x65, 0x73, 0x0a, 0x44, 0x69, 0x64, 0x20,    /* "ves\nDid " */
        0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x64,    /* "gyre and"  */
        0x20, 0x67, 0x69, 0x6d, 0x62, 0x6c, 0x65, 0x20,    /* " gimble "  */
        0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77,    /* "in the w"  */
        0x61, 0x62, 0x65, 0x3a, 0x0a, 0x41, 0x6c, 0x6c,    /* "abe:\nAll" */
        0x20, 0x6d, 0x69, 0x6d, 0x73, 0x79, 0x20, 0x77,    /* " mimsy w"  */
        0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20,    /* "ere the "  */
        0x62, 0x6f, 0x72, 0x6f, 0x67, 0x6f, 0x76, 0x65,    /* "borogove"  */
        0x73, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x74,    /* "s,\nAnd t" */
        0x68, 0x65, 0x20, 0x6d, 0x6f, 0x6d, 0x65, 0x20,    /* "he mome "  */
        0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6f, 0x75,    /* "raths ou"  */
        0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2e           /* "tgrabe."   */
    }
};
441 
/* Number of meaningful bytes in each test_data[] row. */
static const size_t test_data_len[2] =
{
    34U,    /* test 0: short message, remainder of the row is unused */
    127U    /* test 1: fills the whole row                           */
};
447 
/* Expected 16-byte tags, one per test case, grouped by 32-bit word. */
static const unsigned char test_mac[2][16] =
{
    {
        0xa8, 0x06, 0x1d, 0xc1,   0x30, 0x51, 0x36, 0xc6,
        0xc2, 0x2b, 0x8b, 0xaf,   0x0c, 0x01, 0x27, 0xa9
    },
    {
        0x45, 0x41, 0x66, 0x9a,   0x7e, 0xaa, 0xee, 0x61,
        0xe7, 0x08, 0xdc, 0x7c,   0xbc, 0xc5, 0xeb, 0x62
    }
};
459 
/* Drop any ASSERT defined earlier so that ours is the one in effect. */
#undef ASSERT

/*
 * Self-test check. When `cond` is false, print the parenthesised
 * printf-style argument list `args` (only if the enclosing function's
 * `verbose` is non-zero) and make the enclosing function return -1.
 * When `cond` is true the macro does nothing.
 */
#define ASSERT( cond, args )            \
    do                                  \
    {                                   \
        if( ( cond ) )                  \
            break;                      \
                                        \
        if( verbose != 0 )              \
        {                               \
            mbedtls_printf args;        \
        }                               \
                                        \
        return( -1 );                   \
    }                                   \
    while( 0 )
475 
mbedtls_poly1305_self_test(int verbose)476 int mbedtls_poly1305_self_test( int verbose )
477 {
478     unsigned char mac[16];
479     unsigned i;
480     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
481 
482     for( i = 0U; i < 2U; i++ )
483     {
484         if( verbose != 0 )
485             mbedtls_printf( "  Poly1305 test %u ", i );
486 
487         ret = mbedtls_poly1305_mac( test_keys[i],
488                                     test_data[i],
489                                     test_data_len[i],
490                                     mac );
491         ASSERT( 0 == ret, ( "error code: %i\n", ret ) );
492 
493         ASSERT( 0 == memcmp( mac, test_mac[i], 16U ), ( "failed (mac)\n" ) );
494 
495         if( verbose != 0 )
496             mbedtls_printf( "passed\n" );
497     }
498 
499     if( verbose != 0 )
500         mbedtls_printf( "\n" );
501 
502     return( 0 );
503 }
504 
505 #endif /* MBEDTLS_SELF_TEST */
506 
507 #endif /* MBEDTLS_POLY1305_C */
508