1 /**
2  * \file poly1305.c
3  *
4  * \brief Poly1305 authentication algorithm.
5  *
6  *  Copyright The Mbed TLS Contributors
7  *  SPDX-License-Identifier: Apache-2.0
8  *
9  *  Licensed under the Apache License, Version 2.0 (the "License"); you may
10  *  not use this file except in compliance with the License.
11  *  You may obtain a copy of the License at
12  *
13  *  http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *  Unless required by applicable law or agreed to in writing, software
16  *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17  *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  *  See the License for the specific language governing permissions and
19  *  limitations under the License.
20  */
21 #include "common.h"
22 
23 #if defined(MBEDTLS_POLY1305_C)
24 
25 #include "mbedtls/poly1305.h"
26 #include "mbedtls/platform_util.h"
27 #include "mbedtls/error.h"
28 
29 #include <string.h>
30 
31 #include "mbedtls/platform.h"
32 
33 #if !defined(MBEDTLS_POLY1305_ALT)
34 
/* Size in bytes of one Poly1305 message block; also the amount of input that
 * is buffered in the context queue before a block is processed. */
#define POLY1305_BLOCK_SIZE_BYTES (16U)
36 
/*
 * Our implementation is tuned for 32-bit platforms with a 64-bit multiplier.
 * However, we provide an alternative for platforms without such a multiplier.
 */
41 #if defined(MBEDTLS_NO_64BIT_MULTIPLICATION)
/* 32x32 -> 64-bit multiplication built only from 16x16 -> 32-bit products,
 * for targets where a 64-bit multiplication is unavailable or undesirable. */
static uint64_t mul64(uint32_t a, uint32_t b)
{
    /* Split the operands into 16-bit halves:
     * a = a_lo + 2**16 a_hi, b = b_lo + 2**16 b_hi */
    const uint16_t a_lo = (uint16_t) a;
    const uint16_t a_hi = (uint16_t) (a >> 16);
    const uint16_t b_lo = (uint16_t) b;
    const uint16_t b_hi = (uint16_t) (b >> 16);

    /* a*b = a_lo*b_lo + 2**16 (a_hi*b_lo + a_lo*b_hi) + 2**32 a_hi*b_hi
     *
     * Every partial product fits in 32 bits. The sum of the two cross
     * terms can need 33 bits, so it is accumulated in a 64-bit variable. */
    const uint32_t low    = (uint32_t) a_lo * b_lo;
    const uint32_t cross1 = (uint32_t) a_hi * b_lo;
    const uint32_t cross2 = (uint32_t) a_lo * b_hi;
    const uint64_t middle = (uint64_t) cross1 + cross2;
    const uint32_t high   = (uint32_t) a_hi * b_hi;

    return ((uint64_t) high << 32) + (middle << 16) + low;
}
57 #else
/* 32x32 -> 64-bit multiplication using the native 64-bit multiplier. */
static inline uint64_t mul64(uint32_t a, uint32_t b)
{
    /* Widen one operand first so the product is computed in 64 bits */
    const uint64_t wide_a = a;

    return wide_a * b;
}
62 #endif
63 
64 
65 /**
66  * \brief                   Process blocks with Poly1305.
67  *
68  * \param ctx               The Poly1305 context.
69  * \param nblocks           Number of blocks to process. Note that this
70  *                          function only processes full blocks.
71  * \param input             Buffer containing the input block(s).
72  * \param needs_padding     Set to 0 if the padding bit has already been
73  *                          applied to the input data before calling this
74  *                          function.  Otherwise, set this parameter to 1.
75  */
poly1305_process(mbedtls_poly1305_context * ctx,size_t nblocks,const unsigned char * input,uint32_t needs_padding)76 static void poly1305_process(mbedtls_poly1305_context *ctx,
77                              size_t nblocks,
78                              const unsigned char *input,
79                              uint32_t needs_padding)
80 {
81     uint64_t d0, d1, d2, d3;
82     uint32_t acc0, acc1, acc2, acc3, acc4;
83     uint32_t r0, r1, r2, r3;
84     uint32_t rs1, rs2, rs3;
85     size_t offset  = 0U;
86     size_t i;
87 
88     r0 = ctx->r[0];
89     r1 = ctx->r[1];
90     r2 = ctx->r[2];
91     r3 = ctx->r[3];
92 
93     rs1 = r1 + (r1 >> 2U);
94     rs2 = r2 + (r2 >> 2U);
95     rs3 = r3 + (r3 >> 2U);
96 
97     acc0 = ctx->acc[0];
98     acc1 = ctx->acc[1];
99     acc2 = ctx->acc[2];
100     acc3 = ctx->acc[3];
101     acc4 = ctx->acc[4];
102 
103     /* Process full blocks */
104     for (i = 0U; i < nblocks; i++) {
105         /* The input block is treated as a 128-bit little-endian integer */
106         d0   = MBEDTLS_GET_UINT32_LE(input, offset + 0);
107         d1   = MBEDTLS_GET_UINT32_LE(input, offset + 4);
108         d2   = MBEDTLS_GET_UINT32_LE(input, offset + 8);
109         d3   = MBEDTLS_GET_UINT32_LE(input, offset + 12);
110 
111         /* Compute: acc += (padded) block as a 130-bit integer */
112         d0  += (uint64_t) acc0;
113         d1  += (uint64_t) acc1 + (d0 >> 32U);
114         d2  += (uint64_t) acc2 + (d1 >> 32U);
115         d3  += (uint64_t) acc3 + (d2 >> 32U);
116         acc0 = (uint32_t) d0;
117         acc1 = (uint32_t) d1;
118         acc2 = (uint32_t) d2;
119         acc3 = (uint32_t) d3;
120         acc4 += (uint32_t) (d3 >> 32U) + needs_padding;
121 
122         /* Compute: acc *= r */
123         d0 = mul64(acc0, r0) +
124              mul64(acc1, rs3) +
125              mul64(acc2, rs2) +
126              mul64(acc3, rs1);
127         d1 = mul64(acc0, r1) +
128              mul64(acc1, r0) +
129              mul64(acc2, rs3) +
130              mul64(acc3, rs2) +
131              mul64(acc4, rs1);
132         d2 = mul64(acc0, r2) +
133              mul64(acc1, r1) +
134              mul64(acc2, r0) +
135              mul64(acc3, rs3) +
136              mul64(acc4, rs2);
137         d3 = mul64(acc0, r3) +
138              mul64(acc1, r2) +
139              mul64(acc2, r1) +
140              mul64(acc3, r0) +
141              mul64(acc4, rs3);
142         acc4 *= r0;
143 
144         /* Compute: acc %= (2^130 - 5) (partial remainder) */
145         d1 += (d0 >> 32);
146         d2 += (d1 >> 32);
147         d3 += (d2 >> 32);
148         acc0 = (uint32_t) d0;
149         acc1 = (uint32_t) d1;
150         acc2 = (uint32_t) d2;
151         acc3 = (uint32_t) d3;
152         acc4 = (uint32_t) (d3 >> 32) + acc4;
153 
154         d0 = (uint64_t) acc0 + (acc4 >> 2) + (acc4 & 0xFFFFFFFCU);
155         acc4 &= 3U;
156         acc0 = (uint32_t) d0;
157         d0 = (uint64_t) acc1 + (d0 >> 32U);
158         acc1 = (uint32_t) d0;
159         d0 = (uint64_t) acc2 + (d0 >> 32U);
160         acc2 = (uint32_t) d0;
161         d0 = (uint64_t) acc3 + (d0 >> 32U);
162         acc3 = (uint32_t) d0;
163         d0 = (uint64_t) acc4 + (d0 >> 32U);
164         acc4 = (uint32_t) d0;
165 
166         offset    += POLY1305_BLOCK_SIZE_BYTES;
167     }
168 
169     ctx->acc[0] = acc0;
170     ctx->acc[1] = acc1;
171     ctx->acc[2] = acc2;
172     ctx->acc[3] = acc3;
173     ctx->acc[4] = acc4;
174 }
175 
176 /**
177  * \brief                   Compute the Poly1305 MAC
178  *
179  * \param ctx               The Poly1305 context.
180  * \param mac               The buffer to where the MAC is written. Must be
181  *                          big enough to contain the 16-byte MAC.
182  */
poly1305_compute_mac(const mbedtls_poly1305_context * ctx,unsigned char mac[16])183 static void poly1305_compute_mac(const mbedtls_poly1305_context *ctx,
184                                  unsigned char mac[16])
185 {
186     uint64_t d;
187     uint32_t g0, g1, g2, g3, g4;
188     uint32_t acc0, acc1, acc2, acc3, acc4;
189     uint32_t mask;
190     uint32_t mask_inv;
191 
192     acc0 = ctx->acc[0];
193     acc1 = ctx->acc[1];
194     acc2 = ctx->acc[2];
195     acc3 = ctx->acc[3];
196     acc4 = ctx->acc[4];
197 
198     /* Before adding 's' we ensure that the accumulator is mod 2^130 - 5.
199      * We do this by calculating acc - (2^130 - 5), then checking if
200      * the 131st bit is set. If it is, then reduce: acc -= (2^130 - 5)
201      */
202 
203     /* Calculate acc + -(2^130 - 5) */
204     d  = ((uint64_t) acc0 + 5U);
205     g0 = (uint32_t) d;
206     d  = ((uint64_t) acc1 + (d >> 32));
207     g1 = (uint32_t) d;
208     d  = ((uint64_t) acc2 + (d >> 32));
209     g2 = (uint32_t) d;
210     d  = ((uint64_t) acc3 + (d >> 32));
211     g3 = (uint32_t) d;
212     g4 = acc4 + (uint32_t) (d >> 32U);
213 
214     /* mask == 0xFFFFFFFF if 131st bit is set, otherwise mask == 0 */
215     mask = (uint32_t) 0U - (g4 >> 2U);
216     mask_inv = ~mask;
217 
218     /* If 131st bit is set then acc=g, otherwise, acc is unmodified */
219     acc0 = (acc0 & mask_inv) | (g0 & mask);
220     acc1 = (acc1 & mask_inv) | (g1 & mask);
221     acc2 = (acc2 & mask_inv) | (g2 & mask);
222     acc3 = (acc3 & mask_inv) | (g3 & mask);
223 
224     /* Add 's' */
225     d = (uint64_t) acc0 + ctx->s[0];
226     acc0 = (uint32_t) d;
227     d = (uint64_t) acc1 + ctx->s[1] + (d >> 32U);
228     acc1 = (uint32_t) d;
229     d = (uint64_t) acc2 + ctx->s[2] + (d >> 32U);
230     acc2 = (uint32_t) d;
231     acc3 += ctx->s[3] + (uint32_t) (d >> 32U);
232 
233     /* Compute MAC (128 least significant bits of the accumulator) */
234     MBEDTLS_PUT_UINT32_LE(acc0, mac,  0);
235     MBEDTLS_PUT_UINT32_LE(acc1, mac,  4);
236     MBEDTLS_PUT_UINT32_LE(acc2, mac,  8);
237     MBEDTLS_PUT_UINT32_LE(acc3, mac, 12);
238 }
239 
/* Put a context into its initial, all-zero state. */
void mbedtls_poly1305_init(mbedtls_poly1305_context *ctx)
{
    mbedtls_platform_zeroize(ctx, sizeof(*ctx));
}
244 
/* Wipe a context, including the key material and buffered input it holds.
 * Passing NULL is allowed and does nothing. */
void mbedtls_poly1305_free(mbedtls_poly1305_context *ctx)
{
    if (ctx != NULL) {
        mbedtls_platform_zeroize(ctx, sizeof(*ctx));
    }
}
253 
/* Load a 32-byte key into the context and reset the running state.
 * Key bytes 0..15 become 'r' (after clamping), bytes 16..31 become 's'.
 * The accumulator and the partial-block queue are cleared. Always returns 0. */
int mbedtls_poly1305_starts(mbedtls_poly1305_context *ctx,
                            const unsigned char key[32])
{
    /* Per-limb clamp masks; together they implement
     * r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */
    static const uint32_t clamp[4] = {
        0x0FFFFFFFU, 0x0FFFFFFCU, 0x0FFFFFFCU, 0x0FFFFFFCU
    };
    size_t limb;

    for (limb = 0U; limb < 4U; limb++) {
        ctx->r[limb] = MBEDTLS_GET_UINT32_LE(key, 4U * limb) & clamp[limb];
        ctx->s[limb] = MBEDTLS_GET_UINT32_LE(key, 16U + 4U * limb);
    }

    /* The accumulator starts at zero */
    for (limb = 0U; limb < 5U; limb++) {
        ctx->acc[limb] = 0U;
    }

    /* No input is buffered yet */
    mbedtls_platform_zeroize(ctx->queue, sizeof(ctx->queue));
    ctx->queue_len = 0U;

    return 0;
}
281 
/* Feed ilen bytes of message data into the MAC computation.
 * Complete 16-byte blocks are processed immediately; a trailing partial
 * block is kept in ctx->queue until later input or the finish call
 * completes it. Always returns 0. */
int mbedtls_poly1305_update(mbedtls_poly1305_context *ctx,
                            const unsigned char *input,
                            size_t ilen)
{
    const unsigned char *cursor = input;   /* next unread input byte */
    size_t left = ilen;                    /* unread input bytes */
    size_t whole_blocks;

    /* First, top up a partial block left over from a previous call */
    if ((left != 0U) && (ctx->queue_len != 0U)) {
        const size_t room = POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len;

        if (left < room) {
            /* Still short of a full block: just buffer the new bytes */
            memcpy(ctx->queue + ctx->queue_len, cursor, left);
            ctx->queue_len += left;
            return 0;
        }

        /* The queue can be completed: fill it and process it */
        memcpy(ctx->queue + ctx->queue_len, cursor, room);
        ctx->queue_len = 0U;
        poly1305_process(ctx, 1U, ctx->queue, 1U);   /* add padding bit */

        cursor += room;
        left   -= room;
    }

    /* Then process as many complete blocks as possible from the input */
    whole_blocks = left / POLY1305_BLOCK_SIZE_BYTES;
    if (whole_blocks != 0U) {
        poly1305_process(ctx, whole_blocks, cursor, 1U);

        cursor += whole_blocks * POLY1305_BLOCK_SIZE_BYTES;
        left   %= POLY1305_BLOCK_SIZE_BYTES;
    }

    /* Finally, buffer whatever is left for a later call */
    if (left != 0U) {
        memcpy(ctx->queue, cursor, left);
        ctx->queue_len = left;
    }

    return 0;
}
337 
/* Complete the MAC computation and write the 16-byte tag to mac.
 * Any buffered partial block is padded and processed first.
 * Always returns 0. */
int mbedtls_poly1305_finish(mbedtls_poly1305_context *ctx,
                            unsigned char mac[16])
{
    if (ctx->queue_len != 0U) {
        size_t used = ctx->queue_len;

        /* Append the padding bit as a 0x01 byte right after the data */
        ctx->queue[used] = 1U;
        used++;
        ctx->queue_len = used;

        /* Zero-fill the remainder of the block */
        memset(ctx->queue + used, 0, POLY1305_BLOCK_SIZE_BYTES - used);

        /* One block; the padding bit is already part of the block data */
        poly1305_process(ctx, 1U, ctx->queue, 0U);
    }

    poly1305_compute_mac(ctx, mac);

    return 0;
}
360 
mbedtls_poly1305_mac(const unsigned char key[32],const unsigned char * input,size_t ilen,unsigned char mac[16])361 int mbedtls_poly1305_mac(const unsigned char key[32],
362                          const unsigned char *input,
363                          size_t ilen,
364                          unsigned char mac[16])
365 {
366     mbedtls_poly1305_context ctx;
367     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
368 
369     mbedtls_poly1305_init(&ctx);
370 
371     ret = mbedtls_poly1305_starts(&ctx, key);
372     if (ret != 0) {
373         goto cleanup;
374     }
375 
376     ret = mbedtls_poly1305_update(&ctx, input, ilen);
377     if (ret != 0) {
378         goto cleanup;
379     }
380 
381     ret = mbedtls_poly1305_finish(&ctx, mac);
382 
383 cleanup:
384     mbedtls_poly1305_free(&ctx);
385     return ret;
386 }
387 
388 #endif /* MBEDTLS_POLY1305_ALT */
389 
390 #if defined(MBEDTLS_SELF_TEST)
391 
/* Self-test keys, one 32-byte key per test case. Entry i is used together
 * with test_data[i], test_data_len[i] and test_mac[i].
 * NOTE(review): these look like the Poly1305 test vectors published in
 * RFC 8439 - confirm against the RFC before relying on that. */
static const unsigned char test_keys[2][32] =
{
    {
        0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
        0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
        0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
        0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
    },
    {
        0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
        0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
        0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
        0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
    }
};
407 
/* Self-test messages. Rows are 127 bytes wide to fit the longest message;
 * the number of bytes actually authenticated is given by test_data_len[i],
 * so the zero-initialised tail of the shorter first row is not used.
 * Read as ASCII, entry 0 is "Cryptographic Forum Research Group" and
 * entry 1 starts with "'Twas brillig, and the slithy toves". */
static const unsigned char test_data[2][127] =
{
    {
        0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72,
        0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f,
        0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65,
        0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f,
        0x75, 0x70
    },
    {
        0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72,
        0x69, 0x6c, 0x6c, 0x69, 0x67, 0x2c, 0x20, 0x61,
        0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73,
        0x6c, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6f,
        0x76, 0x65, 0x73, 0x0a, 0x44, 0x69, 0x64, 0x20,
        0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x64,
        0x20, 0x67, 0x69, 0x6d, 0x62, 0x6c, 0x65, 0x20,
        0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77,
        0x61, 0x62, 0x65, 0x3a, 0x0a, 0x41, 0x6c, 0x6c,
        0x20, 0x6d, 0x69, 0x6d, 0x73, 0x79, 0x20, 0x77,
        0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20,
        0x62, 0x6f, 0x72, 0x6f, 0x67, 0x6f, 0x76, 0x65,
        0x73, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x74,
        0x68, 0x65, 0x20, 0x6d, 0x6f, 0x6d, 0x65, 0x20,
        0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6f, 0x75,
        0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2e
    }
};
436 
/* Number of meaningful bytes in each test_data[] row; this is the length
 * passed to mbedtls_poly1305_mac() by the self-test. */
static const size_t test_data_len[2] =
{
    34U,
    127U
};
442 
/* Expected 16-byte tags: test_mac[i] is compared by the self-test against
 * the MAC computed over test_data[i] with test_keys[i]. */
static const unsigned char test_mac[2][16] =
{
    {
        0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
        0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9
    },
    {
        0x45, 0x41, 0x66, 0x9a, 0x7e, 0xaa, 0xee, 0x61,
        0xe7, 0x08, 0xdc, 0x7c, 0xbc, 0xc5, 0xeb, 0x62
    }
};
454 
/* Make sure no other definition is already present. */
#undef ASSERT

/* Self-test check: when cond is false, print args (a parenthesized
 * mbedtls_printf argument list) if the enclosing function's 'verbose'
 * variable is non-zero, then return -1 from the enclosing function. */
#define ASSERT(cond, args)              \
    do {                                \
        if (!(cond)) {                  \
            if (verbose != 0) {         \
                mbedtls_printf args;    \
            }                           \
            return -1;                  \
        }                               \
    } while (0)
470 
mbedtls_poly1305_self_test(int verbose)471 int mbedtls_poly1305_self_test(int verbose)
472 {
473     unsigned char mac[16];
474     unsigned i;
475     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
476 
477     for (i = 0U; i < 2U; i++) {
478         if (verbose != 0) {
479             mbedtls_printf("  Poly1305 test %u ", i);
480         }
481 
482         ret = mbedtls_poly1305_mac(test_keys[i],
483                                    test_data[i],
484                                    test_data_len[i],
485                                    mac);
486         ASSERT(0 == ret, ("error code: %i\n", ret));
487 
488         ASSERT(0 == memcmp(mac, test_mac[i], 16U), ("failed (mac)\n"));
489 
490         if (verbose != 0) {
491             mbedtls_printf("passed\n");
492         }
493     }
494 
495     if (verbose != 0) {
496         mbedtls_printf("\n");
497     }
498 
499     return 0;
500 }
501 
502 #endif /* MBEDTLS_SELF_TEST */
503 
504 #endif /* MBEDTLS_POLY1305_C */
505