1 /**
2 * \file poly1305.c
3 *
4 * \brief Poly1305 authentication algorithm.
5 *
6 * Copyright The Mbed TLS Contributors
7 * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
8 */
9 #include "common.h"
10
11 #if defined(MBEDTLS_POLY1305_C)
12
13 #include "mbedtls/poly1305.h"
14 #include "mbedtls/platform_util.h"
15 #include "mbedtls/error.h"
16
17 #include <string.h>
18
19 #include "mbedtls/platform.h"
20
21 #if !defined(MBEDTLS_POLY1305_ALT)
22
23 #define POLY1305_BLOCK_SIZE_BYTES (16U)
24
/*
 * Our implementation is tuned for 32-bit platforms with a 64-bit multiplier.
 * However, we provide an alternative for platforms without such a multiplier.
 */
29 #if defined(MBEDTLS_NO_64BIT_MULTIPLICATION)
/*
 * Full 32x32 -> 64-bit product assembled from four 16x16 -> 32-bit partial
 * products, so that no 64-bit multiplication is required.
 *
 * With a = al + 2^16 * ah and b = bl + 2^16 * bh:
 *     a * b = al*bl + 2^16 * (ah*bl + al*bh) + 2^32 * ah*bh
 */
static uint64_t mul64(uint32_t a, uint32_t b)
{
    const uint32_t a_lo = a & 0xFFFFU;
    const uint32_t a_hi = a >> 16;
    const uint32_t b_lo = b & 0xFFFFU;
    const uint32_t b_hi = b >> 16;

    /* Each partial product is at most 0xFFFF * 0xFFFF, which fits in 32 bits */
    const uint32_t low      = a_lo * b_lo;
    const uint32_t cross_hl = a_hi * b_lo;
    const uint32_t cross_lh = a_lo * b_hi;
    const uint32_t high     = a_hi * b_hi;

    /* The two cross terms together may need 33 bits: widen before adding */
    const uint64_t middle = (uint64_t) cross_hl + cross_lh;

    return ((uint64_t) high << 32) + (middle << 16) + low;
}
45 #else
/*
 * Full 32x32 -> 64-bit product using the native widening multiplication:
 * one operand is widened first so the multiplication is done on 64 bits.
 */
static inline uint64_t mul64(uint32_t a, uint32_t b)
{
    const uint64_t wide_a = a;

    return wide_a * b;
}
50 #endif
51
52
53 /**
54 * \brief Process blocks with Poly1305.
55 *
56 * \param ctx The Poly1305 context.
57 * \param nblocks Number of blocks to process. Note that this
58 * function only processes full blocks.
59 * \param input Buffer containing the input block(s).
60 * \param needs_padding Set to 0 if the padding bit has already been
61 * applied to the input data before calling this
62 * function. Otherwise, set this parameter to 1.
63 */
poly1305_process(mbedtls_poly1305_context * ctx,size_t nblocks,const unsigned char * input,uint32_t needs_padding)64 static void poly1305_process(mbedtls_poly1305_context *ctx,
65 size_t nblocks,
66 const unsigned char *input,
67 uint32_t needs_padding)
68 {
69 uint64_t d0, d1, d2, d3;
70 uint32_t acc0, acc1, acc2, acc3, acc4;
71 uint32_t r0, r1, r2, r3;
72 uint32_t rs1, rs2, rs3;
73 size_t offset = 0U;
74 size_t i;
75
76 r0 = ctx->r[0];
77 r1 = ctx->r[1];
78 r2 = ctx->r[2];
79 r3 = ctx->r[3];
80
81 rs1 = r1 + (r1 >> 2U);
82 rs2 = r2 + (r2 >> 2U);
83 rs3 = r3 + (r3 >> 2U);
84
85 acc0 = ctx->acc[0];
86 acc1 = ctx->acc[1];
87 acc2 = ctx->acc[2];
88 acc3 = ctx->acc[3];
89 acc4 = ctx->acc[4];
90
91 /* Process full blocks */
92 for (i = 0U; i < nblocks; i++) {
93 /* The input block is treated as a 128-bit little-endian integer */
94 d0 = MBEDTLS_GET_UINT32_LE(input, offset + 0);
95 d1 = MBEDTLS_GET_UINT32_LE(input, offset + 4);
96 d2 = MBEDTLS_GET_UINT32_LE(input, offset + 8);
97 d3 = MBEDTLS_GET_UINT32_LE(input, offset + 12);
98
99 /* Compute: acc += (padded) block as a 130-bit integer */
100 d0 += (uint64_t) acc0;
101 d1 += (uint64_t) acc1 + (d0 >> 32U);
102 d2 += (uint64_t) acc2 + (d1 >> 32U);
103 d3 += (uint64_t) acc3 + (d2 >> 32U);
104 acc0 = (uint32_t) d0;
105 acc1 = (uint32_t) d1;
106 acc2 = (uint32_t) d2;
107 acc3 = (uint32_t) d3;
108 acc4 += (uint32_t) (d3 >> 32U) + needs_padding;
109
110 /* Compute: acc *= r */
111 d0 = mul64(acc0, r0) +
112 mul64(acc1, rs3) +
113 mul64(acc2, rs2) +
114 mul64(acc3, rs1);
115 d1 = mul64(acc0, r1) +
116 mul64(acc1, r0) +
117 mul64(acc2, rs3) +
118 mul64(acc3, rs2) +
119 mul64(acc4, rs1);
120 d2 = mul64(acc0, r2) +
121 mul64(acc1, r1) +
122 mul64(acc2, r0) +
123 mul64(acc3, rs3) +
124 mul64(acc4, rs2);
125 d3 = mul64(acc0, r3) +
126 mul64(acc1, r2) +
127 mul64(acc2, r1) +
128 mul64(acc3, r0) +
129 mul64(acc4, rs3);
130 acc4 *= r0;
131
132 /* Compute: acc %= (2^130 - 5) (partial remainder) */
133 d1 += (d0 >> 32);
134 d2 += (d1 >> 32);
135 d3 += (d2 >> 32);
136 acc0 = (uint32_t) d0;
137 acc1 = (uint32_t) d1;
138 acc2 = (uint32_t) d2;
139 acc3 = (uint32_t) d3;
140 acc4 = (uint32_t) (d3 >> 32) + acc4;
141
142 d0 = (uint64_t) acc0 + (acc4 >> 2) + (acc4 & 0xFFFFFFFCU);
143 acc4 &= 3U;
144 acc0 = (uint32_t) d0;
145 d0 = (uint64_t) acc1 + (d0 >> 32U);
146 acc1 = (uint32_t) d0;
147 d0 = (uint64_t) acc2 + (d0 >> 32U);
148 acc2 = (uint32_t) d0;
149 d0 = (uint64_t) acc3 + (d0 >> 32U);
150 acc3 = (uint32_t) d0;
151 d0 = (uint64_t) acc4 + (d0 >> 32U);
152 acc4 = (uint32_t) d0;
153
154 offset += POLY1305_BLOCK_SIZE_BYTES;
155 }
156
157 ctx->acc[0] = acc0;
158 ctx->acc[1] = acc1;
159 ctx->acc[2] = acc2;
160 ctx->acc[3] = acc3;
161 ctx->acc[4] = acc4;
162 }
163
164 /**
165 * \brief Compute the Poly1305 MAC
166 *
167 * \param ctx The Poly1305 context.
168 * \param mac The buffer to where the MAC is written. Must be
169 * big enough to contain the 16-byte MAC.
170 */
poly1305_compute_mac(const mbedtls_poly1305_context * ctx,unsigned char mac[16])171 static void poly1305_compute_mac(const mbedtls_poly1305_context *ctx,
172 unsigned char mac[16])
173 {
174 uint64_t d;
175 uint32_t g0, g1, g2, g3, g4;
176 uint32_t acc0, acc1, acc2, acc3, acc4;
177 uint32_t mask;
178 uint32_t mask_inv;
179
180 acc0 = ctx->acc[0];
181 acc1 = ctx->acc[1];
182 acc2 = ctx->acc[2];
183 acc3 = ctx->acc[3];
184 acc4 = ctx->acc[4];
185
186 /* Before adding 's' we ensure that the accumulator is mod 2^130 - 5.
187 * We do this by calculating acc - (2^130 - 5), then checking if
188 * the 131st bit is set. If it is, then reduce: acc -= (2^130 - 5)
189 */
190
191 /* Calculate acc + -(2^130 - 5) */
192 d = ((uint64_t) acc0 + 5U);
193 g0 = (uint32_t) d;
194 d = ((uint64_t) acc1 + (d >> 32));
195 g1 = (uint32_t) d;
196 d = ((uint64_t) acc2 + (d >> 32));
197 g2 = (uint32_t) d;
198 d = ((uint64_t) acc3 + (d >> 32));
199 g3 = (uint32_t) d;
200 g4 = acc4 + (uint32_t) (d >> 32U);
201
202 /* mask == 0xFFFFFFFF if 131st bit is set, otherwise mask == 0 */
203 mask = (uint32_t) 0U - (g4 >> 2U);
204 mask_inv = ~mask;
205
206 /* If 131st bit is set then acc=g, otherwise, acc is unmodified */
207 acc0 = (acc0 & mask_inv) | (g0 & mask);
208 acc1 = (acc1 & mask_inv) | (g1 & mask);
209 acc2 = (acc2 & mask_inv) | (g2 & mask);
210 acc3 = (acc3 & mask_inv) | (g3 & mask);
211
212 /* Add 's' */
213 d = (uint64_t) acc0 + ctx->s[0];
214 acc0 = (uint32_t) d;
215 d = (uint64_t) acc1 + ctx->s[1] + (d >> 32U);
216 acc1 = (uint32_t) d;
217 d = (uint64_t) acc2 + ctx->s[2] + (d >> 32U);
218 acc2 = (uint32_t) d;
219 acc3 += ctx->s[3] + (uint32_t) (d >> 32U);
220
221 /* Compute MAC (128 least significant bits of the accumulator) */
222 MBEDTLS_PUT_UINT32_LE(acc0, mac, 0);
223 MBEDTLS_PUT_UINT32_LE(acc1, mac, 4);
224 MBEDTLS_PUT_UINT32_LE(acc2, mac, 8);
225 MBEDTLS_PUT_UINT32_LE(acc3, mac, 12);
226 }
227
/*
 * Put a context into its initial state by zeroizing the whole structure.
 * ctx is dereferenced for its size only; no NULL check is performed here.
 */
void mbedtls_poly1305_init(mbedtls_poly1305_context *ctx)
{
    mbedtls_platform_zeroize(ctx, sizeof(*ctx));
}
232
/*
 * Wipe a context (key material, accumulator and queued input).
 * Passing NULL is allowed and does nothing.
 */
void mbedtls_poly1305_free(mbedtls_poly1305_context *ctx)
{
    if (ctx != NULL) {
        mbedtls_platform_zeroize(ctx, sizeof(*ctx));
    }
}
241
/*
 * Load a 32-byte one-time key and reset the running state.
 *
 * key[0..15]  -> r, four little-endian words, clamped
 * key[16..31] -> s, four little-endian words, used as-is
 *
 * The accumulator is cleared and the partial-block queue is emptied.
 * Always returns 0.
 */
int mbedtls_poly1305_starts(mbedtls_poly1305_context *ctx,
                            const unsigned char key[32])
{
    size_t i;

    /* Clamp: r &= 0x0ffffffc0ffffffc0ffffffc0fffffff.
     * Word 0 only loses its top four bits; words 1..3 additionally lose
     * their two low bits. */
    ctx->r[0] = MBEDTLS_GET_UINT32_LE(key, 0) & 0x0FFFFFFFU;
    for (i = 1U; i < 4U; i++) {
        ctx->r[i] = MBEDTLS_GET_UINT32_LE(key, 4U * i) & 0x0FFFFFFCU;
    }

    /* Second half of the key */
    for (i = 0U; i < 4U; i++) {
        ctx->s[i] = MBEDTLS_GET_UINT32_LE(key, 16U + 4U * i);
    }

    /* Accumulator starts at zero (five limbs) */
    for (i = 0U; i < 5U; i++) {
        ctx->acc[i] = 0U;
    }

    /* No buffered input yet */
    mbedtls_platform_zeroize(ctx->queue, sizeof(ctx->queue));
    ctx->queue_len = 0U;

    return 0;
}
269
/*
 * Feed ilen bytes of message data into the MAC computation.
 *
 * Data is consumed in 16-byte blocks. Bytes that do not complete a block are
 * kept in ctx->queue until a later call (or the finish call) completes or
 * pads them. Every full block is processed with the padding bit added by
 * poly1305_process(). Always returns 0.
 */
int mbedtls_poly1305_update(mbedtls_poly1305_context *ctx,
                            const unsigned char *input,
                            size_t ilen)
{
    const unsigned char *cursor = input;
    size_t left = ilen;

    /* Step 1: top up a partially filled queue, if there is one */
    if ((left > 0U) && (ctx->queue_len > 0U)) {
        const size_t room = POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len;

        if (left < room) {
            /* Still not a full block: append everything and wait for more */
            memcpy(&ctx->queue[ctx->queue_len], cursor, left);
            ctx->queue_len += left;
            return 0;
        }

        /* The queue becomes a complete block: process it and empty it */
        memcpy(&ctx->queue[ctx->queue_len], cursor, room);
        ctx->queue_len = 0U;
        poly1305_process(ctx, 1U, ctx->queue, 1U); /* add padding bit */

        cursor += room;
        left -= room;
    }

    /* Step 2: process all whole blocks straight from the caller's buffer */
    if (left >= POLY1305_BLOCK_SIZE_BYTES) {
        const size_t whole = left / POLY1305_BLOCK_SIZE_BYTES;

        poly1305_process(ctx, whole, cursor, 1U);

        cursor += whole * POLY1305_BLOCK_SIZE_BYTES;
        left -= whole * POLY1305_BLOCK_SIZE_BYTES;
    }

    /* Step 3: stash the trailing partial block for later */
    if (left > 0U) {
        ctx->queue_len = left;
        memcpy(ctx->queue, cursor, left);
    }

    return 0;
}
325
/*
 * Complete the computation and write the 16-byte tag to mac.
 *
 * A queued partial block is terminated with a 0x01 byte, zero-filled up to
 * the block size and processed without the implicit padding bit, since the
 * 0x01 byte already plays that role. Always returns 0.
 */
int mbedtls_poly1305_finish(mbedtls_poly1305_context *ctx,
                            unsigned char mac[16])
{
    const size_t pending = ctx->queue_len;

    if (pending != 0U) {
        const size_t padded = pending + 1U;

        /* Padding bit goes right after the last message byte */
        ctx->queue[pending] = 1U;
        ctx->queue_len = padded;

        /* Zero-fill whatever is left of the block (possibly nothing) */
        memset(&ctx->queue[padded],
               0,
               POLY1305_BLOCK_SIZE_BYTES - padded);

        /* One block, padding already applied above */
        poly1305_process(ctx, 1U, ctx->queue, 0U);
    }

    poly1305_compute_mac(ctx, mac);

    return 0;
}
348
mbedtls_poly1305_mac(const unsigned char key[32],const unsigned char * input,size_t ilen,unsigned char mac[16])349 int mbedtls_poly1305_mac(const unsigned char key[32],
350 const unsigned char *input,
351 size_t ilen,
352 unsigned char mac[16])
353 {
354 mbedtls_poly1305_context ctx;
355 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
356
357 mbedtls_poly1305_init(&ctx);
358
359 ret = mbedtls_poly1305_starts(&ctx, key);
360 if (ret != 0) {
361 goto cleanup;
362 }
363
364 ret = mbedtls_poly1305_update(&ctx, input, ilen);
365 if (ret != 0) {
366 goto cleanup;
367 }
368
369 ret = mbedtls_poly1305_finish(&ctx, mac);
370
371 cleanup:
372 mbedtls_poly1305_free(&ctx);
373 return ret;
374 }
375
376 #endif /* MBEDTLS_POLY1305_ALT */
377
378 #if defined(MBEDTLS_SELF_TEST)
379
/*
 * Self-test keys, 32 bytes each. In every key the first two rows are the
 * bytes loaded as r and the last two rows are the bytes loaded as s.
 * NOTE(review): values look like the RFC 7539 / RFC 8439 Poly1305 test
 * vectors - confirm against the RFC before relying on that.
 */
static const unsigned char test_keys[2][32] =
{
    {   /* key #0 */
        0x85, 0xD6, 0xBE, 0x78, 0x57, 0x55, 0x6D, 0x33,     /* r */
        0x7F, 0x44, 0x52, 0xFE, 0x42, 0xD5, 0x06, 0xA8,
        0x01, 0x03, 0x80, 0x8A, 0xFB, 0x0D, 0xB2, 0xFD,     /* s */
        0x4A, 0xBF, 0xF6, 0xAF, 0x41, 0x49, 0xF5, 0x1B
    },
    {   /* key #1 */
        0x1C, 0x92, 0x40, 0xA5, 0xEB, 0x55, 0xD3, 0x8A,     /* r */
        0xF3, 0x33, 0x88, 0x86, 0x04, 0xF6, 0xB5, 0xF0,
        0x47, 0x39, 0x17, 0xC1, 0x40, 0x2B, 0x80, 0x09,     /* s */
        0x9D, 0xCA, 0x5C, 0xBC, 0x20, 0x70, 0x75, 0xC0
    }
};
395
/*
 * Self-test messages. Rows are sized for the longest message (127 bytes);
 * the shorter one is implicitly zero-filled and its real length is given by
 * test_data_len. The trailing comments show each row as ASCII, with <LF>
 * standing for a line feed (0x0A).
 */
static const unsigned char test_data[2][127] =
{
    {   /* message #0: 34 bytes */
        0x43, 0x72, 0x79, 0x70, 0x74, 0x6F, 0x67, 0x72,     /* Cryptogr */
        0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6F,     /* aphic Fo */
        0x72, 0x75, 0x6D, 0x20, 0x52, 0x65, 0x73, 0x65,     /* rum Rese */
        0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6F,     /* arch Gro */
        0x75, 0x70                                          /* up */
    },
    {   /* message #1: 127 bytes */
        0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72,     /* 'Twas br */
        0x69, 0x6C, 0x6C, 0x69, 0x67, 0x2C, 0x20, 0x61,     /* illig, a */
        0x6E, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73,     /* nd the s */
        0x6C, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6F,     /* lithy to */
        0x76, 0x65, 0x73, 0x0A, 0x44, 0x69, 0x64, 0x20,     /* ves<LF>Did  */
        0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6E, 0x64,     /* gyre and */
        0x20, 0x67, 0x69, 0x6D, 0x62, 0x6C, 0x65, 0x20,     /*  gimble  */
        0x69, 0x6E, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77,     /* in the w */
        0x61, 0x62, 0x65, 0x3A, 0x0A, 0x41, 0x6C, 0x6C,     /* abe:<LF>All */
        0x20, 0x6D, 0x69, 0x6D, 0x73, 0x79, 0x20, 0x77,     /*  mimsy w */
        0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20,     /* ere the  */
        0x62, 0x6F, 0x72, 0x6F, 0x67, 0x6F, 0x76, 0x65,     /* borogove */
        0x73, 0x2C, 0x0A, 0x41, 0x6E, 0x64, 0x20, 0x74,     /* s,<LF>And t */
        0x68, 0x65, 0x20, 0x6D, 0x6F, 0x6D, 0x65, 0x20,     /* he mome  */
        0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6F, 0x75,     /* raths ou */
        0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2E            /* tgrabe. */
    }
};
424
/* Number of meaningful bytes in each test_data row, in the same order */
static const size_t test_data_len[2] = { 34U, 127U };
430
/*
 * Expected 16-byte tags: test_mac[i] is the tag the self-test expects for
 * test_data[i] under test_keys[i].
 */
static const unsigned char test_mac[2][16] =
{
    {   /* tag #0 */
        0xA8, 0x06, 0x1D, 0xC1, 0x30, 0x51, 0x36, 0xC6,
        0xC2, 0x2B, 0x8B, 0xAF, 0x0C, 0x01, 0x27, 0xA9
    },
    {   /* tag #1 */
        0x45, 0x41, 0x66, 0x9A, 0x7E, 0xAA, 0xEE, 0x61,
        0xE7, 0x08, 0xDC, 0x7C, 0xBC, 0xC5, 0xEB, 0x62
    }
};
442
/* Drop any ASSERT definition inherited from an earlier header. */
#undef ASSERT

/*
 * Self-test check: when cond is false, print args (a parenthesized
 * mbedtls_printf argument list) if the enclosing function's `verbose` is
 * non-zero, then make the enclosing function return -1.
 */
#define ASSERT(cond, args)                  \
    do {                                    \
        if (!(cond)) {                      \
            if (verbose != 0) {             \
                mbedtls_printf args;        \
            }                               \
            return -1;                      \
        }                                   \
    } while (0)
458
mbedtls_poly1305_self_test(int verbose)459 int mbedtls_poly1305_self_test(int verbose)
460 {
461 unsigned char mac[16];
462 unsigned i;
463 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
464
465 for (i = 0U; i < 2U; i++) {
466 if (verbose != 0) {
467 mbedtls_printf(" Poly1305 test %u ", i);
468 }
469
470 ret = mbedtls_poly1305_mac(test_keys[i],
471 test_data[i],
472 test_data_len[i],
473 mac);
474 ASSERT(0 == ret, ("error code: %i\n", ret));
475
476 ASSERT(0 == memcmp(mac, test_mac[i], 16U), ("failed (mac)\n"));
477
478 if (verbose != 0) {
479 mbedtls_printf("passed\n");
480 }
481 }
482
483 if (verbose != 0) {
484 mbedtls_printf("\n");
485 }
486
487 return 0;
488 }
489
490 #endif /* MBEDTLS_SELF_TEST */
491
492 #endif /* MBEDTLS_POLY1305_C */
493