1 /*
2  * Copyright (c) 2001-2022, Arm Limited and Contributors. All rights reserved.
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
7 #include "cc_pal_types.h"
8 #include "cc_pal_mem.h"
9 #include "pka.h"
10 #include "pka_error.h"
11 #include "pka_hw_defs.h"
12 #include "mbedtls_cc_poly.h"
13 #include "mbedtls_cc_poly_error.h"
14 #include "cc_common.h"
15 #include "poly.h"
16 
/* Declaration of global buffers defined in common/pka.c.
 * The entries of regTemps[] are used below as the PKA register identifiers
 * passed to the PKA macros and copy helpers in this file.
 */
extern const int8_t regTemps[PKA_MAX_COUNT_OF_PHYS_MEM_REGS];

/* PKA registers required for the operation. We require R0-R5 registers plus
 * the two reserved registers that are always required, PKA_REG_T0/PKA_REG_T1
 *
 * Roles, as used by the functions in this file:
 *   PRIME_REG  - modulus, loaded from g_PolyPrime
 *   NP_REG     - Barrett tag, loaded from g_PolyNp
 *   ACC_REG    - running Poly1305 accumulator
 *   KEY_R_REG  - "r" half of the key (multiplier)
 *   KEY_S_REG  - "s" half of the key (added when the tag is produced)
 *   DATA_REG   - message block currently being accumulated
 */
#define  PRIME_REG  regTemps[0]
#define  NP_REG     regTemps[1]
#define  ACC_REG    regTemps[2]
#define  KEY_R_REG  regTemps[3]
#define  KEY_S_REG  regTemps[4]
#define  DATA_REG   regTemps[5]
#define  POLY_PKA_REGS_NUM (6+2) // +2 temp registers
30 
/* Macro to read a non aligned word from RAM: combines the upper part of the
 * aligned word m_w0 (shifted right by m_shift bits) with the lower part of
 * the following aligned word m_w1 (shifted left by m_shift_ bits).
 * m_shift + m_shift_ is expected to equal 32, with neither of them being 0
 * or 32 (the callers only use it when the source is misaligned).
 */
#define GET_NON_ALIGNED_WORD(m_w0, m_w1, m_shift, m_shift_) \
    (((m_w0)>>(m_shift)) | ((m_w1)<<(m_shift_)))

/* Macro to calc the size in bytes of the full data blocks contained in a
 * generic data size, i.e. data_size rounded down to a multiple of
 * CC_POLY_BLOCK_SIZE_IN_BYTES. The argument is parenthesized so that an
 * expression such as (a + b) is rounded as a whole.
 */
#define GET_FULL_DATA_BLOCKS_SIZE(data_size) \
    (((data_size)/CC_POLY_BLOCK_SIZE_IN_BYTES)*CC_POLY_BLOCK_SIZE_IN_BYTES)
38 
/* Mask for Key "r" buffer: clearing 4 high bits of bytes 3, 7, 11, 15;
 * clearing low 2 bits of bytes 4,8,12.
 * Entry i is ANDed with the i-th 32-bit word of "r" by poly_clamp_r().
 */
static const uint32_t g_PolyMaskKeyR[CC_POLY_KEY_SIZE_IN_WORDS/2] = {
    0x0fffffff,
    0x0ffffffc,
    0x0ffffffc,
    0x0ffffffc
};
/* POLY PRIME: 2^130-5 = 0x3_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFB,
 * stored least significant word first. Loaded into PRIME_REG.
 */
static const uint32_t g_PolyPrime[POLY_PRIME_SIZE_IN_WORDS] = {
    0xfffffffb,
    0xffffffff,
    0xffffffff,
    0xffffffff,
    0x3
};
/* Barrett tag Np = 0x80_00000000_00000000, stored least significant word
 * first. Loaded into NP_REG.
 */
static const uint32_t g_PolyNp[] = {0x00000000, 0x00000000, 0x00000080};
58 
59 /**
60  * @brief The function loads the remaining bytes (not a full block) of data
61  *        together with setting of extra bit according to Poly1305 algorithm.
62  *
63  * @note  Assuming register size is 128+64 bits according to PKA multiplier
64  *        64x16, and assumes the PKA engine is already working
65  *
66  * @param[in] pSrc           Pointer to source (little endian) buffer.
67  * @param[in] sizeBytes      Size of remaining data in bytes, should be in range
68  *                           [1...15]
69  * @param[in] isPolyAeadMode Flag indicating whether to consider the block as a
70  *                           16-bytes block padded with zeros towards the MSB
71  *
72  * @return None
73  */
PolyAccRemainBlock(const uint8_t * pSrc,uint32_t sizeBytes,bool isPolyAeadMode)74 static void PolyAccRemainBlock(const uint8_t *pSrc,
75                                uint32_t sizeBytes,
76                                bool isPolyAeadMode)
77 {
78     uint32_t tmp[CC_POLY_BLOCK_SIZE_IN_WORDS+2] = {0};
79     uint32_t i;
80     uint32_t dataRegSramAddr;
81 
82     /* load block into words buffer and set high bit 1 */
83     CC_PalMemCopy((uint8_t*)tmp, pSrc, sizeBytes);
84     if(isPolyAeadMode) {
85         sizeBytes = CC_POLY_BLOCK_SIZE_IN_BYTES;
86     }
87     ((uint8_t*)tmp)[sizeBytes] = 1;
88 
89     /* get DATA_REG address */
90     PKA_GET_REG_ADDRESS(DATA_REG, dataRegSramAddr/*reg.addr*/);
91 
92     /* set address */
93     PKA_WAIT_ON_PKA_DONE();
94     CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_ADDR), dataRegSramAddr);
95 
96     /* load block into PKA reg. */
97     for(i = 0; i < CC_POLY_BLOCK_SIZE_IN_WORDS+2; i++) {
98         CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA), tmp[i]);
99     }
100 
101     /* acumulate the data and do modular multiplication by keyR */
102     PKA_MOD_ADD(LEN_ID_N_BITS, ACC_REG, ACC_REG, DATA_REG);
103     PKA_MOD_MUL(LEN_ID_N_BITS, ACC_REG, ACC_REG, KEY_R_REG);
104 
105     return;
106 }
107 
108 /**
109  * @brief Function to accumulate full blocks (i.e. multiples of 16 bytes only)
110  *        in the Poly1305 algorithm
111  *
112  * @note  This function assumes the PKA engine is already working.
113  *
114  * @param[in] pSrc The pointer to the data buffer.
115  * @param[in] size The size in bytes of data.
116  *
117  * @return CC_OK or error code.
118  */
PolyAccCalcFullBlocks(const uint8_t * pSrc,size_t size)119 static CCError_t PolyAccCalcFullBlocks(const uint8_t *pSrc, size_t size)
120 {
121     uint32_t shift, shift_; /* shift left (in bits) needed for aligning data to 32-bit words */
122     uint32_t remSize, blocksCount;
123     uint32_t *pSrc32;
124     uint32_t i;
125     uint32_t dataRegSramAddr;
126     uint32_t word0 = 0;
127 
128     /* count of full blocks */
129     blocksCount = size / CC_POLY_BLOCK_SIZE_IN_BYTES;
130     /* remining data: count of bytes in not full 32-bit word */
131     remSize = size % CC_POLY_BLOCK_SIZE_IN_BYTES;
132     /* Calling this function for not an integer number of blocks is an error */
133     if (remSize) {
134         return CC_POLY_DATA_INVALID_ERROR;
135     }
136     /* Calling this function for zero input size, do nothing and return */
137     if (!blocksCount) {
138         return CC_OK;
139     }
140 
141     /* count of non aligned bytes and aligned pointer */
142     shift = (size_t)pSrc % CC_32BIT_WORD_SIZE;
143     shift_ = CC_32BIT_WORD_SIZE - shift;
144     pSrc32 = (uint32_t*)((uint32_t)pSrc - shift);
145 
146     /* set first non aligned bytes into word0 */
147     if(shift) {
148 
149         word0 = (uint32_t)pSrc[0];
150         if(shift_ > 1){
151             word0 |= ((uint32_t)pSrc[1] << 8);
152         }
153         if(shift_ > 2){
154             word0 |= ((uint32_t)pSrc[2] << 16);
155         }
156 
157         shift = shift << 3; /*now shift is in bits*/
158         shift_ = shift_ << 3;
159         word0 <<= shift;
160     }
161 
162     /*---------------------*/
163     /* process full blocks */
164     /*---------------------*/
165 
166     /* get DATA_REG address */
167     PKA_GET_REG_ADDRESS(DATA_REG, dataRegSramAddr/*reg.addr*/);
168 
169     for(i = 0; i < blocksCount; i++) {
170 
171         /* set address of DATA_REG PKA register */
172         CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_ADDR), dataRegSramAddr);
173         PKA_WAIT_ON_PKA_DONE();
174 
175         /* load block of 4 words */
176         if(shift) {
177             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA),
178                                   (uint32_t)GET_NON_ALIGNED_WORD(word0,     pSrc32[1], shift, shift_));
179             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA),
180                                   (uint32_t)GET_NON_ALIGNED_WORD(pSrc32[1], pSrc32[2], shift, shift_));
181             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA),
182                                   (uint32_t)GET_NON_ALIGNED_WORD(pSrc32[2], pSrc32[3], shift, shift_));
183             word0  = pSrc32[4];
184             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA),
185                                   (uint32_t)GET_NON_ALIGNED_WORD(pSrc32[3], word0, shift, shift_));
186         } else {
187             /* write data block */
188             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA), pSrc32[0]);
189             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA), pSrc32[1]);
190             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA), pSrc32[2]);
191             CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA), pSrc32[3]);
192         }
193         pSrc32 += CC_POLY_BLOCK_SIZE_IN_WORDS;
194 
195         /* set MSBit 129 and zeroe other high bits of register */
196         CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA), 1UL);
197         CC_HAL_WRITE_REGISTER(CC_REG_OFFSET(CRY_KERNEL, PKA_SRAM_WDATA), 0UL);
198 
199         /* acumulate the data and do modular multiplication by keyR */
200         PKA_MOD_ADD(LEN_ID_N_BITS, ACC_REG, ACC_REG, DATA_REG);
201         PKA_MOD_MUL(LEN_ID_N_BITS, ACC_REG, ACC_REG, KEY_R_REG);
202     }
203 
204     return CC_OK;
205 }
206 
207 /**
208  * @brief performs internal operations on PKA buffer to calculate the POLY
209  *        accumulator, i.e. Acc = ((Acc+block)*r) % p.
210  *
211  * @param[in] pSrc           The pointer to the data buffer
212  * @param[in] size           The size in bytes of data
213  * @param[in] isPolyAeadMode Flag indicating whether to consider the last
214  *                           (partial) block as a full block padded with
215  *                           zeros towards the MSB
216  *
217  * @return CC_OK On success, otherwise indicates failure
218  */
PolyAccCalc(const uint8_t * pSrc,size_t size,bool isPolyAeadMode)219 static uint32_t PolyAccCalc(const uint8_t *pSrc, size_t size,
220                             bool isPolyAeadMode)
221 {
222     uint32_t blocksCount = size / CC_POLY_BLOCK_SIZE_IN_BYTES;
223     uint32_t remSize = size % CC_POLY_BLOCK_SIZE_IN_BYTES;
224 
225     CCError_t ret = PolyAccCalcFullBlocks(pSrc, blocksCount * CC_POLY_BLOCK_SIZE_IN_BYTES);
226     if (ret != CC_OK) {
227         return ret;
228     }
229 
230     /*-----------------------------------*/
231     /* process remaining (not full) block */
232     /*-----------------------------------*/
233     if (remSize) {
234         PolyAccRemainBlock(pSrc + blocksCount*CC_POLY_BLOCK_SIZE_IN_BYTES,
235                            remSize, isPolyAeadMode);
236     }
237     return CC_OK;
238 }
239 
240 /**
241  * @brief This function performs clamping of the first 128 bit of the key
242  *        pair (r,s), i.e. r, as specified by RFC7539.
243  *
244  * @param[in] pKeyR Pointer to a buffer containing the r value, i.e. 128 bits
245  *
246  * @return None.
247  */
poly_clamp_r(uint32_t * pKeyR)248 static void poly_clamp_r(uint32_t *pKeyR)
249 {
250     if (pKeyR == NULL) {
251         return;
252     }
253 
254     /* clamp "KeyR"  */
255     for (int i = 0; i < CC_POLY_KEY_SIZE_IN_WORDS/2; i++) {
256         pKeyR[i] = pKeyR[i] & g_PolyMaskKeyR[i];
257     }
258 }
259 
260 /**
261  * @brief This function performs context restoration on the PKA registers. It
262  *        must be called with the PKA Mutex already taken to avoid undefined
263  *        behaviour. The key in the ctx must be already clamped as per RFC7539
264  *
265  * @param[out] ctx Pointer to the PKA context to be used to restore the
266  *                 registers from, i.e. (ACC_REG, KEY_R_REG, KEY_S_REG)
267  *
268  * @return  CC_OK On success, otherwise indicates failure
269  */
poly_restore_context(PolyPkaContext_t * ctx)270 static CCError_t poly_restore_context(PolyPkaContext_t *ctx)
271 {
272     if (ctx == NULL) {
273         return CC_POLY_DATA_INVALID_ERROR;
274     }
275 
276     uint32_t *pKeyR = ctx->key;
277     uint32_t *pKeyS = ctx->key + CC_POLY_KEY_SIZE_IN_WORDS/2;
278     uint32_t *acc = ctx->acc;
279 
280     /* clear ACC_REG register */
281     PKA_CLEAR(LEN_ID_MAX_BITS, ACC_REG );
282 
283     /* set values in PKA register for the MAC operation: */
284     /* Set the prime number to ((1<<130) -5) */
285     PkaCopyDataIntoPkaReg(PRIME_REG, LEN_ID_MAX_BITS, g_PolyPrime, CALC_32BIT_WORDS_FROM_BYTES(sizeof(g_PolyPrime)));
286 
287     /* Calculate NP for modulus operation */
288     PkaCopyDataIntoPkaReg(NP_REG, LEN_ID_MAX_BITS, g_PolyNp, CALC_32BIT_WORDS_FROM_BYTES(sizeof(g_PolyNp)));
289 
290     /* Copy accumulator value into PKA accumulator register */
291     PkaCopyDataIntoPkaReg(ACC_REG, LEN_ID_MAX_BITS, acc, CALC_32BIT_WORDS_FROM_BYTES(sizeof(ctx->acc)));
292     /* Copy pKeyR to PKA register #2 */
293     PkaCopyDataIntoPkaReg(KEY_R_REG, LEN_ID_MAX_BITS, pKeyR, CC_POLY_KEY_SIZE_IN_WORDS/2);
294     /* Copy pKeyS to PKA register #3 */
295     PkaCopyDataIntoPkaReg(KEY_S_REG, LEN_ID_MAX_BITS, pKeyS, CC_POLY_KEY_SIZE_IN_WORDS/2);
296 
297     /* clear other registers  */
298     PKA_CLEAR(LEN_ID_MAX_BITS, DATA_REG);
299     PKA_CLEAR(LEN_ID_MAX_BITS, PKA_REG_T0);
300     PKA_CLEAR(LEN_ID_MAX_BITS, PKA_REG_T1);
301 
302     return CC_OK;
303 }
304 
305 /**
306  * @brief This function performs context saving from the PKA registers. It
307  *        must be called with the PKA Mutex already taken to avoid undefined
308  *        behaviour.
309  *
310  * @param[out] ctx Pointer to the PKA context to be used to store the ACC reg
311  *
312  * @return CC_OK On success, otherwise indicates failure
313  */
poly_save_context(PolyPkaContext_t * ctx)314 static CCError_t poly_save_context(PolyPkaContext_t *ctx)
315 {
316     if (ctx == NULL) {
317         return CC_POLY_DATA_INVALID_ERROR;
318     }
319 
320     uint32_t *acc = ctx->acc;
321 
322     /* copy ACC register into the PKA context structure */
323     PkaCopyDataFromPkaReg(acc, CALC_32BIT_WORDS_FROM_BYTES(sizeof(ctx->acc)), ACC_REG);
324 
325     return CC_OK;
326 }
327 
328 /**
329  * @brief This function performs the setup steps required to implement
330  *        the Poly1305 algorithm, i.e. register initialisations. The function
331  *        also performs the clamping of the key as specified by RFC7539 before
332  *        setting the key onto the PKA regs
333  *
334  * @note The PKA engine must be already initialised before calling this function
335  *
336  * @param[in] key Pointer to the buffer containing the 256 bit key pair (r,s)
337  *
338  * @return None
339  */
PolyMacCalc_setup(mbedtls_poly_key key,uint32_t * pkaRegsNum)340 static void PolyMacCalc_setup(mbedtls_poly_key key, uint32_t *pkaRegsNum)
341 {
342     uint32_t *pKeyR = key;
343     uint32_t *pKeyS = pKeyR + CC_POLY_KEY_SIZE_IN_WORDS/2;
344 
345     /* Clamp "KeyR" */
346     poly_clamp_r(pKeyR);
347 
348     /* Set values in PKA register for the MAC operation: */
349     /* Set the prime number to ((1<<130) -5) */
350     PkaCopyDataIntoPkaReg(PRIME_REG, LEN_ID_MAX_BITS, g_PolyPrime, CALC_32BIT_WORDS_FROM_BYTES(sizeof(g_PolyPrime)));
351 
352     /* Calculate NP for modulus operation */
353     PkaCopyDataIntoPkaReg(NP_REG, LEN_ID_MAX_BITS, g_PolyNp, CALC_32BIT_WORDS_FROM_BYTES(sizeof(g_PolyNp)));
354 
355     /* Copy pKeyR to PKA register #2 */
356     PkaCopyDataIntoPkaReg(KEY_R_REG, LEN_ID_MAX_BITS, pKeyR, CC_POLY_KEY_SIZE_IN_WORDS/2);
357     /* Copy pKeyS to PKA register #3 */
358     PkaCopyDataIntoPkaReg(KEY_S_REG, LEN_ID_MAX_BITS, pKeyS, CC_POLY_KEY_SIZE_IN_WORDS/2);
359 
360     /* clear some registers  */
361     PKA_CLEAR(LEN_ID_MAX_BITS, DATA_REG);
362     PKA_CLEAR(LEN_ID_MAX_BITS, ACC_REG);
363     PKA_CLEAR(LEN_ID_MAX_BITS, PKA_REG_T0);
364     PKA_CLEAR(LEN_ID_MAX_BITS, PKA_REG_T1);
365 }
366 
367 /**
368  * @brief This function performs the finalization of the Poly1305 operation by
369  *        doing "a += s", i.e. it returns the accumulator value after having
370  *        added the 128 bit s part of the key previously setup on PKA registers.
371  *        The PKA engine must be explicitly released after a call to this
372  *        function.
373  *
374  * @param[out] macRes Pointer to the buffer to hold the produced 16 bytes tag.
375  *
376  * @return None
377  */
PolyMacCalc_finalize(mbedtls_poly_mac macRes)378 static void PolyMacCalc_finalize(mbedtls_poly_mac macRes)
379 {
380     /* acc = acc+pkeyS */
381     PKA_ADD(LEN_ID_N_BITS, ACC_REG, ACC_REG, KEY_S_REG);
382 
383     /* copy acc into macRes */
384     PkaCopyDataFromPkaReg(macRes, CC_POLY_MAC_SIZE_IN_WORDS, ACC_REG);
385 }
386 
387 /*********** Public functions ***********/
388 
PolyMacCalc(mbedtls_poly_key key,const uint8_t * pAddData,size_t addDataSize,const uint8_t * pDataIn,size_t dataInSize,mbedtls_poly_mac macRes,bool isPolyAeadMode)389 CCError_t PolyMacCalc(mbedtls_poly_key key,
390                       const uint8_t *pAddData,
391                       size_t addDataSize,
392                       const uint8_t *pDataIn,
393                       size_t dataInSize,
394                       mbedtls_poly_mac macRes,
395                       bool isPolyAeadMode)
396 {
397     uint32_t rc = CC_FAIL;
398     uint32_t lastBlock[CC_POLY_BLOCK_SIZE_IN_WORDS];
399     uint32_t pkaRegsNum = POLY_PKA_REGS_NUM;
400 
401     /* verify inputs for the key and output tag */
402     if ((key == NULL) || (macRes == NULL)) {
403         return CC_POLY_DATA_INVALID_ERROR;
404     }
405 
406     /* Check that if pointers are NULL sizes are also zero for data and aad */
407     if (((pDataIn == NULL) && (dataInSize != 0)) ||
408         ((pAddData == NULL) && (addDataSize != 0))) {
409         return CC_POLY_DATA_INVALID_ERROR;
410     }
411 
412     /* Initialize the PKA engine and obtain the mutex on success */
413     rc = PkaInitAndMutexLock(POLY_PRIME_SIZE_IN_BITS, &pkaRegsNum);
414     if (rc != CC_OK) {
415         return rc;
416     }
417 
418     /* Setup the registers of the PKA engine. It also clamps the key as
419      * specified by RFC7539. This procedure can't return failure.
420      */
421     PolyMacCalc_setup(key, &pkaRegsNum);
422 
423     /* process the additional Data */
424     if (addDataSize) {
425         rc = PolyAccCalc(pAddData, addDataSize, isPolyAeadMode);
426         if (rc != CC_OK) {
427             goto end_func;
428         }
429     }
430     /*  process the DataIn input */
431     if (dataInSize) {
432         rc = PolyAccCalc(pDataIn, dataInSize, isPolyAeadMode);
433         if (rc != CC_OK) {
434             goto end_func;
435         }
436     }
437 
438     /* The last block needs to be formatted as RFC7539 if AEAD mode is set */
439     if (isPolyAeadMode) {
440         /* Fill lastBlock with 64-bit LE words: addDataSize | dataInSize */
441         lastBlock[0] = addDataSize;
442         lastBlock[1] = 0;
443         lastBlock[2] = dataInSize;
444         lastBlock[3] = 0;
445 
446         rc = PolyAccCalc((uint8_t*)lastBlock, CC_POLY_BLOCK_SIZE_IN_BYTES, false);
447         if (rc != CC_OK) {
448             goto end_func;
449         }
450     }
451 
452     /* Finalize macRes by adding KEY_S_REG to ACC_REG. Can't fail. */
453     PolyMacCalc_finalize(macRes);
454 
455 end_func:
456     PkaFinishAndMutexUnlock(pkaRegsNum);
457 
458     return rc;
459 }
460 
PolyInit(PolyState_t * state,const uint8_t * key,size_t key_size)461 CCError_t PolyInit(PolyState_t *state, const uint8_t *key, size_t key_size)
462 {
463     if ((key == NULL) || (key_size != CC_POLY_KEY_SIZE_IN_BYTES)) {
464         return CC_POLY_DATA_INVALID_ERROR;
465     }
466 
467     /* Copy the 256-bit pair (r,s) in the context and clamp first 128 bits */
468     CC_PalMemCopy(state->context.key, key, sizeof(state->context.key));
469     poly_clamp_r(state->context.key);
470 
471     /* Re-set ACC value in the context */
472     CC_PalMemSetZero(state->context.acc, sizeof(state->context.acc));
473 
474     /* Reset the stored message chunk, CC_POLY_BLOCK_SIZE_IN_BYTES bytes */
475     CC_PalMemSetZero(state->msg_state, sizeof(state->msg_state));
476 
477     /* Reset the current size of the buffered message */
478     state->msg_state_size = 0;
479 
480     return CC_OK;
481 }
482 
PolyUpdate(PolyState_t * state,const uint8_t * data,size_t data_size,bool isPolyAeadMode)483 CCError_t PolyUpdate(
484     PolyState_t *state,
485     const uint8_t *data,
486     size_t data_size,
487     bool isPolyAeadMode)
488 {
489     size_t data_to_add = 0;
490     uint8_t *pStateByte = NULL;
491     CCError_t rc = CC_FAIL;
492     uint32_t pkaRegsNum = POLY_PKA_REGS_NUM;
493 
494     if ((state == NULL) || (data == NULL && data_size != 0)) {
495         return CC_POLY_DATA_INVALID_ERROR;
496     }
497     pStateByte = (uint8_t *) &state->msg_state[0];
498 
499     /* Ad additional data of size 0 is a legitimate case */
500     if (data_size == 0) {
501         return CC_OK;
502     }
503 
504     /* If we don't need to pad to a block for this call, it's fine to just
505      * cache the bytes received and leave the accumulation to a later call
506      */
507     if (!isPolyAeadMode) {
508         if (data_size + state->msg_state_size < CC_POLY_BLOCK_SIZE_IN_BYTES) {
509             for (int i=0; i<data_size; i++) {
510                 pStateByte[i + state->msg_state_size] = data[i];
511             }
512             state->msg_state_size += data_size;
513             return CC_OK;
514         }
515     }
516 
517     if (state->msg_state_size != 0) {
518         /* Fill one block of cached data */
519         data_to_add = CC_POLY_BLOCK_SIZE_IN_BYTES - state->msg_state_size;
520         if (data_to_add > data_size) {
521             /* This can happen only when isPolyAeadMode is set, and the
522              * difference between data_to_add and data_size must be padded with
523              * zeros in the cached data state
524              */
525             size_t num_of_zeros = data_to_add - data_size;
526             for (int i = num_of_zeros; i != 0; i--) {
527                 pStateByte[CC_POLY_BLOCK_SIZE_IN_BYTES-i] = 0;
528             }
529             /* Copy all the available data in this case, but no more */
530             data_to_add = data_size;
531         }
532         CC_PalMemCopy(&pStateByte[state->msg_state_size], data, data_to_add);
533 
534         data += data_to_add;
535         data_size -= data_to_add;
536 
537         state->msg_state_size = CC_POLY_BLOCK_SIZE_IN_BYTES;
538     }
539 
540     /* Init PKA engine and take the mutex */
541     rc = PkaInitAndMutexLock(POLY_PRIME_SIZE_IN_BITS, &pkaRegsNum);
542     if (rc != CC_OK) {
543         return rc;
544     }
545 
546     /* Restore the context on the PKA engine */
547     poly_restore_context(&state->context);
548 
549     if (state->msg_state_size == CC_POLY_BLOCK_SIZE_IN_BYTES) {
550         rc = PolyAccCalcFullBlocks(pStateByte, CC_POLY_BLOCK_SIZE_IN_BYTES);
551         if (rc != CC_OK) {
552             goto cleanup;
553         }
554         /* The state has been consumed */
555         state->msg_state_size = 0;
556     }
557 
558     /* Again if we don't need to pad to a block for this call, just cache the
559      * remaining data and leave the accumulation to a later call
560      */
561     if (!isPolyAeadMode) {
562         if (data_size < CC_POLY_BLOCK_SIZE_IN_BYTES) {
563             CC_PalMemCopy(&pStateByte[0], data, data_size);
564             state->msg_state_size = data_size;
565             goto store_and_cleanup;
566         }
567     }
568 
569     rc = PolyAccCalcFullBlocks(data, GET_FULL_DATA_BLOCKS_SIZE(data_size));
570     if (rc != CC_OK) {
571         goto cleanup;
572     }
573 
574     data += GET_FULL_DATA_BLOCKS_SIZE(data_size);
575     data_size -= GET_FULL_DATA_BLOCKS_SIZE(data_size);
576 
577     if (data_size) {
578         if (!isPolyAeadMode) {
579             CC_PalMemCopy(&pStateByte[0], data, data_size);
580             state->msg_state_size = data_size;
581         } else {
582             /* If isPolyAeadMode is true, that means we need to pad the block
583              * to 16 bytes with zeros. This should happen only when the AD
584              * data or the payload data authentication get to its last call
585              */
586             PolyAccRemainBlock(data, data_size, true);
587         }
588     }
589 
590 store_and_cleanup:
591     poly_save_context(&state->context);
592 cleanup:
593     PkaFinishAndMutexUnlock(pkaRegsNum);
594     return rc;
595 }
596 
PolyFinish(PolyState_t * state,uint8_t * tag,size_t tag_size)597 CCError_t PolyFinish(PolyState_t *state, uint8_t *tag, size_t tag_size)
598 {
599     CCError_t rc = CC_FAIL;
600     uint32_t pkaRegsNum = POLY_PKA_REGS_NUM;
601 
602     if ((tag == NULL) || (tag_size != CC_POLY_BLOCK_SIZE_IN_BYTES)) {
603         return CC_POLY_DATA_INVALID_ERROR;
604     }
605 
606     /* Init PKA engine and take the mutex */
607     rc = PkaInitAndMutexLock(POLY_PRIME_SIZE_IN_BITS, &pkaRegsNum);
608     if (rc != CC_OK) {
609         return rc;
610     }
611 
612     /* Restore the context on the PKA engine */
613     poly_restore_context(&state->context);
614 
615     /* Accumulate the last partial block */
616     if (state->msg_state_size) {
617         PolyAccRemainBlock((const uint8_t *)state->msg_state,
618                            state->msg_state_size, false);
619     }
620 
621     /* Add the s part of the key to the value of the accumulator */
622     PKA_ADD(LEN_ID_N_BITS, ACC_REG, ACC_REG, KEY_S_REG);
623 
624     poly_save_context(&state->context);
625 
626     PkaFinishAndMutexUnlock(pkaRegsNum);
627 
628     CC_PalMemCopy(tag, state->context.acc, tag_size);
629 
630     return CC_OK;
631 }
632