1 /*
2 * Copyright (c) 2001-2019, Arm Limited and Contributors. All rights reserved.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7 /*
8 * All the includes that are needed for code using this module to
9 * compile correctly should be #included here.
10 */
11 #include "cc_pal_mem.h"
12 #include "cc_pal_types.h"
13 #include "cc_hal_plat.h"
14 #include "cc_common_math.h"
15 #include "mbedtls_cc_ec_mont_edw_error.h"
16 #include "cc_ec_edw_api.h"
17
18 #include "ec_edw_local.h"
19 #include "pka_defs.h"
20 #include "pka_hw_defs.h"
21 #include "pki.h"
22 #include "pka.h"
23 #include "ec_edw.h"
24 #include "pka_error.h"
25 #include "pka_ec_edw_glob_regs_def.h"
26
27
28 /****** Definitions *******/
29
/*
 * Helper macros: expand a point suffix "pt" into the comma-separated list of
 * register identifiers that are passed as actual parameters for that point.
 */

/* Modified point: X, Y, Z, T registers. */
#define REGS_MODIF_POINT(pt) \
        EDW_REG_X##pt,EDW_REG_Y##pt,EDW_REG_Z##pt,EDW_REG_T##pt

/* Precalculated point: S, D, P registers. */
#define REGS_PREC_POINT(pt) \
        EDW_REG_S##pt,EDW_REG_D##pt,EDW_REG_P##pt

/* Precalculated negative point: D and S are swapped and MP is used instead of P. */
#define REGS_NEG_PREC_POINT(pt) \
        EDW_REG_D##pt,EDW_REG_S##pt,EDW_REG_MP##pt
34
35 /**
36 * EC Edwards adding with extended (Ext) coordinates of points.
37 * EEE: (X1,Y1,Z1,T1) + (X2,Y2,Z2,T2) -> (X,Y,Z,T)
38 *
39 */
PkaEcEdwAddExtExtExt(uint32_t rX,uint32_t rY,uint32_t rZ,uint32_t rT,uint32_t rX1,uint32_t rY1,uint32_t rZ1,uint32_t rT1,uint32_t rX2,uint32_t rY2,uint32_t rZ2,uint32_t rT2)40 static void PkaEcEdwAddExtExtExt(
41 uint32_t rX, uint32_t rY, uint32_t rZ, uint32_t rT, /*!< [out] - virt. pointers to PKA registers, containing
42 coordinates of result extended EC point. */
43 uint32_t rX1, uint32_t rY1, uint32_t rZ1, uint32_t rT1, /*!< [in] - virt. pointers to PKA registers, containing
44 coordinates of inputt extended EC point1. */
45 uint32_t rX2, uint32_t rY2, uint32_t rZ2, uint32_t rT2) /*!< [in] - virt. pointers to PKA registers, containing
46 coordinates of inputt extended EC point2. */
47 {
48 PKA_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, EDW_REG_N_4,rX1);
49 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, EDW_REG_T3, rY1);
50 PKA_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, EDW_REG_N_4,rX2);
51 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, EDW_REG_T4, rY2);
52 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T5, EDW_REG_T3, EDW_REG_T4);
53 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, rY1, rX1);
54 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, rY2, rX2);
55 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T6, EDW_REG_T3, EDW_REG_T4);
56 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T3, rT1, rT2);
57 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T3, EDW_REG_T3, EDW_REG_D2);
58 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, rZ1, rZ1);
59 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T4, EDW_REG_T4, rZ2);
60 PKA_SUB(LEN_ID_N_PKA_REG_BITS, rT, EDW_REG_N_4, EDW_REG_T5);
61 PKA_ADD(LEN_ID_N_PKA_REG_BITS, rT, rT, EDW_REG_T6);
62 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T6, EDW_REG_T6, EDW_REG_T5);
63 PKA_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_T5, EDW_REG_N_4, EDW_REG_T3);
64 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T5, EDW_REG_T5, EDW_REG_T4);
65 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, EDW_REG_T4, EDW_REG_T3);
66 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, rT, EDW_REG_T5);
67 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rY, EDW_REG_T4, EDW_REG_T6);
68 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rZ, EDW_REG_T5, EDW_REG_T4);
69 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rT, rT, EDW_REG_T6);
70 return;
71 }
72
73
74 /**
75 * EC Edwards doubing with extended (Ext) coordinates of point.
76 * EE: 2*(X,Y,Z,T) -> (X,Y,Z,T)
77 *
78 */
PkaEcEdwDoublExtExt(uint32_t rX,uint32_t rY,uint32_t rZ,uint32_t rT)79 static void PkaEcEdwDoublExtExt(uint32_t rX, uint32_t rY, uint32_t rZ, uint32_t rT) /*!< [in/out] - virt. pointers to PKA registers, containing
80 coordinates of input/output point. */
81 {
82 PKA_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, EDW_REG_N_4, rX); // hwsub(t3, n_4, x);
83 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, EDW_REG_T3, rY); // hwadd(t3, t3, y);
84 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T5, EDW_REG_T3, EDW_REG_T3); // hwmmul(t5, t3, t3, n, np);
85 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, rY, rX); // hwadd(t3, y,x);
86 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T6, EDW_REG_T3, EDW_REG_T3); // hwmmul(t6, t3, t3, n, np);
87 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T3, rT, rT); // hwmmul(t3, t, t, n, np);
88 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T3, EDW_REG_T3, EDW_REG_D2); // hwmmul(t3, t3, d2, n, np);
89 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, rZ, rZ); // hwadd(t4, z, z);
90 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T4, EDW_REG_T4, rZ); // hwmmul(t4, t4, z, n, np);
91 PKA_SUB(LEN_ID_N_PKA_REG_BITS, rT, EDW_REG_N_4, EDW_REG_T5); // hwsub(t, n_4, t5);
92 PKA_ADD(LEN_ID_N_PKA_REG_BITS, rT, rT, EDW_REG_T6); // hwadd(t, t, t6);
93 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T6, EDW_REG_T6, EDW_REG_T5); // hwadd(t6, t6, t5);
94 PKA_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_T5, EDW_REG_N_4, EDW_REG_T3); // hwsub(t5, n_4, t3);
95 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T5, EDW_REG_T5, EDW_REG_T4); // hwadd(t5, t5, t4);
96 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, EDW_REG_T4, EDW_REG_T3); // hwadd(t4, t4, t3);
97 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, rT, EDW_REG_T5); // hwmmul(x, t, t5, n, np);
98 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rY, EDW_REG_T4, EDW_REG_T6); // hwmmul(y, t4, t6, n, np);
99 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rZ, EDW_REG_T5, EDW_REG_T4); // hwmmul(z, t5, t4, n, np);
100 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rT, rT, EDW_REG_T6); // hwmmul(t, t, t6, n, np);
101 return;
102 }
103
104
105 /**
106 * EC Edwards adding with extended (Ext) and proective (Prc) coordinates of
107 * points: (X1,Y1,Z1,T1) + (S2,D2,P2) -> (X,Y,Z,T)
108 *
109 */
PkaEcEdwAddExtPrcExt(uint32_t rX,uint32_t rY,uint32_t rZ,uint32_t rT,uint32_t rX1,uint32_t rY1,uint32_t rZ1,uint32_t rT1,uint32_t rS2,uint32_t rD2,uint32_t rP2)110 static void PkaEcEdwAddExtPrcExt(
111 uint32_t rX, uint32_t rY, uint32_t rZ, uint32_t rT, /*!< [out] - virt. pointers to PKA registers,
112 containing coordinates of result extended EC point. */
113 uint32_t rX1, uint32_t rY1, uint32_t rZ1, uint32_t rT1, /*!< [in] - virt. pointers to PKA registers,
114 containing coordinates of extended EC point1. */
115 uint32_t rS2, uint32_t rD2, uint32_t rP2) /*!< [in] - virt. pointers to PKA registers,
116 containing coordinates of precomputed EC point2. */
117 {
118 PKA_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, EDW_REG_N_4, rX1); PkiDbgPrintReg("t3: -x1 ", EDW_REG_T3); // hwsub(t3, n_4, x1);
119 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, EDW_REG_T3, rY1); PkiDbgPrintReg("t3: t3+y1 ", EDW_REG_T3); // hwadd(t3, t3, y1);
120 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T5, EDW_REG_T3, rD2); PkiDbgPrintReg("t5: t3*d2 ", EDW_REG_T5); // hwmmul(t5, t3, d2, n, np);
121 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, rY1, rX1); PkiDbgPrintReg("t3: y1+x1 ", EDW_REG_T3); // hwadd(t3, y1, x1);
122 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T6, EDW_REG_T3, rS2); PkiDbgPrintReg("t6: t3*s2 ", EDW_REG_T6); // hwmmul(t6, t3, s2, n, np);
123 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T3, rT1, rP2); PkiDbgPrintReg("t3: t1*p2 ", EDW_REG_T3); // hwmmul(t3, t1, p2, n, np);
124 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, rZ1, rZ1); PkiDbgPrintReg("t4: dbl(z1) ", EDW_REG_T4); // hwadd(t4, z1, z1);
125 PKA_SUB(LEN_ID_N_PKA_REG_BITS, rT, EDW_REG_N_4, EDW_REG_T5); PkiDbgPrintReg("t: -t5 ", rT); // hwsub(t, n_4, t5);
126 PKA_ADD(LEN_ID_N_PKA_REG_BITS, rT, rT, EDW_REG_T6); PkiDbgPrintReg("t: t+t6 ", rT); // hwadd(t, t, t6);
127 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T6, EDW_REG_T6, EDW_REG_T5); PkiDbgPrintReg("t6: t6+t5 ", EDW_REG_T6); // hwadd(t6, t6, t5);
128 PKA_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_T5, EDW_REG_N_4, EDW_REG_T3); PkiDbgPrintReg("t5: -t3 ", EDW_REG_T5); // hwsub(t5, n_4, t3);
129 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T5, EDW_REG_T5, EDW_REG_T4); PkiDbgPrintReg("t5: t5+t4 ", EDW_REG_T5); // wadd(t5, t5, t4);
130 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, EDW_REG_T4, EDW_REG_T3); PkiDbgPrintReg("t4: t4+t3 ", EDW_REG_T4); // hwadd(t4, t4, t3);
131 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, rT, EDW_REG_T5); PkiDbgPrintReg("x: t*t5 ", rX); // hwmmul(x, t, t5, n, np);
132 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rY, EDW_REG_T4, EDW_REG_T6); PkiDbgPrintReg("y: t4*t6 ", rY); // hwmmul(y, t4, t6, n, np);
133 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rZ, EDW_REG_T5, EDW_REG_T4); PkiDbgPrintReg("z: t5*t4 ", rZ); // hwmmul(z, t5, t4, n, np);
134 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rT, rT, EDW_REG_T6); PkiDbgPrintReg("t: t*t6 ", rT); // hwmmul(t, t, t6, n, np);
135 return;
136 }
137
138
139 /**
140 * EC Edwards convert affine point (Afn) to precalculated (Prc) form of coordinates.
141 * A2P: (X,Y) -> (S,D,P)
142 */
PkaEcEdwConvertAfn2Prc(uint32_t rS,uint32_t rD,uint32_t rP,uint32_t rX,uint32_t rY)143 void PkaEcEdwConvertAfn2Prc(
144 uint32_t rS, uint32_t rD, uint32_t rP, /*!< virtual pointers to 3 coordinates
145 of output precomputed EC pont */
146 uint32_t rX, uint32_t rY) /*!< virtual pointers to 2 coordinates
147 of input affine point */
148 {
149 /* if we have x,y,z,t regs, than is efficient to define:
150 rS->rZ, rD->rT, rP->rX */
151 PKA_ADD(LEN_ID_N_PKA_REG_BITS, rS, rY, rX); // hwadd(s, y, x);
152 PKA_SUB(LEN_ID_N_PKA_REG_BITS, rD, EDW_REG_N, rX); // hwsub(d, n, x);
153 PKA_ADD(LEN_ID_N_PKA_REG_BITS, rD, rD, rY); // hwadd(d, d, y);
154 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rP, EDW_REG_D2, rX); // hwmmul(p, d2, x, n, np);
155 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rP, rP, rY); // hwmmul(p, p, y, n, np);
156 return;
157 }
158
159
160 // this SCA_RESISTANT is unrelated to SCA_PROTECTION in hardware
161 /**
162 * EC Edwards convert point to affine (A) form of coordinates.
163 * ToA: (X1,Y1,Z1) -> (X,Y)
164 *
165 * @param rX,rY - virt. pointers to PKA registers, containing
166 * affine coordinates of result EC point.
167 * @param rX1,rY1,rZ1 - virt. pointers to PKA registers, containing
168 * coordinates of result EC point1.
169 * @param EDW_REG_Q - virt. pointer to temp PKA register.
170 *
171 */
PkaEcEdwPointToAfn(uint32_t rX,uint32_t rY,uint32_t rX1,uint32_t rY1,uint32_t rZ1)172 void PkaEcEdwPointToAfn(
173 uint32_t rX, uint32_t rY,
174 uint32_t rX1, uint32_t rY1, uint32_t rZ1)
175 {
176 // PKA_MOD_INV_W_EXP(rY/*q*/, rZ1, rX/*tmp*/); // hwinv(q, z1, n, np); /* by exponent*/
177 PKA_MOD_INV(LEN_ID_N_BITS, EDW_REG_T4/*q*/, rZ1); // PkiDbgPrintReg("rY: inv(rZ1)", rY);
178 PKA_MOD_MUL(LEN_ID_N_BITS, rX, rX1, EDW_REG_T4/*q*/); // PkiDbgPrintReg("rX: rX1*rY", rX);
179 PKA_MOD_MUL(LEN_ID_N_BITS, rY, rY1, EDW_REG_T4/*q*/); // PkiDbgPrintReg("rY: rY1*rY", rY);
180 // PKA_DIV(LEN_ID_N_BITS, EDW_REG_T4, rX, EDW_REG_N); // PkiDbgPrintReg("rX: reduce", rX);
181 // PKA_DIV(LEN_ID_N_BITS, EDW_REG_T4, rY, EDW_REG_N); // PkiDbgPrintReg("rY: reduce", rY);
182
183 return;
184 }
185
186
187 /**
188 * The function performs multiplication of base point by scalar:
189 * P(x,y) = k*G(x,y).
190 *
191 * Implemented algorithm, enhanced by A.Klimov.
192 *
193 * The function can work with any scalar > 0 and used (for example) in Edw.
194 * signature function.
195 *
196 * Assuming:
197 * PKA registers are implicitly defined in pka_ec_edw_glob_regs_def.h file, in
198 * partial: output point P(x,y) is set in registers (EDW_REG_XS, EDW_REG_YS).
199 * All needed data must be loaded into PKA registers in caller function.
200 *
201 * @author reuvenl (11/25/2015)
202 *
203 * @return CC_OK or an error according to mbedtls_cc_ec_mont_edw_error.h definitions.
204 */
PkaEcEdwScalarMultBase(uint32_t * pScalar,size_t scalarSizeInBits)205 CCError_t PkaEcEdwScalarMultBase(
206 uint32_t *pScalar, /*!< [in] the pointer to the scalsr (LS word is
207 leftmost one, MS word - rightmost). */
208 size_t scalarSizeInBits /*!< exact size of the scalar in bits. */ )
209 {
210
211 /* Definitions */
212 CCError_t err = CC_OK;
213 int32_t i, carry = -1;
214 uint32_t twoBits;
215
216 if(scalarSizeInBits == 0)
217 return CC_EC_EDW_INVALID_SCALAR_SIZE_ERROR;
218
219 /* set bits counter to scalar size, rounded up to even */
220 i = ((scalarSizeInBits + 1) & ~1) - 2;
221 twoBits = PKI_GET_TWO_BITS_FROM_WORDS_ARRAY(pScalar, i);
222
223 /*-------------------------------------------------------*/
224 /* init point S according to MS bits of the scalar */
225 /*-------------------------------------------------------*/
226 switch (twoBits) {
227 case 1: /* S = G2 */
228 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_XS, EDW_REG_XG2);
229 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_YS, EDW_REG_YG2);
230 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_TS, EDW_REG_TG2);
231 carry = -1;
232 break;
233 case 2: /* S = G2 */
234 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_XS, EDW_REG_XG2);
235 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_YS, EDW_REG_YG2);
236 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_TS, EDW_REG_TG2);
237 carry = 0;
238 break;
239 case 3: /* S = G4 */
240 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_XS, EDW_REG_XG4);
241 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_YS, EDW_REG_YG4);
242 PKA_COPY(LEN_ID_MAX_BITS, EDW_REG_TS, EDW_REG_TG4);
243 carry = -1;
244 break;
245 default:
246 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
247 }
248
249 /* set ZS = 1 */
250 PKA_CLEAR(LEN_ID_MAX_BITS, EDW_REG_ZS);
251 PKA_SET_BIT0(LEN_ID_N_BITS, EDW_REG_ZS, EDW_REG_ZS);
252
253 /* calculate 4N */
254 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_N_4, EDW_REG_N, EDW_REG_N);
255 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_N_4, EDW_REG_N_4, EDW_REG_N_4);
256
257 /*-------------------------------------------------*/
258 /* double/add loop according to scalar bits */
259 /*-------------------------------------------------*/
260 for(i -= 2; i >= 0; i-= 2) {
261 int32_t swt;
262
263 /* EC doubling */
264 PkaEcEdwDoublExtExt(EDW_REG_XS, EDW_REG_YS, EDW_REG_ZS, EDW_REG_TS);
265 PkaEcEdwDoublExtExt(EDW_REG_XS, EDW_REG_YS, EDW_REG_ZS, EDW_REG_TS); // S *= 4
266
267 /* get two next MS bits of key */
268 twoBits = PKI_GET_TWO_BITS_FROM_WORDS_ARRAY(pScalar, i);
269
270 swt = carry*4 + twoBits;
271
272 /* EC points adding according to MS bits and carry */
273 switch (swt) {
274 /*negative points adding*/
275 case -4: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG4, EDW_REG_SG4, EDW_REG_MPG4); carry = 0; break;
276 case -3: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG2, EDW_REG_SG2, EDW_REG_MPG2); carry = -1; break;
277 case -2: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG2, EDW_REG_SG2, EDW_REG_MPG2); carry = 0; break;
278 case -1: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG, EDW_REG_SG, EDW_REG_MPG); carry = 0; break;
279 /*positive points adding*/
280 case 0: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG, EDW_REG_DG, EDW_REG_PG); carry = -1; break;
281 case 1: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG, EDW_REG_DG, EDW_REG_PG); carry = 0; break;
282 case 2: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG2, EDW_REG_DG2, EDW_REG_PG2); carry = 0; break;
283 case 3: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG4, EDW_REG_DG4, EDW_REG_PG4); carry = -1; break;
284 default:
285 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
286 }
287
288 }
289 /* calculation of result for case that carry is -1; note: registers *
290 * of point G4 now are used as temp registers for point S2 */
291 PkaEcEdwAddExtPrcExt(EDW_REG_SG4,EDW_REG_DG4,EDW_REG_PG4,EDW_REG_MPG4/*modif S2 -> G4*/,
292 EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS/*modif S*/,
293 EDW_REG_DG,EDW_REG_SG,EDW_REG_MPG/*precalc. G*/);
294 /* for afine result used registers of the point G */
295 if (carry == -1)
296 PkaEcEdwPointToAfn(EDW_REG_SG,EDW_REG_DG, EDW_REG_SG4,EDW_REG_DG4,EDW_REG_PG4/*S2*/);
297 else // carry = 0
298 PkaEcEdwPointToAfn(EDW_REG_SG,EDW_REG_DG, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS/*S*/);
299
300 return err;
301 }
302
303
304 /**
305 * The function performs multiplication of base point by scalar of special form:
306 * P(x,y) = k*G(x,y).
307 *
308 * Implemented algorithm, enhanced by A.Klimov.
309 *
310 * The function can work with scalars of special form: exact size is 255
311 * bit and it is a multiple of 8 bit (as requiered in Edw. KeyGen algorithm).
312 *
313 * Assuming:
314 * 1. PKA registers are implicitly defined in pka_ec_edw_glob_regs_def.h file,
315 * in partial: output point P(x,y) is stated by registers (EDW_REG_XS,
316 * EDW_REG_YS).
317 * 2. All needed data must be loaded into PKA registers in caller function.
318 * 3. PKA registers are defined in pka_ec_edw_glob_regs_defh file, in partial:
319 * - output point R(x,y) shall be registers (rXR=EDW_REG_XR, rYR=EDW_REG_YR),
320 * - input point P(X,Y) by (rXP=EDW_REG_XP, rYP=EDW_REG_YP).
321 *
322 * @author reuvenl (11/25/2015)
323 *
324 * @return CC_OK or an error according to mbedtls_cc_ec_mont_edw_error.h definitions.
325 */
PkaEcEdwSpecialScalarMultBase(uint32_t * pScalar,size_t scalarSizeInBits)326 CCError_t PkaEcEdwSpecialScalarMultBase(
327 uint32_t *pScalar, /*!< [in] the pointer to the scalsr (LS word is
328 leftmost one, MS word - rightmost). */
329 size_t scalarSizeInBits /*!< exact size of the scalar in bits. */ )
330 {
331
332 /* Definitions */
333 CCError_t err = CC_OK;
334 int32_t carry, i, twoBits;
335 size_t sizeInWords;
336 uint32_t word;
337
338 if(scalarSizeInBits != 255)
339 return CC_EC_EDW_INVALID_SCALAR_SIZE_ERROR;
340
341 sizeInWords = (scalarSizeInBits + CC_BITS_IN_32BIT_WORD - 1) / CC_BITS_IN_32BIT_WORD;
342 word = pScalar[sizeInWords-1];
343
344 if((word >> 30) != 1)
345 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
346
347
348 carry = -1; // always 0 or -1
349 /* set counter to size rounded up to even */
350 i = ((scalarSizeInBits + 1) & ~1) - 2;
351
352 twoBits = PkiGetNextTwoMsBits(pScalar, &word, i);
353 /* RL ! check for EC edw25519 */
354 if (twoBits != 1)
355 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
356 i -= 2;
357
358 PKA_PRINTF("Init loop: i= %d, twoBits= %d\n", i, twoBits);
359 PKI_DBG_PRINT_REGS("S=g32: \n", EDW_REG_XS, EDW_REG_YS, EDW_REG_ZS, EDW_REG_TS);
360
361 while (1) {
362 int32_t swt;
363
364 /* get two next MS bits of key */
365 twoBits = PkiGetNextTwoMsBits(pScalar, &word, i);
366 swt = carry*4 + twoBits;
367
368 switch (swt) {
369 /*negative points add*/
370 case -4: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG16,EDW_REG_SG16,EDW_REG_MPG16); carry = 0; break;
371 case -3: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG8, EDW_REG_SG8, EDW_REG_MPG8); carry = -1; break;
372 case -2: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG8, EDW_REG_SG8, EDW_REG_MPG8); carry = 0; break;
373 case -1: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG4, EDW_REG_SG4, EDW_REG_MPG4); carry = 0; break;
374 /*positive points add*/
375 case 0: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG4, EDW_REG_DG4, EDW_REG_PG4); carry = -1; break;
376 case 1: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG4, EDW_REG_DG4, EDW_REG_PG4); carry = 0; break;
377 case 2: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG8, EDW_REG_DG8, EDW_REG_PG8); carry = 0; break;
378 case 3: PkaEcEdwAddExtPrcExt(EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_SG16,EDW_REG_DG16,EDW_REG_PG16); carry = -1; break;
379 default:
380 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
381 }
382
383 PKA_PRINTF("\ni= %d b2= %02X b2c4= %3d carry= %d \n", i, twoBits, swt, carry);
384 PKI_DBG_PRINT_REGS("", EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS);
385
386 if ((i -= 2) == 0) {
387 // RL Debug
388 PKA_PRINTF("break: i= %d carry= %d\n", i, carry);
389 break; /*end loop*/
390 }
391
392 /* doubling */
393 PkaEcEdwDoublExtExt(EDW_REG_XS, EDW_REG_YS, EDW_REG_ZS, EDW_REG_TS);
394 PkaEcEdwDoublExtExt(EDW_REG_XS ,EDW_REG_YS, EDW_REG_ZS, EDW_REG_TS); // s *= 4
395 }
396
397 /* check, that the LS bit pair is 00 */
398 if(PkiGetNextTwoMsBits(pScalar, &word, i) != 0) {
399 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
400 }
401
402 /* calculation of result for case that carry is -1; note: registers of *
403 * point G16 now are used as temp registers for rXs2,rYs2,rZs2,rTs2 */
404 PkaEcEdwAddExtPrcExt(EDW_REG_SG16/*xS2*/,EDW_REG_DG16/*yS2*/,EDW_REG_PG16/*zS2*/,EDW_REG_MPG16/*tS2*/,
405 EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS,EDW_REG_TS, EDW_REG_DG4,EDW_REG_SG4,EDW_REG_MPG4);
406
407 if (carry == -1)
408 PkaEcEdwPointToAfn(EDW_REG_SG8,EDW_REG_DG8, EDW_REG_SG16/*xS2*/,EDW_REG_DG16/*yS2*/,EDW_REG_PG16/*zS2*/);
409 else
410 PkaEcEdwPointToAfn(EDW_REG_SG8,EDW_REG_DG8, EDW_REG_XS,EDW_REG_YS,EDW_REG_ZS);
411
412 return err;
413 }
414
415
416
417 /**
418 * The function performs two scalar mult. and add of input and base points simultaneously:
419 * R(x,y) = a*P(x,y) + b*G(x,y), where P - point, G - base point.
420 *
421 * Implemented algorithm of Bernstein D. etc. (version of A.Klimov).
422 *
423 * PKA registers are defined in pka_ec_edw_glob_regs_defh file, in partial:
424 * - output point R(x,y) shall be registers (rXR=EDW_REG_XR, rYR=EDW_REG_YR),
425 * - input point P(X,Y) by (rXP=EDW_REG_XP, rYP=EDW_REG_YP).
426 *
427 * @author reuvenl (11/25/2015)
428 *
429 * @return CC_OK or an error according to mbedtls_cc_ec_mont_edw_error.h definitions.
430 */
PkaEcEdwAddTwoScalarMult(uint32_t rXR,uint32_t rYR,uint32_t rXP,uint32_t rYP,uint32_t * pScalarA,size_t scAsizeInBits,uint32_t * pScalarB,size_t scBsizeInBits,const CCEcEdwDomain_t * pEcDomain)431 CCError_t PkaEcEdwAddTwoScalarMult(
432 uint32_t rXR, uint32_t rYR, /*!< [out] the ID-s of registers, containing aff.
433 coordinates of result point P */
434 uint32_t rXP, uint32_t rYP, /*!< [in] the ID-s of registers, containing aff.
435 coordinates of input point P */
436 uint32_t *pScalarA, /*!< [in] the pointer to the scalsr A (LS word is
437 leftmost one, MS word - rightmost). */
438 size_t scAsizeInBits, /*!< exact size of the scalar A in bits. */
439 uint32_t *pScalarB, /*!< [in] the pointer to the scalsr B (LS word is
440 leftmost one, MS word - rightmost). */
441 size_t scBsizeInBits, /*!< exact size of the scalar B in bits. */
442 const CCEcEdwDomain_t *pEcDomain /*!< [in] pointer to EC domain (curve). */)
443 {
444 /* Definitions */
445 uint32_t twoBits;
446 int32_t i;
447 size_t edwSizeWords = pEcDomain->ecModSizeInWords;
448
449 if(scAsizeInBits == 0 || scBsizeInBits == 0)
450 return CC_EC_EDW_INVALID_SCALAR_SIZE_ERROR;
451
452 /*------------------------------------------------------------------*/
453 /* Load and calculate all EC domain and input data for scalar mult. */
454 /*------------------------------------------------------------------*/
455
456 /* set D2 */
457 PkaCopyDataIntoPkaReg(EDW_REG_D2, LEN_ID_N_PKA_REG_BITS, pEcDomain->ecParamD, edwSizeWords);
458 PKA_MOD_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_D2, EDW_REG_D2, EDW_REG_D2);
459 /* set EDW_REG_N_4 = 4*N */
460 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_N_4, EDW_REG_N, EDW_REG_N);
461 PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_N_4, EDW_REG_N_4, EDW_REG_N_4);
462
463 /* set modified coordinates of EC point G */
464 PkaCopyDataIntoPkaReg(EDW_REG_XG, LEN_ID_N_PKA_REG_BITS, pEcDomain->ecGenX, edwSizeWords);
465 PkaCopyDataIntoPkaReg(EDW_REG_YG, LEN_ID_N_PKA_REG_BITS, pEcDomain->ecGenY, edwSizeWords);
466 PKA_CLEAR(LEN_ID_N_PKA_REG_BITS, EDW_REG_ZG);
467 PKA_SET_BIT0(LEN_ID_N_PKA_REG_BITS, EDW_REG_ZG, EDW_REG_ZG); /*ZG = 1*/
468 PkaCopyDataIntoPkaReg(EDW_REG_TG, LEN_ID_N_PKA_REG_BITS, pEcDomain->ecGenT, edwSizeWords);
469
470 /* load EC G point in precalculated form */
471 PkaCopyDataIntoPkaReg(EDW_REG_SG1, LEN_ID_N_PKA_REG_BITS, pEcDomain->sg, edwSizeWords);
472 PkaCopyDataIntoPkaReg(EDW_REG_DG1, LEN_ID_N_PKA_REG_BITS, pEcDomain->dg, edwSizeWords);
473 PkaCopyDataIntoPkaReg(EDW_REG_PG1, LEN_ID_N_PKA_REG_BITS, pEcDomain->pg, edwSizeWords);
474
475 /* convert input point P to precalculated form: sp=yp+xp; dp=yp-xp; pp=d2*xp*yp mod n */
476 PKA_MOD_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_SP, rYP, rXP);
477 //PKA_ADD(LEN_ID_N_PKA_REG_BITS, EDW_REG_DP, rYP, EDW_REG_N);
478 PKA_MOD_SUB(LEN_ID_N_PKA_REG_BITS, EDW_REG_DP, rYP, rXP);
479 PKA_MOD_MUL(LEN_ID_N_BITS, EDW_REG_PP, EDW_REG_D2, rXP);
480 PKA_MOD_MUL(LEN_ID_N_BITS, EDW_REG_PP, EDW_REG_PP, rYP);
481 // P+G
482 PkaEcEdwAddExtPrcExt(EDW_REG_XPG, EDW_REG_YPG, EDW_REG_ZPG, EDW_REG_TPG, /*modif. P+G*/
483 EDW_REG_XG, EDW_REG_YG, EDW_REG_ZG, EDW_REG_TG, /*modif. G*/
484 EDW_REG_SP, EDW_REG_DP, EDW_REG_PP ); /*precalc. P*/
485
486 /*------------------------------------------------*/
487 /* load start points according to scalars MS bits */
488 /*------------------------------------------------*/
489
490 i = CC_MAX(scAsizeInBits, scBsizeInBits) - 1;
491 twoBits = (PKI_GET_BIT_FROM_WORDS_ARRAY(pScalarA, i) << 1) +
492 PKI_GET_BIT_FROM_WORDS_ARRAY(pScalarB, i);
493
494 switch (twoBits) {
495 case 1: // 01: r = G
496 PKA_COPY(LEN_ID_N_PKA_REG_BITS, rXR, EDW_REG_XG);
497 PKA_COPY(LEN_ID_N_PKA_REG_BITS, rYR, EDW_REG_YG);
498 PKA_COPY(LEN_ID_N_PKA_REG_BITS, EDW_REG_ZR, EDW_REG_ZG);
499 PKA_COPY(LEN_ID_N_PKA_REG_BITS, EDW_REG_TR, EDW_REG_TG);
500 break;
501 case 2: // 10: r = P
502 PKA_COPY(LEN_ID_N_PKA_REG_BITS, rXR, rXP);
503 PKA_COPY(LEN_ID_N_PKA_REG_BITS, rYR, rYP);
504 PKA_CLEAR(LEN_ID_N_PKA_REG_BITS, EDW_REG_ZR);
505 PKA_SET_BIT0(LEN_ID_N_PKA_REG_BITS, EDW_REG_ZR, EDW_REG_ZR);
506 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_TR, rXR, rYR);
507 break;
508 case 3: // 11: r = P+G
509 /* set result point R = P+G*/
510 PKA_COPY(LEN_ID_N_PKA_REG_BITS, rXR, EDW_REG_XPG);
511 PKA_COPY(LEN_ID_N_PKA_REG_BITS, rYR, EDW_REG_YPG);
512 PKA_COPY(LEN_ID_N_PKA_REG_BITS, EDW_REG_ZR, EDW_REG_ZPG);
513 PKA_COPY(LEN_ID_N_PKA_REG_BITS, EDW_REG_TR, EDW_REG_TPG);
514 break;
515
516 default:
517 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
518 }
519
520 /*------------------------------------------------------------------*/
521 /* Perform two points scalar mult. and adding simultaneously */
522 /*------------------------------------------------------------------*/
523
524 while (--i >= 0) {
525
526 /* get two MSBits from scalars A snd B */
527 twoBits = (PKI_GET_BIT_FROM_WORDS_ARRAY(pScalarA, i) << 1) +
528 PKI_GET_BIT_FROM_WORDS_ARRAY(pScalarB, i);
529
530 /* point doubling */
531 PkaEcEdwDoublExtExt(rXR, rYR, EDW_REG_ZR, EDW_REG_TR);
532
533 switch (twoBits) {
534 case 0:
535 break;
536 case 1:
537 // 10: r += G
538 PkaEcEdwAddExtPrcExt(rXR, rYR, EDW_REG_ZR, EDW_REG_TR,
539 rXR, rYR, EDW_REG_ZR, EDW_REG_TR,
540 EDW_REG_SG1, EDW_REG_DG1, EDW_REG_PG1);
541 break;
542 case 2:
543 // 01: r += P
544 PkaEcEdwAddExtPrcExt(rXR, rYR, EDW_REG_ZR, EDW_REG_TR,
545 rXR, rYR, EDW_REG_ZR, EDW_REG_TR,
546 EDW_REG_SP, EDW_REG_DP, EDW_REG_PP);
547 break;
548 case 3:
549 // 11: r += P+G
550 PkaEcEdwAddExtExtExt(rXR, rYR, EDW_REG_ZR, EDW_REG_TR,
551 rXR, rYR, EDW_REG_ZR, EDW_REG_TR,
552 EDW_REG_XPG, EDW_REG_YPG, EDW_REG_ZPG, EDW_REG_TPG);
553 break;
554 default:
555 return CC_EC_EDW_INVALID_SCALAR_DATA_ERROR;
556 }
557 }
558
559 /* convert result to affine */
560 PkaEcEdwPointToAfn(rXR, rYR,/*out*/ rXR, rYR, EDW_REG_ZR/*in*/);
561
562 return CC_OK;
563 }
564
565 /**
566 * The function calculated coordinate X of compressed EC point,
567 * using the given coordinate Y.
568 *
569 * Implemented algorithm Bernstain D. etc (stated by Klimov A.).
570 *
571 * @author reuvenl (1/11/2016)
572 *
573 * Imlicit parametrs
574 * @param [out] rX - ID of PKA register for output decompressed coordinate X.
575 * @param [in/out] rY - ID of PKA register, containing compressed/decompressed coordinate Y.
576 * @param [in] isOddX - indication: "Is the coordinate X odd".
577 *
578 */
PkaEcEdwDecompress(uint32_t rX,uint32_t rY,uint32_t isOddX)579 void PkaEcEdwDecompress(uint32_t rX, uint32_t rY,
580 uint32_t isOddX) /*!< one bit indication: "Is the coordinate X odd" */
581 {
582 uint32_t bit0; /*used to read values from regs. */
583
584 /* decompress: (YP) -> (XP,YP,ZP=1,TP) */
585 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T3, rY, rY); // hwmmul(t3, y, y, n, np);
586 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T4, EDW_REG_T3, EDW_REG_D); // hwmmul(t4, t3, ec_d, n, np);
587 PKA_SUB_IM(LEN_ID_N_PKA_REG_BITS, EDW_REG_T3, EDW_REG_T3, 1); // hwdec(t3, t3);
588 PKA_ADD_IM(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, EDW_REG_T4, 1); // hwinc(t4, t4);
589 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T, EDW_REG_T4, EDW_REG_T4); // hwmmul(t, t4, t4, n, np);
590 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T, EDW_REG_T4, EDW_REG_T); // hwmmul(t, t4, t, n, np);
591 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, EDW_REG_T, EDW_REG_T); // hwmmul(x, t, t, n, np);
592 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, rX, EDW_REG_T4); // hwmmul(x, x, t4, n, np);
593 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T5, rX, EDW_REG_T3); // hwmmul(t5, x, t3, n, np);
594 PKA_MOD_EXP(LEN_ID_N_BITS, rX, EDW_REG_T5, EDW_REG_Q58); // hwmexp(x, t5, q58, n, np);
595 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, rX, EDW_REG_T3); // hwmmul(x, x, t3, n, np);
596 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, rX, EDW_REG_T); // hwmmul(x, x, t, n,np);
597 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, EDW_REG_T, rX, rX); // hwmmul(t, x, x, n,np);
598 PKA_MOD_MUL_ACC(LEN_ID_N_BITS, EDW_REG_T, EDW_REG_T4, EDW_REG_T, EDW_REG_T3); // hwmlap(t,t4, t, t3, n, np, 0);
599
600 PKA_DIV(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, EDW_REG_T, EDW_REG_N);
601 PKA_COMPARE_IM_STATUS(LEN_ID_N_PKA_REG_BITS, EDW_REG_T, 0/*im.val*/, bit0/*status*/);
602 if(bit0) {
603 PKA_MOD_MUL_NFR(LEN_ID_N_BITS, rX, rX, EDW_REG_SQRTM1);
604 }
605
606 PKA_DIV(LEN_ID_N_PKA_REG_BITS, EDW_REG_T4, rX, EDW_REG_N);
607 PKA_READ_BIT0(LEN_ID_N_PKA_REG_BITS, rX, bit0/*bit0*/);
608 if(bit0 != isOddX)
609 PKA_SUB(LEN_ID_N_PKA_REG_BITS, rX, EDW_REG_N, rX);
610
611 }
612
613
614
615