1 /*
2 Copyright (c) 1996, David Mazieres <dm@uun.org>
3 Copyright (c) 2008, Damien Miller <djm@openbsd.org>
4 Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
5 Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
6 
7 Permission to use, copy, modify, and distribute this software for any
8 purpose with or without fee is hereby granted, provided that the above
9 copyright notice and this permission notice appear in all copies.
10 
11 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 /*
20 chacha-merged.c version 20080118
21 D. J. Bernstein
22 Public domain.
23 */
24 
25 /* $OpenBSD: chacha_private.h,v 1.3 2022/02/28 21:56:29 dtucker Exp $ */
26 
27 #include <stdint.h>
28 
/* Short aliases for the fixed-width integer types used in this file. */
typedef uint8_t u8;
typedef uint32_t u32;

/*
 * Cipher state: sixteen 32-bit words.  As filled in by chacha_keysetup()
 * and chacha_ivsetup(): words 0-3 hold the constant string, 4-11 the key,
 * 12-13 the block counter and 14-15 the IV.
 */
typedef struct
{
  u32 input[16]; /* could be compressed */
} chacha_ctx;

/* Truncate a value to 8 or 32 bits respectively. */
#define U8V(val) ((u8)(val))
#define U32V(val) ((u32)(val))

/*
 * Rotate a 32-bit word left by `bits`.  `bits` must be in 1..31 (a count
 * of 0 would shift right by 32).  `word` is evaluated twice.
 */
#define ROTL32(word, bits) \
  (U32V((word) << (bits)) | ((word) >> (32 - (bits))))

/* Assemble a 32-bit word from four bytes at `src`, least significant first. */
#define U8TO32_LITTLE(src) \
  (((u32)((src)[3]) << 24) | \
   ((u32)((src)[2]) << 16) | \
   ((u32)((src)[1]) <<  8) | \
   ((u32)((src)[0])      ))

/* Store the 32-bit `word` into four bytes at `dst`, least significant first. */
#define U32TO8_LITTLE(dst, word) \
  do { \
    (dst)[0] = U8V((word)      ); \
    (dst)[1] = U8V((word) >>  8); \
    (dst)[2] = U8V((word) >> 16); \
    (dst)[3] = U8V((word) >> 24); \
  } while (0)

/* Primitive operations of the round function, all on 32-bit words. */
#define ROTATE(word, bits) (ROTL32(word, bits))
#define XOR(lhs, rhs) ((lhs) ^ (rhs))
#define PLUS(lhs, rhs) (U32V((lhs) + (rhs)))
#define PLUSONE(word) (PLUS((word), 1))

/*
 * One quarter-round on four state words, updated in place.  The arguments
 * must be plain lvalues.  The expansion is a bare statement sequence that
 * already ends in ';', so call sites omit the trailing semicolon.
 */
#define QUARTERROUND(qa, qb, qc, qd) \
  qa = PLUS(qa, qb); \
  qd = ROTATE(XOR(qd, qa), 16); \
  qc = PLUS(qc, qd); \
  qb = ROTATE(XOR(qb, qc), 12); \
  qa = PLUS(qa, qb); \
  qd = ROTATE(XOR(qd, qa), 8); \
  qc = PLUS(qc, qd); \
  qb = ROTATE(XOR(qb, qc), 7);

/*
 * Constant strings loaded into state words 0-3: sigma for 256-bit keys,
 * tau for 128-bit keys.  Exactly 16 bytes each, with no terminating NUL.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
70 
71 static void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)72 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
73 {
74   const char *constants;
75 
76   x->input[4] = U8TO32_LITTLE(k + 0);
77   x->input[5] = U8TO32_LITTLE(k + 4);
78   x->input[6] = U8TO32_LITTLE(k + 8);
79   x->input[7] = U8TO32_LITTLE(k + 12);
80   if (kbits == 256) { /* recommended */
81     k += 16;
82     constants = sigma;
83   } else { /* kbits == 128 */
84     constants = tau;
85   }
86   x->input[8] = U8TO32_LITTLE(k + 0);
87   x->input[9] = U8TO32_LITTLE(k + 4);
88   x->input[10] = U8TO32_LITTLE(k + 8);
89   x->input[11] = U8TO32_LITTLE(k + 12);
90   x->input[0] = U8TO32_LITTLE(constants + 0);
91   x->input[1] = U8TO32_LITTLE(constants + 4);
92   x->input[2] = U8TO32_LITTLE(constants + 8);
93   x->input[3] = U8TO32_LITTLE(constants + 12);
94 }
95 
96 static void
chacha_ivsetup(chacha_ctx * x,const u8 * iv)97 chacha_ivsetup(chacha_ctx *x,const u8 *iv)
98 {
99   x->input[12] = 0;
100   x->input[13] = 0;
101   x->input[14] = U8TO32_LITTLE(iv + 0);
102   x->input[15] = U8TO32_LITTLE(iv + 4);
103 }
104 
105 static void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)106 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
107 {
108   u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
109   u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
110   u8 tmp[64];
111   u8 *ctarget = tmp;
112   u_int i;
113 
114   if (!bytes) return;
115 
116   j0 = x->input[0];
117   j1 = x->input[1];
118   j2 = x->input[2];
119   j3 = x->input[3];
120   j4 = x->input[4];
121   j5 = x->input[5];
122   j6 = x->input[6];
123   j7 = x->input[7];
124   j8 = x->input[8];
125   j9 = x->input[9];
126   j10 = x->input[10];
127   j11 = x->input[11];
128   j12 = x->input[12];
129   j13 = x->input[13];
130   j14 = x->input[14];
131   j15 = x->input[15];
132 
133   for (;;) {
134     if (bytes < 64) {
135       for (i = 0;i < bytes;++i) tmp[i] = m[i];
136       m = tmp;
137       ctarget = c;
138       c = tmp;
139     }
140     x0 = j0;
141     x1 = j1;
142     x2 = j2;
143     x3 = j3;
144     x4 = j4;
145     x5 = j5;
146     x6 = j6;
147     x7 = j7;
148     x8 = j8;
149     x9 = j9;
150     x10 = j10;
151     x11 = j11;
152     x12 = j12;
153     x13 = j13;
154     x14 = j14;
155     x15 = j15;
156     for (i = 20;i > 0;i -= 2) {
157       QUARTERROUND( x0, x4, x8,x12)
158       QUARTERROUND( x1, x5, x9,x13)
159       QUARTERROUND( x2, x6,x10,x14)
160       QUARTERROUND( x3, x7,x11,x15)
161       QUARTERROUND( x0, x5,x10,x15)
162       QUARTERROUND( x1, x6,x11,x12)
163       QUARTERROUND( x2, x7, x8,x13)
164       QUARTERROUND( x3, x4, x9,x14)
165     }
166     x0 = PLUS(x0,j0);
167     x1 = PLUS(x1,j1);
168     x2 = PLUS(x2,j2);
169     x3 = PLUS(x3,j3);
170     x4 = PLUS(x4,j4);
171     x5 = PLUS(x5,j5);
172     x6 = PLUS(x6,j6);
173     x7 = PLUS(x7,j7);
174     x8 = PLUS(x8,j8);
175     x9 = PLUS(x9,j9);
176     x10 = PLUS(x10,j10);
177     x11 = PLUS(x11,j11);
178     x12 = PLUS(x12,j12);
179     x13 = PLUS(x13,j13);
180     x14 = PLUS(x14,j14);
181     x15 = PLUS(x15,j15);
182 
183 #ifndef KEYSTREAM_ONLY
184     x0 = XOR(x0,U8TO32_LITTLE(m + 0));
185     x1 = XOR(x1,U8TO32_LITTLE(m + 4));
186     x2 = XOR(x2,U8TO32_LITTLE(m + 8));
187     x3 = XOR(x3,U8TO32_LITTLE(m + 12));
188     x4 = XOR(x4,U8TO32_LITTLE(m + 16));
189     x5 = XOR(x5,U8TO32_LITTLE(m + 20));
190     x6 = XOR(x6,U8TO32_LITTLE(m + 24));
191     x7 = XOR(x7,U8TO32_LITTLE(m + 28));
192     x8 = XOR(x8,U8TO32_LITTLE(m + 32));
193     x9 = XOR(x9,U8TO32_LITTLE(m + 36));
194     x10 = XOR(x10,U8TO32_LITTLE(m + 40));
195     x11 = XOR(x11,U8TO32_LITTLE(m + 44));
196     x12 = XOR(x12,U8TO32_LITTLE(m + 48));
197     x13 = XOR(x13,U8TO32_LITTLE(m + 52));
198     x14 = XOR(x14,U8TO32_LITTLE(m + 56));
199     x15 = XOR(x15,U8TO32_LITTLE(m + 60));
200 #endif
201 
202     j12 = PLUSONE(j12);
203     if (!j12) {
204       j13 = PLUSONE(j13);
205       /* stopping at 2^70 bytes per nonce is user's responsibility */
206     }
207 
208     U32TO8_LITTLE(c + 0,x0);
209     U32TO8_LITTLE(c + 4,x1);
210     U32TO8_LITTLE(c + 8,x2);
211     U32TO8_LITTLE(c + 12,x3);
212     U32TO8_LITTLE(c + 16,x4);
213     U32TO8_LITTLE(c + 20,x5);
214     U32TO8_LITTLE(c + 24,x6);
215     U32TO8_LITTLE(c + 28,x7);
216     U32TO8_LITTLE(c + 32,x8);
217     U32TO8_LITTLE(c + 36,x9);
218     U32TO8_LITTLE(c + 40,x10);
219     U32TO8_LITTLE(c + 44,x11);
220     U32TO8_LITTLE(c + 48,x12);
221     U32TO8_LITTLE(c + 52,x13);
222     U32TO8_LITTLE(c + 56,x14);
223     U32TO8_LITTLE(c + 60,x15);
224 
225     if (bytes <= 64) {
226       if (bytes < 64) {
227         for (i = 0;i < bytes;++i) ctarget[i] = c[i];
228       }
229       x->input[12] = j12;
230       x->input[13] = j13;
231       return;
232     }
233     bytes -= 64;
234     c += 64;
235 #ifndef KEYSTREAM_ONLY
236     m += 64;
237 #endif
238   }
239 }
240