// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 */

#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

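/* Adapt the generic CAST5 setkey routine to the skcipher interface. */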
static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				 unsigned int keylen)
{
	return cast5_setkey(&tfm->base, key, keylen);
}

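/*
 * FPU section helpers: the kernel FPU is only claimed once at least
 * CAST5_PARALLEL_BLOCKS blocks remain in the walk, so small requests are
 * handled entirely by the scalar C implementation.
 */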
static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
				   unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      walk, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

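/*
 * ECB: walk the request and process 16 blocks at a time with the AVX
 * routines, falling back to the generic single-block functions for the
 * remaining blocks.
 */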
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
	bool fpu_enabled = false;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u8 *wsrc = walk.src.virt.addr;
		u8 *wdst = walk.dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, true);
}

static int ecb_decrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, false);
}

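/*
 * CBC encryption is inherently serial (each block depends on the previous
 * ciphertext block), so it is done one block at a time with the generic
 * cipher; only CBC decryption uses the 16-way AVX code.
 */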
static int cbc_encrypt(struct skcipher_request *req)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u64 *src = (u64 *)walk.src.virt.addr;
		u64 *dst = (u64 *)walk.dst.virt.addr;
		u64 *iv = (u64 *)walk.iv;

		do {
			*dst = *src ^ *iv;
			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
			iv = dst;
			src++;
			dst++;
			nbytes -= bsize;
		} while (nbytes >= bsize);

		*(u64 *)walk.iv = *iv;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

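/*
 * CBC decryption works backwards from the last block so that the previous
 * ciphertext block is still available for the XOR when decrypting in place.
 * The last ciphertext block is saved and written back as the next IV.
 */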
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
				  struct skcipher_walk *walk)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __cbc_decrypt(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

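/*
 * Handle the final partial block in CTR mode: encrypt the counter block to
 * produce a keystream block and XOR only the remaining bytes into the output.
 */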
static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

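/*
 * CTR: process 16 blocks at a time with the AVX routine, then handle whole
 * leftover blocks with the scalar cipher, incrementing the 64-bit big-endian
 * counter in the IV for each block.
 */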
static unsigned int __ctr_crypt(struct skcipher_walk *walk,
				struct cast5_ctx *ctx)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

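/*
 * CTR: whole blocks are processed under the FPU; any final partial block is
 * handled with the scalar cipher after the FPU has been released.
 */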
static int ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __ctr_crypt(&walk, ctx);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(&walk, ctx);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}

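/*
 * These algorithms are marked CRYPTO_ALG_INTERNAL and are only reachable
 * through the simd wrappers registered below, which fall back to an
 * asynchronous (cryptd) path whenever the FPU cannot be used directly.
 */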
static struct skcipher_alg cast5_algs[] = {
	{
		.base.cra_name		= "__ecb(cast5)",
		.base.cra_driver_name	= "__ecb-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "__cbc(cast5)",
		.base.cra_driver_name	= "__cbc-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	}, {
		.base.cra_name		= "__ctr(cast5)",
		.base.cra_driver_name	= "__ctr-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.chunksize		= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ctr_crypt,
		.decrypt		= ctr_crypt,
	}
};

static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];

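/*
 * Only register the algorithms if the CPU can save and restore the SSE and
 * YMM (AVX) register state; otherwise the AVX routines are unusable.
 */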
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				&feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(cast5_algs,
					      ARRAY_SIZE(cast5_algs),
					      cast5_simd_algs);
}

static void __exit cast5_exit(void)
{
	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
				  cast5_simd_algs);
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");