1 /*
2  * Driver for IBM PowerNV 842 compression accelerator
3  *
4  * Copyright (C) 2015 Dan Streetman, IBM Corp
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  */
16 
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18 
19 #include "nx-842.h"
20 
21 #include <linux/timer.h>
22 
23 #include <asm/prom.h>
24 #include <asm/icswx.h>
25 #include <asm/vas.h>
26 #include <asm/reg.h>
27 #include <asm/opal-api.h>
28 #include <asm/opal.h>
29 
30 MODULE_LICENSE("GPL");
31 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
32 MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
33 MODULE_ALIAS_CRYPTO("842");
34 MODULE_ALIAS_CRYPTO("842-nx");
35 
/* Alignment of the work memory; the exec paths PTR_ALIGN() to this value */
#define WORKMEM_ALIGN	(CRB_ALIGN)
/* How long wait_for_csb() keeps polling after the request start time */
#define CSB_WAIT_MAX	(5000) /* ms */
/* Bound on the copy/paste retry loop in nx842_exec_vas() */
#define VAS_RETRIES	(10)
/* # of requests allowed per RxFIFO at a time. 0 for unlimited */
#define MAX_CREDITS_PER_RXFIFO	(1024)
41 
/*
 * Per-request working memory.
 *
 * The exec functions in this file round the caller's workmem pointer up
 * to WORKMEM_ALIGN with PTR_ALIGN() and then use it as this structure.
 */
struct nx842_workmem {
	/* Below fields must be properly aligned */
	struct coprocessor_request_block crb; /* CRB_ALIGN align */
	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
	/* Above fields must be properly aligned */

	/* set right before the request is submitted; read by wait_for_csb() */
	ktime_t start;

	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
} __packed __aligned(WORKMEM_ALIGN);
53 
/*
 * One coprocessor entry found while probing the device tree.
 * Entries are linked on the nx842_coprocs list.
 */
struct nx842_coproc {
	unsigned int chip_id;	/* set by nx842_add_coprocs_list() */
	unsigned int ct;	/* coprocessor type */
	unsigned int ci;	/* Coprocessor instance, used with icswx */
	struct {
		/* receive window; closed in nx842_delete_coprocs() if set */
		struct vas_window *rxwin;
		/* VAS id passed to vas_rx_win_open()/vas_tx_win_open() */
		int id;
	} vas;
	struct list_head list;	/* linkage on nx842_coprocs */
};
64 
/*
 * Send the request to NX engine on the chip for the corresponding CPU
 * where the process is executing. Use with VAS function.
 */
static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);

/* no cpu hotplug on powernv, so this list never changes after init */
static LIST_HEAD(nx842_coprocs);
static unsigned int nx842_ct;	/* used in icswx function */

/*
 * Execution backend, set at module init to either nx842_exec_icswx or
 * nx842_exec_vas. The compress/decompress entry points call through it.
 */
static int (*nx842_powernv_exec)(const unsigned char *in,
				unsigned int inlen, unsigned char *out,
				unsigned int *outlenp, void *workmem, int fc);
78 
79 /**
80  * setup_indirect_dde - Setup an indirect DDE
81  *
82  * The DDE is setup with the the DDE count, byte count, and address of
83  * first direct DDE in the list.
84  */
setup_indirect_dde(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned int dde_count,unsigned int byte_count)85 static void setup_indirect_dde(struct data_descriptor_entry *dde,
86 			       struct data_descriptor_entry *ddl,
87 			       unsigned int dde_count, unsigned int byte_count)
88 {
89 	dde->flags = 0;
90 	dde->count = dde_count;
91 	dde->index = 0;
92 	dde->length = cpu_to_be32(byte_count);
93 	dde->address = cpu_to_be64(nx842_get_pa(ddl));
94 }
95 
96 /**
97  * setup_direct_dde - Setup single DDE from buffer
98  *
99  * The DDE is setup with the buffer and length.  The buffer must be properly
100  * aligned.  The used length is returned.
101  * Returns:
102  *   N    Successfully set up DDE with N bytes
103  */
setup_direct_dde(struct data_descriptor_entry * dde,unsigned long pa,unsigned int len)104 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
105 				     unsigned long pa, unsigned int len)
106 {
107 	unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
108 
109 	dde->flags = 0;
110 	dde->count = 0;
111 	dde->index = 0;
112 	dde->length = cpu_to_be32(l);
113 	dde->address = cpu_to_be64(pa);
114 
115 	return l;
116 }
117 
118 /**
119  * setup_ddl - Setup DDL from buffer
120  *
121  * Returns:
122  *   0		Successfully set up DDL
123  */
setup_ddl(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned char * buf,unsigned int len,bool in)124 static int setup_ddl(struct data_descriptor_entry *dde,
125 		     struct data_descriptor_entry *ddl,
126 		     unsigned char *buf, unsigned int len,
127 		     bool in)
128 {
129 	unsigned long pa = nx842_get_pa(buf);
130 	int i, ret, total_len = len;
131 
132 	if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
133 		pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
134 			 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
135 		return -EINVAL;
136 	}
137 
138 	/* only need to check last mult; since buffer must be
139 	 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
140 	 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
141 	 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
142 	 */
143 	if (len % DDE_BUFFER_LAST_MULT) {
144 		pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
145 			 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
146 		if (in)
147 			return -EINVAL;
148 		len = round_down(len, DDE_BUFFER_LAST_MULT);
149 	}
150 
151 	/* use a single direct DDE */
152 	if (len <= LEN_ON_PAGE(pa)) {
153 		ret = setup_direct_dde(dde, pa, len);
154 		WARN_ON(ret < len);
155 		return 0;
156 	}
157 
158 	/* use the DDL */
159 	for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
160 		ret = setup_direct_dde(&ddl[i], pa, len);
161 		buf += ret;
162 		len -= ret;
163 		pa = nx842_get_pa(buf);
164 	}
165 
166 	if (len > 0) {
167 		pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
168 			 total_len, in ? "input" : "output", len);
169 		if (in)
170 			return -EMSGSIZE;
171 		total_len -= len;
172 	}
173 	setup_indirect_dde(dde, ddl, i, total_len);
174 
175 	return 0;
176 }
177 
/*
 * Log an error message followed by the CSB's flags, cs, cc and ce bytes
 * and its (big-endian) count field.
 */
#define CSB_ERR(csb, msg, ...)					\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
	       ##__VA_ARGS__, (csb)->flags,			\
	       (csb)->cs, (csb)->cc, (csb)->ce,			\
	       be32_to_cpu((csb)->count))

/* Same as CSB_ERR, with the CSB's (big-endian) address field appended to msg */
#define CSB_ERR_ADDR(csb, msg, ...)				\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
		(unsigned long)be64_to_cpu((csb)->address))
187 
188 /**
189  * wait_for_csb
190  */
wait_for_csb(struct nx842_workmem * wmem,struct coprocessor_status_block * csb)191 static int wait_for_csb(struct nx842_workmem *wmem,
192 			struct coprocessor_status_block *csb)
193 {
194 	ktime_t start = wmem->start, now = ktime_get();
195 	ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
196 
197 	while (!(READ_ONCE(csb->flags) & CSB_V)) {
198 		cpu_relax();
199 		now = ktime_get();
200 		if (ktime_after(now, timeout))
201 			break;
202 	}
203 
204 	/* hw has updated csb and output buffer */
205 	barrier();
206 
207 	/* check CSB flags */
208 	if (!(csb->flags & CSB_V)) {
209 		CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
210 			(long)ktime_us_delta(now, start));
211 		return -ETIMEDOUT;
212 	}
213 	if (csb->flags & CSB_F) {
214 		CSB_ERR(csb, "Invalid CSB format");
215 		return -EPROTO;
216 	}
217 	if (csb->flags & CSB_CH) {
218 		CSB_ERR(csb, "Invalid CSB chaining state");
219 		return -EPROTO;
220 	}
221 
222 	/* verify CSB completion sequence is 0 */
223 	if (csb->cs) {
224 		CSB_ERR(csb, "Invalid CSB completion sequence");
225 		return -EPROTO;
226 	}
227 
228 	/* check CSB Completion Code */
229 	switch (csb->cc) {
230 	/* no error */
231 	case CSB_CC_SUCCESS:
232 		break;
233 	case CSB_CC_TPBC_GT_SPBC:
234 		/* not an error, but the compressed data is
235 		 * larger than the uncompressed data :(
236 		 */
237 		break;
238 
239 	/* input data errors */
240 	case CSB_CC_OPERAND_OVERLAP:
241 		/* input and output buffers overlap */
242 		CSB_ERR(csb, "Operand Overlap error");
243 		return -EINVAL;
244 	case CSB_CC_INVALID_OPERAND:
245 		CSB_ERR(csb, "Invalid operand");
246 		return -EINVAL;
247 	case CSB_CC_NOSPC:
248 		/* output buffer too small */
249 		return -ENOSPC;
250 	case CSB_CC_ABORT:
251 		CSB_ERR(csb, "Function aborted");
252 		return -EINTR;
253 	case CSB_CC_CRC_MISMATCH:
254 		CSB_ERR(csb, "CRC mismatch");
255 		return -EINVAL;
256 	case CSB_CC_TEMPL_INVALID:
257 		CSB_ERR(csb, "Compressed data template invalid");
258 		return -EINVAL;
259 	case CSB_CC_TEMPL_OVERFLOW:
260 		CSB_ERR(csb, "Compressed data template shows data past end");
261 		return -EINVAL;
262 	case CSB_CC_EXCEED_BYTE_COUNT:	/* P9 or later */
263 		/*
264 		 * DDE byte count exceeds the limit specified in Maximum
265 		 * byte count register.
266 		 */
267 		CSB_ERR(csb, "DDE byte count exceeds the limit");
268 		return -EINVAL;
269 
270 	/* these should not happen */
271 	case CSB_CC_INVALID_ALIGN:
272 		/* setup_ddl should have detected this */
273 		CSB_ERR_ADDR(csb, "Invalid alignment");
274 		return -EINVAL;
275 	case CSB_CC_DATA_LENGTH:
276 		/* setup_ddl should have detected this */
277 		CSB_ERR(csb, "Invalid data length");
278 		return -EINVAL;
279 	case CSB_CC_WR_TRANSLATION:
280 	case CSB_CC_TRANSLATION:
281 	case CSB_CC_TRANSLATION_DUP1:
282 	case CSB_CC_TRANSLATION_DUP2:
283 	case CSB_CC_TRANSLATION_DUP3:
284 	case CSB_CC_TRANSLATION_DUP4:
285 	case CSB_CC_TRANSLATION_DUP5:
286 	case CSB_CC_TRANSLATION_DUP6:
287 		/* should not happen, we use physical addrs */
288 		CSB_ERR_ADDR(csb, "Translation error");
289 		return -EPROTO;
290 	case CSB_CC_WR_PROTECTION:
291 	case CSB_CC_PROTECTION:
292 	case CSB_CC_PROTECTION_DUP1:
293 	case CSB_CC_PROTECTION_DUP2:
294 	case CSB_CC_PROTECTION_DUP3:
295 	case CSB_CC_PROTECTION_DUP4:
296 	case CSB_CC_PROTECTION_DUP5:
297 	case CSB_CC_PROTECTION_DUP6:
298 		/* should not happen, we use physical addrs */
299 		CSB_ERR_ADDR(csb, "Protection error");
300 		return -EPROTO;
301 	case CSB_CC_PRIVILEGE:
302 		/* shouldn't happen, we're in HYP mode */
303 		CSB_ERR(csb, "Insufficient Privilege error");
304 		return -EPROTO;
305 	case CSB_CC_EXCESSIVE_DDE:
306 		/* shouldn't happen, setup_ddl doesn't use many dde's */
307 		CSB_ERR(csb, "Too many DDEs in DDL");
308 		return -EINVAL;
309 	case CSB_CC_TRANSPORT:
310 	case CSB_CC_INVALID_CRB:	/* P9 or later */
311 		/* shouldn't happen, we setup CRB correctly */
312 		CSB_ERR(csb, "Invalid CRB");
313 		return -EINVAL;
314 	case CSB_CC_INVALID_DDE:	/* P9 or later */
315 		/*
316 		 * shouldn't happen, setup_direct/indirect_dde creates
317 		 * DDE right
318 		 */
319 		CSB_ERR(csb, "Invalid DDE");
320 		return -EINVAL;
321 	case CSB_CC_SEGMENTED_DDL:
322 		/* shouldn't happen, setup_ddl creates DDL right */
323 		CSB_ERR(csb, "Segmented DDL error");
324 		return -EINVAL;
325 	case CSB_CC_DDE_OVERFLOW:
326 		/* shouldn't happen, setup_ddl creates DDL right */
327 		CSB_ERR(csb, "DDE overflow error");
328 		return -EINVAL;
329 	case CSB_CC_SESSION:
330 		/* should not happen with ICSWX */
331 		CSB_ERR(csb, "Session violation error");
332 		return -EPROTO;
333 	case CSB_CC_CHAIN:
334 		/* should not happen, we don't use chained CRBs */
335 		CSB_ERR(csb, "Chained CRB error");
336 		return -EPROTO;
337 	case CSB_CC_SEQUENCE:
338 		/* should not happen, we don't use chained CRBs */
339 		CSB_ERR(csb, "CRB sequence number error");
340 		return -EPROTO;
341 	case CSB_CC_UNKNOWN_CODE:
342 		CSB_ERR(csb, "Unknown subfunction code");
343 		return -EPROTO;
344 
345 	/* hardware errors */
346 	case CSB_CC_RD_EXTERNAL:
347 	case CSB_CC_RD_EXTERNAL_DUP1:
348 	case CSB_CC_RD_EXTERNAL_DUP2:
349 	case CSB_CC_RD_EXTERNAL_DUP3:
350 		CSB_ERR_ADDR(csb, "Read error outside coprocessor");
351 		return -EPROTO;
352 	case CSB_CC_WR_EXTERNAL:
353 		CSB_ERR_ADDR(csb, "Write error outside coprocessor");
354 		return -EPROTO;
355 	case CSB_CC_INTERNAL:
356 		CSB_ERR(csb, "Internal error in coprocessor");
357 		return -EPROTO;
358 	case CSB_CC_PROVISION:
359 		CSB_ERR(csb, "Storage provision error");
360 		return -EPROTO;
361 	case CSB_CC_HW:
362 		CSB_ERR(csb, "Correctable hardware error");
363 		return -EPROTO;
364 	case CSB_CC_HW_EXPIRED_TIMER:	/* P9 or later */
365 		CSB_ERR(csb, "Job did not finish within allowed time");
366 		return -EPROTO;
367 
368 	default:
369 		CSB_ERR(csb, "Invalid CC %d", csb->cc);
370 		return -EPROTO;
371 	}
372 
373 	/* check Completion Extension state */
374 	if (csb->ce & CSB_CE_TERMINATION) {
375 		CSB_ERR(csb, "CSB request was terminated");
376 		return -EPROTO;
377 	}
378 	if (csb->ce & CSB_CE_INCOMPLETE) {
379 		CSB_ERR(csb, "CSB request not complete");
380 		return -EPROTO;
381 	}
382 	if (!(csb->ce & CSB_CE_TPBC)) {
383 		CSB_ERR(csb, "TPBC not provided, unknown target length");
384 		return -EPROTO;
385 	}
386 
387 	/* successful completion */
388 	pr_debug_ratelimited("Processed %u bytes in %lu us\n",
389 			     be32_to_cpu(csb->count),
390 			     (unsigned long)ktime_us_delta(now, start));
391 
392 	return 0;
393 }
394 
nx842_config_crb(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int outlen,struct nx842_workmem * wmem)395 static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
396 			unsigned char *out, unsigned int outlen,
397 			struct nx842_workmem *wmem)
398 {
399 	struct coprocessor_request_block *crb;
400 	struct coprocessor_status_block *csb;
401 	u64 csb_addr;
402 	int ret;
403 
404 	crb = &wmem->crb;
405 	csb = &crb->csb;
406 
407 	/* Clear any previous values */
408 	memset(crb, 0, sizeof(*crb));
409 
410 	/* set up DDLs */
411 	ret = setup_ddl(&crb->source, wmem->ddl_in,
412 			(unsigned char *)in, inlen, true);
413 	if (ret)
414 		return ret;
415 
416 	ret = setup_ddl(&crb->target, wmem->ddl_out,
417 			out, outlen, false);
418 	if (ret)
419 		return ret;
420 
421 	/* set up CRB's CSB addr */
422 	csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
423 	csb_addr |= CRB_CSB_AT; /* Addrs are phys */
424 	crb->csb_addr = cpu_to_be64(csb_addr);
425 
426 	return 0;
427 }
428 
429 /**
430  * nx842_exec_icswx - compress/decompress data using the 842 algorithm
431  *
432  * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
433  * This compresses or decompresses the provided input buffer into the provided
434  * output buffer.
435  *
436  * Upon return from this function @outlen contains the length of the
437  * output data.  If there is an error then @outlen will be 0 and an
438  * error will be specified by the return code from this function.
439  *
440  * The @workmem buffer should only be used by one function call at a time.
441  *
442  * @in: input buffer pointer
443  * @inlen: input buffer size
444  * @out: output buffer pointer
445  * @outlenp: output buffer size pointer
446  * @workmem: working memory buffer pointer, size determined by
447  *           nx842_powernv_driver.workmem_size
448  * @fc: function code, see CCW Function Codes in nx-842.h
449  *
450  * Returns:
451  *   0		Success, output of length @outlenp stored in the buffer at @out
452  *   -ENODEV	Hardware unavailable
453  *   -ENOSPC	Output buffer is to small
454  *   -EMSGSIZE	Input buffer too large
455  *   -EINVAL	buffer constraints do not fix nx842_constraints
456  *   -EPROTO	hardware error during operation
457  *   -ETIMEDOUT	hardware did not complete operation in reasonable time
458  *   -EINTR	operation was aborted
459  */
nx842_exec_icswx(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)460 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
461 				  unsigned char *out, unsigned int *outlenp,
462 				  void *workmem, int fc)
463 {
464 	struct coprocessor_request_block *crb;
465 	struct coprocessor_status_block *csb;
466 	struct nx842_workmem *wmem;
467 	int ret;
468 	u32 ccw;
469 	unsigned int outlen = *outlenp;
470 
471 	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
472 
473 	*outlenp = 0;
474 
475 	/* shoudn't happen, we don't load without a coproc */
476 	if (!nx842_ct) {
477 		pr_err_ratelimited("coprocessor CT is 0");
478 		return -ENODEV;
479 	}
480 
481 	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
482 	if (ret)
483 		return ret;
484 
485 	crb = &wmem->crb;
486 	csb = &crb->csb;
487 
488 	/* set up CCW */
489 	ccw = 0;
490 	ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
491 	ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
492 	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
493 
494 	wmem->start = ktime_get();
495 
496 	/* do ICSWX */
497 	ret = icswx(cpu_to_be32(ccw), crb);
498 
499 	pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
500 			     (unsigned int)ccw,
501 			     (unsigned int)be32_to_cpu(crb->ccw));
502 
503 	/*
504 	 * NX842 coprocessor sets 3rd bit in CR register with XER[S0].
505 	 * XER[S0] is the integer summary overflow bit which is nothing
506 	 * to do NX. Since this bit can be set with other return values,
507 	 * mask this bit.
508 	 */
509 	ret &= ~ICSWX_XERS0;
510 
511 	switch (ret) {
512 	case ICSWX_INITIATED:
513 		ret = wait_for_csb(wmem, csb);
514 		break;
515 	case ICSWX_BUSY:
516 		pr_debug_ratelimited("842 Coprocessor busy\n");
517 		ret = -EBUSY;
518 		break;
519 	case ICSWX_REJECTED:
520 		pr_err_ratelimited("ICSWX rejected\n");
521 		ret = -EPROTO;
522 		break;
523 	}
524 
525 	if (!ret)
526 		*outlenp = be32_to_cpu(csb->count);
527 
528 	return ret;
529 }
530 
531 /**
532  * nx842_exec_vas - compress/decompress data using the 842 algorithm
533  *
534  * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
535  * This compresses or decompresses the provided input buffer into the provided
536  * output buffer.
537  *
538  * Upon return from this function @outlen contains the length of the
539  * output data.  If there is an error then @outlen will be 0 and an
540  * error will be specified by the return code from this function.
541  *
542  * The @workmem buffer should only be used by one function call at a time.
543  *
544  * @in: input buffer pointer
545  * @inlen: input buffer size
546  * @out: output buffer pointer
547  * @outlenp: output buffer size pointer
548  * @workmem: working memory buffer pointer, size determined by
549  *           nx842_powernv_driver.workmem_size
550  * @fc: function code, see CCW Function Codes in nx-842.h
551  *
552  * Returns:
553  *   0		Success, output of length @outlenp stored in the buffer
554  *		at @out
555  *   -ENODEV	Hardware unavailable
556  *   -ENOSPC	Output buffer is to small
557  *   -EMSGSIZE	Input buffer too large
558  *   -EINVAL	buffer constraints do not fix nx842_constraints
559  *   -EPROTO	hardware error during operation
560  *   -ETIMEDOUT	hardware did not complete operation in reasonable time
561  *   -EINTR	operation was aborted
562  */
nx842_exec_vas(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)563 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
564 				  unsigned char *out, unsigned int *outlenp,
565 				  void *workmem, int fc)
566 {
567 	struct coprocessor_request_block *crb;
568 	struct coprocessor_status_block *csb;
569 	struct nx842_workmem *wmem;
570 	struct vas_window *txwin;
571 	int ret, i = 0;
572 	u32 ccw;
573 	unsigned int outlen = *outlenp;
574 
575 	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
576 
577 	*outlenp = 0;
578 
579 	crb = &wmem->crb;
580 	csb = &crb->csb;
581 
582 	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
583 	if (ret)
584 		return ret;
585 
586 	ccw = 0;
587 	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
588 	crb->ccw = cpu_to_be32(ccw);
589 
590 	do {
591 		wmem->start = ktime_get();
592 		preempt_disable();
593 		txwin = this_cpu_read(cpu_txwin);
594 
595 		/*
596 		 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
597 		 * @crb and @offset.
598 		 */
599 		vas_copy_crb(crb, 0);
600 
601 		/*
602 		 * VAS paste previously copied CRB to NX.
603 		 * @txwin, @offset and @last (must be true).
604 		 */
605 		ret = vas_paste_crb(txwin, 0, 1);
606 		preempt_enable();
607 		/*
608 		 * Retry copy/paste function for VAS failures.
609 		 */
610 	} while (ret && (i++ < VAS_RETRIES));
611 
612 	if (ret) {
613 		pr_err_ratelimited("VAS copy/paste failed\n");
614 		return ret;
615 	}
616 
617 	ret = wait_for_csb(wmem, csb);
618 	if (!ret)
619 		*outlenp = be32_to_cpu(csb->count);
620 
621 	return ret;
622 }
623 
624 /**
625  * nx842_powernv_compress - Compress data using the 842 algorithm
626  *
627  * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
628  * The input buffer is compressed and the result is stored in the
629  * provided output buffer.
630  *
631  * Upon return from this function @outlen contains the length of the
632  * compressed data.  If there is an error then @outlen will be 0 and an
633  * error will be specified by the return code from this function.
634  *
635  * @in: input buffer pointer
636  * @inlen: input buffer size
637  * @out: output buffer pointer
638  * @outlenp: output buffer size pointer
639  * @workmem: working memory buffer pointer, size determined by
640  *           nx842_powernv_driver.workmem_size
641  *
642  * Returns: see @nx842_powernv_exec()
643  */
nx842_powernv_compress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)644 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
645 				  unsigned char *out, unsigned int *outlenp,
646 				  void *wmem)
647 {
648 	return nx842_powernv_exec(in, inlen, out, outlenp,
649 				      wmem, CCW_FC_842_COMP_CRC);
650 }
651 
652 /**
653  * nx842_powernv_decompress - Decompress data using the 842 algorithm
654  *
655  * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
656  * The input buffer is decompressed and the result is stored in the
657  * provided output buffer.
658  *
659  * Upon return from this function @outlen contains the length of the
660  * decompressed data.  If there is an error then @outlen will be 0 and an
661  * error will be specified by the return code from this function.
662  *
663  * @in: input buffer pointer
664  * @inlen: input buffer size
665  * @out: output buffer pointer
666  * @outlenp: output buffer size pointer
667  * @workmem: working memory buffer pointer, size determined by
668  *           nx842_powernv_driver.workmem_size
669  *
670  * Returns: see @nx842_powernv_exec()
671  */
nx842_powernv_decompress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)672 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
673 				    unsigned char *out, unsigned int *outlenp,
674 				    void *wmem)
675 {
676 	return nx842_powernv_exec(in, inlen, out, outlenp,
677 				      wmem, CCW_FC_842_DECOMP_CRC);
678 }
679 
nx842_add_coprocs_list(struct nx842_coproc * coproc,int chipid)680 static inline void nx842_add_coprocs_list(struct nx842_coproc *coproc,
681 					int chipid)
682 {
683 	coproc->chip_id = chipid;
684 	INIT_LIST_HEAD(&coproc->list);
685 	list_add(&coproc->list, &nx842_coprocs);
686 }
687 
nx842_alloc_txwin(struct nx842_coproc * coproc)688 static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc)
689 {
690 	struct vas_window *txwin = NULL;
691 	struct vas_tx_win_attr txattr;
692 
693 	/*
694 	 * Kernel requests will be high priority. So open send
695 	 * windows only for high priority RxFIFO entries.
696 	 */
697 	vas_init_tx_win_attr(&txattr, coproc->ct);
698 	txattr.lpid = 0;	/* lpid is 0 for kernel requests */
699 	txattr.pid = 0;		/* pid is 0 for kernel requests */
700 
701 	/*
702 	 * Open a VAS send window which is used to send request to NX.
703 	 */
704 	txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
705 	if (IS_ERR(txwin))
706 		pr_err("ibm,nx-842: Can not open TX window: %ld\n",
707 				PTR_ERR(txwin));
708 
709 	return txwin;
710 }
711 
712 /*
713  * Identify chip ID for each CPU, open send wndow for the corresponding NX
714  * engine and save txwin in percpu cpu_txwin.
715  * cpu_txwin is used in copy/paste operation for each compression /
716  * decompression request.
717  */
nx842_open_percpu_txwins(void)718 static int nx842_open_percpu_txwins(void)
719 {
720 	struct nx842_coproc *coproc, *n;
721 	unsigned int i, chip_id;
722 
723 	for_each_possible_cpu(i) {
724 		struct vas_window *txwin = NULL;
725 
726 		chip_id = cpu_to_chip_id(i);
727 
728 		list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
729 			/*
730 			 * Kernel requests use only high priority FIFOs. So
731 			 * open send windows for these FIFOs.
732 			 */
733 
734 			if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
735 				continue;
736 
737 			if (coproc->chip_id == chip_id) {
738 				txwin = nx842_alloc_txwin(coproc);
739 				if (IS_ERR(txwin))
740 					return PTR_ERR(txwin);
741 
742 				per_cpu(cpu_txwin, i) = txwin;
743 				break;
744 			}
745 		}
746 
747 		if (!per_cpu(cpu_txwin, i)) {
748 			/* shouldn't happen, Each chip will have NX engine */
749 			pr_err("NX engine is not available for CPU %d\n", i);
750 			return -EINVAL;
751 		}
752 	}
753 
754 	return 0;
755 }
756 
vas_cfg_coproc_info(struct device_node * dn,int chip_id,int vasid,int * ct)757 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
758 					int vasid, int *ct)
759 {
760 	struct vas_window *rxwin = NULL;
761 	struct vas_rx_win_attr rxattr;
762 	struct nx842_coproc *coproc;
763 	u32 lpid, pid, tid, fifo_size;
764 	u64 rx_fifo;
765 	const char *priority;
766 	int ret;
767 
768 	ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
769 	if (ret) {
770 		pr_err("Missing rx-fifo-address property\n");
771 		return ret;
772 	}
773 
774 	ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
775 	if (ret) {
776 		pr_err("Missing rx-fifo-size property\n");
777 		return ret;
778 	}
779 
780 	ret = of_property_read_u32(dn, "lpid", &lpid);
781 	if (ret) {
782 		pr_err("Missing lpid property\n");
783 		return ret;
784 	}
785 
786 	ret = of_property_read_u32(dn, "pid", &pid);
787 	if (ret) {
788 		pr_err("Missing pid property\n");
789 		return ret;
790 	}
791 
792 	ret = of_property_read_u32(dn, "tid", &tid);
793 	if (ret) {
794 		pr_err("Missing tid property\n");
795 		return ret;
796 	}
797 
798 	ret = of_property_read_string(dn, "priority", &priority);
799 	if (ret) {
800 		pr_err("Missing priority property\n");
801 		return ret;
802 	}
803 
804 	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
805 	if (!coproc)
806 		return -ENOMEM;
807 
808 	if (!strcmp(priority, "High"))
809 		coproc->ct = VAS_COP_TYPE_842_HIPRI;
810 	else if (!strcmp(priority, "Normal"))
811 		coproc->ct = VAS_COP_TYPE_842;
812 	else {
813 		pr_err("Invalid RxFIFO priority value\n");
814 		ret =  -EINVAL;
815 		goto err_out;
816 	}
817 
818 	vas_init_rx_win_attr(&rxattr, coproc->ct);
819 	rxattr.rx_fifo = (void *)rx_fifo;
820 	rxattr.rx_fifo_size = fifo_size;
821 	rxattr.lnotify_lpid = lpid;
822 	rxattr.lnotify_pid = pid;
823 	rxattr.lnotify_tid = tid;
824 	rxattr.wcreds_max = MAX_CREDITS_PER_RXFIFO;
825 
826 	/*
827 	 * Open a VAS receice window which is used to configure RxFIFO
828 	 * for NX.
829 	 */
830 	rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
831 	if (IS_ERR(rxwin)) {
832 		ret = PTR_ERR(rxwin);
833 		pr_err("setting RxFIFO with VAS failed: %d\n",
834 			ret);
835 		goto err_out;
836 	}
837 
838 	coproc->vas.rxwin = rxwin;
839 	coproc->vas.id = vasid;
840 	nx842_add_coprocs_list(coproc, chip_id);
841 
842 	/*
843 	 * (lpid, pid, tid) combination has to be unique for each
844 	 * coprocessor instance in the system. So to make it
845 	 * unique, skiboot uses coprocessor type such as 842 or
846 	 * GZIP for pid and provides this value to kernel in pid
847 	 * device-tree property.
848 	 */
849 	*ct = pid;
850 
851 	return 0;
852 
853 err_out:
854 	kfree(coproc);
855 	return ret;
856 }
857 
858 
nx842_powernv_probe_vas(struct device_node * pn)859 static int __init nx842_powernv_probe_vas(struct device_node *pn)
860 {
861 	struct device_node *dn;
862 	int chip_id, vasid, ret = 0;
863 	int nx_fifo_found = 0;
864 	int uninitialized_var(ct);
865 
866 	chip_id = of_get_ibm_chip_id(pn);
867 	if (chip_id < 0) {
868 		pr_err("ibm,chip-id missing\n");
869 		return -EINVAL;
870 	}
871 
872 	vasid = chip_to_vas_id(chip_id);
873 	if (vasid < 0) {
874 		pr_err("Unable to map chip_id %d to vasid\n", chip_id);
875 		return -EINVAL;
876 	}
877 
878 	for_each_child_of_node(pn, dn) {
879 		if (of_device_is_compatible(dn, "ibm,p9-nx-842")) {
880 			ret = vas_cfg_coproc_info(dn, chip_id, vasid, &ct);
881 			if (ret) {
882 				of_node_put(dn);
883 				return ret;
884 			}
885 			nx_fifo_found++;
886 		}
887 	}
888 
889 	if (!nx_fifo_found) {
890 		pr_err("NX842 FIFO nodes are missing\n");
891 		return -EINVAL;
892 	}
893 
894 	/*
895 	 * Initialize NX instance for both high and normal priority FIFOs.
896 	 */
897 	if (opal_check_token(OPAL_NX_COPROC_INIT)) {
898 		ret = opal_nx_coproc_init(chip_id, ct);
899 		if (ret) {
900 			pr_err("Failed to initialize NX for chip(%d): %d\n",
901 				chip_id, ret);
902 			ret = opal_error_code(ret);
903 		}
904 	} else
905 		pr_warn("Firmware doesn't support NX initialization\n");
906 
907 	return ret;
908 }
909 
nx842_powernv_probe(struct device_node * dn)910 static int __init nx842_powernv_probe(struct device_node *dn)
911 {
912 	struct nx842_coproc *coproc;
913 	unsigned int ct, ci;
914 	int chip_id;
915 
916 	chip_id = of_get_ibm_chip_id(dn);
917 	if (chip_id < 0) {
918 		pr_err("ibm,chip-id missing\n");
919 		return -EINVAL;
920 	}
921 
922 	if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
923 		pr_err("ibm,842-coprocessor-type missing\n");
924 		return -EINVAL;
925 	}
926 
927 	if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
928 		pr_err("ibm,842-coprocessor-instance missing\n");
929 		return -EINVAL;
930 	}
931 
932 	coproc = kmalloc(sizeof(*coproc), GFP_KERNEL);
933 	if (!coproc)
934 		return -ENOMEM;
935 
936 	coproc->ct = ct;
937 	coproc->ci = ci;
938 	nx842_add_coprocs_list(coproc, chip_id);
939 
940 	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
941 
942 	if (!nx842_ct)
943 		nx842_ct = ct;
944 	else if (nx842_ct != ct)
945 		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
946 		       chip_id, ct, nx842_ct);
947 
948 	return 0;
949 }
950 
nx842_delete_coprocs(void)951 static void nx842_delete_coprocs(void)
952 {
953 	struct nx842_coproc *coproc, *n;
954 	struct vas_window *txwin;
955 	int i;
956 
957 	/*
958 	 * close percpu txwins that are opened for the corresponding coproc.
959 	 */
960 	for_each_possible_cpu(i) {
961 		txwin = per_cpu(cpu_txwin, i);
962 		if (txwin)
963 			vas_win_close(txwin);
964 
965 		per_cpu(cpu_txwin, i) = 0;
966 	}
967 
968 	list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
969 		if (coproc->vas.rxwin)
970 			vas_win_close(coproc->vas.rxwin);
971 
972 		list_del(&coproc->list);
973 		kfree(coproc);
974 	}
975 }
976 
/*
 * Buffer constraints published through nx842_powernv_driver; they use
 * the same DDE_BUFFER_* values that setup_ddl() checks.
 */
static struct nx842_constraints nx842_powernv_constraints = {
	.alignment =	DDE_BUFFER_ALIGN,
	.multiple =	DDE_BUFFER_LAST_MULT,
	.minimum =	DDE_BUFFER_LAST_MULT,
	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
};
983 
/* Driver description handed to nx842_crypto_init() */
static struct nx842_driver nx842_powernv_driver = {
	.name =		KBUILD_MODNAME,
	.owner =	THIS_MODULE,
	.workmem_size =	sizeof(struct nx842_workmem),
	.constraints =	&nx842_powernv_constraints,
	.compress =	nx842_powernv_compress,
	.decompress =	nx842_powernv_decompress,
};
992 
nx842_powernv_crypto_init(struct crypto_tfm * tfm)993 static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
994 {
995 	return nx842_crypto_init(tfm, &nx842_powernv_driver);
996 }
997 
/*
 * Compression algorithm registered at module init: name "842", driver
 * name "842-nx", using the shared nx842_crypto_* callbacks.
 */
static struct crypto_alg nx842_powernv_alg = {
	.cra_name		= "842",
	.cra_driver_name	= "842-nx",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
	.cra_module		= THIS_MODULE,
	.cra_init		= nx842_powernv_crypto_init,
	.cra_exit		= nx842_crypto_exit,
	.cra_u			= { .compress = {
	.coa_compress		= nx842_crypto_compress,
	.coa_decompress		= nx842_crypto_decompress } }
};
1011 
nx842_powernv_init(void)1012 static __init int nx842_powernv_init(void)
1013 {
1014 	struct device_node *dn;
1015 	int ret;
1016 
1017 	/* verify workmem size/align restrictions */
1018 	BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
1019 	BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
1020 	BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
1021 	/* verify buffer size/align restrictions */
1022 	BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
1023 	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
1024 	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
1025 
1026 	for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
1027 		ret = nx842_powernv_probe_vas(dn);
1028 		if (ret) {
1029 			nx842_delete_coprocs();
1030 			return ret;
1031 		}
1032 	}
1033 
1034 	if (list_empty(&nx842_coprocs)) {
1035 		for_each_compatible_node(dn, NULL, "ibm,power-nx")
1036 			nx842_powernv_probe(dn);
1037 
1038 		if (!nx842_ct)
1039 			return -ENODEV;
1040 
1041 		nx842_powernv_exec = nx842_exec_icswx;
1042 	} else {
1043 		ret = nx842_open_percpu_txwins();
1044 		if (ret) {
1045 			nx842_delete_coprocs();
1046 			return ret;
1047 		}
1048 
1049 		nx842_powernv_exec = nx842_exec_vas;
1050 	}
1051 
1052 	ret = crypto_register_alg(&nx842_powernv_alg);
1053 	if (ret) {
1054 		nx842_delete_coprocs();
1055 		return ret;
1056 	}
1057 
1058 	return 0;
1059 }
1060 module_init(nx842_powernv_init);
1061 
nx842_powernv_exit(void)1062 static void __exit nx842_powernv_exit(void)
1063 {
1064 	crypto_unregister_alg(&nx842_powernv_alg);
1065 
1066 	nx842_delete_coprocs();
1067 }
1068 module_exit(nx842_powernv_exit);
1069