1 /*
2 * Cavium ThunderX memory controller kernel module
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
9 *
10 */
11
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/edac.h>
15 #include <linux/interrupt.h>
16 #include <linux/string.h>
17 #include <linux/stop_machine.h>
18 #include <linux/delay.h>
19 #include <linux/sizes.h>
20 #include <linux/atomic.h>
21 #include <linux/bitfield.h>
22 #include <linux/circ_buf.h>
23
24 #include <asm/page.h>
25
26 #include "edac_module.h"
27
28 #define phys_to_pfn(phys) (PFN_DOWN(phys))
29
30 #define THUNDERX_NODE GENMASK(45, 44)
31
32 enum {
33 ERR_CORRECTED = 1,
34 ERR_UNCORRECTED = 2,
35 ERR_UNKNOWN = 3,
36 };
37
38 #define MAX_SYNDROME_REGS 4
39
40 struct error_syndrome {
41 u64 reg[MAX_SYNDROME_REGS];
42 };
43
44 struct error_descr {
45 int type;
46 u64 mask;
47 char *descr;
48 };
49
50 static void decode_register(char *str, size_t size,
51 const struct error_descr *descr,
52 const uint64_t reg)
53 {
54 int ret = 0;
55
56 while (descr->type && descr->mask && descr->descr) {
57 if (reg & descr->mask) {
58 ret = snprintf(str, size, "\n\t%s, %s",
59 descr->type == ERR_CORRECTED ?
60 "Corrected" : "Uncorrected",
61 descr->descr);
62 str += ret;
63 size -= ret;
64 }
65 descr++;
66 }
67 }
68
69 static unsigned long get_bits(unsigned long data, int pos, int width)
70 {
71 return (data >> pos) & ((1 << width) - 1);
72 }
73
74 #define L2C_CTL 0x87E080800000
75 #define L2C_CTL_DISIDXALIAS BIT(0)
76
77 #define PCI_DEVICE_ID_THUNDER_LMC 0xa022
78
79 #define LMC_FADR 0x20
80 #define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1)
81 #define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1)
82 #define LMC_FADR_FBANK(x) ((x >> 32) & 0xf)
83 #define LMC_FADR_FROW(x) ((x >> 14) & 0xffff)
84 #define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff)
85
86 #define LMC_NXM_FADR 0x28
87 #define LMC_ECC_SYND 0x38
88
89 #define LMC_ECC_PARITY_TEST 0x108
90
91 #define LMC_INT_W1S 0x150
92
93 #define LMC_INT_ENA_W1C 0x158
94 #define LMC_INT_ENA_W1S 0x160
95
96 #define LMC_CONFIG 0x188
97
98 #define LMC_CONFIG_BG2 BIT(62)
99 #define LMC_CONFIG_RANK_ENA BIT(42)
100 #define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF)
101 #define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7)
102
103 #define LMC_CONTROL 0x190
104 #define LMC_CONTROL_XOR_BANK BIT(16)
105
106 #define LMC_INT 0x1F0
107
108 #define LMC_INT_DDR_ERR BIT(11)
109 #define LMC_INT_DED_ERR (0xFUL << 5)
110 #define LMC_INT_SEC_ERR (0xFUL << 1)
111 #define LMC_INT_NXM_WR_MASK BIT(0)
112
113 #define LMC_DDR_PLL_CTL 0x258
114 #define LMC_DDR_PLL_CTL_DDR4 BIT(29)
115
116 #define LMC_FADR_SCRAMBLED 0x330
117
118 #define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
119 LMC_INT_NXM_WR_MASK)
120
121 #define LMC_INT_CE (LMC_INT_SEC_ERR)
122
123 static const struct error_descr lmc_errors[] = {
124 {
125 .type = ERR_CORRECTED,
126 .mask = LMC_INT_SEC_ERR,
127 .descr = "Single-bit ECC error",
128 },
129 {
130 .type = ERR_UNCORRECTED,
131 .mask = LMC_INT_DDR_ERR,
132 .descr = "DDR chip error",
133 },
134 {
135 .type = ERR_UNCORRECTED,
136 .mask = LMC_INT_DED_ERR,
137 .descr = "Double-bit ECC error",
138 },
139 {
140 .type = ERR_UNCORRECTED,
141 .mask = LMC_INT_NXM_WR_MASK,
142 .descr = "Non-existent memory write",
143 },
144 {0, 0, NULL},
145 };
146
147 #define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5)
148 #define LMC_INT_EN_DLCRAM_DED_ERR BIT(4)
149 #define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3)
150 #define LMC_INT_INTR_DED_ENA BIT(2)
151 #define LMC_INT_INTR_SEC_ENA BIT(1)
152 #define LMC_INT_INTR_NXM_WR_ENA BIT(0)
153
154 #define LMC_INT_ENA_ALL GENMASK(5, 0)
155
156 #define LMC_DDR_PLL_CTL 0x258
157 #define LMC_DDR_PLL_CTL_DDR4 BIT(29)
158
159 #define LMC_CONTROL 0x190
160 #define LMC_CONTROL_RDIMM BIT(0)
161
162 #define LMC_SCRAM_FADR 0x330
163
164 #define LMC_CHAR_MASK0 0x228
165 #define LMC_CHAR_MASK2 0x238
166
167 #define RING_ENTRIES 8
168
169 struct debugfs_entry {
170 const char *name;
171 umode_t mode;
172 const struct file_operations fops;
173 };
174
175 struct lmc_err_ctx {
176 u64 reg_int;
177 u64 reg_fadr;
178 u64 reg_nxm_fadr;
179 u64 reg_scram_fadr;
180 u64 reg_ecc_synd;
181 };
182
183 struct thunderx_lmc {
184 void __iomem *regs;
185 struct pci_dev *pdev;
186 struct msix_entry msix_ent;
187
188 atomic_t ecc_int;
189
190 u64 mask0;
191 u64 mask2;
192 u64 parity_test;
193 u64 node;
194
195 int xbits;
196 int bank_width;
197 int pbank_lsb;
198 int dimm_lsb;
199 int rank_lsb;
200 int bank_lsb;
201 int row_lsb;
202 int col_hi_lsb;
203
204 int xor_bank;
205 int l2c_alias;
206
207 struct page *mem;
208
209 struct lmc_err_ctx err_ctx[RING_ENTRIES];
210 unsigned long ring_head;
211 unsigned long ring_tail;
212 };
213
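/*
 * Index into the per-device error rings. The mask arithmetic relies on the
 * ring size (RING_ENTRIES, used via ARRAY_SIZE on the err_ctx arrays) being
 * a power of two.
 */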
214 #define ring_pos(pos, size) ((pos) & (size - 1))
215
216 #define DEBUGFS_STRUCT(_name, _mode, _write, _read) \
217 static struct debugfs_entry debugfs_##_name = { \
218 .name = __stringify(_name), \
219 .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
220 .fops = { \
221 .open = simple_open, \
222 .write = _write, \
223 .read = _read, \
224 .llseek = generic_file_llseek, \
225 }, \
226 }
227
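/*
 * Generate debugfs file operations that read/write a u64 field of the
 * driver's private data structure as a hex value.
 */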
228 #define DEBUGFS_FIELD_ATTR(_type, _field) \
229 static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \
230 char __user *data, \
231 size_t count, loff_t *ppos) \
232 { \
233 struct thunderx_##_type *pdata = file->private_data; \
234 char buf[20]; \
235 \
236 snprintf(buf, sizeof(buf), "0x%016llx", pdata->_field); \
237 return simple_read_from_buffer(data, count, ppos, \
238 buf, sizeof(buf)); \
239 } \
240 \
241 static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \
242 const char __user *data, \
243 size_t count, loff_t *ppos) \
244 { \
245 struct thunderx_##_type *pdata = file->private_data; \
246 int res; \
247 \
248 res = kstrtoull_from_user(data, count, 0, &pdata->_field); \
249 \
250 return res ? res : count; \
251 } \
252 \
253 DEBUGFS_STRUCT(_field, 0600, \
254 thunderx_##_type##_##_field##_write, \
255 thunderx_##_type##_##_field##_read) \
256
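/*
 * Same as DEBUGFS_FIELD_ATTR, but the read/write goes straight to the
 * hardware register at offset _reg instead of a private-data field.
 */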
257 #define DEBUGFS_REG_ATTR(_type, _name, _reg) \
258 static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \
259 char __user *data, \
260 size_t count, loff_t *ppos) \
261 { \
262 struct thunderx_##_type *pdata = file->private_data; \
263 char buf[20]; \
264 \
265 sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \
266 return simple_read_from_buffer(data, count, ppos, \
267 buf, sizeof(buf)); \
268 } \
269 \
270 static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \
271 const char __user *data, \
272 size_t count, loff_t *ppos) \
273 { \
274 struct thunderx_##_type *pdata = file->private_data; \
275 u64 val; \
276 int res; \
277 \
278 res = kstrtoull_from_user(data, count, 0, &val); \
279 \
280 if (!res) { \
281 writeq(val, pdata->regs + _reg); \
282 res = count; \
283 } \
284 \
285 return res; \
286 } \
287 \
288 DEBUGFS_STRUCT(_name, 0600, \
289 thunderx_##_type##_##_name##_write, \
290 thunderx_##_type##_##_name##_read)
291
292 #define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field)
293
294 /*
295 * To inject an ECC error, the following steps are needed:
296 * - Set up the ECC injection by writing the appropriate parameters:
297 * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0
298 * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2
299 * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test
300 * - Do the actual injection:
301 * echo 1 > /sys/kernel/debug/<device number>/inject_ecc
302 */
303 static ssize_t thunderx_lmc_inject_int_write(struct file *file,
304 const char __user *data,
305 size_t count, loff_t *ppos)
306 {
307 struct thunderx_lmc *lmc = file->private_data;
308 u64 val;
309 int res;
310
311 res = kstrtoull_from_user(data, count, 0, &val);
312
313 if (!res) {
314 /* Trigger the interrupt */
315 writeq(val, lmc->regs + LMC_INT_W1S);
316 res = count;
317 }
318
319 return res;
320 }
321
322 static ssize_t thunderx_lmc_int_read(struct file *file,
323 char __user *data,
324 size_t count, loff_t *ppos)
325 {
326 struct thunderx_lmc *lmc = file->private_data;
327 char buf[20];
328 u64 lmc_int = readq(lmc->regs + LMC_INT);
329
330 snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
331 return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
332 }
333
334 #define TEST_PATTERN 0xa5
335
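/*
 * Runs under stop_machine(): program the ECC corruption masks, fill the test
 * page with a known pattern, then flush/invalidate L1 and L2 lines so the
 * poisoned data is written to DRAM and later re-fetched from it.
 */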
336 static int inject_ecc_fn(void *arg)
337 {
338 struct thunderx_lmc *lmc = arg;
339 uintptr_t addr, phys;
340 unsigned int cline_size = cache_line_size();
341 const unsigned int lines = PAGE_SIZE / cline_size;
342 unsigned int i, cl_idx;
343
344 addr = (uintptr_t)page_address(lmc->mem);
345 phys = (uintptr_t)page_to_phys(lmc->mem);
346
347 cl_idx = (phys & 0x7f) >> 4;
348 lmc->parity_test &= ~(7ULL << 8);
349 lmc->parity_test |= (cl_idx << 8);
350
351 writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
352 writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
353 writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
354
355 readq(lmc->regs + LMC_CHAR_MASK0);
356 readq(lmc->regs + LMC_CHAR_MASK2);
357 readq(lmc->regs + LMC_ECC_PARITY_TEST);
358
359 for (i = 0; i < lines; i++) {
360 memset((void *)addr, TEST_PATTERN, cline_size);
361 barrier();
362
363 /*
364 * Flush L1 cachelines to the PoC (L2).
365 * This will cause cacheline eviction to the L2.
366 */
367 asm volatile("dc civac, %0\n"
368 "dsb sy\n"
369 : : "r"(addr + i * cline_size));
370 }
371
372 for (i = 0; i < lines; i++) {
373 /*
374 * Flush L2 cachelines to the DRAM.
375 * This will cause cacheline eviction to the DRAM
376 * and ECC corruption according to the masks set.
377 */
378 __asm__ volatile("sys #0,c11,C1,#2, %0\n"
379 : : "r"(phys + i * cline_size));
380 }
381
382 for (i = 0; i < lines; i++) {
383 /*
384 * Invalidate L2 cachelines.
385 * The subsequent load will cause cacheline fetch
386 * from the DRAM and an error interrupt.
387 */
388 __asm__ volatile("sys #0,c11,C1,#1, %0"
389 : : "r"(phys + i * cline_size));
390 }
391
392 for (i = 0; i < lines; i++) {
393 /*
394 * Invalidate L1 cachelines.
395 * The subsequent load will cause cacheline fetch
396 * from the L2 and/or DRAM
397 */
398 asm volatile("dc ivac, %0\n"
399 "dsb sy\n"
400 : : "r"(addr + i * cline_size));
401 }
402
403 return 0;
404 }
405
406 static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
407 const char __user *data,
408 size_t count, loff_t *ppos)
409 {
410 struct thunderx_lmc *lmc = file->private_data;
411 unsigned int cline_size = cache_line_size();
412 u8 *tmp;
413 void __iomem *addr;
414 unsigned int offs, timeout = 100000;
415
416 atomic_set(&lmc->ecc_int, 0);
417
418 lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
419 if (!lmc->mem)
420 return -ENOMEM;
421
422 tmp = kmalloc(cline_size, GFP_KERNEL);
423 if (!tmp) {
424 __free_pages(lmc->mem, 0);
425 return -ENOMEM;
426 }
427
428 addr = page_address(lmc->mem);
429
430 while (!atomic_read(&lmc->ecc_int) && timeout--) {
431 stop_machine(inject_ecc_fn, lmc, NULL);
432
433 for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
434 /*
435 * Do a load from the previously rigged location
436 * This should generate an error interrupt.
437 */
438 memcpy(tmp, addr + offs, cline_size);
439 asm volatile("dsb ld\n");
440 }
441 }
442
443 kfree(tmp);
444 __free_pages(lmc->mem, 0);
445
446 return count;
447 }
448
449 LMC_DEBUGFS_ENT(mask0);
450 LMC_DEBUGFS_ENT(mask2);
451 LMC_DEBUGFS_ENT(parity_test);
452
453 DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
454 DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
455 DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
456
457 static struct debugfs_entry *lmc_dfs_ents[] = {
458 &debugfs_mask0,
459 &debugfs_mask2,
460 &debugfs_parity_test,
461 &debugfs_inject_ecc,
462 &debugfs_inject_int,
463 &debugfs_int_w1c,
464 };
465
466 static int thunderx_create_debugfs_nodes(struct dentry *parent,
467 struct debugfs_entry *attrs[],
468 void *data,
469 size_t num)
470 {
471 int i;
472 struct dentry *ent;
473
474 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
475 return 0;
476
477 if (!parent)
478 return -ENOENT;
479
480 for (i = 0; i < num; i++) {
481 ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
482 parent, data, &attrs[i]->fops);
483
484 if (IS_ERR(ent))
485 break;
486 }
487
488 return i;
489 }
490
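/*
 * Reconstruct a system physical address from the LMC fault address (FADR)
 * fields, using the DIMM/rank/bank/row/column bit positions computed at
 * probe time.
 */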
491 static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
492 {
493 phys_addr_t addr = 0;
494 int bank, xbits;
495
496 addr |= lmc->node << 40;
497 addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
498 addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
499 addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
500 addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
501
502 bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
503
504 if (lmc->xor_bank)
505 bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
506
507 addr |= bank << lmc->bank_lsb;
508
509 xbits = PCI_FUNC(lmc->pdev->devfn);
510
511 if (lmc->l2c_alias)
512 xbits ^= get_bits(addr, 20, lmc->xbits) ^
513 get_bits(addr, 12, lmc->xbits);
514
515 addr |= xbits << 7;
516
517 return addr;
518 }
519
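/*
 * Count the LMC controllers present on a node; the result is used to derive
 * the number of LMC interleave bits (xbits) for address decoding.
 */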
520 static unsigned int thunderx_get_num_lmcs(unsigned int node)
521 {
522 unsigned int number = 0;
523 struct pci_dev *pdev = NULL;
524
525 do {
526 pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
527 PCI_DEVICE_ID_THUNDER_LMC,
528 pdev);
529 if (pdev) {
530 #ifdef CONFIG_NUMA
531 if (pdev->dev.numa_node == node)
532 number++;
533 #else
534 number++;
535 #endif
536 }
537 } while (pdev);
538
539 return number;
540 }
541
542 #define LMC_MESSAGE_SIZE 120
543 #define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors))
544
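/*
 * Hard IRQ handler: snapshot the LMC error registers into the ring buffer
 * and acknowledge the interrupt; decoding and reporting happen in the
 * threaded handler below.
 */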
545 static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
546 {
547 struct mem_ctl_info *mci = dev_id;
548 struct thunderx_lmc *lmc = mci->pvt_info;
549
550 unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
551 struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
552
553 writeq(0, lmc->regs + LMC_CHAR_MASK0);
554 writeq(0, lmc->regs + LMC_CHAR_MASK2);
555 writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
556
557 ctx->reg_int = readq(lmc->regs + LMC_INT);
558 ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
559 ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
560 ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
561 ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
562
563 lmc->ring_head++;
564
565 atomic_set(&lmc->ecc_int, 1);
566
567 /* Clear the interrupt */
568 writeq(ctx->reg_int, lmc->regs + LMC_INT);
569
570 return IRQ_WAKE_THREAD;
571 }
572
573 static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
574 {
575 struct mem_ctl_info *mci = dev_id;
576 struct thunderx_lmc *lmc = mci->pvt_info;
577 phys_addr_t phys_addr;
578
579 unsigned long tail;
580 struct lmc_err_ctx *ctx;
581
582 irqreturn_t ret = IRQ_NONE;
583
584 char *msg;
585 char *other;
586
587 msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
588 other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
589
590 if (!msg || !other)
591 goto err_free;
592
593 while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
594 ARRAY_SIZE(lmc->err_ctx))) {
595 tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
596
597 ctx = &lmc->err_ctx[tail];
598
599 dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
600 ctx->reg_int);
601 dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
602 ctx->reg_fadr);
603 dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
604 ctx->reg_nxm_fadr);
605 dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
606 ctx->reg_scram_fadr);
607 dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
608 ctx->reg_ecc_synd);
609
610 snprintf(msg, LMC_MESSAGE_SIZE,
611 "DIMM %lld rank %lld bank %lld row %lld col %lld",
612 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
613 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
614 LMC_FADR_FBANK(ctx->reg_scram_fadr),
615 LMC_FADR_FROW(ctx->reg_scram_fadr),
616 LMC_FADR_FCOL(ctx->reg_scram_fadr));
617
618 decode_register(other, LMC_OTHER_SIZE, lmc_errors,
619 ctx->reg_int);
620
621 phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
622
623 if (ctx->reg_int & LMC_INT_UE)
624 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
625 phys_to_pfn(phys_addr),
626 offset_in_page(phys_addr),
627 0, -1, -1, -1, msg, other);
628 else if (ctx->reg_int & LMC_INT_CE)
629 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
630 phys_to_pfn(phys_addr),
631 offset_in_page(phys_addr),
632 0, -1, -1, -1, msg, other);
633
634 lmc->ring_tail++;
635 }
636
637 ret = IRQ_HANDLED;
638
639 err_free:
640 kfree(msg);
641 kfree(other);
642
643 return ret;
644 }
645
646 static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
647 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
648 { 0, },
649 };
650
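/*
 * Build a unique memory-controller index from the LMC's PCI function number,
 * with eight index slots reserved per NUMA node.
 */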
651 static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
652 {
653 int node = dev_to_node(&pdev->dev);
654 int ret = PCI_FUNC(pdev->devfn);
655
656 ret += max(node, 0) << 3;
657
658 return ret;
659 }
660
661 static int thunderx_lmc_probe(struct pci_dev *pdev,
662 const struct pci_device_id *id)
663 {
664 struct thunderx_lmc *lmc;
665 struct edac_mc_layer layer;
666 struct mem_ctl_info *mci;
667 u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
668 int ret;
669 u64 lmc_int;
670 void *l2c_ioaddr;
671
672 layer.type = EDAC_MC_LAYER_SLOT;
673 layer.size = 2;
674 layer.is_virt_csrow = false;
675
676 ret = pcim_enable_device(pdev);
677 if (ret) {
678 dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
679 return ret;
680 }
681
682 ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
683 if (ret) {
684 dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
685 return ret;
686 }
687
688 mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
689 sizeof(struct thunderx_lmc));
690 if (!mci)
691 return -ENOMEM;
692
693 mci->pdev = &pdev->dev;
694 lmc = mci->pvt_info;
695
696 pci_set_drvdata(pdev, mci);
697
698 lmc->regs = pcim_iomap_table(pdev)[0];
699
700 lmc_control = readq(lmc->regs + LMC_CONTROL);
701 lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
702 lmc_config = readq(lmc->regs + LMC_CONFIG);
703
704 if (lmc_control & LMC_CONTROL_RDIMM) {
705 mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
706 lmc_ddr_pll_ctl) ?
707 MEM_RDDR4 : MEM_RDDR3;
708 } else {
709 mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
710 lmc_ddr_pll_ctl) ?
711 MEM_DDR4 : MEM_DDR3;
712 }
713
714 mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
715 mci->edac_cap = EDAC_FLAG_SECDED;
716
717 mci->mod_name = "thunderx-lmc";
718 mci->ctl_name = "thunderx-lmc";
719 mci->dev_name = dev_name(&pdev->dev);
720 mci->scrub_mode = SCRUB_NONE;
721
722 lmc->pdev = pdev;
723 lmc->msix_ent.entry = 0;
724
725 lmc->ring_head = 0;
726 lmc->ring_tail = 0;
727
728 ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
729 if (ret) {
730 dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
731 goto err_free;
732 }
733
734 ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
735 thunderx_lmc_err_isr,
736 thunderx_lmc_threaded_isr, 0,
737 "[EDAC] ThunderX LMC", mci);
738 if (ret) {
739 dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
740 goto err_free;
741 }
742
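/*
 * Derive the address-decoding parameters (node, interleave bits, and the
 * DIMM/rank/bank/row/column LSB positions) from the LMC configuration
 * registers; thunderx_faddr_to_phys() uses them to translate fault addresses.
 */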
743 lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
744
745 lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
746 lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
747 FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
748
749 lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
750 lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits;
751 lmc->rank_lsb = lmc->dimm_lsb;
752 lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
753 lmc->bank_lsb = 7 + lmc->xbits;
754 lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
755
756 lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
757
758 lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
759
760 l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE);
761 if (!l2c_ioaddr) {
762 dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
763 ret = -ENOMEM;
764 goto err_free;
765 }
766
767 lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
768
769 iounmap(l2c_ioaddr);
770
771 ret = edac_mc_add_mc(mci);
772 if (ret) {
773 dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
774 goto err_free;
775 }
776
777 lmc_int = readq(lmc->regs + LMC_INT);
778 writeq(lmc_int, lmc->regs + LMC_INT);
779
780 writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
781
782 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
783 ret = thunderx_create_debugfs_nodes(mci->debugfs,
784 lmc_dfs_ents,
785 lmc,
786 ARRAY_SIZE(lmc_dfs_ents));
787
788 if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
789 dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
790 ret, ret >= 0 ? " created" : "");
791 }
792 }
793
794 return 0;
795
796 err_free:
797 pci_set_drvdata(pdev, NULL);
798 edac_mc_free(mci);
799
800 return ret;
801 }
802
803 static void thunderx_lmc_remove(struct pci_dev *pdev)
804 {
805 struct mem_ctl_info *mci = pci_get_drvdata(pdev);
806 struct thunderx_lmc *lmc = mci->pvt_info;
807
808 writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
809
810 edac_mc_del_mc(&pdev->dev);
811 edac_mc_free(mci);
812 }
813
814 MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
815
816 static struct pci_driver thunderx_lmc_driver = {
817 .name = "thunderx_lmc_edac",
818 .probe = thunderx_lmc_probe,
819 .remove = thunderx_lmc_remove,
820 .id_table = thunderx_lmc_pci_tbl,
821 };
822
823 /*---------------------- OCX driver ---------------------------------*/
824
825 #define PCI_DEVICE_ID_THUNDER_OCX 0xa013
826
827 #define OCX_LINK_INTS 3
828 #define OCX_INTS (OCX_LINK_INTS + 1)
829 #define OCX_RX_LANES 24
830 #define OCX_RX_LANE_STATS 15
831
832 #define OCX_COM_INT 0x100
833 #define OCX_COM_INT_W1S 0x108
834 #define OCX_COM_INT_ENA_W1S 0x110
835 #define OCX_COM_INT_ENA_W1C 0x118
836
837 #define OCX_COM_IO_BADID BIT(54)
838 #define OCX_COM_MEM_BADID BIT(53)
839 #define OCX_COM_COPR_BADID BIT(52)
840 #define OCX_COM_WIN_REQ_BADID BIT(51)
841 #define OCX_COM_WIN_REQ_TOUT BIT(50)
842 #define OCX_COM_RX_LANE GENMASK(23, 0)
843
844 #define OCX_COM_INT_CE (OCX_COM_IO_BADID | \
845 OCX_COM_MEM_BADID | \
846 OCX_COM_COPR_BADID | \
847 OCX_COM_WIN_REQ_BADID | \
848 OCX_COM_WIN_REQ_TOUT)
849
850 static const struct error_descr ocx_com_errors[] = {
851 {
852 .type = ERR_CORRECTED,
853 .mask = OCX_COM_IO_BADID,
854 .descr = "Invalid IO transaction node ID",
855 },
856 {
857 .type = ERR_CORRECTED,
858 .mask = OCX_COM_MEM_BADID,
859 .descr = "Invalid memory transaction node ID",
860 },
861 {
862 .type = ERR_CORRECTED,
863 .mask = OCX_COM_COPR_BADID,
864 .descr = "Invalid coprocessor transaction node ID",
865 },
866 {
867 .type = ERR_CORRECTED,
868 .mask = OCX_COM_WIN_REQ_BADID,
869 .descr = "Invalid SLI transaction node ID",
870 },
871 {
872 .type = ERR_CORRECTED,
873 .mask = OCX_COM_WIN_REQ_TOUT,
874 .descr = "Window/core request timeout",
875 },
876 {0, 0, NULL},
877 };
878
879 #define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8)
880 #define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8)
881 #define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8)
882 #define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8)
883
884 #define OCX_COM_LINK_BAD_WORD BIT(13)
885 #define OCX_COM_LINK_ALIGN_FAIL BIT(12)
886 #define OCX_COM_LINK_ALIGN_DONE BIT(11)
887 #define OCX_COM_LINK_UP BIT(10)
888 #define OCX_COM_LINK_STOP BIT(9)
889 #define OCX_COM_LINK_BLK_ERR BIT(8)
890 #define OCX_COM_LINK_REINIT BIT(7)
891 #define OCX_COM_LINK_LNK_DATA BIT(6)
892 #define OCX_COM_LINK_RXFIFO_DBE BIT(5)
893 #define OCX_COM_LINK_RXFIFO_SBE BIT(4)
894 #define OCX_COM_LINK_TXFIFO_DBE BIT(3)
895 #define OCX_COM_LINK_TXFIFO_SBE BIT(2)
896 #define OCX_COM_LINK_REPLAY_DBE BIT(1)
897 #define OCX_COM_LINK_REPLAY_SBE BIT(0)
898
899 static const struct error_descr ocx_com_link_errors[] = {
900 {
901 .type = ERR_CORRECTED,
902 .mask = OCX_COM_LINK_REPLAY_SBE,
903 .descr = "Replay buffer single-bit error",
904 },
905 {
906 .type = ERR_CORRECTED,
907 .mask = OCX_COM_LINK_TXFIFO_SBE,
908 .descr = "TX FIFO single-bit error",
909 },
910 {
911 .type = ERR_CORRECTED,
912 .mask = OCX_COM_LINK_RXFIFO_SBE,
913 .descr = "RX FIFO single-bit error",
914 },
915 {
916 .type = ERR_CORRECTED,
917 .mask = OCX_COM_LINK_BLK_ERR,
918 .descr = "Block code error",
919 },
920 {
921 .type = ERR_CORRECTED,
922 .mask = OCX_COM_LINK_ALIGN_FAIL,
923 .descr = "Link alignment failure",
924 },
925 {
926 .type = ERR_CORRECTED,
927 .mask = OCX_COM_LINK_BAD_WORD,
928 .descr = "Bad code word",
929 },
930 {
931 .type = ERR_UNCORRECTED,
932 .mask = OCX_COM_LINK_REPLAY_DBE,
933 .descr = "Replay buffer double-bit error",
934 },
935 {
936 .type = ERR_UNCORRECTED,
937 .mask = OCX_COM_LINK_TXFIFO_DBE,
938 .descr = "TX FIFO double-bit error",
939 },
940 {
941 .type = ERR_UNCORRECTED,
942 .mask = OCX_COM_LINK_RXFIFO_DBE,
943 .descr = "RX FIFO double-bit error",
944 },
945 {
946 .type = ERR_UNCORRECTED,
947 .mask = OCX_COM_LINK_STOP,
948 .descr = "Link stopped",
949 },
950 {0, 0, NULL},
951 };
952
953 #define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \
954 OCX_COM_LINK_TXFIFO_DBE | \
955 OCX_COM_LINK_RXFIFO_DBE | \
956 OCX_COM_LINK_STOP)
957
958 #define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \
959 OCX_COM_LINK_TXFIFO_SBE | \
960 OCX_COM_LINK_RXFIFO_SBE | \
961 OCX_COM_LINK_BLK_ERR | \
962 OCX_COM_LINK_ALIGN_FAIL | \
963 OCX_COM_LINK_BAD_WORD)
964
965 #define OCX_LNE_INT(x) (0x8018 + (x) * 0x100)
966 #define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100)
967 #define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100)
968 #define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100)
969 #define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8)
970
971 #define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8)
972 #define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2)
973 #define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1)
974 #define OCX_LNE_CFG_RX_STAT_ENA BIT(0)
975
976
977 #define OCX_LANE_BAD_64B67B BIT(8)
978 #define OCX_LANE_DSKEW_FIFO_OVFL BIT(5)
979 #define OCX_LANE_SCRM_SYNC_LOSS BIT(4)
980 #define OCX_LANE_UKWN_CNTL_WORD BIT(3)
981 #define OCX_LANE_CRC32_ERR BIT(2)
982 #define OCX_LANE_BDRY_SYNC_LOSS BIT(1)
983 #define OCX_LANE_SERDES_LOCK_LOSS BIT(0)
984
985 #define OCX_COM_LANE_INT_UE (0)
986 #define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \
987 OCX_LANE_BDRY_SYNC_LOSS | \
988 OCX_LANE_CRC32_ERR | \
989 OCX_LANE_UKWN_CNTL_WORD | \
990 OCX_LANE_SCRM_SYNC_LOSS | \
991 OCX_LANE_DSKEW_FIFO_OVFL | \
992 OCX_LANE_BAD_64B67B)
993
994 static const struct error_descr ocx_lane_errors[] = {
995 {
996 .type = ERR_CORRECTED,
997 .mask = OCX_LANE_SERDES_LOCK_LOSS,
998 .descr = "RX SerDes lock lost",
999 },
1000 {
1001 .type = ERR_CORRECTED,
1002 .mask = OCX_LANE_BDRY_SYNC_LOSS,
1003 .descr = "RX word boundary lost",
1004 },
1005 {
1006 .type = ERR_CORRECTED,
1007 .mask = OCX_LANE_CRC32_ERR,
1008 .descr = "CRC32 error",
1009 },
1010 {
1011 .type = ERR_CORRECTED,
1012 .mask = OCX_LANE_UKWN_CNTL_WORD,
1013 .descr = "Unknown control word",
1014 },
1015 {
1016 .type = ERR_CORRECTED,
1017 .mask = OCX_LANE_SCRM_SYNC_LOSS,
1018 .descr = "Scrambler synchronization lost",
1019 },
1020 {
1021 .type = ERR_CORRECTED,
1022 .mask = OCX_LANE_DSKEW_FIFO_OVFL,
1023 .descr = "RX deskew FIFO overflow",
1024 },
1025 {
1026 .type = ERR_CORRECTED,
1027 .mask = OCX_LANE_BAD_64B67B,
1028 .descr = "Bad 64B/67B codeword",
1029 },
1030 {0, 0, NULL},
1031 };
1032
1033 #define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0))
1034 #define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0))
1035 #define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \
1036 GENMASK(9, 7) | GENMASK(5, 0))
1037
1038 #define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000)
1039 #define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000)
1040
1041 struct ocx_com_err_ctx {
1042 u64 reg_com_int;
1043 u64 reg_lane_int[OCX_RX_LANES];
1044 u64 reg_lane_stat11[OCX_RX_LANES];
1045 };
1046
1047 struct ocx_link_err_ctx {
1048 u64 reg_com_link_int;
1049 int link;
1050 };
1051
1052 struct thunderx_ocx {
1053 void __iomem *regs;
1054 int com_link;
1055 struct pci_dev *pdev;
1056 struct edac_device_ctl_info *edac_dev;
1057
1058 struct dentry *debugfs;
1059 struct msix_entry msix_ent[OCX_INTS];
1060
1061 struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
1062 struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
1063
1064 unsigned long com_ring_head;
1065 unsigned long com_ring_tail;
1066
1067 unsigned long link_ring_head;
1068 unsigned long link_ring_tail;
1069 };
1070
1071 #define OCX_MESSAGE_SIZE SZ_1K
1072 #define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors))
1073
1074 /* The hard IRQ handler below only latches state; reporting is done in the threaded handler */
1075 static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
1076 {
1077 struct msix_entry *msix = irq_id;
1078 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1079 msix_ent[msix->entry]);
1080
1081 int lane;
1082 unsigned long head = ring_pos(ocx->com_ring_head,
1083 ARRAY_SIZE(ocx->com_err_ctx));
1084 struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];
1085
1086 ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);
1087
1088 for (lane = 0; lane < OCX_RX_LANES; lane++) {
1089 ctx->reg_lane_int[lane] =
1090 readq(ocx->regs + OCX_LNE_INT(lane));
1091 ctx->reg_lane_stat11[lane] =
1092 readq(ocx->regs + OCX_LNE_STAT(lane, 11));
1093
1094 writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
1095 }
1096
1097 writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);
1098
1099 ocx->com_ring_head++;
1100
1101 return IRQ_WAKE_THREAD;
1102 }
1103
1104 static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
1105 {
1106 struct msix_entry *msix = irq_id;
1107 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1108 msix_ent[msix->entry]);
1109
1110 irqreturn_t ret = IRQ_NONE;
1111
1112 unsigned long tail;
1113 struct ocx_com_err_ctx *ctx;
1114 int lane;
1115 char *msg;
1116 char *other;
1117
1118 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1119 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1120
1121 if (!msg || !other)
1122 goto err_free;
1123
1124 while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
1125 ARRAY_SIZE(ocx->com_err_ctx))) {
1126 tail = ring_pos(ocx->com_ring_tail,
1127 ARRAY_SIZE(ocx->com_err_ctx));
1128 ctx = &ocx->com_err_ctx[tail];
1129
1130 snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
1131 ocx->edac_dev->ctl_name, ctx->reg_com_int);
1132
1133 decode_register(other, OCX_OTHER_SIZE,
1134 ocx_com_errors, ctx->reg_com_int);
1135
1136 strncat(msg, other, OCX_MESSAGE_SIZE);
1137
1138 for (lane = 0; lane < OCX_RX_LANES; lane++)
1139 if (ctx->reg_com_int & BIT(lane)) {
1140 snprintf(other, OCX_OTHER_SIZE,
1141 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
1142 lane, ctx->reg_lane_int[lane],
1143 lane, ctx->reg_lane_stat11[lane]);
1144
1145 strncat(msg, other, OCX_MESSAGE_SIZE);
1146
1147 decode_register(other, OCX_OTHER_SIZE,
1148 ocx_lane_errors,
1149 ctx->reg_lane_int[lane]);
1150 strncat(msg, other, OCX_MESSAGE_SIZE);
1151 }
1152
1153 if (ctx->reg_com_int & OCX_COM_INT_CE)
1154 edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1155
1156 ocx->com_ring_tail++;
1157 }
1158
1159 ret = IRQ_HANDLED;
1160
1161 err_free:
1162 kfree(other);
1163 kfree(msg);
1164
1165 return ret;
1166 }
1167
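/*
 * Per-link hard IRQ handler: latch and clear OCX_COM_LINK(x)_INT for the
 * link whose MSI-X vector fired, then defer reporting to the threaded handler.
 */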
1168 static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
1169 {
1170 struct msix_entry *msix = irq_id;
1171 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1172 msix_ent[msix->entry]);
1173 unsigned long head = ring_pos(ocx->link_ring_head,
1174 ARRAY_SIZE(ocx->link_err_ctx));
1175 struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];
1176
1177 ctx->link = msix->entry;
1178 ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1179
1180 writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1181
1182 ocx->link_ring_head++;
1183
1184 return IRQ_WAKE_THREAD;
1185 }
1186
1187 static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
1188 {
1189 struct msix_entry *msix = irq_id;
1190 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1191 msix_ent[msix->entry]);
1192 irqreturn_t ret = IRQ_NONE;
1193 unsigned long tail;
1194 struct ocx_link_err_ctx *ctx;
1195
1196 char *msg;
1197 char *other;
1198
1199 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1200 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1201
1202 if (!msg || !other)
1203 goto err_free;
1204
1205 while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
1206 ARRAY_SIZE(ocx->link_err_ctx))) {
1207 tail = ring_pos(ocx->link_ring_tail,
1208 ARRAY_SIZE(ocx->link_err_ctx));
1209
1210 ctx = &ocx->link_err_ctx[tail];
1211
1212 snprintf(msg, OCX_MESSAGE_SIZE,
1213 "%s: OCX_COM_LINK_INT[%d]: %016llx",
1214 ocx->edac_dev->ctl_name,
1215 ctx->link, ctx->reg_com_link_int);
1216
1217 decode_register(other, OCX_OTHER_SIZE,
1218 ocx_com_link_errors, ctx->reg_com_link_int);
1219
1220 strncat(msg, other, OCX_MESSAGE_SIZE);
1221
1222 if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
1223 edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
1224 else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
1225 edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1226
1227 ocx->link_ring_tail++;
1228 }
1229
1230 ret = IRQ_HANDLED;
1231 err_free:
1232 kfree(other);
1233 kfree(msg);
1234
1235 return ret;
1236 }
1237
1238 #define OCX_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(ocx, _name, _reg)
1239
1240 OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
1241 OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
1242 OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));
1243
1244 OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
1245 OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
1246 OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));
1247
1248 OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
1249 OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
1250 OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));
1251
1252 OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
1253 OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
1254 OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
1255 OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
1256 OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
1257 OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
1258 OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
1259 OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));
1260
1261 OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
1262 OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
1263 OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
1264 OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
1265 OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
1266 OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
1267 OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
1268 OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));
1269
1270 OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
1271 OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
1272 OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
1273 OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
1274 OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
1275 OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
1276 OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
1277 OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));
1278
1279 OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
1280
1281 static struct debugfs_entry *ocx_dfs_ents[] = {
1282 &debugfs_tlk0_ecc_ctl,
1283 &debugfs_tlk1_ecc_ctl,
1284 &debugfs_tlk2_ecc_ctl,
1285
1286 &debugfs_rlk0_ecc_ctl,
1287 &debugfs_rlk1_ecc_ctl,
1288 &debugfs_rlk2_ecc_ctl,
1289
1290 &debugfs_com_link0_int,
1291 &debugfs_com_link1_int,
1292 &debugfs_com_link2_int,
1293
1294 &debugfs_lne00_badcnt,
1295 &debugfs_lne01_badcnt,
1296 &debugfs_lne02_badcnt,
1297 &debugfs_lne03_badcnt,
1298 &debugfs_lne04_badcnt,
1299 &debugfs_lne05_badcnt,
1300 &debugfs_lne06_badcnt,
1301 &debugfs_lne07_badcnt,
1302 &debugfs_lne08_badcnt,
1303 &debugfs_lne09_badcnt,
1304 &debugfs_lne10_badcnt,
1305 &debugfs_lne11_badcnt,
1306 &debugfs_lne12_badcnt,
1307 &debugfs_lne13_badcnt,
1308 &debugfs_lne14_badcnt,
1309 &debugfs_lne15_badcnt,
1310 &debugfs_lne16_badcnt,
1311 &debugfs_lne17_badcnt,
1312 &debugfs_lne18_badcnt,
1313 &debugfs_lne19_badcnt,
1314 &debugfs_lne20_badcnt,
1315 &debugfs_lne21_badcnt,
1316 &debugfs_lne22_badcnt,
1317 &debugfs_lne23_badcnt,
1318
1319 &debugfs_com_int,
1320 };
1321
1322 static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
1323 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
1324 { 0, },
1325 };
1326
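/*
 * Put the CCPI lane statistics into read-to-clear mode, disable collection,
 * and read every counter once so they all start from zero.
 */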
1327 static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
1328 {
1329 int lane, stat, cfg;
1330
1331 for (lane = 0; lane < OCX_RX_LANES; lane++) {
1332 cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
1333 cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
1334 cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
1335 writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
1336
1337 for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
1338 readq(ocx->regs + OCX_LNE_STAT(lane, stat));
1339 }
1340 }
1341
1342 static int thunderx_ocx_probe(struct pci_dev *pdev,
1343 const struct pci_device_id *id)
1344 {
1345 struct thunderx_ocx *ocx;
1346 struct edac_device_ctl_info *edac_dev;
1347 char name[32];
1348 int idx;
1349 int i;
1350 int ret;
1351 u64 reg;
1352
1353 ret = pcim_enable_device(pdev);
1354 if (ret) {
1355 dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
1356 return ret;
1357 }
1358
1359 ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
1360 if (ret) {
1361 dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
1362 return ret;
1363 }
1364
1365 idx = edac_device_alloc_index();
1366 snprintf(name, sizeof(name), "OCX%d", idx);
1367 edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
1368 name, 1, "CCPI", 1,
1369 0, NULL, 0, idx);
1370 if (!edac_dev) {
1371 dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
1372 return -ENOMEM;
1373 }
1374 ocx = edac_dev->pvt_info;
1375 ocx->edac_dev = edac_dev;
1376 ocx->com_ring_head = 0;
1377 ocx->com_ring_tail = 0;
1378 ocx->link_ring_head = 0;
1379 ocx->link_ring_tail = 0;
1380
1381 ocx->regs = pcim_iomap_table(pdev)[0];
1382 if (!ocx->regs) {
1383 dev_err(&pdev->dev, "Cannot map PCI resources\n");
1384 ret = -ENODEV;
1385 goto err_free;
1386 }
1387
1388 ocx->pdev = pdev;
1389
1390 for (i = 0; i < OCX_INTS; i++) {
1391 ocx->msix_ent[i].entry = i;
1392 ocx->msix_ent[i].vector = 0;
1393 }
1394
1395 ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
1396 if (ret) {
1397 dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
1398 goto err_free;
1399 }
1400
1401 for (i = 0; i < OCX_INTS; i++) {
1402 ret = devm_request_threaded_irq(&pdev->dev,
1403 ocx->msix_ent[i].vector,
1404 (i == 3) ?
1405 thunderx_ocx_com_isr :
1406 thunderx_ocx_lnk_isr,
1407 (i == 3) ?
1408 thunderx_ocx_com_threaded_isr :
1409 thunderx_ocx_lnk_threaded_isr,
1410 0, "[EDAC] ThunderX OCX",
1411 &ocx->msix_ent[i]);
1412 if (ret)
1413 goto err_free;
1414 }
1415
1416 edac_dev->dev = &pdev->dev;
1417 edac_dev->dev_name = dev_name(&pdev->dev);
1418 edac_dev->mod_name = "thunderx-ocx";
1419 edac_dev->ctl_name = "thunderx-ocx";
1420
1421 ret = edac_device_add_device(edac_dev);
1422 if (ret) {
1423 dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
1424 goto err_free;
1425 }
1426
1427 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
1428 ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1429
1430 ret = thunderx_create_debugfs_nodes(ocx->debugfs,
1431 ocx_dfs_ents,
1432 ocx,
1433 ARRAY_SIZE(ocx_dfs_ents));
1434 if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
1435 dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
1436 ret, ret >= 0 ? " created" : "");
1437 }
1438 }
1439
1440 pci_set_drvdata(pdev, edac_dev);
1441
1442 thunderx_ocx_clearstats(ocx);
1443
1444 for (i = 0; i < OCX_RX_LANES; i++) {
1445 writeq(OCX_LNE_INT_ENA_ALL,
1446 ocx->regs + OCX_LNE_INT_EN(i));
1447
1448 reg = readq(ocx->regs + OCX_LNE_INT(i));
1449 writeq(reg, ocx->regs + OCX_LNE_INT(i));
1450
1451 }
1452
1453 for (i = 0; i < OCX_LINK_INTS; i++) {
1454 reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
1455 writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
1456
1457 writeq(OCX_COM_LINKX_INT_ENA_ALL,
1458 ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
1459 }
1460
1461 reg = readq(ocx->regs + OCX_COM_INT);
1462 writeq(reg, ocx->regs + OCX_COM_INT);
1463
1464 writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);
1465
1466 return 0;
1467 err_free:
1468 edac_device_free_ctl_info(edac_dev);
1469
1470 return ret;
1471 }
1472
1473 static void thunderx_ocx_remove(struct pci_dev *pdev)
1474 {
1475 struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
1476 struct thunderx_ocx *ocx = edac_dev->pvt_info;
1477 int i;
1478
1479 writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
1480
1481 for (i = 0; i < OCX_INTS; i++) {
1482 writeq(OCX_COM_LINKX_INT_ENA_ALL,
1483 ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
1484 }
1485
1486 edac_debugfs_remove_recursive(ocx->debugfs);
1487
1488 edac_device_del_device(&pdev->dev);
1489 edac_device_free_ctl_info(edac_dev);
1490 }
1491
1492 MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);
1493
1494 static struct pci_driver thunderx_ocx_driver = {
1495 .name = "thunderx_ocx_edac",
1496 .probe = thunderx_ocx_probe,
1497 .remove = thunderx_ocx_remove,
1498 .id_table = thunderx_ocx_pci_tbl,
1499 };
1500
1501 /*---------------------- L2C driver ---------------------------------*/
1502
1503 #define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e
1504 #define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f
1505 #define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030
1506
1507 #define L2C_TAD_INT_W1C 0x40000
1508 #define L2C_TAD_INT_W1S 0x40008
1509
1510 #define L2C_TAD_INT_ENA_W1C 0x40020
1511 #define L2C_TAD_INT_ENA_W1S 0x40028
1512
1513
1514 #define L2C_TAD_INT_L2DDBE BIT(1)
1515 #define L2C_TAD_INT_SBFSBE BIT(2)
1516 #define L2C_TAD_INT_SBFDBE BIT(3)
1517 #define L2C_TAD_INT_FBFSBE BIT(4)
1518 #define L2C_TAD_INT_FBFDBE BIT(5)
1519 #define L2C_TAD_INT_TAGDBE BIT(9)
1520 #define L2C_TAD_INT_RDDISLMC BIT(15)
1521 #define L2C_TAD_INT_WRDISLMC BIT(16)
1522 #define L2C_TAD_INT_LFBTO BIT(17)
1523 #define L2C_TAD_INT_GSYNCTO BIT(18)
1524 #define L2C_TAD_INT_RTGSBE BIT(32)
1525 #define L2C_TAD_INT_RTGDBE BIT(33)
1526 #define L2C_TAD_INT_RDDISOCI BIT(34)
1527 #define L2C_TAD_INT_WRDISOCI BIT(35)
1528
1529 #define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \
1530 L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
1531 L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)
1532
1533 #define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \
1534 L2C_TAD_INT_FBFSBE)
1535
1536 #define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \
1537 L2C_TAD_INT_SBFDBE | \
1538 L2C_TAD_INT_FBFDBE | \
1539 L2C_TAD_INT_TAGDBE | \
1540 L2C_TAD_INT_RTGDBE | \
1541 L2C_TAD_INT_WRDISOCI | \
1542 L2C_TAD_INT_RDDISOCI | \
1543 L2C_TAD_INT_WRDISLMC | \
1544 L2C_TAD_INT_RDDISLMC | \
1545 L2C_TAD_INT_LFBTO | \
1546 L2C_TAD_INT_GSYNCTO)
1547
1548 static const struct error_descr l2_tad_errors[] = {
1549 {
1550 .type = ERR_CORRECTED,
1551 .mask = L2C_TAD_INT_SBFSBE,
1552 .descr = "SBF single-bit error",
1553 },
1554 {
1555 .type = ERR_CORRECTED,
1556 .mask = L2C_TAD_INT_FBFSBE,
1557 .descr = "FBF single-bit error",
1558 },
1559 {
1560 .type = ERR_UNCORRECTED,
1561 .mask = L2C_TAD_INT_L2DDBE,
1562 .descr = "L2D double-bit error",
1563 },
1564 {
1565 .type = ERR_UNCORRECTED,
1566 .mask = L2C_TAD_INT_SBFDBE,
1567 .descr = "SBF double-bit error",
1568 },
1569 {
1570 .type = ERR_UNCORRECTED,
1571 .mask = L2C_TAD_INT_FBFDBE,
1572 .descr = "FBF double-bit error",
1573 },
1574 {
1575 .type = ERR_UNCORRECTED,
1576 .mask = L2C_TAD_INT_TAGDBE,
1577 .descr = "TAG double-bit error",
1578 },
1579 {
1580 .type = ERR_UNCORRECTED,
1581 .mask = L2C_TAD_INT_RTGDBE,
1582 .descr = "RTG double-bit error",
1583 },
1584 {
1585 .type = ERR_UNCORRECTED,
1586 .mask = L2C_TAD_INT_WRDISOCI,
1587 .descr = "Write to a disabled CCPI",
1588 },
1589 {
1590 .type = ERR_UNCORRECTED,
1591 .mask = L2C_TAD_INT_RDDISOCI,
1592 .descr = "Read from a disabled CCPI",
1593 },
1594 {
1595 .type = ERR_UNCORRECTED,
1596 .mask = L2C_TAD_INT_WRDISLMC,
1597 .descr = "Write to a disabled LMC",
1598 },
1599 {
1600 .type = ERR_UNCORRECTED,
1601 .mask = L2C_TAD_INT_RDDISLMC,
1602 .descr = "Read from a disabled LMC",
1603 },
1604 {
1605 .type = ERR_UNCORRECTED,
1606 .mask = L2C_TAD_INT_LFBTO,
1607 .descr = "LFB entry timeout",
1608 },
1609 {
1610 .type = ERR_UNCORRECTED,
1611 .mask = L2C_TAD_INT_GSYNCTO,
1612 .descr = "Global sync CCPI timeout",
1613 },
1614 {0, 0, NULL},
1615 };
1616
1617 #define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE)
1618
1619 #define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE)
1620
1621 #define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)
1622
1623 #define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)
1624
1625 #define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
1626 L2C_TAD_INT_RTG | \
1627 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
1628 L2C_TAD_INT_LFBTO)
1629
1630 #define L2C_TAD_TIMETWO 0x50000
1631 #define L2C_TAD_TIMEOUT 0x50100
1632 #define L2C_TAD_ERR 0x60000
1633 #define L2C_TAD_TQD_ERR 0x60100
1634 #define L2C_TAD_TTG_ERR 0x60200
1635
1636
1637 #define L2C_CBC_INT_W1C 0x60000
1638
1639 #define L2C_CBC_INT_RSDSBE BIT(0)
1640 #define L2C_CBC_INT_RSDDBE BIT(1)
1641
1642 #define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)
1643
1644 #define L2C_CBC_INT_MIBSBE BIT(4)
1645 #define L2C_CBC_INT_MIBDBE BIT(5)
1646
1647 #define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)
1648
1649 #define L2C_CBC_INT_IORDDISOCI BIT(6)
1650 #define L2C_CBC_INT_IOWRDISOCI BIT(7)
1651
1652 #define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \
1653 L2C_CBC_INT_IOWRDISOCI)
1654
1655 #define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
1656 #define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
1657
1658
1659 static const struct error_descr l2_cbc_errors[] = {
1660 {
1661 .type = ERR_CORRECTED,
1662 .mask = L2C_CBC_INT_RSDSBE,
1663 .descr = "RSD single-bit error",
1664 },
1665 {
1666 .type = ERR_CORRECTED,
1667 .mask = L2C_CBC_INT_MIBSBE,
1668 .descr = "MIB single-bit error",
1669 },
1670 {
1671 .type = ERR_UNCORRECTED,
1672 .mask = L2C_CBC_INT_RSDDBE,
1673 .descr = "RSD double-bit error",
1674 },
1675 {
1676 .type = ERR_UNCORRECTED,
1677 .mask = L2C_CBC_INT_MIBDBE,
1678 .descr = "MIB double-bit error",
1679 },
1680 {
1681 .type = ERR_UNCORRECTED,
1682 .mask = L2C_CBC_INT_IORDDISOCI,
1683 .descr = "Read from a disabled CCPI",
1684 },
1685 {
1686 .type = ERR_UNCORRECTED,
1687 .mask = L2C_CBC_INT_IOWRDISOCI,
1688 .descr = "Write to a disabled CCPI",
1689 },
1690 {0, 0, NULL},
1691 };
1692
1693 #define L2C_CBC_INT_W1S 0x60008
1694 #define L2C_CBC_INT_ENA_W1C 0x60020
1695
1696 #define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
1697 L2C_CBC_INT_IODISOCI)
1698
1699 #define L2C_CBC_INT_ENA_W1S 0x60028
1700
1701 #define L2C_CBC_IODISOCIERR 0x80008
1702 #define L2C_CBC_IOCERR 0x80010
1703 #define L2C_CBC_RSDERR 0x80018
1704 #define L2C_CBC_MIBERR 0x80020
1705
1706
1707 #define L2C_MCI_INT_W1C 0x0
1708
1709 #define L2C_MCI_INT_VBFSBE BIT(0)
1710 #define L2C_MCI_INT_VBFDBE BIT(1)
1711
1712 static const struct error_descr l2_mci_errors[] = {
1713 {
1714 .type = ERR_CORRECTED,
1715 .mask = L2C_MCI_INT_VBFSBE,
1716 .descr = "VBF single-bit error",
1717 },
1718 {
1719 .type = ERR_UNCORRECTED,
1720 .mask = L2C_MCI_INT_VBFDBE,
1721 .descr = "VBF double-bit error",
1722 },
1723 {0, 0, NULL},
1724 };
1725
1726 #define L2C_MCI_INT_W1S 0x8
1727 #define L2C_MCI_INT_ENA_W1C 0x20
1728
1729 #define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)
1730
1731 #define L2C_MCI_INT_ENA_W1S 0x28
1732
1733 #define L2C_MCI_ERR 0x10000
1734
1735 #define L2C_MESSAGE_SIZE SZ_1K
1736 #define L2C_OTHER_SIZE (50 * ARRAY_SIZE(l2_tad_errors))
1737
1738 struct l2c_err_ctx {
1739 char *reg_ext_name;
1740 u64 reg_int;
1741 u64 reg_ext;
1742 };
1743
1744 struct thunderx_l2c {
1745 void __iomem *regs;
1746 struct pci_dev *pdev;
1747 struct edac_device_ctl_info *edac_dev;
1748
1749 struct dentry *debugfs;
1750
1751 int index;
1752
1753 struct msix_entry msix_ent;
1754
1755 struct l2c_err_ctx err_ctx[RING_ENTRIES];
1756 unsigned long ring_head;
1757 unsigned long ring_tail;
1758 };
1759
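/*
 * L2C-TAD hard IRQ handler: latch the interrupt cause together with the
 * matching extended error register, clear the interrupt, and let the common
 * threaded handler do the decoding.
 */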
1760 static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
1761 {
1762 struct msix_entry *msix = irq_id;
1763 struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
1764 msix_ent);
1765
1766 unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
1767 struct l2c_err_ctx *ctx = &tad->err_ctx[head];
1768
1769 ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);
1770
1771 if (ctx->reg_int & L2C_TAD_INT_ECC) {
1772 ctx->reg_ext_name = "TQD_ERR";
1773 ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
1774 } else if (ctx->reg_int & L2C_TAD_INT_TAG) {
1775 ctx->reg_ext_name = "TTG_ERR";
1776 ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
1777 } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
1778 ctx->reg_ext_name = "TIMEOUT";
1779 ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
1780 } else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
1781 ctx->reg_ext_name = "ERR";
1782 ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
1783 }
1784
1785 writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);
1786
1787 tad->ring_head++;
1788
1789 return IRQ_WAKE_THREAD;
1790 }
1791
1792 static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
1793 {
1794 struct msix_entry *msix = irq_id;
1795 struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
1796 msix_ent);
1797
1798 unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
1799 struct l2c_err_ctx *ctx = &cbc->err_ctx[head];
1800
1801 ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);
1802
1803 if (ctx->reg_int & L2C_CBC_INT_RSD) {
1804 ctx->reg_ext_name = "RSDERR";
1805 ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
1806 } else if (ctx->reg_int & L2C_CBC_INT_MIB) {
1807 ctx->reg_ext_name = "MIBERR";
1808 ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
1809 } else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
1810 ctx->reg_ext_name = "IODISOCIERR";
1811 ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
1812 }
1813
1814 writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);
1815
1816 cbc->ring_head++;
1817
1818 return IRQ_WAKE_THREAD;
1819 }
1820
1821 static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
1822 {
1823 struct msix_entry *msix = irq_id;
1824 struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
1825 msix_ent);
1826
1827 unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
1828 struct l2c_err_ctx *ctx = &mci->err_ctx[head];
1829
1830 ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
1831 ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);
1832
1833 writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);
1834
1835 ctx->reg_ext_name = "ERR";
1836
1837 mci->ring_head++;
1838
1839 return IRQ_WAKE_THREAD;
1840 }
1841
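/*
 * Common threaded handler for all three L2C blocks (TAD, CBC, MCI): select
 * the error masks and descriptor table based on the PCI device ID, then
 * decode and report the queued error contexts.
 */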
1842 static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
1843 {
1844 struct msix_entry *msix = irq_id;
1845 struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
1846 msix_ent);
1847
1848 unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
1849 struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
1850 irqreturn_t ret = IRQ_NONE;
1851
1852 u64 mask_ue, mask_ce;
1853 const struct error_descr *l2_errors;
1854 char *reg_int_name;
1855
1856 char *msg;
1857 char *other;
1858
1859 msg = kmalloc(L2C_MESSAGE_SIZE, GFP_KERNEL);
1860 other = kmalloc(L2C_OTHER_SIZE, GFP_KERNEL);
1861
1862 if (!msg || !other)
1863 goto err_free;
1864
1865 switch (l2c->pdev->device) {
1866 case PCI_DEVICE_ID_THUNDER_L2C_TAD:
1867 reg_int_name = "L2C_TAD_INT";
1868 mask_ue = L2C_TAD_INT_UE;
1869 mask_ce = L2C_TAD_INT_CE;
1870 l2_errors = l2_tad_errors;
1871 break;
1872 case PCI_DEVICE_ID_THUNDER_L2C_CBC:
1873 reg_int_name = "L2C_CBC_INT";
1874 mask_ue = L2C_CBC_INT_UE;
1875 mask_ce = L2C_CBC_INT_CE;
1876 l2_errors = l2_cbc_errors;
1877 break;
1878 case PCI_DEVICE_ID_THUNDER_L2C_MCI:
1879 reg_int_name = "L2C_MCI_INT";
1880 mask_ue = L2C_MCI_INT_VBFDBE;
1881 mask_ce = L2C_MCI_INT_VBFSBE;
1882 l2_errors = l2_mci_errors;
1883 break;
1884 default:
1885 dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
1886 l2c->pdev->device);
1887 goto err_free;
1888 }
1889
1890 while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
1891 ARRAY_SIZE(l2c->err_ctx))) {
1892 snprintf(msg, L2C_MESSAGE_SIZE,
1893 "%s: %s: %016llx, %s: %016llx",
1894 l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
1895 ctx->reg_ext_name, ctx->reg_ext);
1896
1897 decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
1898
1899 strncat(msg, other, L2C_MESSAGE_SIZE);
1900
1901 if (ctx->reg_int & mask_ue)
1902 edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
1903 else if (ctx->reg_int & mask_ce)
1904 edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
1905
1906 l2c->ring_tail++;
1907 }
1908
1909 ret = IRQ_HANDLED;
1910
1911 err_free:
1912 kfree(other);
1913 kfree(msg);
1914
1915 return ret;
1916 }
1917
1918 #define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg)
1919
1920 L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);
1921
1922 static struct debugfs_entry *l2c_tad_dfs_ents[] = {
1923 &debugfs_tad_int,
1924 };
1925
1926 L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);
1927
1928 static struct debugfs_entry *l2c_cbc_dfs_ents[] = {
1929 &debugfs_cbc_int,
1930 };
1931
1932 L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);
1933
1934 static struct debugfs_entry *l2c_mci_dfs_ents[] = {
1935 &debugfs_mci_int,
1936 };
1937
1938 static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
1939 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
1940 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
1941 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
1942 { 0, },
1943 };
1944
1945 static int thunderx_l2c_probe(struct pci_dev *pdev,
1946 const struct pci_device_id *id)
1947 {
1948 struct thunderx_l2c *l2c;
1949 struct edac_device_ctl_info *edac_dev;
1950 struct debugfs_entry **l2c_devattr;
1951 size_t dfs_entries;
1952 irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
1953 char name[32];
1954 const char *fmt;
1955 u64 reg_en_offs, reg_en_mask;
1956 int idx;
1957 int ret;
1958
1959 ret = pcim_enable_device(pdev);
1960 if (ret) {
1961 dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
1962 return ret;
1963 }
1964
1965 ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
1966 if (ret) {
1967 dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
1968 return ret;
1969 }
1970
1971 switch (pdev->device) {
1972 case PCI_DEVICE_ID_THUNDER_L2C_TAD:
1973 thunderx_l2c_isr = thunderx_l2c_tad_isr;
1974 l2c_devattr = l2c_tad_dfs_ents;
1975 dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
1976 fmt = "L2C-TAD%d";
1977 reg_en_offs = L2C_TAD_INT_ENA_W1S;
1978 reg_en_mask = L2C_TAD_INT_ENA_ALL;
1979 break;
1980 case PCI_DEVICE_ID_THUNDER_L2C_CBC:
1981 thunderx_l2c_isr = thunderx_l2c_cbc_isr;
1982 l2c_devattr = l2c_cbc_dfs_ents;
1983 dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
1984 fmt = "L2C-CBC%d";
1985 reg_en_offs = L2C_CBC_INT_ENA_W1S;
1986 reg_en_mask = L2C_CBC_INT_ENA_ALL;
1987 break;
1988 case PCI_DEVICE_ID_THUNDER_L2C_MCI:
1989 thunderx_l2c_isr = thunderx_l2c_mci_isr;
1990 l2c_devattr = l2c_mci_dfs_ents;
1991 dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
1992 fmt = "L2C-MCI%d";
1993 reg_en_offs = L2C_MCI_INT_ENA_W1S;
1994 reg_en_mask = L2C_MCI_INT_ENA_ALL;
1995 break;
1996 default:
1997 /* Should never get here */
1998 dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
1999 pdev->device);
2000 return -EINVAL;
2001 }
2002
2003 idx = edac_device_alloc_index();
2004 snprintf(name, sizeof(name), fmt, idx);
2005
2006 edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
2007 name, 1, "L2C", 1, 0,
2008 NULL, 0, idx);
2009 if (!edac_dev) {
2010 dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
2011 return -ENOMEM;
2012 }
2013
2014 l2c = edac_dev->pvt_info;
2015 l2c->edac_dev = edac_dev;
2016
2017 l2c->regs = pcim_iomap_table(pdev)[0];
2018 if (!l2c->regs) {
2019 dev_err(&pdev->dev, "Cannot map PCI resources\n");
2020 ret = -ENODEV;
2021 goto err_free;
2022 }
2023
2024 l2c->pdev = pdev;
2025
2026 l2c->ring_head = 0;
2027 l2c->ring_tail = 0;
2028
2029 l2c->msix_ent.entry = 0;
2030 l2c->msix_ent.vector = 0;
2031
2032 ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
2033 if (ret) {
2034 dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
2035 goto err_free;
2036 }
2037
2038 ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
2039 thunderx_l2c_isr,
2040 thunderx_l2c_threaded_isr,
2041 0, "[EDAC] ThunderX L2C",
2042 &l2c->msix_ent);
2043 if (ret)
2044 goto err_free;
2045
2046 edac_dev->dev = &pdev->dev;
2047 edac_dev->dev_name = dev_name(&pdev->dev);
2048 edac_dev->mod_name = "thunderx-l2c";
2049 edac_dev->ctl_name = "thunderx-l2c";
2050
2051 ret = edac_device_add_device(edac_dev);
2052 if (ret) {
2053 dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
2054 goto err_free;
2055 }
2056
2057 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
2058 l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
2059
2060 ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
2061 l2c, dfs_entries);
2062
2063 if (ret != dfs_entries) {
2064 dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
2065 ret, ret >= 0 ? " created" : "");
2066 }
2067 }
2068
2069 pci_set_drvdata(pdev, edac_dev);
2070
2071 writeq(reg_en_mask, l2c->regs + reg_en_offs);
2072
2073 return 0;
2074
2075 err_free:
2076 edac_device_free_ctl_info(edac_dev);
2077
2078 return ret;
2079 }
2080
2081 static void thunderx_l2c_remove(struct pci_dev *pdev)
2082 {
2083 struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
2084 struct thunderx_l2c *l2c = edac_dev->pvt_info;
2085
2086 switch (pdev->device) {
2087 case PCI_DEVICE_ID_THUNDER_L2C_TAD:
2088 writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
2089 break;
2090 case PCI_DEVICE_ID_THUNDER_L2C_CBC:
2091 writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
2092 break;
2093 case PCI_DEVICE_ID_THUNDER_L2C_MCI:
2094 writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
2095 break;
2096 }
2097
2098 edac_debugfs_remove_recursive(l2c->debugfs);
2099
2100 edac_device_del_device(&pdev->dev);
2101 edac_device_free_ctl_info(edac_dev);
2102 }
2103
2104 MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);
2105
2106 static struct pci_driver thunderx_l2c_driver = {
2107 .name = "thunderx_l2c_edac",
2108 .probe = thunderx_l2c_probe,
2109 .remove = thunderx_l2c_remove,
2110 .id_table = thunderx_l2c_pci_tbl,
2111 };
2112
2113 static int __init thunderx_edac_init(void)
2114 {
2115 int rc = 0;
2116
2117 if (ghes_get_devices())
2118 return -EBUSY;
2119
2120 rc = pci_register_driver(&thunderx_lmc_driver);
2121 if (rc)
2122 return rc;
2123
2124 rc = pci_register_driver(&thunderx_ocx_driver);
2125 if (rc)
2126 goto err_lmc;
2127
2128 rc = pci_register_driver(&thunderx_l2c_driver);
2129 if (rc)
2130 goto err_ocx;
2131
2132 return rc;
2133 err_ocx:
2134 pci_unregister_driver(&thunderx_ocx_driver);
2135 err_lmc:
2136 pci_unregister_driver(&thunderx_lmc_driver);
2137
2138 return rc;
2139 }
2140
2141 static void __exit thunderx_edac_exit(void)
2142 {
2143 pci_unregister_driver(&thunderx_l2c_driver);
2144 pci_unregister_driver(&thunderx_ocx_driver);
2145 pci_unregister_driver(&thunderx_lmc_driver);
2146
2147 }
2148
2149 module_init(thunderx_edac_init);
2150 module_exit(thunderx_edac_exit);
2151
2152 MODULE_LICENSE("GPL v2");
2153 MODULE_AUTHOR("Cavium, Inc.");
2154 MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");
2155