/*
 * APM X-Gene SoC EDAC (error detection and correction)
 *
 * Copyright (c) 2015, Applied Micro Circuits Corporation
 * Author: Feng Kan <fkan@apm.com>
 *         Loc Ho <lho@apm.com>
 *
 * This program is free software; you can redistribute  it and/or modify it
 * under  the terms of  the GNU General  Public License as published by the
 * Free Software Foundation;  either version 2 of the  License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/regmap.h>

#include "edac_module.h"

#define EDAC_MOD_STR			"xgene_edac"

/* Global error configuration status registers (CSR) */
#define PCPHPERRINTSTS			0x0000
#define PCPHPERRINTMSK			0x0004
#define  MCU_CTL_ERR_MASK		BIT(12)
#define  IOB_PA_ERR_MASK		BIT(11)
#define  IOB_BA_ERR_MASK		BIT(10)
#define  IOB_XGIC_ERR_MASK		BIT(9)
#define  IOB_RB_ERR_MASK		BIT(8)
#define  L3C_UNCORR_ERR_MASK		BIT(5)
#define  MCU_UNCORR_ERR_MASK		BIT(4)
#define  PMD3_MERR_MASK			BIT(3)
#define  PMD2_MERR_MASK			BIT(2)
#define  PMD1_MERR_MASK			BIT(1)
#define  PMD0_MERR_MASK			BIT(0)
#define PCPLPERRINTSTS			0x0008
#define PCPLPERRINTMSK			0x000C
#define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
#define  L3C_CORR_ERR_MASK		BIT(1)
#define  MCU_CORR_ERR_MASK		BIT(0)
#define MEMERRINTSTS			0x0010
#define MEMERRINTMSK			0x0014

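/*
 * Top-level driver context. Holds the syscon regmaps and PCP CSR mapping
 * shared by all sub-devices, the lists of registered MCU/PMD/L3/SoC
 * contexts, and the bookkeeping masks used to decide when the top level
 * MCU interrupt may be enabled.
 */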
struct xgene_edac {
	struct device		*dev;
	struct regmap		*csw_map;
	struct regmap		*mcba_map;
	struct regmap		*mcbb_map;
	struct regmap		*efuse_map;
	struct regmap		*rb_map;
	void __iomem		*pcp_csr;
	spinlock_t		lock;
	struct dentry		*dfs;

	struct list_head	mcus;
	struct list_head	pmds;
	struct list_head	l3s;
	struct list_head	socs;

	struct mutex		mc_lock;
	int			mc_active_mask;
	int			mc_registered_mask;
};

static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
{
	*val = readl(edac->pcp_csr + reg);
}

static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 val;

	spin_lock(&edac->lock);
	val = readl(edac->pcp_csr + reg);
	val &= ~bits_mask;
	writel(val, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}

static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 val;

	spin_lock(&edac->lock);
	val = readl(edac->pcp_csr + reg);
	val |= bits_mask;
	writel(val, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}

/* Memory controller error CSR */
#define MCU_MAX_RANK			8
#define MCU_RANK_STRIDE			0x40

#define MCUGECR				0x0110
#define  MCU_GECR_DEMANDUCINTREN_MASK	BIT(0)
#define  MCU_GECR_BACKUCINTREN_MASK	BIT(1)
#define  MCU_GECR_CINTREN_MASK		BIT(2)
#define  MUC_GECR_MCUADDRERREN_MASK	BIT(9)
#define MCUGESR				0x0114
#define  MCU_GESR_ADDRNOMATCH_ERR_MASK	BIT(7)
#define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
#define  MCU_GESR_PHYP_ERR_MASK		BIT(3)
#define MCUESRR0			0x0314
#define  MCU_ESRR_MULTUCERR_MASK	BIT(3)
#define  MCU_ESRR_BACKUCERR_MASK	BIT(2)
#define  MCU_ESRR_DEMANDUCERR_MASK	BIT(1)
#define  MCU_ESRR_CERR_MASK		BIT(0)
#define MCUESRRA0			0x0318
#define MCUEBLRR0			0x031c
#define  MCU_EBLRR_ERRBANK_RD(src)	(((src) & 0x00000007) >> 0)
#define MCUERCRR0			0x0320
#define  MCU_ERCRR_ERRROW_RD(src)	(((src) & 0xFFFF0000) >> 16)
#define  MCU_ERCRR_ERRCOL_RD(src)	((src) & 0x00000FFF)
#define MCUSBECNT0			0x0324
#define MCU_SBECNT_COUNT(src)		((src) & 0xFFFF)

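/*
 * CSW and MCB (memory controller bridge) configuration registers, read
 * via syscon regmaps to determine which MCUs are wired up and active.
 */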
#define CSW_CSWCR			0x0000
#define  CSW_CSWCR_DUALMCB_MASK		BIT(0)

#define MCBADDRMR			0x0000
#define  MCBADDRMR_MCU_INTLV_MODE_MASK	BIT(3)
#define  MCBADDRMR_DUALMCU_MODE_MASK	BIT(2)
#define  MCBADDRMR_MCB_INTLV_MODE_MASK	BIT(1)
#define  MCBADDRMR_ADDRESS_MODE_MASK	BIT(0)

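/* Per-MCU context, one instance for each active memory controller unit */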
struct xgene_edac_mc_ctx {
	struct list_head	next;
	char			*name;
	struct mem_ctl_info	*mci;
	struct xgene_edac	*edac;
	void __iomem		*mcu_csr;
	u32			mcu_id;
};

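/*
 * Error injection (debugfs): writing anything to the inject_ctrl node
 * writes the MCU error masks into each rank's MCUESRRA0 register. This
 * appears to be a set/alias register that forces the corresponding
 * status bits on, so errors are reported as if detected by hardware.
 */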
static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
					      const char __user *data,
					      size_t count, loff_t *ppos)
{
	struct mem_ctl_info *mci = file->private_data;
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	int i;

	for (i = 0; i < MCU_MAX_RANK; i++) {
		writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
		       MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
		       ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
	}
	return count;
}

static const struct file_operations xgene_edac_mc_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_mc_err_inject_write,
	.llseek = generic_file_llseek,
};

static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
{
	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
		return;

	if (!mci->debugfs)
		return;

	edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
				 &xgene_edac_mc_debug_inject_fops);
}

static void xgene_edac_mc_check(struct mem_ctl_info *mci)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;
	u32 reg;
	u32 rank;
	u32 bank;
	u32 count;
	u32 col_row;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CORR_ERR_MASK & pcp_lp_stat)))
		return;

	for (rank = 0; rank < MCU_MAX_RANK; rank++) {
		reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);

		/* Detect uncorrectable memory error */
		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
			   MCU_ESRR_BACKUCERR_MASK)) {
			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
				"MCU uncorrectable error at rank %d\n", rank);

			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Detect correctable memory error */
		if (reg & MCU_ESRR_CERR_MASK) {
			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
				     rank * MCU_RANK_STRIDE);
			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
					rank * MCU_RANK_STRIDE);
			count = readl(ctx->mcu_csr + MCUSBECNT0 +
				      rank * MCU_RANK_STRIDE);
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
				rank, MCU_EBLRR_ERRBANK_RD(bank),
				MCU_ERCRR_ERRCOL_RD(col_row),
				MCU_ERCRR_ERRROW_RD(col_row),
				MCU_SBECNT_COUNT(count));

			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Clear all error registers */
		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
		       rank * MCU_RANK_STRIDE);
		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
	}

	/* Detect memory controller error */
	reg = readl(ctx->mcu_csr + MCUGESR);
	if (reg) {
		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address mismatch error\n");
		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address multi-match error\n");

		writel(reg, ctx->mcu_csr + MCUGESR);
	}
}

static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int val;

	if (edac_op_state != EDAC_OPSTATE_INT)
		return;

	mutex_lock(&ctx->edac->mc_lock);

	/*
	 * As there is only a single bit for the error enable and interrupt
	 * mask, we must only enable the top level interrupt after all MCUs
	 * have registered. Otherwise, if an error occurs before the
	 * corresponding MCU has registered, the interrupt will never get
	 * cleared. To determine when all MCUs have registered, we keep
	 * track of both active MCUs and registered MCUs.
	 */
	if (enable) {
		/* Set registered MCU bit */
		ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;

		/* Enable interrupt after all active MCUs have registered */
		if (ctx->edac->mc_registered_mask ==
		    ctx->edac->mc_active_mask) {
			/* Enable memory controller top level interrupt */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       MCU_UNCORR_ERR_MASK |
					       MCU_CTL_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       MCU_CORR_ERR_MASK);
		}

		/* Enable MCU interrupt and error reporting */
		val = readl(ctx->mcu_csr + MCUGECR);
		val |= MCU_GECR_DEMANDUCINTREN_MASK |
		       MCU_GECR_BACKUCINTREN_MASK |
		       MCU_GECR_CINTREN_MASK |
		       MUC_GECR_MCUADDRERREN_MASK;
		writel(val, ctx->mcu_csr + MCUGECR);
	} else {
		/* Disable MCU interrupt */
		val = readl(ctx->mcu_csr + MCUGECR);
		val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
			 MCU_GECR_BACKUCINTREN_MASK |
			 MCU_GECR_CINTREN_MASK |
			 MUC_GECR_MCUADDRERREN_MASK);
		writel(val, ctx->mcu_csr + MCUGECR);

		/* Disable memory controller top level interrupt */
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       MCU_CORR_ERR_MASK);

		/* Clear registered MCU bit */
		ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
	}

	mutex_unlock(&ctx->edac->mc_lock);
}

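/*
 * Decode the CSW/MCB configuration to work out which MCUs are populated:
 * with dual MCBs either all four MCUs or just MCU0/MCU2 are active, with
 * a single MCB either MCU0/MCU1 or only MCU0 is active.
 */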
static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
{
	unsigned int reg;
	u32 mcu_mask;

	if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
		return 0;

	if (reg & CSW_CSWCR_DUALMCB_MASK) {
		/*
		 * Dual MCB active - determine if all 4 MCUs or just
		 * MCU0 and MCU2 are active
		 */
		if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
			return 0;
		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
	} else {
		/*
		 * Single MCB active - determine if MCU0/MCU1 or just
		 * MCU0 is active
		 */
		if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
			return 0;
		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
	}

	/* Save the active MC mask if it hasn't been set already */
	if (!ctx->edac->mc_active_mask)
		ctx->edac->mc_active_mask = mcu_mask;

	return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
}

static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct xgene_edac_mc_ctx tmp_ctx;
	struct xgene_edac_mc_ctx *ctx;
	struct resource res;
	int rc;

	memset(&tmp_ctx, 0, sizeof(tmp_ctx));
	tmp_ctx.edac = edac;

	if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no MCU resource address\n");
		goto err_group;
	}
	tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(tmp_ctx.mcu_csr)) {
		dev_err(edac->dev, "unable to map MCU resource\n");
		rc = PTR_ERR(tmp_ctx.mcu_csr);
		goto err_group;
	}

	/* Ignore non-active MCU */
	if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
		dev_err(edac->dev, "no memory-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
		rc = -ENODEV;
		goto err_group;
	}

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 4;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
			    sizeof(*ctx));
	if (!mci) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = mci->pvt_info;
	*ctx = tmp_ctx;		/* Copy over resource value */
	ctx->name = "xgene_edac_mc_err";
	ctx->mci = mci;
	mci->pdev = &mci->dev;
	mci->ctl_name = ctx->name;
	mci->dev_name = ctx->name;

	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
			 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->ctl_page_to_phys = NULL;
	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
	mci->scrub_mode = SCRUB_HW_SRC;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = xgene_edac_mc_check;

	if (edac_mc_add_mc(mci)) {
		dev_err(edac->dev, "edac_mc_add_mc failed\n");
		rc = -EINVAL;
		goto err_free;
	}

	xgene_edac_mc_create_debugfs_node(mci);

	list_add(&ctx->next, &edac->mcus);

	xgene_edac_mc_irq_ctl(mci, true);

	devres_remove_group(edac->dev, xgene_edac_mc_add);

	dev_info(edac->dev, "X-Gene EDAC MC registered\n");
	return 0;

err_free:
	edac_mc_free(mci);
err_group:
	devres_release_group(edac->dev, xgene_edac_mc_add);
	return rc;
}

static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
{
	xgene_edac_mc_irq_ctl(mcu->mci, false);
	edac_mc_del_mc(&mcu->mci->dev);
	edac_mc_free(mcu->mci);
	return 0;
}

/* CPU L1/L2 error CSR */
#define MAX_CPU_PER_PMD				2
#define CPU_CSR_STRIDE				0x00100000
#define CPU_L2C_PAGE				0x000D0000
#define CPU_MEMERR_L2C_PAGE			0x000E0000
#define CPU_MEMERR_CPU_PAGE			0x000F0000
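
/*
 * Each CPU in a PMD has a 1MB CSR region (CPU_CSR_STRIDE). The per-CPU
 * memory error registers live in the 0xF0000 page of that region, while
 * the shared L2C pages (0xD0000 and 0xE0000) are addressed from the PMD
 * CSR base.
 */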

#define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
#define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
#define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
#define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
#define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
#define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
#define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
#define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
#define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
#define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
#define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814

#define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
#define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
#define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
#define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
#define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
#define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
#define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
#define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
#define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
#define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
#define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
#define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
#define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
#define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
#define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
#define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
#define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
#define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
#define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804

/*
 * Processor Module Domain (PMD) context - context for a pair of processors.
 * Each PMD consists of two CPUs and a shared L2 cache; each CPU has its
 * own L1 cache.
 */
struct xgene_edac_pmd_ctx {
	struct list_head	next;
	struct device		ddev;
	char			*name;
	struct xgene_edac	*edac;
	struct edac_device_ctl_info *edac_dev;
	void __iomem		*pmd_csr;
	u32			pmd;
	int			version;
};

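/*
 * Check one CPU's L1-related error status registers: ICF (instruction
 * cache fetch), LSU (load/store unit) and MMU. Each non-zero status is
 * logged, cleared by writing the value back, and accounted as a
 * correctable error.
 */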
static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
				    int cpu_idx)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f;
	u32 val;

	pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;

	val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
	if (!val)
		goto chk_lsu;
	dev_err(edac_dev->dev,
		"CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_ICFESR_ERRWAY_RD(val),
		MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
		MEMERR_CPU_ICFESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_ICFESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
	case 1:
		dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Way select multiple hit\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Physical tag parity error\n");
		break;
	case 4:
	case 5:
		dev_err(edac_dev->dev, "L1 data parity error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
		   MEMERR_CPU_ICFESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_lsu:
	val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
	if (!val)
		goto chk_mmu;
	dev_err(edac_dev->dev,
		"CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_LSUESR_ERRWAY_RD(val),
		MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
		MEMERR_CPU_LSUESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_LSUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Load tag error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Load data error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "WSL multihit error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Store tag error\n");
		break;
	case 4:
		dev_err(edac_dev->dev,
			"DTB multihit from load pipeline error\n");
		break;
	case 5:
		dev_err(edac_dev->dev,
			"DTB multihit from store pipeline error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
		   MEMERR_CPU_LSUESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_mmu:
	val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
	if (!val)
		return;
	dev_err(edac_dev->dev,
		"CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_MMUESR_ERRWAY_RD(val),
		MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
		MEMERR_CPU_MMUESR_ERRINFO_RD(val),
		val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
	if (val & MEMERR_CPU_MMUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "TMO operation single bank error\n");
		break;
	case 4:
		dev_err(edac_dev->dev, "Stage 2 UTB error\n");
		break;
	case 5:
		dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
		break;
	case 7:
		dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);

	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
}

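/*
 * Check the PMD's shared L2 cache: first the L2C memory error status
 * (tag/data ECC and SDB parity errors, reported as CE or UE), then the
 * L2C request time-out status.
 */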
static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d;
	void __iomem *pg_e;
	u32 val_hi;
	u32 val_lo;
	u32 val;

	/* Check L2 */
	pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
	val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
	if (!val)
		goto chk_l2c;
	val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
	val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
	dev_err(edac_dev->dev,
		"PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
		ctx->pmd, val, val_hi, val_lo);
	dev_err(edac_dev->dev,
		"ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
		MEMERR_L2C_L2ESR_ERRSYN_RD(val),
		MEMERR_L2C_L2ESR_ERRWAY_RD(val),
		MEMERR_L2C_L2ESR_ERRCPU_RD(val),
		MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
		MEMERR_L2C_L2ESR_ERRACTION_RD(val));

	if (val & MEMERR_L2C_L2ESR_ERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "One or more uncorrectable errors\n");
	if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
		dev_err(edac_dev->dev, "Multiple uncorrectable errors\n");

	switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Outbound SDB parity error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Inbound SDB parity error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Tag ECC error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Data ECC error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);

	if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
		   MEMERR_L2C_L2ESR_MULTICERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
		   MEMERR_L2C_L2ESR_MULTUCERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);

chk_l2c:
	/* Check if any memory request timed out on the L2 cache */
	pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	if (val) {
		val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
		val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
		dev_err(edac_dev->dev,
			"PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
			ctx->pmd, val, val_hi, val_lo);
		writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	}
}

static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	unsigned int pcp_hp_stat;
	int i;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
		return;

	/* Check CPU L1 error */
	for (i = 0; i < MAX_CPU_PER_PMD; i++)
		xgene_edac_pmd_l1_check(edac_dev, i);

	/* Check CPU L2 error */
	xgene_edac_pmd_l2_check(edac_dev);
}

static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
				      int cpu)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
			     CPU_MEMERR_CPU_PAGE;

	/*
	 * Enable CPU memory error detection via the error control
	 * registers: MEMERR_CPU_ICFECR, MEMERR_CPU_LSUECR and
	 * MEMERR_CPU_MMUECR
	 */
	writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
	writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
	writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
}

static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	/* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
	writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
	/* Configure L2C HW request time out feature if supported */
	if (ctx->version > 1)
		writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
}

static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	int i;

	/* Enable or disable the PMD error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
		else
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
	}

	if (enable) {
		xgene_edac_pmd_hw_cfg(edac_dev);

		/* Two CPUs per PMD */
		for (i = 0; i < MAX_CPU_PER_PMD; i++)
			xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
	}
}

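/*
 * Error injection (debugfs): write the correctable-error masks to the
 * ICF/LSU/MMU ESRA alias registers of both CPUs so that the check path
 * sees and reports correctable L1 errors.
 */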
static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *cpux_pg_f;
	int i;

	for (i = 0; i < MAX_CPU_PER_PMD; i++) {
		cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
			    CPU_MEMERR_CPU_PAGE;

		writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
		       MEMERR_CPU_ICFESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
		       MEMERR_CPU_LSUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
		       MEMERR_CPU_MMUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
	}
	return count;
}

static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
	       MEMERR_L2C_L2ESR_MULTICERR_MASK |
	       MEMERR_L2C_L2ESR_UCERR_MASK |
	       MEMERR_L2C_L2ESR_ERR_MASK,
	       pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
	return count;
}

static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l1_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l2_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{ }
};

static void
xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	struct dentry *dbgfs_dir;
	char name[10];

	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
		return;

	snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
	if (!dbgfs_dir)
		return;

	edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
				 &xgene_edac_pmd_debug_inject_fops[0]);
	edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
				 &xgene_edac_pmd_debug_inject_fops[1]);
}

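/* A set efuse bit means the corresponding PMD is fused off (unavailable) */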
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	return (efuse & (1 << pmd)) ? 0 : 1;
}

static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_pmd_ctx *ctx;
	struct resource res;
	char edac_name[10];
	u32 pmd;
	int rc;
	u32 val;

	if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
		return -ENOMEM;

	/* Determine if this PMD is disabled */
	if (of_property_read_u32(np, "pmd-controller", &pmd)) {
		dev_err(edac->dev, "no pmd-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	rc = regmap_read(edac->efuse_map, 0, &val);
	if (rc)
		goto err_group;
	if (!xgene_edac_pmd_available(val, pmd)) {
		rc = -ENODEV;
		goto err_group;
	}

	snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      edac_name, 1, "l2c", 1, 2, NULL,
					      0, edac_device_alloc_index());
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->name = "xgene_pmd_err";
	ctx->pmd = pmd;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no PMD resource address\n");
		goto err_free;
	}
	ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(ctx->pmd_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for PMD resource address\n");
		rc = PTR_ERR(ctx->pmd_csr);
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_pmd_check;

	xgene_edac_pmd_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "edac_device_add_device failed\n");
		rc = -ENOMEM;
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->pmds);

	xgene_edac_pmd_hw_ctl(edac_dev, true);

	devres_remove_group(edac->dev, xgene_edac_pmd_add);

	dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);
err_group:
	devres_release_group(edac->dev, xgene_edac_pmd_add);
	return rc;
}

static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
{
	struct edac_device_ctl_info *edac_dev = pmd->edac_dev;

	xgene_edac_pmd_hw_ctl(edac_dev, false);
	edac_device_del_device(edac_dev->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

/* L3 Error device */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)
#define L3C_ELR				(0x0C * 4)
#define  L3C_ELR_ERRSYN(src)		((src & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		((src & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		((src & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		((src & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		((src & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		(src & 0x0000000F)
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		(src & 0x0000000F)

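/*
 * Generic sub-device context, shared by the L3 and SoC error devices:
 * a mapped CSR region plus the owning driver context and EDAC device.
 */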
struct xgene_edac_dev_ctx {
	struct list_head	next;
	struct device		ddev;
	char			*name;
	struct xgene_edac	*edac;
	struct edac_device_ctl_info *edac_dev;
	int			edac_idx;
	void __iomem		*dev_csr;
	int			version;
};

/*
 * Version 1 of the L3 controller has broken single-bit correctable error
 * logic for certain error syndromes. Log those as uncorrectable instead.
 */
static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
{
	if (l3cesr & L3C_ESR_DATATAG_MASK) {
		switch (L3C_ELR_ERRSYN(l3celr)) {
		case 0x13C:
		case 0x0B4:
		case 0x007:
		case 0x00D:
		case 0x00E:
		case 0x019:
		case 0x01A:
		case 0x01C:
		case 0x04E:
		case 0x041:
			return true;
		}
	} else if (L3C_ELR_ERRWAY(l3celr) == 9)
		return true;

	return false;
}

static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 l3cesr;
	u32 l3celr;
	u32 l3caelr;
	u32 l3cbelr;

	l3cesr = readl(ctx->dev_csr + L3C_ESR);
	if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
		return;

	if (l3cesr & L3C_ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "L3C uncorrectable error\n");
	if (l3cesr & L3C_ESR_CERR_MASK)
		dev_warn(edac_dev->dev, "L3C correctable error\n");

	l3celr = readl(ctx->dev_csr + L3C_ELR);
	l3caelr = readl(ctx->dev_csr + L3C_AELR);
	l3cbelr = readl(ctx->dev_csr + L3C_BELR);
	if (l3cesr & L3C_ESR_MULTIHIT_MASK)
		dev_err(edac_dev->dev, "L3C multiple hit error\n");
	if (l3cesr & L3C_ESR_UCEVICT_MASK)
		dev_err(edac_dev->dev,
			"L3C dropped eviction of line with error\n");
	if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
		dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
	if (l3cesr & L3C_ESR_DATATAG_MASK)
		dev_err(edac_dev->dev,
			"L3C data error syndrome 0x%X group 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
	else
		dev_err(edac_dev->dev,
			"L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
			L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
	/*
	 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
	 *       Address [37:6] in l3caelr. Lower 6 bits are zero.
	 */
	dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
		L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
		(l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
	dev_err(edac_dev->dev,
		"L3C error status register value 0x%X\n", l3cesr);

	/* Clear L3C error interrupt */
	writel(0, ctx->dev_csr + L3C_ESR);

	if (ctx->version <= 1 &&
	    xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	if (l3cesr & L3C_ESR_CERR_MASK)
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (l3cesr & L3C_ESR_UCERR_MASK)
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}

static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 val;

	val = readl(ctx->dev_csr + L3C_ECR);
	val |= L3C_UCERREN | L3C_CERREN;
	/* On disable, only mask the interrupt; keep error detection enabled */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
		else
			val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
	}
	writel(val, ctx->dev_csr + L3C_ECR);

	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		/* Enable/disable L3 error top level interrupts */
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		}
	}
}

static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
					       const char __user *data,
					       size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Generate all errors */
	writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
	return count;
}

static const struct file_operations xgene_edac_l3_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_l3_inject_ctrl_write,
	.llseek = generic_file_llseek
};

static void
xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	struct dentry *dbgfs_dir;
	char name[10];

	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
		return;

	snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
	if (!dbgfs_dir)
		return;

	debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
			    &xgene_edac_l3_debug_inject_fops);
}

static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
			     int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	struct resource res;
	void __iomem *dev_csr;
	int edac_idx;
	int rc = 0;

	if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no L3 resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for L3 resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "l3c", 1, "l3c", 1, 0, NULL, 0,
					      edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_l3_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_l3_check;

	xgene_edac_l3_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->l3s);

	xgene_edac_l3_hw_init(edac_dev, true);

	devres_remove_group(edac->dev, xgene_edac_l3_add);

	dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_l3_add);
	return rc;
}

static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
{
	struct edac_device_ctl_info *edac_dev = l3->edac_dev;

	xgene_edac_l3_hw_init(edac_dev, false);
	edac_device_del_device(l3->edac->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

/* SoC error device */
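/*
 * The registers below cover the IOB (I/O bridge) error sources: the two
 * AXI slave interfaces, the processing agent (PA) and bridge agent (BA)
 * transaction errors, XGIC transaction errors, and the global IOB memory
 * ECC (SEC/DED) status.
 */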
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000c
#define  REQTYPE_RD(src)		(((src) & BIT(0)))
#define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001c
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003c
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03ffffff)
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083c
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084c

/* IO Bus Registers */
#define RBCSR				0x0000
#define STICKYERR_MASK			BIT(0)
#define RBEIR				0x0008
#define AGENT_OFFLINE_ERR_MASK		BIT(30)
#define UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define WORD_ALIGNED_ERR_MASK		BIT(28)
#define PAGE_ACCESS_ERR_MASK		BIT(27)
#define WRITE_ACCESS_MASK		BIT(26)
#define RBERRADDR_RD(src)		((src) & 0x03FFFFFF)

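/*
 * SoC memory error source names for version 1 silicon, indexed by the
 * bit position of the corresponding source in the memory error status.
 */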
static const char * const soc_mem_err_v1[] = {
	"10GbE0",
	"10GbE1",
	"Security",
	"SATA45",
	"SATA23/ETH23",
	"SATA01/ETH01",
	"USB1",
	"USB0",
	"QML",
	"QM0",
	"QM1 (XGbE01)",
	"PCIE4",
	"PCIE3",
	"PCIE2",
	"PCIE1",
	"PCIE0",
	"CTX Manager",
	"OCM",
	"1GbE",
	"CLE",
	"AHBC",
	"PktDMA",
	"GFC",
	"MSLIM",
	"10GbE2",
	"10GbE3",
	"QM2 (XGbE23)",
	"IOB",
	"unknown",
	"unknown",
	"unknown",
	"unknown",
};

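/*
 * Report XGIC transaction errors (read/write size errors plus the
 * faulting address), then the IOB global memory ECC status: single-bit
 * correctable (SEC) and double-bit uncorrectable (DED) errors, each with
 * its logged error address.
 */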
static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;
	u32 info;

	/* GIC transaction error interrupt */
	reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
	if (!reg)
		goto chk_iob_err;
	dev_err(edac_dev->dev, "XGIC transaction error\n");
	if (reg & RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC read size error\n");
	if (reg & M_RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
	if (reg & WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC write size error\n");
	if (reg & M_WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
	info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
	dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
		info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
		info);
	writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);

chk_iob_err:
	/* IOB memory error */
	reg = readl(ctx->dev_csr + GLBL_ERR_STS);
	if (!reg)
		return;
	if (reg & SEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB single-bit correctable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
	}
	if (reg & MSEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB multiple single-bit correctable memory errors at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
	}
	if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

	if (reg & DED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
		dev_err(edac_dev->dev,
			"IOB double-bit uncorrectable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
	}
	if (reg & MDED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
		dev_err(edac_dev->dev,
			"Multiple IOB double-bit uncorrectable memory errors at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
	}
	if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}

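/*
 * Report register-bus access errors latched in the RBCSR/RBEIR syscon
 * registers (out of range, unimplemented page, unaligned or offline-agent
 * accesses), then decode the IOB bridge agent transaction error status.
 */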
static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* If the register bus resource isn't available, just skip it */
	if (!ctx->edac->rb_map)
		goto rb_skip;

	/*
	 * Check RB access errors
	 * 1. Out of range
	 * 2. Un-implemented page
	 * 3. Un-aligned access
	 * 4. Offline slave IP
	 */
	if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
		return;
	if (reg & STICKYERR_MASK) {
		bool write;
		u32 address;

		dev_err(edac_dev->dev, "IOB bus access error(s)\n");
		if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
			return;
		write = reg & WRITE_ACCESS_MASK ? 1 : 0;
		address = RBERRADDR_RD(reg);
		if (reg & AGENT_OFFLINE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to offline agent error\n",
				write ? "write" : "read");
		if (reg & UNIMPL_RBPAGE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to unimplemented page error\n",
				write ? "write" : "read");
		if (reg & WORD_ALIGNED_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s word aligned access error\n",
				write ? "write" : "read");
		if (reg & PAGE_ACCESS_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s to page out of range access error\n",
				write ? "write" : "read");
		if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
			return;
		if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
			return;
	}
rb_skip:

	/* IOB Bridge agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
	if (!reg)
		return;

	dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA write response error\n");
	if (reg & M_WRERR_RESP_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA write response error\n");
	if (reg & XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
	if (reg & M_XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC poisoned write error\n");
	if (reg & RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
	if (reg & M_RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RBM poisoned write error\n");
	if (reg & WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA write error\n");
	if (reg & M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
	if (reg & TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA transaction error\n");
	if (reg & M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
	if (reg & RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM read transaction ID error\n");
	if (reg & M_RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM read transaction ID error\n");
	if (reg & WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM write transaction ID error\n");
	if (reg & M_WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM write transaction ID error\n");
	if (reg & ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"IOB BA XGIC/RB illegal access error\n");
	if (reg & M_ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC/RB illegal access error\n");

	err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
	dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
		REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
			readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
	writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
}

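/*
 * Report IOB processing agent transaction errors (read/write data RAM,
 * transaction and transaction ID RAM corruption), then illegal accesses
 * reported by the two IOB AXI slave interfaces.
 */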
xgene_edac_pa_report(struct edac_device_ctl_info * edac_dev)1587 static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* IOB Processing agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi0;
	dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
	if (reg & IOBPA_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
	if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA read data RAM error\n");
	if (reg & IOBPA_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
	if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA write data RAM error\n");
	if (reg & IOBPA_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction error\n");
	if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
	if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
	if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA transaction ID RAM error\n");
	writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);

chk_iob_axi0:
	/* IOB AXI0 Error */
	reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi1;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		REQTYPE_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);

chk_iob_axi1:
	/* IOB AXI1 Error */
	reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
	if (!reg)
		return;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		REQTYPE_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}

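/*
 * Top-level SoC error check. The high/low priority PCP status registers
 * determine which sub-block reports to run. A CSW switch trace parity
 * error is counted as a correctable error; SoC memory parity errors are
 * counted as uncorrectable and, when a per-chip table of error sources
 * is available (version 1), decoded bit by bit.
 */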
static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	const char * const *soc_mem_err = NULL;
	u32 pcp_hp_stat;
	u32 pcp_lp_stat;
	u32 reg;
	int i;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
	if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
			      IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
	      (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
		return;

	if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
		xgene_edac_iob_gic_report(edac_dev);

	if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
		xgene_edac_rb_report(edac_dev);

	if (pcp_hp_stat & IOB_PA_ERR_MASK)
		xgene_edac_pa_report(edac_dev);

	if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
		dev_info(edac_dev->dev,
			 "CSW switch trace correctable memory parity error\n");
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	}

	if (!reg)
		return;
	if (ctx->version == 1)
		soc_mem_err = soc_mem_err_v1;
	if (!soc_mem_err) {
		dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
			reg);
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	for (i = 0; i < 31; i++) {
		if (reg & (1 << i)) {
			dev_err(edac_dev->dev, "%s memory parity error\n",
				soc_mem_err[i]);
			edac_device_handle_ue(edac_dev, 0, 0,
					      edac_dev->ctl_name);
		}
	}
}

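/*
 * Enable or disable the SoC error interrupt sources. In the mask
 * registers a cleared bit unmasks (enables) an interrupt source, so
 * enabling clears mask bits and disabling sets them. The masks are
 * only touched when the device is operating in interrupt mode.
 */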
static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
				   bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Enable or disable the SoC IP error interrupts */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		}

		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + XGICTRANSERRINTMSK);

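		/*
		 * Note that with enable the mask argument below is 0, so
		 * this call requests no bits to be set and leaves
		 * MEMERRINTMSK effectively unchanged; only the disable
		 * path actually sets the memory-error mask bits.
		 */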
		xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
				       enable ? 0x0 : 0xFFFFFFFF);
	}
}

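/*
 * Register one SoC EDAC device: map its CSR resource, allocate an
 * edac_device control structure, register it with the EDAC core (using
 * polling when interrupt mode is not active) and enable error
 * reporting in the hardware.
 */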
static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	void __iomem *dev_csr;
	struct resource res;
	int edac_idx;
	int rc;

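	/*
	 * Open a devres group so that all managed allocations made below
	 * can be released in one step if any registration step fails.
	 */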
	if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no SoC resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for SoC resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "SOC", 1, "SOC", 1, 2, NULL, 0,
					      edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_soc_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_soc_check;

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->socs);

	xgene_edac_soc_hw_init(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_soc_add);

	dev_info(edac->dev, "X-Gene EDAC SoC registered\n");

	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_soc_add);
	return rc;
}

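/* Tear down one SoC EDAC device: quiesce the hardware, then unregister. */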
static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
{
	struct edac_device_ctl_info *edac_dev = soc->edac_dev;

	xgene_edac_soc_hw_init(edac_dev, 0);
	edac_device_del_device(soc->edac->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

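/*
 * Shared handler for the PCP error interrupt lines. MCU checks are
 * gated on the MCU bits of the high/low priority status registers and
 * PMD checks on the per-PMD bits, while every registered L3 and SoC
 * device is checked unconditionally.
 */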
static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
{
	struct xgene_edac *ctx = dev_id;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_dev_ctx *node;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;

	xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
	if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
		struct xgene_edac_mc_ctx *mcu;

		list_for_each_entry(mcu, &ctx->mcus, next)
			xgene_edac_mc_check(mcu->mci);
	}

	list_for_each_entry(pmd, &ctx->pmds, next) {
		if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
			xgene_edac_pmd_check(pmd->edac_dev);
	}

	list_for_each_entry(node, &ctx->l3s, next)
		xgene_edac_l3_check(node->edac_dev);

	list_for_each_entry(node, &ctx->socs, next)
		xgene_edac_soc_check(node->edac_dev);

	return IRQ_HANDLED;
}

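/*
 * Bind the top-level X-Gene EDAC node: look up the syscon regmaps, map
 * the PCP CSR, request the error interrupts when running in interrupt
 * mode and register an EDAC device for every supported child node.
 */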
static int xgene_edac_probe(struct platform_device *pdev)
{
	struct xgene_edac *edac;
	struct device_node *child;
	struct resource *res;
	int rc;

	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
	if (!edac)
		return -ENOMEM;

	edac->dev = &pdev->dev;
	platform_set_drvdata(pdev, edac);
	INIT_LIST_HEAD(&edac->mcus);
	INIT_LIST_HEAD(&edac->pmds);
	INIT_LIST_HEAD(&edac->l3s);
	INIT_LIST_HEAD(&edac->socs);
	spin_lock_init(&edac->lock);
	mutex_init(&edac->mc_lock);

	edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							"regmap-csw");
	if (IS_ERR(edac->csw_map)) {
		dev_err(edac->dev, "unable to get syscon regmap csw\n");
		rc = PTR_ERR(edac->csw_map);
		goto out_err;
	}

	edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcba");
	if (IS_ERR(edac->mcba_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcba\n");
		rc = PTR_ERR(edac->mcba_map);
		goto out_err;
	}

	edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcbb");
	if (IS_ERR(edac->mcbb_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
		rc = PTR_ERR(edac->mcbb_map);
		goto out_err;
	}
	edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							  "regmap-efuse");
	if (IS_ERR(edac->efuse_map)) {
		dev_err(edac->dev, "unable to get syscon regmap efuse\n");
		rc = PTR_ERR(edac->efuse_map);
		goto out_err;
	}

	/*
	 * NOTE: The register bus resource is optional for backward
	 * compatibility.
	 */
	edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
						       "regmap-rb");
	if (IS_ERR(edac->rb_map)) {
		dev_warn(edac->dev, "missing syscon regmap rb\n");
		edac->rb_map = NULL;
	}

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(edac->pcp_csr)) {
		dev_err(&pdev->dev, "no PCP resource address\n");
		rc = PTR_ERR(edac->pcp_csr);
		goto out_err;
	}

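	/*
	 * In interrupt mode the hardware signals errors on three separate
	 * interrupt lines, all routed to the same shared handler, which
	 * demultiplexes them via the PCP status registers.
	 */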
	if (edac_op_state == EDAC_OPSTATE_INT) {
		int irq;
		int i;

		for (i = 0; i < 3; i++) {
			irq = platform_get_irq(pdev, i);
			if (irq < 0) {
				dev_err(&pdev->dev, "No IRQ resource\n");
				rc = irq;
				goto out_err;
			}
			rc = devm_request_irq(&pdev->dev, irq,
					      xgene_edac_isr, IRQF_SHARED,
					      dev_name(&pdev->dev), edac);
			if (rc) {
				dev_err(&pdev->dev,
					"Could not request IRQ %d\n", irq);
				goto out_err;
			}
		}
	}

	edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

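	/* Register an EDAC device for each enabled, supported child node */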
	for_each_child_of_node(pdev->dev.of_node, child) {
		if (!of_device_is_available(child))
			continue;
		if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
			xgene_edac_mc_add(edac, child);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
			xgene_edac_pmd_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
			xgene_edac_pmd_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
			xgene_edac_l3_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
			xgene_edac_l3_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
			xgene_edac_soc_add(edac, child, 0);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
			xgene_edac_soc_add(edac, child, 1);
	}

	return 0;

out_err:
	return rc;
}

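/*
 * Unbind: remove every registered block. The _safe iterators are
 * required because each remove helper frees the control structure that
 * embeds the list node.
 */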
static int xgene_edac_remove(struct platform_device *pdev)
{
	struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
	struct xgene_edac_mc_ctx *mcu;
	struct xgene_edac_mc_ctx *temp_mcu;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_pmd_ctx *temp_pmd;
	struct xgene_edac_dev_ctx *node;
	struct xgene_edac_dev_ctx *temp_node;

	list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
		xgene_edac_mc_remove(mcu);

	list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
		xgene_edac_pmd_remove(pmd);

	list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
		xgene_edac_l3_remove(node);

	list_for_each_entry_safe(node, temp_node, &edac->socs, next)
		xgene_edac_soc_remove(node);

	return 0;
}

static const struct of_device_id xgene_edac_of_match[] = {
	{ .compatible = "apm,xgene-edac" },
	{},
};
MODULE_DEVICE_TABLE(of, xgene_edac_of_match);

static struct platform_driver xgene_edac_driver = {
	.probe = xgene_edac_probe,
	.remove = xgene_edac_remove,
	.driver = {
		.name = "xgene-edac",
		.of_match_table = xgene_edac_of_match,
	},
};

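/*
 * Sanitize edac_op_state before registering the platform driver; any
 * value other than poll or interrupt falls back to interrupt mode.
 */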
static int __init xgene_edac_init(void)
{
	int rc;

	/* Make sure the error reporting method is sane */
	switch (edac_op_state) {
	case EDAC_OPSTATE_POLL:
	case EDAC_OPSTATE_INT:
		break;
	default:
		edac_op_state = EDAC_OPSTATE_INT;
		break;
	}

	rc = platform_driver_register(&xgene_edac_driver);
	if (rc)
		edac_printk(KERN_ERR, EDAC_MOD_STR,
			    "EDAC driver registration failed\n");

	return rc;
}
module_init(xgene_edac_init);

static void __exit xgene_edac_exit(void)
{
	platform_driver_unregister(&xgene_edac_driver);
}
module_exit(xgene_edac_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
MODULE_DESCRIPTION("APM X-Gene EDAC driver");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state,
		 "EDAC error reporting state: 0=Poll, 2=Interrupt");