1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Driver for Intel(R) 10nm server memory controller.
4  * Copyright (c) 2019, Intel Corporation.
5  *
6  */
7 
8 #include <linux/kernel.h>
9 #include <linux/io.h>
10 #include <asm/cpu_device_id.h>
11 #include <asm/intel-family.h>
12 #include <asm/mce.h>
13 #include "edac_module.h"
14 #include "skx_common.h"
15 
/* Driver revision string. */
#define I10NM_REVISION	"v0.0.6"
/* Module name string; handed to skx_get_nvdimm_info() in this file. */
#define EDAC_MOD_STR	"i10nm_edac"

/* Debug macros */
/* edac_printk() wrapper that tags every message with the "i10nm" prefix. */
#define i10nm_printk(level, fmt, arg...)	\
	edac_printk(level, "i10nm", fmt, ##arg)
22 
/*
 * PCI configuration space accessors.
 *
 * Each macro expands to a pci_read_config_dword() call, so its value is that
 * call's return code and the register content is stored into @reg. Several
 * of them pick a different offset/stride when res_cfg->type == GNR, so the
 * file-scope res_cfg pointer must be valid when they are used.
 */
/* Socket BAR: config offset 0xd0 of the (d)->uracu device. */
#define I10NM_GET_SCK_BAR(d, reg)	\
	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
/* BAR of the i-th memory controller: 4-byte stride from 0xd4 (GNR) or 0xd8. */
#define I10NM_GET_IMC_BAR(d, i, reg)		\
	pci_read_config_dword((d)->uracu,	\
	(res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg))
/* i-th SAD register starting at @offset: 12-byte stride on GNR, 8 otherwise. */
#define I10NM_GET_SAD(d, offset, i, reg)\
	pci_read_config_dword((d)->sad_all, (offset) + (i) * \
	(res_cfg->type == GNR ? 12 : 8), &(reg))
/* HBM memory controller BAR: config offset 0xd4 of the (d)->uracu device. */
#define I10NM_GET_HBM_IMC_BAR(d, reg)	\
	pci_read_config_dword((d)->uracu, 0xd4, &(reg))
/* CAPID3 register of the (d)->pcu_cr3 device (0x290 on GNR, 0x90 otherwise). */
#define I10NM_GET_CAPID3_CFG(d, reg)	\
	pci_read_config_dword((d)->pcu_cr3,	\
	res_cfg->type == GNR ? 0x290 : 0x90, &(reg))
/* CAPID5 register of the (d)->pcu_cr3 device (0x298 on GNR, 0x98 otherwise). */
#define I10NM_GET_CAPID5_CFG(d, reg)	\
	pci_read_config_dword((d)->pcu_cr3,	\
	res_cfg->type == GNR ? 0x298 : 0x98, &(reg))
/*
 * MMIO accessors.
 *
 * All of them address (m)->mbase + channel index * (m)->chan_mmio_sz plus a
 * register offset. The named-register variants choose the offset from
 * (m)->hbm_mc first and then, for some registers, from res_cfg->type == GNR.
 */
/* DIMMMTR of channel i, DIMM j (consecutive DIMMs are 4 bytes apart). */
#define I10NM_GET_DIMMMTR(m, i, j)	\
	readl((m)->mbase + ((m)->hbm_mc ? 0x80c :	\
	(res_cfg->type == GNR ? 0xc0c : 0x2080c)) +	\
	(i) * (m)->chan_mmio_sz + (j) * 4)
/* MCDDRTCFG of channel i (no GNR-specific offset is provided). */
#define I10NM_GET_MCDDRTCFG(m, i)	\
	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
	(i) * (m)->chan_mmio_sz)
/* MCMTR of channel i. */
#define I10NM_GET_MCMTR(m, i)		\
	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 :	\
	(res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) +	\
	(i) * (m)->chan_mmio_sz)
/* AMAP of channel i. */
#define I10NM_GET_AMAP(m, i)		\
	readl((m)->mbase + ((m)->hbm_mc ? 0x814 :	\
	(res_cfg->type == GNR ? 0xc14 : 0x20814)) +	\
	(i) * (m)->chan_mmio_sz)
/* Generic 32-bit read at @offset inside channel i's MMIO window. */
#define I10NM_GET_REG32(m, i, offset)	\
	readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
/* Generic 64-bit read at @offset inside channel i's MMIO window. */
#define I10NM_GET_REG64(m, i, offset)	\
	readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
/* Generic 32-bit write of @v at @offset inside channel i's MMIO window. */
#define I10NM_SET_REG32(m, i, offset, v)	\
	writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
60 
/*
 * Decoders for the raw BAR/capability values read above.
 * NOTE(review): GET_BITFIELD(v, lo, hi) is defined outside this file; it
 * presumably extracts bits lo..hi inclusive - confirm in skx_common.h.
 */
/* Socket MMIO base: BAR field [28:0] shifted left by 23. */
#define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
/* Memory controller MMIO offset: BAR field [10:0] in 4 KiB units. */
#define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
/* Memory controller MMIO size: (field [23:13] - field [10:0] + 1) 4 KiB units. */
#define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
/* HBM memory controller MMIO offset: field [10:0] in 4 KiB units + 0x140000. */
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg)	\
	((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)

/* Fixed GNR layout: extra offset added by get_ddr_munit() and per-MC size. */
#define I10NM_GNR_IMC_MMIO_OFFSET	0x24c000
#define I10NM_GNR_IMC_MMIO_SIZE		0x4000
/* Size of one HBM memory controller MMIO window (also the per-MC stride). */
#define I10NM_HBM_IMC_MMIO_SIZE		0x9000
/* DDR channel count field of CAPID5 (applied to I10NM_GET_CAPID5_CFG data). */
#define I10NM_DDR_IMC_CH_CNT(reg)	GET_BITFIELD(reg, 21, 24)
/* Non-zero when CAPID3 reports HBM (applied to I10NM_GET_CAPID3_CFG data). */
#define I10NM_IS_HBM_PRESENT(reg)	GET_BITFIELD(reg, 27, 30)
/* Bit 29 of MCMTR: set when the memory controller is an HBM one. */
#define I10NM_IS_HBM_IMC(reg)		GET_BITFIELD(reg, 29, 29)

/* Number of SAD entries scanned by i10nm_check_2lm(). */
#define I10NM_MAX_SAD			16
/* SAD entry bit 0: entry enabled. */
#define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
/* SAD entry bit 5: near-memory cacheable (used to detect 2-level memory). */
#define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)
78 
/*
 * Bits of the first retry_rd_err_log register (index 0 of the offsets_*
 * tables), as manipulated by __enable_retry_rd_err_log() and
 * show_retry_rd_err_log().
 */
#define RETRY_RD_ERR_LOG_UC		BIT(1)
#define RETRY_RD_ERR_LOG_NOOVER		BIT(14)
#define RETRY_RD_ERR_LOG_EN		BIT(15)
/* NOOVER and UC together: cleared as a pair when logging is enabled. */
#define RETRY_RD_ERR_LOG_NOOVER_UC	(BIT(14) | BIT(1))
/* Status bits [2:0], cleared after a dump when retry_rd_err_log == 2. */
#define RETRY_RD_ERR_LOG_OVER_UC_V	(BIT(2) | BIT(1) | BIT(0))
84 
/* List of per-socket struct skx_dev entries walked by the functions below. */
static struct list_head *i10nm_edac_list;

/* Resource configuration of the running CPU model (see i10nm_cpuids[]). */
static struct res_config *res_cfg;
/* When equal to 2, show_retry_rd_err_log() clears the log status bits. */
static int retry_rd_err_log;
/* Non-zero allows i10nm_mc_decode_available() to report MCA-based decoding. */
static int decoding_via_mca;
/* When true, i10nm_mc_decode_available() always refuses MCA-based decoding. */
static bool mem_cfg_2lm;

/*
 * Per-channel MMIO offsets of the retry_rd_err_log register sets.
 *
 * Each table has six entries. Entry [0] is the register that
 * __enable_retry_rd_err_log() modifies; show_retry_rd_err_log() dumps all six,
 * reading entry [5] as 64 bits and entry [2] as 64 bits on SPR (32 bits
 * otherwise).
 */
static u32 offsets_scrub_icx[]  = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr[]  = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr_hbm0[]  = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
static u32 offsets_scrub_spr_hbm1[]  = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
101 
/*
 * Turn retry_rd_err_log capture on or off for a single channel.
 *
 * @imc             : memory controller whose MMIO window is accessed.
 * @chan            : channel index inside @imc.
 * @enable          : true  - remember the current register values in
 *                            imc->chan[chan] and switch logging on;
 *                    false - put the remembered UC/NOOVER/EN bits back.
 * @offsets_scrub   : scrub log register offsets (only entry [0] is touched).
 * @offsets_demand  : demand log register offsets (only entry [0] is touched).
 * @offsets_demand2 : second demand log register offsets, or NULL if absent.
 */
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
				      u32 *offsets_scrub, u32 *offsets_demand,
				      u32 *offsets_demand2)
{
	const u32 noover_en = RETRY_RD_ERR_LOG_NOOVER | RETRY_RD_ERR_LOG_EN;
	u32 scrub, demand, demand2 = 0;
	u32 saved;

	scrub  = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
	demand = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
	if (offsets_demand2)
		demand2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);

	if (!enable) {
		/*
		 * Restore the defaults. "x |= saved & bits" re-sets bits that
		 * were set in the saved value; "x &= saved | ~bits" clears
		 * bits that were clear in the saved value. All other bits of
		 * the live register value are left alone.
		 */
		saved   = imc->chan[chan].retry_rd_err_log_s;
		scrub  |= saved & RETRY_RD_ERR_LOG_NOOVER_UC;
		scrub  &= saved | ~RETRY_RD_ERR_LOG_EN;

		saved   = imc->chan[chan].retry_rd_err_log_d;
		demand |= saved & RETRY_RD_ERR_LOG_NOOVER_UC;
		demand &= saved | ~RETRY_RD_ERR_LOG_EN;

		if (offsets_demand2) {
			saved    = imc->chan[chan].retry_rd_err_log_d2;
			demand2 |= saved & RETRY_RD_ERR_LOG_UC;
			demand2 &= saved | ~noover_en;
		}
	} else {
		/* Remember the defaults before modifying anything. */
		imc->chan[chan].retry_rd_err_log_s = scrub;
		imc->chan[chan].retry_rd_err_log_d = demand;
		if (offsets_demand2)
			imc->chan[chan].retry_rd_err_log_d2 = demand2;

		scrub  = (scrub  & ~RETRY_RD_ERR_LOG_NOOVER_UC) | RETRY_RD_ERR_LOG_EN;
		demand = (demand & ~RETRY_RD_ERR_LOG_NOOVER_UC) | RETRY_RD_ERR_LOG_EN;

		/* The second demand set keeps NOOVER set, unlike the others. */
		if (offsets_demand2)
			demand2 = (demand2 & ~RETRY_RD_ERR_LOG_UC) | noover_en;
	}

	I10NM_SET_REG32(imc, chan, offsets_scrub[0], scrub);
	I10NM_SET_REG32(imc, chan, offsets_demand[0], demand);
	if (offsets_demand2)
		I10NM_SET_REG32(imc, chan, offsets_demand2[0], demand2);
}
160 
/*
 * Apply __enable_retry_rd_err_log() to every channel of every mapped memory
 * controller on every socket in i10nm_edac_list.
 *
 * @enable : true to enable logging, false to restore the saved defaults.
 *
 * Slots [0, ddr_imc_num) of d->imc[] are treated as DDR controllers; the
 * following hbm_imc_num slots are treated as HBM controllers, which get both
 * the hbm0 and the hbm1 register sets programmed per channel.
 */
static void enable_retry_rd_err_log(bool enable)
{
	int mc, ch, ddr_end, hbm_end;
	struct skx_imc *imc;
	struct skx_dev *d;

	edac_dbg(2, "\n");

	list_for_each_entry(d, i10nm_edac_list, list) {
		ddr_end = res_cfg->ddr_imc_num;
		hbm_end = ddr_end + res_cfg->hbm_imc_num;
		mc = 0;

		/* DDR memory controllers. */
		for (; mc < ddr_end; mc++) {
			imc = &d->imc[mc];
			if (!imc->mbase)
				continue;

			for (ch = 0; ch < res_cfg->ddr_chan_num; ch++)
				__enable_retry_rd_err_log(imc, ch, enable,
							  res_cfg->offsets_scrub,
							  res_cfg->offsets_demand,
							  res_cfg->offsets_demand2);
		}

		/* HBM memory controllers; mc carries on where the DDR loop stopped. */
		for (; mc < hbm_end; mc++) {
			imc = &d->imc[mc];
			if (!(imc->mbase && imc->hbm_mc))
				continue;

			for (ch = 0; ch < res_cfg->hbm_chan_num; ch++) {
				__enable_retry_rd_err_log(imc, ch, enable,
							  res_cfg->offsets_scrub_hbm0,
							  res_cfg->offsets_demand_hbm0,
							  NULL);
				__enable_retry_rd_err_log(imc, ch, enable,
							  res_cfg->offsets_scrub_hbm1,
							  res_cfg->offsets_demand_hbm1,
							  NULL);
			}
		}
	}
}
206 
/*
 * Format the retry_rd_err_log registers and the corrected error counters of
 * the channel identified by @res into @msg.
 *
 * @res       : decoded error location; supplies the device, memory
 *              controller index, channel and (for HBM) chip select.
 * @msg       : output buffer.
 * @len       : size of @msg in bytes.
 * @scrub_err : true to dump the scrub log set, false for the demand set(s).
 *
 * Nothing is written when the memory controller has no MMIO mapping. When
 * the file-scope retry_rd_err_log equals 2, the status bits of the dumped
 * log register(s) are cleared afterwards.
 */
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
				  int len, bool scrub_err)
{
	struct skx_imc *imc = &res->dev->imc[res->imc];
	u32 log0, log1, log2, log3, log4;
	u32 corr0, corr1, corr2, corr3;
	u32 lxg0, lxg1, lxg3, lxg4;
	u32 *xffsets = NULL;
	u64 log2a, log5;
	u64 lxg2a, lxg5;
	u32 *offsets;
	int n, pch;

	if (!imc->mbase)
		return;

	/* Pick the register set(s) to dump. */
	if (imc->hbm_mc) {
		/* The low bit of the chip select chooses the hbm0/hbm1 set. */
		pch = res->cs & 1;

		if (pch)
			offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
					      res_cfg->offsets_demand_hbm1;
		else
			offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
					      res_cfg->offsets_demand_hbm0;
	} else {
		if (scrub_err) {
			offsets = res_cfg->offsets_scrub;
		} else {
			offsets = res_cfg->offsets_demand;
			/* Optional second demand set; NULL when not configured. */
			xffsets = res_cfg->offsets_demand2;
		}
	}

	log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
	log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
	log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
	log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
	log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);

	if (xffsets) {
		lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]);
		lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]);
		lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]);
		lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]);
		lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]);
	}

	/* Entry [2] is read as 64 bits on SPR and as 32 bits elsewhere. */
	if (res_cfg->type == SPR) {
		log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
			     log0, log1, log2a, log3, log4, log5);

		/*
		 * snprintf() returns the untruncated length, so n may be
		 * >= len; only append while there is room left.
		 */
		if (len - n > 0) {
			if (xffsets) {
				lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]);
				n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
					     lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5);
			} else {
				n += snprintf(msg + n, len - n, "]");
			}
		}
	} else {
		log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
			     log0, log1, log2, log3, log4, log5);
	}

	/* Corrected error counters: four registers, two 16-bit counts each. */
	if (imc->hbm_mc) {
		if (pch) {
			corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
			corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
			corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
			corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
		} else {
			corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
			corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
			corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
			corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
		}
	} else {
		corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
		corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
		corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
		corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
	}

	if (len - n > 0)
		snprintf(msg + n, len - n,
			 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
			 corr0 & 0xffff, corr0 >> 16,
			 corr1 & 0xffff, corr1 >> 16,
			 corr2 & 0xffff, corr2 >> 16,
			 corr3 & 0xffff, corr3 >> 16);

	/* Clear status bits */
	if (retry_rd_err_log == 2) {
		/* Write back only when at least one status bit is set. */
		if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
			log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
			I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
		}

		if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
			lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
			I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0);
		}
	}
}
315 
pci_get_dev_wrapper(int dom,unsigned int bus,unsigned int dev,unsigned int fun)316 static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
317 					   unsigned int dev, unsigned int fun)
318 {
319 	struct pci_dev *pdev;
320 
321 	pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
322 	if (!pdev) {
323 		edac_dbg(2, "No device %02x:%02x.%x\n",
324 			 bus, dev, fun);
325 		return NULL;
326 	}
327 
328 	if (unlikely(pci_enable_device(pdev) < 0)) {
329 		edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
330 			 bus, dev, fun);
331 		pci_dev_put(pdev);
332 		return NULL;
333 	}
334 
335 	return pdev;
336 }
337 
338 /**
339  * i10nm_get_imc_num() - Get the number of present DDR memory controllers.
340  *
341  * @cfg : The pointer to the structure of EDAC resource configurations.
342  *
343  * For Granite Rapids CPUs, the number of present DDR memory controllers read
344  * at runtime overwrites the value statically configured in @cfg->ddr_imc_num.
345  * For other CPUs, the number of present DDR memory controllers is statically
346  * configured in @cfg->ddr_imc_num.
347  *
348  * RETURNS : 0 on success, < 0 on failure.
349  */
i10nm_get_imc_num(struct res_config * cfg)350 static int i10nm_get_imc_num(struct res_config *cfg)
351 {
352 	int n, imc_num, chan_num = 0;
353 	struct skx_dev *d;
354 	u32 reg;
355 
356 	list_for_each_entry(d, i10nm_edac_list, list) {
357 		d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus],
358 						 res_cfg->pcu_cr3_bdf.dev,
359 						 res_cfg->pcu_cr3_bdf.fun);
360 		if (!d->pcu_cr3)
361 			continue;
362 
363 		if (I10NM_GET_CAPID5_CFG(d, reg))
364 			continue;
365 
366 		n = I10NM_DDR_IMC_CH_CNT(reg);
367 
368 		if (!chan_num) {
369 			chan_num = n;
370 			edac_dbg(2, "Get DDR CH number: %d\n", chan_num);
371 		} else if (chan_num != n) {
372 			i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n);
373 		}
374 	}
375 
376 	switch (cfg->type) {
377 	case GNR:
378 		/*
379 		 * One channel per DDR memory controller for Granite Rapids CPUs.
380 		 */
381 		imc_num = chan_num;
382 
383 		if (!imc_num) {
384 			i10nm_printk(KERN_ERR, "Invalid DDR MC number\n");
385 			return -ENODEV;
386 		}
387 
388 		if (imc_num > I10NM_NUM_DDR_IMC) {
389 			i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num);
390 			return -EINVAL;
391 		}
392 
393 		if (cfg->ddr_imc_num != imc_num) {
394 			/*
395 			 * Store the number of present DDR memory controllers.
396 			 */
397 			cfg->ddr_imc_num = imc_num;
398 			edac_dbg(2, "Set DDR MC number: %d", imc_num);
399 		}
400 
401 		return 0;
402 	default:
403 		/*
404 		 * For other CPUs, the number of present DDR memory controllers
405 		 * is statically pre-configured in cfg->ddr_imc_num.
406 		 */
407 		return 0;
408 	}
409 }
410 
i10nm_check_2lm(struct res_config * cfg)411 static bool i10nm_check_2lm(struct res_config *cfg)
412 {
413 	struct skx_dev *d;
414 	u32 reg;
415 	int i;
416 
417 	list_for_each_entry(d, i10nm_edac_list, list) {
418 		d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus],
419 						 res_cfg->sad_all_bdf.dev,
420 						 res_cfg->sad_all_bdf.fun);
421 		if (!d->sad_all)
422 			continue;
423 
424 		for (i = 0; i < I10NM_MAX_SAD; i++) {
425 			I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
426 			if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
427 				edac_dbg(2, "2-level memory configuration.\n");
428 				return true;
429 			}
430 		}
431 	}
432 
433 	return false;
434 }
435 
436 /*
437  * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code.
438  * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS.
439  */
i10nm_mscod_is_ddrt(u32 mscod)440 static bool i10nm_mscod_is_ddrt(u32 mscod)
441 {
442 	switch (res_cfg->type) {
443 	case I10NM:
444 		switch (mscod) {
445 		case 0x0106: case 0x0107:
446 		case 0x0800: case 0x0804:
447 		case 0x0806 ... 0x0808:
448 		case 0x080a ... 0x080e:
449 		case 0x0810: case 0x0811:
450 		case 0x0816: case 0x081e:
451 		case 0x081f:
452 			return true;
453 		}
454 
455 		break;
456 	case SPR:
457 		switch (mscod) {
458 		case 0x0800: case 0x0804:
459 		case 0x0806 ... 0x0808:
460 		case 0x080a ... 0x080e:
461 		case 0x0810: case 0x0811:
462 		case 0x0816: case 0x081e:
463 		case 0x081f:
464 			return true;
465 		}
466 
467 		break;
468 	default:
469 		return false;
470 	}
471 
472 	return false;
473 }
474 
i10nm_mc_decode_available(struct mce * mce)475 static bool i10nm_mc_decode_available(struct mce *mce)
476 {
477 #define ICX_IMCx_CHy		0x06666000
478 	u8 bank;
479 
480 	if (!decoding_via_mca || mem_cfg_2lm)
481 		return false;
482 
483 	if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
484 			!= (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
485 		return false;
486 
487 	bank = mce->bank;
488 
489 	switch (res_cfg->type) {
490 	case I10NM:
491 		/* Check whether the bank is one of {13,14,17,18,21,22,25,26} */
492 		if (!(ICX_IMCx_CHy & (1 << bank)))
493 			return false;
494 		break;
495 	case SPR:
496 		if (bank < 13 || bank > 20)
497 			return false;
498 		break;
499 	default:
500 		return false;
501 	}
502 
503 	/* DDRT errors can't be decoded from MCA bank registers */
504 	if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
505 		return false;
506 
507 	if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
508 		return false;
509 
510 	return true;
511 }
512 
i10nm_mc_decode(struct decoded_addr * res)513 static bool i10nm_mc_decode(struct decoded_addr *res)
514 {
515 	struct mce *m = res->mce;
516 	struct skx_dev *d;
517 	u8 bank;
518 
519 	if (!i10nm_mc_decode_available(m))
520 		return false;
521 
522 	list_for_each_entry(d, i10nm_edac_list, list) {
523 		if (d->imc[0].src_id == m->socketid) {
524 			res->socket = m->socketid;
525 			res->dev = d;
526 			break;
527 		}
528 	}
529 
530 	switch (res_cfg->type) {
531 	case I10NM:
532 		bank              = m->bank - 13;
533 		res->imc          = bank / 4;
534 		res->channel      = bank % 2;
535 		res->column       = GET_BITFIELD(m->misc, 9, 18) << 2;
536 		res->row          = GET_BITFIELD(m->misc, 19, 39);
537 		res->bank_group   = GET_BITFIELD(m->misc, 40, 41);
538 		res->bank_address = GET_BITFIELD(m->misc, 42, 43);
539 		res->bank_group  |= GET_BITFIELD(m->misc, 44, 44) << 2;
540 		res->rank         = GET_BITFIELD(m->misc, 56, 58);
541 		res->dimm         = res->rank >> 2;
542 		res->rank         = res->rank % 4;
543 		break;
544 	case SPR:
545 		bank              = m->bank - 13;
546 		res->imc          = bank / 2;
547 		res->channel      = bank % 2;
548 		res->column       = GET_BITFIELD(m->misc, 9, 18) << 2;
549 		res->row          = GET_BITFIELD(m->misc, 19, 36);
550 		res->bank_group   = GET_BITFIELD(m->misc, 37, 38);
551 		res->bank_address = GET_BITFIELD(m->misc, 39, 40);
552 		res->bank_group  |= GET_BITFIELD(m->misc, 41, 41) << 2;
553 		res->rank         = GET_BITFIELD(m->misc, 57, 57);
554 		res->dimm         = GET_BITFIELD(m->misc, 58, 58);
555 		break;
556 	default:
557 		return false;
558 	}
559 
560 	if (!res->dev) {
561 		skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
562 			   m->socketid, res->imc);
563 		return false;
564 	}
565 
566 	return true;
567 }
568 
569 /**
570  * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller.
571  *
572  * @d            : The pointer to the structure of CPU socket EDAC device.
573  * @logical_idx  : The logical index of the present memory controller (0 ~ max present MC# - 1).
574  * @physical_idx : To store the corresponding physical index of @logical_idx.
575  *
576  * RETURNS       : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure.
577  */
get_gnr_mdev(struct skx_dev * d,int logical_idx,int * physical_idx)578 static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx)
579 {
580 #define GNR_MAX_IMC_PCI_CNT	28
581 
582 	struct pci_dev *mdev;
583 	int i, logical = 0;
584 
585 	/*
586 	 * Detect present memory controllers from { PCI device: 8-5, function 7-1 }
587 	 */
588 	for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) {
589 		mdev = pci_get_dev_wrapper(d->seg,
590 					   d->bus[res_cfg->ddr_mdev_bdf.bus],
591 					   res_cfg->ddr_mdev_bdf.dev + i / 7,
592 					   res_cfg->ddr_mdev_bdf.fun + i % 7);
593 
594 		if (mdev) {
595 			if (logical == logical_idx) {
596 				*physical_idx = i;
597 				return mdev;
598 			}
599 
600 			pci_dev_put(mdev);
601 			logical++;
602 		}
603 	}
604 
605 	return NULL;
606 }
607 
608 /**
609  * get_ddr_munit() - Get the resource of the i-th DDR memory controller.
610  *
611  * @d      : The pointer to the structure of CPU socket EDAC device.
612  * @i      : The index of the CPU socket relative DDR memory controller.
613  * @offset : To store the MMIO offset of the i-th DDR memory controller.
614  * @size   : To store the MMIO size of the i-th DDR memory controller.
615  *
616  * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure.
617  */
get_ddr_munit(struct skx_dev * d,int i,u32 * offset,unsigned long * size)618 static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size)
619 {
620 	struct pci_dev *mdev;
621 	int physical_idx;
622 	u32 reg;
623 
624 	switch (res_cfg->type) {
625 	case GNR:
626 		if (I10NM_GET_IMC_BAR(d, 0, reg)) {
627 			i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n");
628 			return NULL;
629 		}
630 
631 		mdev = get_gnr_mdev(d, i, &physical_idx);
632 		if (!mdev)
633 			return NULL;
634 
635 		*offset = I10NM_GET_IMC_MMIO_OFFSET(reg) +
636 			  I10NM_GNR_IMC_MMIO_OFFSET +
637 			  physical_idx * I10NM_GNR_IMC_MMIO_SIZE;
638 		*size   = I10NM_GNR_IMC_MMIO_SIZE;
639 
640 		break;
641 	default:
642 		if (I10NM_GET_IMC_BAR(d, i, reg)) {
643 			i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i);
644 			return NULL;
645 		}
646 
647 		mdev = pci_get_dev_wrapper(d->seg,
648 					   d->bus[res_cfg->ddr_mdev_bdf.bus],
649 					   res_cfg->ddr_mdev_bdf.dev + i,
650 					   res_cfg->ddr_mdev_bdf.fun);
651 		if (!mdev)
652 			return NULL;
653 
654 		*offset  = I10NM_GET_IMC_MMIO_OFFSET(reg);
655 		*size    = I10NM_GET_IMC_MMIO_SIZE(reg);
656 	}
657 
658 	return mdev;
659 }
660 
661 /**
662  * i10nm_imc_absent() - Check whether the memory controller @imc is absent
663  *
664  * @imc    : The pointer to the structure of memory controller EDAC device.
665  *
666  * RETURNS : true if the memory controller EDAC device is absent, false otherwise.
667  */
i10nm_imc_absent(struct skx_imc * imc)668 static bool i10nm_imc_absent(struct skx_imc *imc)
669 {
670 	u32 mcmtr;
671 	int i;
672 
673 	switch (res_cfg->type) {
674 	case SPR:
675 		for (i = 0; i < res_cfg->ddr_chan_num; i++) {
676 			mcmtr = I10NM_GET_MCMTR(imc, i);
677 			edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr);
678 			if (mcmtr != ~0)
679 				return false;
680 		}
681 
682 		/*
683 		 * Some workstations' absent memory controllers still
684 		 * appear as PCIe devices, misleading the EDAC driver.
685 		 * By observing that the MMIO registers of these absent
686 		 * memory controllers consistently hold the value of ~0.
687 		 *
688 		 * We identify a memory controller as absent by checking
689 		 * if its MMIO register "mcmtr" == ~0 in all its channels.
690 		 */
691 		return true;
692 	default:
693 		return false;
694 	}
695 }
696 
i10nm_get_ddr_munits(void)697 static int i10nm_get_ddr_munits(void)
698 {
699 	struct pci_dev *mdev;
700 	void __iomem *mbase;
701 	unsigned long size;
702 	struct skx_dev *d;
703 	int i, lmc, j = 0;
704 	u32 reg, off;
705 	u64 base;
706 
707 	list_for_each_entry(d, i10nm_edac_list, list) {
708 		d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus],
709 						  res_cfg->util_all_bdf.dev,
710 						  res_cfg->util_all_bdf.fun);
711 		if (!d->util_all)
712 			return -ENODEV;
713 
714 		d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus],
715 					       res_cfg->uracu_bdf.dev,
716 					       res_cfg->uracu_bdf.fun);
717 		if (!d->uracu)
718 			return -ENODEV;
719 
720 		if (I10NM_GET_SCK_BAR(d, reg)) {
721 			i10nm_printk(KERN_ERR, "Failed to socket bar\n");
722 			return -ENODEV;
723 		}
724 
725 		base = I10NM_GET_SCK_MMIO_BASE(reg);
726 		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
727 			 j++, base, reg);
728 
729 		for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) {
730 			mdev = get_ddr_munit(d, i, &off, &size);
731 
732 			if (i == 0 && !mdev) {
733 				i10nm_printk(KERN_ERR, "No IMC found\n");
734 				return -ENODEV;
735 			}
736 			if (!mdev)
737 				continue;
738 
739 			edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
740 				 i, base + off, size, reg);
741 
742 			mbase = ioremap(base + off, size);
743 			if (!mbase) {
744 				i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
745 					     base + off);
746 				return -ENODEV;
747 			}
748 
749 			d->imc[lmc].mbase = mbase;
750 			if (i10nm_imc_absent(&d->imc[lmc])) {
751 				pci_dev_put(mdev);
752 				iounmap(mbase);
753 				d->imc[lmc].mbase = NULL;
754 				edac_dbg(2, "Skip absent mc%d\n", i);
755 				continue;
756 			} else {
757 				d->imc[lmc].mdev = mdev;
758 				lmc++;
759 			}
760 		}
761 	}
762 
763 	return 0;
764 }
765 
i10nm_check_hbm_imc(struct skx_dev * d)766 static bool i10nm_check_hbm_imc(struct skx_dev *d)
767 {
768 	u32 reg;
769 
770 	if (I10NM_GET_CAPID3_CFG(d, reg)) {
771 		i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
772 		return false;
773 	}
774 
775 	return I10NM_IS_HBM_PRESENT(reg) != 0;
776 }
777 
i10nm_get_hbm_munits(void)778 static int i10nm_get_hbm_munits(void)
779 {
780 	struct pci_dev *mdev;
781 	void __iomem *mbase;
782 	u32 reg, off, mcmtr;
783 	struct skx_dev *d;
784 	int i, lmc;
785 	u64 base;
786 
787 	list_for_each_entry(d, i10nm_edac_list, list) {
788 		if (!d->pcu_cr3)
789 			return -ENODEV;
790 
791 		if (!i10nm_check_hbm_imc(d)) {
792 			i10nm_printk(KERN_DEBUG, "No hbm memory\n");
793 			return -ENODEV;
794 		}
795 
796 		if (I10NM_GET_SCK_BAR(d, reg)) {
797 			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
798 			return -ENODEV;
799 		}
800 		base = I10NM_GET_SCK_MMIO_BASE(reg);
801 
802 		if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
803 			i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
804 			return -ENODEV;
805 		}
806 		base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
807 
808 		lmc = res_cfg->ddr_imc_num;
809 
810 		for (i = 0; i < res_cfg->hbm_imc_num; i++) {
811 			mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus],
812 						   res_cfg->hbm_mdev_bdf.dev + i / 4,
813 						   res_cfg->hbm_mdev_bdf.fun + i % 4);
814 
815 			if (i == 0 && !mdev) {
816 				i10nm_printk(KERN_ERR, "No hbm mc found\n");
817 				return -ENODEV;
818 			}
819 			if (!mdev)
820 				continue;
821 
822 			d->imc[lmc].mdev = mdev;
823 			off = i * I10NM_HBM_IMC_MMIO_SIZE;
824 
825 			edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
826 				 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
827 
828 			mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
829 			if (!mbase) {
830 				pci_dev_put(d->imc[lmc].mdev);
831 				d->imc[lmc].mdev = NULL;
832 
833 				i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
834 					     base + off);
835 				return -ENOMEM;
836 			}
837 
838 			d->imc[lmc].mbase = mbase;
839 			d->imc[lmc].hbm_mc = true;
840 
841 			mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
842 			if (!I10NM_IS_HBM_IMC(mcmtr)) {
843 				iounmap(d->imc[lmc].mbase);
844 				d->imc[lmc].mbase = NULL;
845 				d->imc[lmc].hbm_mc = false;
846 				pci_dev_put(d->imc[lmc].mdev);
847 				d->imc[lmc].mdev = NULL;
848 
849 				i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
850 				return -ENODEV;
851 			}
852 
853 			lmc++;
854 		}
855 	}
856 
857 	return 0;
858 }
859 
/*
 * Resource configuration matched by i10nm_cpuids[] for ATOM_TREMONT_D and
 * ICELAKE_X steppings 0x0-0x3. Differs from i10nm_cfg1 only in
 * .busno_cfg_offset.
 * NOTE(review): the *_bdf triples are presumably {bus index, device,
 * function} - confirm against the res_config declaration.
 */
static struct res_config i10nm_cfg0 = {
	.type			= I10NM,
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xcc,
	.ddr_imc_num		= 4,
	.ddr_chan_num		= 2,
	.ddr_dimm_num		= 2,
	.ddr_chan_mmio_sz	= 0x4000,
	.sad_all_bdf		= {1, 29, 0},
	.pcu_cr3_bdf		= {1, 30, 3},
	.util_all_bdf		= {1, 29, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 12, 0},
	.hbm_mdev_bdf		= {0, 12, 1},
	.sad_all_offset		= 0x108,
	.offsets_scrub		= offsets_scrub_icx,
	.offsets_demand		= offsets_demand_icx,
};
878 
/*
 * Resource configuration matched by i10nm_cpuids[] for ATOM_TREMONT_D and
 * ICELAKE_X steppings 0x4-0xf and for all ICELAKE_D steppings. Differs from
 * i10nm_cfg0 only in .busno_cfg_offset.
 */
static struct res_config i10nm_cfg1 = {
	.type			= I10NM,
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xd0,
	.ddr_imc_num		= 4,
	.ddr_chan_num		= 2,
	.ddr_dimm_num		= 2,
	.ddr_chan_mmio_sz	= 0x4000,
	.sad_all_bdf		= {1, 29, 0},
	.pcu_cr3_bdf		= {1, 30, 3},
	.util_all_bdf		= {1, 29, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 12, 0},
	.hbm_mdev_bdf		= {0, 12, 1},
	.sad_all_offset		= 0x108,
	.offsets_scrub		= offsets_scrub_icx,
	.offsets_demand		= offsets_demand_icx,
};
897 
/*
 * Resource configuration matched by i10nm_cpuids[] for SAPPHIRERAPIDS_X and
 * EMERALDRAPIDS_X. The only configuration in this file that describes HBM
 * memory controllers and a second demand retry_rd_err_log register set.
 */
static struct res_config spr_cfg = {
	.type			= SPR,
	.decs_did		= 0x3252,
	.busno_cfg_offset	= 0xd0,
	.ddr_imc_num		= 4,
	.ddr_chan_num		= 2,
	.ddr_dimm_num		= 2,
	.hbm_imc_num		= 16,
	.hbm_chan_num		= 2,
	.hbm_dimm_num		= 1,
	.ddr_chan_mmio_sz	= 0x8000,
	.hbm_chan_mmio_sz	= 0x4000,
	.support_ddr5		= true,
	.sad_all_bdf		= {1, 10, 0},
	.pcu_cr3_bdf		= {1, 30, 3},
	.util_all_bdf		= {1, 29, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 12, 0},
	.hbm_mdev_bdf		= {0, 12, 1},
	.sad_all_offset		= 0x300,
	.offsets_scrub		= offsets_scrub_spr,
	.offsets_scrub_hbm0	= offsets_scrub_spr_hbm0,
	.offsets_scrub_hbm1	= offsets_scrub_spr_hbm1,
	.offsets_demand		= offsets_demand_spr,
	.offsets_demand2	= offsets_demand2_spr,
	.offsets_demand_hbm0	= offsets_demand_spr_hbm0,
	.offsets_demand_hbm1	= offsets_demand_spr_hbm1,
};
926 
/*
 * Resource configuration matched by i10nm_cpuids[] for GRANITERAPIDS_X and
 * ATOM_CRESTMONT_X. .ddr_imc_num is only an initial value here:
 * i10nm_get_imc_num() overwrites it with the count read at runtime. No
 * retry_rd_err_log offset tables and no HBM fields are provided.
 */
static struct res_config gnr_cfg = {
	.type			= GNR,
	.decs_did		= 0x3252,
	.busno_cfg_offset	= 0xd0,
	.ddr_imc_num		= 12,
	.ddr_chan_num		= 1,
	.ddr_dimm_num		= 2,
	.ddr_chan_mmio_sz	= 0x4000,
	.support_ddr5		= true,
	.sad_all_bdf		= {0, 13, 0},
	.pcu_cr3_bdf		= {0, 5, 0},
	.util_all_bdf		= {0, 13, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 5, 1},
	.sad_all_offset		= 0x300,
};
943 
/*
 * Supported CPU models and stepping ranges, each paired with the resource
 * configuration to use for it. The table is also exported for module
 * autoloading through MODULE_DEVICE_TABLE().
 */
static const struct x86_cpu_id i10nm_cpuids[] = {
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D,		X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
957 
i10nm_check_ecc(struct skx_imc * imc,int chan)958 static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
959 {
960 	u32 mcmtr;
961 
962 	mcmtr = I10NM_GET_MCMTR(imc, chan);
963 	edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr);
964 
965 	return !!GET_BITFIELD(mcmtr, 2, 2);
966 }
967 
i10nm_get_dimm_config(struct mem_ctl_info * mci,struct res_config * cfg)968 static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
969 				 struct res_config *cfg)
970 {
971 	struct skx_pvt *pvt = mci->pvt_info;
972 	struct skx_imc *imc = pvt->imc;
973 	u32 mtr, amap, mcddrtcfg = 0;
974 	struct dimm_info *dimm;
975 	int i, j, ndimms;
976 
977 	for (i = 0; i < imc->num_channels; i++) {
978 		if (!imc->mbase)
979 			continue;
980 
981 		ndimms = 0;
982 		amap = I10NM_GET_AMAP(imc, i);
983 
984 		if (res_cfg->type != GNR)
985 			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
986 
987 		for (j = 0; j < imc->num_dimms; j++) {
988 			dimm = edac_get_dimm(mci, i, j, 0);
989 			mtr = I10NM_GET_DIMMMTR(imc, i, j);
990 			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
991 				 mtr, mcddrtcfg, imc->mc, i, j);
992 
993 			if (IS_DIMM_PRESENT(mtr))
994 				ndimms += skx_get_dimm_info(mtr, 0, amap, dimm,
995 							    imc, i, j, cfg);
996 			else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
997 				ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
998 							      EDAC_MOD_STR);
999 		}
1000 		if (ndimms && !i10nm_check_ecc(imc, i)) {
1001 			i10nm_printk(KERN_ERR, "ECC is disabled on imc %d channel %d\n",
1002 				     imc->mc, i);
1003 			return -ENODEV;
1004 		}
1005 	}
1006 
1007 	return 0;
1008 }
1009 
/*
 * Notifier block that hands machine-check records to skx_mce_check_error()
 * at MCE_PRIO_EDAC priority. i10nm_init() registers it on the MCE decode
 * chain and i10nm_exit() unregisters it.
 */
static struct notifier_block i10nm_mce_dec = {
	.notifier_call	= skx_mce_check_error,
	.priority	= MCE_PRIO_EDAC,
};
1014 
1015 #ifdef CONFIG_EDAC_DEBUG
1016 /*
1017  * Debug feature.
1018  * Exercise the address decode logic by writing an address to
1019  * /sys/kernel/debug/edac/i10nm_test/addr.
1020  */
/*
 * The "i10nm_test" debugfs directory. Assigned by setup_i10nm_debug(),
 * which resets it to NULL when the "addr" file cannot be created.
 */
static struct dentry *i10nm_test;
1022 
debugfs_u64_set(void * data,u64 val)1023 static int debugfs_u64_set(void *data, u64 val)
1024 {
1025 	struct mce m;
1026 
1027 	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
1028 
1029 	memset(&m, 0, sizeof(m));
1030 	/* ADDRV + MemRd + Unknown channel */
1031 	m.status = MCI_STATUS_ADDRV + 0x90;
1032 	/* One corrected error */
1033 	m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
1034 	m.addr = val;
1035 	skx_mce_check_error(NULL, 0, &m);
1036 
1037 	return 0;
1038 }
1039 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
1040 
setup_i10nm_debug(void)1041 static void setup_i10nm_debug(void)
1042 {
1043 	i10nm_test = edac_debugfs_create_dir("i10nm_test");
1044 	if (!i10nm_test)
1045 		return;
1046 
1047 	if (!edac_debugfs_create_file("addr", 0200, i10nm_test,
1048 				      NULL, &fops_u64_wo)) {
1049 		debugfs_remove(i10nm_test);
1050 		i10nm_test = NULL;
1051 	}
1052 }
1053 
/*
 * Remove the debugfs directory created by setup_i10nm_debug() together
 * with everything below it.
 * NOTE(review): i10nm_test can be NULL here when setup failed; presumably
 * debugfs_remove_recursive() tolerates that - confirm.
 */
static void teardown_i10nm_debug(void)
{
	debugfs_remove_recursive(i10nm_test);
}
1058 #else
/* Without CONFIG_EDAC_DEBUG the debugfs test hooks compile to nothing. */
static inline void setup_i10nm_debug(void) {}
static inline void teardown_i10nm_debug(void) {}
1061 #endif /*CONFIG_EDAC_DEBUG*/
1062 
/*
 * Module entry point.
 *
 * Matches the running CPU against i10nm_cpuids, collects the bus mappings
 * and memory units, registers an EDAC controller through skx_register_mci()
 * for every iMC slot that has an mdev, and finally hooks the driver into
 * the MCE decode chain.
 *
 * Returns 0 on success or a negative errno. Failures that take the "fail"
 * label are unwound with skx_remove(); the earlier checks return directly.
 */
static int __init i10nm_init(void)
{
	u8 mc = 0, src_id = 0, node_id = 0;
	const struct x86_cpu_id *id;
	struct res_config *cfg;
	const char *owner;
	struct skx_dev *d;
	int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
	u64 tolm, tohm;
	int imc_num;

	edac_dbg(2, "\n");

	/*
	 * NOTE(review): presumably a non-NULL result means GHES owns error
	 * reporting on this platform - confirm against ghes_get_devices().
	 */
	if (ghes_get_devices())
		return -EBUSY;

	/*
	 * Refuse to load when the EDAC owner is set to a different name.
	 * sizeof() includes the terminating NUL, so this is a full-string
	 * comparison rather than a prefix match.
	 */
	owner = edac_get_owner();
	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
		return -EBUSY;

	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	id = x86_match_cpu(i10nm_cpuids);
	if (!id)
		return -ENODEV;

	/*
	 * driver_data carries the res_config picked in i10nm_cpuids. It is
	 * also published through the file-scope res_cfg, which the
	 * I10NM_GET_* register macros consult.
	 */
	cfg = (struct res_config *)id->driver_data;
	res_cfg = cfg;

	/*
	 * tolm/tohm are not used again in this function.
	 * NOTE(review): 0x09a2 and off[] look like a PCI device ID and config
	 * offsets consumed by skx_get_hi_lo() - confirm there.
	 */
	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
	if (rc)
		return rc;

	rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list);
	if (rc < 0)
		goto fail;
	/*
	 * A zero result returns directly instead of going through "fail"
	 * (presumably nothing was allocated in that case - confirm).
	 */
	if (rc == 0) {
		i10nm_printk(KERN_ERR, "No memory controllers found\n");
		return -ENODEV;
	}

	rc = i10nm_get_imc_num(cfg);
	if (rc < 0)
		goto fail;

	mem_cfg_2lm = i10nm_check_2lm(cfg);
	skx_set_mem_cfg(mem_cfg_2lm);

	rc = i10nm_get_ddr_munits();

	/*
	 * i10nm_get_hbm_munits() is always called. Bail out only when both
	 * it and the DDR lookup returned non-zero; rc then still holds the
	 * DDR result.
	 */
	if (i10nm_get_hbm_munits() && rc)
		goto fail;

	imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num;

	list_for_each_entry(d, i10nm_edac_list, list) {
		rc = skx_get_src_id(d, 0xf8, &src_id);
		if (rc < 0)
			goto fail;

		rc = skx_get_node_id(d, &node_id);
		if (rc < 0)
			goto fail;

		edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
		for (i = 0; i < imc_num; i++) {
			/* Slots without an mdev are left unregistered. */
			if (!d->imc[i].mdev)
				continue;

			/*
			 * mc keeps counting across all skx_dev entries,
			 * while lmc is the index within this one.
			 */
			d->imc[i].mc  = mc++;
			d->imc[i].lmc = i;
			d->imc[i].src_id  = src_id;
			d->imc[i].node_id = node_id;
			/* Channel geometry comes from the HBM or DDR half of cfg. */
			if (d->imc[i].hbm_mc) {
				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
				d->imc[i].num_channels = cfg->hbm_chan_num;
				d->imc[i].num_dimms    = cfg->hbm_dimm_num;
			} else {
				d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
				d->imc[i].num_channels = cfg->ddr_chan_num;
				d->imc[i].num_dimms    = cfg->ddr_dimm_num;
			}

			rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
					      "Intel_10nm Socket", EDAC_MOD_STR,
					      i10nm_get_dimm_config, cfg);
			if (rc < 0)
				goto fail;
		}
	}

	rc = skx_adxl_get();
	if (rc)
		goto fail;

	opstate_init();
	mce_register_decode_chain(&i10nm_mce_dec);
	setup_i10nm_debug();

	/*
	 * show_retry_rd_err_log is installed only when the module parameter
	 * is non-zero and the config provides both offset tables; mode 2
	 * additionally calls enable_retry_rd_err_log(true).
	 */
	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
		skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
		if (retry_rd_err_log == 2)
			enable_retry_rd_err_log(true);
	} else {
		skx_set_decode(i10nm_mc_decode, NULL);
	}

	i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);

	return 0;
fail:
	skx_remove();
	return rc;
}
1178 
i10nm_exit(void)1179 static void __exit i10nm_exit(void)
1180 {
1181 	edac_dbg(2, "\n");
1182 
1183 	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
1184 		skx_set_decode(NULL, NULL);
1185 		if (retry_rd_err_log == 2)
1186 			enable_retry_rd_err_log(false);
1187 	}
1188 
1189 	teardown_i10nm_debug();
1190 	mce_unregister_decode_chain(&i10nm_mce_dec);
1191 	skx_adxl_put();
1192 	skx_remove();
1193 }
1194 
1195 module_init(i10nm_init);
1196 module_exit(i10nm_exit);
1197 
set_decoding_via_mca(const char * buf,const struct kernel_param * kp)1198 static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp)
1199 {
1200 	unsigned long val;
1201 	int ret;
1202 
1203 	ret = kstrtoul(buf, 0, &val);
1204 
1205 	if (ret || val > 1)
1206 		return -EINVAL;
1207 
1208 	if (val && mem_cfg_2lm) {
1209 		i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n");
1210 		return -EIO;
1211 	}
1212 
1213 	ret = param_set_int(buf, kp);
1214 
1215 	return ret;
1216 }
1217 
/*
 * Parameter operations for decoding_via_mca: writes are validated by
 * set_decoding_via_mca(), reads use param_get_int(). The parameter is
 * registered with mode 0644.
 */
static const struct kernel_param_ops decoding_via_mca_param_ops = {
	.set = set_decoding_via_mca,
	.get = param_get_int,
};

module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644);
MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable");
1225 
/*
 * retry_rd_err_log is registered as an int with mode 0444 (no write
 * bits). i10nm_init() and i10nm_exit() test it for non-zero and for the
 * value 2.
 */
module_param(retry_rd_err_log, int, 0444);
MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
1231