1 /*
2  * Copyright (c) 2020 Intel Corporation.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #define DT_DRV_COMPAT intel_ibecc
8 
9 #include <zephyr/kernel.h>
10 #include <zephyr/device.h>
11 #include <zephyr/drivers/pcie/pcie.h>
12 
13 #include <zephyr/drivers/edac.h>
14 #include "ibecc.h"
15 
16 #include <zephyr/logging/log.h>
17 LOG_MODULE_REGISTER(edac_ibecc, CONFIG_EDAC_LOG_LEVEL);
18 
/*
 * Devicetree node identifier for the "ibecc" node label. It is used both
 * to define the device instance and to look it up from the NMI handler.
 */
#define DEVICE_NODE DT_NODELABEL(ibecc)
20 
/* Runtime state of the IBECC driver instance */
struct ibecc_data {
	/*
	 * Base address filled in by device_map() in edac_ibecc_init();
	 * the register accessors add a register offset to it.
	 */
	mem_addr_t mchbar;
	/*
	 * Notification callback stored by notify_callback_set() and called
	 * from z_x86_do_kernel_nmi() when it is not NULL.
	 */
	edac_notify_callback_f cb;
	/*
	 * Error type stored by inject_set_error_type() and consumed by
	 * inject_error_trigger().
	 */
	uint32_t error_type;

	/*
	 * Error counters, incremented in parse_ecclog():
	 * errors_cor when ECC_ERROR_CERRSTS is set in the log value,
	 * errors_uc when ECC_ERROR_MERRSTS is set.
	 */
	unsigned int errors_cor;
	unsigned int errors_uc;
};
30 
ibecc_write_reg64(const struct device * dev,uint16_t reg,uint64_t value)31 static void ibecc_write_reg64(const struct device *dev,
32 			      uint16_t reg, uint64_t value)
33 {
34 	struct ibecc_data *data = dev->data;
35 	mem_addr_t reg_addr = data->mchbar + reg;
36 
37 	sys_write64(value, reg_addr);
38 }
39 
ibecc_read_reg64(const struct device * dev,uint16_t reg)40 static uint64_t ibecc_read_reg64(const struct device *dev, uint16_t reg)
41 {
42 	struct ibecc_data *data = dev->data;
43 	mem_addr_t reg_addr = data->mchbar + reg;
44 
45 	return sys_read64(reg_addr);
46 }
47 
48 #if defined(CONFIG_EDAC_ERROR_INJECT)
ibecc_write_reg32(const struct device * dev,uint16_t reg,uint32_t value)49 static void ibecc_write_reg32(const struct device *dev,
50 			      uint16_t reg, uint32_t value)
51 {
52 	struct ibecc_data *data = dev->data;
53 	mem_addr_t reg_addr = data->mchbar + reg;
54 
55 	sys_write32(value, reg_addr);
56 }
57 #endif
58 
ibecc_enabled(const pcie_bdf_t bdf)59 static bool ibecc_enabled(const pcie_bdf_t bdf)
60 {
61 	return !!(pcie_conf_read(bdf, CAPID0_C_REG) & CAPID0_C_IBECC_ENABLED);
62 }
63 
ibecc_errcmd_setup(const pcie_bdf_t bdf,bool enable)64 static void ibecc_errcmd_setup(const pcie_bdf_t bdf, bool enable)
65 {
66 	uint32_t errcmd;
67 
68 	errcmd = pcie_conf_read(bdf, ERRCMD_REG);
69 
70 	if (enable) {
71 		errcmd |= (ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
72 	} else {
73 		errcmd &= ~(ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
74 	}
75 
76 	pcie_conf_write(bdf, ERRCMD_REG, errcmd);
77 }
78 
ibecc_errsts_clear(const pcie_bdf_t bdf)79 static void ibecc_errsts_clear(const pcie_bdf_t bdf)
80 {
81 	uint32_t errsts;
82 
83 	errsts = pcie_conf_read(bdf, ERRSTS_REG);
84 
85 	if ((errsts & (ERRSTS_IBECC_COR | ERRSTS_IBECC_UC)) == 0) {
86 		return;
87 	}
88 
89 	pcie_conf_write(bdf, ERRSTS_REG, errsts);
90 }
91 
parse_ecclog(const struct device * dev,const uint64_t ecclog,struct ibecc_error * error_data)92 static void parse_ecclog(const struct device *dev, const uint64_t ecclog,
93 			 struct ibecc_error *error_data)
94 {
95 	struct ibecc_data *data = dev->data;
96 
97 	if (ecclog == 0) {
98 		return;
99 	}
100 
101 	error_data->type = ECC_ERROR_ERRTYPE(ecclog);
102 	error_data->address = ECC_ERROR_ERRADD(ecclog);
103 	error_data->syndrome = ECC_ERROR_ERRSYND(ecclog);
104 
105 	if ((ecclog & ECC_ERROR_MERRSTS) != 0) {
106 		data->errors_uc++;
107 	}
108 
109 	if ((ecclog & ECC_ERROR_CERRSTS) != 0) {
110 		data->errors_cor++;
111 	}
112 }
113 
114 #if defined(CONFIG_EDAC_ERROR_INJECT)
inject_set_param1(const struct device * dev,uint64_t addr)115 static int inject_set_param1(const struct device *dev, uint64_t addr)
116 {
117 	if ((addr & ~INJ_ADDR_BASE_MASK) != 0) {
118 		return -EINVAL;
119 	}
120 
121 	ibecc_write_reg64(dev, IBECC_INJ_ADDR_BASE, addr);
122 
123 	return 0;
124 }
125 
inject_get_param1(const struct device * dev,uint64_t * value)126 static int inject_get_param1(const struct device *dev, uint64_t *value)
127 {
128 	*value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_BASE);
129 
130 	return 0;
131 }
132 
inject_set_param2(const struct device * dev,uint64_t mask)133 static int inject_set_param2(const struct device *dev, uint64_t mask)
134 {
135 	if ((mask & ~INJ_ADDR_BASE_MASK_MASK) != 0) {
136 		return -EINVAL;
137 	}
138 
139 	ibecc_write_reg64(dev, IBECC_INJ_ADDR_MASK, mask);
140 
141 	return 0;
142 }
143 
inject_get_param2(const struct device * dev,uint64_t * value)144 static int inject_get_param2(const struct device *dev, uint64_t *value)
145 {
146 	*value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_MASK);
147 
148 	return 0;
149 }
150 
inject_set_error_type(const struct device * dev,uint32_t error_type)151 static int inject_set_error_type(const struct device *dev,
152 				 uint32_t error_type)
153 {
154 	struct ibecc_data *data = dev->data;
155 
156 	data->error_type = error_type;
157 
158 	return 0;
159 }
160 
inject_get_error_type(const struct device * dev,uint32_t * error_type)161 static int inject_get_error_type(const struct device *dev,
162 				      uint32_t *error_type)
163 {
164 	struct ibecc_data *data = dev->data;
165 
166 	*error_type = data->error_type;
167 
168 	return 0;
169 }
170 
inject_error_trigger(const struct device * dev)171 static int inject_error_trigger(const struct device *dev)
172 {
173 	struct ibecc_data *data = dev->data;
174 	uint32_t ctrl = 0;
175 
176 	switch (data->error_type) {
177 	case EDAC_ERROR_TYPE_DRAM_COR:
178 		ctrl |= INJ_CTRL_COR;
179 		break;
180 	case EDAC_ERROR_TYPE_DRAM_UC:
181 		ctrl |= INJ_CTRL_UC;
182 		break;
183 	default:
184 		/* This would clear error injection */
185 		break;
186 	}
187 
188 	ibecc_write_reg32(dev, IBECC_INJ_ADDR_CTRL, ctrl);
189 
190 	return 0;
191 }
192 #endif /* CONFIG_EDAC_ERROR_INJECT */
193 
ecc_error_log_get(const struct device * dev,uint64_t * value)194 static int ecc_error_log_get(const struct device *dev, uint64_t *value)
195 {
196 	*value = ibecc_read_reg64(dev, IBECC_ECC_ERROR_LOG);
197 	/**
198 	 * The ECC Error log register is only valid when ECC_ERROR_CERRSTS
199 	 * or ECC_ERROR_MERRSTS error status bits are set
200 	 */
201 	if ((*value & (ECC_ERROR_MERRSTS | ECC_ERROR_CERRSTS)) == 0) {
202 		return -ENODATA;
203 	}
204 
205 	return 0;
206 }
207 
ecc_error_log_clear(const struct device * dev)208 static int ecc_error_log_clear(const struct device *dev)
209 {
210 	/* Clear all error bits */
211 	ibecc_write_reg64(dev, IBECC_ECC_ERROR_LOG,
212 			  ECC_ERROR_MERRSTS | ECC_ERROR_CERRSTS);
213 
214 	return 0;
215 }
216 
parity_error_log_get(const struct device * dev,uint64_t * value)217 static int parity_error_log_get(const struct device *dev, uint64_t *value)
218 {
219 	*value = ibecc_read_reg64(dev, IBECC_PARITY_ERROR_LOG);
220 	if (*value == 0) {
221 		return -ENODATA;
222 	}
223 
224 	return 0;
225 }
226 
parity_error_log_clear(const struct device * dev)227 static int parity_error_log_clear(const struct device *dev)
228 {
229 	ibecc_write_reg64(dev, IBECC_PARITY_ERROR_LOG, PARITY_ERROR_ERRSTS);
230 
231 	return 0;
232 }
233 
errors_cor_get(const struct device * dev)234 static int errors_cor_get(const struct device *dev)
235 {
236 	struct ibecc_data *data = dev->data;
237 
238 	return data->errors_cor;
239 }
240 
errors_uc_get(const struct device * dev)241 static int errors_uc_get(const struct device *dev)
242 {
243 	struct ibecc_data *data = dev->data;
244 
245 	return data->errors_uc;
246 }
247 
/**
 * Install (or replace) the error notification callback.
 *
 * The store is done between irq_lock() and irq_unlock().
 *
 * @param dev Device instance whose data is a struct ibecc_data.
 * @param cb  Callback to store; the NMI handler skips a NULL callback.
 *
 * @return Always 0.
 */
static int notify_callback_set(const struct device *dev,
			       edac_notify_callback_f cb)
{
	struct ibecc_data *drv = dev->data;
	unsigned int lock_key;

	lock_key = irq_lock();
	drv->cb = cb;
	irq_unlock(lock_key);

	return 0;
}
259 
/*
 * EDAC driver API table handed to DEVICE_DT_DEFINE().
 * The error injection hooks are only populated when
 * CONFIG_EDAC_ERROR_INJECT is defined; the reporting, statistics and
 * notification hooks are always present.
 */
static DEVICE_API(edac, api) = {
#if defined(CONFIG_EDAC_ERROR_INJECT)
	/* Error Injection functions */
	.inject_set_param1 = inject_set_param1,
	.inject_get_param1 = inject_get_param1,
	.inject_set_param2 = inject_set_param2,
	.inject_get_param2 = inject_get_param2,
	.inject_set_error_type = inject_set_error_type,
	.inject_get_error_type = inject_get_error_type,
	.inject_error_trigger = inject_error_trigger,
#endif /* CONFIG_EDAC_ERROR_INJECT */

	/* Error reporting & clearing functions */
	.ecc_error_log_get = ecc_error_log_get,
	.ecc_error_log_clear = ecc_error_log_clear,
	.parity_error_log_get = parity_error_log_get,
	.parity_error_log_clear = parity_error_log_clear,

	/* Get error stats */
	.errors_cor_get = errors_cor_get,
	.errors_uc_get = errors_uc_get,

	/* Notification callback set */
	.notify_cb_set = notify_callback_set,
};
285 
edac_ibecc_init(const struct device * dev)286 static int edac_ibecc_init(const struct device *dev)
287 {
288 	const pcie_bdf_t bdf = PCI_HOST_BRIDGE;
289 	struct ibecc_data *data = dev->data;
290 	uint64_t mchbar;
291 	uint32_t conf_data;
292 
293 	conf_data = pcie_conf_read(bdf, PCIE_CONF_ID);
294 	switch (conf_data) {
295 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU5):
296 		__fallthrough;
297 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU6):
298 		__fallthrough;
299 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU7):
300 		__fallthrough;
301 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU8):
302 		__fallthrough;
303 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU9):
304 		__fallthrough;
305 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU10):
306 		__fallthrough;
307 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU11):
308 		__fallthrough;
309 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU12):
310 		__fallthrough;
311 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU13):
312 		__fallthrough;
313 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU14):
314 		__fallthrough;
315 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU15):
316 		break;
317 	default:
318 		LOG_ERR("PCI Probe failed"); /* LCOV_EXCL_BR_LINE */
319 		return -ENODEV;
320 	}
321 
322 	if (!ibecc_enabled(bdf)) {
323 		LOG_ERR("IBECC is not enabled"); /* LCOV_EXCL_BR_LINE */
324 		return -ENODEV;
325 	}
326 
327 	mchbar = pcie_conf_read(bdf, MCHBAR_REG);
328 	mchbar |= (uint64_t)pcie_conf_read(bdf, MCHBAR_REG + 1) << 32;
329 
330 	/* Check that MCHBAR is enabled */
331 	if ((mchbar & MCHBAR_ENABLE) == 0) {
332 		LOG_ERR("MCHBAR is not enabled"); /* LCOV_EXCL_BR_LINE */
333 		return -ENODEV;
334 	}
335 
336 	mchbar &= MCHBAR_MASK;
337 
338 	device_map(&data->mchbar, mchbar, MCH_SIZE, K_MEM_CACHE_NONE);
339 
340 	/* Enable Host Bridge generated SERR event */
341 	ibecc_errcmd_setup(bdf, true);
342 
343 	LOG_INF("IBECC driver initialized"); /* LCOV_EXCL_BR_LINE */
344 
345 	return 0;
346 }
347 
/* State of the single driver instance; static storage, so zero-initialized */
static struct ibecc_data ibecc_data;

/*
 * Define the device for DEVICE_NODE, passing edac_ibecc_init(), the
 * ibecc_data instance and the EDAC api table to DEVICE_DT_DEFINE at
 * POST_KERNEL / CONFIG_KERNEL_INIT_PRIORITY_DEVICE.
 */
DEVICE_DT_DEFINE(DEVICE_NODE, &edac_ibecc_init,
		 NULL, &ibecc_data, NULL, POST_KERNEL,
		 CONFIG_KERNEL_INIT_PRIORITY_DEVICE, &api);
353 
/**
 * An IBECC error sets SERR_NMI_STS and is reported in the IBECC_UC and
 * IBECC_COR fields of the ERRSTS PCI register.
 * The following needs to be done:
 *  - Read the ECC_ERR_LOG register
 *  - Clear the IBECC_UC and IBECC_COR fields of the ERRSTS PCI register
 *  - Clear the MERRSTS & CERRSTS fields of the ECC_ERR_LOG register
 */
362 
/* Held for the whole body of z_x86_do_kernel_nmi() */
static struct k_spinlock nmi_lock;
364 
365 /* NMI handling */
366 
handle_nmi(void)367 static bool handle_nmi(void)
368 {
369 	uint8_t status;
370 
371 	status = sys_in8(NMI_STS_CNT_REG);
372 	if ((status & NMI_STS_SRC_SERR) == 0) {
373 		/* For other NMI sources return false to handle it by
374 		 * Zephyr exception handler
375 		 */
376 		return false;
377 	}
378 
379 	/* Re-enable SERR# NMI sources */
380 
381 	status = (status & NMI_STS_MASK_EN) | NMI_STS_SERR_EN;
382 	sys_out8(status, NMI_STS_CNT_REG);
383 
384 	status &= ~NMI_STS_SERR_EN;
385 	sys_out8(status, NMI_STS_CNT_REG);
386 
387 	return true;
388 }
389 
z_x86_do_kernel_nmi(const struct arch_esf * esf)390 bool z_x86_do_kernel_nmi(const struct arch_esf *esf)
391 {
392 	const struct device *const dev = DEVICE_DT_GET(DEVICE_NODE);
393 	struct ibecc_data *data = dev->data;
394 	struct ibecc_error error_data;
395 	k_spinlock_key_t key;
396 	bool ret = true;
397 	uint64_t ecclog;
398 
399 	key = k_spin_lock(&nmi_lock);
400 
401 	/* Skip the same NMI handling for other cores and return handled */
402 	if (arch_curr_cpu()->id != 0) {
403 		ret = true;
404 		goto out;
405 	}
406 
407 	if (!handle_nmi()) {
408 		/* Indicate that we do not handle this NMI */
409 		ret = false;
410 		goto out;
411 	}
412 
413 	if (edac_ecc_error_log_get(dev, &ecclog) != 0) {
414 		goto out;
415 	}
416 
417 	parse_ecclog(dev, ecclog, &error_data);
418 
419 	if (data->cb != NULL) {
420 		data->cb(dev, &error_data);
421 	}
422 
423 	edac_ecc_error_log_clear(dev);
424 
425 	ibecc_errsts_clear(PCI_HOST_BRIDGE);
426 
427 out:
428 	k_spin_unlock(&nmi_lock, key);
429 
430 	return ret;
431 }
432