1 /*
2  * Copyright (c) 2020 Intel Corporation.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #define DT_DRV_COMPAT intel_ibecc
8 
9 #include <zephyr.h>
10 #include <device.h>
11 #include <drivers/pcie/pcie.h>
12 
13 #include <drivers/edac.h>
14 #include "ibecc.h"
15 
/**
 * This driver uses 64-bit registers, and at the moment not all of them
 * may be logged correctly.
 */
20 #include <logging/log.h>
21 LOG_MODULE_REGISTER(edac_ibecc, CONFIG_EDAC_LOG_LEVEL);
22 
23 #define DEVICE_NODE DT_NODELABEL(ibecc)
24 #define PCI_HOST_BRIDGE PCIE_BDF(0, 0, 0)
25 
26 struct ibecc_data {
27 	mem_addr_t mchbar;
28 	edac_notify_callback_f cb;
29 	uint32_t error_type;
30 
31 	/* Error count */
32 	unsigned int errors_cor;
33 	unsigned int errors_uc;
34 };
35 
ibecc_write_reg64(const struct device * dev,uint16_t reg,uint64_t value)36 static void ibecc_write_reg64(const struct device *dev,
37 			      uint16_t reg, uint64_t value)
38 {
39 	struct ibecc_data *data = dev->data;
40 	mem_addr_t reg_addr = data->mchbar + reg;
41 
42 	sys_write64(value, reg_addr);
43 }
44 
ibecc_read_reg64(const struct device * dev,uint16_t reg)45 static uint64_t ibecc_read_reg64(const struct device *dev, uint16_t reg)
46 {
47 	struct ibecc_data *data = dev->data;
48 	mem_addr_t reg_addr = data->mchbar + reg;
49 
50 	return sys_read64(reg_addr);
51 }
52 
53 #if defined(CONFIG_EDAC_ERROR_INJECT)
ibecc_write_reg32(const struct device * dev,uint16_t reg,uint32_t value)54 static void ibecc_write_reg32(const struct device *dev,
55 			      uint16_t reg, uint32_t value)
56 {
57 	struct ibecc_data *data = dev->data;
58 	mem_addr_t reg_addr = data->mchbar + reg;
59 
60 	sys_write32(value, reg_addr);
61 }
62 #endif
63 
ibecc_enabled(const pcie_bdf_t bdf)64 static bool ibecc_enabled(const pcie_bdf_t bdf)
65 {
66 	return !!(pcie_conf_read(bdf, CAPID0_C_REG) & CAPID0_C_IBECC_ENABLED);
67 }
68 
ibecc_errcmd_setup(const pcie_bdf_t bdf,bool enable)69 static void ibecc_errcmd_setup(const pcie_bdf_t bdf, bool enable)
70 {
71 	uint32_t errcmd;
72 
73 	errcmd = pcie_conf_read(bdf, ERRCMD_REG);
74 
75 	if (enable) {
76 		errcmd |= (ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
77 	} else {
78 		errcmd &= ~(ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
79 	}
80 
81 	pcie_conf_write(bdf, ERRCMD_REG, errcmd);
82 }
83 
ibecc_errsts_clear(const pcie_bdf_t bdf)84 static void ibecc_errsts_clear(const pcie_bdf_t bdf)
85 {
86 	uint32_t errsts;
87 
88 	errsts = pcie_conf_read(bdf, ERRSTS_REG);
89 
90 	if ((errsts & (ERRSTS_IBECC_COR | ERRSTS_IBECC_UC)) == 0) {
91 		return;
92 	}
93 
94 	pcie_conf_write(bdf, ERRSTS_REG, errsts);
95 }
96 
parse_ecclog(const struct device * dev,const uint64_t ecclog,struct ibecc_error * error_data)97 static void parse_ecclog(const struct device *dev, const uint64_t ecclog,
98 			 struct ibecc_error *error_data)
99 {
100 	struct ibecc_data *data = dev->data;
101 
102 	if (ecclog == 0) {
103 		return;
104 	}
105 
106 	error_data->type = ECC_ERROR_ERRTYPE(ecclog);
107 	error_data->address = ECC_ERROR_ERRADD(ecclog);
108 	error_data->syndrome = ECC_ERROR_ERRSYND(ecclog);
109 
110 	if ((ecclog & ECC_ERROR_MERRSTS) != 0) {
111 		data->errors_uc++;
112 	}
113 
114 	if ((ecclog & ECC_ERROR_CERRSTS) != 0) {
115 		data->errors_cor++;
116 	}
117 }
118 
119 #if defined(CONFIG_EDAC_ERROR_INJECT)
inject_set_param1(const struct device * dev,uint64_t addr)120 static int inject_set_param1(const struct device *dev, uint64_t addr)
121 {
122 	if ((addr & ~INJ_ADDR_BASE_MASK) != 0) {
123 		return -EINVAL;
124 	}
125 
126 	ibecc_write_reg64(dev, IBECC_INJ_ADDR_BASE, addr);
127 
128 	return 0;
129 }
130 
inject_get_param1(const struct device * dev,uint64_t * value)131 static int inject_get_param1(const struct device *dev, uint64_t *value)
132 {
133 	*value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_BASE);
134 
135 	return 0;
136 }
137 
inject_set_param2(const struct device * dev,uint64_t mask)138 static int inject_set_param2(const struct device *dev, uint64_t mask)
139 {
140 	if ((mask & ~INJ_ADDR_BASE_MASK_MASK) != 0) {
141 		return -EINVAL;
142 	}
143 
144 	ibecc_write_reg64(dev, IBECC_INJ_ADDR_MASK, mask);
145 
146 	return 0;
147 }
148 
inject_get_param2(const struct device * dev,uint64_t * value)149 static int inject_get_param2(const struct device *dev, uint64_t *value)
150 {
151 	*value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_MASK);
152 
153 	return 0;
154 }
155 
inject_set_error_type(const struct device * dev,uint32_t error_type)156 static int inject_set_error_type(const struct device *dev,
157 				 uint32_t error_type)
158 {
159 	struct ibecc_data *data = dev->data;
160 
161 	data->error_type = error_type;
162 
163 	return 0;
164 }
165 
inject_get_error_type(const struct device * dev,uint32_t * error_type)166 static int inject_get_error_type(const struct device *dev,
167 				      uint32_t *error_type)
168 {
169 	struct ibecc_data *data = dev->data;
170 
171 	*error_type = data->error_type;
172 
173 	return 0;
174 }
175 
inject_error_trigger(const struct device * dev)176 static int inject_error_trigger(const struct device *dev)
177 {
178 	struct ibecc_data *data = dev->data;
179 	uint32_t ctrl = 0;
180 
181 	switch (data->error_type) {
182 	case EDAC_ERROR_TYPE_DRAM_COR:
183 		ctrl |= INJ_CTRL_COR;
184 		break;
185 	case EDAC_ERROR_TYPE_DRAM_UC:
186 		ctrl |= INJ_CTRL_UC;
187 		break;
188 	default:
189 		/* This would clear error injection */
190 		break;
191 	}
192 
193 	ibecc_write_reg32(dev, IBECC_INJ_ADDR_CTRL, ctrl);
194 
195 	return 0;
196 }
197 #endif /* CONFIG_EDAC_ERROR_INJECT */
198 
ecc_error_log_get(const struct device * dev,uint64_t * value)199 static int ecc_error_log_get(const struct device *dev, uint64_t *value)
200 {
201 	*value = ibecc_read_reg64(dev, IBECC_ECC_ERROR_LOG);
202 
203 	return 0;
204 }
205 
ecc_error_log_clear(const struct device * dev)206 static int ecc_error_log_clear(const struct device *dev)
207 {
208 	/* Clear all error bits */
209 	ibecc_write_reg64(dev, IBECC_ECC_ERROR_LOG,
210 			  ECC_ERROR_MERRSTS | ECC_ERROR_CERRSTS);
211 
212 	return 0;
213 }
214 
parity_error_log_get(const struct device * dev,uint64_t * value)215 static int parity_error_log_get(const struct device *dev, uint64_t *value)
216 {
217 	*value = ibecc_read_reg64(dev, IBECC_PARITY_ERROR_LOG);
218 
219 	return 0;
220 }
221 
parity_error_log_clear(const struct device * dev)222 static int parity_error_log_clear(const struct device *dev)
223 {
224 	ibecc_write_reg64(dev, IBECC_PARITY_ERROR_LOG, PARITY_ERROR_ERRSTS);
225 
226 	return 0;
227 }
228 
errors_cor_get(const struct device * dev)229 static int errors_cor_get(const struct device *dev)
230 {
231 	struct ibecc_data *data = dev->data;
232 
233 	return data->errors_cor;
234 }
235 
errors_uc_get(const struct device * dev)236 static int errors_uc_get(const struct device *dev)
237 {
238 	struct ibecc_data *data = dev->data;
239 
240 	return data->errors_uc;
241 }
242 
/**
 * Install (or remove, with NULL) the error notification callback.
 *
 * The pointer is stored with interrupts locked. The callback itself is
 * invoked from the NMI handler in this file.
 *
 * @param dev EDAC device
 * @param cb  Callback to store; NULL disables notification
 *
 * @return Always 0
 */
static int notify_callback_set(const struct device *dev,
			       edac_notify_callback_f cb)
{
	struct ibecc_data *data = dev->data;
	/* irq_lock() returns an unsigned key that must be passed back as is */
	unsigned int key = irq_lock();

	data->cb = cb;
	irq_unlock(key);

	return 0;
}
254 
255 static const struct edac_driver_api api = {
256 #if defined(CONFIG_EDAC_ERROR_INJECT)
257 	/* Error Injection functions */
258 	.inject_set_param1 = inject_set_param1,
259 	.inject_get_param1 = inject_get_param1,
260 	.inject_set_param2 = inject_set_param2,
261 	.inject_get_param2 = inject_get_param2,
262 	.inject_set_error_type = inject_set_error_type,
263 	.inject_get_error_type = inject_get_error_type,
264 	.inject_error_trigger = inject_error_trigger,
265 #endif /* CONFIG_EDAC_ERROR_INJECT */
266 
267 	/* Error reporting & clearing functions */
268 	.ecc_error_log_get = ecc_error_log_get,
269 	.ecc_error_log_clear = ecc_error_log_clear,
270 	.parity_error_log_get = parity_error_log_get,
271 	.parity_error_log_clear = parity_error_log_clear,
272 
273 	/* Get error stats */
274 	.errors_cor_get = errors_cor_get,
275 	.errors_uc_get = errors_uc_get,
276 
277 	/* Notification callback set */
278 	.notify_cb_set = notify_callback_set,
279 };
280 
edac_ibecc_init(const struct device * dev)281 int edac_ibecc_init(const struct device *dev)
282 {
283 	const pcie_bdf_t bdf = PCI_HOST_BRIDGE;
284 	struct ibecc_data *data = dev->data;
285 	uint64_t mchbar;
286 	uint32_t conf_data;
287 
288 	conf_data = pcie_conf_read(bdf, PCIE_CONF_ID);
289 	switch (conf_data) {
290 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU5):
291 		__fallthrough;
292 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU6):
293 		__fallthrough;
294 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU7):
295 		__fallthrough;
296 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU8):
297 		__fallthrough;
298 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU9):
299 		__fallthrough;
300 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU10):
301 		__fallthrough;
302 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU11):
303 		__fallthrough;
304 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU12):
305 		__fallthrough;
306 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU13):
307 		__fallthrough;
308 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU14):
309 		__fallthrough;
310 	case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU15):
311 		break;
312 	default:
313 		LOG_ERR("PCI Probe failed");
314 		return -ENODEV;
315 	}
316 
317 	if (!ibecc_enabled(bdf)) {
318 		LOG_ERR("IBECC is not enabled");
319 		return -ENODEV;
320 	}
321 
322 	mchbar = pcie_conf_read(bdf, MCHBAR_REG);
323 	mchbar |= (uint64_t)pcie_conf_read(bdf, MCHBAR_REG + 1) << 32;
324 
325 	/* Check that MCHBAR is enabled */
326 	if ((mchbar & MCHBAR_ENABLE) == 0) {
327 		LOG_ERR("MCHBAR is not enabled");
328 		return -ENODEV;
329 	}
330 
331 	mchbar &= MCHBAR_MASK;
332 
333 	device_map(&data->mchbar, mchbar, MCH_SIZE, K_MEM_CACHE_NONE);
334 
335 	/* Enable Host Bridge generated SERR event */
336 	ibecc_errcmd_setup(bdf, true);
337 
338 	return 0;
339 }
340 
341 static struct ibecc_data ibecc_data;
342 
343 DEVICE_DT_DEFINE(DEVICE_NODE, &edac_ibecc_init,
344 		 NULL, &ibecc_data, NULL, POST_KERNEL,
345 		 CONFIG_KERNEL_INIT_PRIORITY_DEVICE, &api);
346 
/**
 * An IBECC error sets SERR_NMI_STS and is reported through the IBECC_UC
 * and IBECC_COR fields of the ERRSTS PCI register.
 * Handling such an error requires the following steps:
 *  - Read the ECC_ERR_LOG register
 *  - Clear the IBECC_UC and IBECC_COR fields of the ERRSTS PCI register
 *  - Clear the MERRSTS and CERRSTS fields of the ECC_ERR_LOG register
 */
355 
356 static struct k_spinlock nmi_lock;
357 
358 /* NMI handling */
359 
handle_nmi(void)360 static bool handle_nmi(void)
361 {
362 	uint8_t status;
363 
364 	status = sys_in8(NMI_STS_CNT_REG);
365 	if ((status & NMI_STS_SRC_SERR) == 0) {
366 		/* For other NMI sources return false to handle it by
367 		 * Zephyr exception handler
368 		 */
369 		return false;
370 	}
371 
372 	/* Re-enable SERR# NMI sources */
373 
374 	status = (status & NMI_STS_MASK_EN) | NMI_STS_SERR_EN;
375 	sys_out8(status, NMI_STS_CNT_REG);
376 
377 	status &= ~NMI_STS_SERR_EN;
378 	sys_out8(status, NMI_STS_CNT_REG);
379 
380 	return true;
381 }
382 
z_x86_do_kernel_nmi(const z_arch_esf_t * esf)383 bool z_x86_do_kernel_nmi(const z_arch_esf_t *esf)
384 {
385 	const struct device *dev = DEVICE_DT_GET(DEVICE_NODE);
386 	struct ibecc_data *data = dev->data;
387 	struct ibecc_error error_data;
388 	k_spinlock_key_t key;
389 	bool ret = true;
390 	uint64_t ecclog;
391 
392 	key = k_spin_lock(&nmi_lock);
393 
394 	/* Skip the same NMI handling for other cores and return handled */
395 	if (arch_curr_cpu()->id != 0) {
396 		ret = true;
397 		goto out;
398 	}
399 
400 	if (!handle_nmi()) {
401 		/* Indicate that we do not handle this NMI */
402 		ret = false;
403 		goto out;
404 	}
405 
406 	if (edac_ecc_error_log_get(dev, &ecclog) != 0) {
407 		goto out;
408 	}
409 
410 	parse_ecclog(dev, ecclog, &error_data);
411 
412 	if (data->cb != NULL) {
413 		data->cb(dev, &error_data);
414 	}
415 
416 	edac_ecc_error_log_clear(dev);
417 
418 	ibecc_errsts_clear(PCI_HOST_BRIDGE);
419 
420 out:
421 	k_spin_unlock(&nmi_lock, key);
422 
423 	return ret;
424 }
425