1 /*
2 * Copyright (c) 2020 Intel Corporation.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 #define DT_DRV_COMPAT intel_ibecc
8
9 #include <zephyr/kernel.h>
10 #include <zephyr/device.h>
11 #include <zephyr/drivers/pcie/pcie.h>
12
13 #include <zephyr/drivers/edac.h>
14 #include "ibecc.h"
15
16 #include <zephyr/logging/log.h>
17 LOG_MODULE_REGISTER(edac_ibecc, CONFIG_EDAC_LOG_LEVEL);
18
19 #define DEVICE_NODE DT_NODELABEL(ibecc)
20
/* Runtime state kept for the IBECC device instance. */
struct ibecc_data {
	/* Base address the register accessors add their offsets to */
	mem_addr_t mchbar;
	/* Notification callback, stored by notify_callback_set() */
	edac_notify_callback_f cb;
	/* Error type remembered for the next injection trigger */
	uint32_t error_type;

	/* Error counters, incremented by parse_ecclog() */
	unsigned int errors_cor;
	unsigned int errors_uc;
};
30
ibecc_write_reg64(const struct device * dev,uint16_t reg,uint64_t value)31 static void ibecc_write_reg64(const struct device *dev,
32 uint16_t reg, uint64_t value)
33 {
34 struct ibecc_data *data = dev->data;
35 mem_addr_t reg_addr = data->mchbar + reg;
36
37 sys_write64(value, reg_addr);
38 }
39
ibecc_read_reg64(const struct device * dev,uint16_t reg)40 static uint64_t ibecc_read_reg64(const struct device *dev, uint16_t reg)
41 {
42 struct ibecc_data *data = dev->data;
43 mem_addr_t reg_addr = data->mchbar + reg;
44
45 return sys_read64(reg_addr);
46 }
47
48 #if defined(CONFIG_EDAC_ERROR_INJECT)
ibecc_write_reg32(const struct device * dev,uint16_t reg,uint32_t value)49 static void ibecc_write_reg32(const struct device *dev,
50 uint16_t reg, uint32_t value)
51 {
52 struct ibecc_data *data = dev->data;
53 mem_addr_t reg_addr = data->mchbar + reg;
54
55 sys_write32(value, reg_addr);
56 }
57 #endif
58
ibecc_enabled(const pcie_bdf_t bdf)59 static bool ibecc_enabled(const pcie_bdf_t bdf)
60 {
61 return !!(pcie_conf_read(bdf, CAPID0_C_REG) & CAPID0_C_IBECC_ENABLED);
62 }
63
ibecc_errcmd_setup(const pcie_bdf_t bdf,bool enable)64 static void ibecc_errcmd_setup(const pcie_bdf_t bdf, bool enable)
65 {
66 uint32_t errcmd;
67
68 errcmd = pcie_conf_read(bdf, ERRCMD_REG);
69
70 if (enable) {
71 errcmd |= (ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
72 } else {
73 errcmd &= ~(ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
74 }
75
76 pcie_conf_write(bdf, ERRCMD_REG, errcmd);
77 }
78
ibecc_errsts_clear(const pcie_bdf_t bdf)79 static void ibecc_errsts_clear(const pcie_bdf_t bdf)
80 {
81 uint32_t errsts;
82
83 errsts = pcie_conf_read(bdf, ERRSTS_REG);
84
85 if ((errsts & (ERRSTS_IBECC_COR | ERRSTS_IBECC_UC)) == 0) {
86 return;
87 }
88
89 pcie_conf_write(bdf, ERRSTS_REG, errsts);
90 }
91
parse_ecclog(const struct device * dev,const uint64_t ecclog,struct ibecc_error * error_data)92 static void parse_ecclog(const struct device *dev, const uint64_t ecclog,
93 struct ibecc_error *error_data)
94 {
95 struct ibecc_data *data = dev->data;
96
97 if (ecclog == 0) {
98 return;
99 }
100
101 error_data->type = ECC_ERROR_ERRTYPE(ecclog);
102 error_data->address = ECC_ERROR_ERRADD(ecclog);
103 error_data->syndrome = ECC_ERROR_ERRSYND(ecclog);
104
105 if ((ecclog & ECC_ERROR_MERRSTS) != 0) {
106 data->errors_uc++;
107 }
108
109 if ((ecclog & ECC_ERROR_CERRSTS) != 0) {
110 data->errors_cor++;
111 }
112 }
113
114 #if defined(CONFIG_EDAC_ERROR_INJECT)
inject_set_param1(const struct device * dev,uint64_t addr)115 static int inject_set_param1(const struct device *dev, uint64_t addr)
116 {
117 if ((addr & ~INJ_ADDR_BASE_MASK) != 0) {
118 return -EINVAL;
119 }
120
121 ibecc_write_reg64(dev, IBECC_INJ_ADDR_BASE, addr);
122
123 return 0;
124 }
125
inject_get_param1(const struct device * dev,uint64_t * value)126 static int inject_get_param1(const struct device *dev, uint64_t *value)
127 {
128 *value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_BASE);
129
130 return 0;
131 }
132
inject_set_param2(const struct device * dev,uint64_t mask)133 static int inject_set_param2(const struct device *dev, uint64_t mask)
134 {
135 if ((mask & ~INJ_ADDR_BASE_MASK_MASK) != 0) {
136 return -EINVAL;
137 }
138
139 ibecc_write_reg64(dev, IBECC_INJ_ADDR_MASK, mask);
140
141 return 0;
142 }
143
inject_get_param2(const struct device * dev,uint64_t * value)144 static int inject_get_param2(const struct device *dev, uint64_t *value)
145 {
146 *value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_MASK);
147
148 return 0;
149 }
150
inject_set_error_type(const struct device * dev,uint32_t error_type)151 static int inject_set_error_type(const struct device *dev,
152 uint32_t error_type)
153 {
154 struct ibecc_data *data = dev->data;
155
156 data->error_type = error_type;
157
158 return 0;
159 }
160
inject_get_error_type(const struct device * dev,uint32_t * error_type)161 static int inject_get_error_type(const struct device *dev,
162 uint32_t *error_type)
163 {
164 struct ibecc_data *data = dev->data;
165
166 *error_type = data->error_type;
167
168 return 0;
169 }
170
inject_error_trigger(const struct device * dev)171 static int inject_error_trigger(const struct device *dev)
172 {
173 struct ibecc_data *data = dev->data;
174 uint32_t ctrl = 0;
175
176 switch (data->error_type) {
177 case EDAC_ERROR_TYPE_DRAM_COR:
178 ctrl |= INJ_CTRL_COR;
179 break;
180 case EDAC_ERROR_TYPE_DRAM_UC:
181 ctrl |= INJ_CTRL_UC;
182 break;
183 default:
184 /* This would clear error injection */
185 break;
186 }
187
188 ibecc_write_reg32(dev, IBECC_INJ_ADDR_CTRL, ctrl);
189
190 return 0;
191 }
192 #endif /* CONFIG_EDAC_ERROR_INJECT */
193
ecc_error_log_get(const struct device * dev,uint64_t * value)194 static int ecc_error_log_get(const struct device *dev, uint64_t *value)
195 {
196 *value = ibecc_read_reg64(dev, IBECC_ECC_ERROR_LOG);
197 /**
198 * The ECC Error log register is only valid when ECC_ERROR_CERRSTS
199 * or ECC_ERROR_MERRSTS error status bits are set
200 */
201 if ((*value & (ECC_ERROR_MERRSTS | ECC_ERROR_CERRSTS)) == 0) {
202 return -ENODATA;
203 }
204
205 return 0;
206 }
207
ecc_error_log_clear(const struct device * dev)208 static int ecc_error_log_clear(const struct device *dev)
209 {
210 /* Clear all error bits */
211 ibecc_write_reg64(dev, IBECC_ECC_ERROR_LOG,
212 ECC_ERROR_MERRSTS | ECC_ERROR_CERRSTS);
213
214 return 0;
215 }
216
parity_error_log_get(const struct device * dev,uint64_t * value)217 static int parity_error_log_get(const struct device *dev, uint64_t *value)
218 {
219 *value = ibecc_read_reg64(dev, IBECC_PARITY_ERROR_LOG);
220 if (*value == 0) {
221 return -ENODATA;
222 }
223
224 return 0;
225 }
226
parity_error_log_clear(const struct device * dev)227 static int parity_error_log_clear(const struct device *dev)
228 {
229 ibecc_write_reg64(dev, IBECC_PARITY_ERROR_LOG, PARITY_ERROR_ERRSTS);
230
231 return 0;
232 }
233
errors_cor_get(const struct device * dev)234 static int errors_cor_get(const struct device *dev)
235 {
236 struct ibecc_data *data = dev->data;
237
238 return data->errors_cor;
239 }
240
errors_uc_get(const struct device * dev)241 static int errors_uc_get(const struct device *dev)
242 {
243 struct ibecc_data *data = dev->data;
244
245 return data->errors_uc;
246 }
247
/*
 * Install `cb` as the notification callback stored in the driver data.
 * The pointer is swapped with interrupts locked. Always returns 0.
 */
static int notify_callback_set(const struct device *dev,
			       edac_notify_callback_f cb)
{
	struct ibecc_data *drv = dev->data;
	unsigned int lock_key;

	lock_key = irq_lock();
	drv->cb = cb;
	irq_unlock(lock_key);

	return 0;
}
259
260 static DEVICE_API(edac, api) = {
261 #if defined(CONFIG_EDAC_ERROR_INJECT)
262 /* Error Injection functions */
263 .inject_set_param1 = inject_set_param1,
264 .inject_get_param1 = inject_get_param1,
265 .inject_set_param2 = inject_set_param2,
266 .inject_get_param2 = inject_get_param2,
267 .inject_set_error_type = inject_set_error_type,
268 .inject_get_error_type = inject_get_error_type,
269 .inject_error_trigger = inject_error_trigger,
270 #endif /* CONFIG_EDAC_ERROR_INJECT */
271
272 /* Error reporting & clearing functions */
273 .ecc_error_log_get = ecc_error_log_get,
274 .ecc_error_log_clear = ecc_error_log_clear,
275 .parity_error_log_get = parity_error_log_get,
276 .parity_error_log_clear = parity_error_log_clear,
277
278 /* Get error stats */
279 .errors_cor_get = errors_cor_get,
280 .errors_uc_get = errors_uc_get,
281
282 /* Notification callback set */
283 .notify_cb_set = notify_callback_set,
284 };
285
edac_ibecc_init(const struct device * dev)286 static int edac_ibecc_init(const struct device *dev)
287 {
288 const pcie_bdf_t bdf = PCI_HOST_BRIDGE;
289 struct ibecc_data *data = dev->data;
290 uint64_t mchbar;
291 uint32_t conf_data;
292
293 conf_data = pcie_conf_read(bdf, PCIE_CONF_ID);
294 switch (conf_data) {
295 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU5):
296 __fallthrough;
297 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU6):
298 __fallthrough;
299 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU7):
300 __fallthrough;
301 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU8):
302 __fallthrough;
303 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU9):
304 __fallthrough;
305 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU10):
306 __fallthrough;
307 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU11):
308 __fallthrough;
309 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU12):
310 __fallthrough;
311 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU13):
312 __fallthrough;
313 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU14):
314 __fallthrough;
315 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU15):
316 break;
317 default:
318 LOG_ERR("PCI Probe failed"); /* LCOV_EXCL_BR_LINE */
319 return -ENODEV;
320 }
321
322 if (!ibecc_enabled(bdf)) {
323 LOG_ERR("IBECC is not enabled"); /* LCOV_EXCL_BR_LINE */
324 return -ENODEV;
325 }
326
327 mchbar = pcie_conf_read(bdf, MCHBAR_REG);
328 mchbar |= (uint64_t)pcie_conf_read(bdf, MCHBAR_REG + 1) << 32;
329
330 /* Check that MCHBAR is enabled */
331 if ((mchbar & MCHBAR_ENABLE) == 0) {
332 LOG_ERR("MCHBAR is not enabled"); /* LCOV_EXCL_BR_LINE */
333 return -ENODEV;
334 }
335
336 mchbar &= MCHBAR_MASK;
337
338 device_map(&data->mchbar, mchbar, MCH_SIZE, K_MEM_CACHE_NONE);
339
340 /* Enable Host Bridge generated SERR event */
341 ibecc_errcmd_setup(bdf, true);
342
343 LOG_INF("IBECC driver initialized"); /* LCOV_EXCL_BR_LINE */
344
345 return 0;
346 }
347
/* Driver data instance handed to the device definition below */
static struct ibecc_data ibecc_data;

/*
 * Define the device for DEVICE_NODE, passing edac_ibecc_init() as the init
 * function, &ibecc_data as driver data and the EDAC `api` table, at the
 * POST_KERNEL level with CONFIG_KERNEL_INIT_PRIORITY_DEVICE priority.
 */
DEVICE_DT_DEFINE(DEVICE_NODE, &edac_ibecc_init,
		 NULL, &ibecc_data, NULL, POST_KERNEL,
		 CONFIG_KERNEL_INIT_PRIORITY_DEVICE, &api);
353
/**
 * An IBECC error sets SERR_NMI_STS and is reported through the IBECC_UC
 * and IBECC_COR fields of the ERRSTS PCI register.
 * The following needs to be done:
 * - Read the ECC_ERR_LOG register
 * - Clear the IBECC_UC and IBECC_COR fields of the ERRSTS PCI register
 * - Clear the MERRSTS & CERRSTS fields of the ECC_ERR_LOG register
 */
362
/* Held by z_x86_do_kernel_nmi() for the whole duration of NMI handling */
static struct k_spinlock nmi_lock;
364
365 /* NMI handling */
366
handle_nmi(void)367 static bool handle_nmi(void)
368 {
369 uint8_t status;
370
371 status = sys_in8(NMI_STS_CNT_REG);
372 if ((status & NMI_STS_SRC_SERR) == 0) {
373 /* For other NMI sources return false to handle it by
374 * Zephyr exception handler
375 */
376 return false;
377 }
378
379 /* Re-enable SERR# NMI sources */
380
381 status = (status & NMI_STS_MASK_EN) | NMI_STS_SERR_EN;
382 sys_out8(status, NMI_STS_CNT_REG);
383
384 status &= ~NMI_STS_SERR_EN;
385 sys_out8(status, NMI_STS_CNT_REG);
386
387 return true;
388 }
389
z_x86_do_kernel_nmi(const struct arch_esf * esf)390 bool z_x86_do_kernel_nmi(const struct arch_esf *esf)
391 {
392 const struct device *const dev = DEVICE_DT_GET(DEVICE_NODE);
393 struct ibecc_data *data = dev->data;
394 struct ibecc_error error_data;
395 k_spinlock_key_t key;
396 bool ret = true;
397 uint64_t ecclog;
398
399 key = k_spin_lock(&nmi_lock);
400
401 /* Skip the same NMI handling for other cores and return handled */
402 if (arch_curr_cpu()->id != 0) {
403 ret = true;
404 goto out;
405 }
406
407 if (!handle_nmi()) {
408 /* Indicate that we do not handle this NMI */
409 ret = false;
410 goto out;
411 }
412
413 if (edac_ecc_error_log_get(dev, &ecclog) != 0) {
414 goto out;
415 }
416
417 parse_ecclog(dev, ecclog, &error_data);
418
419 if (data->cb != NULL) {
420 data->cb(dev, &error_data);
421 }
422
423 edac_ecc_error_log_clear(dev);
424
425 ibecc_errsts_clear(PCI_HOST_BRIDGE);
426
427 out:
428 k_spin_unlock(&nmi_lock, key);
429
430 return ret;
431 }
432