1 /*
2 * Copyright (c) 2020 Intel Corporation.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 #define DT_DRV_COMPAT intel_ibecc
8
9 #include <zephyr.h>
10 #include <device.h>
11 #include <drivers/pcie/pcie.h>
12
13 #include <drivers/edac.h>
14 #include "ibecc.h"
15
/**
 * The driver uses 64-bit registers, and at the moment not all of them
 * may be logged correctly.
 */
20 #include <logging/log.h>
21 LOG_MODULE_REGISTER(edac_ibecc, CONFIG_EDAC_LOG_LEVEL);
22
23 #define DEVICE_NODE DT_NODELABEL(ibecc)
24 #define PCI_HOST_BRIDGE PCIE_BDF(0, 0, 0)
25
26 struct ibecc_data {
27 mem_addr_t mchbar;
28 edac_notify_callback_f cb;
29 uint32_t error_type;
30
31 /* Error count */
32 unsigned int errors_cor;
33 unsigned int errors_uc;
34 };
35
ibecc_write_reg64(const struct device * dev,uint16_t reg,uint64_t value)36 static void ibecc_write_reg64(const struct device *dev,
37 uint16_t reg, uint64_t value)
38 {
39 struct ibecc_data *data = dev->data;
40 mem_addr_t reg_addr = data->mchbar + reg;
41
42 sys_write64(value, reg_addr);
43 }
44
ibecc_read_reg64(const struct device * dev,uint16_t reg)45 static uint64_t ibecc_read_reg64(const struct device *dev, uint16_t reg)
46 {
47 struct ibecc_data *data = dev->data;
48 mem_addr_t reg_addr = data->mchbar + reg;
49
50 return sys_read64(reg_addr);
51 }
52
53 #if defined(CONFIG_EDAC_ERROR_INJECT)
ibecc_write_reg32(const struct device * dev,uint16_t reg,uint32_t value)54 static void ibecc_write_reg32(const struct device *dev,
55 uint16_t reg, uint32_t value)
56 {
57 struct ibecc_data *data = dev->data;
58 mem_addr_t reg_addr = data->mchbar + reg;
59
60 sys_write32(value, reg_addr);
61 }
62 #endif
63
ibecc_enabled(const pcie_bdf_t bdf)64 static bool ibecc_enabled(const pcie_bdf_t bdf)
65 {
66 return !!(pcie_conf_read(bdf, CAPID0_C_REG) & CAPID0_C_IBECC_ENABLED);
67 }
68
ibecc_errcmd_setup(const pcie_bdf_t bdf,bool enable)69 static void ibecc_errcmd_setup(const pcie_bdf_t bdf, bool enable)
70 {
71 uint32_t errcmd;
72
73 errcmd = pcie_conf_read(bdf, ERRCMD_REG);
74
75 if (enable) {
76 errcmd |= (ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
77 } else {
78 errcmd &= ~(ERRCMD_IBECC_COR | ERRCMD_IBECC_UC) << 16;
79 }
80
81 pcie_conf_write(bdf, ERRCMD_REG, errcmd);
82 }
83
ibecc_errsts_clear(const pcie_bdf_t bdf)84 static void ibecc_errsts_clear(const pcie_bdf_t bdf)
85 {
86 uint32_t errsts;
87
88 errsts = pcie_conf_read(bdf, ERRSTS_REG);
89
90 if ((errsts & (ERRSTS_IBECC_COR | ERRSTS_IBECC_UC)) == 0) {
91 return;
92 }
93
94 pcie_conf_write(bdf, ERRSTS_REG, errsts);
95 }
96
parse_ecclog(const struct device * dev,const uint64_t ecclog,struct ibecc_error * error_data)97 static void parse_ecclog(const struct device *dev, const uint64_t ecclog,
98 struct ibecc_error *error_data)
99 {
100 struct ibecc_data *data = dev->data;
101
102 if (ecclog == 0) {
103 return;
104 }
105
106 error_data->type = ECC_ERROR_ERRTYPE(ecclog);
107 error_data->address = ECC_ERROR_ERRADD(ecclog);
108 error_data->syndrome = ECC_ERROR_ERRSYND(ecclog);
109
110 if ((ecclog & ECC_ERROR_MERRSTS) != 0) {
111 data->errors_uc++;
112 }
113
114 if ((ecclog & ECC_ERROR_CERRSTS) != 0) {
115 data->errors_cor++;
116 }
117 }
118
119 #if defined(CONFIG_EDAC_ERROR_INJECT)
inject_set_param1(const struct device * dev,uint64_t addr)120 static int inject_set_param1(const struct device *dev, uint64_t addr)
121 {
122 if ((addr & ~INJ_ADDR_BASE_MASK) != 0) {
123 return -EINVAL;
124 }
125
126 ibecc_write_reg64(dev, IBECC_INJ_ADDR_BASE, addr);
127
128 return 0;
129 }
130
inject_get_param1(const struct device * dev,uint64_t * value)131 static int inject_get_param1(const struct device *dev, uint64_t *value)
132 {
133 *value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_BASE);
134
135 return 0;
136 }
137
inject_set_param2(const struct device * dev,uint64_t mask)138 static int inject_set_param2(const struct device *dev, uint64_t mask)
139 {
140 if ((mask & ~INJ_ADDR_BASE_MASK_MASK) != 0) {
141 return -EINVAL;
142 }
143
144 ibecc_write_reg64(dev, IBECC_INJ_ADDR_MASK, mask);
145
146 return 0;
147 }
148
inject_get_param2(const struct device * dev,uint64_t * value)149 static int inject_get_param2(const struct device *dev, uint64_t *value)
150 {
151 *value = ibecc_read_reg64(dev, IBECC_INJ_ADDR_MASK);
152
153 return 0;
154 }
155
inject_set_error_type(const struct device * dev,uint32_t error_type)156 static int inject_set_error_type(const struct device *dev,
157 uint32_t error_type)
158 {
159 struct ibecc_data *data = dev->data;
160
161 data->error_type = error_type;
162
163 return 0;
164 }
165
inject_get_error_type(const struct device * dev,uint32_t * error_type)166 static int inject_get_error_type(const struct device *dev,
167 uint32_t *error_type)
168 {
169 struct ibecc_data *data = dev->data;
170
171 *error_type = data->error_type;
172
173 return 0;
174 }
175
inject_error_trigger(const struct device * dev)176 static int inject_error_trigger(const struct device *dev)
177 {
178 struct ibecc_data *data = dev->data;
179 uint32_t ctrl = 0;
180
181 switch (data->error_type) {
182 case EDAC_ERROR_TYPE_DRAM_COR:
183 ctrl |= INJ_CTRL_COR;
184 break;
185 case EDAC_ERROR_TYPE_DRAM_UC:
186 ctrl |= INJ_CTRL_UC;
187 break;
188 default:
189 /* This would clear error injection */
190 break;
191 }
192
193 ibecc_write_reg32(dev, IBECC_INJ_ADDR_CTRL, ctrl);
194
195 return 0;
196 }
197 #endif /* CONFIG_EDAC_ERROR_INJECT */
198
ecc_error_log_get(const struct device * dev,uint64_t * value)199 static int ecc_error_log_get(const struct device *dev, uint64_t *value)
200 {
201 *value = ibecc_read_reg64(dev, IBECC_ECC_ERROR_LOG);
202
203 return 0;
204 }
205
ecc_error_log_clear(const struct device * dev)206 static int ecc_error_log_clear(const struct device *dev)
207 {
208 /* Clear all error bits */
209 ibecc_write_reg64(dev, IBECC_ECC_ERROR_LOG,
210 ECC_ERROR_MERRSTS | ECC_ERROR_CERRSTS);
211
212 return 0;
213 }
214
parity_error_log_get(const struct device * dev,uint64_t * value)215 static int parity_error_log_get(const struct device *dev, uint64_t *value)
216 {
217 *value = ibecc_read_reg64(dev, IBECC_PARITY_ERROR_LOG);
218
219 return 0;
220 }
221
parity_error_log_clear(const struct device * dev)222 static int parity_error_log_clear(const struct device *dev)
223 {
224 ibecc_write_reg64(dev, IBECC_PARITY_ERROR_LOG, PARITY_ERROR_ERRSTS);
225
226 return 0;
227 }
228
errors_cor_get(const struct device * dev)229 static int errors_cor_get(const struct device *dev)
230 {
231 struct ibecc_data *data = dev->data;
232
233 return data->errors_cor;
234 }
235
errors_uc_get(const struct device * dev)236 static int errors_uc_get(const struct device *dev)
237 {
238 struct ibecc_data *data = dev->data;
239
240 return data->errors_uc;
241 }
242
/**
 * Install (or remove, with NULL) the error notification callback.
 *
 * The pointer is updated with interrupts locked so the store is not
 * interleaved with interrupt-context code on this CPU.
 *
 * @param dev IBECC device instance
 * @param cb  Callback to invoke on an IBECC error, or NULL for none
 *
 * @return Always 0
 */
static int notify_callback_set(const struct device *dev,
			       edac_notify_callback_f cb)
{
	struct ibecc_data *data = dev->data;
	/* irq_lock() hands back an unsigned key; keep it in a matching type */
	unsigned int key = irq_lock();

	data->cb = cb;
	irq_unlock(key);

	return 0;
}
254
255 static const struct edac_driver_api api = {
256 #if defined(CONFIG_EDAC_ERROR_INJECT)
257 /* Error Injection functions */
258 .inject_set_param1 = inject_set_param1,
259 .inject_get_param1 = inject_get_param1,
260 .inject_set_param2 = inject_set_param2,
261 .inject_get_param2 = inject_get_param2,
262 .inject_set_error_type = inject_set_error_type,
263 .inject_get_error_type = inject_get_error_type,
264 .inject_error_trigger = inject_error_trigger,
265 #endif /* CONFIG_EDAC_ERROR_INJECT */
266
267 /* Error reporting & clearing functions */
268 .ecc_error_log_get = ecc_error_log_get,
269 .ecc_error_log_clear = ecc_error_log_clear,
270 .parity_error_log_get = parity_error_log_get,
271 .parity_error_log_clear = parity_error_log_clear,
272
273 /* Get error stats */
274 .errors_cor_get = errors_cor_get,
275 .errors_uc_get = errors_uc_get,
276
277 /* Notification callback set */
278 .notify_cb_set = notify_callback_set,
279 };
280
edac_ibecc_init(const struct device * dev)281 int edac_ibecc_init(const struct device *dev)
282 {
283 const pcie_bdf_t bdf = PCI_HOST_BRIDGE;
284 struct ibecc_data *data = dev->data;
285 uint64_t mchbar;
286 uint32_t conf_data;
287
288 conf_data = pcie_conf_read(bdf, PCIE_CONF_ID);
289 switch (conf_data) {
290 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU5):
291 __fallthrough;
292 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU6):
293 __fallthrough;
294 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU7):
295 __fallthrough;
296 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU8):
297 __fallthrough;
298 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU9):
299 __fallthrough;
300 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU10):
301 __fallthrough;
302 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU11):
303 __fallthrough;
304 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU12):
305 __fallthrough;
306 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU13):
307 __fallthrough;
308 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU14):
309 __fallthrough;
310 case PCIE_ID(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_SKU15):
311 break;
312 default:
313 LOG_ERR("PCI Probe failed");
314 return -ENODEV;
315 }
316
317 if (!ibecc_enabled(bdf)) {
318 LOG_ERR("IBECC is not enabled");
319 return -ENODEV;
320 }
321
322 mchbar = pcie_conf_read(bdf, MCHBAR_REG);
323 mchbar |= (uint64_t)pcie_conf_read(bdf, MCHBAR_REG + 1) << 32;
324
325 /* Check that MCHBAR is enabled */
326 if ((mchbar & MCHBAR_ENABLE) == 0) {
327 LOG_ERR("MCHBAR is not enabled");
328 return -ENODEV;
329 }
330
331 mchbar &= MCHBAR_MASK;
332
333 device_map(&data->mchbar, mchbar, MCH_SIZE, K_MEM_CACHE_NONE);
334
335 /* Enable Host Bridge generated SERR event */
336 ibecc_errcmd_setup(bdf, true);
337
338 return 0;
339 }
340
341 static struct ibecc_data ibecc_data;
342
343 DEVICE_DT_DEFINE(DEVICE_NODE, &edac_ibecc_init,
344 NULL, &ibecc_data, NULL, POST_KERNEL,
345 CONFIG_KERNEL_INIT_PRIORITY_DEVICE, &api);
346
/**
 * An IBECC error causes SERR_NMI_STS to be set and is indicated by the
 * IBECC_UC and IBECC_COR fields of the ERRSTS PCI register.
 * The following needs to be done:
 *  - Read the ECC_ERR_LOG register
 *  - Clear the IBECC_UC and IBECC_COR fields of the ERRSTS PCI register
 *  - Clear the MERRSTS & CERRSTS fields of the ECC_ERR_LOG register
 */
355
356 static struct k_spinlock nmi_lock;
357
358 /* NMI handling */
359
handle_nmi(void)360 static bool handle_nmi(void)
361 {
362 uint8_t status;
363
364 status = sys_in8(NMI_STS_CNT_REG);
365 if ((status & NMI_STS_SRC_SERR) == 0) {
366 /* For other NMI sources return false to handle it by
367 * Zephyr exception handler
368 */
369 return false;
370 }
371
372 /* Re-enable SERR# NMI sources */
373
374 status = (status & NMI_STS_MASK_EN) | NMI_STS_SERR_EN;
375 sys_out8(status, NMI_STS_CNT_REG);
376
377 status &= ~NMI_STS_SERR_EN;
378 sys_out8(status, NMI_STS_CNT_REG);
379
380 return true;
381 }
382
z_x86_do_kernel_nmi(const z_arch_esf_t * esf)383 bool z_x86_do_kernel_nmi(const z_arch_esf_t *esf)
384 {
385 const struct device *dev = DEVICE_DT_GET(DEVICE_NODE);
386 struct ibecc_data *data = dev->data;
387 struct ibecc_error error_data;
388 k_spinlock_key_t key;
389 bool ret = true;
390 uint64_t ecclog;
391
392 key = k_spin_lock(&nmi_lock);
393
394 /* Skip the same NMI handling for other cores and return handled */
395 if (arch_curr_cpu()->id != 0) {
396 ret = true;
397 goto out;
398 }
399
400 if (!handle_nmi()) {
401 /* Indicate that we do not handle this NMI */
402 ret = false;
403 goto out;
404 }
405
406 if (edac_ecc_error_log_get(dev, &ecclog) != 0) {
407 goto out;
408 }
409
410 parse_ecclog(dev, ecclog, &error_data);
411
412 if (data->cb != NULL) {
413 data->cb(dev, &error_data);
414 }
415
416 edac_ecc_error_log_clear(dev);
417
418 ibecc_errsts_clear(PCI_HOST_BRIDGE);
419
420 out:
421 k_spin_unlock(&nmi_lock, key);
422
423 return ret;
424 }
425