/*
 * Copyright (c) 2020 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */

#define DT_DRV_COMPAT intel_vt_d

#include <errno.h>

#include <zephyr/kernel.h>
#include <zephyr/arch/cpu.h>

#include <soc.h>
#include <zephyr/device.h>
#include <zephyr/init.h>
#include <string.h>

#include <zephyr/cache.h>

#include <zephyr/arch/x86/intel_vtd.h>
#include <zephyr/drivers/interrupt_controller/intel_vtd.h>
#include <zephyr/drivers/interrupt_controller/ioapic.h>
#include <zephyr/drivers/interrupt_controller/loapic.h>
#include <zephyr/drivers/pcie/msi.h>

#include <kernel_arch_func.h>

#include "intc_intel_vtd.h"

static inline void vtd_pause_cpu(void)
{
	__asm__ volatile("pause" ::: "memory");
}

static void vtd_write_reg32(const struct device *dev,
			    uint16_t reg, uint32_t value)
{
	uintptr_t base_address = DEVICE_MMIO_GET(dev);

	sys_write32(value, (base_address + reg));
}

static uint32_t vtd_read_reg32(const struct device *dev, uint16_t reg)
{
	uintptr_t base_address = DEVICE_MMIO_GET(dev);

	return sys_read32(base_address + reg);
}

static void vtd_write_reg64(const struct device *dev,
			    uint16_t reg, uint64_t value)
{
	uintptr_t base_address = DEVICE_MMIO_GET(dev);

	sys_write64(value, (base_address + reg));
}

static uint64_t vtd_read_reg64(const struct device *dev, uint16_t reg)
{
	uintptr_t base_address = DEVICE_MMIO_GET(dev);

	return sys_read64(base_address + reg);
}

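/* OR a command bit into the current value read from GSTS, write the result
 * to GCMD, then busy-wait until the corresponding status bit is reported
 * back in GSTS.
 */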
static void vtd_send_cmd(const struct device *dev,
			 uint16_t cmd_bit, uint16_t status_bit)
{
	uintptr_t base_address = DEVICE_MMIO_GET(dev);
	uint32_t value;

	value = vtd_read_reg32(dev, VTD_GSTS_REG);
	value |= BIT(cmd_bit);

	vtd_write_reg32(dev, VTD_GCMD_REG, value);

	while (!sys_test_bit((base_address + VTD_GSTS_REG),
			     status_bit)) {
		/* Do nothing */
	}
}

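/* If the hardware does not provide page-walk coherency, flush the IRTE from
 * the data cache so the remapping hardware observes the update.
 */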
static void vtd_flush_irte_from_cache(const struct device *dev,
				      uint8_t irte_idx)
{
	struct vtd_ictl_data *data = dev->data;

	if (!data->pwc) {
		cache_data_flush_range(&data->irte[irte_idx],
				       sizeof(union vtd_irte));
	}
}

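/* Reset the queue tail, program the Invalidation Queue address and size,
 * and enable Queued Invalidation (QI).
 */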
static void vtd_qi_init(const struct device *dev)
{
	struct vtd_ictl_data *data = dev->data;
	uint64_t value;

	vtd_write_reg64(dev, VTD_IQT_REG, 0);
	data->qi_tail = 0;

	value = VTD_IQA_REG_GEN_CONTENT((uintptr_t)data->qi,
					VTD_IQA_WIDTH_128_BIT, QI_SIZE);
	vtd_write_reg64(dev, VTD_IQA_REG, value);

	vtd_send_cmd(dev, VTD_GCMD_QIE, VTD_GSTS_QIES);
}

static inline void vtd_qi_tail_inc(const struct device *dev)
{
	struct vtd_ictl_data *data = dev->data;

	data->qi_tail += sizeof(struct qi_descriptor);
	data->qi_tail %= (QI_NUM * sizeof(struct qi_descriptor));
}

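/* Post the given descriptor followed by a wait descriptor, update the queue
 * tail register and busy-poll the wait status word until the hardware marks
 * completion, or until a fault or timeout is detected.
 */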
static int vtd_qi_send(const struct device *dev,
		       struct qi_descriptor *descriptor)
{
	struct vtd_ictl_data *data = dev->data;
	union qi_wait_descriptor wait_desc = { 0 };
	struct qi_descriptor *desc;
	uint32_t wait_status;
	uint32_t wait_count;

	desc = (struct qi_descriptor *)((uintptr_t)data->qi + data->qi_tail);

	desc->low = descriptor->low;
	desc->high = descriptor->high;

	vtd_qi_tail_inc(dev);

	desc++;

	wait_status = QI_WAIT_STATUS_INCOMPLETE;

	wait_desc.wait.type = QI_TYPE_WAIT;
	wait_desc.wait.status_write = 1;
	wait_desc.wait.status_data = QI_WAIT_STATUS_COMPLETE;
	wait_desc.wait.address = ((uintptr_t)&wait_status) >> 2;

	desc->low = wait_desc.desc.low;
	desc->high = wait_desc.desc.high;

	vtd_qi_tail_inc(dev);

	vtd_write_reg64(dev, VTD_IQT_REG, data->qi_tail);

	wait_count = 0;

	while (wait_status != QI_WAIT_STATUS_COMPLETE) {
		/* We cannot use a timeout here: this function is called at
		 * init time and the system clock may not be initialized yet,
		 * since VT-D init comes first.
		 */
		if (wait_count > QI_WAIT_COUNT_LIMIT) {
			printk("QI timeout\n");
			return -ETIME;
		}

		if (vtd_read_reg32(dev, VTD_FSTS_REG) & VTD_FSTS_IQE) {
			printk("QI error\n");
			return -EIO;
		}

		vtd_pause_cpu();
		wait_count++;
	}

	return 0;
}

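/* Request a global invalidation of the context cache through the QI. */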
static int vtd_global_cc_invalidate(const struct device *dev)
{
	union qi_icc_descriptor iec_desc = { 0 };

	iec_desc.icc.type = QI_TYPE_ICC;
	iec_desc.icc.granularity = 1; /* Global Invalidation requested */

	return vtd_qi_send(dev, &iec_desc.desc);
}

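/* Request a global invalidation of the interrupt entry cache through the QI. */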
static int vtd_global_iec_invalidate(const struct device *dev)
{
	union qi_iec_descriptor iec_desc = { 0 };

	iec_desc.iec.type = QI_TYPE_IEC;
	iec_desc.iec.granularity = 0; /* Global Invalidation requested */

	return vtd_qi_send(dev, &iec_desc.desc);
}

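/* Invalidate a single interrupt remapping table entry from the interrupt
 * entry cache.
 */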
static int vtd_index_iec_invalidate(const struct device *dev, uint8_t irte_idx)
{
	union qi_iec_descriptor iec_desc = { 0 };

	iec_desc.iec.type = QI_TYPE_IEC;
	iec_desc.iec.granularity = 1; /* Index based invalidation requested */

	iec_desc.iec.interrupt_index = irte_idx;
	iec_desc.iec.index_mask = 0;

	return vtd_qi_send(dev, &iec_desc.desc);
}

static void fault_status_description(uint32_t status)
{
	if (status & VTD_FSTS_PFO) {
		printk("Primary Fault Overflow (PFO)\n");
	}

	if (status & VTD_FSTS_AFO) {
		printk("Advanced Fault Overflow (AFO)\n");
	}

	if (status & VTD_FSTS_APF) {
		printk("Advanced Primary Fault (APF)\n");
	}

	if (status & VTD_FSTS_IQE) {
		printk("Invalidation Queue Error (IQE)\n");
	}

	if (status & VTD_FSTS_ICE) {
		printk("Invalidation Completion Error (ICE)\n");
	}

	if (status & VTD_FSTS_ITE) {
		printk("Invalidation Timeout Error (ITE)\n");
	}

	if (status & VTD_FSTS_PPF) {
		printk("Primary Pending Fault (PPF) %u\n",
		       VTD_FSTS_FRI(status));
	}
}

static void fault_record_description(uint64_t low, uint64_t high)
{
	printk("Fault %s request: Reason 0x%x info 0x%llx src 0x%x\n",
	       (high & VTD_FRCD_T) ? "Read/Atomic" : "Write/Page",
	       VTD_FRCD_FR(high), VTD_FRCD_FI(low), VTD_FRCD_SID(high));
}

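/* Fault event ISR: report the fault status, walk the fault recording
 * registers starting at the index given by FSTS.FRI, describe and clear each
 * pending fault record, then clear the fault status register.
 */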
static void fault_event_isr(const void *arg)
{
	const struct device *dev = arg;
	struct vtd_ictl_data *data = dev->data;
	uint32_t status;
	uint8_t f_idx;

	status = vtd_read_reg32(dev, VTD_FSTS_REG);
	fault_status_description(status);

	if (!(status & VTD_FSTS_PPF)) {
		goto out;
	}

	f_idx = VTD_FSTS_FRI(status);
	while (f_idx < data->fault_record_num) {
		uint64_t fault_l, fault_h;

		/* Reading fault's 64 lowest bits */
		fault_l = vtd_read_reg64(dev, data->fault_record_reg +
					 (VTD_FRCD_REG_SIZE * f_idx));
		/* Reading fault's 64 highest bits */
		fault_h = vtd_read_reg64(dev, data->fault_record_reg +
					 (VTD_FRCD_REG_SIZE * f_idx) + 8);

		if (fault_h & VTD_FRCD_F) {
			fault_record_description(fault_l, fault_h);
		}

		/* Clearing the fault */
		vtd_write_reg64(dev, data->fault_record_reg +
				(VTD_FRCD_REG_SIZE * f_idx), fault_l);
		vtd_write_reg64(dev, data->fault_record_reg +
				(VTD_FRCD_REG_SIZE * f_idx) + 8, fault_h);
		f_idx++;
	}
out:
	/* Clearing fault status */
	vtd_write_reg32(dev, VTD_FSTS_REG, VTD_FSTS_CLEAR(status));
}

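/* Set up fault event reporting: locate the fault recording registers from
 * the capability register, allocate an IRQ and vector directly (bypassing
 * remapping), program the fault event data/address registers, connect the
 * ISR and unmask the fault event interrupt.
 */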
static void vtd_fault_event_init(const struct device *dev)
{
	struct vtd_ictl_data *data = dev->data;
	uint64_t value;
	uint32_t reg;

	value = vtd_read_reg64(dev, VTD_CAP_REG);
	data->fault_record_num = VTD_CAP_NFR(value) + 1;
	data->fault_record_reg = DEVICE_MMIO_GET(dev) +
		(uintptr_t)(16 * VTD_CAP_FRO(value));

	/* Allocating IRQ & vector and connecting the ISR handler,
	 * by-passing remapping by using x86 functions directly.
	 */
	data->fault_irq = arch_irq_allocate();
	data->fault_vector = z_x86_allocate_vector(0, -1);

	vtd_write_reg32(dev, VTD_FEDATA_REG, data->fault_vector);
	vtd_write_reg32(dev, VTD_FEADDR_REG,
			pcie_msi_map(data->fault_irq, NULL, 0));
	vtd_write_reg32(dev, VTD_FEUADDR_REG, 0);

	z_x86_irq_connect_on_vector(data->fault_irq, data->fault_vector,
				    fault_event_isr, dev);

	vtd_write_reg32(dev, VTD_FSTS_REG,
			VTD_FSTS_CLEAR(vtd_read_reg32(dev, VTD_FSTS_REG)));

	/* Unmasking interrupts */
	reg = vtd_read_reg32(dev, VTD_FECTL_REG);
	reg &= ~BIT(VTD_FECTL_REG_IM);
	vtd_write_reg32(dev, VTD_FECTL_REG, reg);
}

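/* Reserve n_entries consecutive IRTEs and return the index of the first one,
 * or -EBUSY if the remapping table is exhausted.
 */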
static int vtd_ictl_allocate_entries(const struct device *dev,
				     uint8_t n_entries)
{
	struct vtd_ictl_data *data = dev->data;
	int irte_idx_start;

	if ((data->irte_num_used + n_entries) > IRTE_NUM) {
		return -EBUSY;
	}

	irte_idx_start = data->irte_num_used;
	data->irte_num_used += n_entries;

	return irte_idx_start;
}

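/* Generate the remappable MSI address for the vector's IRTE index; the
 * Sub-Handle Valid (SHV) bit is set when more than one vector is involved.
 */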
static uint32_t vtd_ictl_remap_msi(const struct device *dev,
				   msi_vector_t *vector,
				   uint8_t n_vector)
{
	uint32_t shv = (n_vector > 1) ? VTD_INT_SHV : 0;

	return VTD_MSI_MAP(vector->arch.irte, shv);
}

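/* Fill in the IRTE at irte_idx: destination APIC ID, optional source-id
 * validation, trigger and delivery mode; mark the entry present, then
 * invalidate and flush it so the hardware picks up the new translation.
 */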
static int vtd_ictl_remap(const struct device *dev,
			  uint8_t irte_idx,
			  uint16_t vector,
			  uint32_t flags,
			  uint16_t src_id)
{
	struct vtd_ictl_data *data = dev->data;
	union vtd_irte irte = { 0 };
	uint32_t delivery_mode;

	irte.bits.vector = vector;

	if (IS_ENABLED(CONFIG_X2APIC)) {
		/* Getting the logical APIC ID */
		irte.bits.dst_id = x86_read_loapic(LOAPIC_LDR);
	} else {
		/* As for IOAPIC: let's mask all possible IDs */
		irte.bits.dst_id = 0xFF << 8;
	}

	if (src_id != USHRT_MAX &&
	    !IS_ENABLED(CONFIG_INTEL_VTD_ICTL_NO_SRC_ID_CHECK)) {
		irte.bits.src_validation_type = 1;
		irte.bits.src_id = src_id;
	}

	delivery_mode = (flags & IOAPIC_DELIVERY_MODE_MASK);
	if ((delivery_mode != IOAPIC_FIXED) &&
	    (delivery_mode != IOAPIC_LOW)) {
		delivery_mode = IOAPIC_LOW;
	}

	irte.bits.trigger_mode = (flags & IOAPIC_TRIGGER_MASK) >> 15;
	irte.bits.delivery_mode = delivery_mode >> 8;
	irte.bits.redirection_hint = 1;
	irte.bits.dst_mode = 1; /* Always logical */
	irte.bits.present = 1;

	data->irte[irte_idx].parts.low = irte.parts.low;
	data->irte[irte_idx].parts.high = irte.parts.high;

	vtd_index_iec_invalidate(dev, irte_idx);

	vtd_flush_irte_from_cache(dev, irte_idx);

	return 0;
}


static int vtd_ictl_set_irte_vector(const struct device *dev,
				    uint8_t irte_idx,
				    uint16_t vector)
{
	struct vtd_ictl_data *data = dev->data;

	data->vectors[irte_idx] = vector;

	return 0;
}

static int vtd_ictl_get_irte_by_vector(const struct device *dev,
				       uint16_t vector)
{
	struct vtd_ictl_data *data = dev->data;
	int irte_idx;

	for (irte_idx = 0; irte_idx < IRTE_NUM; irte_idx++) {
		if (data->vectors[irte_idx] == vector) {
			return irte_idx;
		}
	}

	return -EINVAL;
}

static uint16_t vtd_ictl_get_irte_vector(const struct device *dev,
					 uint8_t irte_idx)
{
	struct vtd_ictl_data *data = dev->data;

	return data->vectors[irte_idx];
}

static int vtd_ictl_set_irte_irq(const struct device *dev,
				 uint8_t irte_idx,
				 unsigned int irq)
{
	struct vtd_ictl_data *data = dev->data;

	data->irqs[irte_idx] = irq;

	return 0;
}

static int vtd_ictl_get_irte_by_irq(const struct device *dev,
				    unsigned int irq)
{
	struct vtd_ictl_data *data = dev->data;
	int irte_idx;

	for (irte_idx = 0; irte_idx < IRTE_NUM; irte_idx++) {
		if (data->irqs[irte_idx] == irq) {
			return irte_idx;
		}
	}

	return -EINVAL;
}

static void vtd_ictl_set_irte_msi(const struct device *dev,
				  uint8_t irte_idx, bool msi)
{
	struct vtd_ictl_data *data = dev->data;

	data->msi[irte_idx] = msi;
}

static bool vtd_ictl_irte_is_msi(const struct device *dev,
				 uint8_t irte_idx)
{
	struct vtd_ictl_data *data = dev->data;

	return data->msi[irte_idx];
}

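/* Initialize the remapping unit: map the MMIO registers, detect page-walk
 * coherency, set up fault events and the invalidation queue, program the
 * interrupt remapping table address, invalidate the caches and finally
 * enable interrupt remapping.
 */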
static int vtd_ictl_init(const struct device *dev)
{
	struct vtd_ictl_data *data = dev->data;
	unsigned int key = irq_lock();
	uint64_t eime = 0;
	uint64_t value;
	int ret = 0;

	DEVICE_MMIO_MAP(dev, K_MEM_CACHE_NONE);

	if (vtd_read_reg64(dev, VTD_ECAP_REG) & VTD_ECAP_C) {
		printk("Page walk coherency supported\n");
		data->pwc = true;
	}

	vtd_fault_event_init(dev);

	vtd_qi_init(dev);

	if (vtd_global_cc_invalidate(dev) != 0) {
		printk("Could not perform ICC invalidation\n");
		ret = -EIO;
		goto out;
	}

	if (IS_ENABLED(CONFIG_X2APIC)) {
		eime = VTD_IRTA_EIME;
	}

	value = VTD_IRTA_REG_GEN_CONTENT((uintptr_t)data->irte,
					 IRTA_SIZE, eime);

	vtd_write_reg64(dev, VTD_IRTA_REG, value);

	if (vtd_global_iec_invalidate(dev) != 0) {
		printk("Could not perform IEC invalidation\n");
		ret = -EIO;
		goto out;
	}

	if (!IS_ENABLED(CONFIG_X2APIC) &&
	    IS_ENABLED(CONFIG_INTEL_VTD_ICTL_XAPIC_PASSTHROUGH)) {
		vtd_send_cmd(dev, VTD_GCMD_CFI, VTD_GSTS_CFIS);
	}

	vtd_send_cmd(dev, VTD_GCMD_SIRTP, VTD_GSTS_SIRTPS);
	vtd_send_cmd(dev, VTD_GCMD_IRE, VTD_GSTS_IRES);

	printk("Intel VT-D up and running (status 0x%x)\n",
	       vtd_read_reg32(dev, VTD_GSTS_REG));

out:
	irq_unlock(key);

	return ret;
}


static DEVICE_API(vtd, vtd_api) = {
	.allocate_entries = vtd_ictl_allocate_entries,
	.remap_msi = vtd_ictl_remap_msi,
	.remap = vtd_ictl_remap,
	.set_irte_vector = vtd_ictl_set_irte_vector,
	.get_irte_by_vector = vtd_ictl_get_irte_by_vector,
	.get_irte_vector = vtd_ictl_get_irte_vector,
	.set_irte_irq = vtd_ictl_set_irte_irq,
	.get_irte_by_irq = vtd_ictl_get_irte_by_irq,
	.set_irte_msi = vtd_ictl_set_irte_msi,
	.irte_is_msi = vtd_ictl_irte_is_msi
};

static struct vtd_ictl_data vtd_ictl_data_0 = {
	.irqs = { -EINVAL },
	.vectors = { -EINVAL },
};

static const struct vtd_ictl_cfg vtd_ictl_cfg_0 = {
	DEVICE_MMIO_ROM_INIT(DT_DRV_INST(0)),
};

DEVICE_DT_INST_DEFINE(0,
		      vtd_ictl_init, NULL,
		      &vtd_ictl_data_0, &vtd_ictl_cfg_0,
		      PRE_KERNEL_1, CONFIG_INTEL_VTD_ICTL_INIT_PRIORITY, &vtd_api);