// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/*
	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
	 * load operation, so there is no need to enforce load-after-store
	 * ordering.
	 */

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

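/*
 * Disable the EQ in OPAL and release the guest page backing the queue,
 * if any.
 */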
static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

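/*
 * Configure the EQ in OPAL and, on success, drop the reference on the
 * previous queue page, if any.
 */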
static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
					      u8 prio, __be32 *qpage,
					      u32 order, bool can_escalate)
{
	int rc;
	__be32 *qpage_prev = q->qpage;

	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
					 can_escalate);
	if (rc)
		return rc;

	if (qpage_prev)
		put_page(virt_to_page(qpage_prev));

	return rc;
}

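/*
 * Tear down the XIVE state of a vCPU: free the escalation interrupts,
 * disable the VP in OPAL and release the queue pages.
 */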
void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

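/*
 * Compute a VP id for the vCPU server, enable the VP in OPAL and
 * initialize the fields used by the assembly context push/pull code.
 */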
int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	mutex_lock(&xive->lock);

	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
	if (rc)
		goto bail;

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect escalation interrupt numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];

	/* Some sanity checking */
	if (!state->valid) {
		pr_devel("%s: source %lx invalid !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * The first/even page is for trigger, the second/odd page is
	 * for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

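/*
 * Create the source block backing an IRQ number if needed, allocate
 * its IPI and leave the source masked
 * (KVM_DEV_XIVE_GRP_SOURCE attribute).
 */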
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

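/*
 * Retarget an interrupt source to a new server/priority or mask it by
 * reconfiguring its EAS in OPAL.
 */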
static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

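/*
 * Decode the KVM_DEV_XIVE_GRP_SOURCE_CONFIG attribute value and apply
 * the new targeting (server, priority, EISN) to the source.
 */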
static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

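/*
 * Sync a source at the XIVE IC level
 * (KVM_DEV_XIVE_GRP_SOURCE_SYNC attribute).
 */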
static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

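/*
 * Configure, or reset when qshift is 0, the EQ of a priority/server
 * pair: validate the guest page backing the queue, configure it in
 * OPAL and restore the queue state (toggle bit and index) when needed
 * (KVM_DEV_XIVE_GRP_EQ_CONFIG attribute).
 */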
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
							NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);

	page_size = kvm_host_page_size(vcpu, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address to mark the EQ page
	 * dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
						(__be32 *) qaddr,
						kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

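/*
 * Return the current EQ configuration of a priority/server pair to
 * userspace, typically for migration
 * (KVM_DEV_XIVE_GRP_EQ_CONFIG attribute).
 */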
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

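/* Mask all valid sources of a block and reset their EAS configuration */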
static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

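/*
 * Reset the device state: free the escalation interrupts and queues of
 * each vCPU and mask all sources (KVM_DEV_XIVE_RESET control).
 */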
static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

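/* Mark the EQ pages of a vCPU dirty so they are transferred on migration */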
static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

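/*
 * Sync all sources and their queues at the XIVE IC level and mark the
 * EQ pages dirty (KVM_DEV_XIVE_EQ_SYNC control).
 */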
static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		case KVM_DEV_XIVE_NR_SERVERS:
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore there cannot be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions cannot be
	 * running either.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference of the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/* VP allocation is delayed to the first call to connect_vcpu */
	xive->vp_base = XIVE_INVALID_VP;
	/*
	 * KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
	 * on a POWER9 system.
	 */
	xive->nr_servers = KVM_MAX_VCPUS;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	kvm->arch.xive = xive;
	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of the IPB register in the NVT structure
	 * and merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num, xc->vp_id,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}