1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2017-2019, IBM Corporation.
4 */
5
6 #define pr_fmt(fmt) "xive-kvm: " fmt
7
8 #include <linux/kernel.h>
9 #include <linux/kvm_host.h>
10 #include <linux/err.h>
11 #include <linux/gfp.h>
12 #include <linux/spinlock.h>
13 #include <linux/delay.h>
14 #include <linux/file.h>
15 #include <asm/uaccess.h>
16 #include <asm/kvm_book3s.h>
17 #include <asm/kvm_ppc.h>
18 #include <asm/hvcall.h>
19 #include <asm/xive.h>
20 #include <asm/xive-regs.h>
21 #include <asm/debug.h>
22 #include <asm/debugfs.h>
23 #include <asm/opal.h>
24
25 #include <linux/debugfs.h>
26 #include <linux/seq_file.h>
27
28 #include "book3s_xive.h"
29
xive_vm_esb_load(struct xive_irq_data * xd,u32 offset)30 static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
31 {
32 u64 val;
33
34 if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
35 offset |= offset << 4;
36
37 val = in_be64(xd->eoi_mmio + offset);
38 return (u8)val;
39 }
40
kvmppc_xive_native_cleanup_queue(struct kvm_vcpu * vcpu,int prio)41 static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
42 {
43 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
44 struct xive_q *q = &xc->queues[prio];
45
46 xive_native_disable_queue(xc->vp_id, q, prio);
47 if (q->qpage) {
48 put_page(virt_to_page(q->qpage));
49 q->qpage = NULL;
50 }
51 }
52
/*
 * Tear down the XIVE native presenter state of a vcpu: mark it
 * invalid, free its escalation interrupts, disable its VP, release
 * its event queues and free the per-vcpu structure.
 *
 * Safe to call on a vcpu that was never connected (xc == NULL) or is
 * not XIVE-enabled; both cases return early.
 */
void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			/* single escalation mode needs extra cleanup first */
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}
102
/*
 * Connect a vcpu to the XIVE native device: allocate and initialize
 * its kvmppc_xive_vcpu structure and enable the corresponding XIVE VP
 * in OPAL.
 *
 * Called under the device fd; serialized against other connects by
 * xive->lock. On any failure after the vcpu fields are set, the vcpu
 * is fully cleaned up again before returning.
 *
 * Returns 0 on success, a negative errno otherwise.
 */
int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;
	/* server numbers are scaled by the emulated SMT mode */
	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
		pr_devel("Out of bounds !\n");
		return -EINVAL;
	}

	mutex_lock(&xive->lock);

	vp_id = kvmppc_xive_vp(xive, server_num);
	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
		pr_devel("Duplicate !\n");
		rc = -EEXIST;
		goto bail;
	}

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect escalation interrupts numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	/* undo all partial state on error */
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}
178
179 /*
180 * Device passthrough support
181 */
/*
 * Invalidate any userspace mapping of the ESB pages of guest IRQ
 * @irq, so the next access faults and gets repopulated with the
 * correct (device or IC) pages. Used when a device interrupt is
 * mapped into / unmapped from the guest.
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	/* each guest IRQ owns two consecutive ESB pages (trigger + EOI) */
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}
204
/* Callbacks used by the device passthrough code */
static struct kvmppc_xive_ops kvmppc_xive_native_ops =  {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};
208
/*
 * Fault handler for the ESB region of the device mmap. Resolves the
 * faulting page offset to a guest IRQ, picks the matching hardware
 * ESB page (trigger or EOI) under the source block lock, and inserts
 * its PFN into the VMA.
 */
static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two pages ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n",
			 __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];
	/* may return the passed-through device data instead of the IPI's */
	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}
257
/* vm_ops for the ESB pages region of the device mmap */
static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};
261
xive_native_tima_fault(struct vm_fault * vmf)262 static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
263 {
264 struct vm_area_struct *vma = vmf->vma;
265
266 switch (vmf->pgoff - vma->vm_pgoff) {
267 case 0: /* HW - forbid access */
268 case 1: /* HV - forbid access */
269 return VM_FAULT_SIGBUS;
270 case 2: /* OS */
271 vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
272 return VM_FAULT_NOPAGE;
273 case 3: /* USER - TODO */
274 default:
275 return VM_FAULT_SIGBUS;
276 }
277 }
278
/* vm_ops for the TIMA region of the device mmap */
static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};
282
/*
 * mmap handler of the XIVE native device fd. Two fixed regions are
 * supported: the TIMA pages (up to 4 pages) and the ESB pages (two
 * pages per guest IRQ). The actual PFNs are installed lazily by the
 * fault handlers above.
 */
static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	/* MMIO-backed, PFN-mapped, non-cached mapping */
	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}
312
/*
 * KVM_DEV_XIVE_GRP_SOURCE handler: create (or restore) an interrupt
 * source. Allocates the backing IPI on first use, optionally restores
 * LSI state from the u64 at user address @addr, and leaves the source
 * masked (PQ = 01, EAS masked).
 *
 * Returns 0 on success, a negative errno otherwise.
 */
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	/* set PQ to 01 so the source cannot fire until configured */
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}
389
/*
 * Route (or mask) a source in the XIVE IC. When @priority is valid
 * and the source is not masked, a real target is selected and the EAS
 * is configured towards it with end-of-interrupt source number @eisn.
 * Otherwise the EAS is masked. No-op if nothing changed.
 *
 * Called with no lock held; takes the source block lock.
 */
static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	/* nothing to do if the configuration is unchanged */
	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		/* may adjust @server if the requested one has no queue */
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}
436
/*
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG handler: demangle the u64
 * configuration word read from user address @addr and apply it to the
 * source through kvmppc_xive_native_update_source_config().
 */
static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	u64 __user *ubufp = (u64 __user *) addr;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 kvm_cfg;
	u8 priority;
	u32 server;
	u32 eisn;
	bool masked;
	u16 idx;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[idx];
	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	/* unpack the configuration fields */
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}
482
/*
 * KVM_DEV_XIVE_GRP_SOURCE_SYNC handler: ask the XIVE IC to sync a
 * single source. Fails with -EINVAL if the source is not valid.
 */
static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[idx];

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	} else {
		rc = -EINVAL;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}
514
/*
 * Validate an event queue size shift.
 *
 * We only support 64K pages for the moment (qshift == 16). This is
 * also advertised in the DT property "ibm,xive-eq-sizes". A shift of
 * 0 means "EQ reset" and is accepted as well.
 */
static int xive_native_validate_queue_size(u32 qshift)
{
	if (qshift == 0 || qshift == 16)
		return 0;

	return -EINVAL;
}
532
/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG set handler: configure (or reset) the
 * event queue of one vcpu priority from the kvm_ppc_xive_eq structure
 * at user address @addr. The priority/server tuple is demangled from
 * @eq_idx. A zero qshift resets the queue; otherwise the guest EQ
 * page is translated, pinned and handed to OPAL, and the saved
 * toggle/index state is restored when needed.
 *
 * Returns 0 on success, a negative errno otherwise.
 *
 * Fix: the host-page-size mismatch error path returned without
 * releasing the page reference taken by gfn_to_page(), leaking a page
 * refcount every time userspace passed a queue crossing a small host
 * page.
 */
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr  = 0;
		q->guest_qshift = 0;

		rc = xive_native_configure_queue(xc->vp_id, q, priority,
						 NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		/* drop the reference on the guest page backing the old EQ */
		if (q->qpage) {
			put_page(virt_to_page(q->qpage));
			q->qpage = NULL;
		}

		return 0;
	}

	/*
	 * sPAPR specifies a "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);
	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	/* the EQ must fit entirely within one host page */
	page_size = kvm_host_page_size(kvm, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		put_page(page);		/* drop the gfn_to_page() reference */
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Backup the queue page guest address to the mark EQ page
	 * dirty for migration.
	 */
	q->guest_qaddr  = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	 /*
	  * Unconditional Notification is forced by default at the
	  * OPAL level because the use of END ESBs is not supported by
	  * Linux.
	  */
	rc = xive_native_configure_queue(xc->vp_id, q, priority,
					 (__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}
684
/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG get handler: capture the configuration
 * and current state (toggle/index) of one vcpu event queue into a
 * kvm_ppc_xive_eq structure copied out to user address @addr. An
 * unconfigured queue returns a zeroed structure.
 *
 * Returns 0 on success, a negative errno otherwise.
 *
 * Fix: ubufp was declared void __user * but initialized with a
 * mismatched (u64 __user *) cast; use (void __user *) as the sibling
 * set_queue_config does.
 */
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	/* unconfigured queue: report the zeroed structure */
	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	/* report the guest view of the queue page, saved at set time */
	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr  = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}
756
/*
 * Mask and un-route every valid source of a source block: clear the
 * EAS configuration and set PQ to 01 on both the IPI and, when the
 * source is passed-through, the device interrupt.
 *
 * Called with the source block lock held.
 */
static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		/* already masked, nothing to do */
		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}
782
/*
 * KVM_DEV_XIVE_RESET handler: return the device to a pristine state.
 * For each connected vcpu, interrupts are disabled, escalations freed
 * and queues cleaned up; then every source block is masked.
 *
 * Serialized by xive->lock.
 */
static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		/* vcpu not connected to the device */
		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}
833
/*
 * Sync every valid, unmasked source of a source block in the XIVE IC
 * (source and queue). Called with the source block lock held.
 */
static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked setting the PQ bits to
		 * '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}
868
/*
 * Mark the configured EQ pages of a vcpu dirty so that migration
 * transfers their current contents. Returns -ENOENT if the vcpu is
 * not connected to the XIVE device.
 */
static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		/* skip unconfigured queues */
		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}
891
/*
 * KVM_DEV_XIVE_EQ_SYNC handler: sync all sources in the XIVE IC and
 * mark all configured EQ pages dirty, so a coherent snapshot can be
 * migrated. Serialized by xive->lock.
 */
static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}
918
kvmppc_xive_native_set_attr(struct kvm_device * dev,struct kvm_device_attr * attr)919 static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
920 struct kvm_device_attr *attr)
921 {
922 struct kvmppc_xive *xive = dev->private;
923
924 switch (attr->group) {
925 case KVM_DEV_XIVE_GRP_CTRL:
926 switch (attr->attr) {
927 case KVM_DEV_XIVE_RESET:
928 return kvmppc_xive_reset(xive);
929 case KVM_DEV_XIVE_EQ_SYNC:
930 return kvmppc_xive_native_eq_sync(xive);
931 }
932 break;
933 case KVM_DEV_XIVE_GRP_SOURCE:
934 return kvmppc_xive_native_set_source(xive, attr->attr,
935 attr->addr);
936 case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
937 return kvmppc_xive_native_set_source_config(xive, attr->attr,
938 attr->addr);
939 case KVM_DEV_XIVE_GRP_EQ_CONFIG:
940 return kvmppc_xive_native_set_queue_config(xive, attr->attr,
941 attr->addr);
942 case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
943 return kvmppc_xive_native_sync_source(xive, attr->attr,
944 attr->addr);
945 }
946 return -ENXIO;
947 }
948
kvmppc_xive_native_get_attr(struct kvm_device * dev,struct kvm_device_attr * attr)949 static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
950 struct kvm_device_attr *attr)
951 {
952 struct kvmppc_xive *xive = dev->private;
953
954 switch (attr->group) {
955 case KVM_DEV_XIVE_GRP_EQ_CONFIG:
956 return kvmppc_xive_native_get_queue_config(xive, attr->attr,
957 attr->addr);
958 }
959 return -ENXIO;
960 }
961
kvmppc_xive_native_has_attr(struct kvm_device * dev,struct kvm_device_attr * attr)962 static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
963 struct kvm_device_attr *attr)
964 {
965 switch (attr->group) {
966 case KVM_DEV_XIVE_GRP_CTRL:
967 switch (attr->attr) {
968 case KVM_DEV_XIVE_RESET:
969 case KVM_DEV_XIVE_EQ_SYNC:
970 return 0;
971 }
972 break;
973 case KVM_DEV_XIVE_GRP_SOURCE:
974 case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
975 case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
976 if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
977 attr->attr < KVMPPC_XIVE_NR_IRQS)
978 return 0;
979 break;
980 case KVM_DEV_XIVE_GRP_EQ_CONFIG:
981 return 0;
982 }
983 return -ENXIO;
984 }
985
986 /*
987 * Called when device fd is closed. kvm->lock is held.
988 */
/*
 * Release the XIVE native device: detach it from the VM, clean up
 * every connected vcpu, free the source blocks and the VP block.
 * The kvmppc_xive structure itself is kept for reuse (see comment
 * at the bottom).
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device.  Therefore there can not be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions also cannot
	 * be being executed.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference of the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}
1062
1063 /*
1064 * Create a XIVE device. kvm->lock is held.
1065 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	pr_devel("Creating xive native device\n");

	/* Only one XIVE device per VM */
	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	kvm->arch.xive = xive;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/*
	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
	 * a default. Getting the max number of CPUs the VM was
	 * configured with would improve our usage of the XIVE VP space.
	 */
	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
	pr_devel("VP_Base=%x\n", xive->vp_base);

	if (xive->vp_base == XIVE_INVALID_VP)
		ret = -ENXIO;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	/* ret is 0 unless the VP block allocation failed */
	return ret;
}
1107
1108 /*
1109 * Interrupt Pending Buffer (IPB) offset
1110 */
1111 #define TM_IPB_SHIFT 40
1112 #define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)
1113
/*
 * one_reg get handler for the vcpu XIVE VP state: capture the saved
 * thread context word (w01) and merge in the IPB backed up by OPAL in
 * the NVT structure.
 */
int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of IPB register in the NVT structure and
	 * merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}
1151
/*
 * Restore the XIVE thread interrupt context of a vcpu from a value
 * previously captured by kvmppc_xive_native_get_vp().
 *
 * Returns 0 on success, -EPERM if XIVE is not enabled on the vcpu,
 * -ENOENT if the vcpu or the VM has no XIVE state, or -EBUSY if the
 * vcpu context is currently pushed on the hardware thread.
 */
int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}
1183
kvmppc_xive_native_supported(void)1184 bool kvmppc_xive_native_supported(void)
1185 {
1186 return xive_native_has_queue_state_support();
1187 }
1188
xive_native_debug_show(struct seq_file * m,void * private)1189 static int xive_native_debug_show(struct seq_file *m, void *private)
1190 {
1191 struct kvmppc_xive *xive = m->private;
1192 struct kvm *kvm = xive->kvm;
1193 struct kvm_vcpu *vcpu;
1194 unsigned int i;
1195
1196 if (!kvm)
1197 return 0;
1198
1199 seq_puts(m, "=========\nVCPU state\n=========\n");
1200
1201 kvm_for_each_vcpu(i, vcpu, kvm) {
1202 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1203
1204 if (!xc)
1205 continue;
1206
1207 seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
1208 xc->server_num,
1209 vcpu->arch.xive_saved_state.nsr,
1210 vcpu->arch.xive_saved_state.cppr,
1211 vcpu->arch.xive_saved_state.ipb,
1212 vcpu->arch.xive_saved_state.pipr,
1213 vcpu->arch.xive_saved_state.w01,
1214 (u32) vcpu->arch.xive_cam_word);
1215
1216 kvmppc_xive_debug_show_queues(m, vcpu);
1217 }
1218
1219 return 0;
1220 }
1221
/* debugfs open: bind the seq_file show routine to the device's private data */
static int xive_native_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xive_native_debug_show, inode->i_private);
}
1226
/* File operations for the per-device "kvm-xive-%p" debugfs entry */
static const struct file_operations xive_native_debug_fops = {
	.open = xive_native_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1233
xive_native_debugfs_init(struct kvmppc_xive * xive)1234 static void xive_native_debugfs_init(struct kvmppc_xive *xive)
1235 {
1236 char *name;
1237
1238 name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
1239 if (!name) {
1240 pr_err("%s: no memory for name\n", __func__);
1241 return;
1242 }
1243
1244 xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
1245 xive, &xive_native_debug_fops);
1246
1247 pr_debug("%s: created %s\n", __func__, name);
1248 kfree(name);
1249 }
1250
kvmppc_xive_native_init(struct kvm_device * dev)1251 static void kvmppc_xive_native_init(struct kvm_device *dev)
1252 {
1253 struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
1254
1255 /* Register some debug interfaces */
1256 xive_native_debugfs_init(xive);
1257 }
1258
/* Operations table for the "kvm-xive-native" KVM device */
struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};
1269
/* Nothing to do at module load time for the XIVE native device */
void kvmppc_xive_native_init_module(void)
{
	;
}
1274
/* Nothing to do at module unload time for the XIVE native device */
void kvmppc_xive_native_exit_module(void)
{
	;
}
1279