1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2018
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 { "userspace_handled", VCPU_STAT(exit_userspace) },
64 { "exit_null", VCPU_STAT(exit_null) },
65 { "exit_validity", VCPU_STAT(exit_validity) },
66 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 { "exit_external_request", VCPU_STAT(exit_external_request) },
68 { "exit_io_request", VCPU_STAT(exit_io_request) },
69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 { "exit_instruction", VCPU_STAT(exit_instruction) },
71 { "exit_pei", VCPU_STAT(exit_pei) },
72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 { "deliver_ckc", VCPU_STAT(deliver_ckc) },
84 { "deliver_cputm", VCPU_STAT(deliver_cputm) },
85 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 { "deliver_virtio", VCPU_STAT(deliver_virtio) },
89 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 { "deliver_program", VCPU_STAT(deliver_program) },
93 { "deliver_io", VCPU_STAT(deliver_io) },
94 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
96 { "inject_ckc", VCPU_STAT(inject_ckc) },
97 { "inject_cputm", VCPU_STAT(inject_cputm) },
98 { "inject_external_call", VCPU_STAT(inject_external_call) },
99 { "inject_float_mchk", VM_STAT(inject_float_mchk) },
100 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101 { "inject_io", VM_STAT(inject_io) },
102 { "inject_mchk", VCPU_STAT(inject_mchk) },
103 { "inject_pfault_done", VM_STAT(inject_pfault_done) },
104 { "inject_program", VCPU_STAT(inject_program) },
105 { "inject_restart", VCPU_STAT(inject_restart) },
106 { "inject_service_signal", VM_STAT(inject_service_signal) },
107 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110 { "inject_virtio", VM_STAT(inject_virtio) },
111 { "instruction_epsw", VCPU_STAT(instruction_epsw) },
112 { "instruction_gs", VCPU_STAT(instruction_gs) },
113 { "instruction_io_other", VCPU_STAT(instruction_io_other) },
114 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117 { "instruction_ptff", VCPU_STAT(instruction_ptff) },
118 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
119 { "instruction_sck", VCPU_STAT(instruction_sck) },
120 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121 { "instruction_spx", VCPU_STAT(instruction_spx) },
122 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
123 { "instruction_stap", VCPU_STAT(instruction_stap) },
124 { "instruction_iske", VCPU_STAT(instruction_iske) },
125 { "instruction_ri", VCPU_STAT(instruction_ri) },
126 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127 { "instruction_sske", VCPU_STAT(instruction_sske) },
128 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129 { "instruction_essa", VCPU_STAT(instruction_essa) },
130 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
131 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
132 { "instruction_tb", VCPU_STAT(instruction_tb) },
133 { "instruction_tpi", VCPU_STAT(instruction_tpi) },
134 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
135 { "instruction_tsch", VCPU_STAT(instruction_tsch) },
136 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137 { "instruction_sie", VCPU_STAT(instruction_sie) },
138 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154 { "instruction_diag_10", VCPU_STAT(diagnose_10) },
155 { "instruction_diag_44", VCPU_STAT(diagnose_44) },
156 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157 { "instruction_diag_258", VCPU_STAT(diagnose_258) },
158 { "instruction_diag_308", VCPU_STAT(diagnose_308) },
159 { "instruction_diag_500", VCPU_STAT(diagnose_500) },
160 { "instruction_diag_other", VCPU_STAT(diagnose_other) },
161 { NULL }
162 };
163
164 struct kvm_s390_tod_clock_ext {
165 __u8 epoch_idx;
166 __u64 tod;
167 __u8 reserved[7];
168 } __packed;
169
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174
175 /* allow 1m huge page guest backing, if !nested */
176 static int hpage;
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
179
180 /*
181 * For now we handle at most 16 double words as this is what the s390 base
182 * kernel handles and stores in the prefix page. If we ever need to go beyond
183 * this, the code needs to change, but the external uapi can stay.
184 */
185 #define SIZE_INTERNAL 16
186
187 /*
188 * Base feature mask that defines default mask for facilities. Consists of the
189 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
190 */
191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
192 /*
193 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
194 * and defines the facilities that can be enabled via a cpu model.
195 */
196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
197
198 static unsigned long kvm_s390_fac_size(void)
199 {
200 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203 sizeof(S390_lowcore.stfle_fac_list));
204
205 return SIZE_INTERNAL;
206 }
207
208 /* available cpu features supported by kvm */
209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
210 /* available subfunctions indicated via query / "test bit" */
211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
212
213 static struct gmap_notifier gmap_notifier;
214 static struct gmap_notifier vsie_gmap_notifier;
215 debug_info_t *kvm_s390_dbf;
216
217 /* Section: not file related */
218 int kvm_arch_hardware_enable(void)
219 {
220 /* every s390 is virtualization enabled ;-) */
221 return 0;
222 }
223
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
225 unsigned long end);
226
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
228 {
229 u8 delta_idx = 0;
230
231 /*
232 * The TOD jumps by delta, we have to compensate this by adding
233 * -delta to the epoch.
234 */
235 delta = -delta;
236
237 /* sign-extension - we're adding to signed values below */
238 if ((s64)delta < 0)
239 delta_idx = -1;
240
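/*
 * epoch and the epoch index (epdx) are treated as one multi-word value:
 * delta_idx sign-extends the 64-bit delta into epdx, and the
 * "epoch < delta" check below propagates the unsigned carry out of the
 * low 64 bits of the addition.
 */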
241 scb->epoch += delta;
242 if (scb->ecd & ECD_MEF) {
243 scb->epdx += delta_idx;
244 if (scb->epoch < delta)
245 scb->epdx += 1;
246 }
247 }
248
249 /*
250 * This callback is executed during stop_machine(). All CPUs are therefore
251 * temporarily stopped. In order not to change guest behavior, we have to
252 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253 * so a CPU won't be stopped while calculating with the epoch.
254 */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
256 void *v)
257 {
258 struct kvm *kvm;
259 struct kvm_vcpu *vcpu;
260 int i;
261 unsigned long long *delta = v;
262
263 list_for_each_entry(kvm, &vm_list, vm_list) {
264 kvm_for_each_vcpu(i, vcpu, kvm) {
265 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
266 if (i == 0) {
267 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
269 }
270 if (vcpu->arch.cputm_enabled)
271 vcpu->arch.cputm_start += *delta;
272 if (vcpu->arch.vsie_block)
273 kvm_clock_sync_scb(vcpu->arch.vsie_block,
274 *delta);
275 }
276 }
277 return NOTIFY_OK;
278 }
279
280 static struct notifier_block kvm_clock_notifier = {
281 .notifier_call = kvm_clock_sync,
282 };
283
284 int kvm_arch_hardware_setup(void)
285 {
286 gmap_notifier.notifier_call = kvm_gmap_notifier;
287 gmap_register_pte_notifier(&gmap_notifier);
288 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289 gmap_register_pte_notifier(&vsie_gmap_notifier);
290 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291 &kvm_clock_notifier);
292 return 0;
293 }
294
295 void kvm_arch_hardware_unsetup(void)
296 {
297 gmap_unregister_pte_notifier(&gmap_notifier);
298 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
299 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
300 &kvm_clock_notifier);
301 }
302
303 static void allow_cpu_feat(unsigned long nr)
304 {
305 set_bit_inv(nr, kvm_s390_available_cpu_feat);
306 }
307
308 static inline int plo_test_bit(unsigned char nr)
309 {
310 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
311 int cc;
312
313 asm volatile(
314 /* Parameter registers are ignored for "test bit" */
315 " plo 0,0,0,0(0)\n"
316 " ipm %0\n"
317 " srl %0,28\n"
318 : "=d" (cc)
319 : "d" (r0)
320 : "cc");
321 return cc == 0;
322 }
323
324 static void kvm_s390_cpu_feat_init(void)
325 {
326 int i;
327
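/*
 * Probe all 256 PERFORM LOCKED OPERATION function codes using the
 * test-bit form of PLO; available codes are recorded MSB-first in the
 * plo bitmap (code 0 is the top bit of byte 0) for the cpu model
 * subfunction query interface.
 */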
328 for (i = 0; i < 256; ++i) {
329 if (plo_test_bit(i))
330 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
331 }
332
333 if (test_facility(28)) /* TOD-clock steering */
334 ptff(kvm_s390_available_subfunc.ptff,
335 sizeof(kvm_s390_available_subfunc.ptff),
336 PTFF_QAF);
337
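/*
 * For each installed message-security-assist level, query which
 * subfunctions the CPACF instructions provide; __cpacf_query() fills a
 * 16-byte function-code mask per instruction.
 */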
338 if (test_facility(17)) { /* MSA */
339 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
340 kvm_s390_available_subfunc.kmac);
341 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
342 kvm_s390_available_subfunc.kmc);
343 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
344 kvm_s390_available_subfunc.km);
345 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
346 kvm_s390_available_subfunc.kimd);
347 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
348 kvm_s390_available_subfunc.klmd);
349 }
350 if (test_facility(76)) /* MSA3 */
351 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
352 kvm_s390_available_subfunc.pckmo);
353 if (test_facility(77)) { /* MSA4 */
354 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
355 kvm_s390_available_subfunc.kmctr);
356 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
357 kvm_s390_available_subfunc.kmf);
358 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
359 kvm_s390_available_subfunc.kmo);
360 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
361 kvm_s390_available_subfunc.pcc);
362 }
363 if (test_facility(57)) /* MSA5 */
364 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
365 kvm_s390_available_subfunc.ppno);
366
367 if (test_facility(146)) /* MSA8 */
368 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
369 kvm_s390_available_subfunc.kma);
370
371 if (MACHINE_HAS_ESOP)
372 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
373 /*
374 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
375 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
376 */
377 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
378 !test_facility(3) || !nested)
379 return;
380 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
381 if (sclp.has_64bscao)
382 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
383 if (sclp.has_siif)
384 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
385 if (sclp.has_gpere)
386 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
387 if (sclp.has_gsls)
388 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
389 if (sclp.has_ib)
390 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
391 if (sclp.has_cei)
392 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
393 if (sclp.has_ibs)
394 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
395 if (sclp.has_kss)
396 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
397 /*
398 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
399 * all skey handling functions read/set the skey from the PGSTE
400 * instead of the real storage key.
401 *
402 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
403 * pages being detected as preserved although they are resident.
404 *
405 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
406 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
407 *
408 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
409 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
410 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
411 *
412 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
413 * cannot easily shadow the SCA because of the ipte lock.
414 */
415 }
416
417 int kvm_arch_init(void *opaque)
418 {
419 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
420 if (!kvm_s390_dbf)
421 return -ENOMEM;
422
423 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
424 debug_unregister(kvm_s390_dbf);
425 return -ENOMEM;
426 }
427
428 kvm_s390_cpu_feat_init();
429
430 /* Register floating interrupt controller interface. */
431 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
432 }
433
434 void kvm_arch_exit(void)
435 {
436 debug_unregister(kvm_s390_dbf);
437 }
438
439 /* Section: device related */
440 long kvm_arch_dev_ioctl(struct file *filp,
441 unsigned int ioctl, unsigned long arg)
442 {
443 if (ioctl == KVM_S390_ENABLE_SIE)
444 return s390_enable_sie();
445 return -EINVAL;
446 }
447
448 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
449 {
450 int r;
451
452 switch (ext) {
453 case KVM_CAP_S390_PSW:
454 case KVM_CAP_S390_GMAP:
455 case KVM_CAP_SYNC_MMU:
456 #ifdef CONFIG_KVM_S390_UCONTROL
457 case KVM_CAP_S390_UCONTROL:
458 #endif
459 case KVM_CAP_ASYNC_PF:
460 case KVM_CAP_SYNC_REGS:
461 case KVM_CAP_ONE_REG:
462 case KVM_CAP_ENABLE_CAP:
463 case KVM_CAP_S390_CSS_SUPPORT:
464 case KVM_CAP_IOEVENTFD:
465 case KVM_CAP_DEVICE_CTRL:
466 case KVM_CAP_ENABLE_CAP_VM:
467 case KVM_CAP_S390_IRQCHIP:
468 case KVM_CAP_VM_ATTRIBUTES:
469 case KVM_CAP_MP_STATE:
470 case KVM_CAP_IMMEDIATE_EXIT:
471 case KVM_CAP_S390_INJECT_IRQ:
472 case KVM_CAP_S390_USER_SIGP:
473 case KVM_CAP_S390_USER_STSI:
474 case KVM_CAP_S390_SKEYS:
475 case KVM_CAP_S390_IRQ_STATE:
476 case KVM_CAP_S390_USER_INSTR0:
477 case KVM_CAP_S390_CMMA_MIGRATION:
478 case KVM_CAP_S390_AIS:
479 case KVM_CAP_S390_AIS_MIGRATION:
480 r = 1;
481 break;
482 case KVM_CAP_S390_HPAGE_1M:
483 r = 0;
484 if (hpage && !kvm_is_ucontrol(kvm))
485 r = 1;
486 break;
487 case KVM_CAP_S390_MEM_OP:
488 r = MEM_OP_MAX_SIZE;
489 break;
490 case KVM_CAP_NR_VCPUS:
491 case KVM_CAP_MAX_VCPUS:
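/*
 * If SCA entries are not used, the generic KVM limit applies;
 * otherwise the basic or extended SCA size caps the number of VCPUs.
 */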
492 r = KVM_S390_BSCA_CPU_SLOTS;
493 if (!kvm_s390_use_sca_entries())
494 r = KVM_MAX_VCPUS;
495 else if (sclp.has_esca && sclp.has_64bscao)
496 r = KVM_S390_ESCA_CPU_SLOTS;
497 break;
498 case KVM_CAP_NR_MEMSLOTS:
499 r = KVM_USER_MEM_SLOTS;
500 break;
501 case KVM_CAP_S390_COW:
502 r = MACHINE_HAS_ESOP;
503 break;
504 case KVM_CAP_S390_VECTOR_REGISTERS:
505 r = MACHINE_HAS_VX;
506 break;
507 case KVM_CAP_S390_RI:
508 r = test_facility(64);
509 break;
510 case KVM_CAP_S390_GS:
511 r = test_facility(133);
512 break;
513 case KVM_CAP_S390_BPB:
514 r = test_facility(82);
515 break;
516 default:
517 r = 0;
518 }
519 return r;
520 }
521
522 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
523 struct kvm_memory_slot *memslot)
524 {
525 int i;
526 gfn_t cur_gfn, last_gfn;
527 unsigned long gaddr, vmaddr;
528 struct gmap *gmap = kvm->arch.gmap;
529 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
530
531 /* Loop over all guest segments */
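/*
 * One segment is _PAGE_ENTRIES (256) 4k pages, i.e. 1 MB of guest
 * memory; the gmap collects dirty bits per segment and they are
 * forwarded to the KVM dirty log below.
 */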
532 cur_gfn = memslot->base_gfn;
533 last_gfn = memslot->base_gfn + memslot->npages;
534 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
535 gaddr = gfn_to_gpa(cur_gfn);
536 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
537 if (kvm_is_error_hva(vmaddr))
538 continue;
539
540 bitmap_zero(bitmap, _PAGE_ENTRIES);
541 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
542 for (i = 0; i < _PAGE_ENTRIES; i++) {
543 if (test_bit(i, bitmap))
544 mark_page_dirty(kvm, cur_gfn + i);
545 }
546
547 if (fatal_signal_pending(current))
548 return;
549 cond_resched();
550 }
551 }
552
553 /* Section: vm related */
554 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
555
556 /*
557 * Get (and clear) the dirty memory log for a memory slot.
558 */
559 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
560 struct kvm_dirty_log *log)
561 {
562 int r;
563 unsigned long n;
564 struct kvm_memslots *slots;
565 struct kvm_memory_slot *memslot;
566 int is_dirty = 0;
567
568 if (kvm_is_ucontrol(kvm))
569 return -EINVAL;
570
571 mutex_lock(&kvm->slots_lock);
572
573 r = -EINVAL;
574 if (log->slot >= KVM_USER_MEM_SLOTS)
575 goto out;
576
577 slots = kvm_memslots(kvm);
578 memslot = id_to_memslot(slots, log->slot);
579 r = -ENOENT;
580 if (!memslot->dirty_bitmap)
581 goto out;
582
583 kvm_s390_sync_dirty_log(kvm, memslot);
584 r = kvm_get_dirty_log(kvm, log, &is_dirty);
585 if (r)
586 goto out;
587
588 /* Clear the dirty log */
589 if (is_dirty) {
590 n = kvm_dirty_bitmap_bytes(memslot);
591 memset(memslot->dirty_bitmap, 0, n);
592 }
593 r = 0;
594 out:
595 mutex_unlock(&kvm->slots_lock);
596 return r;
597 }
598
599 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
600 {
601 unsigned int i;
602 struct kvm_vcpu *vcpu;
603
604 kvm_for_each_vcpu(i, vcpu, kvm) {
605 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
606 }
607 }
608
609 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
610 {
611 int r;
612
613 if (cap->flags)
614 return -EINVAL;
615
616 switch (cap->cap) {
617 case KVM_CAP_S390_IRQCHIP:
618 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
619 kvm->arch.use_irqchip = 1;
620 r = 0;
621 break;
622 case KVM_CAP_S390_USER_SIGP:
623 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
624 kvm->arch.user_sigp = 1;
625 r = 0;
626 break;
627 case KVM_CAP_S390_VECTOR_REGISTERS:
628 mutex_lock(&kvm->lock);
629 if (kvm->created_vcpus) {
630 r = -EBUSY;
631 } else if (MACHINE_HAS_VX) {
632 set_kvm_facility(kvm->arch.model.fac_mask, 129);
633 set_kvm_facility(kvm->arch.model.fac_list, 129);
634 if (test_facility(134)) {
635 set_kvm_facility(kvm->arch.model.fac_mask, 134);
636 set_kvm_facility(kvm->arch.model.fac_list, 134);
637 }
638 if (test_facility(135)) {
639 set_kvm_facility(kvm->arch.model.fac_mask, 135);
640 set_kvm_facility(kvm->arch.model.fac_list, 135);
641 }
642 r = 0;
643 } else
644 r = -EINVAL;
645 mutex_unlock(&kvm->lock);
646 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
647 r ? "(not available)" : "(success)");
648 break;
649 case KVM_CAP_S390_RI:
650 r = -EINVAL;
651 mutex_lock(&kvm->lock);
652 if (kvm->created_vcpus) {
653 r = -EBUSY;
654 } else if (test_facility(64)) {
655 set_kvm_facility(kvm->arch.model.fac_mask, 64);
656 set_kvm_facility(kvm->arch.model.fac_list, 64);
657 r = 0;
658 }
659 mutex_unlock(&kvm->lock);
660 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
661 r ? "(not available)" : "(success)");
662 break;
663 case KVM_CAP_S390_AIS:
664 mutex_lock(&kvm->lock);
665 if (kvm->created_vcpus) {
666 r = -EBUSY;
667 } else {
668 set_kvm_facility(kvm->arch.model.fac_mask, 72);
669 set_kvm_facility(kvm->arch.model.fac_list, 72);
670 r = 0;
671 }
672 mutex_unlock(&kvm->lock);
673 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
674 r ? "(not available)" : "(success)");
675 break;
676 case KVM_CAP_S390_GS:
677 r = -EINVAL;
678 mutex_lock(&kvm->lock);
679 if (kvm->created_vcpus) {
680 r = -EBUSY;
681 } else if (test_facility(133)) {
682 set_kvm_facility(kvm->arch.model.fac_mask, 133);
683 set_kvm_facility(kvm->arch.model.fac_list, 133);
684 r = 0;
685 }
686 mutex_unlock(&kvm->lock);
687 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
688 r ? "(not available)" : "(success)");
689 break;
690 case KVM_CAP_S390_HPAGE_1M:
691 mutex_lock(&kvm->lock);
692 if (kvm->created_vcpus)
693 r = -EBUSY;
694 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
695 r = -EINVAL;
696 else {
697 r = 0;
698 down_write(&kvm->mm->mmap_sem);
699 kvm->mm->context.allow_gmap_hpage_1m = 1;
700 up_write(&kvm->mm->mmap_sem);
701 /*
702 * We might have to create fake 4k page
703 * tables. To avoid that the hardware works on
704 * stale PGSTEs, we emulate these instructions.
705 */
706 kvm->arch.use_skf = 0;
707 kvm->arch.use_pfmfi = 0;
708 }
709 mutex_unlock(&kvm->lock);
710 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
711 r ? "(not available)" : "(success)");
712 break;
713 case KVM_CAP_S390_USER_STSI:
714 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
715 kvm->arch.user_stsi = 1;
716 r = 0;
717 break;
718 case KVM_CAP_S390_USER_INSTR0:
719 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
720 kvm->arch.user_instr0 = 1;
721 icpt_operexc_on_all_vcpus(kvm);
722 r = 0;
723 break;
724 default:
725 r = -EINVAL;
726 break;
727 }
728 return r;
729 }
730
731 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
732 {
733 int ret;
734
735 switch (attr->attr) {
736 case KVM_S390_VM_MEM_LIMIT_SIZE:
737 ret = 0;
738 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
739 kvm->arch.mem_limit);
740 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
741 ret = -EFAULT;
742 break;
743 default:
744 ret = -ENXIO;
745 break;
746 }
747 return ret;
748 }
749
750 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
751 {
752 int ret;
753 unsigned int idx;
754 switch (attr->attr) {
755 case KVM_S390_VM_MEM_ENABLE_CMMA:
756 ret = -ENXIO;
757 if (!sclp.has_cmma)
758 break;
759
760 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
761 mutex_lock(&kvm->lock);
762 if (kvm->created_vcpus)
763 ret = -EBUSY;
764 else if (kvm->mm->context.allow_gmap_hpage_1m)
765 ret = -EINVAL;
766 else {
767 kvm->arch.use_cmma = 1;
768 /* Not compatible with cmma. */
769 kvm->arch.use_pfmfi = 0;
770 ret = 0;
771 }
772 mutex_unlock(&kvm->lock);
773 break;
774 case KVM_S390_VM_MEM_CLR_CMMA:
775 ret = -ENXIO;
776 if (!sclp.has_cmma)
777 break;
778 ret = -EINVAL;
779 if (!kvm->arch.use_cmma)
780 break;
781
782 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
783 mutex_lock(&kvm->lock);
784 idx = srcu_read_lock(&kvm->srcu);
785 s390_reset_cmma(kvm->arch.gmap->mm);
786 srcu_read_unlock(&kvm->srcu, idx);
787 mutex_unlock(&kvm->lock);
788 ret = 0;
789 break;
790 case KVM_S390_VM_MEM_LIMIT_SIZE: {
791 unsigned long new_limit;
792
793 if (kvm_is_ucontrol(kvm))
794 return -EINVAL;
795
796 if (get_user(new_limit, (u64 __user *)attr->addr))
797 return -EFAULT;
798
799 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
800 new_limit > kvm->arch.mem_limit)
801 return -E2BIG;
802
803 if (!new_limit)
804 return -EINVAL;
805
806 /* gmap_create takes last usable address */
807 if (new_limit != KVM_S390_NO_MEM_LIMIT)
808 new_limit -= 1;
809
810 ret = -EBUSY;
811 mutex_lock(&kvm->lock);
812 if (!kvm->created_vcpus) {
813 /* gmap_create will round the limit up */
814 struct gmap *new = gmap_create(current->mm, new_limit);
815
816 if (!new) {
817 ret = -ENOMEM;
818 } else {
819 gmap_remove(kvm->arch.gmap);
820 new->private = kvm;
821 kvm->arch.gmap = new;
822 ret = 0;
823 }
824 }
825 mutex_unlock(&kvm->lock);
826 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
827 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
828 (void *) kvm->arch.gmap->asce);
829 break;
830 }
831 default:
832 ret = -ENXIO;
833 break;
834 }
835 return ret;
836 }
837
838 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
839
840 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
841 {
842 struct kvm_vcpu *vcpu;
843 int i;
844
845 kvm_s390_vcpu_block_all(kvm);
846
847 kvm_for_each_vcpu(i, vcpu, kvm)
848 kvm_s390_vcpu_crypto_setup(vcpu);
849
850 kvm_s390_vcpu_unblock_all(kvm);
851 }
852
853 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
854 {
855 if (!test_kvm_facility(kvm, 76))
856 return -EINVAL;
857
858 mutex_lock(&kvm->lock);
859 switch (attr->attr) {
860 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
861 get_random_bytes(
862 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
863 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
864 kvm->arch.crypto.aes_kw = 1;
865 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
866 break;
867 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
868 get_random_bytes(
869 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
870 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
871 kvm->arch.crypto.dea_kw = 1;
872 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
873 break;
874 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
875 kvm->arch.crypto.aes_kw = 0;
876 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
877 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
878 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
879 break;
880 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
881 kvm->arch.crypto.dea_kw = 0;
882 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
883 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
884 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
885 break;
886 default:
887 mutex_unlock(&kvm->lock);
888 return -ENXIO;
889 }
890
891 kvm_s390_vcpu_crypto_reset_all(kvm);
892 mutex_unlock(&kvm->lock);
893 return 0;
894 }
895
896 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
897 {
898 int cx;
899 struct kvm_vcpu *vcpu;
900
901 kvm_for_each_vcpu(cx, vcpu, kvm)
902 kvm_s390_sync_request(req, vcpu);
903 }
904
905 /*
906 * Must be called with kvm->srcu held to avoid races on memslots, and with
907 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
908 */
909 static int kvm_s390_vm_start_migration(struct kvm *kvm)
910 {
911 struct kvm_memory_slot *ms;
912 struct kvm_memslots *slots;
913 unsigned long ram_pages = 0;
914 int slotnr;
915
916 /* migration mode already enabled */
917 if (kvm->arch.migration_mode)
918 return 0;
919 slots = kvm_memslots(kvm);
920 if (!slots || !slots->used_slots)
921 return -EINVAL;
922
923 if (!kvm->arch.use_cmma) {
924 kvm->arch.migration_mode = 1;
925 return 0;
926 }
927 /* mark all the pages in active slots as dirty */
928 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
929 ms = slots->memslots + slotnr;
930 /*
931 * The second half of the bitmap is only used on x86,
932 * and would be wasted otherwise, so we put it to good
933 * use here to keep track of the state of the storage
934 * attributes.
935 */
936 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
937 ram_pages += ms->npages;
938 }
939 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
940 kvm->arch.migration_mode = 1;
941 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
942 return 0;
943 }
944
945 /*
946 * Must be called with kvm->slots_lock to avoid races with ourselves and
947 * kvm_s390_vm_start_migration.
948 */
949 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
950 {
951 /* migration mode already disabled */
952 if (!kvm->arch.migration_mode)
953 return 0;
954 kvm->arch.migration_mode = 0;
955 if (kvm->arch.use_cmma)
956 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
957 return 0;
958 }
959
960 static int kvm_s390_vm_set_migration(struct kvm *kvm,
961 struct kvm_device_attr *attr)
962 {
963 int res = -ENXIO;
964
965 mutex_lock(&kvm->slots_lock);
966 switch (attr->attr) {
967 case KVM_S390_VM_MIGRATION_START:
968 res = kvm_s390_vm_start_migration(kvm);
969 break;
970 case KVM_S390_VM_MIGRATION_STOP:
971 res = kvm_s390_vm_stop_migration(kvm);
972 break;
973 default:
974 break;
975 }
976 mutex_unlock(&kvm->slots_lock);
977
978 return res;
979 }
980
981 static int kvm_s390_vm_get_migration(struct kvm *kvm,
982 struct kvm_device_attr *attr)
983 {
984 u64 mig = kvm->arch.migration_mode;
985
986 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
987 return -ENXIO;
988
989 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
990 return -EFAULT;
991 return 0;
992 }
993
994 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
995 {
996 struct kvm_s390_vm_tod_clock gtod;
997
998 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
999 return -EFAULT;
1000
1001 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1002 return -EINVAL;
1003 kvm_s390_set_tod_clock(kvm, &gtod);
1004
1005 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1006 gtod.epoch_idx, gtod.tod);
1007
1008 return 0;
1009 }
1010
1011 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1012 {
1013 u8 gtod_high;
1014
1015 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1016 sizeof(gtod_high)))
1017 return -EFAULT;
1018
1019 if (gtod_high != 0)
1020 return -EINVAL;
1021 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1022
1023 return 0;
1024 }
1025
1026 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1027 {
1028 struct kvm_s390_vm_tod_clock gtod = { 0 };
1029
1030 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1031 sizeof(gtod.tod)))
1032 return -EFAULT;
1033
1034 kvm_s390_set_tod_clock(kvm, &gtod);
1035 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1036 return 0;
1037 }
1038
1039 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041 int ret;
1042
1043 if (attr->flags)
1044 return -EINVAL;
1045
1046 switch (attr->attr) {
1047 case KVM_S390_VM_TOD_EXT:
1048 ret = kvm_s390_set_tod_ext(kvm, attr);
1049 break;
1050 case KVM_S390_VM_TOD_HIGH:
1051 ret = kvm_s390_set_tod_high(kvm, attr);
1052 break;
1053 case KVM_S390_VM_TOD_LOW:
1054 ret = kvm_s390_set_tod_low(kvm, attr);
1055 break;
1056 default:
1057 ret = -ENXIO;
1058 break;
1059 }
1060 return ret;
1061 }
1062
1063 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1064 struct kvm_s390_vm_tod_clock *gtod)
1065 {
1066 struct kvm_s390_tod_clock_ext htod;
1067
1068 preempt_disable();
1069
1070 get_tod_clock_ext((char *)&htod);
1071
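/*
 * The guest TOD is the host TOD plus the guest epoch. If the 64-bit
 * addition wraps, carry into the epoch index (only meaningful with the
 * multiple-epoch facility, stfle bit 139).
 */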
1072 gtod->tod = htod.tod + kvm->arch.epoch;
1073 gtod->epoch_idx = 0;
1074 if (test_kvm_facility(kvm, 139)) {
1075 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1076 if (gtod->tod < htod.tod)
1077 gtod->epoch_idx += 1;
1078 }
1079
1080 preempt_enable();
1081 }
1082
1083 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1084 {
1085 struct kvm_s390_vm_tod_clock gtod;
1086
1087 memset(&gtod, 0, sizeof(gtod));
1088 kvm_s390_get_tod_clock(kvm, &gtod);
1089 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1090 return -EFAULT;
1091
1092 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1093 gtod.epoch_idx, gtod.tod);
1094 return 0;
1095 }
1096
1097 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099 u8 gtod_high = 0;
1100
1101 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1102 sizeof(gtod_high)))
1103 return -EFAULT;
1104 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1105
1106 return 0;
1107 }
1108
1109 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1110 {
1111 u64 gtod;
1112
1113 gtod = kvm_s390_get_tod_clock_fast(kvm);
1114 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1115 return -EFAULT;
1116 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1117
1118 return 0;
1119 }
1120
1121 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 int ret;
1124
1125 if (attr->flags)
1126 return -EINVAL;
1127
1128 switch (attr->attr) {
1129 case KVM_S390_VM_TOD_EXT:
1130 ret = kvm_s390_get_tod_ext(kvm, attr);
1131 break;
1132 case KVM_S390_VM_TOD_HIGH:
1133 ret = kvm_s390_get_tod_high(kvm, attr);
1134 break;
1135 case KVM_S390_VM_TOD_LOW:
1136 ret = kvm_s390_get_tod_low(kvm, attr);
1137 break;
1138 default:
1139 ret = -ENXIO;
1140 break;
1141 }
1142 return ret;
1143 }
1144
1145 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147 struct kvm_s390_vm_cpu_processor *proc;
1148 u16 lowest_ibc, unblocked_ibc;
1149 int ret = 0;
1150
1151 mutex_lock(&kvm->lock);
1152 if (kvm->created_vcpus) {
1153 ret = -EBUSY;
1154 goto out;
1155 }
1156 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1157 if (!proc) {
1158 ret = -ENOMEM;
1159 goto out;
1160 }
1161 if (!copy_from_user(proc, (void __user *)attr->addr,
1162 sizeof(*proc))) {
1163 kvm->arch.model.cpuid = proc->cpuid;
1164 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1165 unblocked_ibc = sclp.ibc & 0xfff;
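/*
 * Clamp the requested instruction blocking control (IBC) value to the
 * range reported via SCLP: values above the unblocked IBC or below the
 * lowest supported IBC are not usable.
 */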
1166 if (lowest_ibc && proc->ibc) {
1167 if (proc->ibc > unblocked_ibc)
1168 kvm->arch.model.ibc = unblocked_ibc;
1169 else if (proc->ibc < lowest_ibc)
1170 kvm->arch.model.ibc = lowest_ibc;
1171 else
1172 kvm->arch.model.ibc = proc->ibc;
1173 }
1174 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1175 S390_ARCH_FAC_LIST_SIZE_BYTE);
1176 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1177 kvm->arch.model.ibc,
1178 kvm->arch.model.cpuid);
1179 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1180 kvm->arch.model.fac_list[0],
1181 kvm->arch.model.fac_list[1],
1182 kvm->arch.model.fac_list[2]);
1183 } else
1184 ret = -EFAULT;
1185 kfree(proc);
1186 out:
1187 mutex_unlock(&kvm->lock);
1188 return ret;
1189 }
1190
1191 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1192 struct kvm_device_attr *attr)
1193 {
1194 struct kvm_s390_vm_cpu_feat data;
1195
1196 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1197 return -EFAULT;
1198 if (!bitmap_subset((unsigned long *) data.feat,
1199 kvm_s390_available_cpu_feat,
1200 KVM_S390_VM_CPU_FEAT_NR_BITS))
1201 return -EINVAL;
1202
1203 mutex_lock(&kvm->lock);
1204 if (kvm->created_vcpus) {
1205 mutex_unlock(&kvm->lock);
1206 return -EBUSY;
1207 }
1208 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1209 KVM_S390_VM_CPU_FEAT_NR_BITS);
1210 mutex_unlock(&kvm->lock);
1211 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1212 data.feat[0],
1213 data.feat[1],
1214 data.feat[2]);
1215 return 0;
1216 }
1217
1218 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1219 struct kvm_device_attr *attr)
1220 {
1221 /*
1222 * Once supported by kernel + hw, we have to store the subfunctions
1223 * in kvm->arch and remember that user space configured them.
1224 */
1225 return -ENXIO;
1226 }
1227
1228 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230 int ret = -ENXIO;
1231
1232 switch (attr->attr) {
1233 case KVM_S390_VM_CPU_PROCESSOR:
1234 ret = kvm_s390_set_processor(kvm, attr);
1235 break;
1236 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1237 ret = kvm_s390_set_processor_feat(kvm, attr);
1238 break;
1239 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1240 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1241 break;
1242 }
1243 return ret;
1244 }
1245
1246 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248 struct kvm_s390_vm_cpu_processor *proc;
1249 int ret = 0;
1250
1251 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1252 if (!proc) {
1253 ret = -ENOMEM;
1254 goto out;
1255 }
1256 proc->cpuid = kvm->arch.model.cpuid;
1257 proc->ibc = kvm->arch.model.ibc;
1258 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1259 S390_ARCH_FAC_LIST_SIZE_BYTE);
1260 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1261 kvm->arch.model.ibc,
1262 kvm->arch.model.cpuid);
1263 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1264 kvm->arch.model.fac_list[0],
1265 kvm->arch.model.fac_list[1],
1266 kvm->arch.model.fac_list[2]);
1267 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1268 ret = -EFAULT;
1269 kfree(proc);
1270 out:
1271 return ret;
1272 }
1273
1274 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1275 {
1276 struct kvm_s390_vm_cpu_machine *mach;
1277 int ret = 0;
1278
1279 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1280 if (!mach) {
1281 ret = -ENOMEM;
1282 goto out;
1283 }
1284 get_cpu_id((struct cpuid *) &mach->cpuid);
1285 mach->ibc = sclp.ibc;
1286 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1287 S390_ARCH_FAC_LIST_SIZE_BYTE);
1288 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1289 sizeof(S390_lowcore.stfle_fac_list));
1290 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1291 kvm->arch.model.ibc,
1292 kvm->arch.model.cpuid);
1293 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1294 mach->fac_mask[0],
1295 mach->fac_mask[1],
1296 mach->fac_mask[2]);
1297 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1298 mach->fac_list[0],
1299 mach->fac_list[1],
1300 mach->fac_list[2]);
1301 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1302 ret = -EFAULT;
1303 kfree(mach);
1304 out:
1305 return ret;
1306 }
1307
1308 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1309 struct kvm_device_attr *attr)
1310 {
1311 struct kvm_s390_vm_cpu_feat data;
1312
1313 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1314 KVM_S390_VM_CPU_FEAT_NR_BITS);
1315 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1316 return -EFAULT;
1317 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1318 data.feat[0],
1319 data.feat[1],
1320 data.feat[2]);
1321 return 0;
1322 }
1323
1324 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1325 struct kvm_device_attr *attr)
1326 {
1327 struct kvm_s390_vm_cpu_feat data;
1328
1329 bitmap_copy((unsigned long *) data.feat,
1330 kvm_s390_available_cpu_feat,
1331 KVM_S390_VM_CPU_FEAT_NR_BITS);
1332 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1333 return -EFAULT;
1334 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1335 data.feat[0],
1336 data.feat[1],
1337 data.feat[2]);
1338 return 0;
1339 }
1340
1341 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1342 struct kvm_device_attr *attr)
1343 {
1344 /*
1345 * Once we can actually configure subfunctions (kernel + hw support),
1346 * we have to check if they were already set by user space, if so copy
1347 * them from kvm->arch.
1348 */
1349 return -ENXIO;
1350 }
1351
1352 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1353 struct kvm_device_attr *attr)
1354 {
1355 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1356 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1357 return -EFAULT;
1358 return 0;
1359 }
1360 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1361 {
1362 int ret = -ENXIO;
1363
1364 switch (attr->attr) {
1365 case KVM_S390_VM_CPU_PROCESSOR:
1366 ret = kvm_s390_get_processor(kvm, attr);
1367 break;
1368 case KVM_S390_VM_CPU_MACHINE:
1369 ret = kvm_s390_get_machine(kvm, attr);
1370 break;
1371 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1372 ret = kvm_s390_get_processor_feat(kvm, attr);
1373 break;
1374 case KVM_S390_VM_CPU_MACHINE_FEAT:
1375 ret = kvm_s390_get_machine_feat(kvm, attr);
1376 break;
1377 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1378 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1379 break;
1380 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1381 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1382 break;
1383 }
1384 return ret;
1385 }
1386
1387 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1388 {
1389 int ret;
1390
1391 switch (attr->group) {
1392 case KVM_S390_VM_MEM_CTRL:
1393 ret = kvm_s390_set_mem_control(kvm, attr);
1394 break;
1395 case KVM_S390_VM_TOD:
1396 ret = kvm_s390_set_tod(kvm, attr);
1397 break;
1398 case KVM_S390_VM_CPU_MODEL:
1399 ret = kvm_s390_set_cpu_model(kvm, attr);
1400 break;
1401 case KVM_S390_VM_CRYPTO:
1402 ret = kvm_s390_vm_set_crypto(kvm, attr);
1403 break;
1404 case KVM_S390_VM_MIGRATION:
1405 ret = kvm_s390_vm_set_migration(kvm, attr);
1406 break;
1407 default:
1408 ret = -ENXIO;
1409 break;
1410 }
1411
1412 return ret;
1413 }
1414
1415 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1416 {
1417 int ret;
1418
1419 switch (attr->group) {
1420 case KVM_S390_VM_MEM_CTRL:
1421 ret = kvm_s390_get_mem_control(kvm, attr);
1422 break;
1423 case KVM_S390_VM_TOD:
1424 ret = kvm_s390_get_tod(kvm, attr);
1425 break;
1426 case KVM_S390_VM_CPU_MODEL:
1427 ret = kvm_s390_get_cpu_model(kvm, attr);
1428 break;
1429 case KVM_S390_VM_MIGRATION:
1430 ret = kvm_s390_vm_get_migration(kvm, attr);
1431 break;
1432 default:
1433 ret = -ENXIO;
1434 break;
1435 }
1436
1437 return ret;
1438 }
1439
1440 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442 int ret;
1443
1444 switch (attr->group) {
1445 case KVM_S390_VM_MEM_CTRL:
1446 switch (attr->attr) {
1447 case KVM_S390_VM_MEM_ENABLE_CMMA:
1448 case KVM_S390_VM_MEM_CLR_CMMA:
1449 ret = sclp.has_cmma ? 0 : -ENXIO;
1450 break;
1451 case KVM_S390_VM_MEM_LIMIT_SIZE:
1452 ret = 0;
1453 break;
1454 default:
1455 ret = -ENXIO;
1456 break;
1457 }
1458 break;
1459 case KVM_S390_VM_TOD:
1460 switch (attr->attr) {
1461 case KVM_S390_VM_TOD_LOW:
1462 case KVM_S390_VM_TOD_HIGH:
1463 ret = 0;
1464 break;
1465 default:
1466 ret = -ENXIO;
1467 break;
1468 }
1469 break;
1470 case KVM_S390_VM_CPU_MODEL:
1471 switch (attr->attr) {
1472 case KVM_S390_VM_CPU_PROCESSOR:
1473 case KVM_S390_VM_CPU_MACHINE:
1474 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1475 case KVM_S390_VM_CPU_MACHINE_FEAT:
1476 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1477 ret = 0;
1478 break;
1479 /* configuring subfunctions is not supported yet */
1480 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1481 default:
1482 ret = -ENXIO;
1483 break;
1484 }
1485 break;
1486 case KVM_S390_VM_CRYPTO:
1487 switch (attr->attr) {
1488 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1489 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1490 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1491 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1492 ret = 0;
1493 break;
1494 default:
1495 ret = -ENXIO;
1496 break;
1497 }
1498 break;
1499 case KVM_S390_VM_MIGRATION:
1500 ret = 0;
1501 break;
1502 default:
1503 ret = -ENXIO;
1504 break;
1505 }
1506
1507 return ret;
1508 }
1509
1510 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1511 {
1512 uint8_t *keys;
1513 uint64_t hva;
1514 int srcu_idx, i, r = 0;
1515
1516 if (args->flags != 0)
1517 return -EINVAL;
1518
1519 /* Is this guest using storage keys? */
1520 if (!mm_uses_skeys(current->mm))
1521 return KVM_S390_GET_SKEYS_NONE;
1522
1523 /* Enforce sane limit on memory allocation */
1524 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1525 return -EINVAL;
1526
1527 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1528 if (!keys)
1529 return -ENOMEM;
1530
1531 down_read(&current->mm->mmap_sem);
1532 srcu_idx = srcu_read_lock(&kvm->srcu);
1533 for (i = 0; i < args->count; i++) {
1534 hva = gfn_to_hva(kvm, args->start_gfn + i);
1535 if (kvm_is_error_hva(hva)) {
1536 r = -EFAULT;
1537 break;
1538 }
1539
1540 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1541 if (r)
1542 break;
1543 }
1544 srcu_read_unlock(&kvm->srcu, srcu_idx);
1545 up_read(&current->mm->mmap_sem);
1546
1547 if (!r) {
1548 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1549 sizeof(uint8_t) * args->count);
1550 if (r)
1551 r = -EFAULT;
1552 }
1553
1554 kvfree(keys);
1555 return r;
1556 }
1557
1558 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1559 {
1560 uint8_t *keys;
1561 uint64_t hva;
1562 int srcu_idx, i, r = 0;
1563 bool unlocked;
1564
1565 if (args->flags != 0)
1566 return -EINVAL;
1567
1568 /* Enforce sane limit on memory allocation */
1569 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1570 return -EINVAL;
1571
1572 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1573 if (!keys)
1574 return -ENOMEM;
1575
1576 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1577 sizeof(uint8_t) * args->count);
1578 if (r) {
1579 r = -EFAULT;
1580 goto out;
1581 }
1582
1583 /* Enable storage key handling for the guest */
1584 r = s390_enable_skey();
1585 if (r)
1586 goto out;
1587
1588 i = 0;
1589 down_read(&current->mm->mmap_sem);
1590 srcu_idx = srcu_read_lock(&kvm->srcu);
1591 while (i < args->count) {
1592 unlocked = false;
1593 hva = gfn_to_hva(kvm, args->start_gfn + i);
1594 if (kvm_is_error_hva(hva)) {
1595 r = -EFAULT;
1596 break;
1597 }
1598
1599 /* Lowest order bit is reserved */
1600 if (keys[i] & 0x01) {
1601 r = -EINVAL;
1602 break;
1603 }
1604
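/*
 * set_guest_storage_key() can fail if the page is not yet mapped;
 * fault it in writable and retry without advancing i, so the key is
 * applied on the next iteration.
 */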
1605 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1606 if (r) {
1607 r = fixup_user_fault(current, current->mm, hva,
1608 FAULT_FLAG_WRITE, &unlocked);
1609 if (r)
1610 break;
1611 }
1612 if (!r)
1613 i++;
1614 }
1615 srcu_read_unlock(&kvm->srcu, srcu_idx);
1616 up_read(&current->mm->mmap_sem);
1617 out:
1618 kvfree(keys);
1619 return r;
1620 }
1621
1622 /*
1623 * Base address and length must be sent at the start of each block, therefore
1624 * it's cheaper to send some clean data, as long as it's less than the size of
1625 * two longs.
1626 */
1627 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1628 /* for consistency */
1629 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1630
1631 /*
1632 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1633 * address falls in a hole. In that case the index of one of the memslots
1634 * bordering the hole is returned.
1635 */
1636 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1637 {
1638 int start = 0, end = slots->used_slots;
1639 int slot = atomic_read(&slots->lru_slot);
1640 struct kvm_memory_slot *memslots = slots->memslots;
1641
1642 if (gfn >= memslots[slot].base_gfn &&
1643 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1644 return slot;
1645
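/*
 * Binary search over the memslot array, which is kept sorted by
 * base_gfn in descending order: the result is the first slot whose
 * base_gfn is <= gfn, or a slot bordering the hole gfn falls into.
 */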
1646 while (start < end) {
1647 slot = start + (end - start) / 2;
1648
1649 if (gfn >= memslots[slot].base_gfn)
1650 end = slot;
1651 else
1652 start = slot + 1;
1653 }
1654
1655 if (gfn >= memslots[start].base_gfn &&
1656 gfn < memslots[start].base_gfn + memslots[start].npages) {
1657 atomic_set(&slots->lru_slot, start);
1658 }
1659
1660 return start;
1661 }
1662
1663 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1664 u8 *res, unsigned long bufsize)
1665 {
1666 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1667
1668 args->count = 0;
1669 while (args->count < bufsize) {
1670 hva = gfn_to_hva(kvm, cur_gfn);
1671 /*
1672 * We return an error if the first value was invalid, but we
1673 * return successfully if at least one value was copied.
1674 */
1675 if (kvm_is_error_hva(hva))
1676 return args->count ? 0 : -EFAULT;
1677 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1678 pgstev = 0;
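/* Keep only the usage state (0x03) and NODAT (0x40) bits of the PGSTE */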
1679 res[args->count++] = (pgstev >> 24) & 0x43;
1680 cur_gfn++;
1681 }
1682
1683 return 0;
1684 }
1685
1686 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1687 unsigned long cur_gfn)
1688 {
1689 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1690 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1691 unsigned long ofs = cur_gfn - ms->base_gfn;
1692
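/*
 * The second half of each memslot's dirty bitmap (see
 * kvm_s390_vm_start_migration()) tracks pages with dirty CMMA
 * attributes; find the next set bit, stepping through the memslots and
 * wrapping around if cur_gfn lies in a hole or past the last slot.
 */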
1693 if (ms->base_gfn + ms->npages <= cur_gfn) {
1694 slotidx--;
1695 /* If we are above the highest slot, wrap around */
1696 if (slotidx < 0)
1697 slotidx = slots->used_slots - 1;
1698
1699 ms = slots->memslots + slotidx;
1700 ofs = 0;
1701 }
1702 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1703 while ((slotidx > 0) && (ofs >= ms->npages)) {
1704 slotidx--;
1705 ms = slots->memslots + slotidx;
1706 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1707 }
1708 return ms->base_gfn + ofs;
1709 }
1710
1711 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1712 u8 *res, unsigned long bufsize)
1713 {
1714 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1715 struct kvm_memslots *slots = kvm_memslots(kvm);
1716 struct kvm_memory_slot *ms;
1717
1718 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1719 ms = gfn_to_memslot(kvm, cur_gfn);
1720 args->count = 0;
1721 args->start_gfn = cur_gfn;
1722 if (!ms)
1723 return 0;
1724 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
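/* memslots[0] has the highest base_gfn, so this is the end of guest memory */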
1725 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1726
1727 while (args->count < bufsize) {
1728 hva = gfn_to_hva(kvm, cur_gfn);
1729 if (kvm_is_error_hva(hva))
1730 return 0;
1731 /* Decrement only if we actually flipped the bit to 0 */
1732 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1733 atomic64_dec(&kvm->arch.cmma_dirty_pages);
1734 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1735 pgstev = 0;
1736 /* Save the value */
1737 res[args->count++] = (pgstev >> 24) & 0x43;
1738 /* If the next bit is too far away, stop. */
1739 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1740 return 0;
1741 /* If we reached the previous "next", find the next one */
1742 if (cur_gfn == next_gfn)
1743 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1744 /* Reached the end of memory or of the buffer, stop */
1745 if ((next_gfn >= mem_end) ||
1746 (next_gfn - args->start_gfn >= bufsize))
1747 return 0;
1748 cur_gfn++;
1749 /* Reached the end of the current memslot, take the next one. */
1750 if (cur_gfn - ms->base_gfn >= ms->npages) {
1751 ms = gfn_to_memslot(kvm, cur_gfn);
1752 if (!ms)
1753 return 0;
1754 }
1755 }
1756 return 0;
1757 }
1758
1759 /*
1760 * This function searches for the next page with dirty CMMA attributes, and
1761 * saves the attributes in the buffer up to either the end of the buffer or
1762 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1763 * no trailing clean bytes are saved.
1764 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1765 * output buffer will indicate 0 as length.
1766 */
1767 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1768 struct kvm_s390_cmma_log *args)
1769 {
1770 unsigned long bufsize;
1771 int srcu_idx, peek, ret;
1772 u8 *values;
1773
1774 if (!kvm->arch.use_cmma)
1775 return -ENXIO;
1776 /* Invalid/unsupported flags were specified */
1777 if (args->flags & ~KVM_S390_CMMA_PEEK)
1778 return -EINVAL;
1779 /* Migration mode query, and we are not doing a migration */
1780 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1781 if (!peek && !kvm->arch.migration_mode)
1782 return -EINVAL;
1783 /* CMMA is disabled or was not used, or the buffer has length zero */
1784 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1785 if (!bufsize || !kvm->mm->context.uses_cmm) {
1786 memset(args, 0, sizeof(*args));
1787 return 0;
1788 }
1789 /* We are not peeking, and there are no dirty pages */
1790 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1791 memset(args, 0, sizeof(*args));
1792 return 0;
1793 }
1794
1795 values = vmalloc(bufsize);
1796 if (!values)
1797 return -ENOMEM;
1798
1799 down_read(&kvm->mm->mmap_sem);
1800 srcu_idx = srcu_read_lock(&kvm->srcu);
1801 if (peek)
1802 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1803 else
1804 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1805 srcu_read_unlock(&kvm->srcu, srcu_idx);
1806 up_read(&kvm->mm->mmap_sem);
1807
1808 if (kvm->arch.migration_mode)
1809 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1810 else
1811 args->remaining = 0;
1812
1813 if (copy_to_user((void __user *)args->values, values, args->count))
1814 ret = -EFAULT;
1815
1816 vfree(values);
1817 return ret;
1818 }
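/*
 * Illustrative userspace sketch (editorial addition, not part of this file):
 * one possible way a migration tool could drain dirty CMMA values through the
 * KVM_S390_GET_CMMA_BITS ioctl handled above. "vm_fd", the 4096-byte buffer
 * size and the assumption that migration mode was already enabled via the
 * KVM_S390_VM_MIGRATION attribute are hypothetical; error handling is omitted.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	__u8 values[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(values),
 *		.flags = 0,		// or KVM_S390_CMMA_PEEK for a peek
 *		.values = (__u64)(unsigned long)values,
 *	};
 *
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		// log.start_gfn and log.count now describe the returned chunk
 *		log.start_gfn += log.count;
 *		log.count = sizeof(values);
 *	} while (log.remaining);
 */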
1819
1820 /*
1821 * This function sets the CMMA attributes for the given pages. If the input
1822 * buffer has zero length, no action is taken, otherwise the attributes are
1823 * set and the mm->context.uses_cmm flag is set.
1824 */
1825 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1826 const struct kvm_s390_cmma_log *args)
1827 {
1828 unsigned long hva, mask, pgstev, i;
1829 uint8_t *bits;
1830 int srcu_idx, r = 0;
1831
1832 mask = args->mask;
1833
1834 if (!kvm->arch.use_cmma)
1835 return -ENXIO;
1836 /* invalid/unsupported flags */
1837 if (args->flags != 0)
1838 return -EINVAL;
1839 /* Enforce sane limit on memory allocation */
1840 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1841 return -EINVAL;
1842 /* Nothing to do */
1843 if (args->count == 0)
1844 return 0;
1845
1846 bits = vmalloc(array_size(sizeof(*bits), args->count));
1847 if (!bits)
1848 return -ENOMEM;
1849
1850 r = copy_from_user(bits, (void __user *)args->values, args->count);
1851 if (r) {
1852 r = -EFAULT;
1853 goto out;
1854 }
1855
1856 down_read(&kvm->mm->mmap_sem);
1857 srcu_idx = srcu_read_lock(&kvm->srcu);
1858 for (i = 0; i < args->count; i++) {
1859 hva = gfn_to_hva(kvm, args->start_gfn + i);
1860 if (kvm_is_error_hva(hva)) {
1861 r = -EFAULT;
1862 break;
1863 }
1864
1865 pgstev = bits[i];
1866 pgstev = pgstev << 24;
1867 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1868 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1869 }
1870 srcu_read_unlock(&kvm->srcu, srcu_idx);
1871 up_read(&kvm->mm->mmap_sem);
1872
1873 if (!kvm->mm->context.uses_cmm) {
1874 down_write(&kvm->mm->mmap_sem);
1875 kvm->mm->context.uses_cmm = 1;
1876 up_write(&kvm->mm->mmap_sem);
1877 }
1878 out:
1879 vfree(bits);
1880 return r;
1881 }
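/*
 * Illustrative counterpart on the destination side (editorial addition, same
 * caveats as the sketch further above): replaying previously transferred CMMA
 * values through KVM_S390_SET_CMMA_BITS. "first_gfn", "nr_values" and "saved"
 * are hypothetical names for the migrated data.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = first_gfn,
 *		.count = nr_values,
 *		.flags = 0,
 *		.mask = ~0ULL,
 *		.values = (__u64)(unsigned long)saved,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */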
1882
1883 long kvm_arch_vm_ioctl(struct file *filp,
1884 unsigned int ioctl, unsigned long arg)
1885 {
1886 struct kvm *kvm = filp->private_data;
1887 void __user *argp = (void __user *)arg;
1888 struct kvm_device_attr attr;
1889 int r;
1890
1891 switch (ioctl) {
1892 case KVM_S390_INTERRUPT: {
1893 struct kvm_s390_interrupt s390int;
1894
1895 r = -EFAULT;
1896 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1897 break;
1898 r = kvm_s390_inject_vm(kvm, &s390int);
1899 break;
1900 }
1901 case KVM_ENABLE_CAP: {
1902 struct kvm_enable_cap cap;
1903 r = -EFAULT;
1904 if (copy_from_user(&cap, argp, sizeof(cap)))
1905 break;
1906 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1907 break;
1908 }
1909 case KVM_CREATE_IRQCHIP: {
1910 struct kvm_irq_routing_entry routing;
1911
1912 r = -EINVAL;
1913 if (kvm->arch.use_irqchip) {
1914 /* Set up dummy routing. */
1915 memset(&routing, 0, sizeof(routing));
1916 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1917 }
1918 break;
1919 }
1920 case KVM_SET_DEVICE_ATTR: {
1921 r = -EFAULT;
1922 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1923 break;
1924 r = kvm_s390_vm_set_attr(kvm, &attr);
1925 break;
1926 }
1927 case KVM_GET_DEVICE_ATTR: {
1928 r = -EFAULT;
1929 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1930 break;
1931 r = kvm_s390_vm_get_attr(kvm, &attr);
1932 break;
1933 }
1934 case KVM_HAS_DEVICE_ATTR: {
1935 r = -EFAULT;
1936 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1937 break;
1938 r = kvm_s390_vm_has_attr(kvm, &attr);
1939 break;
1940 }
1941 case KVM_S390_GET_SKEYS: {
1942 struct kvm_s390_skeys args;
1943
1944 r = -EFAULT;
1945 if (copy_from_user(&args, argp,
1946 sizeof(struct kvm_s390_skeys)))
1947 break;
1948 r = kvm_s390_get_skeys(kvm, &args);
1949 break;
1950 }
1951 case KVM_S390_SET_SKEYS: {
1952 struct kvm_s390_skeys args;
1953
1954 r = -EFAULT;
1955 if (copy_from_user(&args, argp,
1956 sizeof(struct kvm_s390_skeys)))
1957 break;
1958 r = kvm_s390_set_skeys(kvm, &args);
1959 break;
1960 }
1961 case KVM_S390_GET_CMMA_BITS: {
1962 struct kvm_s390_cmma_log args;
1963
1964 r = -EFAULT;
1965 if (copy_from_user(&args, argp, sizeof(args)))
1966 break;
1967 mutex_lock(&kvm->slots_lock);
1968 r = kvm_s390_get_cmma_bits(kvm, &args);
1969 mutex_unlock(&kvm->slots_lock);
1970 if (!r) {
1971 r = copy_to_user(argp, &args, sizeof(args));
1972 if (r)
1973 r = -EFAULT;
1974 }
1975 break;
1976 }
1977 case KVM_S390_SET_CMMA_BITS: {
1978 struct kvm_s390_cmma_log args;
1979
1980 r = -EFAULT;
1981 if (copy_from_user(&args, argp, sizeof(args)))
1982 break;
1983 mutex_lock(&kvm->slots_lock);
1984 r = kvm_s390_set_cmma_bits(kvm, &args);
1985 mutex_unlock(&kvm->slots_lock);
1986 break;
1987 }
1988 default:
1989 r = -ENOTTY;
1990 }
1991
1992 return r;
1993 }
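/*
 * Illustrative userspace sketch (editorial addition, not part of this file):
 * the KVM_SET_DEVICE_ATTR case above is the path through which, for example,
 * CMMA can be enabled for a VM. A minimal call, assuming a VM fd "vm_fd",
 * could look like this:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *		perror("KVM_SET_DEVICE_ATTR");
 */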
1994
1995 static int kvm_s390_query_ap_config(u8 *config)
1996 {
1997 u32 fcn_code = 0x04000000UL;
1998 u32 cc = 0;
1999
2000 memset(config, 0, 128);
2001 asm volatile(
2002 "lgr 0,%1\n"
2003 "lgr 2,%2\n"
2004 ".long 0xb2af0000\n" /* PQAP(QCI) */
2005 "0: ipm %0\n"
2006 "srl %0,28\n"
2007 "1:\n"
2008 EX_TABLE(0b, 1b)
2009 : "+r" (cc)
2010 : "r" (fcn_code), "r" (config)
2011 : "cc", "0", "2", "memory"
2012 );
2013
2014 return cc;
2015 }
2016
2017 static int kvm_s390_apxa_installed(void)
2018 {
2019 u8 config[128];
2020 int cc;
2021
2022 if (test_facility(12)) {
2023 cc = kvm_s390_query_ap_config(config);
2024
2025 if (cc)
2026 pr_err("PQAP(QCI) failed with cc=%d", cc);
2027 else
2028 return config[0] & 0x40;
2029 }
2030
2031 return 0;
2032 }
2033
2034 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2035 {
2036 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2037
2038 if (kvm_s390_apxa_installed())
2039 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2040 else
2041 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2042 }
2043
2044 static u64 kvm_s390_get_initial_cpuid(void)
2045 {
2046 struct cpuid cpuid;
2047
2048 get_cpu_id(&cpuid);
2049 cpuid.version = 0xff;
2050 return *((u64 *) &cpuid);
2051 }
2052
2053 static void kvm_s390_crypto_init(struct kvm *kvm)
2054 {
2055 if (!test_kvm_facility(kvm, 76))
2056 return;
2057
2058 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2059 kvm_s390_set_crycb_format(kvm);
2060
2061 /* Enable AES/DEA protected key functions by default */
2062 kvm->arch.crypto.aes_kw = 1;
2063 kvm->arch.crypto.dea_kw = 1;
2064 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2065 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2066 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2067 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2068 }
2069
2070 static void sca_dispose(struct kvm *kvm)
2071 {
2072 if (kvm->arch.use_esca)
2073 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2074 else
2075 free_page((unsigned long)(kvm->arch.sca));
2076 kvm->arch.sca = NULL;
2077 }
2078
2079 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2080 {
2081 gfp_t alloc_flags = GFP_KERNEL;
2082 int i, rc;
2083 char debug_name[16];
2084 static unsigned long sca_offset;
2085
2086 rc = -EINVAL;
2087 #ifdef CONFIG_KVM_S390_UCONTROL
2088 if (type & ~KVM_VM_S390_UCONTROL)
2089 goto out_err;
2090 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2091 goto out_err;
2092 #else
2093 if (type)
2094 goto out_err;
2095 #endif
2096
2097 rc = s390_enable_sie();
2098 if (rc)
2099 goto out_err;
2100
2101 rc = -ENOMEM;
2102
2103 if (!sclp.has_64bscao)
2104 alloc_flags |= GFP_DMA;
2105 rwlock_init(&kvm->arch.sca_lock);
2106 /* start with basic SCA */
2107 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2108 if (!kvm->arch.sca)
2109 goto out_err;
2110 spin_lock(&kvm_lock);
2111 sca_offset += 16;
2112 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2113 sca_offset = 0;
2114 kvm->arch.sca = (struct bsca_block *)
2115 ((char *) kvm->arch.sca + sca_offset);
2116 spin_unlock(&kvm_lock);
2117
2118 sprintf(debug_name, "kvm-%u", current->pid);
2119
2120 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2121 if (!kvm->arch.dbf)
2122 goto out_err;
2123
2124 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2125 kvm->arch.sie_page2 =
2126 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2127 if (!kvm->arch.sie_page2)
2128 goto out_err;
2129
2130 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2131
2132 for (i = 0; i < kvm_s390_fac_size(); i++) {
2133 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2134 (kvm_s390_fac_base[i] |
2135 kvm_s390_fac_ext[i]);
2136 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2137 kvm_s390_fac_base[i];
2138 }
2139
2140 /* we are always in czam mode - even on pre z14 machines */
2141 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2142 set_kvm_facility(kvm->arch.model.fac_list, 138);
2143 /* we emulate STHYI in kvm */
2144 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2145 set_kvm_facility(kvm->arch.model.fac_list, 74);
2146 if (MACHINE_HAS_TLB_GUEST) {
2147 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2148 set_kvm_facility(kvm->arch.model.fac_list, 147);
2149 }
2150
2151 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2152 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2153
2154 kvm_s390_crypto_init(kvm);
2155
2156 mutex_init(&kvm->arch.float_int.ais_lock);
2157 spin_lock_init(&kvm->arch.float_int.lock);
2158 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2159 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2160 init_waitqueue_head(&kvm->arch.ipte_wq);
2161 mutex_init(&kvm->arch.ipte_mutex);
2162
2163 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2164 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2165
2166 if (type & KVM_VM_S390_UCONTROL) {
2167 kvm->arch.gmap = NULL;
2168 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2169 } else {
2170 if (sclp.hamax == U64_MAX)
2171 kvm->arch.mem_limit = TASK_SIZE_MAX;
2172 else
2173 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2174 sclp.hamax + 1);
2175 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2176 if (!kvm->arch.gmap)
2177 goto out_err;
2178 kvm->arch.gmap->private = kvm;
2179 kvm->arch.gmap->pfault_enabled = 0;
2180 }
2181
2182 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2183 kvm->arch.use_skf = sclp.has_skey;
2184 spin_lock_init(&kvm->arch.start_stop_lock);
2185 kvm_s390_vsie_init(kvm);
2186 kvm_s390_gisa_init(kvm);
2187 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2188
2189 return 0;
2190 out_err:
2191 free_page((unsigned long)kvm->arch.sie_page2);
2192 debug_unregister(kvm->arch.dbf);
2193 sca_dispose(kvm);
2194 KVM_EVENT(3, "creation of vm failed: %d", rc);
2195 return rc;
2196 }
2197
2198 bool kvm_arch_has_vcpu_debugfs(void)
2199 {
2200 return false;
2201 }
2202
2203 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2204 {
2205 return 0;
2206 }
2207
2208 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2209 {
2210 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2211 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2212 kvm_s390_clear_local_irqs(vcpu);
2213 kvm_clear_async_pf_completion_queue(vcpu);
2214 if (!kvm_is_ucontrol(vcpu->kvm))
2215 sca_del_vcpu(vcpu);
2216
2217 if (kvm_is_ucontrol(vcpu->kvm))
2218 gmap_remove(vcpu->arch.gmap);
2219
2220 if (vcpu->kvm->arch.use_cmma)
2221 kvm_s390_vcpu_unsetup_cmma(vcpu);
2222 free_page((unsigned long)(vcpu->arch.sie_block));
2223
2224 kvm_vcpu_uninit(vcpu);
2225 kmem_cache_free(kvm_vcpu_cache, vcpu);
2226 }
2227
2228 static void kvm_free_vcpus(struct kvm *kvm)
2229 {
2230 unsigned int i;
2231 struct kvm_vcpu *vcpu;
2232
2233 kvm_for_each_vcpu(i, vcpu, kvm)
2234 kvm_arch_vcpu_destroy(vcpu);
2235
2236 mutex_lock(&kvm->lock);
2237 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2238 kvm->vcpus[i] = NULL;
2239
2240 atomic_set(&kvm->online_vcpus, 0);
2241 mutex_unlock(&kvm->lock);
2242 }
2243
2244 void kvm_arch_destroy_vm(struct kvm *kvm)
2245 {
2246 kvm_free_vcpus(kvm);
2247 sca_dispose(kvm);
2248 debug_unregister(kvm->arch.dbf);
2249 kvm_s390_gisa_destroy(kvm);
2250 free_page((unsigned long)kvm->arch.sie_page2);
2251 if (!kvm_is_ucontrol(kvm))
2252 gmap_remove(kvm->arch.gmap);
2253 kvm_s390_destroy_adapters(kvm);
2254 kvm_s390_clear_float_irqs(kvm);
2255 kvm_s390_vsie_destroy(kvm);
2256 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2257 }
2258
2259 /* Section: vcpu related */
2260 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2261 {
2262 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2263 if (!vcpu->arch.gmap)
2264 return -ENOMEM;
2265 vcpu->arch.gmap->private = vcpu->kvm;
2266
2267 return 0;
2268 }
2269
2270 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2271 {
2272 if (!kvm_s390_use_sca_entries())
2273 return;
2274 read_lock(&vcpu->kvm->arch.sca_lock);
2275 if (vcpu->kvm->arch.use_esca) {
2276 struct esca_block *sca = vcpu->kvm->arch.sca;
2277
2278 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2279 sca->cpu[vcpu->vcpu_id].sda = 0;
2280 } else {
2281 struct bsca_block *sca = vcpu->kvm->arch.sca;
2282
2283 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2284 sca->cpu[vcpu->vcpu_id].sda = 0;
2285 }
2286 read_unlock(&vcpu->kvm->arch.sca_lock);
2287 }
2288
2289 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2290 {
2291 if (!kvm_s390_use_sca_entries()) {
2292 struct bsca_block *sca = vcpu->kvm->arch.sca;
2293
2294 /* we still need the basic sca for the ipte control */
2295 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2296 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2297 return;
2298 }
2299 read_lock(&vcpu->kvm->arch.sca_lock);
2300 if (vcpu->kvm->arch.use_esca) {
2301 struct esca_block *sca = vcpu->kvm->arch.sca;
2302
2303 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2304 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2305 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2306 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2307 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2308 } else {
2309 struct bsca_block *sca = vcpu->kvm->arch.sca;
2310
2311 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2312 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2313 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2314 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2315 }
2316 read_unlock(&vcpu->kvm->arch.sca_lock);
2317 }
2318
2319 /* Basic SCA to Extended SCA data copy routines */
2320 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2321 {
2322 d->sda = s->sda;
2323 d->sigp_ctrl.c = s->sigp_ctrl.c;
2324 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2325 }
2326
2327 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2328 {
2329 int i;
2330
2331 d->ipte_control = s->ipte_control;
2332 d->mcn[0] = s->mcn;
2333 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2334 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2335 }
2336
2337 static int sca_switch_to_extended(struct kvm *kvm)
2338 {
2339 struct bsca_block *old_sca = kvm->arch.sca;
2340 struct esca_block *new_sca;
2341 struct kvm_vcpu *vcpu;
2342 unsigned int vcpu_idx;
2343 u32 scaol, scaoh;
2344
2345 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2346 if (!new_sca)
2347 return -ENOMEM;
2348
2349 scaoh = (u32)((u64)(new_sca) >> 32);
2350 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2351
2352 kvm_s390_vcpu_block_all(kvm);
2353 write_lock(&kvm->arch.sca_lock);
2354
2355 sca_copy_b_to_e(new_sca, old_sca);
2356
2357 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2358 vcpu->arch.sie_block->scaoh = scaoh;
2359 vcpu->arch.sie_block->scaol = scaol;
2360 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2361 }
2362 kvm->arch.sca = new_sca;
2363 kvm->arch.use_esca = 1;
2364
2365 write_unlock(&kvm->arch.sca_lock);
2366 kvm_s390_vcpu_unblock_all(kvm);
2367
2368 free_page((unsigned long)old_sca);
2369
2370 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2371 old_sca, kvm->arch.sca);
2372 return 0;
2373 }
2374
2375 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2376 {
2377 int rc;
2378
2379 if (!kvm_s390_use_sca_entries()) {
2380 if (id < KVM_MAX_VCPUS)
2381 return true;
2382 return false;
2383 }
2384 if (id < KVM_S390_BSCA_CPU_SLOTS)
2385 return true;
2386 if (!sclp.has_esca || !sclp.has_64bscao)
2387 return false;
2388
2389 mutex_lock(&kvm->lock);
2390 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2391 mutex_unlock(&kvm->lock);
2392
2393 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2394 }
2395
2396 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2397 {
2398 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2399 kvm_clear_async_pf_completion_queue(vcpu);
2400 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2401 KVM_SYNC_GPRS |
2402 KVM_SYNC_ACRS |
2403 KVM_SYNC_CRS |
2404 KVM_SYNC_ARCH0 |
2405 KVM_SYNC_PFAULT;
2406 kvm_s390_set_prefix(vcpu, 0);
2407 if (test_kvm_facility(vcpu->kvm, 64))
2408 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2409 if (test_kvm_facility(vcpu->kvm, 82))
2410 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2411 if (test_kvm_facility(vcpu->kvm, 133))
2412 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2413 if (test_kvm_facility(vcpu->kvm, 156))
2414 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2415 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2416 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2417 */
2418 if (MACHINE_HAS_VX)
2419 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2420 else
2421 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2422
2423 if (kvm_is_ucontrol(vcpu->kvm))
2424 return __kvm_ucontrol_vcpu_init(vcpu);
2425
2426 return 0;
2427 }
2428
2429 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2430 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2431 {
2432 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2433 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2434 vcpu->arch.cputm_start = get_tod_clock_fast();
2435 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2436 }
2437
2438 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2439 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2440 {
2441 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2442 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2443 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2444 vcpu->arch.cputm_start = 0;
2445 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2446 }
2447
2448 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2449 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2450 {
2451 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2452 vcpu->arch.cputm_enabled = true;
2453 __start_cpu_timer_accounting(vcpu);
2454 }
2455
2456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2457 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2458 {
2459 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2460 __stop_cpu_timer_accounting(vcpu);
2461 vcpu->arch.cputm_enabled = false;
2462 }
2463
2464 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2465 {
2466 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2467 __enable_cpu_timer_accounting(vcpu);
2468 preempt_enable();
2469 }
2470
2471 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2472 {
2473 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2474 __disable_cpu_timer_accounting(vcpu);
2475 preempt_enable();
2476 }
2477
2478 /* set the cpu timer - may only be called from the VCPU thread itself */
2479 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2480 {
2481 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2482 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2483 if (vcpu->arch.cputm_enabled)
2484 vcpu->arch.cputm_start = get_tod_clock_fast();
2485 vcpu->arch.sie_block->cputm = cputm;
2486 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2487 preempt_enable();
2488 }
2489
2490 /* update and get the cpu timer - can also be called from other VCPU threads */
2491 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2492 {
2493 unsigned int seq;
2494 __u64 value;
2495
2496 if (unlikely(!vcpu->arch.cputm_enabled))
2497 return vcpu->arch.sie_block->cputm;
2498
2499 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2500 do {
2501 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2502 /*
2503 * If the writer would ever execute a read in the critical
2504 * section, e.g. in irq context, we have a deadlock.
2505 */
2506 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2507 value = vcpu->arch.sie_block->cputm;
2508 /* if cputm_start is 0, accounting is being started/stopped */
2509 if (likely(vcpu->arch.cputm_start))
2510 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2511 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2512 preempt_enable();
2513 return value;
2514 }
2515
2516 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2517 {
2518
2519 gmap_enable(vcpu->arch.enabled_gmap);
2520 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2521 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2522 __start_cpu_timer_accounting(vcpu);
2523 vcpu->cpu = cpu;
2524 }
2525
2526 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2527 {
2528 vcpu->cpu = -1;
2529 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2530 __stop_cpu_timer_accounting(vcpu);
2531 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2532 vcpu->arch.enabled_gmap = gmap_get_enabled();
2533 gmap_disable(vcpu->arch.enabled_gmap);
2534
2535 }
2536
2537 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2538 {
2539 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2540 vcpu->arch.sie_block->gpsw.mask = 0UL;
2541 vcpu->arch.sie_block->gpsw.addr = 0UL;
2542 kvm_s390_set_prefix(vcpu, 0);
2543 kvm_s390_set_cpu_timer(vcpu, 0);
2544 vcpu->arch.sie_block->ckc = 0UL;
2545 vcpu->arch.sie_block->todpr = 0;
2546 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2547 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2548 CR0_INTERRUPT_KEY_SUBMASK |
2549 CR0_MEASUREMENT_ALERT_SUBMASK;
2550 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2551 CR14_UNUSED_33 |
2552 CR14_EXTERNAL_DAMAGE_SUBMASK;
2553 /* make sure the new fpc will be lazily loaded */
2554 save_fpu_regs();
2555 current->thread.fpu.fpc = 0;
2556 vcpu->arch.sie_block->gbea = 1;
2557 vcpu->arch.sie_block->pp = 0;
2558 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2559 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2560 kvm_clear_async_pf_completion_queue(vcpu);
2561 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2562 kvm_s390_vcpu_stop(vcpu);
2563 kvm_s390_clear_local_irqs(vcpu);
2564 }
2565
2566 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2567 {
2568 mutex_lock(&vcpu->kvm->lock);
2569 preempt_disable();
2570 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2571 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2572 preempt_enable();
2573 mutex_unlock(&vcpu->kvm->lock);
2574 if (!kvm_is_ucontrol(vcpu->kvm)) {
2575 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2576 sca_add_vcpu(vcpu);
2577 }
2578 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2579 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2580 /* make vcpu_load load the right gmap on the first trigger */
2581 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2582 }
2583
2584 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2585 {
2586 if (!test_kvm_facility(vcpu->kvm, 76))
2587 return;
2588
2589 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2590
2591 if (vcpu->kvm->arch.crypto.aes_kw)
2592 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2593 if (vcpu->kvm->arch.crypto.dea_kw)
2594 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2595
2596 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2597 }
2598
2599 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2600 {
2601 free_page(vcpu->arch.sie_block->cbrlo);
2602 vcpu->arch.sie_block->cbrlo = 0;
2603 }
2604
2605 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2606 {
2607 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2608 if (!vcpu->arch.sie_block->cbrlo)
2609 return -ENOMEM;
2610 return 0;
2611 }
2612
2613 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2614 {
2615 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2616
2617 vcpu->arch.sie_block->ibc = model->ibc;
2618 if (test_kvm_facility(vcpu->kvm, 7))
2619 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2620 }
2621
2622 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2623 {
2624 int rc = 0;
2625
2626 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2627 CPUSTAT_SM |
2628 CPUSTAT_STOPPED);
2629
2630 if (test_kvm_facility(vcpu->kvm, 78))
2631 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2632 else if (test_kvm_facility(vcpu->kvm, 8))
2633 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2634
2635 kvm_s390_vcpu_setup_model(vcpu);
2636
2637 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2638 if (MACHINE_HAS_ESOP)
2639 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2640 if (test_kvm_facility(vcpu->kvm, 9))
2641 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2642 if (test_kvm_facility(vcpu->kvm, 73))
2643 vcpu->arch.sie_block->ecb |= ECB_TE;
2644
2645 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2646 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2647 if (test_kvm_facility(vcpu->kvm, 130))
2648 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2649 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2650 if (sclp.has_cei)
2651 vcpu->arch.sie_block->eca |= ECA_CEI;
2652 if (sclp.has_ib)
2653 vcpu->arch.sie_block->eca |= ECA_IB;
2654 if (sclp.has_siif)
2655 vcpu->arch.sie_block->eca |= ECA_SII;
2656 if (sclp.has_sigpif)
2657 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2658 if (test_kvm_facility(vcpu->kvm, 129)) {
2659 vcpu->arch.sie_block->eca |= ECA_VX;
2660 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2661 }
2662 if (test_kvm_facility(vcpu->kvm, 139))
2663 vcpu->arch.sie_block->ecd |= ECD_MEF;
2664 if (test_kvm_facility(vcpu->kvm, 156))
2665 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2666 if (vcpu->arch.sie_block->gd) {
2667 vcpu->arch.sie_block->eca |= ECA_AIV;
2668 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2669 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2670 }
2671 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2672 | SDNXC;
2673 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2674
2675 if (sclp.has_kss)
2676 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2677 else
2678 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2679
2680 if (vcpu->kvm->arch.use_cmma) {
2681 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2682 if (rc)
2683 return rc;
2684 }
2685 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2686 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2687
2688 kvm_s390_vcpu_crypto_setup(vcpu);
2689
2690 return rc;
2691 }
2692
2693 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2694 unsigned int id)
2695 {
2696 struct kvm_vcpu *vcpu;
2697 struct sie_page *sie_page;
2698 int rc = -EINVAL;
2699
2700 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2701 goto out;
2702
2703 rc = -ENOMEM;
2704
2705 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2706 if (!vcpu)
2707 goto out;
2708
2709 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2710 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2711 if (!sie_page)
2712 goto out_free_cpu;
2713
2714 vcpu->arch.sie_block = &sie_page->sie_block;
2715 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2716
2717 /* the real guest size will always be smaller than msl */
2718 vcpu->arch.sie_block->mso = 0;
2719 vcpu->arch.sie_block->msl = sclp.hamax;
2720
2721 vcpu->arch.sie_block->icpua = id;
2722 spin_lock_init(&vcpu->arch.local_int.lock);
2723 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2724 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2725 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2726 seqcount_init(&vcpu->arch.cputm_seqcount);
2727
2728 rc = kvm_vcpu_init(vcpu, kvm, id);
2729 if (rc)
2730 goto out_free_sie_block;
2731 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2732 vcpu->arch.sie_block);
2733 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2734
2735 return vcpu;
2736 out_free_sie_block:
2737 free_page((unsigned long)(vcpu->arch.sie_block));
2738 out_free_cpu:
2739 kmem_cache_free(kvm_vcpu_cache, vcpu);
2740 out:
2741 return ERR_PTR(rc);
2742 }
2743
2744 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2745 {
2746 return kvm_s390_vcpu_has_irq(vcpu, 0);
2747 }
2748
2749 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2750 {
2751 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2752 }
2753
2754 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2755 {
2756 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2757 exit_sie(vcpu);
2758 }
2759
2760 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2761 {
2762 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2763 }
2764
2765 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2766 {
2767 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2768 exit_sie(vcpu);
2769 }
2770
2771 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2772 {
2773 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2774 }
2775
2776 /*
2777 * Kick a guest cpu out of SIE and wait until SIE is not running.
2778 * If the CPU is not running (e.g. waiting as idle) the function will
2779 * return immediately. */
2780 void exit_sie(struct kvm_vcpu *vcpu)
2781 {
2782 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2783 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2784 cpu_relax();
2785 }
2786
2787 /* Kick a guest cpu out of SIE to process a request synchronously */
2788 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2789 {
2790 kvm_make_request(req, vcpu);
2791 kvm_s390_vcpu_request(vcpu);
2792 }
2793
2794 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2795 unsigned long end)
2796 {
2797 struct kvm *kvm = gmap->private;
2798 struct kvm_vcpu *vcpu;
2799 unsigned long prefix;
2800 int i;
2801
2802 if (gmap_is_shadow(gmap))
2803 return;
2804 if (start >= 1UL << 31)
2805 /* We are only interested in prefix pages */
2806 return;
2807 kvm_for_each_vcpu(i, vcpu, kvm) {
2808 /* match against both prefix pages */
2809 prefix = kvm_s390_get_prefix(vcpu);
2810 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2811 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2812 start, end);
2813 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2814 }
2815 }
2816 }
2817
2818 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2819 {
2820 /* kvm common code refers to this, but never calls it */
2821 BUG();
2822 return 0;
2823 }
2824
2825 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2826 struct kvm_one_reg *reg)
2827 {
2828 int r = -EINVAL;
2829
2830 switch (reg->id) {
2831 case KVM_REG_S390_TODPR:
2832 r = put_user(vcpu->arch.sie_block->todpr,
2833 (u32 __user *)reg->addr);
2834 break;
2835 case KVM_REG_S390_EPOCHDIFF:
2836 r = put_user(vcpu->arch.sie_block->epoch,
2837 (u64 __user *)reg->addr);
2838 break;
2839 case KVM_REG_S390_CPU_TIMER:
2840 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2841 (u64 __user *)reg->addr);
2842 break;
2843 case KVM_REG_S390_CLOCK_COMP:
2844 r = put_user(vcpu->arch.sie_block->ckc,
2845 (u64 __user *)reg->addr);
2846 break;
2847 case KVM_REG_S390_PFTOKEN:
2848 r = put_user(vcpu->arch.pfault_token,
2849 (u64 __user *)reg->addr);
2850 break;
2851 case KVM_REG_S390_PFCOMPARE:
2852 r = put_user(vcpu->arch.pfault_compare,
2853 (u64 __user *)reg->addr);
2854 break;
2855 case KVM_REG_S390_PFSELECT:
2856 r = put_user(vcpu->arch.pfault_select,
2857 (u64 __user *)reg->addr);
2858 break;
2859 case KVM_REG_S390_PP:
2860 r = put_user(vcpu->arch.sie_block->pp,
2861 (u64 __user *)reg->addr);
2862 break;
2863 case KVM_REG_S390_GBEA:
2864 r = put_user(vcpu->arch.sie_block->gbea,
2865 (u64 __user *)reg->addr);
2866 break;
2867 default:
2868 break;
2869 }
2870
2871 return r;
2872 }
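/*
 * Illustrative userspace sketch (editorial addition, not part of this file):
 * reading one of the registers handled above through the generic ONE_REG
 * interface, here the guest CPU timer. "vcpu_fd" is an assumption and
 * <stdio.h>/<sys/ioctl.h> are assumed to be included.
 *
 *	__u64 cputm = 0;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&cputm,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0)
 *		printf("guest cpu timer: %llu\n", (unsigned long long)cputm);
 */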
2873
2874 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2875 struct kvm_one_reg *reg)
2876 {
2877 int r = -EINVAL;
2878 __u64 val;
2879
2880 switch (reg->id) {
2881 case KVM_REG_S390_TODPR:
2882 r = get_user(vcpu->arch.sie_block->todpr,
2883 (u32 __user *)reg->addr);
2884 break;
2885 case KVM_REG_S390_EPOCHDIFF:
2886 r = get_user(vcpu->arch.sie_block->epoch,
2887 (u64 __user *)reg->addr);
2888 break;
2889 case KVM_REG_S390_CPU_TIMER:
2890 r = get_user(val, (u64 __user *)reg->addr);
2891 if (!r)
2892 kvm_s390_set_cpu_timer(vcpu, val);
2893 break;
2894 case KVM_REG_S390_CLOCK_COMP:
2895 r = get_user(vcpu->arch.sie_block->ckc,
2896 (u64 __user *)reg->addr);
2897 break;
2898 case KVM_REG_S390_PFTOKEN:
2899 r = get_user(vcpu->arch.pfault_token,
2900 (u64 __user *)reg->addr);
2901 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2902 kvm_clear_async_pf_completion_queue(vcpu);
2903 break;
2904 case KVM_REG_S390_PFCOMPARE:
2905 r = get_user(vcpu->arch.pfault_compare,
2906 (u64 __user *)reg->addr);
2907 break;
2908 case KVM_REG_S390_PFSELECT:
2909 r = get_user(vcpu->arch.pfault_select,
2910 (u64 __user *)reg->addr);
2911 break;
2912 case KVM_REG_S390_PP:
2913 r = get_user(vcpu->arch.sie_block->pp,
2914 (u64 __user *)reg->addr);
2915 break;
2916 case KVM_REG_S390_GBEA:
2917 r = get_user(vcpu->arch.sie_block->gbea,
2918 (u64 __user *)reg->addr);
2919 break;
2920 default:
2921 break;
2922 }
2923
2924 return r;
2925 }
2926
2927 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2928 {
2929 kvm_s390_vcpu_initial_reset(vcpu);
2930 return 0;
2931 }
2932
2933 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2934 {
2935 vcpu_load(vcpu);
2936 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2937 vcpu_put(vcpu);
2938 return 0;
2939 }
2940
2941 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2942 {
2943 vcpu_load(vcpu);
2944 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2945 vcpu_put(vcpu);
2946 return 0;
2947 }
2948
2949 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2950 struct kvm_sregs *sregs)
2951 {
2952 vcpu_load(vcpu);
2953
2954 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2955 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2956
2957 vcpu_put(vcpu);
2958 return 0;
2959 }
2960
2961 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2962 struct kvm_sregs *sregs)
2963 {
2964 vcpu_load(vcpu);
2965
2966 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2967 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2968
2969 vcpu_put(vcpu);
2970 return 0;
2971 }
2972
2973 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2974 {
2975 int ret = 0;
2976
2977 vcpu_load(vcpu);
2978
2979 if (test_fp_ctl(fpu->fpc)) {
2980 ret = -EINVAL;
2981 goto out;
2982 }
2983 vcpu->run->s.regs.fpc = fpu->fpc;
2984 if (MACHINE_HAS_VX)
2985 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2986 (freg_t *) fpu->fprs);
2987 else
2988 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2989
2990 out:
2991 vcpu_put(vcpu);
2992 return ret;
2993 }
2994
2995 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2996 {
2997 vcpu_load(vcpu);
2998
2999 /* make sure we have the latest values */
3000 save_fpu_regs();
3001 if (MACHINE_HAS_VX)
3002 convert_vx_to_fp((freg_t *) fpu->fprs,
3003 (__vector128 *) vcpu->run->s.regs.vrs);
3004 else
3005 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3006 fpu->fpc = vcpu->run->s.regs.fpc;
3007
3008 vcpu_put(vcpu);
3009 return 0;
3010 }
3011
3012 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3013 {
3014 int rc = 0;
3015
3016 if (!is_vcpu_stopped(vcpu))
3017 rc = -EBUSY;
3018 else {
3019 vcpu->run->psw_mask = psw.mask;
3020 vcpu->run->psw_addr = psw.addr;
3021 }
3022 return rc;
3023 }
3024
3025 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3026 struct kvm_translation *tr)
3027 {
3028 return -EINVAL; /* not implemented yet */
3029 }
3030
3031 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3032 KVM_GUESTDBG_USE_HW_BP | \
3033 KVM_GUESTDBG_ENABLE)
3034
3035 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3036 struct kvm_guest_debug *dbg)
3037 {
3038 int rc = 0;
3039
3040 vcpu_load(vcpu);
3041
3042 vcpu->guest_debug = 0;
3043 kvm_s390_clear_bp_data(vcpu);
3044
3045 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3046 rc = -EINVAL;
3047 goto out;
3048 }
3049 if (!sclp.has_gpere) {
3050 rc = -EINVAL;
3051 goto out;
3052 }
3053
3054 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3055 vcpu->guest_debug = dbg->control;
3056 /* enforce guest PER */
3057 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3058
3059 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3060 rc = kvm_s390_import_bp_data(vcpu, dbg);
3061 } else {
3062 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3063 vcpu->arch.guestdbg.last_bp = 0;
3064 }
3065
3066 if (rc) {
3067 vcpu->guest_debug = 0;
3068 kvm_s390_clear_bp_data(vcpu);
3069 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3070 }
3071
3072 out:
3073 vcpu_put(vcpu);
3074 return rc;
3075 }
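/*
 * Illustrative userspace sketch (editorial addition, not part of this file):
 * enabling single-step debugging for a vcpu through the ioctl that ends up in
 * the handler above. "vcpu_fd" is an assumption; hardware breakpoints would
 * additionally fill dbg.arch and set KVM_GUESTDBG_USE_HW_BP.
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */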
3076
3077 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3078 struct kvm_mp_state *mp_state)
3079 {
3080 int ret;
3081
3082 vcpu_load(vcpu);
3083
3084 /* CHECK_STOP and LOAD are not supported yet */
3085 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3086 KVM_MP_STATE_OPERATING;
3087
3088 vcpu_put(vcpu);
3089 return ret;
3090 }
3091
3092 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3093 struct kvm_mp_state *mp_state)
3094 {
3095 int rc = 0;
3096
3097 vcpu_load(vcpu);
3098
3099 /* user space knows about this interface - let it control the state */
3100 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3101
3102 switch (mp_state->mp_state) {
3103 case KVM_MP_STATE_STOPPED:
3104 kvm_s390_vcpu_stop(vcpu);
3105 break;
3106 case KVM_MP_STATE_OPERATING:
3107 kvm_s390_vcpu_start(vcpu);
3108 break;
3109 case KVM_MP_STATE_LOAD:
3110 case KVM_MP_STATE_CHECK_STOP:
3111 /* fall through - CHECK_STOP and LOAD are not supported yet */
3112 default:
3113 rc = -ENXIO;
3114 }
3115
3116 vcpu_put(vcpu);
3117 return rc;
3118 }
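/*
 * Illustrative userspace sketch (editorial addition, not part of this file):
 * stopping and restarting a vcpu through the handler above. "vcpu_fd" is an
 * assumption.
 *
 *	struct kvm_mp_state state = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);	// cpu is now stopped
 *	state.mp_state = KVM_MP_STATE_OPERATING;
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);	// cpu is running again
 */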
3119
3120 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3121 {
3122 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3123 }
3124
3125 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3126 {
3127 retry:
3128 kvm_s390_vcpu_request_handled(vcpu);
3129 if (!kvm_request_pending(vcpu))
3130 return 0;
3131 /*
3132 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3133 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3134 * This ensures that the ipte instruction for this request has
3135 * already finished. We might race against a second unmapper that
3136 * wants to set the blocking bit. Lets just retry the request loop.
3137 */
3138 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3139 int rc;
3140 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3141 kvm_s390_get_prefix(vcpu),
3142 PAGE_SIZE * 2, PROT_WRITE);
3143 if (rc) {
3144 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3145 return rc;
3146 }
3147 goto retry;
3148 }
3149
3150 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3151 vcpu->arch.sie_block->ihcpu = 0xffff;
3152 goto retry;
3153 }
3154
3155 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3156 if (!ibs_enabled(vcpu)) {
3157 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3158 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3159 }
3160 goto retry;
3161 }
3162
3163 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3164 if (ibs_enabled(vcpu)) {
3165 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3166 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3167 }
3168 goto retry;
3169 }
3170
3171 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3172 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3173 goto retry;
3174 }
3175
3176 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3177 /*
3178 * Disable CMM virtualization; we will emulate the ESSA
3179 * instruction manually, in order to provide additional
3180 * functionalities needed for live migration.
3181 */
3182 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3183 goto retry;
3184 }
3185
3186 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3187 /*
3188 * Re-enable CMM virtualization if CMMA is available and
3189 * CMM has been used.
3190 */
3191 if ((vcpu->kvm->arch.use_cmma) &&
3192 (vcpu->kvm->mm->context.uses_cmm))
3193 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3194 goto retry;
3195 }
3196
3197 /* nothing to do, just clear the request */
3198 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3199
3200 return 0;
3201 }
3202
3203 void kvm_s390_set_tod_clock(struct kvm *kvm,
3204 const struct kvm_s390_vm_tod_clock *gtod)
3205 {
3206 struct kvm_vcpu *vcpu;
3207 struct kvm_s390_tod_clock_ext htod;
3208 int i;
3209
3210 mutex_lock(&kvm->lock);
3211 preempt_disable();
3212
3213 get_tod_clock_ext((char *)&htod);
3214
3215 kvm->arch.epoch = gtod->tod - htod.tod;
3216 kvm->arch.epdx = 0;
3217 if (test_kvm_facility(kvm, 139)) {
3218 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3219 if (kvm->arch.epoch > gtod->tod)
3220 kvm->arch.epdx -= 1;
3221 }
3222
3223 kvm_s390_vcpu_block_all(kvm);
3224 kvm_for_each_vcpu(i, vcpu, kvm) {
3225 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3226 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3227 }
3228
3229 kvm_s390_vcpu_unblock_all(kvm);
3230 preempt_enable();
3231 mutex_unlock(&kvm->lock);
3232 }
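/*
 * Worked example for the epoch arithmetic above (editorial addition,
 * illustrative numbers): with a host TOD of 0x10 and a requested guest TOD of
 * 0x08, the unsigned subtraction 0x08 - 0x10 wraps to 0xfffffffffffffff8, so
 * kvm->arch.epoch > gtod->tod and the epoch index difference is decremented
 * by one to account for the borrow out of the low 64 TOD bits.
 */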
3233
3234 /**
3235 * kvm_arch_fault_in_page - fault-in guest page if necessary
3236 * @vcpu: The corresponding virtual cpu
3237 * @gpa: Guest physical address
3238 * @writable: Whether the page should be writable or not
3239 *
3240 * Make sure that a guest page has been faulted-in on the host.
3241 *
3242 * Return: Zero on success, negative error code otherwise.
3243 */
3244 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3245 {
3246 return gmap_fault(vcpu->arch.gmap, gpa,
3247 writable ? FAULT_FLAG_WRITE : 0);
3248 }
3249
3250 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3251 unsigned long token)
3252 {
3253 struct kvm_s390_interrupt inti;
3254 struct kvm_s390_irq irq;
3255
3256 if (start_token) {
3257 irq.u.ext.ext_params2 = token;
3258 irq.type = KVM_S390_INT_PFAULT_INIT;
3259 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3260 } else {
3261 inti.type = KVM_S390_INT_PFAULT_DONE;
3262 inti.parm64 = token;
3263 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3264 }
3265 }
3266
3267 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3268 struct kvm_async_pf *work)
3269 {
3270 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3271 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3272 }
3273
3274 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3275 struct kvm_async_pf *work)
3276 {
3277 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3278 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3279 }
3280
3281 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3282 struct kvm_async_pf *work)
3283 {
3284 /* s390 will always inject the page directly */
3285 }
3286
3287 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3288 {
3289 /*
3290 * s390 will always inject the page directly,
3291 * but we still want check_async_completion to clean up
3292 */
3293 return true;
3294 }
3295
3296 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3297 {
3298 hva_t hva;
3299 struct kvm_arch_async_pf arch;
3300 int rc;
3301
3302 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3303 return 0;
3304 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3305 vcpu->arch.pfault_compare)
3306 return 0;
3307 if (psw_extint_disabled(vcpu))
3308 return 0;
3309 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3310 return 0;
3311 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3312 return 0;
3313 if (!vcpu->arch.gmap->pfault_enabled)
3314 return 0;
3315
3316 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3317 hva += current->thread.gmap_addr & ~PAGE_MASK;
3318 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3319 return 0;
3320
3321 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3322 return rc;
3323 }
3324
3325 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3326 {
3327 int rc, cpuflags;
3328
3329 /*
3330 * On s390, notifications for arriving pages will be delivered directly
3331 * to the guest, but the housekeeping for completed pfaults is
3332 * handled outside the worker.
3333 */
3334 kvm_check_async_pf_completion(vcpu);
3335
3336 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3337 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3338
3339 if (need_resched())
3340 schedule();
3341
3342 if (test_cpu_flag(CIF_MCCK_PENDING))
3343 s390_handle_mcck();
3344
3345 if (!kvm_is_ucontrol(vcpu->kvm)) {
3346 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3347 if (rc)
3348 return rc;
3349 }
3350
3351 rc = kvm_s390_handle_requests(vcpu);
3352 if (rc)
3353 return rc;
3354
3355 if (guestdbg_enabled(vcpu)) {
3356 kvm_s390_backup_guest_per_regs(vcpu);
3357 kvm_s390_patch_guest_per_regs(vcpu);
3358 }
3359
3360 vcpu->arch.sie_block->icptcode = 0;
3361 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3362 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3363 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3364
3365 return 0;
3366 }
3367
3368 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3369 {
3370 struct kvm_s390_pgm_info pgm_info = {
3371 .code = PGM_ADDRESSING,
3372 };
3373 u8 opcode, ilen;
3374 int rc;
3375
3376 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3377 trace_kvm_s390_sie_fault(vcpu);
3378
3379 /*
3380 * We want to inject an addressing exception, which is defined as a
3381 * suppressing or terminating exception. However, since we came here
3382 * by a DAT access exception, the PSW still points to the faulting
3383 * instruction since DAT exceptions are nullifying. So we've got
3384 * to look up the current opcode to get the length of the instruction
3385 * to be able to forward the PSW.
3386 */
3387 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3388 ilen = insn_length(opcode);
3389 if (rc < 0) {
3390 return rc;
3391 } else if (rc) {
3392 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3393 * Forward by arbitrary ilc, injection will take care of
3394 * nullification if necessary.
3395 */
3396 pgm_info = vcpu->arch.pgm;
3397 ilen = 4;
3398 }
3399 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3400 kvm_s390_forward_psw(vcpu, ilen);
3401 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3402 }
3403
3404 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3405 {
3406 struct mcck_volatile_info *mcck_info;
3407 struct sie_page *sie_page;
3408
3409 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3410 vcpu->arch.sie_block->icptcode);
3411 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3412
3413 if (guestdbg_enabled(vcpu))
3414 kvm_s390_restore_guest_per_regs(vcpu);
3415
3416 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3417 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3418
3419 if (exit_reason == -EINTR) {
3420 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3421 sie_page = container_of(vcpu->arch.sie_block,
3422 struct sie_page, sie_block);
3423 mcck_info = &sie_page->mcck_info;
3424 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3425 return 0;
3426 }
3427
3428 if (vcpu->arch.sie_block->icptcode > 0) {
3429 int rc = kvm_handle_sie_intercept(vcpu);
3430
3431 if (rc != -EOPNOTSUPP)
3432 return rc;
3433 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3434 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3435 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3436 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3437 return -EREMOTE;
3438 } else if (exit_reason != -EFAULT) {
3439 vcpu->stat.exit_null++;
3440 return 0;
3441 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3442 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3443 vcpu->run->s390_ucontrol.trans_exc_code =
3444 current->thread.gmap_addr;
3445 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3446 return -EREMOTE;
3447 } else if (current->thread.gmap_pfault) {
3448 trace_kvm_s390_major_guest_pfault(vcpu);
3449 current->thread.gmap_pfault = 0;
3450 if (kvm_arch_setup_async_pf(vcpu))
3451 return 0;
3452 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3453 }
3454 return vcpu_post_run_fault_in_sie(vcpu);
3455 }
3456
3457 static int __vcpu_run(struct kvm_vcpu *vcpu)
3458 {
3459 int rc, exit_reason;
3460
3461 /*
3462 * We try to hold kvm->srcu during most of vcpu_run (except when
3463 * running the guest), so that memslots (and other stuff) are protected
3464 */
3465 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3466
3467 do {
3468 rc = vcpu_pre_run(vcpu);
3469 if (rc)
3470 break;
3471
3472 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3473 /*
3474 * As PF_VCPU will be used in the fault handler, there must be no
3475 * uaccess between guest_enter and guest_exit.
3476 */
3477 local_irq_disable();
3478 guest_enter_irqoff();
3479 __disable_cpu_timer_accounting(vcpu);
3480 local_irq_enable();
3481 exit_reason = sie64a(vcpu->arch.sie_block,
3482 vcpu->run->s.regs.gprs);
3483 local_irq_disable();
3484 __enable_cpu_timer_accounting(vcpu);
3485 guest_exit_irqoff();
3486 local_irq_enable();
3487 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3488
3489 rc = vcpu_post_run(vcpu, exit_reason);
3490 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3491
3492 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3493 return rc;
3494 }
3495
3496 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3497 {
3498 struct runtime_instr_cb *riccb;
3499 struct gs_cb *gscb;
3500
3501 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3502 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3503 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3504 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3505 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3506 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3507 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3508 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3509 /* some control register changes require a tlb flush */
3510 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3511 }
3512 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3513 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3514 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3515 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3516 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3517 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3518 }
3519 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3520 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3521 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3522 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3523 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3524 kvm_clear_async_pf_completion_queue(vcpu);
3525 }
3526 /*
3527 * If userspace sets the riccb (e.g. after migration) to a valid state,
3528 * we should enable RI here instead of doing the lazy enablement.
3529 */
3530 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3531 test_kvm_facility(vcpu->kvm, 64) &&
3532 riccb->v &&
3533 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3534 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3535 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3536 }
3537 /*
3538 * If userspace sets the gscb (e.g. after migration) to non-zero,
3539 * we should enable GS here instead of doing the lazy enablement.
3540 */
3541 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3542 test_kvm_facility(vcpu->kvm, 133) &&
3543 gscb->gssm &&
3544 !vcpu->arch.gs_enabled) {
3545 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3546 vcpu->arch.sie_block->ecb |= ECB_GS;
3547 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3548 vcpu->arch.gs_enabled = 1;
3549 }
3550 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3551 test_kvm_facility(vcpu->kvm, 82)) {
3552 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3553 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3554 }
3555 save_access_regs(vcpu->arch.host_acrs);
3556 restore_access_regs(vcpu->run->s.regs.acrs);
3557 /* save host (userspace) fprs/vrs */
3558 save_fpu_regs();
3559 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3560 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3561 if (MACHINE_HAS_VX)
3562 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3563 else
3564 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3565 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3566 if (test_fp_ctl(current->thread.fpu.fpc))
3567 /* User space provided an invalid FPC, let's clear it */
3568 current->thread.fpu.fpc = 0;
3569 if (MACHINE_HAS_GS) {
3570 preempt_disable();
3571 __ctl_set_bit(2, 4);
3572 if (current->thread.gs_cb) {
3573 vcpu->arch.host_gscb = current->thread.gs_cb;
3574 save_gs_cb(vcpu->arch.host_gscb);
3575 }
3576 if (vcpu->arch.gs_enabled) {
3577 current->thread.gs_cb = (struct gs_cb *)
3578 &vcpu->run->s.regs.gscb;
3579 restore_gs_cb(current->thread.gs_cb);
3580 }
3581 preempt_enable();
3582 }
3583 /* SIE will load etoken directly from SDNX and therefore kvm_run */
3584
3585 kvm_run->kvm_dirty_regs = 0;
3586 }
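
/*
 * Illustrative sketch of how the dirty-register protocol above is driven
 * from userspace (run, new_prefix and vcpu_fd are placeholder names):
 * update the mmap()ed kvm_run area and flag the touched register group in
 * kvm_dirty_regs before the next KVM_RUN, e.g.
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 *
 * sync_regs() then applies the flagged state and clears kvm_dirty_regs.
 */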
3587
3588 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3589 {
3590 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3591 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3592 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3593 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3594 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3595 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3596 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3597 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3598 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3599 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3600 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3601 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3602 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3603 save_access_regs(vcpu->run->s.regs.acrs);
3604 restore_access_regs(vcpu->arch.host_acrs);
3605 /* Save guest register state */
3606 save_fpu_regs();
3607 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3608 /* Restore will be done lazily at return */
3609 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3610 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3611 if (MACHINE_HAS_GS) {
3612 __ctl_set_bit(2, 4);
3613 if (vcpu->arch.gs_enabled)
3614 save_gs_cb(current->thread.gs_cb);
3615 preempt_disable();
3616 current->thread.gs_cb = vcpu->arch.host_gscb;
3617 restore_gs_cb(vcpu->arch.host_gscb);
3618 preempt_enable();
3619 if (!vcpu->arch.host_gscb)
3620 __ctl_clear_bit(2, 4);
3621 vcpu->arch.host_gscb = NULL;
3622 }
3623 /* SIE will save etoken directly into SDNX and therefore kvm_run */
3624 }
3625
3626 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3627 {
3628 int rc;
3629
3630 if (kvm_run->immediate_exit)
3631 return -EINTR;
3632
3633 vcpu_load(vcpu);
3634
3635 if (guestdbg_exit_pending(vcpu)) {
3636 kvm_s390_prepare_debug_exit(vcpu);
3637 rc = 0;
3638 goto out;
3639 }
3640
3641 kvm_sigset_activate(vcpu);
3642
3643 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3644 kvm_s390_vcpu_start(vcpu);
3645 } else if (is_vcpu_stopped(vcpu)) {
3646 pr_err_ratelimited("can't run stopped vcpu %d\n",
3647 vcpu->vcpu_id);
3648 rc = -EINVAL;
3649 goto out;
3650 }
3651
3652 sync_regs(vcpu, kvm_run);
3653 enable_cpu_timer_accounting(vcpu);
3654
3655 might_fault();
3656 rc = __vcpu_run(vcpu);
3657
3658 if (signal_pending(current) && !rc) {
3659 kvm_run->exit_reason = KVM_EXIT_INTR;
3660 rc = -EINTR;
3661 }
3662
3663 if (guestdbg_exit_pending(vcpu) && !rc) {
3664 kvm_s390_prepare_debug_exit(vcpu);
3665 rc = 0;
3666 }
3667
3668 if (rc == -EREMOTE) {
3669 /* userspace support is needed, kvm_run has been prepared */
3670 rc = 0;
3671 }
3672
3673 disable_cpu_timer_accounting(vcpu);
3674 store_regs(vcpu, kvm_run);
3675
3676 kvm_sigset_deactivate(vcpu);
3677
3678 vcpu->stat.exit_userspace++;
3679 out:
3680 vcpu_put(vcpu);
3681 return rc;
3682 }
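
/*
 * Rough sketch of the userspace side of kvm_arch_vcpu_ioctl_run() (vcpu_fd
 * and kvm_fd are placeholder file descriptors for the vcpu and /dev/kvm):
 *
 *	int run_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno == EINTR)
 *			continue;
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			... handle icptcode/ipa/ipb filled in above ...
 *		case KVM_EXIT_INTR:
 *			... a signal interrupted the run loop ...
 *		}
 *	}
 */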
3683
3684 /*
3685 * store status at address
3686 * we have two special cases:
3687 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3688 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3689 */
3690 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3691 {
3692 unsigned char archmode = 1;
3693 freg_t fprs[NUM_FPRS];
3694 unsigned int px;
3695 u64 clkcomp, cputm;
3696 int rc;
3697
3698 px = kvm_s390_get_prefix(vcpu);
3699 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3700 if (write_guest_abs(vcpu, 163, &archmode, 1))
3701 return -EFAULT;
3702 gpa = 0;
3703 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3704 if (write_guest_real(vcpu, 163, &archmode, 1))
3705 return -EFAULT;
3706 gpa = px;
3707 } else
3708 gpa -= __LC_FPREGS_SAVE_AREA;
3709
3710 /* manually convert vector registers if necessary */
3711 if (MACHINE_HAS_VX) {
3712 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3713 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3714 fprs, 128);
3715 } else {
3716 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3717 vcpu->run->s.regs.fprs, 128);
3718 }
3719 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3720 vcpu->run->s.regs.gprs, 128);
3721 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3722 &vcpu->arch.sie_block->gpsw, 16);
3723 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3724 &px, 4);
3725 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3726 &vcpu->run->s.regs.fpc, 4);
3727 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3728 &vcpu->arch.sie_block->todpr, 4);
3729 cputm = kvm_s390_get_cpu_timer(vcpu);
3730 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3731 &cputm, 8);
3732 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3733 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3734 &clkcomp, 8);
3735 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3736 &vcpu->run->s.regs.acrs, 64);
3737 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3738 &vcpu->arch.sie_block->gcr, 128);
3739 return rc ? -EFAULT : 0;
3740 }
3741
3742 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3743 {
3744 /*
3745 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3746 * switch in the run ioctl. Let's update our copies before we save
3747 * them into the save area
3748 */
3749 save_fpu_regs();
3750 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3751 save_access_regs(vcpu->run->s.regs.acrs);
3752
3753 return kvm_s390_store_status_unloaded(vcpu, addr);
3754 }
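
/*
 * Corresponding userspace call (sketch; vcpu_fd is a placeholder): the
 * ioctl argument is the absolute store address or one of the two special
 * values documented above kvm_s390_store_status_unloaded(), e.g.
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */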
3755
3756 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3757 {
3758 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3759 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3760 }
3761
3762 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3763 {
3764 unsigned int i;
3765 struct kvm_vcpu *vcpu;
3766
3767 kvm_for_each_vcpu(i, vcpu, kvm) {
3768 __disable_ibs_on_vcpu(vcpu);
3769 }
3770 }
3771
3772 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3773 {
3774 if (!sclp.has_ibs)
3775 return;
3776 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3777 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3778 }
3779
3780 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3781 {
3782 int i, online_vcpus, started_vcpus = 0;
3783
3784 if (!is_vcpu_stopped(vcpu))
3785 return;
3786
3787 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3788 /* Only one cpu at a time may enter/leave the STOPPED state. */
3789 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3790 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3791
3792 for (i = 0; i < online_vcpus; i++) {
3793 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3794 started_vcpus++;
3795 }
3796
3797 if (started_vcpus == 0) {
3798 /* we're the only active VCPU -> speed it up */
3799 __enable_ibs_on_vcpu(vcpu);
3800 } else if (started_vcpus == 1) {
3801 /*
3802 * As we are starting a second VCPU, we have to disable
3803 * the IBS facility on all VCPUs to remove potentially
3804 * outstanding ENABLE requests.
3805 */
3806 __disable_ibs_on_all_vcpus(vcpu->kvm);
3807 }
3808
3809 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3810 /*
3811 * Another VCPU might have used IBS while we were offline.
3812 * Let's play safe and flush the VCPU at startup.
3813 */
3814 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3815 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3816 return;
3817 }
3818
3819 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3820 {
3821 int i, online_vcpus, started_vcpus = 0;
3822 struct kvm_vcpu *started_vcpu = NULL;
3823
3824 if (is_vcpu_stopped(vcpu))
3825 return;
3826
3827 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3828 /* Only one cpu at a time may enter/leave the STOPPED state. */
3829 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3830 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3831
3832 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3833 kvm_s390_clear_stop_irq(vcpu);
3834
3835 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3836 __disable_ibs_on_vcpu(vcpu);
3837
3838 for (i = 0; i < online_vcpus; i++) {
3839 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3840 started_vcpus++;
3841 started_vcpu = vcpu->kvm->vcpus[i];
3842 }
3843 }
3844
3845 if (started_vcpus == 1) {
3846 /*
3847 * As we only have one VCPU left, we want to enable the
3848 * IBS facility for that VCPU to speed it up.
3849 */
3850 __enable_ibs_on_vcpu(started_vcpu);
3851 }
3852
3853 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3854 return;
3855 }
3856
3857 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3858 struct kvm_enable_cap *cap)
3859 {
3860 int r;
3861
3862 if (cap->flags)
3863 return -EINVAL;
3864
3865 switch (cap->cap) {
3866 case KVM_CAP_S390_CSS_SUPPORT:
3867 if (!vcpu->kvm->arch.css_support) {
3868 vcpu->kvm->arch.css_support = 1;
3869 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3870 trace_kvm_s390_enable_css(vcpu->kvm);
3871 }
3872 r = 0;
3873 break;
3874 default:
3875 r = -EINVAL;
3876 break;
3877 }
3878 return r;
3879 }
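
/*
 * Enabling the single per-vcpu capability handled above looks roughly like
 * this from userspace (vcpu_fd is a placeholder):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 *
 * cap.flags must be zero, otherwise -EINVAL is returned.
 */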
3880
3881 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3882 struct kvm_s390_mem_op *mop)
3883 {
3884 void __user *uaddr = (void __user *)mop->buf;
3885 void *tmpbuf = NULL;
3886 int r, srcu_idx;
3887 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3888 | KVM_S390_MEMOP_F_CHECK_ONLY;
3889
3890 if (mop->flags & ~supported_flags)
3891 return -EINVAL;
3892
3893 if (mop->size > MEM_OP_MAX_SIZE)
3894 return -E2BIG;
3895
3896 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3897 tmpbuf = vmalloc(mop->size);
3898 if (!tmpbuf)
3899 return -ENOMEM;
3900 }
3901
3902 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3903
3904 switch (mop->op) {
3905 case KVM_S390_MEMOP_LOGICAL_READ:
3906 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3907 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3908 mop->size, GACC_FETCH);
3909 break;
3910 }
3911 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3912 if (r == 0) {
3913 if (copy_to_user(uaddr, tmpbuf, mop->size))
3914 r = -EFAULT;
3915 }
3916 break;
3917 case KVM_S390_MEMOP_LOGICAL_WRITE:
3918 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3919 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3920 mop->size, GACC_STORE);
3921 break;
3922 }
3923 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3924 r = -EFAULT;
3925 break;
3926 }
3927 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3928 break;
3929 default:
3930 r = -EINVAL;
3931 }
3932
3933 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3934
3935 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3936 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3937
3938 vfree(tmpbuf);
3939 return r;
3940 }
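
/*
 * Illustrative KVM_S390_MEM_OP invocation (sketch; vcpu_fd, gaddr, len and
 * buffer are placeholders): read len bytes from guest logical address gaddr
 * into a local buffer:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = gaddr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * len must not exceed MEM_OP_MAX_SIZE; with KVM_S390_MEMOP_F_CHECK_ONLY set
 * only the access check is performed and no data is copied.
 */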
3941
3942 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3943 unsigned int ioctl, unsigned long arg)
3944 {
3945 struct kvm_vcpu *vcpu = filp->private_data;
3946 void __user *argp = (void __user *)arg;
3947
3948 switch (ioctl) {
3949 case KVM_S390_IRQ: {
3950 struct kvm_s390_irq s390irq;
3951
3952 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3953 return -EFAULT;
3954 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3955 }
3956 case KVM_S390_INTERRUPT: {
3957 struct kvm_s390_interrupt s390int;
3958 struct kvm_s390_irq s390irq;
3959
3960 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3961 return -EFAULT;
3962 if (s390int_to_s390irq(&s390int, &s390irq))
3963 return -EINVAL;
3964 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3965 }
3966 }
3967 return -ENOIOCTLCMD;
3968 }
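
/*
 * Sketch of injecting an interrupt through the async path above (vcpu_fd is
 * a placeholder), e.g. a restart interrupt that carries no payload:
 *
 *	struct kvm_s390_irq irq = { .type = KVM_S390_RESTART };
 *
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */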
3969
3970 long kvm_arch_vcpu_ioctl(struct file *filp,
3971 unsigned int ioctl, unsigned long arg)
3972 {
3973 struct kvm_vcpu *vcpu = filp->private_data;
3974 void __user *argp = (void __user *)arg;
3975 int idx;
3976 long r;
3977
3978 vcpu_load(vcpu);
3979
3980 switch (ioctl) {
3981 case KVM_S390_STORE_STATUS:
3982 idx = srcu_read_lock(&vcpu->kvm->srcu);
3983 r = kvm_s390_vcpu_store_status(vcpu, arg);
3984 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3985 break;
3986 case KVM_S390_SET_INITIAL_PSW: {
3987 psw_t psw;
3988
3989 r = -EFAULT;
3990 if (copy_from_user(&psw, argp, sizeof(psw)))
3991 break;
3992 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3993 break;
3994 }
3995 case KVM_S390_INITIAL_RESET:
3996 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3997 break;
3998 case KVM_SET_ONE_REG:
3999 case KVM_GET_ONE_REG: {
4000 struct kvm_one_reg reg;
4001 r = -EFAULT;
4002 if (copy_from_user(&reg, argp, sizeof(reg)))
4003 break;
4004 if (ioctl == KVM_SET_ONE_REG)
4005 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4006 else
4007 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4008 break;
4009 }
4010 #ifdef CONFIG_KVM_S390_UCONTROL
4011 case KVM_S390_UCAS_MAP: {
4012 struct kvm_s390_ucas_mapping ucasmap;
4013
4014 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4015 r = -EFAULT;
4016 break;
4017 }
4018
4019 if (!kvm_is_ucontrol(vcpu->kvm)) {
4020 r = -EINVAL;
4021 break;
4022 }
4023
4024 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4025 ucasmap.vcpu_addr, ucasmap.length);
4026 break;
4027 }
4028 case KVM_S390_UCAS_UNMAP: {
4029 struct kvm_s390_ucas_mapping ucasmap;
4030
4031 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4032 r = -EFAULT;
4033 break;
4034 }
4035
4036 if (!kvm_is_ucontrol(vcpu->kvm)) {
4037 r = -EINVAL;
4038 break;
4039 }
4040
4041 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4042 ucasmap.length);
4043 break;
4044 }
4045 #endif
4046 case KVM_S390_VCPU_FAULT: {
4047 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4048 break;
4049 }
4050 case KVM_ENABLE_CAP:
4051 {
4052 struct kvm_enable_cap cap;
4053 r = -EFAULT;
4054 if (copy_from_user(&cap, argp, sizeof(cap)))
4055 break;
4056 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4057 break;
4058 }
4059 case KVM_S390_MEM_OP: {
4060 struct kvm_s390_mem_op mem_op;
4061
4062 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4063 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4064 else
4065 r = -EFAULT;
4066 break;
4067 }
4068 case KVM_S390_SET_IRQ_STATE: {
4069 struct kvm_s390_irq_state irq_state;
4070
4071 r = -EFAULT;
4072 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4073 break;
4074 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4075 irq_state.len == 0 ||
4076 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4077 r = -EINVAL;
4078 break;
4079 }
4080 /* do not use irq_state.flags, it will break old QEMUs */
4081 r = kvm_s390_set_irq_state(vcpu,
4082 (void __user *) irq_state.buf,
4083 irq_state.len);
4084 break;
4085 }
4086 case KVM_S390_GET_IRQ_STATE: {
4087 struct kvm_s390_irq_state irq_state;
4088
4089 r = -EFAULT;
4090 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4091 break;
4092 if (irq_state.len == 0) {
4093 r = -EINVAL;
4094 break;
4095 }
4096 /* do not use irq_state.flags, it will break old QEMUs */
4097 r = kvm_s390_get_irq_state(vcpu,
4098 (__u8 __user *) irq_state.buf,
4099 irq_state.len);
4100 break;
4101 }
4102 default:
4103 r = -ENOTTY;
4104 }
4105
4106 vcpu_put(vcpu);
4107 return r;
4108 }
4109
4110 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4111 {
4112 #ifdef CONFIG_KVM_S390_UCONTROL
4113 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4114 && (kvm_is_ucontrol(vcpu->kvm))) {
4115 vmf->page = virt_to_page(vcpu->arch.sie_block);
4116 get_page(vmf->page);
4117 return 0;
4118 }
4119 #endif
4120 return VM_FAULT_SIGBUS;
4121 }
4122
4123 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4124 unsigned long npages)
4125 {
4126 return 0;
4127 }
4128
4129 /* Section: memory related */
4130 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4131 struct kvm_memory_slot *memslot,
4132 const struct kvm_userspace_memory_region *mem,
4133 enum kvm_mr_change change)
4134 {
4135 /* A few sanity checks. Memory slots have to start and end on a
4136 segment boundary (1MB). The memory in userland may be fragmented
4137 into various different vmas. It is okay to mmap() and munmap()
4138 parts of this slot at any time after this call. */
4139
4140 if (mem->userspace_addr & 0xffffful)
4141 return -EINVAL;
4142
4143 if (mem->memory_size & 0xffffful)
4144 return -EINVAL;
4145
4146 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4147 return -EINVAL;
4148
4149 return 0;
4150 }
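
/*
 * Illustrative memslot setup that passes the checks above (vm_fd and
 * backing are placeholders): userspace_addr and memory_size must be
 * multiples of 1MB and the slot must fit below kvm->arch.mem_limit:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256 << 20,
 *		.userspace_addr  = (__u64)(unsigned long)backing,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */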
4151
4152 void kvm_arch_commit_memory_region(struct kvm *kvm,
4153 const struct kvm_userspace_memory_region *mem,
4154 const struct kvm_memory_slot *old,
4155 const struct kvm_memory_slot *new,
4156 enum kvm_mr_change change)
4157 {
4158 int rc;
4159
4160 /* If the basics of the memslot do not change, we do not want
4161 * to update the gmap. Every update causes several unnecessary
4162 * segment translation exceptions. This is usually handled just
4163 * fine by the normal fault handler + gmap, but it will also
4164 * cause faults on the prefix page of running guest CPUs.
4165 */
4166 if (old->userspace_addr == mem->userspace_addr &&
4167 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4168 old->npages * PAGE_SIZE == mem->memory_size)
4169 return;
4170
4171 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4172 mem->guest_phys_addr, mem->memory_size);
4173 if (rc)
4174 pr_warn("failed to commit memory region\n");
4175 return;
4176 }
4177
4178 static inline unsigned long nonhyp_mask(int i)
4179 {
4180 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4181
4182 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4183 }
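
/*
 * Worked example for nonhyp_mask(): the i-th 2-bit field of sclp.hmfai
 * (counted from the most significant bit) selects one of four masks that
 * are ANDed with the host facility doubleword in kvm_s390_init():
 *
 *	nonhyp_fai == 0  ->  0x0000ffffffffffff
 *	nonhyp_fai == 1  ->  0x00000000ffffffff
 *	nonhyp_fai == 2  ->  0x000000000000ffff
 *	nonhyp_fai == 3  ->  0x0000000000000000
 */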
4184
4185 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4186 {
4187 vcpu->valid_wakeup = false;
4188 }
4189
4190 static int __init kvm_s390_init(void)
4191 {
4192 int i;
4193
4194 if (!sclp.has_sief2) {
4195 pr_info("SIE not available\n");
4196 return -ENODEV;
4197 }
4198
4199 if (nested && hpage) {
4200 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4201 return -EINVAL;
4202 }
4203
4204 for (i = 0; i < 16; i++)
4205 kvm_s390_fac_base[i] |=
4206 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4207
4208 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4209 }
4210
4211 static void __exit kvm_s390_exit(void)
4212 {
4213 kvm_exit();
4214 }
4215
4216 module_init(kvm_s390_init);
4217 module_exit(kvm_s390_exit);
4218
4219 /*
4220 * Enable autoloading of the kvm module.
4221 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4222 * since x86 takes a different approach.
4223 */
4224 #include <linux/miscdevice.h>
4225 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4226 MODULE_ALIAS("devname:kvm");
4227