1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
64 	{ "exit_null", VCPU_STAT(exit_null) },
65 	{ "exit_validity", VCPU_STAT(exit_validity) },
66 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
68 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
84 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
85 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
89 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 	{ "deliver_program", VCPU_STAT(deliver_program) },
93 	{ "deliver_io", VCPU_STAT(deliver_io) },
94 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
96 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
97 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
98 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
99 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
100 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101 	{ "inject_io", VM_STAT(inject_io) },
102 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
103 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
104 	{ "inject_program", VCPU_STAT(inject_program) },
105 	{ "inject_restart", VCPU_STAT(inject_restart) },
106 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
107 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110 	{ "inject_virtio", VM_STAT(inject_virtio) },
111 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
112 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
113 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
114 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
118 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
119 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
120 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
122 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
123 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
124 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
125 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
126 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
128 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
130 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
131 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
132 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
133 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
134 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
135 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
136 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
138 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
155 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
156 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
158 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
159 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
160 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
161 	{ NULL }
162 };
163 
164 struct kvm_s390_tod_clock_ext {
165 	__u8 epoch_idx;
166 	__u64 tod;
167 	__u8 reserved[7];
168 } __packed;
169 
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174 
175 /* allow 1m huge page guest backing, if !nested */
176 static int hpage;
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
179 
180 /*
181  * For now we handle at most 16 double words as this is what the s390 base
182  * kernel handles and stores in the prefix page. If we ever need to go beyond
183  * this, the code will need to change, but the external uapi can stay.
184  */
185 #define SIZE_INTERNAL 16
186 
187 /*
188  * Base feature mask that defines the default mask for facilities. Consists of the
189  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
190  */
191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
192 /*
193  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
194  * and defines the facilities that can be enabled via a cpu model.
195  */
196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
197 
198 static unsigned long kvm_s390_fac_size(void)
199 {
200 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203 		sizeof(S390_lowcore.stfle_fac_list));
204 
205 	return SIZE_INTERNAL;
206 }
207 
208 /* available cpu features supported by kvm */
209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
210 /* available subfunctions indicated via query / "test bit" */
211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
212 
213 static struct gmap_notifier gmap_notifier;
214 static struct gmap_notifier vsie_gmap_notifier;
215 debug_info_t *kvm_s390_dbf;
216 
217 /* Section: not file related */
218 int kvm_arch_hardware_enable(void)
219 {
220 	/* every s390 is virtualization enabled ;-) */
221 	return 0;
222 }
223 
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
225 			      unsigned long end);
226 
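/*
 * Adjust the epoch (and the epoch index if the multiple-epoch facility
 * is in use) of a SIE control block after the host TOD clock has been
 * changed by delta.
 */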
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
228 {
229 	u8 delta_idx = 0;
230 
231 	/*
232 	 * The TOD jumps by delta; we have to compensate for this by adding
233 	 * -delta to the epoch.
234 	 */
235 	delta = -delta;
236 
237 	/* sign-extension - we're adding to signed values below */
238 	if ((s64)delta < 0)
239 		delta_idx = -1;
240 
241 	scb->epoch += delta;
242 	if (scb->ecd & ECD_MEF) {
243 		scb->epdx += delta_idx;
244 		if (scb->epoch < delta)
245 			scb->epdx += 1;
246 	}
247 }
248 
249 /*
250  * This callback is executed during stop_machine(). All CPUs are therefore
251  * temporarily stopped. In order not to change guest behavior, we have to
252  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253  * so a CPU won't be stopped while calculating with the epoch.
254  */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
256 			  void *v)
257 {
258 	struct kvm *kvm;
259 	struct kvm_vcpu *vcpu;
260 	int i;
261 	unsigned long long *delta = v;
262 
263 	list_for_each_entry(kvm, &vm_list, vm_list) {
264 		kvm_for_each_vcpu(i, vcpu, kvm) {
265 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
266 			if (i == 0) {
267 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
269 			}
270 			if (vcpu->arch.cputm_enabled)
271 				vcpu->arch.cputm_start += *delta;
272 			if (vcpu->arch.vsie_block)
273 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
274 						   *delta);
275 		}
276 	}
277 	return NOTIFY_OK;
278 }
279 
280 static struct notifier_block kvm_clock_notifier = {
281 	.notifier_call = kvm_clock_sync,
282 };
283 
284 int kvm_arch_hardware_setup(void)
285 {
286 	gmap_notifier.notifier_call = kvm_gmap_notifier;
287 	gmap_register_pte_notifier(&gmap_notifier);
288 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289 	gmap_register_pte_notifier(&vsie_gmap_notifier);
290 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291 				       &kvm_clock_notifier);
292 	return 0;
293 }
294 
295 void kvm_arch_hardware_unsetup(void)
296 {
297 	gmap_unregister_pte_notifier(&gmap_notifier);
298 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
299 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
300 					 &kvm_clock_notifier);
301 }
302 
303 static void allow_cpu_feat(unsigned long nr)
304 {
305 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
306 }
307 
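/*
 * Query whether the PERFORM LOCKED OPERATION function code @nr is
 * installed by issuing PLO with the test bit (0x100) set in GR0.
 * Returns 1 if the function code is available.
 */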
308 static inline int plo_test_bit(unsigned char nr)
309 {
310 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
311 	int cc;
312 
313 	asm volatile(
314 		/* Parameter registers are ignored for "test bit" */
315 		"	plo	0,0,0,0(0)\n"
316 		"	ipm	%0\n"
317 		"	srl	%0,28\n"
318 		: "=d" (cc)
319 		: "d" (r0)
320 		: "cc");
321 	return cc == 0;
322 }
323 
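/*
 * Probe the host for the available subfunctions (PLO, PTFF and CPACF
 * queries) and for the SIE features that can be passed through to
 * guests, including the features required for nested virtualization.
 */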
324 static void kvm_s390_cpu_feat_init(void)
325 {
326 	int i;
327 
328 	for (i = 0; i < 256; ++i) {
329 		if (plo_test_bit(i))
330 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
331 	}
332 
333 	if (test_facility(28)) /* TOD-clock steering */
334 		ptff(kvm_s390_available_subfunc.ptff,
335 		     sizeof(kvm_s390_available_subfunc.ptff),
336 		     PTFF_QAF);
337 
338 	if (test_facility(17)) { /* MSA */
339 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
340 			      kvm_s390_available_subfunc.kmac);
341 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
342 			      kvm_s390_available_subfunc.kmc);
343 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
344 			      kvm_s390_available_subfunc.km);
345 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
346 			      kvm_s390_available_subfunc.kimd);
347 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
348 			      kvm_s390_available_subfunc.klmd);
349 	}
350 	if (test_facility(76)) /* MSA3 */
351 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
352 			      kvm_s390_available_subfunc.pckmo);
353 	if (test_facility(77)) { /* MSA4 */
354 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
355 			      kvm_s390_available_subfunc.kmctr);
356 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
357 			      kvm_s390_available_subfunc.kmf);
358 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
359 			      kvm_s390_available_subfunc.kmo);
360 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
361 			      kvm_s390_available_subfunc.pcc);
362 	}
363 	if (test_facility(57)) /* MSA5 */
364 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
365 			      kvm_s390_available_subfunc.ppno);
366 
367 	if (test_facility(146)) /* MSA8 */
368 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
369 			      kvm_s390_available_subfunc.kma);
370 
371 	if (MACHINE_HAS_ESOP)
372 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
373 	/*
374 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
375 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
376 	 */
377 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
378 	    !test_facility(3) || !nested)
379 		return;
380 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
381 	if (sclp.has_64bscao)
382 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
383 	if (sclp.has_siif)
384 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
385 	if (sclp.has_gpere)
386 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
387 	if (sclp.has_gsls)
388 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
389 	if (sclp.has_ib)
390 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
391 	if (sclp.has_cei)
392 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
393 	if (sclp.has_ibs)
394 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
395 	if (sclp.has_kss)
396 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
397 	/*
398 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
399 	 * all skey handling functions read/set the skey from the PGSTE
400 	 * instead of the real storage key.
401 	 *
402 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
403 	 * pages being detected as preserved although they are resident.
404 	 *
405 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
406 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
407 	 *
408 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
409 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
410 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
411 	 *
412 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
413 	 * cannot easily shadow the SCA because of the ipte lock.
414 	 */
415 }
416 
417 int kvm_arch_init(void *opaque)
418 {
419 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
420 	if (!kvm_s390_dbf)
421 		return -ENOMEM;
422 
423 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
424 		debug_unregister(kvm_s390_dbf);
425 		return -ENOMEM;
426 	}
427 
428 	kvm_s390_cpu_feat_init();
429 
430 	/* Register floating interrupt controller interface. */
431 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
432 }
433 
434 void kvm_arch_exit(void)
435 {
436 	debug_unregister(kvm_s390_dbf);
437 }
438 
439 /* Section: device related */
440 long kvm_arch_dev_ioctl(struct file *filp,
441 			unsigned int ioctl, unsigned long arg)
442 {
443 	if (ioctl == KVM_S390_ENABLE_SIE)
444 		return s390_enable_sie();
445 	return -EINVAL;
446 }
447 
448 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
449 {
450 	int r;
451 
452 	switch (ext) {
453 	case KVM_CAP_S390_PSW:
454 	case KVM_CAP_S390_GMAP:
455 	case KVM_CAP_SYNC_MMU:
456 #ifdef CONFIG_KVM_S390_UCONTROL
457 	case KVM_CAP_S390_UCONTROL:
458 #endif
459 	case KVM_CAP_ASYNC_PF:
460 	case KVM_CAP_SYNC_REGS:
461 	case KVM_CAP_ONE_REG:
462 	case KVM_CAP_ENABLE_CAP:
463 	case KVM_CAP_S390_CSS_SUPPORT:
464 	case KVM_CAP_IOEVENTFD:
465 	case KVM_CAP_DEVICE_CTRL:
466 	case KVM_CAP_ENABLE_CAP_VM:
467 	case KVM_CAP_S390_IRQCHIP:
468 	case KVM_CAP_VM_ATTRIBUTES:
469 	case KVM_CAP_MP_STATE:
470 	case KVM_CAP_IMMEDIATE_EXIT:
471 	case KVM_CAP_S390_INJECT_IRQ:
472 	case KVM_CAP_S390_USER_SIGP:
473 	case KVM_CAP_S390_USER_STSI:
474 	case KVM_CAP_S390_SKEYS:
475 	case KVM_CAP_S390_IRQ_STATE:
476 	case KVM_CAP_S390_USER_INSTR0:
477 	case KVM_CAP_S390_CMMA_MIGRATION:
478 	case KVM_CAP_S390_AIS:
479 	case KVM_CAP_S390_AIS_MIGRATION:
480 		r = 1;
481 		break;
482 	case KVM_CAP_S390_HPAGE_1M:
483 		r = 0;
484 		if (hpage && !kvm_is_ucontrol(kvm))
485 			r = 1;
486 		break;
487 	case KVM_CAP_S390_MEM_OP:
488 		r = MEM_OP_MAX_SIZE;
489 		break;
490 	case KVM_CAP_NR_VCPUS:
491 	case KVM_CAP_MAX_VCPUS:
492 		r = KVM_S390_BSCA_CPU_SLOTS;
493 		if (!kvm_s390_use_sca_entries())
494 			r = KVM_MAX_VCPUS;
495 		else if (sclp.has_esca && sclp.has_64bscao)
496 			r = KVM_S390_ESCA_CPU_SLOTS;
497 		break;
498 	case KVM_CAP_NR_MEMSLOTS:
499 		r = KVM_USER_MEM_SLOTS;
500 		break;
501 	case KVM_CAP_S390_COW:
502 		r = MACHINE_HAS_ESOP;
503 		break;
504 	case KVM_CAP_S390_VECTOR_REGISTERS:
505 		r = MACHINE_HAS_VX;
506 		break;
507 	case KVM_CAP_S390_RI:
508 		r = test_facility(64);
509 		break;
510 	case KVM_CAP_S390_GS:
511 		r = test_facility(133);
512 		break;
513 	case KVM_CAP_S390_BPB:
514 		r = test_facility(82);
515 		break;
516 	default:
517 		r = 0;
518 	}
519 	return r;
520 }
521 
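/*
 * Transfer the dirty state of the gmap segments backing @memslot into
 * the KVM dirty bitmap of that memslot.
 */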
522 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
523 				    struct kvm_memory_slot *memslot)
524 {
525 	int i;
526 	gfn_t cur_gfn, last_gfn;
527 	unsigned long gaddr, vmaddr;
528 	struct gmap *gmap = kvm->arch.gmap;
529 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
530 
531 	/* Loop over all guest segments */
532 	cur_gfn = memslot->base_gfn;
533 	last_gfn = memslot->base_gfn + memslot->npages;
534 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
535 		gaddr = gfn_to_gpa(cur_gfn);
536 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
537 		if (kvm_is_error_hva(vmaddr))
538 			continue;
539 
540 		bitmap_zero(bitmap, _PAGE_ENTRIES);
541 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
542 		for (i = 0; i < _PAGE_ENTRIES; i++) {
543 			if (test_bit(i, bitmap))
544 				mark_page_dirty(kvm, cur_gfn + i);
545 		}
546 
547 		if (fatal_signal_pending(current))
548 			return;
549 		cond_resched();
550 	}
551 }
552 
553 /* Section: vm related */
554 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
555 
556 /*
557  * Get (and clear) the dirty memory log for a memory slot.
558  */
559 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
560 			       struct kvm_dirty_log *log)
561 {
562 	int r;
563 	unsigned long n;
564 	struct kvm_memslots *slots;
565 	struct kvm_memory_slot *memslot;
566 	int is_dirty = 0;
567 
568 	if (kvm_is_ucontrol(kvm))
569 		return -EINVAL;
570 
571 	mutex_lock(&kvm->slots_lock);
572 
573 	r = -EINVAL;
574 	if (log->slot >= KVM_USER_MEM_SLOTS)
575 		goto out;
576 
577 	slots = kvm_memslots(kvm);
578 	memslot = id_to_memslot(slots, log->slot);
579 	r = -ENOENT;
580 	if (!memslot->dirty_bitmap)
581 		goto out;
582 
583 	kvm_s390_sync_dirty_log(kvm, memslot);
584 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
585 	if (r)
586 		goto out;
587 
588 	/* Clear the dirty log */
589 	if (is_dirty) {
590 		n = kvm_dirty_bitmap_bytes(memslot);
591 		memset(memslot->dirty_bitmap, 0, n);
592 	}
593 	r = 0;
594 out:
595 	mutex_unlock(&kvm->slots_lock);
596 	return r;
597 }
598 
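/* Request interception of the operation exception on all vcpus. */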
599 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
600 {
601 	unsigned int i;
602 	struct kvm_vcpu *vcpu;
603 
604 	kvm_for_each_vcpu(i, vcpu, kvm) {
605 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
606 	}
607 }
608 
609 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
610 {
611 	int r;
612 
613 	if (cap->flags)
614 		return -EINVAL;
615 
616 	switch (cap->cap) {
617 	case KVM_CAP_S390_IRQCHIP:
618 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
619 		kvm->arch.use_irqchip = 1;
620 		r = 0;
621 		break;
622 	case KVM_CAP_S390_USER_SIGP:
623 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
624 		kvm->arch.user_sigp = 1;
625 		r = 0;
626 		break;
627 	case KVM_CAP_S390_VECTOR_REGISTERS:
628 		mutex_lock(&kvm->lock);
629 		if (kvm->created_vcpus) {
630 			r = -EBUSY;
631 		} else if (MACHINE_HAS_VX) {
632 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
633 			set_kvm_facility(kvm->arch.model.fac_list, 129);
634 			if (test_facility(134)) {
635 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
636 				set_kvm_facility(kvm->arch.model.fac_list, 134);
637 			}
638 			if (test_facility(135)) {
639 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
640 				set_kvm_facility(kvm->arch.model.fac_list, 135);
641 			}
642 			r = 0;
643 		} else
644 			r = -EINVAL;
645 		mutex_unlock(&kvm->lock);
646 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
647 			 r ? "(not available)" : "(success)");
648 		break;
649 	case KVM_CAP_S390_RI:
650 		r = -EINVAL;
651 		mutex_lock(&kvm->lock);
652 		if (kvm->created_vcpus) {
653 			r = -EBUSY;
654 		} else if (test_facility(64)) {
655 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
656 			set_kvm_facility(kvm->arch.model.fac_list, 64);
657 			r = 0;
658 		}
659 		mutex_unlock(&kvm->lock);
660 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
661 			 r ? "(not available)" : "(success)");
662 		break;
663 	case KVM_CAP_S390_AIS:
664 		mutex_lock(&kvm->lock);
665 		if (kvm->created_vcpus) {
666 			r = -EBUSY;
667 		} else {
668 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
669 			set_kvm_facility(kvm->arch.model.fac_list, 72);
670 			r = 0;
671 		}
672 		mutex_unlock(&kvm->lock);
673 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
674 			 r ? "(not available)" : "(success)");
675 		break;
676 	case KVM_CAP_S390_GS:
677 		r = -EINVAL;
678 		mutex_lock(&kvm->lock);
679 		if (kvm->created_vcpus) {
680 			r = -EBUSY;
681 		} else if (test_facility(133)) {
682 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
683 			set_kvm_facility(kvm->arch.model.fac_list, 133);
684 			r = 0;
685 		}
686 		mutex_unlock(&kvm->lock);
687 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
688 			 r ? "(not available)" : "(success)");
689 		break;
690 	case KVM_CAP_S390_HPAGE_1M:
691 		mutex_lock(&kvm->lock);
692 		if (kvm->created_vcpus)
693 			r = -EBUSY;
694 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
695 			r = -EINVAL;
696 		else {
697 			r = 0;
698 			down_write(&kvm->mm->mmap_sem);
699 			kvm->mm->context.allow_gmap_hpage_1m = 1;
700 			up_write(&kvm->mm->mmap_sem);
701 			/*
702 			 * We might have to create fake 4k page
703 			 * tables. To prevent the hardware from working on
704 			 * stale PGSTEs, we emulate these instructions.
705 			 */
706 			kvm->arch.use_skf = 0;
707 			kvm->arch.use_pfmfi = 0;
708 		}
709 		mutex_unlock(&kvm->lock);
710 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
711 			 r ? "(not available)" : "(success)");
712 		break;
713 	case KVM_CAP_S390_USER_STSI:
714 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
715 		kvm->arch.user_stsi = 1;
716 		r = 0;
717 		break;
718 	case KVM_CAP_S390_USER_INSTR0:
719 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
720 		kvm->arch.user_instr0 = 1;
721 		icpt_operexc_on_all_vcpus(kvm);
722 		r = 0;
723 		break;
724 	default:
725 		r = -EINVAL;
726 		break;
727 	}
728 	return r;
729 }
730 
731 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
732 {
733 	int ret;
734 
735 	switch (attr->attr) {
736 	case KVM_S390_VM_MEM_LIMIT_SIZE:
737 		ret = 0;
738 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
739 			 kvm->arch.mem_limit);
740 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
741 			ret = -EFAULT;
742 		break;
743 	default:
744 		ret = -ENXIO;
745 		break;
746 	}
747 	return ret;
748 }
749 
750 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
751 {
752 	int ret;
753 	unsigned int idx;
754 	switch (attr->attr) {
755 	case KVM_S390_VM_MEM_ENABLE_CMMA:
756 		ret = -ENXIO;
757 		if (!sclp.has_cmma)
758 			break;
759 
760 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
761 		mutex_lock(&kvm->lock);
762 		if (kvm->created_vcpus)
763 			ret = -EBUSY;
764 		else if (kvm->mm->context.allow_gmap_hpage_1m)
765 			ret = -EINVAL;
766 		else {
767 			kvm->arch.use_cmma = 1;
768 			/* Not compatible with cmma. */
769 			kvm->arch.use_pfmfi = 0;
770 			ret = 0;
771 		}
772 		mutex_unlock(&kvm->lock);
773 		break;
774 	case KVM_S390_VM_MEM_CLR_CMMA:
775 		ret = -ENXIO;
776 		if (!sclp.has_cmma)
777 			break;
778 		ret = -EINVAL;
779 		if (!kvm->arch.use_cmma)
780 			break;
781 
782 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
783 		mutex_lock(&kvm->lock);
784 		idx = srcu_read_lock(&kvm->srcu);
785 		s390_reset_cmma(kvm->arch.gmap->mm);
786 		srcu_read_unlock(&kvm->srcu, idx);
787 		mutex_unlock(&kvm->lock);
788 		ret = 0;
789 		break;
790 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
791 		unsigned long new_limit;
792 
793 		if (kvm_is_ucontrol(kvm))
794 			return -EINVAL;
795 
796 		if (get_user(new_limit, (u64 __user *)attr->addr))
797 			return -EFAULT;
798 
799 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
800 		    new_limit > kvm->arch.mem_limit)
801 			return -E2BIG;
802 
803 		if (!new_limit)
804 			return -EINVAL;
805 
806 		/* gmap_create takes last usable address */
807 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
808 			new_limit -= 1;
809 
810 		ret = -EBUSY;
811 		mutex_lock(&kvm->lock);
812 		if (!kvm->created_vcpus) {
813 			/* gmap_create will round the limit up */
814 			struct gmap *new = gmap_create(current->mm, new_limit);
815 
816 			if (!new) {
817 				ret = -ENOMEM;
818 			} else {
819 				gmap_remove(kvm->arch.gmap);
820 				new->private = kvm;
821 				kvm->arch.gmap = new;
822 				ret = 0;
823 			}
824 		}
825 		mutex_unlock(&kvm->lock);
826 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
827 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
828 			 (void *) kvm->arch.gmap->asce);
829 		break;
830 	}
831 	default:
832 		ret = -ENXIO;
833 		break;
834 	}
835 	return ret;
836 }
837 
838 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
839 
840 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
841 {
842 	struct kvm_vcpu *vcpu;
843 	int i;
844 
845 	kvm_s390_vcpu_block_all(kvm);
846 
847 	kvm_for_each_vcpu(i, vcpu, kvm)
848 		kvm_s390_vcpu_crypto_setup(vcpu);
849 
850 	kvm_s390_vcpu_unblock_all(kvm);
851 }
852 
853 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
854 {
855 	if (!test_kvm_facility(kvm, 76))
856 		return -EINVAL;
857 
858 	mutex_lock(&kvm->lock);
859 	switch (attr->attr) {
860 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
861 		get_random_bytes(
862 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
863 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
864 		kvm->arch.crypto.aes_kw = 1;
865 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
866 		break;
867 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
868 		get_random_bytes(
869 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
870 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
871 		kvm->arch.crypto.dea_kw = 1;
872 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
873 		break;
874 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
875 		kvm->arch.crypto.aes_kw = 0;
876 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
877 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
878 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
879 		break;
880 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
881 		kvm->arch.crypto.dea_kw = 0;
882 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
883 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
884 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
885 		break;
886 	default:
887 		mutex_unlock(&kvm->lock);
888 		return -ENXIO;
889 	}
890 
891 	kvm_s390_vcpu_crypto_reset_all(kvm);
892 	mutex_unlock(&kvm->lock);
893 	return 0;
894 }
895 
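/* Issue a synchronous request on every vcpu of the VM. */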
896 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
897 {
898 	int cx;
899 	struct kvm_vcpu *vcpu;
900 
901 	kvm_for_each_vcpu(cx, vcpu, kvm)
902 		kvm_s390_sync_request(req, vcpu);
903 }
904 
905 /*
906  * Must be called with kvm->srcu held to avoid races on memslots, and with
907  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
908  */
909 static int kvm_s390_vm_start_migration(struct kvm *kvm)
910 {
911 	struct kvm_memory_slot *ms;
912 	struct kvm_memslots *slots;
913 	unsigned long ram_pages = 0;
914 	int slotnr;
915 
916 	/* migration mode already enabled */
917 	if (kvm->arch.migration_mode)
918 		return 0;
919 	slots = kvm_memslots(kvm);
920 	if (!slots || !slots->used_slots)
921 		return -EINVAL;
922 
923 	if (!kvm->arch.use_cmma) {
924 		kvm->arch.migration_mode = 1;
925 		return 0;
926 	}
927 	/* mark all the pages in active slots as dirty */
928 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
929 		ms = slots->memslots + slotnr;
930 		/*
931 		 * The second half of the bitmap is only used on x86,
932 		 * and would be wasted otherwise, so we put it to good
933 		 * use here to keep track of the state of the storage
934 		 * attributes.
935 		 */
936 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
937 		ram_pages += ms->npages;
938 	}
939 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
940 	kvm->arch.migration_mode = 1;
941 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
942 	return 0;
943 }
944 
945 /*
946  * Must be called with kvm->slots_lock to avoid races with ourselves and
947  * kvm_s390_vm_start_migration.
948  */
949 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
950 {
951 	/* migration mode already disabled */
952 	if (!kvm->arch.migration_mode)
953 		return 0;
954 	kvm->arch.migration_mode = 0;
955 	if (kvm->arch.use_cmma)
956 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
957 	return 0;
958 }
959 
960 static int kvm_s390_vm_set_migration(struct kvm *kvm,
961 				     struct kvm_device_attr *attr)
962 {
963 	int res = -ENXIO;
964 
965 	mutex_lock(&kvm->slots_lock);
966 	switch (attr->attr) {
967 	case KVM_S390_VM_MIGRATION_START:
968 		res = kvm_s390_vm_start_migration(kvm);
969 		break;
970 	case KVM_S390_VM_MIGRATION_STOP:
971 		res = kvm_s390_vm_stop_migration(kvm);
972 		break;
973 	default:
974 		break;
975 	}
976 	mutex_unlock(&kvm->slots_lock);
977 
978 	return res;
979 }
980 
981 static int kvm_s390_vm_get_migration(struct kvm *kvm,
982 				     struct kvm_device_attr *attr)
983 {
984 	u64 mig = kvm->arch.migration_mode;
985 
986 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
987 		return -ENXIO;
988 
989 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
990 		return -EFAULT;
991 	return 0;
992 }
993 
994 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
995 {
996 	struct kvm_s390_vm_tod_clock gtod;
997 
998 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
999 		return -EFAULT;
1000 
1001 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1002 		return -EINVAL;
1003 	kvm_s390_set_tod_clock(kvm, &gtod);
1004 
1005 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1006 		gtod.epoch_idx, gtod.tod);
1007 
1008 	return 0;
1009 }
1010 
1011 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1012 {
1013 	u8 gtod_high;
1014 
1015 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1016 					   sizeof(gtod_high)))
1017 		return -EFAULT;
1018 
1019 	if (gtod_high != 0)
1020 		return -EINVAL;
1021 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1022 
1023 	return 0;
1024 }
1025 
1026 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1027 {
1028 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1029 
1030 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1031 			   sizeof(gtod.tod)))
1032 		return -EFAULT;
1033 
1034 	kvm_s390_set_tod_clock(kvm, &gtod);
1035 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1036 	return 0;
1037 }
1038 
1039 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041 	int ret;
1042 
1043 	if (attr->flags)
1044 		return -EINVAL;
1045 
1046 	switch (attr->attr) {
1047 	case KVM_S390_VM_TOD_EXT:
1048 		ret = kvm_s390_set_tod_ext(kvm, attr);
1049 		break;
1050 	case KVM_S390_VM_TOD_HIGH:
1051 		ret = kvm_s390_set_tod_high(kvm, attr);
1052 		break;
1053 	case KVM_S390_VM_TOD_LOW:
1054 		ret = kvm_s390_set_tod_low(kvm, attr);
1055 		break;
1056 	default:
1057 		ret = -ENXIO;
1058 		break;
1059 	}
1060 	return ret;
1061 }
1062 
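/*
 * Read the host TOD clock (including the epoch index if the
 * multiple-epoch facility is available) and convert it into the guest's
 * view by applying the VM's epoch difference.
 */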
1063 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1064 				   struct kvm_s390_vm_tod_clock *gtod)
1065 {
1066 	struct kvm_s390_tod_clock_ext htod;
1067 
1068 	preempt_disable();
1069 
1070 	get_tod_clock_ext((char *)&htod);
1071 
1072 	gtod->tod = htod.tod + kvm->arch.epoch;
1073 	gtod->epoch_idx = 0;
1074 	if (test_kvm_facility(kvm, 139)) {
1075 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1076 		if (gtod->tod < htod.tod)
1077 			gtod->epoch_idx += 1;
1078 	}
1079 
1080 	preempt_enable();
1081 }
1082 
1083 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1084 {
1085 	struct kvm_s390_vm_tod_clock gtod;
1086 
1087 	memset(&gtod, 0, sizeof(gtod));
1088 	kvm_s390_get_tod_clock(kvm, &gtod);
1089 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1090 		return -EFAULT;
1091 
1092 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1093 		gtod.epoch_idx, gtod.tod);
1094 	return 0;
1095 }
1096 
1097 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099 	u8 gtod_high = 0;
1100 
1101 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1102 					 sizeof(gtod_high)))
1103 		return -EFAULT;
1104 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1105 
1106 	return 0;
1107 }
1108 
1109 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1110 {
1111 	u64 gtod;
1112 
1113 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1114 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1115 		return -EFAULT;
1116 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1117 
1118 	return 0;
1119 }
1120 
1121 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 	int ret;
1124 
1125 	if (attr->flags)
1126 		return -EINVAL;
1127 
1128 	switch (attr->attr) {
1129 	case KVM_S390_VM_TOD_EXT:
1130 		ret = kvm_s390_get_tod_ext(kvm, attr);
1131 		break;
1132 	case KVM_S390_VM_TOD_HIGH:
1133 		ret = kvm_s390_get_tod_high(kvm, attr);
1134 		break;
1135 	case KVM_S390_VM_TOD_LOW:
1136 		ret = kvm_s390_get_tod_low(kvm, attr);
1137 		break;
1138 	default:
1139 		ret = -ENXIO;
1140 		break;
1141 	}
1142 	return ret;
1143 }
1144 
1145 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147 	struct kvm_s390_vm_cpu_processor *proc;
1148 	u16 lowest_ibc, unblocked_ibc;
1149 	int ret = 0;
1150 
1151 	mutex_lock(&kvm->lock);
1152 	if (kvm->created_vcpus) {
1153 		ret = -EBUSY;
1154 		goto out;
1155 	}
1156 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1157 	if (!proc) {
1158 		ret = -ENOMEM;
1159 		goto out;
1160 	}
1161 	if (!copy_from_user(proc, (void __user *)attr->addr,
1162 			    sizeof(*proc))) {
1163 		kvm->arch.model.cpuid = proc->cpuid;
1164 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1165 		unblocked_ibc = sclp.ibc & 0xfff;
1166 		if (lowest_ibc && proc->ibc) {
1167 			if (proc->ibc > unblocked_ibc)
1168 				kvm->arch.model.ibc = unblocked_ibc;
1169 			else if (proc->ibc < lowest_ibc)
1170 				kvm->arch.model.ibc = lowest_ibc;
1171 			else
1172 				kvm->arch.model.ibc = proc->ibc;
1173 		}
1174 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1175 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1176 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1177 			 kvm->arch.model.ibc,
1178 			 kvm->arch.model.cpuid);
1179 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1180 			 kvm->arch.model.fac_list[0],
1181 			 kvm->arch.model.fac_list[1],
1182 			 kvm->arch.model.fac_list[2]);
1183 	} else
1184 		ret = -EFAULT;
1185 	kfree(proc);
1186 out:
1187 	mutex_unlock(&kvm->lock);
1188 	return ret;
1189 }
1190 
1191 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1192 				       struct kvm_device_attr *attr)
1193 {
1194 	struct kvm_s390_vm_cpu_feat data;
1195 
1196 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1197 		return -EFAULT;
1198 	if (!bitmap_subset((unsigned long *) data.feat,
1199 			   kvm_s390_available_cpu_feat,
1200 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1201 		return -EINVAL;
1202 
1203 	mutex_lock(&kvm->lock);
1204 	if (kvm->created_vcpus) {
1205 		mutex_unlock(&kvm->lock);
1206 		return -EBUSY;
1207 	}
1208 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1209 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1210 	mutex_unlock(&kvm->lock);
1211 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1212 			 data.feat[0],
1213 			 data.feat[1],
1214 			 data.feat[2]);
1215 	return 0;
1216 }
1217 
1218 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1219 					  struct kvm_device_attr *attr)
1220 {
1221 	/*
1222 	 * Once supported by kernel + hw, we have to store the subfunctions
1223 	 * in kvm->arch and remember that user space configured them.
1224 	 */
1225 	return -ENXIO;
1226 }
1227 
1228 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230 	int ret = -ENXIO;
1231 
1232 	switch (attr->attr) {
1233 	case KVM_S390_VM_CPU_PROCESSOR:
1234 		ret = kvm_s390_set_processor(kvm, attr);
1235 		break;
1236 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1237 		ret = kvm_s390_set_processor_feat(kvm, attr);
1238 		break;
1239 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1240 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1241 		break;
1242 	}
1243 	return ret;
1244 }
1245 
1246 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248 	struct kvm_s390_vm_cpu_processor *proc;
1249 	int ret = 0;
1250 
1251 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1252 	if (!proc) {
1253 		ret = -ENOMEM;
1254 		goto out;
1255 	}
1256 	proc->cpuid = kvm->arch.model.cpuid;
1257 	proc->ibc = kvm->arch.model.ibc;
1258 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1259 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1260 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1261 		 kvm->arch.model.ibc,
1262 		 kvm->arch.model.cpuid);
1263 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1264 		 kvm->arch.model.fac_list[0],
1265 		 kvm->arch.model.fac_list[1],
1266 		 kvm->arch.model.fac_list[2]);
1267 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1268 		ret = -EFAULT;
1269 	kfree(proc);
1270 out:
1271 	return ret;
1272 }
1273 
1274 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1275 {
1276 	struct kvm_s390_vm_cpu_machine *mach;
1277 	int ret = 0;
1278 
1279 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1280 	if (!mach) {
1281 		ret = -ENOMEM;
1282 		goto out;
1283 	}
1284 	get_cpu_id((struct cpuid *) &mach->cpuid);
1285 	mach->ibc = sclp.ibc;
1286 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1287 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1288 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1289 	       sizeof(S390_lowcore.stfle_fac_list));
1290 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1291 		 kvm->arch.model.ibc,
1292 		 kvm->arch.model.cpuid);
1293 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1294 		 mach->fac_mask[0],
1295 		 mach->fac_mask[1],
1296 		 mach->fac_mask[2]);
1297 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1298 		 mach->fac_list[0],
1299 		 mach->fac_list[1],
1300 		 mach->fac_list[2]);
1301 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1302 		ret = -EFAULT;
1303 	kfree(mach);
1304 out:
1305 	return ret;
1306 }
1307 
1308 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1309 				       struct kvm_device_attr *attr)
1310 {
1311 	struct kvm_s390_vm_cpu_feat data;
1312 
1313 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1314 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1315 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1316 		return -EFAULT;
1317 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1318 			 data.feat[0],
1319 			 data.feat[1],
1320 			 data.feat[2]);
1321 	return 0;
1322 }
1323 
1324 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1325 				     struct kvm_device_attr *attr)
1326 {
1327 	struct kvm_s390_vm_cpu_feat data;
1328 
1329 	bitmap_copy((unsigned long *) data.feat,
1330 		    kvm_s390_available_cpu_feat,
1331 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1332 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1333 		return -EFAULT;
1334 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1335 			 data.feat[0],
1336 			 data.feat[1],
1337 			 data.feat[2]);
1338 	return 0;
1339 }
1340 
1341 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1342 					  struct kvm_device_attr *attr)
1343 {
1344 	/*
1345 	 * Once we can actually configure subfunctions (kernel + hw support),
1346 	 * we have to check if they were already set by user space, if so copy
1347 	 * them from kvm->arch.
1348 	 */
1349 	return -ENXIO;
1350 }
1351 
1352 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1353 					struct kvm_device_attr *attr)
1354 {
1355 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1356 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1357 		return -EFAULT;
1358 	return 0;
1359 }

1360 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1361 {
1362 	int ret = -ENXIO;
1363 
1364 	switch (attr->attr) {
1365 	case KVM_S390_VM_CPU_PROCESSOR:
1366 		ret = kvm_s390_get_processor(kvm, attr);
1367 		break;
1368 	case KVM_S390_VM_CPU_MACHINE:
1369 		ret = kvm_s390_get_machine(kvm, attr);
1370 		break;
1371 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1372 		ret = kvm_s390_get_processor_feat(kvm, attr);
1373 		break;
1374 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1375 		ret = kvm_s390_get_machine_feat(kvm, attr);
1376 		break;
1377 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1378 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1379 		break;
1380 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1381 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1382 		break;
1383 	}
1384 	return ret;
1385 }
1386 
1387 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1388 {
1389 	int ret;
1390 
1391 	switch (attr->group) {
1392 	case KVM_S390_VM_MEM_CTRL:
1393 		ret = kvm_s390_set_mem_control(kvm, attr);
1394 		break;
1395 	case KVM_S390_VM_TOD:
1396 		ret = kvm_s390_set_tod(kvm, attr);
1397 		break;
1398 	case KVM_S390_VM_CPU_MODEL:
1399 		ret = kvm_s390_set_cpu_model(kvm, attr);
1400 		break;
1401 	case KVM_S390_VM_CRYPTO:
1402 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1403 		break;
1404 	case KVM_S390_VM_MIGRATION:
1405 		ret = kvm_s390_vm_set_migration(kvm, attr);
1406 		break;
1407 	default:
1408 		ret = -ENXIO;
1409 		break;
1410 	}
1411 
1412 	return ret;
1413 }
1414 
1415 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1416 {
1417 	int ret;
1418 
1419 	switch (attr->group) {
1420 	case KVM_S390_VM_MEM_CTRL:
1421 		ret = kvm_s390_get_mem_control(kvm, attr);
1422 		break;
1423 	case KVM_S390_VM_TOD:
1424 		ret = kvm_s390_get_tod(kvm, attr);
1425 		break;
1426 	case KVM_S390_VM_CPU_MODEL:
1427 		ret = kvm_s390_get_cpu_model(kvm, attr);
1428 		break;
1429 	case KVM_S390_VM_MIGRATION:
1430 		ret = kvm_s390_vm_get_migration(kvm, attr);
1431 		break;
1432 	default:
1433 		ret = -ENXIO;
1434 		break;
1435 	}
1436 
1437 	return ret;
1438 }
1439 
1440 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442 	int ret;
1443 
1444 	switch (attr->group) {
1445 	case KVM_S390_VM_MEM_CTRL:
1446 		switch (attr->attr) {
1447 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1448 		case KVM_S390_VM_MEM_CLR_CMMA:
1449 			ret = sclp.has_cmma ? 0 : -ENXIO;
1450 			break;
1451 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1452 			ret = 0;
1453 			break;
1454 		default:
1455 			ret = -ENXIO;
1456 			break;
1457 		}
1458 		break;
1459 	case KVM_S390_VM_TOD:
1460 		switch (attr->attr) {
1461 		case KVM_S390_VM_TOD_LOW:
1462 		case KVM_S390_VM_TOD_HIGH:
1463 			ret = 0;
1464 			break;
1465 		default:
1466 			ret = -ENXIO;
1467 			break;
1468 		}
1469 		break;
1470 	case KVM_S390_VM_CPU_MODEL:
1471 		switch (attr->attr) {
1472 		case KVM_S390_VM_CPU_PROCESSOR:
1473 		case KVM_S390_VM_CPU_MACHINE:
1474 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1475 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1476 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1477 			ret = 0;
1478 			break;
1479 		/* configuring subfunctions is not supported yet */
1480 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1481 		default:
1482 			ret = -ENXIO;
1483 			break;
1484 		}
1485 		break;
1486 	case KVM_S390_VM_CRYPTO:
1487 		switch (attr->attr) {
1488 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1489 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1490 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1491 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1492 			ret = 0;
1493 			break;
1494 		default:
1495 			ret = -ENXIO;
1496 			break;
1497 		}
1498 		break;
1499 	case KVM_S390_VM_MIGRATION:
1500 		ret = 0;
1501 		break;
1502 	default:
1503 		ret = -ENXIO;
1504 		break;
1505 	}
1506 
1507 	return ret;
1508 }
1509 
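/* Copy the storage keys of the guest pages described by @args to user space. */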
1510 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1511 {
1512 	uint8_t *keys;
1513 	uint64_t hva;
1514 	int srcu_idx, i, r = 0;
1515 
1516 	if (args->flags != 0)
1517 		return -EINVAL;
1518 
1519 	/* Is this guest using storage keys? */
1520 	if (!mm_uses_skeys(current->mm))
1521 		return KVM_S390_GET_SKEYS_NONE;
1522 
1523 	/* Enforce sane limit on memory allocation */
1524 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1525 		return -EINVAL;
1526 
1527 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1528 	if (!keys)
1529 		return -ENOMEM;
1530 
1531 	down_read(&current->mm->mmap_sem);
1532 	srcu_idx = srcu_read_lock(&kvm->srcu);
1533 	for (i = 0; i < args->count; i++) {
1534 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1535 		if (kvm_is_error_hva(hva)) {
1536 			r = -EFAULT;
1537 			break;
1538 		}
1539 
1540 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1541 		if (r)
1542 			break;
1543 	}
1544 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1545 	up_read(&current->mm->mmap_sem);
1546 
1547 	if (!r) {
1548 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1549 				 sizeof(uint8_t) * args->count);
1550 		if (r)
1551 			r = -EFAULT;
1552 	}
1553 
1554 	kvfree(keys);
1555 	return r;
1556 }
1557 
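/* Set the storage keys of the guest pages described by @args from user space. */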
1558 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1559 {
1560 	uint8_t *keys;
1561 	uint64_t hva;
1562 	int srcu_idx, i, r = 0;
1563 	bool unlocked;
1564 
1565 	if (args->flags != 0)
1566 		return -EINVAL;
1567 
1568 	/* Enforce sane limit on memory allocation */
1569 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1570 		return -EINVAL;
1571 
1572 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1573 	if (!keys)
1574 		return -ENOMEM;
1575 
1576 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1577 			   sizeof(uint8_t) * args->count);
1578 	if (r) {
1579 		r = -EFAULT;
1580 		goto out;
1581 	}
1582 
1583 	/* Enable storage key handling for the guest */
1584 	r = s390_enable_skey();
1585 	if (r)
1586 		goto out;
1587 
1588 	i = 0;
1589 	down_read(&current->mm->mmap_sem);
1590 	srcu_idx = srcu_read_lock(&kvm->srcu);
1591 	while (i < args->count) {
1592 		unlocked = false;
1593 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1594 		if (kvm_is_error_hva(hva)) {
1595 			r = -EFAULT;
1596 			break;
1597 		}
1598 
1599 		/* Lowest order bit is reserved */
1600 		if (keys[i] & 0x01) {
1601 			r = -EINVAL;
1602 			break;
1603 		}
1604 
1605 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1606 		if (r) {
1607 			r = fixup_user_fault(current, current->mm, hva,
1608 					     FAULT_FLAG_WRITE, &unlocked);
1609 			if (r)
1610 				break;
1611 		}
1612 		if (!r)
1613 			i++;
1614 	}
1615 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1616 	up_read(&current->mm->mmap_sem);
1617 out:
1618 	kvfree(keys);
1619 	return r;
1620 }
1621 
1622 /*
1623  * Base address and length must be sent at the start of each block, therefore
1624  * it's cheaper to send some clean data, as long as it's less than the size of
1625  * two longs.
1626  */
1627 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1628 /* for consistency */
1629 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1630 
1631 /*
1632  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1633  * address falls in a hole. In that case the index of one of the memslots
1634  * bordering the hole is returned.
1635  */
1636 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1637 {
1638 	int start = 0, end = slots->used_slots;
1639 	int slot = atomic_read(&slots->lru_slot);
1640 	struct kvm_memory_slot *memslots = slots->memslots;
1641 
1642 	if (gfn >= memslots[slot].base_gfn &&
1643 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1644 		return slot;
1645 
1646 	while (start < end) {
1647 		slot = start + (end - start) / 2;
1648 
1649 		if (gfn >= memslots[slot].base_gfn)
1650 			end = slot;
1651 		else
1652 			start = slot + 1;
1653 	}
1654 
1655 	if (gfn >= memslots[start].base_gfn &&
1656 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1657 		atomic_set(&slots->lru_slot, start);
1658 	}
1659 
1660 	return start;
1661 }
1662 
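/*
 * Read the CMMA (page usage) values of @bufsize consecutive pages
 * starting at args->start_gfn into @res, without consulting or
 * modifying the CMMA dirty bitmap.
 */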
1663 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1664 			      u8 *res, unsigned long bufsize)
1665 {
1666 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1667 
1668 	args->count = 0;
1669 	while (args->count < bufsize) {
1670 		hva = gfn_to_hva(kvm, cur_gfn);
1671 		/*
1672 		 * We return an error if the first value was invalid, but we
1673 		 * return successfully if at least one value was copied.
1674 		 */
1675 		if (kvm_is_error_hva(hva))
1676 			return args->count ? 0 : -EFAULT;
1677 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1678 			pgstev = 0;
1679 		res[args->count++] = (pgstev >> 24) & 0x43;
1680 		cur_gfn++;
1681 	}
1682 
1683 	return 0;
1684 }
1685 
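/*
 * Find the guest frame number of the next page whose CMMA dirty bit is
 * set, starting the search at @cur_gfn.
 */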
1686 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1687 					      unsigned long cur_gfn)
1688 {
1689 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1690 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1691 	unsigned long ofs = cur_gfn - ms->base_gfn;
1692 
1693 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1694 		slotidx--;
1695 		/* If we are above the highest slot, wrap around */
1696 		if (slotidx < 0)
1697 			slotidx = slots->used_slots - 1;
1698 
1699 		ms = slots->memslots + slotidx;
1700 		ofs = 0;
1701 	}
1702 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1703 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1704 		slotidx--;
1705 		ms = slots->memslots + slotidx;
1706 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1707 	}
1708 	return ms->base_gfn + ofs;
1709 }
1710 
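/*
 * Like kvm_s390_peek_cmma, but only dirty pages are considered and their
 * dirty bits are cleared (and the dirty page counter decremented) as the
 * attributes are read.
 */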
1711 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1712 			     u8 *res, unsigned long bufsize)
1713 {
1714 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1715 	struct kvm_memslots *slots = kvm_memslots(kvm);
1716 	struct kvm_memory_slot *ms;
1717 
1718 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1719 	ms = gfn_to_memslot(kvm, cur_gfn);
1720 	args->count = 0;
1721 	args->start_gfn = cur_gfn;
1722 	if (!ms)
1723 		return 0;
1724 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1725 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1726 
1727 	while (args->count < bufsize) {
1728 		hva = gfn_to_hva(kvm, cur_gfn);
1729 		if (kvm_is_error_hva(hva))
1730 			return 0;
1731 		/* Decrement only if we actually flipped the bit to 0 */
1732 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1733 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
1734 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1735 			pgstev = 0;
1736 		/* Save the value */
1737 		res[args->count++] = (pgstev >> 24) & 0x43;
1738 		/* If the next bit is too far away, stop. */
1739 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1740 			return 0;
1741 		/* If we reached the previous "next", find the next one */
1742 		if (cur_gfn == next_gfn)
1743 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1744 		/* Reached the end of memory or of the buffer, stop */
1745 		if ((next_gfn >= mem_end) ||
1746 		    (next_gfn - args->start_gfn >= bufsize))
1747 			return 0;
1748 		cur_gfn++;
1749 		/* Reached the end of the current memslot, take the next one. */
1750 		if (cur_gfn - ms->base_gfn >= ms->npages) {
1751 			ms = gfn_to_memslot(kvm, cur_gfn);
1752 			if (!ms)
1753 				return 0;
1754 		}
1755 	}
1756 	return 0;
1757 }
1758 
1759 /*
1760  * This function searches for the next page with dirty CMMA attributes, and
1761  * saves the attributes in the buffer up to either the end of the buffer or
1762  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1763  * no trailing clean bytes are saved.
1764  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1765  * output buffer will indicate 0 as length.
1766  */
1767 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1768 				  struct kvm_s390_cmma_log *args)
1769 {
1770 	unsigned long bufsize;
1771 	int srcu_idx, peek, ret;
1772 	u8 *values;
1773 
1774 	if (!kvm->arch.use_cmma)
1775 		return -ENXIO;
1776 	/* Invalid/unsupported flags were specified */
1777 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1778 		return -EINVAL;
1779 	/* Migration mode query, and we are not doing a migration */
1780 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1781 	if (!peek && !kvm->arch.migration_mode)
1782 		return -EINVAL;
1783 	/* CMMA is disabled or was not used, or the buffer has length zero */
1784 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1785 	if (!bufsize || !kvm->mm->context.uses_cmm) {
1786 		memset(args, 0, sizeof(*args));
1787 		return 0;
1788 	}
1789 	/* We are not peeking, and there are no dirty pages */
1790 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1791 		memset(args, 0, sizeof(*args));
1792 		return 0;
1793 	}
1794 
1795 	values = vmalloc(bufsize);
1796 	if (!values)
1797 		return -ENOMEM;
1798 
1799 	down_read(&kvm->mm->mmap_sem);
1800 	srcu_idx = srcu_read_lock(&kvm->srcu);
1801 	if (peek)
1802 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1803 	else
1804 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1805 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1806 	up_read(&kvm->mm->mmap_sem);
1807 
1808 	if (kvm->arch.migration_mode)
1809 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1810 	else
1811 		args->remaining = 0;
1812 
1813 	if (copy_to_user((void __user *)args->values, values, args->count))
1814 		ret = -EFAULT;
1815 
1816 	vfree(values);
1817 	return ret;
1818 }
1819 
1820 /*
1821  * This function sets the CMMA attributes for the given pages. If the input
1822  * buffer has zero length, no action is taken, otherwise the attributes are
1823  * set and the mm->context.uses_cmm flag is set.
1824  */
1825 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1826 				  const struct kvm_s390_cmma_log *args)
1827 {
1828 	unsigned long hva, mask, pgstev, i;
1829 	uint8_t *bits;
1830 	int srcu_idx, r = 0;
1831 
1832 	mask = args->mask;
1833 
1834 	if (!kvm->arch.use_cmma)
1835 		return -ENXIO;
1836 	/* invalid/unsupported flags */
1837 	if (args->flags != 0)
1838 		return -EINVAL;
1839 	/* Enforce sane limit on memory allocation */
1840 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1841 		return -EINVAL;
1842 	/* Nothing to do */
1843 	if (args->count == 0)
1844 		return 0;
1845 
1846 	bits = vmalloc(array_size(sizeof(*bits), args->count));
1847 	if (!bits)
1848 		return -ENOMEM;
1849 
1850 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1851 	if (r) {
1852 		r = -EFAULT;
1853 		goto out;
1854 	}
1855 
1856 	down_read(&kvm->mm->mmap_sem);
1857 	srcu_idx = srcu_read_lock(&kvm->srcu);
1858 	for (i = 0; i < args->count; i++) {
1859 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1860 		if (kvm_is_error_hva(hva)) {
1861 			r = -EFAULT;
1862 			break;
1863 		}
1864 
1865 		pgstev = bits[i];
1866 		pgstev = pgstev << 24;
1867 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1868 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1869 	}
1870 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1871 	up_read(&kvm->mm->mmap_sem);
1872 
1873 	if (!kvm->mm->context.uses_cmm) {
1874 		down_write(&kvm->mm->mmap_sem);
1875 		kvm->mm->context.uses_cmm = 1;
1876 		up_write(&kvm->mm->mmap_sem);
1877 	}
1878 out:
1879 	vfree(bits);
1880 	return r;
1881 }
1882 
1883 long kvm_arch_vm_ioctl(struct file *filp,
1884 		       unsigned int ioctl, unsigned long arg)
1885 {
1886 	struct kvm *kvm = filp->private_data;
1887 	void __user *argp = (void __user *)arg;
1888 	struct kvm_device_attr attr;
1889 	int r;
1890 
1891 	switch (ioctl) {
1892 	case KVM_S390_INTERRUPT: {
1893 		struct kvm_s390_interrupt s390int;
1894 
1895 		r = -EFAULT;
1896 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1897 			break;
1898 		r = kvm_s390_inject_vm(kvm, &s390int);
1899 		break;
1900 	}
1901 	case KVM_ENABLE_CAP: {
1902 		struct kvm_enable_cap cap;
1903 		r = -EFAULT;
1904 		if (copy_from_user(&cap, argp, sizeof(cap)))
1905 			break;
1906 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1907 		break;
1908 	}
1909 	case KVM_CREATE_IRQCHIP: {
1910 		struct kvm_irq_routing_entry routing;
1911 
1912 		r = -EINVAL;
1913 		if (kvm->arch.use_irqchip) {
1914 			/* Set up dummy routing. */
1915 			memset(&routing, 0, sizeof(routing));
1916 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1917 		}
1918 		break;
1919 	}
1920 	case KVM_SET_DEVICE_ATTR: {
1921 		r = -EFAULT;
1922 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1923 			break;
1924 		r = kvm_s390_vm_set_attr(kvm, &attr);
1925 		break;
1926 	}
1927 	case KVM_GET_DEVICE_ATTR: {
1928 		r = -EFAULT;
1929 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1930 			break;
1931 		r = kvm_s390_vm_get_attr(kvm, &attr);
1932 		break;
1933 	}
1934 	case KVM_HAS_DEVICE_ATTR: {
1935 		r = -EFAULT;
1936 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1937 			break;
1938 		r = kvm_s390_vm_has_attr(kvm, &attr);
1939 		break;
1940 	}
1941 	case KVM_S390_GET_SKEYS: {
1942 		struct kvm_s390_skeys args;
1943 
1944 		r = -EFAULT;
1945 		if (copy_from_user(&args, argp,
1946 				   sizeof(struct kvm_s390_skeys)))
1947 			break;
1948 		r = kvm_s390_get_skeys(kvm, &args);
1949 		break;
1950 	}
1951 	case KVM_S390_SET_SKEYS: {
1952 		struct kvm_s390_skeys args;
1953 
1954 		r = -EFAULT;
1955 		if (copy_from_user(&args, argp,
1956 				   sizeof(struct kvm_s390_skeys)))
1957 			break;
1958 		r = kvm_s390_set_skeys(kvm, &args);
1959 		break;
1960 	}
1961 	case KVM_S390_GET_CMMA_BITS: {
1962 		struct kvm_s390_cmma_log args;
1963 
1964 		r = -EFAULT;
1965 		if (copy_from_user(&args, argp, sizeof(args)))
1966 			break;
1967 		mutex_lock(&kvm->slots_lock);
1968 		r = kvm_s390_get_cmma_bits(kvm, &args);
1969 		mutex_unlock(&kvm->slots_lock);
1970 		if (!r) {
1971 			r = copy_to_user(argp, &args, sizeof(args));
1972 			if (r)
1973 				r = -EFAULT;
1974 		}
1975 		break;
1976 	}
1977 	case KVM_S390_SET_CMMA_BITS: {
1978 		struct kvm_s390_cmma_log args;
1979 
1980 		r = -EFAULT;
1981 		if (copy_from_user(&args, argp, sizeof(args)))
1982 			break;
1983 		mutex_lock(&kvm->slots_lock);
1984 		r = kvm_s390_set_cmma_bits(kvm, &args);
1985 		mutex_unlock(&kvm->slots_lock);
1986 		break;
1987 	}
1988 	default:
1989 		r = -ENOTTY;
1990 	}
1991 
1992 	return r;
1993 }
1994 
1995 static int kvm_s390_query_ap_config(u8 *config)
1996 {
1997 	u32 fcn_code = 0x04000000UL;
1998 	u32 cc = 0;
1999 
2000 	memset(config, 0, 128);
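	/* GR0 takes the PQAP function code, GR2 the address of the QCI block */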
2001 	asm volatile(
2002 		"lgr 0,%1\n"
2003 		"lgr 2,%2\n"
2004 		".long 0xb2af0000\n"		/* PQAP(QCI) */
2005 		"0: ipm %0\n"
2006 		"srl %0,28\n"
2007 		"1:\n"
2008 		EX_TABLE(0b, 1b)
2009 		: "+r" (cc)
2010 		: "r" (fcn_code), "r" (config)
2011 		: "cc", "0", "2", "memory"
2012 	);
2013 
2014 	return cc;
2015 }
2016 
2017 static int kvm_s390_apxa_installed(void)
2018 {
2019 	u8 config[128];
2020 	int cc;
2021 
2022 	if (test_facility(12)) {
2023 		cc = kvm_s390_query_ap_config(config);
2024 
2025 		if (cc)
2026 			pr_err("PQAP(QCI) failed with cc=%d", cc);
2027 		else
2028 			return config[0] & 0x40;
2029 	}
2030 
2031 	return 0;
2032 }
2033 
2034 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2035 {
2036 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2037 
2038 	if (kvm_s390_apxa_installed())
2039 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2040 	else
2041 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2042 }
2043 
2044 static u64 kvm_s390_get_initial_cpuid(void)
2045 {
2046 	struct cpuid cpuid;
2047 
2048 	get_cpu_id(&cpuid);
2049 	cpuid.version = 0xff;
2050 	return *((u64 *) &cpuid);
2051 }
2052 
2053 static void kvm_s390_crypto_init(struct kvm *kvm)
2054 {
2055 	if (!test_kvm_facility(kvm, 76))
2056 		return;
2057 
2058 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2059 	kvm_s390_set_crycb_format(kvm);
2060 
2061 	/* Enable AES/DEA protected key functions by default */
2062 	kvm->arch.crypto.aes_kw = 1;
2063 	kvm->arch.crypto.dea_kw = 1;
2064 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2065 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2066 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2067 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2068 }
2069 
2070 static void sca_dispose(struct kvm *kvm)
2071 {
2072 	if (kvm->arch.use_esca)
2073 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2074 	else
2075 		free_page((unsigned long)(kvm->arch.sca));
2076 	kvm->arch.sca = NULL;
2077 }
2078 
2079 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2080 {
2081 	gfp_t alloc_flags = GFP_KERNEL;
2082 	int i, rc;
2083 	char debug_name[16];
2084 	static unsigned long sca_offset;
2085 
2086 	rc = -EINVAL;
2087 #ifdef CONFIG_KVM_S390_UCONTROL
2088 	if (type & ~KVM_VM_S390_UCONTROL)
2089 		goto out_err;
2090 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2091 		goto out_err;
2092 #else
2093 	if (type)
2094 		goto out_err;
2095 #endif
2096 
2097 	rc = s390_enable_sie();
2098 	if (rc)
2099 		goto out_err;
2100 
2101 	rc = -ENOMEM;
2102 
2103 	if (!sclp.has_64bscao)
2104 		alloc_flags |= GFP_DMA;
2105 	rwlock_init(&kvm->arch.sca_lock);
2106 	/* start with basic SCA */
2107 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2108 	if (!kvm->arch.sca)
2109 		goto out_err;
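	/* stagger the SCA within its page; the offset advances by 16 bytes per VM */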
2110 	spin_lock(&kvm_lock);
2111 	sca_offset += 16;
2112 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2113 		sca_offset = 0;
2114 	kvm->arch.sca = (struct bsca_block *)
2115 			((char *) kvm->arch.sca + sca_offset);
2116 	spin_unlock(&kvm_lock);
2117 
2118 	sprintf(debug_name, "kvm-%u", current->pid);
2119 
2120 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2121 	if (!kvm->arch.dbf)
2122 		goto out_err;
2123 
2124 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2125 	kvm->arch.sie_page2 =
2126 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2127 	if (!kvm->arch.sie_page2)
2128 		goto out_err;
2129 
2130 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2131 
2132 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2133 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2134 					      (kvm_s390_fac_base[i] |
2135 					       kvm_s390_fac_ext[i]);
2136 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2137 					      kvm_s390_fac_base[i];
2138 	}
2139 
2140 	/* we are always in czam mode - even on pre z14 machines */
2141 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2142 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2143 	/* we emulate STHYI in kvm */
2144 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2145 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2146 	if (MACHINE_HAS_TLB_GUEST) {
2147 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2148 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2149 	}
2150 
2151 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2152 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2153 
2154 	kvm_s390_crypto_init(kvm);
2155 
2156 	mutex_init(&kvm->arch.float_int.ais_lock);
2157 	spin_lock_init(&kvm->arch.float_int.lock);
2158 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2159 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2160 	init_waitqueue_head(&kvm->arch.ipte_wq);
2161 	mutex_init(&kvm->arch.ipte_mutex);
2162 
2163 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2164 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2165 
2166 	if (type & KVM_VM_S390_UCONTROL) {
2167 		kvm->arch.gmap = NULL;
2168 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2169 	} else {
2170 		if (sclp.hamax == U64_MAX)
2171 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2172 		else
2173 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2174 						    sclp.hamax + 1);
2175 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2176 		if (!kvm->arch.gmap)
2177 			goto out_err;
2178 		kvm->arch.gmap->private = kvm;
2179 		kvm->arch.gmap->pfault_enabled = 0;
2180 	}
2181 
2182 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2183 	kvm->arch.use_skf = sclp.has_skey;
2184 	spin_lock_init(&kvm->arch.start_stop_lock);
2185 	kvm_s390_vsie_init(kvm);
2186 	kvm_s390_gisa_init(kvm);
2187 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2188 
2189 	return 0;
2190 out_err:
2191 	free_page((unsigned long)kvm->arch.sie_page2);
2192 	debug_unregister(kvm->arch.dbf);
2193 	sca_dispose(kvm);
2194 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2195 	return rc;
2196 }
2197 
2198 bool kvm_arch_has_vcpu_debugfs(void)
2199 {
2200 	return false;
2201 }
2202 
2203 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2204 {
2205 	return 0;
2206 }
2207 
2208 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2209 {
2210 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2211 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2212 	kvm_s390_clear_local_irqs(vcpu);
2213 	kvm_clear_async_pf_completion_queue(vcpu);
2214 	if (!kvm_is_ucontrol(vcpu->kvm))
2215 		sca_del_vcpu(vcpu);
2216 
2217 	if (kvm_is_ucontrol(vcpu->kvm))
2218 		gmap_remove(vcpu->arch.gmap);
2219 
2220 	if (vcpu->kvm->arch.use_cmma)
2221 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2222 	free_page((unsigned long)(vcpu->arch.sie_block));
2223 
2224 	kvm_vcpu_uninit(vcpu);
2225 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2226 }
2227 
2228 static void kvm_free_vcpus(struct kvm *kvm)
2229 {
2230 	unsigned int i;
2231 	struct kvm_vcpu *vcpu;
2232 
2233 	kvm_for_each_vcpu(i, vcpu, kvm)
2234 		kvm_arch_vcpu_destroy(vcpu);
2235 
2236 	mutex_lock(&kvm->lock);
2237 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2238 		kvm->vcpus[i] = NULL;
2239 
2240 	atomic_set(&kvm->online_vcpus, 0);
2241 	mutex_unlock(&kvm->lock);
2242 }
2243 
2244 void kvm_arch_destroy_vm(struct kvm *kvm)
2245 {
2246 	kvm_free_vcpus(kvm);
2247 	sca_dispose(kvm);
2248 	debug_unregister(kvm->arch.dbf);
2249 	kvm_s390_gisa_destroy(kvm);
2250 	free_page((unsigned long)kvm->arch.sie_page2);
2251 	if (!kvm_is_ucontrol(kvm))
2252 		gmap_remove(kvm->arch.gmap);
2253 	kvm_s390_destroy_adapters(kvm);
2254 	kvm_s390_clear_float_irqs(kvm);
2255 	kvm_s390_vsie_destroy(kvm);
2256 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2257 }
2258 
2259 /* Section: vcpu related */
2260 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2261 {
2262 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2263 	if (!vcpu->arch.gmap)
2264 		return -ENOMEM;
2265 	vcpu->arch.gmap->private = vcpu->kvm;
2266 
2267 	return 0;
2268 }
2269 
2270 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2271 {
2272 	if (!kvm_s390_use_sca_entries())
2273 		return;
2274 	read_lock(&vcpu->kvm->arch.sca_lock);
2275 	if (vcpu->kvm->arch.use_esca) {
2276 		struct esca_block *sca = vcpu->kvm->arch.sca;
2277 
2278 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2279 		sca->cpu[vcpu->vcpu_id].sda = 0;
2280 	} else {
2281 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2282 
2283 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2284 		sca->cpu[vcpu->vcpu_id].sda = 0;
2285 	}
2286 	read_unlock(&vcpu->kvm->arch.sca_lock);
2287 }
2288 
2289 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2290 {
2291 	if (!kvm_s390_use_sca_entries()) {
2292 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2293 
2294 		/* we still need the basic sca for the ipte control */
2295 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2296 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2297 		return;
2298 	}
2299 	read_lock(&vcpu->kvm->arch.sca_lock);
2300 	if (vcpu->kvm->arch.use_esca) {
2301 		struct esca_block *sca = vcpu->kvm->arch.sca;
2302 
2303 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2304 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2305 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2306 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2307 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2308 	} else {
2309 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2310 
2311 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2312 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2313 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2314 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2315 	}
2316 	read_unlock(&vcpu->kvm->arch.sca_lock);
2317 }
2318 
2319 /* Basic SCA to Extended SCA data copy routines */
2320 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2321 {
2322 	d->sda = s->sda;
2323 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2324 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2325 }
2326 
2327 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2328 {
2329 	int i;
2330 
2331 	d->ipte_control = s->ipte_control;
2332 	d->mcn[0] = s->mcn;
2333 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2334 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2335 }
2336 
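/*
 * Replace the basic SCA with an extended SCA. All VCPUs are blocked and the
 * sca_lock is held for writing while their SIE control blocks are switched
 * over to the new SCA.
 */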
2337 static int sca_switch_to_extended(struct kvm *kvm)
2338 {
2339 	struct bsca_block *old_sca = kvm->arch.sca;
2340 	struct esca_block *new_sca;
2341 	struct kvm_vcpu *vcpu;
2342 	unsigned int vcpu_idx;
2343 	u32 scaol, scaoh;
2344 
2345 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2346 	if (!new_sca)
2347 		return -ENOMEM;
2348 
2349 	scaoh = (u32)((u64)(new_sca) >> 32);
2350 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2351 
2352 	kvm_s390_vcpu_block_all(kvm);
2353 	write_lock(&kvm->arch.sca_lock);
2354 
2355 	sca_copy_b_to_e(new_sca, old_sca);
2356 
2357 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2358 		vcpu->arch.sie_block->scaoh = scaoh;
2359 		vcpu->arch.sie_block->scaol = scaol;
2360 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2361 	}
2362 	kvm->arch.sca = new_sca;
2363 	kvm->arch.use_esca = 1;
2364 
2365 	write_unlock(&kvm->arch.sca_lock);
2366 	kvm_s390_vcpu_unblock_all(kvm);
2367 
2368 	free_page((unsigned long)old_sca);
2369 
2370 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2371 		 old_sca, kvm->arch.sca);
2372 	return 0;
2373 }
2374 
2375 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2376 {
2377 	int rc;
2378 
2379 	if (!kvm_s390_use_sca_entries()) {
2380 		if (id < KVM_MAX_VCPUS)
2381 			return true;
2382 		return false;
2383 	}
2384 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2385 		return true;
2386 	if (!sclp.has_esca || !sclp.has_64bscao)
2387 		return false;
2388 
2389 	mutex_lock(&kvm->lock);
2390 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2391 	mutex_unlock(&kvm->lock);
2392 
2393 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2394 }
2395 
2396 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2397 {
2398 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2399 	kvm_clear_async_pf_completion_queue(vcpu);
2400 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2401 				    KVM_SYNC_GPRS |
2402 				    KVM_SYNC_ACRS |
2403 				    KVM_SYNC_CRS |
2404 				    KVM_SYNC_ARCH0 |
2405 				    KVM_SYNC_PFAULT;
2406 	kvm_s390_set_prefix(vcpu, 0);
2407 	if (test_kvm_facility(vcpu->kvm, 64))
2408 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2409 	if (test_kvm_facility(vcpu->kvm, 82))
2410 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2411 	if (test_kvm_facility(vcpu->kvm, 133))
2412 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2413 	if (test_kvm_facility(vcpu->kvm, 156))
2414 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2415 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2416 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2417 	 */
2418 	if (MACHINE_HAS_VX)
2419 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2420 	else
2421 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2422 
2423 	if (kvm_is_ucontrol(vcpu->kvm))
2424 		return __kvm_ucontrol_vcpu_init(vcpu);
2425 
2426 	return 0;
2427 }
2428 
2429 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2430 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2431 {
2432 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2433 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2434 	vcpu->arch.cputm_start = get_tod_clock_fast();
2435 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2436 }
2437 
2438 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2439 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2440 {
2441 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2442 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2443 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2444 	vcpu->arch.cputm_start = 0;
2445 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2446 }
2447 
2448 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2449 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2450 {
2451 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2452 	vcpu->arch.cputm_enabled = true;
2453 	__start_cpu_timer_accounting(vcpu);
2454 }
2455 
2456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2457 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2458 {
2459 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2460 	__stop_cpu_timer_accounting(vcpu);
2461 	vcpu->arch.cputm_enabled = false;
2462 }
2463 
2464 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2465 {
2466 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2467 	__enable_cpu_timer_accounting(vcpu);
2468 	preempt_enable();
2469 }
2470 
2471 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2472 {
2473 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2474 	__disable_cpu_timer_accounting(vcpu);
2475 	preempt_enable();
2476 }
2477 
2478 /* set the cpu timer - may only be called from the VCPU thread itself */
2479 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2480 {
2481 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2482 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2483 	if (vcpu->arch.cputm_enabled)
2484 		vcpu->arch.cputm_start = get_tod_clock_fast();
2485 	vcpu->arch.sie_block->cputm = cputm;
2486 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2487 	preempt_enable();
2488 }
2489 
2490 /* update and get the cpu timer - can also be called from other VCPU threads */
2491 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2492 {
2493 	unsigned int seq;
2494 	__u64 value;
2495 
2496 	if (unlikely(!vcpu->arch.cputm_enabled))
2497 		return vcpu->arch.sie_block->cputm;
2498 
2499 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2500 	do {
2501 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2502 		/*
2503 		 * If the writer would ever execute a read in the critical
2504 		 * section, e.g. in irq context, we have a deadlock.
2505 		 */
2506 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2507 		value = vcpu->arch.sie_block->cputm;
2508 		/* if cputm_start is 0, accounting is being started/stopped */
2509 		if (likely(vcpu->arch.cputm_start))
2510 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2511 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2512 	preempt_enable();
2513 	return value;
2514 }
2515 
2516 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2517 {
2518 
2519 	gmap_enable(vcpu->arch.enabled_gmap);
2520 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2521 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2522 		__start_cpu_timer_accounting(vcpu);
2523 	vcpu->cpu = cpu;
2524 }
2525 
2526 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2527 {
2528 	vcpu->cpu = -1;
2529 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2530 		__stop_cpu_timer_accounting(vcpu);
2531 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2532 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2533 	gmap_disable(vcpu->arch.enabled_gmap);
2534 
2535 }
2536 
2537 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2538 {
2539 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2540 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2541 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2542 	kvm_s390_set_prefix(vcpu, 0);
2543 	kvm_s390_set_cpu_timer(vcpu, 0);
2544 	vcpu->arch.sie_block->ckc       = 0UL;
2545 	vcpu->arch.sie_block->todpr     = 0;
2546 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2547 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2548 					CR0_INTERRUPT_KEY_SUBMASK |
2549 					CR0_MEASUREMENT_ALERT_SUBMASK;
2550 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2551 					CR14_UNUSED_33 |
2552 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2553 	/* make sure the new fpc will be lazily loaded */
2554 	save_fpu_regs();
2555 	current->thread.fpu.fpc = 0;
2556 	vcpu->arch.sie_block->gbea = 1;
2557 	vcpu->arch.sie_block->pp = 0;
2558 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2559 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2560 	kvm_clear_async_pf_completion_queue(vcpu);
2561 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2562 		kvm_s390_vcpu_stop(vcpu);
2563 	kvm_s390_clear_local_irqs(vcpu);
2564 }
2565 
2566 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2567 {
2568 	mutex_lock(&vcpu->kvm->lock);
2569 	preempt_disable();
2570 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2571 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2572 	preempt_enable();
2573 	mutex_unlock(&vcpu->kvm->lock);
2574 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2575 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2576 		sca_add_vcpu(vcpu);
2577 	}
2578 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2579 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2580 	/* make vcpu_load load the right gmap on the first trigger */
2581 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2582 }
2583 
2584 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2585 {
2586 	if (!test_kvm_facility(vcpu->kvm, 76))
2587 		return;
2588 
2589 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2590 
2591 	if (vcpu->kvm->arch.crypto.aes_kw)
2592 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2593 	if (vcpu->kvm->arch.crypto.dea_kw)
2594 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2595 
2596 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2597 }
2598 
2599 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2600 {
2601 	free_page(vcpu->arch.sie_block->cbrlo);
2602 	vcpu->arch.sie_block->cbrlo = 0;
2603 }
2604 
2605 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2606 {
2607 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2608 	if (!vcpu->arch.sie_block->cbrlo)
2609 		return -ENOMEM;
2610 	return 0;
2611 }
2612 
2613 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2614 {
2615 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2616 
2617 	vcpu->arch.sie_block->ibc = model->ibc;
2618 	if (test_kvm_facility(vcpu->kvm, 7))
2619 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2620 }
2621 
2622 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2623 {
2624 	int rc = 0;
2625 
2626 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2627 						    CPUSTAT_SM |
2628 						    CPUSTAT_STOPPED);
2629 
2630 	if (test_kvm_facility(vcpu->kvm, 78))
2631 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2632 	else if (test_kvm_facility(vcpu->kvm, 8))
2633 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2634 
2635 	kvm_s390_vcpu_setup_model(vcpu);
2636 
2637 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2638 	if (MACHINE_HAS_ESOP)
2639 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2640 	if (test_kvm_facility(vcpu->kvm, 9))
2641 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2642 	if (test_kvm_facility(vcpu->kvm, 73))
2643 		vcpu->arch.sie_block->ecb |= ECB_TE;
2644 
2645 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2646 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2647 	if (test_kvm_facility(vcpu->kvm, 130))
2648 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2649 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2650 	if (sclp.has_cei)
2651 		vcpu->arch.sie_block->eca |= ECA_CEI;
2652 	if (sclp.has_ib)
2653 		vcpu->arch.sie_block->eca |= ECA_IB;
2654 	if (sclp.has_siif)
2655 		vcpu->arch.sie_block->eca |= ECA_SII;
2656 	if (sclp.has_sigpif)
2657 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2658 	if (test_kvm_facility(vcpu->kvm, 129)) {
2659 		vcpu->arch.sie_block->eca |= ECA_VX;
2660 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2661 	}
2662 	if (test_kvm_facility(vcpu->kvm, 139))
2663 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2664 	if (test_kvm_facility(vcpu->kvm, 156))
2665 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2666 	if (vcpu->arch.sie_block->gd) {
2667 		vcpu->arch.sie_block->eca |= ECA_AIV;
2668 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2669 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2670 	}
2671 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2672 					| SDNXC;
2673 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2674 
2675 	if (sclp.has_kss)
2676 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2677 	else
2678 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2679 
2680 	if (vcpu->kvm->arch.use_cmma) {
2681 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2682 		if (rc)
2683 			return rc;
2684 	}
2685 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2686 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2687 
2688 	kvm_s390_vcpu_crypto_setup(vcpu);
2689 
2690 	return rc;
2691 }
2692 
2693 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2694 				      unsigned int id)
2695 {
2696 	struct kvm_vcpu *vcpu;
2697 	struct sie_page *sie_page;
2698 	int rc = -EINVAL;
2699 
2700 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2701 		goto out;
2702 
2703 	rc = -ENOMEM;
2704 
2705 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2706 	if (!vcpu)
2707 		goto out;
2708 
2709 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2710 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2711 	if (!sie_page)
2712 		goto out_free_cpu;
2713 
2714 	vcpu->arch.sie_block = &sie_page->sie_block;
2715 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2716 
2717 	/* the real guest size will always be smaller than msl */
2718 	vcpu->arch.sie_block->mso = 0;
2719 	vcpu->arch.sie_block->msl = sclp.hamax;
2720 
2721 	vcpu->arch.sie_block->icpua = id;
2722 	spin_lock_init(&vcpu->arch.local_int.lock);
2723 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2724 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2725 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2726 	seqcount_init(&vcpu->arch.cputm_seqcount);
2727 
2728 	rc = kvm_vcpu_init(vcpu, kvm, id);
2729 	if (rc)
2730 		goto out_free_sie_block;
2731 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2732 		 vcpu->arch.sie_block);
2733 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2734 
2735 	return vcpu;
2736 out_free_sie_block:
2737 	free_page((unsigned long)(vcpu->arch.sie_block));
2738 out_free_cpu:
2739 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2740 out:
2741 	return ERR_PTR(rc);
2742 }
2743 
2744 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2745 {
2746 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2747 }
2748 
2749 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2750 {
2751 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2752 }
2753 
2754 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2755 {
2756 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2757 	exit_sie(vcpu);
2758 }
2759 
2760 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2761 {
2762 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2763 }
2764 
2765 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2766 {
2767 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2768 	exit_sie(vcpu);
2769 }
2770 
2771 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2772 {
2773 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2774 }
2775 
2776 /*
2777  * Kick a guest cpu out of SIE and wait until SIE is not running.
2778  * If the CPU is not running (e.g. waiting while idle) the function will
2779  * return immediately. */
2780 void exit_sie(struct kvm_vcpu *vcpu)
2781 {
2782 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2783 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2784 		cpu_relax();
2785 }
2786 
2787 /* Kick a guest cpu out of SIE to process a request synchronously */
2788 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2789 {
2790 	kvm_make_request(req, vcpu);
2791 	kvm_s390_vcpu_request(vcpu);
2792 }
2793 
2794 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2795 			      unsigned long end)
2796 {
2797 	struct kvm *kvm = gmap->private;
2798 	struct kvm_vcpu *vcpu;
2799 	unsigned long prefix;
2800 	int i;
2801 
2802 	if (gmap_is_shadow(gmap))
2803 		return;
2804 	if (start >= 1UL << 31)
2805 		/* We are only interested in prefix pages */
2806 		return;
2807 	kvm_for_each_vcpu(i, vcpu, kvm) {
2808 		/* match against both prefix pages */
2809 		prefix = kvm_s390_get_prefix(vcpu);
2810 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2811 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2812 				   start, end);
2813 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2814 		}
2815 	}
2816 }
2817 
2818 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2819 {
2820 	/* kvm common code refers to this, but never calls it */
2821 	BUG();
2822 	return 0;
2823 }
2824 
2825 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2826 					   struct kvm_one_reg *reg)
2827 {
2828 	int r = -EINVAL;
2829 
2830 	switch (reg->id) {
2831 	case KVM_REG_S390_TODPR:
2832 		r = put_user(vcpu->arch.sie_block->todpr,
2833 			     (u32 __user *)reg->addr);
2834 		break;
2835 	case KVM_REG_S390_EPOCHDIFF:
2836 		r = put_user(vcpu->arch.sie_block->epoch,
2837 			     (u64 __user *)reg->addr);
2838 		break;
2839 	case KVM_REG_S390_CPU_TIMER:
2840 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2841 			     (u64 __user *)reg->addr);
2842 		break;
2843 	case KVM_REG_S390_CLOCK_COMP:
2844 		r = put_user(vcpu->arch.sie_block->ckc,
2845 			     (u64 __user *)reg->addr);
2846 		break;
2847 	case KVM_REG_S390_PFTOKEN:
2848 		r = put_user(vcpu->arch.pfault_token,
2849 			     (u64 __user *)reg->addr);
2850 		break;
2851 	case KVM_REG_S390_PFCOMPARE:
2852 		r = put_user(vcpu->arch.pfault_compare,
2853 			     (u64 __user *)reg->addr);
2854 		break;
2855 	case KVM_REG_S390_PFSELECT:
2856 		r = put_user(vcpu->arch.pfault_select,
2857 			     (u64 __user *)reg->addr);
2858 		break;
2859 	case KVM_REG_S390_PP:
2860 		r = put_user(vcpu->arch.sie_block->pp,
2861 			     (u64 __user *)reg->addr);
2862 		break;
2863 	case KVM_REG_S390_GBEA:
2864 		r = put_user(vcpu->arch.sie_block->gbea,
2865 			     (u64 __user *)reg->addr);
2866 		break;
2867 	default:
2868 		break;
2869 	}
2870 
2871 	return r;
2872 }
2873 
2874 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2875 					   struct kvm_one_reg *reg)
2876 {
2877 	int r = -EINVAL;
2878 	__u64 val;
2879 
2880 	switch (reg->id) {
2881 	case KVM_REG_S390_TODPR:
2882 		r = get_user(vcpu->arch.sie_block->todpr,
2883 			     (u32 __user *)reg->addr);
2884 		break;
2885 	case KVM_REG_S390_EPOCHDIFF:
2886 		r = get_user(vcpu->arch.sie_block->epoch,
2887 			     (u64 __user *)reg->addr);
2888 		break;
2889 	case KVM_REG_S390_CPU_TIMER:
2890 		r = get_user(val, (u64 __user *)reg->addr);
2891 		if (!r)
2892 			kvm_s390_set_cpu_timer(vcpu, val);
2893 		break;
2894 	case KVM_REG_S390_CLOCK_COMP:
2895 		r = get_user(vcpu->arch.sie_block->ckc,
2896 			     (u64 __user *)reg->addr);
2897 		break;
2898 	case KVM_REG_S390_PFTOKEN:
2899 		r = get_user(vcpu->arch.pfault_token,
2900 			     (u64 __user *)reg->addr);
2901 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2902 			kvm_clear_async_pf_completion_queue(vcpu);
2903 		break;
2904 	case KVM_REG_S390_PFCOMPARE:
2905 		r = get_user(vcpu->arch.pfault_compare,
2906 			     (u64 __user *)reg->addr);
2907 		break;
2908 	case KVM_REG_S390_PFSELECT:
2909 		r = get_user(vcpu->arch.pfault_select,
2910 			     (u64 __user *)reg->addr);
2911 		break;
2912 	case KVM_REG_S390_PP:
2913 		r = get_user(vcpu->arch.sie_block->pp,
2914 			     (u64 __user *)reg->addr);
2915 		break;
2916 	case KVM_REG_S390_GBEA:
2917 		r = get_user(vcpu->arch.sie_block->gbea,
2918 			     (u64 __user *)reg->addr);
2919 		break;
2920 	default:
2921 		break;
2922 	}
2923 
2924 	return r;
2925 }
2926 
2927 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2928 {
2929 	kvm_s390_vcpu_initial_reset(vcpu);
2930 	return 0;
2931 }
2932 
2933 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2934 {
2935 	vcpu_load(vcpu);
2936 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2937 	vcpu_put(vcpu);
2938 	return 0;
2939 }
2940 
2941 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2942 {
2943 	vcpu_load(vcpu);
2944 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2945 	vcpu_put(vcpu);
2946 	return 0;
2947 }
2948 
2949 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2950 				  struct kvm_sregs *sregs)
2951 {
2952 	vcpu_load(vcpu);
2953 
2954 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2955 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2956 
2957 	vcpu_put(vcpu);
2958 	return 0;
2959 }
2960 
2961 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2962 				  struct kvm_sregs *sregs)
2963 {
2964 	vcpu_load(vcpu);
2965 
2966 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2967 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2968 
2969 	vcpu_put(vcpu);
2970 	return 0;
2971 }
2972 
2973 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2974 {
2975 	int ret = 0;
2976 
2977 	vcpu_load(vcpu);
2978 
2979 	if (test_fp_ctl(fpu->fpc)) {
2980 		ret = -EINVAL;
2981 		goto out;
2982 	}
2983 	vcpu->run->s.regs.fpc = fpu->fpc;
2984 	if (MACHINE_HAS_VX)
2985 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2986 				 (freg_t *) fpu->fprs);
2987 	else
2988 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2989 
2990 out:
2991 	vcpu_put(vcpu);
2992 	return ret;
2993 }
2994 
2995 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2996 {
2997 	vcpu_load(vcpu);
2998 
2999 	/* make sure we have the latest values */
3000 	save_fpu_regs();
3001 	if (MACHINE_HAS_VX)
3002 		convert_vx_to_fp((freg_t *) fpu->fprs,
3003 				 (__vector128 *) vcpu->run->s.regs.vrs);
3004 	else
3005 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3006 	fpu->fpc = vcpu->run->s.regs.fpc;
3007 
3008 	vcpu_put(vcpu);
3009 	return 0;
3010 }
3011 
3012 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3013 {
3014 	int rc = 0;
3015 
3016 	if (!is_vcpu_stopped(vcpu))
3017 		rc = -EBUSY;
3018 	else {
3019 		vcpu->run->psw_mask = psw.mask;
3020 		vcpu->run->psw_addr = psw.addr;
3021 	}
3022 	return rc;
3023 }
3024 
3025 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3026 				  struct kvm_translation *tr)
3027 {
3028 	return -EINVAL; /* not implemented yet */
3029 }
3030 
3031 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3032 			      KVM_GUESTDBG_USE_HW_BP | \
3033 			      KVM_GUESTDBG_ENABLE)
3034 
3035 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3036 					struct kvm_guest_debug *dbg)
3037 {
3038 	int rc = 0;
3039 
3040 	vcpu_load(vcpu);
3041 
3042 	vcpu->guest_debug = 0;
3043 	kvm_s390_clear_bp_data(vcpu);
3044 
3045 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3046 		rc = -EINVAL;
3047 		goto out;
3048 	}
3049 	if (!sclp.has_gpere) {
3050 		rc = -EINVAL;
3051 		goto out;
3052 	}
3053 
3054 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3055 		vcpu->guest_debug = dbg->control;
3056 		/* enforce guest PER */
3057 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3058 
3059 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3060 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3061 	} else {
3062 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3063 		vcpu->arch.guestdbg.last_bp = 0;
3064 	}
3065 
3066 	if (rc) {
3067 		vcpu->guest_debug = 0;
3068 		kvm_s390_clear_bp_data(vcpu);
3069 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3070 	}
3071 
3072 out:
3073 	vcpu_put(vcpu);
3074 	return rc;
3075 }
3076 
3077 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3078 				    struct kvm_mp_state *mp_state)
3079 {
3080 	int ret;
3081 
3082 	vcpu_load(vcpu);
3083 
3084 	/* CHECK_STOP and LOAD are not supported yet */
3085 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3086 				      KVM_MP_STATE_OPERATING;
3087 
3088 	vcpu_put(vcpu);
3089 	return ret;
3090 }
3091 
3092 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3093 				    struct kvm_mp_state *mp_state)
3094 {
3095 	int rc = 0;
3096 
3097 	vcpu_load(vcpu);
3098 
3099 	/* user space knows about this interface - let it control the state */
3100 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3101 
3102 	switch (mp_state->mp_state) {
3103 	case KVM_MP_STATE_STOPPED:
3104 		kvm_s390_vcpu_stop(vcpu);
3105 		break;
3106 	case KVM_MP_STATE_OPERATING:
3107 		kvm_s390_vcpu_start(vcpu);
3108 		break;
3109 	case KVM_MP_STATE_LOAD:
3110 	case KVM_MP_STATE_CHECK_STOP:
3111 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3112 	default:
3113 		rc = -ENXIO;
3114 	}
3115 
3116 	vcpu_put(vcpu);
3117 	return rc;
3118 }
3119 
3120 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3121 {
3122 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3123 }
3124 
3125 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3126 {
3127 retry:
3128 	kvm_s390_vcpu_request_handled(vcpu);
3129 	if (!kvm_request_pending(vcpu))
3130 		return 0;
3131 	/*
3132 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3133 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3134 	 * This ensures that the ipte instruction for this request has
3135 	 * already finished. We might race against a second unmapper that
3136 	 * wants to set the blocking bit. Let's just retry the request loop.
3137 	 */
3138 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3139 		int rc;
3140 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3141 					  kvm_s390_get_prefix(vcpu),
3142 					  PAGE_SIZE * 2, PROT_WRITE);
3143 		if (rc) {
3144 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3145 			return rc;
3146 		}
3147 		goto retry;
3148 	}
3149 
3150 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3151 		vcpu->arch.sie_block->ihcpu = 0xffff;
3152 		goto retry;
3153 	}
3154 
3155 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3156 		if (!ibs_enabled(vcpu)) {
3157 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3158 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3159 		}
3160 		goto retry;
3161 	}
3162 
3163 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3164 		if (ibs_enabled(vcpu)) {
3165 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3166 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3167 		}
3168 		goto retry;
3169 	}
3170 
3171 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3172 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3173 		goto retry;
3174 	}
3175 
3176 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3177 		/*
3178 		 * Disable CMM virtualization; we will emulate the ESSA
3179 		 * instruction manually, in order to provide additional
3180 		 * functionalities needed for live migration.
3181 		 */
3182 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3183 		goto retry;
3184 	}
3185 
3186 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3187 		/*
3188 		 * Re-enable CMM virtualization if CMMA is available and
3189 		 * CMM has been used.
3190 		 */
3191 		if ((vcpu->kvm->arch.use_cmma) &&
3192 		    (vcpu->kvm->mm->context.uses_cmm))
3193 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3194 		goto retry;
3195 	}
3196 
3197 	/* nothing to do, just clear the request */
3198 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3199 
3200 	return 0;
3201 }
3202 
3203 void kvm_s390_set_tod_clock(struct kvm *kvm,
3204 			    const struct kvm_s390_vm_tod_clock *gtod)
3205 {
3206 	struct kvm_vcpu *vcpu;
3207 	struct kvm_s390_tod_clock_ext htod;
3208 	int i;
3209 
3210 	mutex_lock(&kvm->lock);
3211 	preempt_disable();
3212 
3213 	get_tod_clock_ext((char *)&htod);
3214 
3215 	kvm->arch.epoch = gtod->tod - htod.tod;
3216 	kvm->arch.epdx = 0;
3217 	if (test_kvm_facility(kvm, 139)) {
3218 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
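		/* carry a borrow into the epoch index if the epoch subtraction wrapped */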
3219 		if (kvm->arch.epoch > gtod->tod)
3220 			kvm->arch.epdx -= 1;
3221 	}
3222 
3223 	kvm_s390_vcpu_block_all(kvm);
3224 	kvm_for_each_vcpu(i, vcpu, kvm) {
3225 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3226 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3227 	}
3228 
3229 	kvm_s390_vcpu_unblock_all(kvm);
3230 	preempt_enable();
3231 	mutex_unlock(&kvm->lock);
3232 }
3233 
3234 /**
3235  * kvm_arch_fault_in_page - fault-in guest page if necessary
3236  * @vcpu: The corresponding virtual cpu
3237  * @gpa: Guest physical address
3238  * @writable: Whether the page should be writable or not
3239  *
3240  * Make sure that a guest page has been faulted-in on the host.
3241  *
3242  * Return: Zero on success, negative error code otherwise.
3243  */
3244 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3245 {
3246 	return gmap_fault(vcpu->arch.gmap, gpa,
3247 			  writable ? FAULT_FLAG_WRITE : 0);
3248 }
3249 
3250 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3251 				      unsigned long token)
3252 {
3253 	struct kvm_s390_interrupt inti;
3254 	struct kvm_s390_irq irq;
3255 
3256 	if (start_token) {
3257 		irq.u.ext.ext_params2 = token;
3258 		irq.type = KVM_S390_INT_PFAULT_INIT;
3259 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3260 	} else {
3261 		inti.type = KVM_S390_INT_PFAULT_DONE;
3262 		inti.parm64 = token;
3263 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3264 	}
3265 }
3266 
3267 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3268 				     struct kvm_async_pf *work)
3269 {
3270 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3271 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3272 }
3273 
3274 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3275 				 struct kvm_async_pf *work)
3276 {
3277 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3278 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3279 }
3280 
3281 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3282 			       struct kvm_async_pf *work)
3283 {
3284 	/* s390 will always inject the page directly */
3285 }
3286 
3287 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3288 {
3289 	/*
3290 	 * s390 will always inject the page directly,
3291 	 * but we still want check_async_completion to cleanup
3292 	 */
3293 	return true;
3294 }
3295 
3296 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3297 {
3298 	hva_t hva;
3299 	struct kvm_arch_async_pf arch;
3300 	int rc;
3301 
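	/* only arm an async pfault if pfault handling is enabled and the guest can take the interrupt */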
3302 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3303 		return 0;
3304 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3305 	    vcpu->arch.pfault_compare)
3306 		return 0;
3307 	if (psw_extint_disabled(vcpu))
3308 		return 0;
3309 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3310 		return 0;
3311 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3312 		return 0;
3313 	if (!vcpu->arch.gmap->pfault_enabled)
3314 		return 0;
3315 
3316 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3317 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3318 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3319 		return 0;
3320 
3321 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3322 	return rc;
3323 }
3324 
3325 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3326 {
3327 	int rc, cpuflags;
3328 
3329 	/*
3330 	 * On s390 notifications for arriving pages will be delivered directly
3331 	 * to the guest but the housekeeping for completed pfaults is
3332 	 * handled outside the worker.
3333 	 */
3334 	kvm_check_async_pf_completion(vcpu);
3335 
3336 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3337 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3338 
3339 	if (need_resched())
3340 		schedule();
3341 
3342 	if (test_cpu_flag(CIF_MCCK_PENDING))
3343 		s390_handle_mcck();
3344 
3345 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3346 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3347 		if (rc)
3348 			return rc;
3349 	}
3350 
3351 	rc = kvm_s390_handle_requests(vcpu);
3352 	if (rc)
3353 		return rc;
3354 
3355 	if (guestdbg_enabled(vcpu)) {
3356 		kvm_s390_backup_guest_per_regs(vcpu);
3357 		kvm_s390_patch_guest_per_regs(vcpu);
3358 	}
3359 
3360 	vcpu->arch.sie_block->icptcode = 0;
3361 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3362 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3363 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3364 
3365 	return 0;
3366 }
3367 
3368 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3369 {
3370 	struct kvm_s390_pgm_info pgm_info = {
3371 		.code = PGM_ADDRESSING,
3372 	};
3373 	u8 opcode, ilen;
3374 	int rc;
3375 
3376 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3377 	trace_kvm_s390_sie_fault(vcpu);
3378 
3379 	/*
3380 	 * We want to inject an addressing exception, which is defined as a
3381 	 * suppressing or terminating exception. However, since we came here
3382 	 * by a DAT access exception, the PSW still points to the faulting
3383 	 * instruction since DAT exceptions are nullifying. So we've got
3384 	 * to look up the current opcode to get the length of the instruction
3385 	 * to be able to forward the PSW.
3386 	 */
3387 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3388 	ilen = insn_length(opcode);
3389 	if (rc < 0) {
3390 		return rc;
3391 	} else if (rc) {
3392 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3393 		 * Forward by arbitrary ilc, injection will take care of
3394 		 * nullification if necessary.
3395 		 */
3396 		pgm_info = vcpu->arch.pgm;
3397 		ilen = 4;
3398 	}
3399 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3400 	kvm_s390_forward_psw(vcpu, ilen);
3401 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3402 }
3403 
vcpu_post_run(struct kvm_vcpu * vcpu,int exit_reason)3404 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3405 {
3406 	struct mcck_volatile_info *mcck_info;
3407 	struct sie_page *sie_page;
3408 
3409 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3410 		   vcpu->arch.sie_block->icptcode);
3411 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3412 
3413 	if (guestdbg_enabled(vcpu))
3414 		kvm_s390_restore_guest_per_regs(vcpu);
3415 
3416 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3417 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3418 
3419 	if (exit_reason == -EINTR) {
3420 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3421 		sie_page = container_of(vcpu->arch.sie_block,
3422 					struct sie_page, sie_block);
3423 		mcck_info = &sie_page->mcck_info;
3424 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3425 		return 0;
3426 	}
3427 
3428 	if (vcpu->arch.sie_block->icptcode > 0) {
3429 		int rc = kvm_handle_sie_intercept(vcpu);
3430 
3431 		if (rc != -EOPNOTSUPP)
3432 			return rc;
3433 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3434 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3435 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3436 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3437 		return -EREMOTE;
3438 	} else if (exit_reason != -EFAULT) {
3439 		vcpu->stat.exit_null++;
3440 		return 0;
3441 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3442 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3443 		vcpu->run->s390_ucontrol.trans_exc_code =
3444 						current->thread.gmap_addr;
3445 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3446 		return -EREMOTE;
3447 	} else if (current->thread.gmap_pfault) {
3448 		trace_kvm_s390_major_guest_pfault(vcpu);
3449 		current->thread.gmap_pfault = 0;
3450 		if (kvm_arch_setup_async_pf(vcpu))
3451 			return 0;
3452 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3453 	}
3454 	return vcpu_post_run_fault_in_sie(vcpu);
3455 }
3456 
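/*
 * The inner run loop: deliver pending work in vcpu_pre_run(), drop the srcu
 * lock and enter SIE with interrupts disabled, then reacquire srcu and let
 * vcpu_post_run() translate the exit. The loop ends on a pending signal, a
 * pending guestdbg exit or a non-zero return code.
 */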
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are
	 * protected.
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there
		 * must be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

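/*
 * sync_regs() copies the register state that userspace marked dirty in
 * kvm_run->kvm_dirty_regs into the SIE control block before entering the
 * guest. As an illustrative (untested) sketch, a VMM would typically update
 * the shared kvm_run area like this before the next KVM_RUN:
 *
 *	run->s.regs.prefix = new_prefix;	// new_prefix: hypothetical value
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);		// vcpu_fd: assumed vcpu file descriptor
 *
 * The dirty bitmask is cleared at the end of this function once the state
 * has been transferred.
 */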
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */

	kvm_run->kvm_dirty_regs = 0;
}

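/*
 * store_regs() is the counterpart of sync_regs(): after __vcpu_run() it
 * copies the guest state from the SIE control block back into kvm_run so
 * that userspace sees the current register contents, and it switches
 * current->thread.fpu and the guarded storage control block back to the
 * host copies (the actual FP register restore happens lazily on return).
 */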
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}

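/*
 * Entry point for the KVM_RUN ioctl. A minimal userspace driver loop, shown
 * here only as an illustrative sketch (vcpu_fd and mmap_size, obtained via
 * KVM_GET_VCPU_MMAP_SIZE, are assumed to be set up already), would look
 * roughly like:
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			break;			// -EINTR, -EFAULT, ...
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);	// hypothetical VMM helper
 *	}
 *
 * Exits that the kernel cannot handle itself are returned to userspace with
 * run->exit_reason filled in (see the -EREMOTE handling below).
 */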
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * Store the vcpu status at the given address.
 * Two special addresses are handled:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

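/*
 * IBS is only beneficial while exactly one VCPU is running: the start/stop
 * code below enables it for the last remaining started VCPU and drops it on
 * all VCPUs as soon as a second one is started. Both operations are posted
 * as synced requests that are processed before the next guest entry.
 */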
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

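/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl, which lets userspace read or
 * write guest memory through the logical-address translation of the vcpu.
 * As an illustrative (untested) sketch of the uapi, a read of 'len' bytes
 * from guest logical address 'gaddr' into 'buf' could be issued like this
 * (vcpu_fd, gaddr, buf and len are assumed to exist in the VMM):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr	= gaddr,
 *		.size	= len,
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buf,
 *		.ar	= 0,			// access register 0
 *	};
 *	int rc = ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY set only the access check is performed,
 * and KVM_S390_MEMOP_F_INJECT_EXCEPTION makes a failed access inject the
 * corresponding program interruption into the guest.
 */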
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

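/*
 * Interrupt injection ioctls are dispatched via the async ioctl path, i.e.
 * without waiting for the vcpu mutex, so userspace can inject interrupts
 * into a vcpu that is currently running in SIE.
 */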
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

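/*
 * For user-controlled (ucontrol) VMs, userspace can map the SIE control
 * block of a vcpu into its address space by mmap()ing the vcpu file
 * descriptor at offset KVM_S390_SIE_PAGE_OFFSET * PAGE_SIZE; this fault
 * handler backs that mapping. All other offsets signal SIGBUS.
 */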
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   parts of this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

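/*
 * Module initialization: refuse to load without the SIEF2 facility, reject
 * the combination of nested (vSIE) and hpage since the two cannot be used
 * together yet, and seed kvm_s390_fac_base from the host STFLE facility
 * list, masked by nonhyp_mask(), before registering with the KVM core.
 */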
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");