// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

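/*
 * Number of facility doublewords (u64) that KVM handles internally. The
 * build bugs ensure this never exceeds the sizes used elsewhere in the
 * architecture code.
 */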
static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

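/*
 * Adjust the guest epoch (and, with the multiple-epoch facility, the epoch
 * index) in the given SIE control block to compensate for a host TOD clock
 * change by @delta.
 */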
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

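/* Mark a CPU feature as available for the guest cpu model. */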
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

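/*
 * Check whether PERFORM LOCKED OPERATION function @nr is available by
 * executing PLO with the "test bit" indication set in the function code.
 */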
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

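/*
 * Execute the query function of the instruction with the given opcode and
 * store the returned subfunction mask at @query.
 */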
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

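/*
 * Probe which (sub)functions and SIE features the host provides, so that
 * they can later be offered to guests via the cpu model interface.
 */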
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

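/*
 * Transfer the dirty information tracked in the gmap/PGSTEs into the
 * memslot's dirty bitmap, processing one segment (_PAGE_ENTRIES pages)
 * at a time.
 */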
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

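/* Request interception of operation exceptions on all VCPUs. */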
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

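/*
 * Handle the VM memory control attributes: enable CMMA, reset the CMMA
 * state or change the guest memory limit.
 */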
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

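/*
 * Set or clear the AES/DEA wrapping key masks or toggle AP instruction
 * interpretation. The crypto setup of all VCPUs is refreshed afterwards.
 */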
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

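/* Make a synchronous VCPU request on every VCPU of the VM. */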
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

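/*
 * Calculate the guest TOD clock (and epoch index) from the host clock.
 * Preemption is disabled so that clock and epoch stay consistent.
 */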
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

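/*
 * Set the guest cpu model (cpuid, ibc and facility list) from user space.
 * Only possible before the first VCPU has been created; the requested ibc
 * is clamped to the range supported by the machine.
 */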
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

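/*
 * Report the host machine model: host cpuid and ibc, the facility mask
 * supported by KVM and the raw host facility list.
 */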
kvm_s390_get_machine(struct kvm * kvm,struct kvm_device_attr * attr)1467 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1468 {
1469 	struct kvm_s390_vm_cpu_machine *mach;
1470 	int ret = 0;
1471 
1472 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1473 	if (!mach) {
1474 		ret = -ENOMEM;
1475 		goto out;
1476 	}
1477 	get_cpu_id((struct cpuid *) &mach->cpuid);
1478 	mach->ibc = sclp.ibc;
1479 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1480 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1481 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1482 	       sizeof(stfle_fac_list));
1483 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1484 		 kvm->arch.model.ibc,
1485 		 kvm->arch.model.cpuid);
1486 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1487 		 mach->fac_mask[0],
1488 		 mach->fac_mask[1],
1489 		 mach->fac_mask[2]);
1490 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1491 		 mach->fac_list[0],
1492 		 mach->fac_list[1],
1493 		 mach->fac_list[2]);
1494 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1495 		ret = -EFAULT;
1496 	kfree(mach);
1497 out:
1498 	return ret;
1499 }
1500 
kvm_s390_get_processor_feat(struct kvm * kvm,struct kvm_device_attr * attr)1501 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1502 				       struct kvm_device_attr *attr)
1503 {
1504 	struct kvm_s390_vm_cpu_feat data;
1505 
1506 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1507 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1508 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1509 		return -EFAULT;
1510 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1511 			 data.feat[0],
1512 			 data.feat[1],
1513 			 data.feat[2]);
1514 	return 0;
1515 }
1516 
kvm_s390_get_machine_feat(struct kvm * kvm,struct kvm_device_attr * attr)1517 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1518 				     struct kvm_device_attr *attr)
1519 {
1520 	struct kvm_s390_vm_cpu_feat data;
1521 
1522 	bitmap_copy((unsigned long *) data.feat,
1523 		    kvm_s390_available_cpu_feat,
1524 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1525 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1526 		return -EFAULT;
1527 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1528 			 data.feat[0],
1529 			 data.feat[1],
1530 			 data.feat[2]);
1531 	return 0;
1532 }
1533 
kvm_s390_get_processor_subfunc(struct kvm * kvm,struct kvm_device_attr * attr)1534 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1535 					  struct kvm_device_attr *attr)
1536 {
1537 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1538 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1539 		return -EFAULT;
1540 
1541 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1546 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1549 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1552 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1555 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1558 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1561 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1564 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1567 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1570 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1573 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1576 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1579 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1580 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1582 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1583 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1585 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1588 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1590 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1593 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1594 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1595 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1597 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1598 
1599 	return 0;
1600 }
1601 
1602 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1603 					struct kvm_device_attr *attr)
1604 {
1605 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1606 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1607 		return -EFAULT;
1608 
1609 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1614 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1617 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1620 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1623 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1626 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1629 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1632 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1635 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1638 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1641 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1644 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1647 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1648 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1650 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1651 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1653 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1654 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1656 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1657 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1658 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1661 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1662 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1663 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1665 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1666 
1667 	return 0;
1668 }
1669 
1670 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1671 {
1672 	int ret = -ENXIO;
1673 
1674 	switch (attr->attr) {
1675 	case KVM_S390_VM_CPU_PROCESSOR:
1676 		ret = kvm_s390_get_processor(kvm, attr);
1677 		break;
1678 	case KVM_S390_VM_CPU_MACHINE:
1679 		ret = kvm_s390_get_machine(kvm, attr);
1680 		break;
1681 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1682 		ret = kvm_s390_get_processor_feat(kvm, attr);
1683 		break;
1684 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1685 		ret = kvm_s390_get_machine_feat(kvm, attr);
1686 		break;
1687 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1688 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1689 		break;
1690 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1691 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1692 		break;
1693 	}
1694 	return ret;
1695 }
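
/*
 * Illustrative sketch, not part of the kernel build: how userspace could read
 * the machine subfunction blocks handled above via KVM_GET_DEVICE_ATTR on the
 * VM file descriptor. The vm_fd variable and the error handling are
 * assumptions of the example; the structures come from the KVM uapi headers.
 *
 *	struct kvm_s390_vm_cpu_subfunc subfunc;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_SUBFUNC,
 *		.addr  = (unsigned long)&subfunc,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
 *		perror("KVM_GET_DEVICE_ATTR");
 */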
1696 
1697 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1698 {
1699 	int ret;
1700 
1701 	switch (attr->group) {
1702 	case KVM_S390_VM_MEM_CTRL:
1703 		ret = kvm_s390_set_mem_control(kvm, attr);
1704 		break;
1705 	case KVM_S390_VM_TOD:
1706 		ret = kvm_s390_set_tod(kvm, attr);
1707 		break;
1708 	case KVM_S390_VM_CPU_MODEL:
1709 		ret = kvm_s390_set_cpu_model(kvm, attr);
1710 		break;
1711 	case KVM_S390_VM_CRYPTO:
1712 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1713 		break;
1714 	case KVM_S390_VM_MIGRATION:
1715 		ret = kvm_s390_vm_set_migration(kvm, attr);
1716 		break;
1717 	default:
1718 		ret = -ENXIO;
1719 		break;
1720 	}
1721 
1722 	return ret;
1723 }
1724 
1725 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726 {
1727 	int ret;
1728 
1729 	switch (attr->group) {
1730 	case KVM_S390_VM_MEM_CTRL:
1731 		ret = kvm_s390_get_mem_control(kvm, attr);
1732 		break;
1733 	case KVM_S390_VM_TOD:
1734 		ret = kvm_s390_get_tod(kvm, attr);
1735 		break;
1736 	case KVM_S390_VM_CPU_MODEL:
1737 		ret = kvm_s390_get_cpu_model(kvm, attr);
1738 		break;
1739 	case KVM_S390_VM_MIGRATION:
1740 		ret = kvm_s390_vm_get_migration(kvm, attr);
1741 		break;
1742 	default:
1743 		ret = -ENXIO;
1744 		break;
1745 	}
1746 
1747 	return ret;
1748 }
1749 
1750 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1751 {
1752 	int ret;
1753 
1754 	switch (attr->group) {
1755 	case KVM_S390_VM_MEM_CTRL:
1756 		switch (attr->attr) {
1757 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1758 		case KVM_S390_VM_MEM_CLR_CMMA:
1759 			ret = sclp.has_cmma ? 0 : -ENXIO;
1760 			break;
1761 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1762 			ret = 0;
1763 			break;
1764 		default:
1765 			ret = -ENXIO;
1766 			break;
1767 		}
1768 		break;
1769 	case KVM_S390_VM_TOD:
1770 		switch (attr->attr) {
1771 		case KVM_S390_VM_TOD_LOW:
1772 		case KVM_S390_VM_TOD_HIGH:
1773 			ret = 0;
1774 			break;
1775 		default:
1776 			ret = -ENXIO;
1777 			break;
1778 		}
1779 		break;
1780 	case KVM_S390_VM_CPU_MODEL:
1781 		switch (attr->attr) {
1782 		case KVM_S390_VM_CPU_PROCESSOR:
1783 		case KVM_S390_VM_CPU_MACHINE:
1784 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1785 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1786 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1787 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1788 			ret = 0;
1789 			break;
1790 		default:
1791 			ret = -ENXIO;
1792 			break;
1793 		}
1794 		break;
1795 	case KVM_S390_VM_CRYPTO:
1796 		switch (attr->attr) {
1797 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1798 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1799 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1800 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1801 			ret = 0;
1802 			break;
1803 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1804 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1805 			ret = ap_instructions_available() ? 0 : -ENXIO;
1806 			break;
1807 		default:
1808 			ret = -ENXIO;
1809 			break;
1810 		}
1811 		break;
1812 	case KVM_S390_VM_MIGRATION:
1813 		ret = 0;
1814 		break;
1815 	default:
1816 		ret = -ENXIO;
1817 		break;
1818 	}
1819 
1820 	return ret;
1821 }
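
/*
 * Illustrative sketch, comment only: userspace can probe the attributes
 * handled above with KVM_HAS_DEVICE_ATTR before relying on them. A return
 * value of 0 means the attribute is supported; -1 with errno ENXIO means it
 * is not. vm_fd is an assumed, already open VM file descriptor.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_APIE,
 *	};
 *	bool has_apie = ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0;
 */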
1822 
1823 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1824 {
1825 	uint8_t *keys;
1826 	uint64_t hva;
1827 	int srcu_idx, i, r = 0;
1828 
1829 	if (args->flags != 0)
1830 		return -EINVAL;
1831 
1832 	/* Is this guest using storage keys? */
1833 	if (!mm_uses_skeys(current->mm))
1834 		return KVM_S390_GET_SKEYS_NONE;
1835 
1836 	/* Enforce sane limit on memory allocation */
1837 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1838 		return -EINVAL;
1839 
1840 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1841 	if (!keys)
1842 		return -ENOMEM;
1843 
1844 	mmap_read_lock(current->mm);
1845 	srcu_idx = srcu_read_lock(&kvm->srcu);
1846 	for (i = 0; i < args->count; i++) {
1847 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1848 		if (kvm_is_error_hva(hva)) {
1849 			r = -EFAULT;
1850 			break;
1851 		}
1852 
1853 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1854 		if (r)
1855 			break;
1856 	}
1857 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1858 	mmap_read_unlock(current->mm);
1859 
1860 	if (!r) {
1861 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1862 				 sizeof(uint8_t) * args->count);
1863 		if (r)
1864 			r = -EFAULT;
1865 	}
1866 
1867 	kvfree(keys);
1868 	return r;
1869 }
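
/*
 * Illustrative sketch, comment only: reading the storage keys of the first
 * 256 guest pages from userspace. A return value of KVM_S390_GET_SKEYS_NONE
 * indicates that the guest does not use storage keys at all. vm_fd and the
 * buffer size are assumptions of the example.
 *
 *	__u8 keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (unsigned long)keys,
 *	};
 *	int ret = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	if (ret == KVM_S390_GET_SKEYS_NONE)
 *		printf("guest does not use storage keys\n");
 */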
1870 
1871 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1872 {
1873 	uint8_t *keys;
1874 	uint64_t hva;
1875 	int srcu_idx, i, r = 0;
1876 	bool unlocked;
1877 
1878 	if (args->flags != 0)
1879 		return -EINVAL;
1880 
1881 	/* Enforce sane limit on memory allocation */
1882 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1883 		return -EINVAL;
1884 
1885 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1886 	if (!keys)
1887 		return -ENOMEM;
1888 
1889 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1890 			   sizeof(uint8_t) * args->count);
1891 	if (r) {
1892 		r = -EFAULT;
1893 		goto out;
1894 	}
1895 
1896 	/* Enable storage key handling for the guest */
1897 	r = s390_enable_skey();
1898 	if (r)
1899 		goto out;
1900 
1901 	i = 0;
1902 	mmap_read_lock(current->mm);
1903 	srcu_idx = srcu_read_lock(&kvm->srcu);
1904 	while (i < args->count) {
1905 		unlocked = false;
1906 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1907 		if (kvm_is_error_hva(hva)) {
1908 			r = -EFAULT;
1909 			break;
1910 		}
1911 
1912 		/* Lowest order bit is reserved */
1913 		if (keys[i] & 0x01) {
1914 			r = -EINVAL;
1915 			break;
1916 		}
1917 
1918 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1919 		if (r) {
1920 			r = fixup_user_fault(current->mm, hva,
1921 					     FAULT_FLAG_WRITE, &unlocked);
1922 			if (r)
1923 				break;
1924 		}
1925 		if (!r)
1926 			i++;
1927 	}
1928 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1929 	mmap_read_unlock(current->mm);
1930 out:
1931 	kvfree(keys);
1932 	return r;
1933 }
1934 
1935 /*
1936  * Base address and length must be sent at the start of each block, therefore
1937  * it's cheaper to send some clean data, as long as it's less than the size of
1938  * two longs.
1939  */
1940 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1941 /* for consistency */
1942 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
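
/*
 * Worked example of the trade-off above (illustrative numbers): every block
 * sent to userspace carries a start gfn and a count, i.e. roughly
 * 2 * sizeof(void *) == 16 bytes of overhead on 64-bit. Since each CMMA value
 * is a single byte, a run of up to 16 clean pages between two dirty ones is
 * no more expensive to transmit inline than starting a new block, so the scan
 * only breaks the block when the gap exceeds KVM_S390_MAX_BIT_DISTANCE.
 */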
1943 
1944 /*
1945  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1946  * address falls in a hole. In that case the index of one of the memslots
1947  * bordering the hole is returned.
1948  */
1949 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1950 {
1951 	int start = 0, end = slots->used_slots;
1952 	int slot = atomic_read(&slots->last_used_slot);
1953 	struct kvm_memory_slot *memslots = slots->memslots;
1954 
1955 	if (gfn >= memslots[slot].base_gfn &&
1956 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1957 		return slot;
1958 
1959 	while (start < end) {
1960 		slot = start + (end - start) / 2;
1961 
1962 		if (gfn >= memslots[slot].base_gfn)
1963 			end = slot;
1964 		else
1965 			start = slot + 1;
1966 	}
1967 
1968 	if (start >= slots->used_slots)
1969 		return slots->used_slots - 1;
1970 
1971 	if (gfn >= memslots[start].base_gfn &&
1972 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1973 		atomic_set(&slots->last_used_slot, start);
1974 	}
1975 
1976 	return start;
1977 }
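
/*
 * Illustrative example with a hypothetical slot layout: given two memslots
 * covering gfns [0, 0x100) and [0x200, 0x300), a lookup of gfn 0x180 falls
 * into the hole between them. gfn_to_memslot_approx() then returns the index
 * of one of the two bordering slots instead of failing, which is what the
 * CMMA walk in kvm_s390_next_dirty_cmma() relies on to keep going.
 */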
1978 
1979 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1980 			      u8 *res, unsigned long bufsize)
1981 {
1982 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1983 
1984 	args->count = 0;
1985 	while (args->count < bufsize) {
1986 		hva = gfn_to_hva(kvm, cur_gfn);
1987 		/*
1988 		 * We return an error if the first value was invalid, but we
1989 		 * return successfully if at least one value was copied.
1990 		 */
1991 		if (kvm_is_error_hva(hva))
1992 			return args->count ? 0 : -EFAULT;
1993 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1994 			pgstev = 0;
1995 		res[args->count++] = (pgstev >> 24) & 0x43;
1996 		cur_gfn++;
1997 	}
1998 
1999 	return 0;
2000 }
2001 
2002 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2003 					      unsigned long cur_gfn)
2004 {
2005 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2006 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
2007 	unsigned long ofs = cur_gfn - ms->base_gfn;
2008 
2009 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2010 		slotidx--;
2011 		/* If we are above the highest slot, wrap around */
2012 		if (slotidx < 0)
2013 			slotidx = slots->used_slots - 1;
2014 
2015 		ms = slots->memslots + slotidx;
2016 		ofs = 0;
2017 	}
2018 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2019 	while ((slotidx > 0) && (ofs >= ms->npages)) {
2020 		slotidx--;
2021 		ms = slots->memslots + slotidx;
2022 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2023 	}
2024 	return ms->base_gfn + ofs;
2025 }
2026 
2027 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2028 			     u8 *res, unsigned long bufsize)
2029 {
2030 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2031 	struct kvm_memslots *slots = kvm_memslots(kvm);
2032 	struct kvm_memory_slot *ms;
2033 
2034 	if (unlikely(!slots->used_slots))
2035 		return 0;
2036 
2037 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2038 	ms = gfn_to_memslot(kvm, cur_gfn);
2039 	args->count = 0;
2040 	args->start_gfn = cur_gfn;
2041 	if (!ms)
2042 		return 0;
2043 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2044 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2045 
2046 	while (args->count < bufsize) {
2047 		hva = gfn_to_hva(kvm, cur_gfn);
2048 		if (kvm_is_error_hva(hva))
2049 			return 0;
2050 		/* Decrement only if we actually flipped the bit to 0 */
2051 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2052 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2053 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2054 			pgstev = 0;
2055 		/* Save the value */
2056 		res[args->count++] = (pgstev >> 24) & 0x43;
2057 		/* If the next bit is too far away, stop. */
2058 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2059 			return 0;
2060 		/* If we reached the previous "next", find the next one */
2061 		if (cur_gfn == next_gfn)
2062 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2063 		/* Reached the end of memory or of the buffer, stop */
2064 		if ((next_gfn >= mem_end) ||
2065 		    (next_gfn - args->start_gfn >= bufsize))
2066 			return 0;
2067 		cur_gfn++;
2068 		/* Reached the end of the current memslot, take the next one. */
2069 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2070 			ms = gfn_to_memslot(kvm, cur_gfn);
2071 			if (!ms)
2072 				return 0;
2073 		}
2074 	}
2075 	return 0;
2076 }
2077 
2078 /*
2079  * This function searches for the next page with dirty CMMA attributes, and
2080  * saves the attributes in the buffer up to either the end of the buffer or
2081  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2082  * no trailing clean bytes are saved.
2083  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2084  * output buffer will indicate 0 as length.
2085  */
2086 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2087 				  struct kvm_s390_cmma_log *args)
2088 {
2089 	unsigned long bufsize;
2090 	int srcu_idx, peek, ret;
2091 	u8 *values;
2092 
2093 	if (!kvm->arch.use_cmma)
2094 		return -ENXIO;
2095 	/* Invalid/unsupported flags were specified */
2096 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2097 		return -EINVAL;
2098 	/* Migration mode query, and we are not doing a migration */
2099 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2100 	if (!peek && !kvm->arch.migration_mode)
2101 		return -EINVAL;
2102 	/* CMMA is disabled or was not used, or the buffer has length zero */
2103 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2104 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2105 		memset(args, 0, sizeof(*args));
2106 		return 0;
2107 	}
2108 	/* We are not peeking, and there are no dirty pages */
2109 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2110 		memset(args, 0, sizeof(*args));
2111 		return 0;
2112 	}
2113 
2114 	values = vmalloc(bufsize);
2115 	if (!values)
2116 		return -ENOMEM;
2117 
2118 	mmap_read_lock(kvm->mm);
2119 	srcu_idx = srcu_read_lock(&kvm->srcu);
2120 	if (peek)
2121 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2122 	else
2123 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2124 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2125 	mmap_read_unlock(kvm->mm);
2126 
2127 	if (kvm->arch.migration_mode)
2128 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2129 	else
2130 		args->remaining = 0;
2131 
2132 	if (copy_to_user((void __user *)args->values, values, args->count))
2133 		ret = -EFAULT;
2134 
2135 	vfree(values);
2136 	return ret;
2137 }
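
/*
 * Illustrative sketch, comment only: how a migration tool could drain the
 * dirty CMMA values with KVM_S390_GET_CMMA_BITS once migration mode has been
 * enabled. vm_fd, the buffer size and the consume_cmma() helper are
 * assumptions of the example; real userspace (e.g. QEMU) differs in detail.
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = 0,
 *		.values = (unsigned long)buf,
 *	};
 *
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
 *			break;
 *		consume_cmma(log.start_gfn, buf, log.count);
 *		log.start_gfn += log.count;
 *		log.count = sizeof(buf);
 *	} while (log.remaining);
 */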
2138 
2139 /*
2140  * This function sets the CMMA attributes for the given pages. If the input
2141  * buffer has zero length, no action is taken, otherwise the attributes are
2142  * set and the mm->context.uses_cmm flag is set.
2143  */
2144 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2145 				  const struct kvm_s390_cmma_log *args)
2146 {
2147 	unsigned long hva, mask, pgstev, i;
2148 	uint8_t *bits;
2149 	int srcu_idx, r = 0;
2150 
2151 	mask = args->mask;
2152 
2153 	if (!kvm->arch.use_cmma)
2154 		return -ENXIO;
2155 	/* invalid/unsupported flags */
2156 	if (args->flags != 0)
2157 		return -EINVAL;
2158 	/* Enforce sane limit on memory allocation */
2159 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2160 		return -EINVAL;
2161 	/* Nothing to do */
2162 	if (args->count == 0)
2163 		return 0;
2164 
2165 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2166 	if (!bits)
2167 		return -ENOMEM;
2168 
2169 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2170 	if (r) {
2171 		r = -EFAULT;
2172 		goto out;
2173 	}
2174 
2175 	mmap_read_lock(kvm->mm);
2176 	srcu_idx = srcu_read_lock(&kvm->srcu);
2177 	for (i = 0; i < args->count; i++) {
2178 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2179 		if (kvm_is_error_hva(hva)) {
2180 			r = -EFAULT;
2181 			break;
2182 		}
2183 
2184 		pgstev = bits[i];
2185 		pgstev = pgstev << 24;
2186 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2187 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2188 	}
2189 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2190 	mmap_read_unlock(kvm->mm);
2191 
2192 	if (!kvm->mm->context.uses_cmm) {
2193 		mmap_write_lock(kvm->mm);
2194 		kvm->mm->context.uses_cmm = 1;
2195 		mmap_write_unlock(kvm->mm);
2196 	}
2197 out:
2198 	vfree(bits);
2199 	return r;
2200 }
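
/*
 * Illustrative sketch, comment only: the destination side of a migration
 * could replay the values collected above with KVM_S390_SET_CMMA_BITS.
 * vm_fd, gfn, n and buf are assumed to come from the sender; the mask is
 * clamped by the handler above to _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = gfn,
 *		.count = n,
 *		.flags = 0,
 *		.mask = ~0ULL,
 *		.values = (unsigned long)buf,
 *	};
 *	if (ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log))
 *		perror("KVM_S390_SET_CMMA_BITS");
 */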
2201 
2202 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2203 {
2204 	struct kvm_vcpu *vcpu;
2205 	u16 rc, rrc;
2206 	int ret = 0;
2207 	int i;
2208 
2209 	/*
2210 	 * We ignore failures and try to destroy as many CPUs as possible.
2211 	 * At the same time we must not free the assigned resources when
2212 	 * this fails, as the ultravisor still has access to that memory.
2213 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2214 	 * behind.
2215 	 * We want to return the first failure rc and rrc, though.
2216 	 */
2217 	kvm_for_each_vcpu(i, vcpu, kvm) {
2218 		mutex_lock(&vcpu->mutex);
2219 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2220 			*rcp = rc;
2221 			*rrcp = rrc;
2222 			ret = -EIO;
2223 		}
2224 		mutex_unlock(&vcpu->mutex);
2225 	}
2226 	return ret;
2227 }
2228 
2229 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2230 {
2231 	int i, r = 0;
2232 	u16 dummy;
2233 
2234 	struct kvm_vcpu *vcpu;
2235 
2236 	kvm_for_each_vcpu(i, vcpu, kvm) {
2237 		mutex_lock(&vcpu->mutex);
2238 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2239 		mutex_unlock(&vcpu->mutex);
2240 		if (r)
2241 			break;
2242 	}
2243 	if (r)
2244 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2245 	return r;
2246 }
2247 
2248 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2249 {
2250 	int r = 0;
2251 	u16 dummy;
2252 	void __user *argp = (void __user *)cmd->data;
2253 
2254 	switch (cmd->cmd) {
2255 	case KVM_PV_ENABLE: {
2256 		r = -EINVAL;
2257 		if (kvm_s390_pv_is_protected(kvm))
2258 			break;
2259 
2260 		/*
2261 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2262 		 *  esca, we need no cleanup in the error cases below
2263 		 */
2264 		r = sca_switch_to_extended(kvm);
2265 		if (r)
2266 			break;
2267 
2268 		mmap_write_lock(current->mm);
2269 		r = gmap_mark_unmergeable();
2270 		mmap_write_unlock(current->mm);
2271 		if (r)
2272 			break;
2273 
2274 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2275 		if (r)
2276 			break;
2277 
2278 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2279 		if (r)
2280 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2281 
2282 		/* we need to block service interrupts from now on */
2283 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2284 		break;
2285 	}
2286 	case KVM_PV_DISABLE: {
2287 		r = -EINVAL;
2288 		if (!kvm_s390_pv_is_protected(kvm))
2289 			break;
2290 
2291 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2292 		/*
2293 		 * If a CPU could not be destroyed, destroy VM will also fail.
2294 		 * There is no point in trying to destroy it. Instead return
2295 		 * the rc and rrc from the first CPU that failed destroying.
2296 		 */
2297 		if (r)
2298 			break;
2299 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2300 
2301 		/* no need to block service interrupts any more */
2302 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2303 		break;
2304 	}
2305 	case KVM_PV_SET_SEC_PARMS: {
2306 		struct kvm_s390_pv_sec_parm parms = {};
2307 		void *hdr;
2308 
2309 		r = -EINVAL;
2310 		if (!kvm_s390_pv_is_protected(kvm))
2311 			break;
2312 
2313 		r = -EFAULT;
2314 		if (copy_from_user(&parms, argp, sizeof(parms)))
2315 			break;
2316 
2317 		/* Currently restricted to 8KB */
2318 		r = -EINVAL;
2319 		if (parms.length > PAGE_SIZE * 2)
2320 			break;
2321 
2322 		r = -ENOMEM;
2323 		hdr = vmalloc(parms.length);
2324 		if (!hdr)
2325 			break;
2326 
2327 		r = -EFAULT;
2328 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2329 				    parms.length))
2330 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2331 						      &cmd->rc, &cmd->rrc);
2332 
2333 		vfree(hdr);
2334 		break;
2335 	}
2336 	case KVM_PV_UNPACK: {
2337 		struct kvm_s390_pv_unp unp = {};
2338 
2339 		r = -EINVAL;
2340 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2341 			break;
2342 
2343 		r = -EFAULT;
2344 		if (copy_from_user(&unp, argp, sizeof(unp)))
2345 			break;
2346 
2347 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2348 				       &cmd->rc, &cmd->rrc);
2349 		break;
2350 	}
2351 	case KVM_PV_VERIFY: {
2352 		r = -EINVAL;
2353 		if (!kvm_s390_pv_is_protected(kvm))
2354 			break;
2355 
2356 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2357 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2358 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2359 			     cmd->rrc);
2360 		break;
2361 	}
2362 	case KVM_PV_PREP_RESET: {
2363 		r = -EINVAL;
2364 		if (!kvm_s390_pv_is_protected(kvm))
2365 			break;
2366 
2367 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2368 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2369 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2370 			     cmd->rc, cmd->rrc);
2371 		break;
2372 	}
2373 	case KVM_PV_UNSHARE_ALL: {
2374 		r = -EINVAL;
2375 		if (!kvm_s390_pv_is_protected(kvm))
2376 			break;
2377 
2378 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2379 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2380 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2381 			     cmd->rc, cmd->rrc);
2382 		break;
2383 	}
2384 	default:
2385 		r = -ENOTTY;
2386 	}
2387 	return r;
2388 }
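
/*
 * Illustrative sketch, comment only: userspace drives the transition to
 * protected mode through KVM_S390_PV_COMMAND on the VM fd, roughly in the
 * order ENABLE -> SET_SEC_PARMS -> UNPACK -> VERIFY. Only the first step is
 * shown; vm_fd is an assumed, already open VM file descriptor.
 *
 *	struct kvm_pv_cmd cmd = {
 *		.cmd = KVM_PV_ENABLE,
 *	};
 *	if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd))
 *		fprintf(stderr, "PV enable failed: rc 0x%x rrc 0x%x\n",
 *			cmd.rc, cmd.rrc);
 */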
2389 
2390 long kvm_arch_vm_ioctl(struct file *filp,
2391 		       unsigned int ioctl, unsigned long arg)
2392 {
2393 	struct kvm *kvm = filp->private_data;
2394 	void __user *argp = (void __user *)arg;
2395 	struct kvm_device_attr attr;
2396 	int r;
2397 
2398 	switch (ioctl) {
2399 	case KVM_S390_INTERRUPT: {
2400 		struct kvm_s390_interrupt s390int;
2401 
2402 		r = -EFAULT;
2403 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2404 			break;
2405 		r = kvm_s390_inject_vm(kvm, &s390int);
2406 		break;
2407 	}
2408 	case KVM_CREATE_IRQCHIP: {
2409 		struct kvm_irq_routing_entry routing;
2410 
2411 		r = -EINVAL;
2412 		if (kvm->arch.use_irqchip) {
2413 			/* Set up dummy routing. */
2414 			memset(&routing, 0, sizeof(routing));
2415 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2416 		}
2417 		break;
2418 	}
2419 	case KVM_SET_DEVICE_ATTR: {
2420 		r = -EFAULT;
2421 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2422 			break;
2423 		r = kvm_s390_vm_set_attr(kvm, &attr);
2424 		break;
2425 	}
2426 	case KVM_GET_DEVICE_ATTR: {
2427 		r = -EFAULT;
2428 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2429 			break;
2430 		r = kvm_s390_vm_get_attr(kvm, &attr);
2431 		break;
2432 	}
2433 	case KVM_HAS_DEVICE_ATTR: {
2434 		r = -EFAULT;
2435 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2436 			break;
2437 		r = kvm_s390_vm_has_attr(kvm, &attr);
2438 		break;
2439 	}
2440 	case KVM_S390_GET_SKEYS: {
2441 		struct kvm_s390_skeys args;
2442 
2443 		r = -EFAULT;
2444 		if (copy_from_user(&args, argp,
2445 				   sizeof(struct kvm_s390_skeys)))
2446 			break;
2447 		r = kvm_s390_get_skeys(kvm, &args);
2448 		break;
2449 	}
2450 	case KVM_S390_SET_SKEYS: {
2451 		struct kvm_s390_skeys args;
2452 
2453 		r = -EFAULT;
2454 		if (copy_from_user(&args, argp,
2455 				   sizeof(struct kvm_s390_skeys)))
2456 			break;
2457 		r = kvm_s390_set_skeys(kvm, &args);
2458 		break;
2459 	}
2460 	case KVM_S390_GET_CMMA_BITS: {
2461 		struct kvm_s390_cmma_log args;
2462 
2463 		r = -EFAULT;
2464 		if (copy_from_user(&args, argp, sizeof(args)))
2465 			break;
2466 		mutex_lock(&kvm->slots_lock);
2467 		r = kvm_s390_get_cmma_bits(kvm, &args);
2468 		mutex_unlock(&kvm->slots_lock);
2469 		if (!r) {
2470 			r = copy_to_user(argp, &args, sizeof(args));
2471 			if (r)
2472 				r = -EFAULT;
2473 		}
2474 		break;
2475 	}
2476 	case KVM_S390_SET_CMMA_BITS: {
2477 		struct kvm_s390_cmma_log args;
2478 
2479 		r = -EFAULT;
2480 		if (copy_from_user(&args, argp, sizeof(args)))
2481 			break;
2482 		mutex_lock(&kvm->slots_lock);
2483 		r = kvm_s390_set_cmma_bits(kvm, &args);
2484 		mutex_unlock(&kvm->slots_lock);
2485 		break;
2486 	}
2487 	case KVM_S390_PV_COMMAND: {
2488 		struct kvm_pv_cmd args;
2489 
2490 		/* protvirt means user sigp */
2491 		kvm->arch.user_cpu_state_ctrl = 1;
2492 		r = 0;
2493 		if (!is_prot_virt_host()) {
2494 			r = -EINVAL;
2495 			break;
2496 		}
2497 		if (copy_from_user(&args, argp, sizeof(args))) {
2498 			r = -EFAULT;
2499 			break;
2500 		}
2501 		if (args.flags) {
2502 			r = -EINVAL;
2503 			break;
2504 		}
2505 		mutex_lock(&kvm->lock);
2506 		r = kvm_s390_handle_pv(kvm, &args);
2507 		mutex_unlock(&kvm->lock);
2508 		if (copy_to_user(argp, &args, sizeof(args))) {
2509 			r = -EFAULT;
2510 			break;
2511 		}
2512 		break;
2513 	}
2514 	default:
2515 		r = -ENOTTY;
2516 	}
2517 
2518 	return r;
2519 }
2520 
2521 static int kvm_s390_apxa_installed(void)
2522 {
2523 	struct ap_config_info info;
2524 
2525 	if (ap_instructions_available()) {
2526 		if (ap_qci(&info) == 0)
2527 			return info.apxa;
2528 	}
2529 
2530 	return 0;
2531 }
2532 
2533 /*
2534  * The format of the crypto control block (CRYCB) is specified in the 3 low
2535  * order bits of the CRYCB designation (CRYCBD) field as follows:
2536  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2537  *	     AP extended addressing (APXA) facility are installed.
2538  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2539  * Format 2: Both the APXA and MSAX3 facilities are installed.
2540  */
2541 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2542 {
2543 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2544 
2545 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2546 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2547 
2548 	/* Check whether MSAX3 is installed */
2549 	if (!test_kvm_facility(kvm, 76))
2550 		return;
2551 
2552 	if (kvm_s390_apxa_installed())
2553 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2554 	else
2555 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2556 }
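
/*
 * Worked example with a hypothetical address: for a CRYCB located at
 * 0x3f001800 on a machine with both MSAX3 and APXA installed, the code above
 * yields crycbd == 0x3f001800 | CRYCB_FORMAT2, i.e. the CRYCB origin with the
 * format encoded in the low order bits of the designation.
 */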
2557 
2558 /*
2559  * kvm_arch_crypto_set_masks
2560  *
2561  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2562  *	 to be set.
2563  * @apm: the mask identifying the accessible AP adapters
2564  * @aqm: the mask identifying the accessible AP domains
2565  * @adm: the mask identifying the accessible AP control domains
2566  *
2567  * Set the masks that identify the adapters, domains and control domains to
2568  * which the KVM guest is granted access.
2569  *
2570  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2571  *	 function.
2572  */
2573 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2574 			       unsigned long *aqm, unsigned long *adm)
2575 {
2576 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2577 
2578 	kvm_s390_vcpu_block_all(kvm);
2579 
2580 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2581 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2582 		memcpy(crycb->apcb1.apm, apm, 32);
2583 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2584 			 apm[0], apm[1], apm[2], apm[3]);
2585 		memcpy(crycb->apcb1.aqm, aqm, 32);
2586 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2587 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2588 		memcpy(crycb->apcb1.adm, adm, 32);
2589 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2590 			 adm[0], adm[1], adm[2], adm[3]);
2591 		break;
2592 	case CRYCB_FORMAT1:
2593 	case CRYCB_FORMAT0: /* Fall through; both use APCB0 */
2594 		memcpy(crycb->apcb0.apm, apm, 8);
2595 		memcpy(crycb->apcb0.aqm, aqm, 2);
2596 		memcpy(crycb->apcb0.adm, adm, 2);
2597 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2598 			 apm[0], *((unsigned short *)aqm),
2599 			 *((unsigned short *)adm));
2600 		break;
2601 	default:	/* Cannot happen */
2602 		break;
2603 	}
2604 
2605 	/* recreate the shadow crycb for each vcpu */
2606 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2607 	kvm_s390_vcpu_unblock_all(kvm);
2608 }
2609 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
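
/*
 * Illustrative sketch, comment only, of how an external module could use the
 * exported helper above. The mask contents and the way the kvm pointer is
 * obtained are assumptions of the example, not a description of the vfio_ap
 * driver.
 *
 *	DECLARE_BITMAP(apm, 256);
 *	DECLARE_BITMAP(aqm, 256);
 *	DECLARE_BITMAP(adm, 256);
 *
 *	bitmap_zero(apm, 256);
 *	bitmap_zero(aqm, 256);
 *	bitmap_zero(adm, 256);
 *	set_bit_inv(3, apm);	(grant access to AP adapter 3)
 *	set_bit_inv(0, aqm);	(and to AP usage domain 0)
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 *	mutex_unlock(&kvm->lock);
 */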
2610 
2611 /*
2612  * kvm_arch_crypto_clear_masks
2613  *
2614  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2615  *	 to be cleared.
2616  *
2617  * Clear the masks that identify the adapters, domains and control domains to
2618  * which the KVM guest is granted access.
2619  *
2620  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2621  *	 function.
2622  */
2623 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2624 {
2625 	kvm_s390_vcpu_block_all(kvm);
2626 
2627 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2628 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2629 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2630 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2631 
2632 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2633 	/* recreate the shadow crycb for each vcpu */
2634 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2635 	kvm_s390_vcpu_unblock_all(kvm);
2636 }
2637 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2638 
2639 static u64 kvm_s390_get_initial_cpuid(void)
2640 {
2641 	struct cpuid cpuid;
2642 
2643 	get_cpu_id(&cpuid);
2644 	cpuid.version = 0xff;
2645 	return *((u64 *) &cpuid);
2646 }
2647 
2648 static void kvm_s390_crypto_init(struct kvm *kvm)
2649 {
2650 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2651 	kvm_s390_set_crycb_format(kvm);
2652 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2653 
2654 	if (!test_kvm_facility(kvm, 76))
2655 		return;
2656 
2657 	/* Enable AES/DEA protected key functions by default */
2658 	kvm->arch.crypto.aes_kw = 1;
2659 	kvm->arch.crypto.dea_kw = 1;
2660 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2661 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2662 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2663 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2664 }
2665 
2666 static void sca_dispose(struct kvm *kvm)
2667 {
2668 	if (kvm->arch.use_esca)
2669 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2670 	else
2671 		free_page((unsigned long)(kvm->arch.sca));
2672 	kvm->arch.sca = NULL;
2673 }
2674 
2675 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2676 {
2677 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2678 	int i, rc;
2679 	char debug_name[16];
2680 	static unsigned long sca_offset;
2681 
2682 	rc = -EINVAL;
2683 #ifdef CONFIG_KVM_S390_UCONTROL
2684 	if (type & ~KVM_VM_S390_UCONTROL)
2685 		goto out_err;
2686 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2687 		goto out_err;
2688 #else
2689 	if (type)
2690 		goto out_err;
2691 #endif
2692 
2693 	rc = s390_enable_sie();
2694 	if (rc)
2695 		goto out_err;
2696 
2697 	rc = -ENOMEM;
2698 
2699 	if (!sclp.has_64bscao)
2700 		alloc_flags |= GFP_DMA;
2701 	rwlock_init(&kvm->arch.sca_lock);
2702 	/* start with basic SCA */
2703 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2704 	if (!kvm->arch.sca)
2705 		goto out_err;
2706 	mutex_lock(&kvm_lock);
2707 	sca_offset += 16;
2708 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2709 		sca_offset = 0;
2710 	kvm->arch.sca = (struct bsca_block *)
2711 			((char *) kvm->arch.sca + sca_offset);
2712 	mutex_unlock(&kvm_lock);
2713 
2714 	sprintf(debug_name, "kvm-%u", current->pid);
2715 
2716 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2717 	if (!kvm->arch.dbf)
2718 		goto out_err;
2719 
2720 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2721 	kvm->arch.sie_page2 =
2722 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2723 	if (!kvm->arch.sie_page2)
2724 		goto out_err;
2725 
2726 	kvm->arch.sie_page2->kvm = kvm;
2727 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2728 
2729 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2730 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2731 					      (kvm_s390_fac_base[i] |
2732 					       kvm_s390_fac_ext[i]);
2733 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2734 					      kvm_s390_fac_base[i];
2735 	}
2736 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2737 
2738 	/* we are always in czam mode - even on pre z14 machines */
2739 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2740 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2741 	/* we emulate STHYI in kvm */
2742 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2743 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2744 	if (MACHINE_HAS_TLB_GUEST) {
2745 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2746 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2747 	}
2748 
2749 	if (css_general_characteristics.aiv && test_facility(65))
2750 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2751 
2752 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2753 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2754 
2755 	kvm_s390_crypto_init(kvm);
2756 
2757 	mutex_init(&kvm->arch.float_int.ais_lock);
2758 	spin_lock_init(&kvm->arch.float_int.lock);
2759 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2760 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2761 	init_waitqueue_head(&kvm->arch.ipte_wq);
2762 	mutex_init(&kvm->arch.ipte_mutex);
2763 
2764 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2765 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2766 
2767 	if (type & KVM_VM_S390_UCONTROL) {
2768 		kvm->arch.gmap = NULL;
2769 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2770 	} else {
2771 		if (sclp.hamax == U64_MAX)
2772 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2773 		else
2774 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2775 						    sclp.hamax + 1);
2776 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2777 		if (!kvm->arch.gmap)
2778 			goto out_err;
2779 		kvm->arch.gmap->private = kvm;
2780 		kvm->arch.gmap->pfault_enabled = 0;
2781 	}
2782 
2783 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2784 	kvm->arch.use_skf = sclp.has_skey;
2785 	spin_lock_init(&kvm->arch.start_stop_lock);
2786 	kvm_s390_vsie_init(kvm);
2787 	if (use_gisa)
2788 		kvm_s390_gisa_init(kvm);
2789 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2790 
2791 	return 0;
2792 out_err:
2793 	free_page((unsigned long)kvm->arch.sie_page2);
2794 	debug_unregister(kvm->arch.dbf);
2795 	sca_dispose(kvm);
2796 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2797 	return rc;
2798 }
2799 
2800 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2801 {
2802 	u16 rc, rrc;
2803 
2804 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2805 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2806 	kvm_s390_clear_local_irqs(vcpu);
2807 	kvm_clear_async_pf_completion_queue(vcpu);
2808 	if (!kvm_is_ucontrol(vcpu->kvm))
2809 		sca_del_vcpu(vcpu);
2810 
2811 	if (kvm_is_ucontrol(vcpu->kvm))
2812 		gmap_remove(vcpu->arch.gmap);
2813 
2814 	if (vcpu->kvm->arch.use_cmma)
2815 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2816 	/* We cannot hold the vcpu mutex here; we are already dying */
2817 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2818 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2819 	free_page((unsigned long)(vcpu->arch.sie_block));
2820 }
2821 
2822 static void kvm_free_vcpus(struct kvm *kvm)
2823 {
2824 	unsigned int i;
2825 	struct kvm_vcpu *vcpu;
2826 
2827 	kvm_for_each_vcpu(i, vcpu, kvm)
2828 		kvm_vcpu_destroy(vcpu);
2829 
2830 	mutex_lock(&kvm->lock);
2831 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2832 		kvm->vcpus[i] = NULL;
2833 
2834 	atomic_set(&kvm->online_vcpus, 0);
2835 	mutex_unlock(&kvm->lock);
2836 }
2837 
2838 void kvm_arch_destroy_vm(struct kvm *kvm)
2839 {
2840 	u16 rc, rrc;
2841 
2842 	kvm_free_vcpus(kvm);
2843 	sca_dispose(kvm);
2844 	kvm_s390_gisa_destroy(kvm);
2845 	/*
2846 	 * We are already at the end of life and kvm->lock is not taken.
2847 	 * This is ok as the file descriptor is closed by now and nobody
2848 	 * can mess with the pv state. To avoid lockdep_assert_held from
2849 	 * complaining we do not use kvm_s390_pv_is_protected.
2850 	 */
2851 	if (kvm_s390_pv_get_handle(kvm))
2852 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2853 	debug_unregister(kvm->arch.dbf);
2854 	free_page((unsigned long)kvm->arch.sie_page2);
2855 	if (!kvm_is_ucontrol(kvm))
2856 		gmap_remove(kvm->arch.gmap);
2857 	kvm_s390_destroy_adapters(kvm);
2858 	kvm_s390_clear_float_irqs(kvm);
2859 	kvm_s390_vsie_destroy(kvm);
2860 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2861 }
2862 
2863 /* Section: vcpu related */
2864 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2865 {
2866 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2867 	if (!vcpu->arch.gmap)
2868 		return -ENOMEM;
2869 	vcpu->arch.gmap->private = vcpu->kvm;
2870 
2871 	return 0;
2872 }
2873 
2874 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2875 {
2876 	if (!kvm_s390_use_sca_entries())
2877 		return;
2878 	read_lock(&vcpu->kvm->arch.sca_lock);
2879 	if (vcpu->kvm->arch.use_esca) {
2880 		struct esca_block *sca = vcpu->kvm->arch.sca;
2881 
2882 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2883 		sca->cpu[vcpu->vcpu_id].sda = 0;
2884 	} else {
2885 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2886 
2887 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2888 		sca->cpu[vcpu->vcpu_id].sda = 0;
2889 	}
2890 	read_unlock(&vcpu->kvm->arch.sca_lock);
2891 }
2892 
2893 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2894 {
2895 	if (!kvm_s390_use_sca_entries()) {
2896 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2897 
2898 		/* we still need the basic sca for the ipte control */
2899 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2900 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2901 		return;
2902 	}
2903 	read_lock(&vcpu->kvm->arch.sca_lock);
2904 	if (vcpu->kvm->arch.use_esca) {
2905 		struct esca_block *sca = vcpu->kvm->arch.sca;
2906 
2907 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2908 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2909 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2910 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2911 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2912 	} else {
2913 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2914 
2915 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2916 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2917 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2918 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2919 	}
2920 	read_unlock(&vcpu->kvm->arch.sca_lock);
2921 }
2922 
2923 /* Basic SCA to Extended SCA data copy routines */
2924 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2925 {
2926 	d->sda = s->sda;
2927 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2928 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2929 }
2930 
2931 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2932 {
2933 	int i;
2934 
2935 	d->ipte_control = s->ipte_control;
2936 	d->mcn[0] = s->mcn;
2937 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2938 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2939 }
2940 
2941 static int sca_switch_to_extended(struct kvm *kvm)
2942 {
2943 	struct bsca_block *old_sca = kvm->arch.sca;
2944 	struct esca_block *new_sca;
2945 	struct kvm_vcpu *vcpu;
2946 	unsigned int vcpu_idx;
2947 	u32 scaol, scaoh;
2948 
2949 	if (kvm->arch.use_esca)
2950 		return 0;
2951 
2952 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2953 	if (!new_sca)
2954 		return -ENOMEM;
2955 
2956 	scaoh = (u32)((u64)(new_sca) >> 32);
2957 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2958 
2959 	kvm_s390_vcpu_block_all(kvm);
2960 	write_lock(&kvm->arch.sca_lock);
2961 
2962 	sca_copy_b_to_e(new_sca, old_sca);
2963 
2964 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2965 		vcpu->arch.sie_block->scaoh = scaoh;
2966 		vcpu->arch.sie_block->scaol = scaol;
2967 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2968 	}
2969 	kvm->arch.sca = new_sca;
2970 	kvm->arch.use_esca = 1;
2971 
2972 	write_unlock(&kvm->arch.sca_lock);
2973 	kvm_s390_vcpu_unblock_all(kvm);
2974 
2975 	free_page((unsigned long)old_sca);
2976 
2977 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2978 		 old_sca, kvm->arch.sca);
2979 	return 0;
2980 }
2981 
2982 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2983 {
2984 	int rc;
2985 
2986 	if (!kvm_s390_use_sca_entries()) {
2987 		if (id < KVM_MAX_VCPUS)
2988 			return true;
2989 		return false;
2990 	}
2991 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2992 		return true;
2993 	if (!sclp.has_esca || !sclp.has_64bscao)
2994 		return false;
2995 
2996 	mutex_lock(&kvm->lock);
2997 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2998 	mutex_unlock(&kvm->lock);
2999 
3000 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3001 }
3002 
3003 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3004 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3005 {
3006 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3007 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008 	vcpu->arch.cputm_start = get_tod_clock_fast();
3009 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3010 }
3011 
3012 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3013 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3014 {
3015 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3016 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3017 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3018 	vcpu->arch.cputm_start = 0;
3019 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3020 }
3021 
3022 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3023 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3024 {
3025 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3026 	vcpu->arch.cputm_enabled = true;
3027 	__start_cpu_timer_accounting(vcpu);
3028 }
3029 
3030 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3031 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3032 {
3033 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3034 	__stop_cpu_timer_accounting(vcpu);
3035 	vcpu->arch.cputm_enabled = false;
3036 }
3037 
3038 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3039 {
3040 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3041 	__enable_cpu_timer_accounting(vcpu);
3042 	preempt_enable();
3043 }
3044 
3045 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3046 {
3047 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3048 	__disable_cpu_timer_accounting(vcpu);
3049 	preempt_enable();
3050 }
3051 
3052 /* set the cpu timer - may only be called from the VCPU thread itself */
3053 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3054 {
3055 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3056 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3057 	if (vcpu->arch.cputm_enabled)
3058 		vcpu->arch.cputm_start = get_tod_clock_fast();
3059 	vcpu->arch.sie_block->cputm = cputm;
3060 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3061 	preempt_enable();
3062 }
3063 
3064 /* update and get the cpu timer - can also be called from other VCPU threads */
3065 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3066 {
3067 	unsigned int seq;
3068 	__u64 value;
3069 
3070 	if (unlikely(!vcpu->arch.cputm_enabled))
3071 		return vcpu->arch.sie_block->cputm;
3072 
3073 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3074 	do {
3075 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3076 		/*
3077 		 * If the writer would ever execute a read in the critical
3078 		 * section, e.g. in irq context, we have a deadlock.
3079 		 */
3080 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3081 		value = vcpu->arch.sie_block->cputm;
3082 		/* if cputm_start is 0, accounting is being started/stopped */
3083 		if (likely(vcpu->arch.cputm_start))
3084 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3085 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3086 	preempt_enable();
3087 	return value;
3088 }
3089 
3090 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3091 {
3092 
3093 	gmap_enable(vcpu->arch.enabled_gmap);
3094 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3095 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3096 		__start_cpu_timer_accounting(vcpu);
3097 	vcpu->cpu = cpu;
3098 }
3099 
3100 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3101 {
3102 	vcpu->cpu = -1;
3103 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3104 		__stop_cpu_timer_accounting(vcpu);
3105 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3106 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3107 	gmap_disable(vcpu->arch.enabled_gmap);
3108 
3109 }
3110 
3111 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3112 {
3113 	mutex_lock(&vcpu->kvm->lock);
3114 	preempt_disable();
3115 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3116 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3117 	preempt_enable();
3118 	mutex_unlock(&vcpu->kvm->lock);
3119 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3120 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3121 		sca_add_vcpu(vcpu);
3122 	}
3123 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3124 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3125 	/* make vcpu_load load the right gmap on the first trigger */
3126 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3127 }
3128 
3129 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3130 {
3131 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3132 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3133 		return true;
3134 	return false;
3135 }
3136 
3137 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3138 {
3139 	/* At least one ECC subfunction must be present */
3140 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3141 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3142 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3143 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3144 	       kvm_has_pckmo_subfunc(kvm, 41);
3145 
3146 }
3147 
3148 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3149 {
3150 	/*
3151 	 * If the AP instructions are not being interpreted and the MSAX3
3152 	 * facility is not configured for the guest, there is nothing to set up.
3153 	 */
3154 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3155 		return;
3156 
3157 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3158 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3159 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3160 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3161 
3162 	if (vcpu->kvm->arch.crypto.apie)
3163 		vcpu->arch.sie_block->eca |= ECA_APIE;
3164 
3165 	/* Set up protected key support */
3166 	if (vcpu->kvm->arch.crypto.aes_kw) {
3167 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3168 		/* ecc is also wrapped with AES key */
3169 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3170 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3171 	}
3172 
3173 	if (vcpu->kvm->arch.crypto.dea_kw)
3174 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3175 }
3176 
3177 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3178 {
3179 	free_page(vcpu->arch.sie_block->cbrlo);
3180 	vcpu->arch.sie_block->cbrlo = 0;
3181 }
3182 
3183 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3184 {
3185 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3186 	if (!vcpu->arch.sie_block->cbrlo)
3187 		return -ENOMEM;
3188 	return 0;
3189 }
3190 
3191 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3192 {
3193 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3194 
3195 	vcpu->arch.sie_block->ibc = model->ibc;
3196 	if (test_kvm_facility(vcpu->kvm, 7))
3197 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3198 }
3199 
3200 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3201 {
3202 	int rc = 0;
3203 	u16 uvrc, uvrrc;
3204 
3205 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3206 						    CPUSTAT_SM |
3207 						    CPUSTAT_STOPPED);
3208 
3209 	if (test_kvm_facility(vcpu->kvm, 78))
3210 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3211 	else if (test_kvm_facility(vcpu->kvm, 8))
3212 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3213 
3214 	kvm_s390_vcpu_setup_model(vcpu);
3215 
3216 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3217 	if (MACHINE_HAS_ESOP)
3218 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3219 	if (test_kvm_facility(vcpu->kvm, 9))
3220 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3221 	if (test_kvm_facility(vcpu->kvm, 73))
3222 		vcpu->arch.sie_block->ecb |= ECB_TE;
3223 	if (!kvm_is_ucontrol(vcpu->kvm))
3224 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3225 
3226 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3227 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3228 	if (test_kvm_facility(vcpu->kvm, 130))
3229 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3230 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3231 	if (sclp.has_cei)
3232 		vcpu->arch.sie_block->eca |= ECA_CEI;
3233 	if (sclp.has_ib)
3234 		vcpu->arch.sie_block->eca |= ECA_IB;
3235 	if (sclp.has_siif)
3236 		vcpu->arch.sie_block->eca |= ECA_SII;
3237 	if (sclp.has_sigpif)
3238 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3239 	if (test_kvm_facility(vcpu->kvm, 129)) {
3240 		vcpu->arch.sie_block->eca |= ECA_VX;
3241 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3242 	}
3243 	if (test_kvm_facility(vcpu->kvm, 139))
3244 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3245 	if (test_kvm_facility(vcpu->kvm, 156))
3246 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3247 	if (vcpu->arch.sie_block->gd) {
3248 		vcpu->arch.sie_block->eca |= ECA_AIV;
3249 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3250 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3251 	}
3252 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3253 					| SDNXC;
3254 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3255 
3256 	if (sclp.has_kss)
3257 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3258 	else
3259 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3260 
3261 	if (vcpu->kvm->arch.use_cmma) {
3262 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3263 		if (rc)
3264 			return rc;
3265 	}
3266 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3267 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3268 
3269 	vcpu->arch.sie_block->hpid = HPID_KVM;
3270 
3271 	kvm_s390_vcpu_crypto_setup(vcpu);
3272 
3273 	mutex_lock(&vcpu->kvm->lock);
3274 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3275 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3276 		if (rc)
3277 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3278 	}
3279 	mutex_unlock(&vcpu->kvm->lock);
3280 
3281 	return rc;
3282 }
3283 
3284 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3285 {
3286 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3287 		return -EINVAL;
3288 	return 0;
3289 }
3290 
3291 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3292 {
3293 	struct sie_page *sie_page;
3294 	int rc;
3295 
3296 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3297 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3298 	if (!sie_page)
3299 		return -ENOMEM;
3300 
3301 	vcpu->arch.sie_block = &sie_page->sie_block;
3302 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3303 
3304 	/* the real guest size will always be smaller than msl */
3305 	vcpu->arch.sie_block->mso = 0;
3306 	vcpu->arch.sie_block->msl = sclp.hamax;
3307 
3308 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3309 	spin_lock_init(&vcpu->arch.local_int.lock);
3310 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3311 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3312 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3313 	seqcount_init(&vcpu->arch.cputm_seqcount);
3314 
3315 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3316 	kvm_clear_async_pf_completion_queue(vcpu);
3317 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3318 				    KVM_SYNC_GPRS |
3319 				    KVM_SYNC_ACRS |
3320 				    KVM_SYNC_CRS |
3321 				    KVM_SYNC_ARCH0 |
3322 				    KVM_SYNC_PFAULT |
3323 				    KVM_SYNC_DIAG318;
3324 	kvm_s390_set_prefix(vcpu, 0);
3325 	if (test_kvm_facility(vcpu->kvm, 64))
3326 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3327 	if (test_kvm_facility(vcpu->kvm, 82))
3328 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3329 	if (test_kvm_facility(vcpu->kvm, 133))
3330 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3331 	if (test_kvm_facility(vcpu->kvm, 156))
3332 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3333 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3334 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3335 	 */
3336 	if (MACHINE_HAS_VX)
3337 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3338 	else
3339 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3340 
3341 	if (kvm_is_ucontrol(vcpu->kvm)) {
3342 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3343 		if (rc)
3344 			goto out_free_sie_block;
3345 	}
3346 
3347 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3348 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3349 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3350 
3351 	rc = kvm_s390_vcpu_setup(vcpu);
3352 	if (rc)
3353 		goto out_ucontrol_uninit;
3354 	return 0;
3355 
3356 out_ucontrol_uninit:
3357 	if (kvm_is_ucontrol(vcpu->kvm))
3358 		gmap_remove(vcpu->arch.gmap);
3359 out_free_sie_block:
3360 	free_page((unsigned long)(vcpu->arch.sie_block));
3361 	return rc;
3362 }
3363 
3364 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3365 {
3366 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3367 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3368 }
3369 
3370 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3371 {
3372 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3373 }
3374 
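/*
 * Block SIE entry for this vcpu: set PROG_BLOCK_SIE in prog20 and kick the
 * vcpu out of SIE in case it is currently running.
 */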
3375 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3376 {
3377 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3378 	exit_sie(vcpu);
3379 }
3380 
3381 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3382 {
3383 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3384 }
3385 
3386 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3387 {
3388 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3389 	exit_sie(vcpu);
3390 }
3391 
3392 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3393 {
3394 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3395 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3396 }
3397 
3398 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3399 {
3400 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3401 }
3402 
3403 /*
3404  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3405  * If the CPU is not running (e.g. waiting as idle) the function will
3406  * return immediately. */
3407 void exit_sie(struct kvm_vcpu *vcpu)
3408 {
3409 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3410 	kvm_s390_vsie_kick(vcpu);
3411 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3412 		cpu_relax();
3413 }
3414 
3415 /* Kick a guest cpu out of SIE to process a request synchronously */
3416 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3417 {
3418 	kvm_make_request(req, vcpu);
3419 	kvm_s390_vcpu_request(vcpu);
3420 }
3421 
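/*
 * gmap invalidation callback: if the invalidated range overlaps a vcpu's
 * prefix pages, ask that vcpu to re-map its prefix via KVM_REQ_MMU_RELOAD.
 */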
3422 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3423 			      unsigned long end)
3424 {
3425 	struct kvm *kvm = gmap->private;
3426 	struct kvm_vcpu *vcpu;
3427 	unsigned long prefix;
3428 	int i;
3429 
3430 	if (gmap_is_shadow(gmap))
3431 		return;
3432 	if (start >= 1UL << 31)
3433 		/* We are only interested in prefix pages */
3434 		return;
3435 	kvm_for_each_vcpu(i, vcpu, kvm) {
3436 		/* match against both prefix pages */
3437 		prefix = kvm_s390_get_prefix(vcpu);
3438 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3439 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3440 				   start, end);
3441 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3442 		}
3443 	}
3444 }
3445 
3446 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3447 {
3448 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3449 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3450 	    halt_poll_max_steal) {
3451 		vcpu->stat.halt_no_poll_steal++;
3452 		return true;
3453 	}
3454 	return false;
3455 }
3456 
3457 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3458 {
3459 	/* kvm common code refers to this, but never calls it */
3460 	BUG();
3461 	return 0;
3462 }
3463 
3464 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3465 					   struct kvm_one_reg *reg)
3466 {
3467 	int r = -EINVAL;
3468 
3469 	switch (reg->id) {
3470 	case KVM_REG_S390_TODPR:
3471 		r = put_user(vcpu->arch.sie_block->todpr,
3472 			     (u32 __user *)reg->addr);
3473 		break;
3474 	case KVM_REG_S390_EPOCHDIFF:
3475 		r = put_user(vcpu->arch.sie_block->epoch,
3476 			     (u64 __user *)reg->addr);
3477 		break;
3478 	case KVM_REG_S390_CPU_TIMER:
3479 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3480 			     (u64 __user *)reg->addr);
3481 		break;
3482 	case KVM_REG_S390_CLOCK_COMP:
3483 		r = put_user(vcpu->arch.sie_block->ckc,
3484 			     (u64 __user *)reg->addr);
3485 		break;
3486 	case KVM_REG_S390_PFTOKEN:
3487 		r = put_user(vcpu->arch.pfault_token,
3488 			     (u64 __user *)reg->addr);
3489 		break;
3490 	case KVM_REG_S390_PFCOMPARE:
3491 		r = put_user(vcpu->arch.pfault_compare,
3492 			     (u64 __user *)reg->addr);
3493 		break;
3494 	case KVM_REG_S390_PFSELECT:
3495 		r = put_user(vcpu->arch.pfault_select,
3496 			     (u64 __user *)reg->addr);
3497 		break;
3498 	case KVM_REG_S390_PP:
3499 		r = put_user(vcpu->arch.sie_block->pp,
3500 			     (u64 __user *)reg->addr);
3501 		break;
3502 	case KVM_REG_S390_GBEA:
3503 		r = put_user(vcpu->arch.sie_block->gbea,
3504 			     (u64 __user *)reg->addr);
3505 		break;
3506 	default:
3507 		break;
3508 	}
3509 
3510 	return r;
3511 }
3512 
3513 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3514 					   struct kvm_one_reg *reg)
3515 {
3516 	int r = -EINVAL;
3517 	__u64 val;
3518 
3519 	switch (reg->id) {
3520 	case KVM_REG_S390_TODPR:
3521 		r = get_user(vcpu->arch.sie_block->todpr,
3522 			     (u32 __user *)reg->addr);
3523 		break;
3524 	case KVM_REG_S390_EPOCHDIFF:
3525 		r = get_user(vcpu->arch.sie_block->epoch,
3526 			     (u64 __user *)reg->addr);
3527 		break;
3528 	case KVM_REG_S390_CPU_TIMER:
3529 		r = get_user(val, (u64 __user *)reg->addr);
3530 		if (!r)
3531 			kvm_s390_set_cpu_timer(vcpu, val);
3532 		break;
3533 	case KVM_REG_S390_CLOCK_COMP:
3534 		r = get_user(vcpu->arch.sie_block->ckc,
3535 			     (u64 __user *)reg->addr);
3536 		break;
3537 	case KVM_REG_S390_PFTOKEN:
3538 		r = get_user(vcpu->arch.pfault_token,
3539 			     (u64 __user *)reg->addr);
3540 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3541 			kvm_clear_async_pf_completion_queue(vcpu);
3542 		break;
3543 	case KVM_REG_S390_PFCOMPARE:
3544 		r = get_user(vcpu->arch.pfault_compare,
3545 			     (u64 __user *)reg->addr);
3546 		break;
3547 	case KVM_REG_S390_PFSELECT:
3548 		r = get_user(vcpu->arch.pfault_select,
3549 			     (u64 __user *)reg->addr);
3550 		break;
3551 	case KVM_REG_S390_PP:
3552 		r = get_user(vcpu->arch.sie_block->pp,
3553 			     (u64 __user *)reg->addr);
3554 		break;
3555 	case KVM_REG_S390_GBEA:
3556 		r = get_user(vcpu->arch.sie_block->gbea,
3557 			     (u64 __user *)reg->addr);
3558 		break;
3559 	default:
3560 		break;
3561 	}
3562 
3563 	return r;
3564 }
3565 
3566 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3567 {
3568 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3569 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3570 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3571 
3572 	kvm_clear_async_pf_completion_queue(vcpu);
3573 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3574 		kvm_s390_vcpu_stop(vcpu);
3575 	kvm_s390_clear_local_irqs(vcpu);
3576 }
3577 
3578 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3579 {
3580 	/* Initial reset is a superset of the normal reset */
3581 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3582 
3583 	/*
3584 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3585 	 * We do not only reset the internal data, but also ...
3586 	 */
3587 	vcpu->arch.sie_block->gpsw.mask = 0;
3588 	vcpu->arch.sie_block->gpsw.addr = 0;
3589 	kvm_s390_set_prefix(vcpu, 0);
3590 	kvm_s390_set_cpu_timer(vcpu, 0);
3591 	vcpu->arch.sie_block->ckc = 0;
3592 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3593 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3594 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3595 
3596 	/* ... the data in sync regs */
3597 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3598 	vcpu->run->s.regs.ckc = 0;
3599 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3600 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3601 	vcpu->run->psw_addr = 0;
3602 	vcpu->run->psw_mask = 0;
3603 	vcpu->run->s.regs.todpr = 0;
3604 	vcpu->run->s.regs.cputm = 0;
3605 	vcpu->run->s.regs.ckc = 0;
3606 	vcpu->run->s.regs.pp = 0;
3607 	vcpu->run->s.regs.gbea = 1;
3608 	vcpu->run->s.regs.fpc = 0;
3609 	/*
3610 	 * Do not reset these registers in the protected case, as some of
3611 	 * them are overlaid and they are not accessible in this case
3612 	 * anyway.
3613 	 */
3614 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3615 		vcpu->arch.sie_block->gbea = 1;
3616 		vcpu->arch.sie_block->pp = 0;
3617 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3618 		vcpu->arch.sie_block->todpr = 0;
3619 	}
3620 }
3621 
3622 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3623 {
3624 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3625 
3626 	/* Clear reset is a superset of the initial reset */
3627 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3628 
3629 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3630 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3631 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3632 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3633 
3634 	regs->etoken = 0;
3635 	regs->etoken_extension = 0;
3636 }
3637 
3638 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3639 {
3640 	vcpu_load(vcpu);
3641 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3642 	vcpu_put(vcpu);
3643 	return 0;
3644 }
3645 
3646 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3647 {
3648 	vcpu_load(vcpu);
3649 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3650 	vcpu_put(vcpu);
3651 	return 0;
3652 }
3653 
3654 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3655 				  struct kvm_sregs *sregs)
3656 {
3657 	vcpu_load(vcpu);
3658 
3659 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3660 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3661 
3662 	vcpu_put(vcpu);
3663 	return 0;
3664 }
3665 
3666 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3667 				  struct kvm_sregs *sregs)
3668 {
3669 	vcpu_load(vcpu);
3670 
3671 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3672 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3673 
3674 	vcpu_put(vcpu);
3675 	return 0;
3676 }
3677 
3678 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3679 {
3680 	int ret = 0;
3681 
3682 	vcpu_load(vcpu);
3683 
3684 	if (test_fp_ctl(fpu->fpc)) {
3685 		ret = -EINVAL;
3686 		goto out;
3687 	}
3688 	vcpu->run->s.regs.fpc = fpu->fpc;
3689 	if (MACHINE_HAS_VX)
3690 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3691 				 (freg_t *) fpu->fprs);
3692 	else
3693 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3694 
3695 out:
3696 	vcpu_put(vcpu);
3697 	return ret;
3698 }
3699 
3700 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3701 {
3702 	vcpu_load(vcpu);
3703 
3704 	/* make sure we have the latest values */
3705 	save_fpu_regs();
3706 	if (MACHINE_HAS_VX)
3707 		convert_vx_to_fp((freg_t *) fpu->fprs,
3708 				 (__vector128 *) vcpu->run->s.regs.vrs);
3709 	else
3710 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3711 	fpu->fpc = vcpu->run->s.regs.fpc;
3712 
3713 	vcpu_put(vcpu);
3714 	return 0;
3715 }
3716 
3717 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3718 {
3719 	int rc = 0;
3720 
3721 	if (!is_vcpu_stopped(vcpu))
3722 		rc = -EBUSY;
3723 	else {
3724 		vcpu->run->psw_mask = psw.mask;
3725 		vcpu->run->psw_addr = psw.addr;
3726 	}
3727 	return rc;
3728 }
3729 
3730 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3731 				  struct kvm_translation *tr)
3732 {
3733 	return -EINVAL; /* not implemented yet */
3734 }
3735 
3736 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3737 			      KVM_GUESTDBG_USE_HW_BP | \
3738 			      KVM_GUESTDBG_ENABLE)
3739 
3740 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3741 					struct kvm_guest_debug *dbg)
3742 {
3743 	int rc = 0;
3744 
3745 	vcpu_load(vcpu);
3746 
3747 	vcpu->guest_debug = 0;
3748 	kvm_s390_clear_bp_data(vcpu);
3749 
3750 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3751 		rc = -EINVAL;
3752 		goto out;
3753 	}
3754 	if (!sclp.has_gpere) {
3755 		rc = -EINVAL;
3756 		goto out;
3757 	}
3758 
3759 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3760 		vcpu->guest_debug = dbg->control;
3761 		/* enforce guest PER */
3762 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3763 
3764 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3765 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3766 	} else {
3767 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3768 		vcpu->arch.guestdbg.last_bp = 0;
3769 	}
3770 
3771 	if (rc) {
3772 		vcpu->guest_debug = 0;
3773 		kvm_s390_clear_bp_data(vcpu);
3774 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3775 	}
3776 
3777 out:
3778 	vcpu_put(vcpu);
3779 	return rc;
3780 }
3781 
3782 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3783 				    struct kvm_mp_state *mp_state)
3784 {
3785 	int ret;
3786 
3787 	vcpu_load(vcpu);
3788 
3789 	/* CHECK_STOP and LOAD are not supported yet */
3790 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3791 				      KVM_MP_STATE_OPERATING;
3792 
3793 	vcpu_put(vcpu);
3794 	return ret;
3795 }
3796 
3797 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3798 				    struct kvm_mp_state *mp_state)
3799 {
3800 	int rc = 0;
3801 
3802 	vcpu_load(vcpu);
3803 
3804 	/* user space knows about this interface - let it control the state */
3805 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3806 
3807 	switch (mp_state->mp_state) {
3808 	case KVM_MP_STATE_STOPPED:
3809 		rc = kvm_s390_vcpu_stop(vcpu);
3810 		break;
3811 	case KVM_MP_STATE_OPERATING:
3812 		rc = kvm_s390_vcpu_start(vcpu);
3813 		break;
3814 	case KVM_MP_STATE_LOAD:
3815 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3816 			rc = -ENXIO;
3817 			break;
3818 		}
3819 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3820 		break;
3821 	case KVM_MP_STATE_CHECK_STOP:
3822 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3823 	default:
3824 		rc = -ENXIO;
3825 	}
3826 
3827 	vcpu_put(vcpu);
3828 	return rc;
3829 }
3830 
3831 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3832 {
3833 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3834 }
3835 
3836 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3837 {
3838 retry:
3839 	kvm_s390_vcpu_request_handled(vcpu);
3840 	if (!kvm_request_pending(vcpu))
3841 		return 0;
3842 	/*
3843 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3844 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3845 	 * This ensures that the ipte instruction for this request has
3846 	 * already finished. We might race against a second unmapper that
3847 	 * wants to set the blocking bit. Let's just retry the request loop.
3848 	 */
3849 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3850 		int rc;
3851 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3852 					  kvm_s390_get_prefix(vcpu),
3853 					  PAGE_SIZE * 2, PROT_WRITE);
3854 		if (rc) {
3855 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3856 			return rc;
3857 		}
3858 		goto retry;
3859 	}
3860 
3861 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3862 		vcpu->arch.sie_block->ihcpu = 0xffff;
3863 		goto retry;
3864 	}
3865 
3866 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3867 		if (!ibs_enabled(vcpu)) {
3868 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3869 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3870 		}
3871 		goto retry;
3872 	}
3873 
3874 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3875 		if (ibs_enabled(vcpu)) {
3876 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3877 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3878 		}
3879 		goto retry;
3880 	}
3881 
3882 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3883 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3884 		goto retry;
3885 	}
3886 
3887 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3888 		/*
3889 		 * Disable CMM virtualization; we will emulate the ESSA
3890 		 * instruction manually, in order to provide additional
3891 		 * functionalities needed for live migration.
3892 		 */
3893 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3894 		goto retry;
3895 	}
3896 
3897 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3898 		/*
3899 		 * Re-enable CMM virtualization if CMMA is available and
3900 		 * CMM has been used.
3901 		 */
3902 		if ((vcpu->kvm->arch.use_cmma) &&
3903 		    (vcpu->kvm->mm->context.uses_cmm))
3904 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3905 		goto retry;
3906 	}
3907 
3908 	/* nothing to do, just clear the request */
3909 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3910 	/* we left the vsie handler, nothing to do, just clear the request */
3911 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3912 
3913 	return 0;
3914 }
3915 
3916 void kvm_s390_set_tod_clock(struct kvm *kvm,
3917 			    const struct kvm_s390_vm_tod_clock *gtod)
3918 {
3919 	struct kvm_vcpu *vcpu;
3920 	union tod_clock clk;
3921 	int i;
3922 
3923 	mutex_lock(&kvm->lock);
3924 	preempt_disable();
3925 
3926 	store_tod_clock_ext(&clk);
3927 
3928 	kvm->arch.epoch = gtod->tod - clk.tod;
3929 	kvm->arch.epdx = 0;
3930 	if (test_kvm_facility(kvm, 139)) {
3931 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3932 		if (kvm->arch.epoch > gtod->tod)
3933 			kvm->arch.epdx -= 1;
3934 	}
3935 
3936 	kvm_s390_vcpu_block_all(kvm);
3937 	kvm_for_each_vcpu(i, vcpu, kvm) {
3938 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3939 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3940 	}
3941 
3942 	kvm_s390_vcpu_unblock_all(kvm);
3943 	preempt_enable();
3944 	mutex_unlock(&kvm->lock);
3945 }
3946 
3947 /**
3948  * kvm_arch_fault_in_page - fault-in guest page if necessary
3949  * @vcpu: The corresponding virtual cpu
3950  * @gpa: Guest physical address
3951  * @writable: Whether the page should be writable or not
3952  *
3953  * Make sure that a guest page has been faulted-in on the host.
3954  *
3955  * Return: Zero on success, negative error code otherwise.
3956  */
3957 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3958 {
3959 	return gmap_fault(vcpu->arch.gmap, gpa,
3960 			  writable ? FAULT_FLAG_WRITE : 0);
3961 }
3962 
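/*
 * Deliver the pfault token to the guest: as a PFAULT_INIT interrupt on the
 * vcpu when the pseudo page fault starts, or as a floating PFAULT_DONE
 * interrupt on the VM when it completes.
 */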
3963 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3964 				      unsigned long token)
3965 {
3966 	struct kvm_s390_interrupt inti;
3967 	struct kvm_s390_irq irq;
3968 
3969 	if (start_token) {
3970 		irq.u.ext.ext_params2 = token;
3971 		irq.type = KVM_S390_INT_PFAULT_INIT;
3972 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3973 	} else {
3974 		inti.type = KVM_S390_INT_PFAULT_DONE;
3975 		inti.parm64 = token;
3976 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3977 	}
3978 }
3979 
3980 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3981 				     struct kvm_async_pf *work)
3982 {
3983 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3984 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3985 
3986 	return true;
3987 }
3988 
3989 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3990 				 struct kvm_async_pf *work)
3991 {
3992 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3993 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3994 }
3995 
3996 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3997 			       struct kvm_async_pf *work)
3998 {
3999 	/* s390 will always inject the page directly */
4000 }
4001 
4002 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4003 {
4004 	/*
4005 	 * s390 will always inject the page directly,
4006 	 * but we still want check_async_completion to clean up
4007 	 */
4008 	return true;
4009 }
4010 
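/*
 * Check whether the current gmap fault may be handled as a pseudo page
 * fault and, if so, queue asynchronous fault-in work for gmap_addr.
 */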
4011 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4012 {
4013 	hva_t hva;
4014 	struct kvm_arch_async_pf arch;
4015 
4016 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4017 		return false;
4018 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4019 	    vcpu->arch.pfault_compare)
4020 		return false;
4021 	if (psw_extint_disabled(vcpu))
4022 		return false;
4023 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4024 		return false;
4025 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4026 		return false;
4027 	if (!vcpu->arch.gmap->pfault_enabled)
4028 		return false;
4029 
4030 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4031 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4032 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4033 		return false;
4034 
4035 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4036 }
4037 
4038 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4039 {
4040 	int rc, cpuflags;
4041 
4042 	/*
4043 	 * On s390 notifications for arriving pages will be delivered directly
4044 	 * to the guest but the housekeeping for completed pfaults is
4045 	 * handled outside the worker.
4046 	 */
4047 	kvm_check_async_pf_completion(vcpu);
4048 
4049 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4050 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4051 
4052 	if (need_resched())
4053 		schedule();
4054 
4055 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4056 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4057 		if (rc)
4058 			return rc;
4059 	}
4060 
4061 	rc = kvm_s390_handle_requests(vcpu);
4062 	if (rc)
4063 		return rc;
4064 
4065 	if (guestdbg_enabled(vcpu)) {
4066 		kvm_s390_backup_guest_per_regs(vcpu);
4067 		kvm_s390_patch_guest_per_regs(vcpu);
4068 	}
4069 
4070 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4071 
4072 	vcpu->arch.sie_block->icptcode = 0;
4073 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4074 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4075 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4076 
4077 	return 0;
4078 }
4079 
4080 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4081 {
4082 	struct kvm_s390_pgm_info pgm_info = {
4083 		.code = PGM_ADDRESSING,
4084 	};
4085 	u8 opcode, ilen;
4086 	int rc;
4087 
4088 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4089 	trace_kvm_s390_sie_fault(vcpu);
4090 
4091 	/*
4092 	 * We want to inject an addressing exception, which is defined as a
4093 	 * suppressing or terminating exception. However, since we came here
4094 	 * by a DAT access exception, the PSW still points to the faulting
4095 	 * instruction since DAT exceptions are nullifying. So we've got
4096 	 * to look up the current opcode to get the length of the instruction
4097 	 * to be able to forward the PSW.
4098 	 */
4099 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4100 	ilen = insn_length(opcode);
4101 	if (rc < 0) {
4102 		return rc;
4103 	} else if (rc) {
4104 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4105 		 * Forward by arbitrary ilc, injection will take care of
4106 		 * nullification if necessary.
4107 		 */
4108 		pgm_info = vcpu->arch.pgm;
4109 		ilen = 4;
4110 	}
4111 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4112 	kvm_s390_forward_psw(vcpu, ilen);
4113 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4114 }
4115 
4116 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4117 {
4118 	struct mcck_volatile_info *mcck_info;
4119 	struct sie_page *sie_page;
4120 
4121 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4122 		   vcpu->arch.sie_block->icptcode);
4123 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4124 
4125 	if (guestdbg_enabled(vcpu))
4126 		kvm_s390_restore_guest_per_regs(vcpu);
4127 
4128 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4129 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4130 
4131 	if (exit_reason == -EINTR) {
4132 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4133 		sie_page = container_of(vcpu->arch.sie_block,
4134 					struct sie_page, sie_block);
4135 		mcck_info = &sie_page->mcck_info;
4136 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4137 		return 0;
4138 	}
4139 
4140 	if (vcpu->arch.sie_block->icptcode > 0) {
4141 		int rc = kvm_handle_sie_intercept(vcpu);
4142 
4143 		if (rc != -EOPNOTSUPP)
4144 			return rc;
4145 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4146 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4147 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4148 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4149 		return -EREMOTE;
4150 	} else if (exit_reason != -EFAULT) {
4151 		vcpu->stat.exit_null++;
4152 		return 0;
4153 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4154 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4155 		vcpu->run->s390_ucontrol.trans_exc_code =
4156 						current->thread.gmap_addr;
4157 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4158 		return -EREMOTE;
4159 	} else if (current->thread.gmap_pfault) {
4160 		trace_kvm_s390_major_guest_pfault(vcpu);
4161 		current->thread.gmap_pfault = 0;
4162 		if (kvm_arch_setup_async_pf(vcpu))
4163 			return 0;
4164 		vcpu->stat.pfault_sync++;
4165 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4166 	}
4167 	return vcpu_post_run_fault_in_sie(vcpu);
4168 }
4169 
4170 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
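/*
 * Inner run loop: keep re-entering SIE until a signal is pending, a guest
 * debug exit is requested, or pre/post run processing returns an error or
 * requests an exit to userspace.
 */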
4171 static int __vcpu_run(struct kvm_vcpu *vcpu)
4172 {
4173 	int rc, exit_reason;
4174 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4175 
4176 	/*
4177 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4178 	 * ning the guest), so that memslots (and other stuff) are protected
4179 	 */
4180 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4181 
4182 	do {
4183 		rc = vcpu_pre_run(vcpu);
4184 		if (rc)
4185 			break;
4186 
4187 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4188 		/*
4189 		 * As PF_VCPU will be used in the fault handler, no uaccess
4190 		 * may happen between guest_enter and guest_exit.
4191 		 */
4192 		local_irq_disable();
4193 		guest_enter_irqoff();
4194 		__disable_cpu_timer_accounting(vcpu);
4195 		local_irq_enable();
4196 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4197 			memcpy(sie_page->pv_grregs,
4198 			       vcpu->run->s.regs.gprs,
4199 			       sizeof(sie_page->pv_grregs));
4200 		}
4201 		if (test_cpu_flag(CIF_FPU))
4202 			load_fpu_regs();
4203 		exit_reason = sie64a(vcpu->arch.sie_block,
4204 				     vcpu->run->s.regs.gprs);
4205 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4206 			memcpy(vcpu->run->s.regs.gprs,
4207 			       sie_page->pv_grregs,
4208 			       sizeof(sie_page->pv_grregs));
4209 			/*
4210 			 * We're not allowed to inject interrupts on intercepts
4211 			 * that leave the guest state in an "in-between" state
4212 			 * where the next SIE entry will do a continuation.
4213 			 * Fence interrupts in our "internal" PSW.
4214 			 */
4215 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4216 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4217 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4218 			}
4219 		}
4220 		local_irq_disable();
4221 		__enable_cpu_timer_accounting(vcpu);
4222 		guest_exit_irqoff();
4223 		local_irq_enable();
4224 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4225 
4226 		rc = vcpu_post_run(vcpu, exit_reason);
4227 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4228 
4229 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4230 	return rc;
4231 }
4232 
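/*
 * Sync format-2 (non-protected) state from kvm_run into the SIE block,
 * including lazy enablement of runtime instrumentation and guarded storage
 * when userspace supplies valid control blocks.
 */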
4233 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4234 {
4235 	struct kvm_run *kvm_run = vcpu->run;
4236 	struct runtime_instr_cb *riccb;
4237 	struct gs_cb *gscb;
4238 
4239 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4240 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4241 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4242 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4243 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4244 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4245 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4246 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4247 	}
4248 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4249 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4250 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4251 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4252 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4253 			kvm_clear_async_pf_completion_queue(vcpu);
4254 	}
4255 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4256 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4257 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4258 	}
4259 	/*
4260 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4261 	 * we should enable RI here instead of doing the lazy enablement.
4262 	 */
4263 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4264 	    test_kvm_facility(vcpu->kvm, 64) &&
4265 	    riccb->v &&
4266 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4267 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4268 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4269 	}
4270 	/*
4271 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4272 	 * we should enable GS here instead of doing the lazy enablement.
4273 	 */
4274 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4275 	    test_kvm_facility(vcpu->kvm, 133) &&
4276 	    gscb->gssm &&
4277 	    !vcpu->arch.gs_enabled) {
4278 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4279 		vcpu->arch.sie_block->ecb |= ECB_GS;
4280 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4281 		vcpu->arch.gs_enabled = 1;
4282 	}
4283 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4284 	    test_kvm_facility(vcpu->kvm, 82)) {
4285 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4286 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4287 	}
4288 	if (MACHINE_HAS_GS) {
4289 		preempt_disable();
4290 		__ctl_set_bit(2, 4);
4291 		if (current->thread.gs_cb) {
4292 			vcpu->arch.host_gscb = current->thread.gs_cb;
4293 			save_gs_cb(vcpu->arch.host_gscb);
4294 		}
4295 		if (vcpu->arch.gs_enabled) {
4296 			current->thread.gs_cb = (struct gs_cb *)
4297 						&vcpu->run->s.regs.gscb;
4298 			restore_gs_cb(current->thread.gs_cb);
4299 		}
4300 		preempt_enable();
4301 	}
4302 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4303 }
4304 
4305 static void sync_regs(struct kvm_vcpu *vcpu)
4306 {
4307 	struct kvm_run *kvm_run = vcpu->run;
4308 
4309 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4310 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4311 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4312 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4313 		/* some control register changes require a tlb flush */
4314 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4315 	}
4316 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4317 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4318 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4319 	}
4320 	save_access_regs(vcpu->arch.host_acrs);
4321 	restore_access_regs(vcpu->run->s.regs.acrs);
4322 	/* save host (userspace) fprs/vrs */
4323 	save_fpu_regs();
4324 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4325 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4326 	if (MACHINE_HAS_VX)
4327 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4328 	else
4329 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4330 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4331 	if (test_fp_ctl(current->thread.fpu.fpc))
4332 		/* User space provided an invalid FPC, let's clear it */
4333 		current->thread.fpu.fpc = 0;
4334 
4335 	/* Sync fmt2 only data */
4336 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4337 		sync_regs_fmt2(vcpu);
4338 	} else {
4339 		/*
4340 		 * In several places we have to modify our internal view to
4341 		 * not do things that are disallowed by the ultravisor. For
4342 		 * example we must not inject interrupts after specific exits
4343 		 * (e.g. 112 prefix page not secure). We do this by turning
4344 		 * off the machine check, external and I/O interrupt bits
4345 		 * of our PSW copy. To avoid getting validity intercepts, we
4346 		 * do only accept the condition code from userspace.
4347 		 */
4348 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4349 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4350 						   PSW_MASK_CC;
4351 	}
4352 
4353 	kvm_run->kvm_dirty_regs = 0;
4354 }
4355 
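/*
 * Store format-2 (non-protected) state from the SIE block back into kvm_run
 * and switch the guarded-storage control block back to the host's.
 */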
4356 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4357 {
4358 	struct kvm_run *kvm_run = vcpu->run;
4359 
4360 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4361 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4362 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4363 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4364 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4365 	if (MACHINE_HAS_GS) {
4366 		preempt_disable();
4367 		__ctl_set_bit(2, 4);
4368 		if (vcpu->arch.gs_enabled)
4369 			save_gs_cb(current->thread.gs_cb);
4370 		current->thread.gs_cb = vcpu->arch.host_gscb;
4371 		restore_gs_cb(vcpu->arch.host_gscb);
4372 		if (!vcpu->arch.host_gscb)
4373 			__ctl_clear_bit(2, 4);
4374 		vcpu->arch.host_gscb = NULL;
4375 		preempt_enable();
4376 	}
4377 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4378 }
4379 
4380 static void store_regs(struct kvm_vcpu *vcpu)
4381 {
4382 	struct kvm_run *kvm_run = vcpu->run;
4383 
4384 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4385 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4386 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4387 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4388 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4389 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4390 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4391 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4392 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4393 	save_access_regs(vcpu->run->s.regs.acrs);
4394 	restore_access_regs(vcpu->arch.host_acrs);
4395 	/* Save guest register state */
4396 	save_fpu_regs();
4397 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4398 	/* Restore will be done lazily at return */
4399 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4400 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4401 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4402 		store_regs_fmt2(vcpu);
4403 }
4404 
4405 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4406 {
4407 	struct kvm_run *kvm_run = vcpu->run;
4408 	int rc;
4409 
4410 	if (kvm_run->immediate_exit)
4411 		return -EINTR;
4412 
4413 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4414 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4415 		return -EINVAL;
4416 
4417 	vcpu_load(vcpu);
4418 
4419 	if (guestdbg_exit_pending(vcpu)) {
4420 		kvm_s390_prepare_debug_exit(vcpu);
4421 		rc = 0;
4422 		goto out;
4423 	}
4424 
4425 	kvm_sigset_activate(vcpu);
4426 
4427 	/*
4428 	 * no need to check the return value of vcpu_start as it can only have
4429 	 * an error for protvirt, but protvirt means user cpu state
4430 	 */
4431 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4432 		kvm_s390_vcpu_start(vcpu);
4433 	} else if (is_vcpu_stopped(vcpu)) {
4434 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4435 				   vcpu->vcpu_id);
4436 		rc = -EINVAL;
4437 		goto out;
4438 	}
4439 
4440 	sync_regs(vcpu);
4441 	enable_cpu_timer_accounting(vcpu);
4442 
4443 	might_fault();
4444 	rc = __vcpu_run(vcpu);
4445 
4446 	if (signal_pending(current) && !rc) {
4447 		kvm_run->exit_reason = KVM_EXIT_INTR;
4448 		rc = -EINTR;
4449 	}
4450 
4451 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4452 		kvm_s390_prepare_debug_exit(vcpu);
4453 		rc = 0;
4454 	}
4455 
4456 	if (rc == -EREMOTE) {
4457 		/* userspace support is needed, kvm_run has been prepared */
4458 		rc = 0;
4459 	}
4460 
4461 	disable_cpu_timer_accounting(vcpu);
4462 	store_regs(vcpu);
4463 
4464 	kvm_sigset_deactivate(vcpu);
4465 
4466 	vcpu->stat.exit_userspace++;
4467 out:
4468 	vcpu_put(vcpu);
4469 	return rc;
4470 }
4471 
4472 /*
4473  * store status at address
4474  * we have two special cases:
4475  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4476  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4477  */
4478 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4479 {
4480 	unsigned char archmode = 1;
4481 	freg_t fprs[NUM_FPRS];
4482 	unsigned int px;
4483 	u64 clkcomp, cputm;
4484 	int rc;
4485 
4486 	px = kvm_s390_get_prefix(vcpu);
4487 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4488 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4489 			return -EFAULT;
4490 		gpa = 0;
4491 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4492 		if (write_guest_real(vcpu, 163, &archmode, 1))
4493 			return -EFAULT;
4494 		gpa = px;
4495 	} else
4496 		gpa -= __LC_FPREGS_SAVE_AREA;
4497 
4498 	/* manually convert vector registers if necessary */
4499 	if (MACHINE_HAS_VX) {
4500 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4501 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4502 				     fprs, 128);
4503 	} else {
4504 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4505 				     vcpu->run->s.regs.fprs, 128);
4506 	}
4507 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4508 			      vcpu->run->s.regs.gprs, 128);
4509 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4510 			      &vcpu->arch.sie_block->gpsw, 16);
4511 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4512 			      &px, 4);
4513 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4514 			      &vcpu->run->s.regs.fpc, 4);
4515 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4516 			      &vcpu->arch.sie_block->todpr, 4);
4517 	cputm = kvm_s390_get_cpu_timer(vcpu);
4518 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4519 			      &cputm, 8);
4520 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4521 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4522 			      &clkcomp, 8);
4523 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4524 			      &vcpu->run->s.regs.acrs, 64);
4525 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4526 			      &vcpu->arch.sie_block->gcr, 128);
4527 	return rc ? -EFAULT : 0;
4528 }
4529 
4530 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4531 {
4532 	/*
4533 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4534 	 * switch in the run ioctl. Let's update our copies before we save
4535 	 * them into the save area
4536 	 */
4537 	save_fpu_regs();
4538 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4539 	save_access_regs(vcpu->run->s.regs.acrs);
4540 
4541 	return kvm_s390_store_status_unloaded(vcpu, addr);
4542 }
4543 
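/* Cancel any pending ENABLE_IBS request and synchronously disable IBS. */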
4544 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4545 {
4546 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4547 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4548 }
4549 
4550 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4551 {
4552 	unsigned int i;
4553 	struct kvm_vcpu *vcpu;
4554 
4555 	kvm_for_each_vcpu(i, vcpu, kvm) {
4556 		__disable_ibs_on_vcpu(vcpu);
4557 	}
4558 }
4559 
4560 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4561 {
4562 	if (!sclp.has_ibs)
4563 		return;
4564 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4565 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4566 }
4567 
4568 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4569 {
4570 	int i, online_vcpus, r = 0, started_vcpus = 0;
4571 
4572 	if (!is_vcpu_stopped(vcpu))
4573 		return 0;
4574 
4575 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4576 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4577 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4578 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4579 
4580 	/* Let's tell the UV that we want to change into the operating state */
4581 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4582 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4583 		if (r) {
4584 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4585 			return r;
4586 		}
4587 	}
4588 
4589 	for (i = 0; i < online_vcpus; i++) {
4590 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4591 			started_vcpus++;
4592 	}
4593 
4594 	if (started_vcpus == 0) {
4595 		/* we're the only active VCPU -> speed it up */
4596 		__enable_ibs_on_vcpu(vcpu);
4597 	} else if (started_vcpus == 1) {
4598 		/*
4599 		 * As we are starting a second VCPU, we have to disable
4600 		 * the IBS facility on all VCPUs to remove potentially
4601 		 * outstanding ENABLE requests.
4602 		 */
4603 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4604 	}
4605 
4606 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4607 	/*
4608 	 * The real PSW might have changed due to a RESTART interpreted by the
4609 	 * ultravisor. We block all interrupts and let the next sie exit
4610 	 * refresh our view.
4611 	 */
4612 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4613 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4614 	/*
4615 	 * Another VCPU might have used IBS while we were offline.
4616 	 * Let's play safe and flush the VCPU at startup.
4617 	 */
4618 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4619 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4620 	return 0;
4621 }
4622 
4623 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4624 {
4625 	int i, online_vcpus, r = 0, started_vcpus = 0;
4626 	struct kvm_vcpu *started_vcpu = NULL;
4627 
4628 	if (is_vcpu_stopped(vcpu))
4629 		return 0;
4630 
4631 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4632 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4633 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4634 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4635 
4636 	/* Let's tell the UV that we want to change into the stopped state */
4637 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4638 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4639 		if (r) {
4640 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4641 			return r;
4642 		}
4643 	}
4644 
4645 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4646 	kvm_s390_clear_stop_irq(vcpu);
4647 
4648 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4649 	__disable_ibs_on_vcpu(vcpu);
4650 
4651 	for (i = 0; i < online_vcpus; i++) {
4652 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4653 			started_vcpus++;
4654 			started_vcpu = vcpu->kvm->vcpus[i];
4655 		}
4656 	}
4657 
4658 	if (started_vcpus == 1) {
4659 		/*
4660 		 * As we only have one VCPU left, we want to enable the
4661 		 * IBS facility for that VCPU to speed it up.
4662 		 */
4663 		__enable_ibs_on_vcpu(started_vcpu);
4664 	}
4665 
4666 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4667 	return 0;
4668 }
4669 
4670 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4671 				     struct kvm_enable_cap *cap)
4672 {
4673 	int r;
4674 
4675 	if (cap->flags)
4676 		return -EINVAL;
4677 
4678 	switch (cap->cap) {
4679 	case KVM_CAP_S390_CSS_SUPPORT:
4680 		if (!vcpu->kvm->arch.css_support) {
4681 			vcpu->kvm->arch.css_support = 1;
4682 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4683 			trace_kvm_s390_enable_css(vcpu->kvm);
4684 		}
4685 		r = 0;
4686 		break;
4687 	default:
4688 		r = -EINVAL;
4689 		break;
4690 	}
4691 	return r;
4692 }
4693 
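/*
 * KVM_S390_MEMOP_SIDA_READ/WRITE: copy data between userspace and the
 * secure instruction data area (sida) of a protected vcpu.
 */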
4694 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4695 				   struct kvm_s390_mem_op *mop)
4696 {
4697 	void __user *uaddr = (void __user *)mop->buf;
4698 	int r = 0;
4699 
4700 	if (mop->flags || !mop->size)
4701 		return -EINVAL;
4702 	if (mop->size + mop->sida_offset < mop->size)
4703 		return -EINVAL;
4704 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4705 		return -E2BIG;
4706 
4707 	switch (mop->op) {
4708 	case KVM_S390_MEMOP_SIDA_READ:
4709 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4710 				 mop->sida_offset), mop->size))
4711 			r = -EFAULT;
4712 
4713 		break;
4714 	case KVM_S390_MEMOP_SIDA_WRITE:
4715 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4716 				   mop->sida_offset), uaddr, mop->size))
4717 			r = -EFAULT;
4718 		break;
4719 	}
4720 	return r;
4721 }
4722 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4723 				  struct kvm_s390_mem_op *mop)
4724 {
4725 	void __user *uaddr = (void __user *)mop->buf;
4726 	void *tmpbuf = NULL;
4727 	int r = 0;
4728 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4729 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4730 
4731 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4732 		return -EINVAL;
4733 
4734 	if (mop->size > MEM_OP_MAX_SIZE)
4735 		return -E2BIG;
4736 
4737 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4738 		return -EINVAL;
4739 
4740 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4741 		tmpbuf = vmalloc(mop->size);
4742 		if (!tmpbuf)
4743 			return -ENOMEM;
4744 	}
4745 
4746 	switch (mop->op) {
4747 	case KVM_S390_MEMOP_LOGICAL_READ:
4748 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4749 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4750 					    mop->size, GACC_FETCH);
4751 			break;
4752 		}
4753 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4754 		if (r == 0) {
4755 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4756 				r = -EFAULT;
4757 		}
4758 		break;
4759 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4760 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4761 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4762 					    mop->size, GACC_STORE);
4763 			break;
4764 		}
4765 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4766 			r = -EFAULT;
4767 			break;
4768 		}
4769 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4770 		break;
4771 	}
4772 
4773 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4774 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4775 
4776 	vfree(tmpbuf);
4777 	return r;
4778 }
4779 
4780 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4781 				      struct kvm_s390_mem_op *mop)
4782 {
4783 	int r, srcu_idx;
4784 
4785 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4786 
4787 	switch (mop->op) {
4788 	case KVM_S390_MEMOP_LOGICAL_READ:
4789 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4790 		r = kvm_s390_guest_mem_op(vcpu, mop);
4791 		break;
4792 	case KVM_S390_MEMOP_SIDA_READ:
4793 	case KVM_S390_MEMOP_SIDA_WRITE:
4794 		/* we are locked against sida going away by the vcpu->mutex */
4795 		r = kvm_s390_guest_sida_op(vcpu, mop);
4796 		break;
4797 	default:
4798 		r = -EINVAL;
4799 	}
4800 
4801 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4802 	return r;
4803 }
4804 
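/*
 * Interrupt injection ioctls (KVM_S390_IRQ and the legacy KVM_S390_INTERRUPT)
 * are handled here, on the asynchronous path that does not take the vcpu
 * mutex first.
 */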
4805 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4806 			       unsigned int ioctl, unsigned long arg)
4807 {
4808 	struct kvm_vcpu *vcpu = filp->private_data;
4809 	void __user *argp = (void __user *)arg;
4810 
4811 	switch (ioctl) {
4812 	case KVM_S390_IRQ: {
4813 		struct kvm_s390_irq s390irq;
4814 
4815 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4816 			return -EFAULT;
4817 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4818 	}
4819 	case KVM_S390_INTERRUPT: {
4820 		struct kvm_s390_interrupt s390int;
4821 		struct kvm_s390_irq s390irq = {};
4822 
4823 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4824 			return -EFAULT;
4825 		if (s390int_to_s390irq(&s390int, &s390irq))
4826 			return -EINVAL;
4827 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4828 	}
4829 	}
4830 	return -ENOIOCTLCMD;
4831 }
4832 
4833 long kvm_arch_vcpu_ioctl(struct file *filp,
4834 			 unsigned int ioctl, unsigned long arg)
4835 {
4836 	struct kvm_vcpu *vcpu = filp->private_data;
4837 	void __user *argp = (void __user *)arg;
4838 	int idx;
4839 	long r;
4840 	u16 rc, rrc;
4841 
4842 	vcpu_load(vcpu);
4843 
4844 	switch (ioctl) {
4845 	case KVM_S390_STORE_STATUS:
4846 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4847 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4848 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4849 		break;
4850 	case KVM_S390_SET_INITIAL_PSW: {
4851 		psw_t psw;
4852 
4853 		r = -EFAULT;
4854 		if (copy_from_user(&psw, argp, sizeof(psw)))
4855 			break;
4856 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4857 		break;
4858 	}
4859 	case KVM_S390_CLEAR_RESET:
4860 		r = 0;
4861 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4862 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4863 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4864 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4865 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4866 				   rc, rrc);
4867 		}
4868 		break;
4869 	case KVM_S390_INITIAL_RESET:
4870 		r = 0;
4871 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4872 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4873 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4874 					  UVC_CMD_CPU_RESET_INITIAL,
4875 					  &rc, &rrc);
4876 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4877 				   rc, rrc);
4878 		}
4879 		break;
4880 	case KVM_S390_NORMAL_RESET:
4881 		r = 0;
4882 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4883 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4884 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4885 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4886 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4887 				   rc, rrc);
4888 		}
4889 		break;
4890 	case KVM_SET_ONE_REG:
4891 	case KVM_GET_ONE_REG: {
4892 		struct kvm_one_reg reg;
4893 		r = -EINVAL;
4894 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4895 			break;
4896 		r = -EFAULT;
4897 		if (copy_from_user(&reg, argp, sizeof(reg)))
4898 			break;
4899 		if (ioctl == KVM_SET_ONE_REG)
4900 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4901 		else
4902 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4903 		break;
4904 	}
4905 #ifdef CONFIG_KVM_S390_UCONTROL
4906 	case KVM_S390_UCAS_MAP: {
4907 		struct kvm_s390_ucas_mapping ucasmap;
4908 
4909 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4910 			r = -EFAULT;
4911 			break;
4912 		}
4913 
4914 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4915 			r = -EINVAL;
4916 			break;
4917 		}
4918 
4919 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4920 				     ucasmap.vcpu_addr, ucasmap.length);
4921 		break;
4922 	}
4923 	case KVM_S390_UCAS_UNMAP: {
4924 		struct kvm_s390_ucas_mapping ucasmap;
4925 
4926 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4927 			r = -EFAULT;
4928 			break;
4929 		}
4930 
4931 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4932 			r = -EINVAL;
4933 			break;
4934 		}
4935 
4936 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4937 			ucasmap.length);
4938 		break;
4939 	}
4940 #endif
4941 	case KVM_S390_VCPU_FAULT: {
4942 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4943 		break;
4944 	}
4945 	case KVM_ENABLE_CAP:
4946 	{
4947 		struct kvm_enable_cap cap;
4948 		r = -EFAULT;
4949 		if (copy_from_user(&cap, argp, sizeof(cap)))
4950 			break;
4951 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4952 		break;
4953 	}
4954 	case KVM_S390_MEM_OP: {
4955 		struct kvm_s390_mem_op mem_op;
4956 
4957 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4958 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4959 		else
4960 			r = -EFAULT;
4961 		break;
4962 	}
4963 	case KVM_S390_SET_IRQ_STATE: {
4964 		struct kvm_s390_irq_state irq_state;
4965 
4966 		r = -EFAULT;
4967 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4968 			break;
4969 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4970 		    irq_state.len == 0 ||
4971 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4972 			r = -EINVAL;
4973 			break;
4974 		}
4975 		/* do not use irq_state.flags, it will break old QEMUs */
4976 		r = kvm_s390_set_irq_state(vcpu,
4977 					   (void __user *) irq_state.buf,
4978 					   irq_state.len);
4979 		break;
4980 	}
4981 	case KVM_S390_GET_IRQ_STATE: {
4982 		struct kvm_s390_irq_state irq_state;
4983 
4984 		r = -EFAULT;
4985 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4986 			break;
4987 		if (irq_state.len == 0) {
4988 			r = -EINVAL;
4989 			break;
4990 		}
4991 		/* do not use irq_state.flags, it will break old QEMUs */
4992 		r = kvm_s390_get_irq_state(vcpu,
4993 					   (__u8 __user *)  irq_state.buf,
4994 					   irq_state.len);
4995 		break;
4996 	}
4997 	default:
4998 		r = -ENOTTY;
4999 	}
5000 
5001 	vcpu_put(vcpu);
5002 	return r;
5003 }
5004 
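/*
 * Fault handler for mmap() on the vcpu fd. For user controlled virtual
 * machines the SIE control block is exposed at KVM_S390_SIE_PAGE_OFFSET so
 * userspace can map it; everything else results in SIGBUS.
 */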
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
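/*
 * Validate a memslot change before it is committed. Note that the memory
 * slots of a protected (PV) guest must not be changed once the guest has
 * been converted to protected mode.
 */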
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks: memory slots must start and end on a segment
	 * boundary (1 MB) and must fit below the configured memory limit.
	 * The backing memory in userspace may be fragmented across several
	 * VMAs, and it is fine to mmap() and munmap() within this slot at
	 * any time after this call.
	 */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
	return 0;
}

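/*
 * Apply a committed memslot change to the guest address space (gmap):
 * deleted or moved slots are unmapped, created or moved slots are mapped
 * at their new location. Flag-only changes need no gmap update.
 */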
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

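/*
 * Build a mask of facility bits (for facility double word i) that may be
 * merged into the guest facility base from the host facility list. Two
 * bits of the SCLP-provided sclp.hmfai value per double word select how
 * many 16-bit chunks of the 48-bit mask remain set.
 */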
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

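/*
 * Called when a vcpu finishes blocking: clear valid_wakeup so a stale
 * wakeup is not carried over into the next halt.
 */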
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

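/*
 * Module init: refuse to load without the SIE interpretation facility
 * (sief2), reject the unsupported combination of nested virtualization
 * with huge page backing, merge the host facility list into the guest
 * facility base, and register with the KVM core.
 */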
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

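/* Module exit: unregister from the KVM core. */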
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");