1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * tools/testing/selftests/kvm/lib/x86_64/processor.c
4  *
5  * Copyright (C) 2018, Google LLC.
6  */
7 
8 #define _GNU_SOURCE /* for program_invocation_name */
9 
10 #include "test_util.h"
11 #include "kvm_util.h"
12 #include "../kvm_util_internal.h"
13 #include "processor.h"
14 
15 #ifndef NUM_INTERRUPTS
16 #define NUM_INTERRUPTS 256
17 #endif
18 
19 #define DEFAULT_CODE_SELECTOR 0x8
20 #define DEFAULT_DATA_SELECTOR 0x10
21 
22 /* Minimum physical address used for virtual translation tables. */
23 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
24 
25 vm_vaddr_t exception_handlers;
26 
27 /* Virtual translation table structure declarations */
/*
 * Page-map level-4 (PML4) entry, described as a 64-bit bit-field.
 *
 * The field widths add up to exactly 64 bits.  The bit positions noted
 * beside each field assume the compiler packs bit-fields starting from
 * the least significant bit, in declaration order.
 */
struct pageMapL4Entry {
	uint64_t present         : 1;	/* bit 0 */
	uint64_t writable        : 1;	/* bit 1 */
	uint64_t user            : 1;	/* bit 2 */
	uint64_t write_through   : 1;	/* bit 3 */
	uint64_t cache_disable   : 1;	/* bit 4 */
	uint64_t accessed        : 1;	/* bit 5 */
	uint64_t ignored_06      : 1;	/* bit 6 */
	uint64_t page_size       : 1;	/* bit 7 */
	uint64_t ignored_11_08   : 4;	/* bits 8-11 */
	uint64_t address         : 40;	/* bits 12-51, page frame number */
	uint64_t ignored_62_52   : 11;	/* bits 52-62 */
	uint64_t execute_disable : 1;	/* bit 63 */
};
42 
/*
 * Page-directory-pointer entry, described as a 64-bit bit-field.
 *
 * The field widths add up to exactly 64 bits.  The bit positions noted
 * beside each field assume the compiler packs bit-fields starting from
 * the least significant bit, in declaration order.
 */
struct pageDirectoryPointerEntry {
	uint64_t present         : 1;	/* bit 0 */
	uint64_t writable        : 1;	/* bit 1 */
	uint64_t user            : 1;	/* bit 2 */
	uint64_t write_through   : 1;	/* bit 3 */
	uint64_t cache_disable   : 1;	/* bit 4 */
	uint64_t accessed        : 1;	/* bit 5 */
	uint64_t ignored_06      : 1;	/* bit 6 */
	uint64_t page_size       : 1;	/* bit 7 */
	uint64_t ignored_11_08   : 4;	/* bits 8-11 */
	uint64_t address         : 40;	/* bits 12-51, page frame number */
	uint64_t ignored_62_52   : 11;	/* bits 52-62 */
	uint64_t execute_disable : 1;	/* bit 63 */
};
57 
/*
 * Page-directory entry, described as a 64-bit bit-field.
 *
 * The field widths add up to exactly 64 bits.  The bit positions noted
 * beside each field assume the compiler packs bit-fields starting from
 * the least significant bit, in declaration order.
 */
struct pageDirectoryEntry {
	uint64_t present         : 1;	/* bit 0 */
	uint64_t writable        : 1;	/* bit 1 */
	uint64_t user            : 1;	/* bit 2 */
	uint64_t write_through   : 1;	/* bit 3 */
	uint64_t cache_disable   : 1;	/* bit 4 */
	uint64_t accessed        : 1;	/* bit 5 */
	uint64_t ignored_06      : 1;	/* bit 6 */
	uint64_t page_size       : 1;	/* bit 7 */
	uint64_t ignored_11_08   : 4;	/* bits 8-11 */
	uint64_t address         : 40;	/* bits 12-51, page frame number */
	uint64_t ignored_62_52   : 11;	/* bits 52-62 */
	uint64_t execute_disable : 1;	/* bit 63 */
};
72 
/*
 * Page-table entry (last translation level), described as a 64-bit
 * bit-field.
 *
 * The field widths add up to exactly 64 bits.  The bit positions noted
 * beside each field assume the compiler packs bit-fields starting from
 * the least significant bit, in declaration order.
 */
struct pageTableEntry {
	uint64_t present         : 1;	/* bit 0 */
	uint64_t writable        : 1;	/* bit 1 */
	uint64_t user            : 1;	/* bit 2 */
	uint64_t write_through   : 1;	/* bit 3 */
	uint64_t cache_disable   : 1;	/* bit 4 */
	uint64_t accessed        : 1;	/* bit 5 */
	uint64_t dirty           : 1;	/* bit 6 */
	uint64_t reserved_07     : 1;	/* bit 7 */
	uint64_t global          : 1;	/* bit 8 */
	uint64_t ignored_11_09   : 3;	/* bits 9-11 */
	uint64_t address         : 40;	/* bits 12-51, page frame number */
	uint64_t ignored_62_52   : 11;	/* bits 52-62 */
	uint64_t execute_disable : 1;	/* bit 63 */
};
88 
regs_dump(FILE * stream,struct kvm_regs * regs,uint8_t indent)89 void regs_dump(FILE *stream, struct kvm_regs *regs,
90 	       uint8_t indent)
91 {
92 	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
93 		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
94 		indent, "",
95 		regs->rax, regs->rbx, regs->rcx, regs->rdx);
96 	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
97 		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
98 		indent, "",
99 		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
100 	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
101 		"r10: 0x%.16llx r11: 0x%.16llx\n",
102 		indent, "",
103 		regs->r8, regs->r9, regs->r10, regs->r11);
104 	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
105 		"r14: 0x%.16llx r15: 0x%.16llx\n",
106 		indent, "",
107 		regs->r12, regs->r13, regs->r14, regs->r15);
108 	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
109 		indent, "",
110 		regs->rip, regs->rflags);
111 }
112 
113 /*
114  * Segment Dump
115  *
116  * Input Args:
117  *   stream  - Output FILE stream
118  *   segment - KVM segment
119  *   indent  - Left margin indent amount
120  *
121  * Output Args: None
122  *
123  * Return: None
124  *
125  * Dumps the state of the KVM segment given by @segment, to the FILE stream
126  * given by @stream.
127  */
segment_dump(FILE * stream,struct kvm_segment * segment,uint8_t indent)128 static void segment_dump(FILE *stream, struct kvm_segment *segment,
129 			 uint8_t indent)
130 {
131 	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
132 		"selector: 0x%.4x type: 0x%.2x\n",
133 		indent, "", segment->base, segment->limit,
134 		segment->selector, segment->type);
135 	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
136 		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
137 		indent, "", segment->present, segment->dpl,
138 		segment->db, segment->s, segment->l);
139 	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
140 		"unusable: 0x%.2x padding: 0x%.2x\n",
141 		indent, "", segment->g, segment->avl,
142 		segment->unusable, segment->padding);
143 }
144 
145 /*
146  * dtable Dump
147  *
148  * Input Args:
149  *   stream - Output FILE stream
150  *   dtable - KVM dtable
151  *   indent - Left margin indent amount
152  *
153  * Output Args: None
154  *
155  * Return: None
156  *
157  * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
158  * given by @stream.
159  */
dtable_dump(FILE * stream,struct kvm_dtable * dtable,uint8_t indent)160 static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
161 			uint8_t indent)
162 {
163 	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
164 		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
165 		indent, "", dtable->base, dtable->limit,
166 		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
167 }
168 
sregs_dump(FILE * stream,struct kvm_sregs * sregs,uint8_t indent)169 void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
170 		uint8_t indent)
171 {
172 	unsigned int i;
173 
174 	fprintf(stream, "%*scs:\n", indent, "");
175 	segment_dump(stream, &sregs->cs, indent + 2);
176 	fprintf(stream, "%*sds:\n", indent, "");
177 	segment_dump(stream, &sregs->ds, indent + 2);
178 	fprintf(stream, "%*ses:\n", indent, "");
179 	segment_dump(stream, &sregs->es, indent + 2);
180 	fprintf(stream, "%*sfs:\n", indent, "");
181 	segment_dump(stream, &sregs->fs, indent + 2);
182 	fprintf(stream, "%*sgs:\n", indent, "");
183 	segment_dump(stream, &sregs->gs, indent + 2);
184 	fprintf(stream, "%*sss:\n", indent, "");
185 	segment_dump(stream, &sregs->ss, indent + 2);
186 	fprintf(stream, "%*str:\n", indent, "");
187 	segment_dump(stream, &sregs->tr, indent + 2);
188 	fprintf(stream, "%*sldt:\n", indent, "");
189 	segment_dump(stream, &sregs->ldt, indent + 2);
190 
191 	fprintf(stream, "%*sgdt:\n", indent, "");
192 	dtable_dump(stream, &sregs->gdt, indent + 2);
193 	fprintf(stream, "%*sidt:\n", indent, "");
194 	dtable_dump(stream, &sregs->idt, indent + 2);
195 
196 	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
197 		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
198 		indent, "",
199 		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
200 	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
201 		"apic_base: 0x%.16llx\n",
202 		indent, "",
203 		sregs->cr8, sregs->efer, sregs->apic_base);
204 
205 	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
206 	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
207 		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
208 			sregs->interrupt_bitmap[i]);
209 	}
210 }
211 
virt_pgd_alloc(struct kvm_vm * vm,uint32_t pgd_memslot)212 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
213 {
214 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
215 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
216 
217 	/* If needed, create page map l4 table. */
218 	if (!vm->pgd_created) {
219 		vm_paddr_t paddr = vm_phy_page_alloc(vm,
220 			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
221 		vm->pgd = paddr;
222 		vm->pgd_created = true;
223 	}
224 }
225 
virt_pg_map(struct kvm_vm * vm,uint64_t vaddr,uint64_t paddr,uint32_t pgd_memslot)226 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
227 	uint32_t pgd_memslot)
228 {
229 	uint16_t index[4];
230 	struct pageMapL4Entry *pml4e;
231 
232 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
233 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
234 
235 	TEST_ASSERT((vaddr % vm->page_size) == 0,
236 		"Virtual address not on page boundary,\n"
237 		"  vaddr: 0x%lx vm->page_size: 0x%x",
238 		vaddr, vm->page_size);
239 	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
240 		(vaddr >> vm->page_shift)),
241 		"Invalid virtual address, vaddr: 0x%lx",
242 		vaddr);
243 	TEST_ASSERT((paddr % vm->page_size) == 0,
244 		"Physical address not on page boundary,\n"
245 		"  paddr: 0x%lx vm->page_size: 0x%x",
246 		paddr, vm->page_size);
247 	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
248 		"Physical address beyond beyond maximum supported,\n"
249 		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
250 		paddr, vm->max_gfn, vm->page_size);
251 
252 	index[0] = (vaddr >> 12) & 0x1ffu;
253 	index[1] = (vaddr >> 21) & 0x1ffu;
254 	index[2] = (vaddr >> 30) & 0x1ffu;
255 	index[3] = (vaddr >> 39) & 0x1ffu;
256 
257 	/* Allocate page directory pointer table if not present. */
258 	pml4e = addr_gpa2hva(vm, vm->pgd);
259 	if (!pml4e[index[3]].present) {
260 		pml4e[index[3]].address = vm_phy_page_alloc(vm,
261 			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
262 			>> vm->page_shift;
263 		pml4e[index[3]].writable = true;
264 		pml4e[index[3]].present = true;
265 	}
266 
267 	/* Allocate page directory table if not present. */
268 	struct pageDirectoryPointerEntry *pdpe;
269 	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
270 	if (!pdpe[index[2]].present) {
271 		pdpe[index[2]].address = vm_phy_page_alloc(vm,
272 			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
273 			>> vm->page_shift;
274 		pdpe[index[2]].writable = true;
275 		pdpe[index[2]].present = true;
276 	}
277 
278 	/* Allocate page table if not present. */
279 	struct pageDirectoryEntry *pde;
280 	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
281 	if (!pde[index[1]].present) {
282 		pde[index[1]].address = vm_phy_page_alloc(vm,
283 			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
284 			>> vm->page_shift;
285 		pde[index[1]].writable = true;
286 		pde[index[1]].present = true;
287 	}
288 
289 	/* Fill in page table entry. */
290 	struct pageTableEntry *pte;
291 	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
292 	pte[index[0]].address = paddr >> vm->page_shift;
293 	pte[index[0]].writable = true;
294 	pte[index[0]].present = 1;
295 }
296 
virt_dump(FILE * stream,struct kvm_vm * vm,uint8_t indent)297 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
298 {
299 	struct pageMapL4Entry *pml4e, *pml4e_start;
300 	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
301 	struct pageDirectoryEntry *pde, *pde_start;
302 	struct pageTableEntry *pte, *pte_start;
303 
304 	if (!vm->pgd_created)
305 		return;
306 
307 	fprintf(stream, "%*s                                          "
308 		"                no\n", indent, "");
309 	fprintf(stream, "%*s      index hvaddr         gpaddr         "
310 		"addr         w exec dirty\n",
311 		indent, "");
312 	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
313 		vm->pgd);
314 	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
315 		pml4e = &pml4e_start[n1];
316 		if (!pml4e->present)
317 			continue;
318 		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
319 			" %u\n",
320 			indent, "",
321 			pml4e - pml4e_start, pml4e,
322 			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
323 			pml4e->writable, pml4e->execute_disable);
324 
325 		pdpe_start = addr_gpa2hva(vm, pml4e->address
326 			* vm->page_size);
327 		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
328 			pdpe = &pdpe_start[n2];
329 			if (!pdpe->present)
330 				continue;
331 			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
332 				"%u  %u\n",
333 				indent, "",
334 				pdpe - pdpe_start, pdpe,
335 				addr_hva2gpa(vm, pdpe),
336 				(uint64_t) pdpe->address, pdpe->writable,
337 				pdpe->execute_disable);
338 
339 			pde_start = addr_gpa2hva(vm,
340 				pdpe->address * vm->page_size);
341 			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
342 				pde = &pde_start[n3];
343 				if (!pde->present)
344 					continue;
345 				fprintf(stream, "%*spde   0x%-3zx %p "
346 					"0x%-12lx 0x%-10lx %u  %u\n",
347 					indent, "", pde - pde_start, pde,
348 					addr_hva2gpa(vm, pde),
349 					(uint64_t) pde->address, pde->writable,
350 					pde->execute_disable);
351 
352 				pte_start = addr_gpa2hva(vm,
353 					pde->address * vm->page_size);
354 				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
355 					pte = &pte_start[n4];
356 					if (!pte->present)
357 						continue;
358 					fprintf(stream, "%*spte   0x%-3zx %p "
359 						"0x%-12lx 0x%-10lx %u  %u "
360 						"    %u    0x%-10lx\n",
361 						indent, "",
362 						pte - pte_start, pte,
363 						addr_hva2gpa(vm, pte),
364 						(uint64_t) pte->address,
365 						pte->writable,
366 						pte->execute_disable,
367 						pte->dirty,
368 						((uint64_t) n1 << 27)
369 							| ((uint64_t) n2 << 18)
370 							| ((uint64_t) n3 << 9)
371 							| ((uint64_t) n4));
372 				}
373 			}
374 		}
375 	}
376 }
377 
378 /*
379  * Set Unusable Segment
380  *
381  * Input Args: None
382  *
383  * Output Args:
384  *   segp - Pointer to segment register
385  *
386  * Return: None
387  *
388  * Sets the segment register pointed to by @segp to an unusable state.
389  */
kvm_seg_set_unusable(struct kvm_segment * segp)390 static void kvm_seg_set_unusable(struct kvm_segment *segp)
391 {
392 	memset(segp, 0, sizeof(*segp));
393 	segp->unusable = true;
394 }
395 
kvm_seg_fill_gdt_64bit(struct kvm_vm * vm,struct kvm_segment * segp)396 static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
397 {
398 	void *gdt = addr_gva2hva(vm, vm->gdt);
399 	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
400 
401 	desc->limit0 = segp->limit & 0xFFFF;
402 	desc->base0 = segp->base & 0xFFFF;
403 	desc->base1 = segp->base >> 16;
404 	desc->type = segp->type;
405 	desc->s = segp->s;
406 	desc->dpl = segp->dpl;
407 	desc->p = segp->present;
408 	desc->limit1 = segp->limit >> 16;
409 	desc->avl = segp->avl;
410 	desc->l = segp->l;
411 	desc->db = segp->db;
412 	desc->g = segp->g;
413 	desc->base2 = segp->base >> 24;
414 	if (!segp->s)
415 		desc->base3 = segp->base >> 32;
416 }
417 
418 
419 /*
420  * Set Long Mode Flat Kernel Code Segment
421  *
422  * Input Args:
423  *   vm - VM whose GDT is being filled, or NULL to only write segp
424  *   selector - selector value
425  *
426  * Output Args:
427  *   segp - Pointer to KVM segment
428  *
429  * Return: None
430  *
431  * Sets up the KVM segment pointed to by @segp, to be a code segment
432  * with the selector value given by @selector.
433  */
kvm_seg_set_kernel_code_64bit(struct kvm_vm * vm,uint16_t selector,struct kvm_segment * segp)434 static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
435 	struct kvm_segment *segp)
436 {
437 	memset(segp, 0, sizeof(*segp));
438 	segp->selector = selector;
439 	segp->limit = 0xFFFFFFFFu;
440 	segp->s = 0x1; /* kTypeCodeData */
441 	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
442 					  * | kFlagCodeReadable
443 					  */
444 	segp->g = true;
445 	segp->l = true;
446 	segp->present = 1;
447 	if (vm)
448 		kvm_seg_fill_gdt_64bit(vm, segp);
449 }
450 
451 /*
452  * Set Long Mode Flat Kernel Data Segment
453  *
454  * Input Args:
455  *   vm - VM whose GDT is being filled, or NULL to only write segp
456  *   selector - selector value
457  *
458  * Output Args:
459  *   segp - Pointer to KVM segment
460  *
461  * Return: None
462  *
463  * Sets up the KVM segment pointed to by @segp, to be a data segment
464  * with the selector value given by @selector.
465  */
kvm_seg_set_kernel_data_64bit(struct kvm_vm * vm,uint16_t selector,struct kvm_segment * segp)466 static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
467 	struct kvm_segment *segp)
468 {
469 	memset(segp, 0, sizeof(*segp));
470 	segp->selector = selector;
471 	segp->limit = 0xFFFFFFFFu;
472 	segp->s = 0x1; /* kTypeCodeData */
473 	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
474 					  * | kFlagDataWritable
475 					  */
476 	segp->g = true;
477 	segp->present = true;
478 	if (vm)
479 		kvm_seg_fill_gdt_64bit(vm, segp);
480 }
481 
addr_gva2gpa(struct kvm_vm * vm,vm_vaddr_t gva)482 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
483 {
484 	uint16_t index[4];
485 	struct pageMapL4Entry *pml4e;
486 	struct pageDirectoryPointerEntry *pdpe;
487 	struct pageDirectoryEntry *pde;
488 	struct pageTableEntry *pte;
489 
490 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
491 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
492 
493 	index[0] = (gva >> 12) & 0x1ffu;
494 	index[1] = (gva >> 21) & 0x1ffu;
495 	index[2] = (gva >> 30) & 0x1ffu;
496 	index[3] = (gva >> 39) & 0x1ffu;
497 
498 	if (!vm->pgd_created)
499 		goto unmapped_gva;
500 	pml4e = addr_gpa2hva(vm, vm->pgd);
501 	if (!pml4e[index[3]].present)
502 		goto unmapped_gva;
503 
504 	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
505 	if (!pdpe[index[2]].present)
506 		goto unmapped_gva;
507 
508 	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
509 	if (!pde[index[1]].present)
510 		goto unmapped_gva;
511 
512 	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
513 	if (!pte[index[0]].present)
514 		goto unmapped_gva;
515 
516 	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
517 
518 unmapped_gva:
519 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
520 	exit(EXIT_FAILURE);
521 }
522 
kvm_setup_gdt(struct kvm_vm * vm,struct kvm_dtable * dt,int gdt_memslot,int pgd_memslot)523 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
524 			  int pgd_memslot)
525 {
526 	if (!vm->gdt)
527 		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
528 			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
529 
530 	dt->base = vm->gdt;
531 	dt->limit = getpagesize();
532 }
533 
kvm_setup_tss_64bit(struct kvm_vm * vm,struct kvm_segment * segp,int selector,int gdt_memslot,int pgd_memslot)534 static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
535 				int selector, int gdt_memslot,
536 				int pgd_memslot)
537 {
538 	if (!vm->tss)
539 		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
540 			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
541 
542 	memset(segp, 0, sizeof(*segp));
543 	segp->base = vm->tss;
544 	segp->limit = 0x67;
545 	segp->selector = selector;
546 	segp->type = 0xb;
547 	segp->present = 1;
548 	kvm_seg_fill_gdt_64bit(vm, segp);
549 }
550 
vcpu_setup(struct kvm_vm * vm,int vcpuid,int pgd_memslot,int gdt_memslot)551 static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
552 {
553 	struct kvm_sregs sregs;
554 
555 	/* Set mode specific system register values. */
556 	vcpu_sregs_get(vm, vcpuid, &sregs);
557 
558 	sregs.idt.limit = 0;
559 
560 	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
561 
562 	switch (vm->mode) {
563 	case VM_MODE_PXXV48_4K:
564 		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
565 		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
566 		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
567 
568 		kvm_seg_set_unusable(&sregs.ldt);
569 		kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
570 		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
571 		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
572 		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
573 		break;
574 
575 	default:
576 		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
577 	}
578 
579 	sregs.cr3 = vm->pgd;
580 	vcpu_sregs_set(vm, vcpuid, &sregs);
581 }
582 
vm_vcpu_add_default(struct kvm_vm * vm,uint32_t vcpuid,void * guest_code)583 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
584 {
585 	struct kvm_mp_state mp_state;
586 	struct kvm_regs regs;
587 	vm_vaddr_t stack_vaddr;
588 	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
589 				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
590 
591 	/* Create VCPU */
592 	vm_vcpu_add(vm, vcpuid);
593 	vcpu_setup(vm, vcpuid, 0, 0);
594 
595 	/* Setup guest general purpose registers */
596 	vcpu_regs_get(vm, vcpuid, &regs);
597 	regs.rflags = regs.rflags | 0x2;
598 	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
599 	regs.rip = (unsigned long) guest_code;
600 	vcpu_regs_set(vm, vcpuid, &regs);
601 
602 	/* Setup the MP state */
603 	mp_state.mp_state = 0;
604 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
605 }
606 
607 /*
608  * Allocate an instance of struct kvm_cpuid2
609  *
610  * Input Args: None
611  *
612  * Output Args: None
613  *
614  * Return: A pointer to the allocated struct. The caller is responsible
615  * for freeing this struct.
616  *
617  * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
618  * array to be decided at allocation time, allocation is slightly
619  * complicated. This function uses a reasonable default length for
620  * the array and performs the appropriate allocation.
621  */
allocate_kvm_cpuid2(void)622 static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
623 {
624 	struct kvm_cpuid2 *cpuid;
625 	int nent = 100;
626 	size_t size;
627 
628 	size = sizeof(*cpuid);
629 	size += nent * sizeof(struct kvm_cpuid_entry2);
630 	cpuid = malloc(size);
631 	if (!cpuid) {
632 		perror("malloc");
633 		abort();
634 	}
635 
636 	cpuid->nent = nent;
637 
638 	return cpuid;
639 }
640 
641 /*
642  * KVM Supported CPUID Get
643  *
644  * Input Args: None
645  *
646  * Output Args:
647  *
648  * Return: The supported KVM CPUID
649  *
650  * Get the guest CPUID supported by KVM.
651  */
kvm_get_supported_cpuid(void)652 struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
653 {
654 	static struct kvm_cpuid2 *cpuid;
655 	int ret;
656 	int kvm_fd;
657 
658 	if (cpuid)
659 		return cpuid;
660 
661 	cpuid = allocate_kvm_cpuid2();
662 	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
663 	if (kvm_fd < 0)
664 		exit(KSFT_SKIP);
665 
666 	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
667 	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
668 		    ret, errno);
669 
670 	close(kvm_fd);
671 	return cpuid;
672 }
673 
674 /*
675  * Locate a cpuid entry.
676  *
677  * Input Args:
678  *   function: The function of the cpuid entry to find.
679  *   index: The index of the cpuid entry.
680  *
681  * Output Args: None
682  *
683  * Return: A pointer to the cpuid entry. Never returns NULL.
684  */
685 struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function,uint32_t index)686 kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
687 {
688 	struct kvm_cpuid2 *cpuid;
689 	struct kvm_cpuid_entry2 *entry = NULL;
690 	int i;
691 
692 	cpuid = kvm_get_supported_cpuid();
693 	for (i = 0; i < cpuid->nent; i++) {
694 		if (cpuid->entries[i].function == function &&
695 		    cpuid->entries[i].index == index) {
696 			entry = &cpuid->entries[i];
697 			break;
698 		}
699 	}
700 
701 	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
702 		    function, index);
703 	return entry;
704 }
705 
706 /*
707  * VM VCPU CPUID Set
708  *
709  * Input Args:
710  *   vm - Virtual Machine
711  *   vcpuid - VCPU id
712  *   cpuid - The CPUID values to set.
713  *
714  * Output Args: None
715  *
716  * Return: void
717  *
718  * Set the VCPU's CPUID.
719  */
vcpu_set_cpuid(struct kvm_vm * vm,uint32_t vcpuid,struct kvm_cpuid2 * cpuid)720 void vcpu_set_cpuid(struct kvm_vm *vm,
721 		uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
722 {
723 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
724 	int rc;
725 
726 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
727 
728 	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
729 	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
730 		    rc, errno);
731 
732 }
733 
vm_create_default(uint32_t vcpuid,uint64_t extra_mem_pages,void * guest_code)734 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
735 				 void *guest_code)
736 {
737 	struct kvm_vm *vm;
738 	/*
739 	 * For x86 the maximum page table size for a memory region
740 	 * will be when only 4K pages are used.  In that case the
741 	 * total extra size for page tables (for extra N pages) will
742 	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
743 	 * than N/512*2.
744 	 */
745 	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
746 
747 	/* Create VM */
748 	vm = vm_create(VM_MODE_DEFAULT,
749 		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
750 		       O_RDWR);
751 
752 	/* Setup guest code */
753 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
754 
755 	/* Setup IRQ Chip */
756 	vm_create_irqchip(vm);
757 
758 	/* Add the first vCPU. */
759 	vm_vcpu_add_default(vm, vcpuid, guest_code);
760 
761 	return vm;
762 }
763 
764 /*
765  * VCPU Get MSR
766  *
767  * Input Args:
768  *   vm - Virtual Machine
769  *   vcpuid - VCPU ID
770  *   msr_index - Index of MSR
771  *
772  * Output Args: None
773  *
774  * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
775  *
776  * Get value of MSR for VCPU.
777  */
vcpu_get_msr(struct kvm_vm * vm,uint32_t vcpuid,uint64_t msr_index)778 uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
779 {
780 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
781 	struct {
782 		struct kvm_msrs header;
783 		struct kvm_msr_entry entry;
784 	} buffer = {};
785 	int r;
786 
787 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
788 	buffer.header.nmsrs = 1;
789 	buffer.entry.index = msr_index;
790 	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
791 	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
792 		"  rc: %i errno: %i", r, errno);
793 
794 	return buffer.entry.data;
795 }
796 
797 /*
798  * _VCPU Set MSR
799  *
800  * Input Args:
801  *   vm - Virtual Machine
802  *   vcpuid - VCPU ID
803  *   msr_index - Index of MSR
804  *   msr_value - New value of MSR
805  *
806  * Output Args: None
807  *
808  * Return: The result of KVM_SET_MSRS.
809  *
810  * Sets the value of an MSR for the given VCPU.
811  */
_vcpu_set_msr(struct kvm_vm * vm,uint32_t vcpuid,uint64_t msr_index,uint64_t msr_value)812 int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
813 		  uint64_t msr_value)
814 {
815 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
816 	struct {
817 		struct kvm_msrs header;
818 		struct kvm_msr_entry entry;
819 	} buffer = {};
820 	int r;
821 
822 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
823 	memset(&buffer, 0, sizeof(buffer));
824 	buffer.header.nmsrs = 1;
825 	buffer.entry.index = msr_index;
826 	buffer.entry.data = msr_value;
827 	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
828 	return r;
829 }
830 
/*
 * VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing. On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.  This is the asserting wrapper around
 * _vcpu_set_msr(): anything other than exactly one MSR written fails
 * the test.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
	uint64_t msr_value)
{
	int r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);

	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
		"  rc: %i errno: %i", r, errno);
}
855 
vcpu_args_set(struct kvm_vm * vm,uint32_t vcpuid,unsigned int num,...)856 void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
857 {
858 	va_list ap;
859 	struct kvm_regs regs;
860 
861 	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
862 		    "  num: %u\n",
863 		    num);
864 
865 	va_start(ap, num);
866 	vcpu_regs_get(vm, vcpuid, &regs);
867 
868 	if (num >= 1)
869 		regs.rdi = va_arg(ap, uint64_t);
870 
871 	if (num >= 2)
872 		regs.rsi = va_arg(ap, uint64_t);
873 
874 	if (num >= 3)
875 		regs.rdx = va_arg(ap, uint64_t);
876 
877 	if (num >= 4)
878 		regs.rcx = va_arg(ap, uint64_t);
879 
880 	if (num >= 5)
881 		regs.r8 = va_arg(ap, uint64_t);
882 
883 	if (num >= 6)
884 		regs.r9 = va_arg(ap, uint64_t);
885 
886 	vcpu_regs_set(vm, vcpuid, &regs);
887 	va_end(ap);
888 }
889 
vcpu_dump(FILE * stream,struct kvm_vm * vm,uint32_t vcpuid,uint8_t indent)890 void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
891 {
892 	struct kvm_regs regs;
893 	struct kvm_sregs sregs;
894 
895 	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
896 
897 	fprintf(stream, "%*sregs:\n", indent + 2, "");
898 	vcpu_regs_get(vm, vcpuid, &regs);
899 	regs_dump(stream, &regs, indent + 4);
900 
901 	fprintf(stream, "%*ssregs:\n", indent + 2, "");
902 	vcpu_sregs_get(vm, vcpuid, &sregs);
903 	sregs_dump(stream, &sregs, indent + 4);
904 }
905 
906 struct kvm_x86_state {
907 	struct kvm_vcpu_events events;
908 	struct kvm_mp_state mp_state;
909 	struct kvm_regs regs;
910 	struct kvm_xsave xsave;
911 	struct kvm_xcrs xcrs;
912 	struct kvm_sregs sregs;
913 	struct kvm_debugregs debugregs;
914 	union {
915 		struct kvm_nested_state nested;
916 		char nested_[16384];
917 	};
918 	struct kvm_msrs msrs;
919 };
920 
kvm_get_num_msrs_fd(int kvm_fd)921 static int kvm_get_num_msrs_fd(int kvm_fd)
922 {
923 	struct kvm_msr_list nmsrs;
924 	int r;
925 
926 	nmsrs.nmsrs = 0;
927 	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
928 	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
929 		r);
930 
931 	return nmsrs.nmsrs;
932 }
933 
kvm_get_num_msrs(struct kvm_vm * vm)934 static int kvm_get_num_msrs(struct kvm_vm *vm)
935 {
936 	return kvm_get_num_msrs_fd(vm->kvm_fd);
937 }
938 
kvm_get_msr_index_list(void)939 struct kvm_msr_list *kvm_get_msr_index_list(void)
940 {
941 	struct kvm_msr_list *list;
942 	int nmsrs, r, kvm_fd;
943 
944 	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
945 	if (kvm_fd < 0)
946 		exit(KSFT_SKIP);
947 
948 	nmsrs = kvm_get_num_msrs_fd(kvm_fd);
949 	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
950 	list->nmsrs = nmsrs;
951 	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
952 	close(kvm_fd);
953 
954 	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
955 		r);
956 
957 	return list;
958 }
959 
vcpu_save_state(struct kvm_vm * vm,uint32_t vcpuid)960 struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
961 {
962 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
963 	struct kvm_msr_list *list;
964 	struct kvm_x86_state *state;
965 	int nmsrs, r, i;
966 	static int nested_size = -1;
967 
968 	if (nested_size == -1) {
969 		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
970 		TEST_ASSERT(nested_size <= sizeof(state->nested_),
971 			    "Nested state size too big, %i > %zi",
972 			    nested_size, sizeof(state->nested_));
973 	}
974 
975 	/*
976 	 * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
977 	 * guest state is consistent only after userspace re-enters the
978 	 * kernel with KVM_RUN.  Complete IO prior to migrating state
979 	 * to a new VM.
980 	 */
981 	vcpu_run_complete_io(vm, vcpuid);
982 
983 	nmsrs = kvm_get_num_msrs(vm);
984 	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
985 	list->nmsrs = nmsrs;
986 	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
987         TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
988                 r);
989 
990 	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
991 	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
992         TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
993                 r);
994 
995 	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
996         TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
997                 r);
998 
999 	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
1000         TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
1001                 r);
1002 
1003 	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
1004         TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
1005                 r);
1006 
1007 	if (kvm_check_cap(KVM_CAP_XCRS)) {
1008 		r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
1009 		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
1010 			    r);
1011 	}
1012 
1013 	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
1014         TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
1015                 r);
1016 
1017 	if (nested_size) {
1018 		state->nested.size = sizeof(state->nested_);
1019 		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
1020 		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
1021 			r);
1022 		TEST_ASSERT(state->nested.size <= nested_size,
1023 			"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
1024 			state->nested.size, nested_size);
1025 	} else
1026 		state->nested.size = 0;
1027 
1028 	state->msrs.nmsrs = nmsrs;
1029 	for (i = 0; i < nmsrs; i++)
1030 		state->msrs.entries[i].index = list->indices[i];
1031 	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
1032         TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
1033                 r, r == nmsrs ? -1 : list->indices[r]);
1034 
1035 	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
1036         TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
1037                 r);
1038 
1039 	free(list);
1040 	return state;
1041 }
1042 
vcpu_load_state(struct kvm_vm * vm,uint32_t vcpuid,struct kvm_x86_state * state)1043 void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
1044 {
1045 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1046 	int r;
1047 
1048 	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
1049         TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
1050                 r);
1051 
1052 	if (kvm_check_cap(KVM_CAP_XCRS)) {
1053 		r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
1054 		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
1055 			    r);
1056 	}
1057 
1058 	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
1059         TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
1060                 r);
1061 
1062 	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
1063         TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
1064                 r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
1065 
1066 	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
1067         TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
1068                 r);
1069 
1070 	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
1071         TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
1072                 r);
1073 
1074 	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
1075         TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
1076                 r);
1077 
1078 	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
1079         TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
1080                 r);
1081 
1082 	if (state->nested.size) {
1083 		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
1084 		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
1085 			r);
1086 	}
1087 }
1088 
/*
 * Report whether the CPU executing this code identifies as "GenuineIntel".
 *
 * Executes CPUID leaf 0 (sub-leaf 0) and compares the three vendor words,
 * returned in EBX, EDX and ECX in that order, against the vendor string.
 *
 * Return:
 *   true when all three words match, false otherwise.
 */
bool is_intel_cpu(void)
{
	static const char vendor[] = "GenuineIntel";
	uint32_t eax, ebx, ecx, edx;
	uint32_t chunk[3];
	const uint32_t leaf = 0;

	__asm__ __volatile__(
		"cpuid"
		: /* output */ "=a"(eax), "=b"(ebx),
		  "=c"(ecx), "=d"(edx)
		: /* input */ "0"(leaf), "2"(0));

	/*
	 * Copy the 12 vendor characters into properly typed and aligned
	 * words rather than reading the char array through a uint32_t
	 * pointer.
	 */
	memcpy(chunk, vendor, sizeof(chunk));
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
1104 
kvm_get_cpuid_max_basic(void)1105 uint32_t kvm_get_cpuid_max_basic(void)
1106 {
1107 	return kvm_get_supported_cpuid_entry(0)->eax;
1108 }
1109 
kvm_get_cpuid_max_extended(void)1110 uint32_t kvm_get_cpuid_max_extended(void)
1111 {
1112 	return kvm_get_supported_cpuid_entry(0x80000000)->eax;
1113 }
1114 
kvm_get_cpu_address_width(unsigned int * pa_bits,unsigned int * va_bits)1115 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
1116 {
1117 	struct kvm_cpuid_entry2 *entry;
1118 	bool pae;
1119 
1120 	/* SDM 4.1.4 */
1121 	if (kvm_get_cpuid_max_extended() < 0x80000008) {
1122 		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
1123 		*pa_bits = pae ? 36 : 32;
1124 		*va_bits = 32;
1125 	} else {
1126 		entry = kvm_get_supported_cpuid_entry(0x80000008);
1127 		*pa_bits = entry->eax & 0xff;
1128 		*va_bits = (entry->eax >> 8) & 0xff;
1129 	}
1130 }
1131 
/*
 * One 16-byte interrupt descriptor table entry, as filled in by
 * set_idt_entry().
 */
struct idt_entry {
	/* Handler address bits 15:0 */
	uint16_t offset0;
	/* Code segment selector used when the handler is entered */
	uint16_t selector;
	/* The next 16 bits are packed attribute fields */
	uint16_t ist : 3;
	uint16_t : 5;
	uint16_t type : 4;
	uint16_t : 1;
	uint16_t dpl : 2;
	uint16_t p : 1;
	/* Handler address bits 31:16 */
	uint16_t offset1;
	/* Handler address bits 63:32 */
	uint32_t offset2;
	/* Left zero by set_idt_entry() */
	uint32_t reserved;
};
1144 
set_idt_entry(struct kvm_vm * vm,int vector,unsigned long addr,int dpl,unsigned short selector)1145 static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
1146 			  int dpl, unsigned short selector)
1147 {
1148 	struct idt_entry *base =
1149 		(struct idt_entry *)addr_gva2hva(vm, vm->idt);
1150 	struct idt_entry *e = &base[vector];
1151 
1152 	memset(e, 0, sizeof(*e));
1153 	e->offset0 = addr;
1154 	e->selector = selector;
1155 	e->ist = 0;
1156 	e->type = 14;
1157 	e->dpl = dpl;
1158 	e->p = 1;
1159 	e->offset1 = addr >> 16;
1160 	e->offset2 = addr >> 32;
1161 }
1162 
kvm_exit_unexpected_vector(uint32_t value)1163 void kvm_exit_unexpected_vector(uint32_t value)
1164 {
1165 	outl(UNEXPECTED_VECTOR_PORT, value);
1166 }
1167 
route_exception(struct ex_regs * regs)1168 void route_exception(struct ex_regs *regs)
1169 {
1170 	typedef void(*handler)(struct ex_regs *);
1171 	handler *handlers = (handler *)exception_handlers;
1172 
1173 	if (handlers && handlers[regs->vector]) {
1174 		handlers[regs->vector](regs);
1175 		return;
1176 	}
1177 
1178 	kvm_exit_unexpected_vector(regs->vector);
1179 }
1180 
vm_init_descriptor_tables(struct kvm_vm * vm)1181 void vm_init_descriptor_tables(struct kvm_vm *vm)
1182 {
1183 	extern void *idt_handlers;
1184 	int i;
1185 
1186 	vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
1187 	vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
1188 	/* Handlers have the same address in both address spaces.*/
1189 	for (i = 0; i < NUM_INTERRUPTS; i++)
1190 		set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
1191 			DEFAULT_CODE_SELECTOR);
1192 }
1193 
vcpu_init_descriptor_tables(struct kvm_vm * vm,uint32_t vcpuid)1194 void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
1195 {
1196 	struct kvm_sregs sregs;
1197 
1198 	vcpu_sregs_get(vm, vcpuid, &sregs);
1199 	sregs.idt.base = vm->idt;
1200 	sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
1201 	sregs.gdt.base = vm->gdt;
1202 	sregs.gdt.limit = getpagesize() - 1;
1203 	kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
1204 	vcpu_sregs_set(vm, vcpuid, &sregs);
1205 	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
1206 }
1207 
vm_handle_exception(struct kvm_vm * vm,int vector,void (* handler)(struct ex_regs *))1208 void vm_handle_exception(struct kvm_vm *vm, int vector,
1209 			 void (*handler)(struct ex_regs *))
1210 {
1211 	vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
1212 
1213 	handlers[vector] = (vm_vaddr_t)handler;
1214 }
1215 
assert_on_unhandled_exception(struct kvm_vm * vm,uint32_t vcpuid)1216 void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
1217 {
1218 	if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
1219 		&& vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
1220 		&& vcpu_state(vm, vcpuid)->io.size == 4) {
1221 		/* Grab pointer to io data */
1222 		uint32_t *data = (void *)vcpu_state(vm, vcpuid)
1223 			+ vcpu_state(vm, vcpuid)->io.data_offset;
1224 
1225 		TEST_ASSERT(false,
1226 			    "Unexpected vectored event in guest (vector:0x%x)",
1227 			    *data);
1228 	}
1229 }
1230 
set_cpuid(struct kvm_cpuid2 * cpuid,struct kvm_cpuid_entry2 * ent)1231 bool set_cpuid(struct kvm_cpuid2 *cpuid,
1232 	       struct kvm_cpuid_entry2 *ent)
1233 {
1234 	int i;
1235 
1236 	for (i = 0; i < cpuid->nent; i++) {
1237 		struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
1238 
1239 		if (cur->function != ent->function || cur->index != ent->index)
1240 			continue;
1241 
1242 		memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
1243 		return true;
1244 	}
1245 
1246 	return false;
1247 }
1248 
/*
 * Issue a KVM hypercall with VMCALL.
 *
 * Input Args:
 *   nr - Hypercall number, passed in RAX
 *   a0 - First argument, passed in RBX
 *   a1 - Second argument, passed in RCX
 *   a2 - Third argument, passed in RDX
 *   a3 - Fourth argument, passed in RSI
 *
 * Return:
 *   The value left in RAX after the VMCALL instruction.
 */
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
		       uint64_t a3)
{
	uint64_t r;

	/*
	 * RAX is both the hypercall number on entry and the return value on
	 * exit, so nr must be listed as an input alongside the "=a" output.
	 */
	__asm__ __volatile__("vmcall"
			     : "=a"(r)
			     : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
	return r;
}
1259