1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
 * xen_shinfo_test
4 *
5 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
6 *
7 * Xen shared_info / pvclock testing
8 */
9
10 #include "test_util.h"
11 #include "kvm_util.h"
12 #include "processor.h"
13
14 #include <stdint.h>
15 #include <time.h>
16 #include <sched.h>
17 #include <signal.h>
18 #include <pthread.h>
19
20 #include <sys/eventfd.h>
21
/* Defined in include/linux/kvm_types.h */
#define GPA_INVALID (~(ulong)0)

/*
 * Guest memory region that backs the shared_info page and the page holding
 * the pvclock/runstate data. main() adds it as a two-page memslot and maps
 * it at an identical guest-virtual address (GVA == GPA).
 */
#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10

/*
 * Extra one-page memslot placed right after the shinfo region; main() adds
 * it at sync point 9 to test event delivery after a memslot change.
 */
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT 11

/* Guest-physical addresses handed to KVM via the Xen attribute ioctls. */
#define SHINFO_ADDR (SHINFO_REGION_GPA)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)

/* Guest-virtual views of the same locations, dereferenced by guest code. */
#define SHINFO_VADDR (SHINFO_REGION_GVA)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)

/* Interrupt vector registered as the Xen upcall vector and handled by evtchn_handler(). */
#define EVTCHN_VECTOR 0x10

/*
 * Event channel ports used by the test: TEST1/TEST2 are routed from GSIs
 * 32/33 in main(), TIMER is the port configured for the vCPU timer.
 */
#define EVTCHN_TEST1 15
#define EVTCHN_TEST2 66
#define EVTCHN_TIMER 13

/* MSR index passed to KVM_XEN_HVM_CONFIG as the hypercall MSR. */
#define XEN_HYPERCALL_MSR 0x40000000

/*
 * Minimum amount of run delay accumulated by the host at sync point 6, and
 * the minimum 'runnable' time the guest then expects to see.
 * NOTE(review): presumably nanoseconds, matching get_run_delay() - confirm.
 */
#define MIN_STEAL_TIME 50000

#define SHINFO_RACE_TIMEOUT 2 /* seconds */

/* Hypercall numbers the guest loads into RAX before VMCALL. */
#define __HYPERVISOR_set_timer_op 15
#define __HYPERVISOR_sched_op 29
#define __HYPERVISOR_event_channel_op 32

/* Sub-command passed as the first argument of __HYPERVISOR_sched_op. */
#define SCHEDOP_poll 3

/* Sub-command passed as the first argument of __HYPERVISOR_event_channel_op. */
#define EVTCHNOP_send 4

/* Value stored in the .type field of the KVM_XEN_ATTR_TYPE_EVTCHN attribute in main(). */
#define EVTCHNSTAT_interdomain 2
62
/* Argument block the guest passes (by pointer) to the EVTCHNOP_send hypercall. */
struct evtchn_send {
	u32 port;	/* event channel port to signal */
};
66
/* Argument block the guest passes (by pointer) to the SCHEDOP_poll hypercall. */
struct sched_poll {
	u32 *ports;		/* array of ports to poll on */
	unsigned int nr_ports;	/* number of entries in ports[] */
	u64 timeout;		/* guest_code() uses 0 and 100000000; NOTE(review): units/semantics not shown here */
};
72
/*
 * Per-vCPU pvclock record. main() reads one copy embedded in vcpu_info and
 * one at PVTIME_ADDR, and checks that 'version' is non-zero and even.
 */
struct pvclock_vcpu_time_info {
	u32 version;
	u32 pad0;
	u64 tsc_timestamp;
	u64 system_time;
	u32 tsc_to_system_mul;
	s8 tsc_shift;
	u8 flags;
	u8 pad[2];
} __attribute__((__packed__)); /* 32 bytes */
83
/*
 * Wall clock record embedded in shared_info. main() compares sec/nsec
 * against host CLOCK_REALTIME samples taken before and after the test and
 * checks that 'version' is non-zero and even.
 */
struct pvclock_wall_clock {
	u32 version;
	u32 sec;
	u32 nsec;
} __attribute__((__packed__));
89
/*
 * Runstate area located at RUNSTATE_ADDR. The test repeatedly asserts that
 * state_entry_time equals the sum of the four time[] counters.
 */
struct vcpu_runstate_info {
	uint32_t state;			/* one of the RUNSTATE_* values */
	uint64_t state_entry_time;
	uint64_t time[4];		/* indexed by RUNSTATE_* */
};
95
/* Architecture-specific part of vcpu_info; not accessed directly by this test. */
struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};
100
/*
 * Per-vCPU info block. The test registers the one at VCPU_INFO_ADDR with
 * KVM; the host sets evtchn_upcall_pending to request an upcall and the
 * guest's evtchn_handler() clears it together with evtchn_pending_sel.
 */
struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;	/* main() reads this at VCPU_INFO_ADDR + 0x20 */
}; /* 64 bytes (x86) */
108
/*
 * Layout of the shared_info page at SHINFO_ADDR. The test manipulates
 * evtchn_pending[] / evtchn_mask[] from the host side and reads the wall
 * clock back at the end (main() locates it at offset 0xc00, which matches
 * this layout with 64-byte vcpu_info entries and 8-byte unsigned long).
 */
struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};
117
/*
 * Runstate identifiers. They index vcpu_runstate_info.time[] and the
 * runstate_names[] table below, and RUNSTATE_runnable..RUNSTATE_offline
 * also double as GUEST_SYNC stage numbers 1..3.
 */
#define RUNSTATE_running 0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked 2
#define RUNSTATE_offline 3

/* Printable names for verbose output, indexed by RUNSTATE_* value. */
static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};
129
/*
 * GSI routing table passed to KVM_SET_GSI_ROUTING: the routing header is
 * followed directly by the two entries main() fills in (GSI 32 and 33).
 */
struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;
134
/* Guest-side flag: set by evtchn_handler(), polled and cleared by the waiting guest code. */
static volatile bool guest_saw_irq;
136
evtchn_handler(struct ex_regs * regs)137 static void evtchn_handler(struct ex_regs *regs)
138 {
139 struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
140 vi->evtchn_upcall_pending = 0;
141 vi->evtchn_pending_sel = 0;
142 guest_saw_irq = true;
143
144 GUEST_SYNC(0x20);
145 }
146
guest_wait_for_irq(void)147 static void guest_wait_for_irq(void)
148 {
149 while (!guest_saw_irq)
150 __asm__ __volatile__ ("rep nop" : : : "memory");
151 guest_saw_irq = false;
152 }
153
guest_code(void)154 static void guest_code(void)
155 {
156 struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
157 int i;
158
159 __asm__ __volatile__(
160 "sti\n"
161 "nop\n"
162 );
163
164 /* Trigger an interrupt injection */
165 GUEST_SYNC(0);
166
167 guest_wait_for_irq();
168
169 /* Test having the host set runstates manually */
170 GUEST_SYNC(RUNSTATE_runnable);
171 GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
172 GUEST_ASSERT(rs->state == 0);
173
174 GUEST_SYNC(RUNSTATE_blocked);
175 GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
176 GUEST_ASSERT(rs->state == 0);
177
178 GUEST_SYNC(RUNSTATE_offline);
179 GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
180 GUEST_ASSERT(rs->state == 0);
181
182 /* Test runstate time adjust */
183 GUEST_SYNC(4);
184 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
185 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
186
187 /* Test runstate time set */
188 GUEST_SYNC(5);
189 GUEST_ASSERT(rs->state_entry_time >= 0x8000);
190 GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
191 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
192 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
193
194 /* sched_yield() should result in some 'runnable' time */
195 GUEST_SYNC(6);
196 GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
197
198 /* Attempt to deliver a *masked* interrupt */
199 GUEST_SYNC(7);
200
201 /* Wait until we see the bit set */
202 struct shared_info *si = (void *)SHINFO_VADDR;
203 while (!si->evtchn_pending[0])
204 __asm__ __volatile__ ("rep nop" : : : "memory");
205
206 /* Now deliver an *unmasked* interrupt */
207 GUEST_SYNC(8);
208
209 guest_wait_for_irq();
210
211 /* Change memslots and deliver an interrupt */
212 GUEST_SYNC(9);
213
214 guest_wait_for_irq();
215
216 /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
217 GUEST_SYNC(10);
218
219 guest_wait_for_irq();
220
221 GUEST_SYNC(11);
222
223 /* Our turn. Deliver event channel (to ourselves) with
224 * EVTCHNOP_send hypercall. */
225 unsigned long rax;
226 struct evtchn_send s = { .port = 127 };
227 __asm__ __volatile__ ("vmcall" :
228 "=a" (rax) :
229 "a" (__HYPERVISOR_event_channel_op),
230 "D" (EVTCHNOP_send),
231 "S" (&s));
232
233 GUEST_ASSERT(rax == 0);
234
235 guest_wait_for_irq();
236
237 GUEST_SYNC(12);
238
239 /* Deliver "outbound" event channel to an eventfd which
240 * happens to be one of our own irqfds. */
241 s.port = 197;
242 __asm__ __volatile__ ("vmcall" :
243 "=a" (rax) :
244 "a" (__HYPERVISOR_event_channel_op),
245 "D" (EVTCHNOP_send),
246 "S" (&s));
247
248 GUEST_ASSERT(rax == 0);
249
250 guest_wait_for_irq();
251
252 GUEST_SYNC(13);
253
254 /* Set a timer 100ms in the future. */
255 __asm__ __volatile__ ("vmcall" :
256 "=a" (rax) :
257 "a" (__HYPERVISOR_set_timer_op),
258 "D" (rs->state_entry_time + 100000000));
259 GUEST_ASSERT(rax == 0);
260
261 GUEST_SYNC(14);
262
263 /* Now wait for the timer */
264 guest_wait_for_irq();
265
266 GUEST_SYNC(15);
267
268 /* The host has 'restored' the timer. Just wait for it. */
269 guest_wait_for_irq();
270
271 GUEST_SYNC(16);
272
273 /* Poll for an event channel port which is already set */
274 u32 ports[1] = { EVTCHN_TIMER };
275 struct sched_poll p = {
276 .ports = ports,
277 .nr_ports = 1,
278 .timeout = 0,
279 };
280
281 __asm__ __volatile__ ("vmcall" :
282 "=a" (rax) :
283 "a" (__HYPERVISOR_sched_op),
284 "D" (SCHEDOP_poll),
285 "S" (&p));
286
287 GUEST_ASSERT(rax == 0);
288
289 GUEST_SYNC(17);
290
291 /* Poll for an unset port and wait for the timeout. */
292 p.timeout = 100000000;
293 __asm__ __volatile__ ("vmcall" :
294 "=a" (rax) :
295 "a" (__HYPERVISOR_sched_op),
296 "D" (SCHEDOP_poll),
297 "S" (&p));
298
299 GUEST_ASSERT(rax == 0);
300
301 GUEST_SYNC(18);
302
303 /* A timer will wake the masked port we're waiting on, while we poll */
304 p.timeout = 0;
305 __asm__ __volatile__ ("vmcall" :
306 "=a" (rax) :
307 "a" (__HYPERVISOR_sched_op),
308 "D" (SCHEDOP_poll),
309 "S" (&p));
310
311 GUEST_ASSERT(rax == 0);
312
313 GUEST_SYNC(19);
314
315 /* A timer wake an *unmasked* port which should wake us with an
316 * actual interrupt, while we're polling on a different port. */
317 ports[0]++;
318 p.timeout = 0;
319 __asm__ __volatile__ ("vmcall" :
320 "=a" (rax) :
321 "a" (__HYPERVISOR_sched_op),
322 "D" (SCHEDOP_poll),
323 "S" (&p));
324
325 GUEST_ASSERT(rax == 0);
326
327 guest_wait_for_irq();
328
329 GUEST_SYNC(20);
330
331 /* Timer should have fired already */
332 guest_wait_for_irq();
333
334 GUEST_SYNC(21);
335 /* Racing host ioctls */
336
337 guest_wait_for_irq();
338
339 GUEST_SYNC(22);
340 /* Racing vmcall against host ioctl */
341
342 ports[0] = 0;
343
344 p = (struct sched_poll) {
345 .ports = ports,
346 .nr_ports = 1,
347 .timeout = 0
348 };
349
350 wait_for_timer:
351 /*
352 * Poll for a timer wake event while the worker thread is mucking with
353 * the shared info. KVM XEN drops timer IRQs if the shared info is
354 * invalid when the timer expires. Arbitrarily poll 100 times before
355 * giving up and asking the VMM to re-arm the timer. 100 polls should
356 * consume enough time to beat on KVM without taking too long if the
357 * timer IRQ is dropped due to an invalid event channel.
358 */
359 for (i = 0; i < 100 && !guest_saw_irq; i++)
360 asm volatile("vmcall"
361 : "=a" (rax)
362 : "a" (__HYPERVISOR_sched_op),
363 "D" (SCHEDOP_poll),
364 "S" (&p)
365 : "memory");
366
367 /*
368 * Re-send the timer IRQ if it was (likely) dropped due to the timer
369 * expiring while the event channel was invalid.
370 */
371 if (!guest_saw_irq) {
372 GUEST_SYNC(23);
373 goto wait_for_timer;
374 }
375 guest_saw_irq = false;
376
377 GUEST_SYNC(24);
378 }
379
/*
 * Three-way comparison of two timespecs.
 *
 * Seconds are compared first; nanoseconds only break a tie.
 *
 * Returns 1 if *a is later than *b, -1 if it is earlier, 0 if both are equal.
 */
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec > b->tv_sec ? 1 : -1;

	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec > b->tv_nsec ? 1 : -1;

	return 0;
}
393
/*
 * Host-side state shared between main() and the SIGALRM handler:
 * vinfo is the host mapping of the guest's vcpu_info (set up in main()),
 * vcpu is the single vCPU created by main().
 */
static struct vcpu_info *vinfo;
static struct kvm_vcpu *vcpu;
396
handle_alrm(int sig)397 static void handle_alrm(int sig)
398 {
399 if (vinfo)
400 printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
401 vcpu_dump(stdout, vcpu, 0);
402 TEST_FAIL("IRQ delivery timed out");
403 }
404
juggle_shinfo_state(void * arg)405 static void *juggle_shinfo_state(void *arg)
406 {
407 struct kvm_vm *vm = (struct kvm_vm *)arg;
408
409 struct kvm_xen_hvm_attr cache_init = {
410 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
411 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
412 };
413
414 struct kvm_xen_hvm_attr cache_destroy = {
415 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
416 .u.shared_info.gfn = GPA_INVALID
417 };
418
419 for (;;) {
420 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_init);
421 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_destroy);
422 pthread_testcancel();
423 };
424
425 return NULL;
426 }
427
main(int argc,char * argv[])428 int main(int argc, char *argv[])
429 {
430 struct timespec min_ts, max_ts, vm_ts;
431 struct kvm_vm *vm;
432 pthread_t thread;
433 bool verbose;
434 int ret;
435
436 verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
437 !strncmp(argv[1], "--verbose", 10));
438
439 int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
440 TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
441
442 bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
443 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
444 bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
445
446 clock_gettime(CLOCK_REALTIME, &min_ts);
447
448 vm = vm_create_with_one_vcpu(&vcpu, guest_code);
449
450 /* Map a region for the shared_info page */
451 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
452 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
453 virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
454
455 struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
456
457 int zero_fd = open("/dev/zero", O_RDONLY);
458 TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
459
460 struct kvm_xen_hvm_config hvmc = {
461 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
462 .msr = XEN_HYPERCALL_MSR,
463 };
464
465 /* Let the kernel know that we *will* use it for sending all
466 * event channels, which lets it intercept SCHEDOP_poll */
467 if (do_evtchn_tests)
468 hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
469
470 vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
471
472 struct kvm_xen_hvm_attr lm = {
473 .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
474 .u.long_mode = 1,
475 };
476 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
477
478 struct kvm_xen_hvm_attr ha = {
479 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
480 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
481 };
482 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
483
484 /*
485 * Test what happens when the HVA of the shinfo page is remapped after
486 * the kernel has a reference to it. But make sure we copy the clock
487 * info over since that's only set at setup time, and we test it later.
488 */
489 struct pvclock_wall_clock wc_copy = shinfo->wc;
490 void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
491 TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
492 shinfo->wc = wc_copy;
493
494 struct kvm_xen_vcpu_attr vi = {
495 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
496 .u.gpa = VCPU_INFO_ADDR,
497 };
498 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
499
500 struct kvm_xen_vcpu_attr pvclock = {
501 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
502 .u.gpa = PVTIME_ADDR,
503 };
504 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
505
506 struct kvm_xen_hvm_attr vec = {
507 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
508 .u.vector = EVTCHN_VECTOR,
509 };
510 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
511
512 vm_init_descriptor_tables(vm);
513 vcpu_init_descriptor_tables(vcpu);
514 vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
515
516 if (do_runstate_tests) {
517 struct kvm_xen_vcpu_attr st = {
518 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
519 .u.gpa = RUNSTATE_ADDR,
520 };
521 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
522 }
523
524 int irq_fd[2] = { -1, -1 };
525
526 if (do_eventfd_tests) {
527 irq_fd[0] = eventfd(0, 0);
528 irq_fd[1] = eventfd(0, 0);
529
530 /* Unexpected, but not a KVM failure */
531 if (irq_fd[0] == -1 || irq_fd[1] == -1)
532 do_evtchn_tests = do_eventfd_tests = false;
533 }
534
535 if (do_eventfd_tests) {
536 irq_routes.info.nr = 2;
537
538 irq_routes.entries[0].gsi = 32;
539 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
540 irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
541 irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
542 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
543
544 irq_routes.entries[1].gsi = 33;
545 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
546 irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
547 irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
548 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
549
550 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
551
552 struct kvm_irqfd ifd = { };
553
554 ifd.fd = irq_fd[0];
555 ifd.gsi = 32;
556 vm_ioctl(vm, KVM_IRQFD, &ifd);
557
558 ifd.fd = irq_fd[1];
559 ifd.gsi = 33;
560 vm_ioctl(vm, KVM_IRQFD, &ifd);
561
562 struct sigaction sa = { };
563 sa.sa_handler = handle_alrm;
564 sigaction(SIGALRM, &sa, NULL);
565 }
566
567 struct kvm_xen_vcpu_attr tmr = {
568 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
569 .u.timer.port = EVTCHN_TIMER,
570 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
571 .u.timer.expires_ns = 0
572 };
573
574 if (do_evtchn_tests) {
575 struct kvm_xen_hvm_attr inj = {
576 .type = KVM_XEN_ATTR_TYPE_EVTCHN,
577 .u.evtchn.send_port = 127,
578 .u.evtchn.type = EVTCHNSTAT_interdomain,
579 .u.evtchn.flags = 0,
580 .u.evtchn.deliver.port.port = EVTCHN_TEST1,
581 .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
582 .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
583 };
584 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
585
586 /* Test migration to a different vCPU */
587 inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
588 inj.u.evtchn.deliver.port.vcpu = vcpu->id;
589 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
590
591 inj.u.evtchn.send_port = 197;
592 inj.u.evtchn.deliver.eventfd.port = 0;
593 inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
594 inj.u.evtchn.flags = 0;
595 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
596
597 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
598 }
599 vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
600 vinfo->evtchn_upcall_pending = 0;
601
602 struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
603 rs->state = 0x5a;
604
605 bool evtchn_irq_expected = false;
606
607 for (;;) {
608 volatile struct kvm_run *run = vcpu->run;
609 struct ucall uc;
610
611 vcpu_run(vcpu);
612
613 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
614 "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
615 run->exit_reason,
616 exit_reason_str(run->exit_reason));
617
618 switch (get_ucall(vcpu, &uc)) {
619 case UCALL_ABORT:
620 REPORT_GUEST_ASSERT(uc);
621 /* NOT REACHED */
622 case UCALL_SYNC: {
623 struct kvm_xen_vcpu_attr rst;
624 long rundelay;
625
626 if (do_runstate_tests)
627 TEST_ASSERT(rs->state_entry_time == rs->time[0] +
628 rs->time[1] + rs->time[2] + rs->time[3],
629 "runstate times don't add up");
630
631 switch (uc.args[1]) {
632 case 0:
633 if (verbose)
634 printf("Delivering evtchn upcall\n");
635 evtchn_irq_expected = true;
636 vinfo->evtchn_upcall_pending = 1;
637 break;
638
639 case RUNSTATE_runnable...RUNSTATE_offline:
640 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
641 if (!do_runstate_tests)
642 goto done;
643 if (verbose)
644 printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
645 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
646 rst.u.runstate.state = uc.args[1];
647 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
648 break;
649
650 case 4:
651 if (verbose)
652 printf("Testing RUNSTATE_ADJUST\n");
653 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
654 memset(&rst.u, 0, sizeof(rst.u));
655 rst.u.runstate.state = (uint64_t)-1;
656 rst.u.runstate.time_blocked =
657 0x5a - rs->time[RUNSTATE_blocked];
658 rst.u.runstate.time_offline =
659 0x6b6b - rs->time[RUNSTATE_offline];
660 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
661 rst.u.runstate.time_offline;
662 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
663 break;
664
665 case 5:
666 if (verbose)
667 printf("Testing RUNSTATE_DATA\n");
668 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
669 memset(&rst.u, 0, sizeof(rst.u));
670 rst.u.runstate.state = RUNSTATE_running;
671 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
672 rst.u.runstate.time_blocked = 0x6b6b;
673 rst.u.runstate.time_offline = 0x5a;
674 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
675 break;
676
677 case 6:
678 if (verbose)
679 printf("Testing steal time\n");
680 /* Yield until scheduler delay exceeds target */
681 rundelay = get_run_delay() + MIN_STEAL_TIME;
682 do {
683 sched_yield();
684 } while (get_run_delay() < rundelay);
685 break;
686
687 case 7:
688 if (!do_eventfd_tests)
689 goto done;
690 if (verbose)
691 printf("Testing masked event channel\n");
692 shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
693 eventfd_write(irq_fd[0], 1UL);
694 alarm(1);
695 break;
696
697 case 8:
698 if (verbose)
699 printf("Testing unmasked event channel\n");
700 /* Unmask that, but deliver the other one */
701 shinfo->evtchn_pending[0] = 0;
702 shinfo->evtchn_mask[0] = 0;
703 eventfd_write(irq_fd[1], 1UL);
704 evtchn_irq_expected = true;
705 alarm(1);
706 break;
707
708 case 9:
709 TEST_ASSERT(!evtchn_irq_expected,
710 "Expected event channel IRQ but it didn't happen");
711 shinfo->evtchn_pending[1] = 0;
712 if (verbose)
713 printf("Testing event channel after memslot change\n");
714 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
715 DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
716 eventfd_write(irq_fd[0], 1UL);
717 evtchn_irq_expected = true;
718 alarm(1);
719 break;
720
721 case 10:
722 TEST_ASSERT(!evtchn_irq_expected,
723 "Expected event channel IRQ but it didn't happen");
724 if (!do_evtchn_tests)
725 goto done;
726
727 shinfo->evtchn_pending[0] = 0;
728 if (verbose)
729 printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
730
731 struct kvm_irq_routing_xen_evtchn e;
732 e.port = EVTCHN_TEST2;
733 e.vcpu = vcpu->id;
734 e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
735
736 vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
737 evtchn_irq_expected = true;
738 alarm(1);
739 break;
740
741 case 11:
742 TEST_ASSERT(!evtchn_irq_expected,
743 "Expected event channel IRQ but it didn't happen");
744 shinfo->evtchn_pending[1] = 0;
745
746 if (verbose)
747 printf("Testing guest EVTCHNOP_send direct to evtchn\n");
748 evtchn_irq_expected = true;
749 alarm(1);
750 break;
751
752 case 12:
753 TEST_ASSERT(!evtchn_irq_expected,
754 "Expected event channel IRQ but it didn't happen");
755 shinfo->evtchn_pending[0] = 0;
756
757 if (verbose)
758 printf("Testing guest EVTCHNOP_send to eventfd\n");
759 evtchn_irq_expected = true;
760 alarm(1);
761 break;
762
763 case 13:
764 TEST_ASSERT(!evtchn_irq_expected,
765 "Expected event channel IRQ but it didn't happen");
766 shinfo->evtchn_pending[1] = 0;
767
768 if (verbose)
769 printf("Testing guest oneshot timer\n");
770 break;
771
772 case 14:
773 memset(&tmr, 0, sizeof(tmr));
774 tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
775 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
776 TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
777 "Timer port not returned");
778 TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
779 "Timer priority not returned");
780 TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
781 "Timer expiry not returned");
782 evtchn_irq_expected = true;
783 alarm(1);
784 break;
785
786 case 15:
787 TEST_ASSERT(!evtchn_irq_expected,
788 "Expected event channel IRQ but it didn't happen");
789 shinfo->evtchn_pending[0] = 0;
790
791 if (verbose)
792 printf("Testing restored oneshot timer\n");
793
794 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
795 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
796 evtchn_irq_expected = true;
797 alarm(1);
798 break;
799
800 case 16:
801 TEST_ASSERT(!evtchn_irq_expected,
802 "Expected event channel IRQ but it didn't happen");
803
804 if (verbose)
805 printf("Testing SCHEDOP_poll with already pending event\n");
806 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
807 alarm(1);
808 break;
809
810 case 17:
811 if (verbose)
812 printf("Testing SCHEDOP_poll timeout\n");
813 shinfo->evtchn_pending[0] = 0;
814 alarm(1);
815 break;
816
817 case 18:
818 if (verbose)
819 printf("Testing SCHEDOP_poll wake on masked event\n");
820
821 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
822 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
823 alarm(1);
824 break;
825
826 case 19:
827 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
828 if (verbose)
829 printf("Testing SCHEDOP_poll wake on unmasked event\n");
830
831 evtchn_irq_expected = true;
832 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
833 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
834
835 /* Read it back and check the pending time is reported correctly */
836 tmr.u.timer.expires_ns = 0;
837 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
838 TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
839 "Timer not reported pending");
840 alarm(1);
841 break;
842
843 case 20:
844 TEST_ASSERT(!evtchn_irq_expected,
845 "Expected event channel IRQ but it didn't happen");
846 /* Read timer and check it is no longer pending */
847 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
848 TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
849
850 shinfo->evtchn_pending[0] = 0;
851 if (verbose)
852 printf("Testing timer in the past\n");
853
854 evtchn_irq_expected = true;
855 tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
856 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
857 alarm(1);
858 break;
859
860 case 21:
861 TEST_ASSERT(!evtchn_irq_expected,
862 "Expected event channel IRQ but it didn't happen");
863 alarm(0);
864
865 if (verbose)
866 printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
867
868 ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
869 TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
870
871 struct kvm_irq_routing_xen_evtchn uxe = {
872 .port = 1,
873 .vcpu = vcpu->id,
874 .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
875 };
876
877 evtchn_irq_expected = true;
878 for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
879 __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
880 break;
881
882 case 22:
883 TEST_ASSERT(!evtchn_irq_expected,
884 "Expected event channel IRQ but it didn't happen");
885
886 if (verbose)
887 printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
888
889 shinfo->evtchn_pending[0] = 1;
890
891 evtchn_irq_expected = true;
892 tmr.u.timer.expires_ns = rs->state_entry_time +
893 SHINFO_RACE_TIMEOUT * 1000000000ULL;
894 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
895 break;
896
897 case 23:
898 /*
899 * Optional and possibly repeated sync point.
900 * Injecting the timer IRQ may fail if the
901 * shinfo is invalid when the timer expires.
902 * If the timer has expired but the IRQ hasn't
903 * been delivered, rearm the timer and retry.
904 */
905 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
906
907 /* Resume the guest if the timer is still pending. */
908 if (tmr.u.timer.expires_ns)
909 break;
910
911 /* All done if the IRQ was delivered. */
912 if (!evtchn_irq_expected)
913 break;
914
915 tmr.u.timer.expires_ns = rs->state_entry_time +
916 SHINFO_RACE_TIMEOUT * 1000000000ULL;
917 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
918 break;
919 case 24:
920 TEST_ASSERT(!evtchn_irq_expected,
921 "Expected event channel IRQ but it didn't happen");
922
923 ret = pthread_cancel(thread);
924 TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
925
926 ret = pthread_join(thread, 0);
927 TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
928 goto done;
929
930 case 0x20:
931 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
932 evtchn_irq_expected = false;
933 break;
934 }
935 break;
936 }
937 case UCALL_DONE:
938 goto done;
939 default:
940 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
941 }
942 }
943
944 done:
945 alarm(0);
946 clock_gettime(CLOCK_REALTIME, &max_ts);
947
948 /*
949 * Just a *really* basic check that things are being put in the
950 * right place. The actual calculations are much the same for
951 * Xen as they are for the KVM variants, so no need to check.
952 */
953 struct pvclock_wall_clock *wc;
954 struct pvclock_vcpu_time_info *ti, *ti2;
955
956 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
957 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
958 ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
959
960 if (verbose) {
961 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
962 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
963 ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
964 ti->tsc_shift, ti->flags);
965 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
966 ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
967 ti2->tsc_shift, ti2->flags);
968 }
969
970 vm_ts.tv_sec = wc->sec;
971 vm_ts.tv_nsec = wc->nsec;
972 TEST_ASSERT(wc->version && !(wc->version & 1),
973 "Bad wallclock version %x", wc->version);
974 TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
975 TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
976
977 TEST_ASSERT(ti->version && !(ti->version & 1),
978 "Bad time_info version %x", ti->version);
979 TEST_ASSERT(ti2->version && !(ti2->version & 1),
980 "Bad time_info version %x", ti->version);
981
982 if (do_runstate_tests) {
983 /*
984 * Fetch runstate and check sanity. Strictly speaking in the
985 * general case we might not expect the numbers to be identical
986 * but in this case we know we aren't running the vCPU any more.
987 */
988 struct kvm_xen_vcpu_attr rst = {
989 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
990 };
991 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
992
993 if (verbose) {
994 printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
995 rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
996 rs->state, rs->state_entry_time);
997 for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
998 printf("State %s: %" PRIu64 " ns\n",
999 runstate_names[i], rs->time[i]);
1000 }
1001 }
1002 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
1003 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
1004 "State entry time mismatch");
1005 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
1006 "Running time mismatch");
1007 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
1008 "Runnable time mismatch");
1009 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
1010 "Blocked time mismatch");
1011 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
1012 "Offline time mismatch");
1013
1014 TEST_ASSERT(rs->state_entry_time == rs->time[0] +
1015 rs->time[1] + rs->time[2] + rs->time[3],
1016 "runstate times don't add up");
1017 }
1018 kvm_vm_free(vm);
1019 return 0;
1020 }
1021