Lines Matching +full:sync +full:- +full:1
1 // SPDX-License-Identifier: GPL-2.0
3 * A memslot-related performance benchmark.
34 #define MEM_TEST_SIZE (MEM_SIZE - 4096)
46 #define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096)
60 #define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12))
110 * Technically, we need also for the atomic bool to be address-free, which
114 * all KVM-supported platforms.
131 TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit"); in check_mmio_access()
132 TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read"); in check_mmio_access()
133 TEST_ASSERT(run->mmio.len == 8, in check_mmio_access()
134 "Unexpected exit mmio size = %u", run->mmio.len); in check_mmio_access()
135 TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min && in check_mmio_access()
136 run->mmio.phys_addr <= data->mmio_gpa_max, in check_mmio_access()
138 run->mmio.phys_addr); in check_mmio_access()
144 struct kvm_vcpu *vcpu = data->vcpu; in vcpu_worker()
145 struct kvm_run *run = vcpu->run; in vcpu_worker()
148 while (1) { in vcpu_worker()
153 TEST_ASSERT(uc.args[1] == 0, in vcpu_worker()
154 "Unexpected sync ucall, got %lx", in vcpu_worker()
155 (ulong)uc.args[1]); in vcpu_worker()
159 if (run->exit_reason == KVM_EXIT_MMIO) in vcpu_worker()
197 TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096, in vm_gpa2hva()
199 gpa -= MEM_GPA; in vm_gpa2hva()
203 slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); in vm_gpa2hva()
204 slotoffs = gpage - (slot * data->pages_per_slot); in vm_gpa2hva()
209 if (slot == data->nslots - 1) in vm_gpa2hva()
210 slotpages = data->npages - slot * data->pages_per_slot; in vm_gpa2hva()
212 slotpages = data->pages_per_slot; in vm_gpa2hva()
216 *rempages = slotpages - slotoffs; in vm_gpa2hva()
219 base = data->hva_slots[slot]; in vm_gpa2hva()
225 TEST_ASSERT(slot < data->nslots, "Too high slot number"); in vm_slot2gpa()
227 return MEM_GPA + slot * data->pages_per_slot * 4096; in vm_slot2gpa()
237 data->vm = NULL; in alloc_vm()
238 data->vcpu = NULL; in alloc_vm()
239 data->hva_slots = NULL; in alloc_vm()
253 struct sync_area *sync; in prepare_vm() local
256 TEST_ASSERT(max_mem_slots > 1, in prepare_vm()
257 "KVM_CAP_NR_MEMSLOTS should be greater than 1"); in prepare_vm()
258 TEST_ASSERT(nslots > 1 || nslots == -1, in prepare_vm()
259 "Slot count cap should be greater than 1"); in prepare_vm()
260 if (nslots != -1) in prepare_vm()
264 TEST_ASSERT(mempages > 1, in prepare_vm()
267 data->npages = mempages; in prepare_vm()
268 data->nslots = max_mem_slots - 1; in prepare_vm()
269 data->pages_per_slot = mempages / data->nslots; in prepare_vm()
270 if (!data->pages_per_slot) { in prepare_vm()
271 *maxslots = mempages + 1; in prepare_vm()
275 rempages = mempages % data->nslots; in prepare_vm()
276 data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); in prepare_vm()
277 TEST_ASSERT(data->hva_slots, "malloc() fail"); in prepare_vm()
279 data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); in prepare_vm()
280 ucall_init(data->vm, NULL); in prepare_vm()
282 pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", in prepare_vm()
283 max_mem_slots - 1, data->pages_per_slot, rempages); in prepare_vm()
286 for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) { in prepare_vm()
289 npages = data->pages_per_slot; in prepare_vm()
290 if (slot == max_mem_slots - 1) in prepare_vm()
293 vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, in prepare_vm()
300 for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) { in prepare_vm()
304 npages = data->pages_per_slot; in prepare_vm()
305 if (slot == max_mem_slots - 2) in prepare_vm()
308 gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, in prepare_vm()
309 slot + 1); in prepare_vm()
313 data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr); in prepare_vm()
314 memset(data->hva_slots[slot], 0, npages * 4096); in prepare_vm()
319 virt_map(data->vm, MEM_GPA, MEM_GPA, mempages); in prepare_vm()
321 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); in prepare_vm()
322 atomic_init(&sync->start_flag, false); in prepare_vm()
323 atomic_init(&sync->exit_flag, false); in prepare_vm()
324 atomic_init(&sync->sync_flag, false); in prepare_vm()
326 data->mmio_ok = false; in prepare_vm()
335 pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data); in launch_vm()
343 kvm_vm_free(data->vm); in free_vm()
344 free(data->hva_slots); in free_vm()
350 pthread_join(data->vcpu_thread, NULL); in wait_guest_exit()
353 static void let_guest_run(struct sync_area *sync) in let_guest_run() argument
355 atomic_store_explicit(&sync->start_flag, true, memory_order_release); in let_guest_run()
360 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_spin_until_start() local
362 while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire)) in guest_spin_until_start()
366 static void make_guest_exit(struct sync_area *sync) in make_guest_exit() argument
368 atomic_store_explicit(&sync->exit_flag, true, memory_order_release); in make_guest_exit()
373 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in _guest_should_exit() local
375 return atomic_load_explicit(&sync->exit_flag, memory_order_acquire); in _guest_should_exit()
386 static noinline void host_perform_sync(struct sync_area *sync) in host_perform_sync() argument
390 atomic_store_explicit(&sync->sync_flag, true, memory_order_release); in host_perform_sync()
391 while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) in host_perform_sync()
399 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_perform_sync() local
407 } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag, in guest_perform_sync()
417 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_code_test_memslot_move() local
418 uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); in guest_code_test_memslot_move()
432 * No host sync here since the MMIO exits are so expensive in guest_code_test_memslot_move()
445 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_code_test_memslot_map() local
451 while (1) { in guest_code_test_memslot_map()
474 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; in guest_code_test_memslot_unmap() local
480 while (1) { in guest_code_test_memslot_unmap()
485 * per host sync as otherwise the host will spend in guest_code_test_memslot_unmap()
513 while (1) { in guest_code_test_memslot_rw()
539 struct sync_area *sync, in test_memslot_move_prepare() argument
544 movesrcgpa = vm_slot2gpa(data, data->nslots - 1); in test_memslot_move_prepare()
556 movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1)); in test_memslot_move_prepare()
557 sync->move_area_ptr = (void *)movetestgpa; in test_memslot_move_prepare()
560 data->mmio_ok = true; in test_memslot_move_prepare()
561 data->mmio_gpa_min = movesrcgpa; in test_memslot_move_prepare()
562 data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1; in test_memslot_move_prepare()
569 struct sync_area *sync, in test_memslot_move_prepare_active() argument
572 return test_memslot_move_prepare(data, sync, maxslots, true); in test_memslot_move_prepare_active()
576 struct sync_area *sync, in test_memslot_move_prepare_inactive() argument
579 return test_memslot_move_prepare(data, sync, maxslots, false); in test_memslot_move_prepare_inactive()
582 static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) in test_memslot_move_loop() argument
586 movesrcgpa = vm_slot2gpa(data, data->nslots - 1); in test_memslot_move_loop()
587 vm_mem_region_move(data->vm, data->nslots - 1 + 1, in test_memslot_move_loop()
589 vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa); in test_memslot_move_loop()
604 npages = min(npages, count - ctr); in test_memslot_do_unmap()
633 static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) in test_memslot_map_loop() argument
649 host_perform_sync(sync); in test_memslot_map_loop()
652 MEM_TEST_MAP_SIZE_PAGES / 2 - 1, in test_memslot_map_loop()
666 host_perform_sync(sync); in test_memslot_map_loop()
669 test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1, in test_memslot_map_loop()
674 struct sync_area *sync, in test_memslot_unmap_loop_common() argument
686 host_perform_sync(sync); in test_memslot_unmap_loop_common()
692 host_perform_sync(sync); in test_memslot_unmap_loop_common()
701 struct sync_area *sync) in test_memslot_unmap_loop() argument
703 test_memslot_unmap_loop_common(data, sync, 1); in test_memslot_unmap_loop()
707 struct sync_area *sync) in test_memslot_unmap_loop_chunked() argument
709 test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES); in test_memslot_unmap_loop_chunked()
712 static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) in test_memslot_rw_loop() argument
720 host_perform_sync(sync); in test_memslot_rw_loop()
733 host_perform_sync(sync); in test_memslot_rw_loop()
740 bool (*prepare)(struct vm_data *data, struct sync_area *sync,
742 void (*loop)(struct vm_data *data, struct sync_area *sync);
752 uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES; in test_execute()
754 struct sync_area *sync; in test_execute() local
759 if (!prepare_vm(data, nslots, maxslots, tdata->guest_code, in test_execute()
765 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); in test_execute()
767 if (tdata->prepare && in test_execute()
768 !tdata->prepare(data, sync, maxslots)) { in test_execute()
776 let_guest_run(sync); in test_execute()
778 while (1) { in test_execute()
780 if (guest_runtime->tv_sec >= maxtime) in test_execute()
783 tdata->loop(data, sync); in test_execute()
788 make_guest_exit(sync); in test_execute()
806 .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
812 .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
849 …pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r r… in help()
851 pr_info(" -h: print this help screen.\n"); in help()
852 pr_info(" -v: enable verbose mode (not for benchmarking).\n"); in help()
853 pr_info(" -d: enable extra debug checks.\n"); in help()
854 pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", in help()
855 targs->nslots); in help()
856 pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", in help()
857 targs->tfirst, NTESTS - 1); in help()
858 pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n", in help()
859 targs->tlast, NTESTS - 1); in help()
860 pr_info(" -l: specify the test length in seconds (currently: %i)\n", in help()
861 targs->seconds); in help()
862 pr_info(" -r: specify the number of runs per test (currently: %i)\n", in help()
863 targs->runs); in help()
875 while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { in parse_args()
888 targs->nslots = atoi(optarg); in parse_args()
889 if (targs->nslots <= 0 && targs->nslots != -1) { in parse_args()
890 pr_info("Slot count cap has to be positive or -1 for no cap\n"); in parse_args()
895 targs->tfirst = atoi(optarg); in parse_args()
896 if (targs->tfirst < 0) { in parse_args()
897 pr_info("First test to run has to be non-negative\n"); in parse_args()
902 targs->tlast = atoi(optarg); in parse_args()
903 if (targs->tlast < 0 || targs->tlast >= NTESTS) { in parse_args()
904 pr_info("Last test to run has to be non-negative and less than %zu\n", in parse_args()
910 targs->seconds = atoi(optarg); in parse_args()
911 if (targs->seconds < 0) { in parse_args()
912 pr_info("Test length in seconds has to be non-negative\n"); in parse_args()
917 targs->runs = atoi(optarg); in parse_args()
918 if (targs->runs <= 0) { in parse_args()
931 if (targs->tfirst > targs->tlast) { in parse_args()
954 if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, in test_loop()
970 pr_info("No full loops done - too short test time or system too loaded?\n"); in test_loop()
987 if (!data->mem_size && in test_loop()
988 (!rbestslottime->slottimens || in test_loop()
989 result.slottimens < rbestslottime->slottimens)) in test_loop()
991 if (!rbestruntime->runtimens || in test_loop()
992 result.runtimens < rbestruntime->runtimens) in test_loop()
1002 .tlast = NTESTS - 1, in main()
1003 .nslots = -1, in main()
1005 .runs = 1, in main()
1014 return -1; in main()
1026 data->name, targs.runs, targs.seconds); in main()