|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +/* |
| 3 | + * KVM dirty page logging performance test |
| 4 | + * |
| 5 | + * Based on dirty_log_test.c |
| 6 | + * |
| 7 | + * Copyright (C) 2018, Red Hat, Inc. |
| 8 | + * Copyright (C) 2020, Google, Inc. |
| 9 | + */ |
| 10 | + |
| 11 | +#define _GNU_SOURCE /* for program_invocation_name */ |
| 12 | + |
| 13 | +#include <stdio.h> |
| 14 | +#include <stdlib.h> |
| 15 | +#include <unistd.h> |
| 16 | +#include <time.h> |
| 17 | +#include <pthread.h> |
| 18 | +#include <linux/bitmap.h> |
| 19 | +#include <linux/bitops.h> |
| 20 | + |
| 21 | +#include "kvm_util.h" |
| 22 | +#include "perf_test_util.h" |
| 23 | +#include "processor.h" |
| 24 | +#include "test_util.h" |
| 25 | + |
| 26 | +/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ |
| 27 | +#define TEST_HOST_LOOP_N 2UL |
| 28 | + |
| 29 | +/* Host variables */ |
| 30 | +static bool host_quit; |
| 31 | +static uint64_t iteration; |
| 32 | +static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; |
| 33 | + |
| 34 | +static void *vcpu_worker(void *data) |
| 35 | +{ |
| 36 | + int ret; |
| 37 | + struct kvm_vm *vm = perf_test_args.vm; |
| 38 | + uint64_t pages_count = 0; |
| 39 | + struct kvm_run *run; |
| 40 | + struct timespec start; |
| 41 | + struct timespec ts_diff; |
| 42 | + struct timespec total = (struct timespec){0}; |
| 43 | + struct timespec avg; |
| 44 | + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; |
| 45 | + int vcpu_id = vcpu_args->vcpu_id; |
| 46 | + |
| 47 | + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); |
| 48 | + run = vcpu_state(vm, vcpu_id); |
| 49 | + |
| 50 | + while (!READ_ONCE(host_quit)) { |
| 51 | + uint64_t current_iteration = READ_ONCE(iteration); |
| 52 | + |
| 53 | + clock_gettime(CLOCK_MONOTONIC, &start); |
| 54 | + ret = _vcpu_run(vm, vcpu_id); |
| 55 | + ts_diff = timespec_diff_now(start); |
| 56 | + |
| 57 | + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); |
| 58 | + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, |
| 59 | + "Invalid guest sync status: exit_reason=%s\n", |
| 60 | + exit_reason_str(run->exit_reason)); |
| 61 | + |
| 62 | + pr_debug("Got sync event from vCPU %d\n", vcpu_id); |
| 63 | + vcpu_last_completed_iteration[vcpu_id] = current_iteration; |
| 64 | + pr_debug("vCPU %d updated last completed iteration to %lu\n", |
| 65 | + vcpu_id, vcpu_last_completed_iteration[vcpu_id]); |
| 66 | + |
| 67 | + if (current_iteration) { |
| 68 | + pages_count += vcpu_args->pages; |
| 69 | + total = timespec_add(total, ts_diff); |
| 70 | + pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", |
| 71 | + vcpu_id, current_iteration, ts_diff.tv_sec, |
| 72 | + ts_diff.tv_nsec); |
| 73 | + } else { |
| 74 | + pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", |
| 75 | + vcpu_id, current_iteration, ts_diff.tv_sec, |
| 76 | + ts_diff.tv_nsec); |
| 77 | + } |
| 78 | + |
| 79 | + while (current_iteration == READ_ONCE(iteration) && |
| 80 | + !READ_ONCE(host_quit)) {} |
| 81 | + } |
| 82 | + |
| 83 | + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); |
| 84 | + pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", |
| 85 | + vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], |
| 86 | + total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); |
| 87 | + |
| 88 | + return NULL; |
| 89 | +} |
| 90 | + |
#ifdef USE_CLEAR_DIRTY_LOG
/*
 * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 flags: filled in by main() from
 * kvm_check_cap() and passed to vm_enable_cap() by run_test().
 */
static u64 dirty_log_manual_caps;
#endif
| 94 | + |
| 95 | +static void run_test(enum vm_guest_mode mode, unsigned long iterations, |
| 96 | + uint64_t phys_offset, int wr_fract) |
| 97 | +{ |
| 98 | + pthread_t *vcpu_threads; |
| 99 | + struct kvm_vm *vm; |
| 100 | + unsigned long *bmap; |
| 101 | + uint64_t guest_num_pages; |
| 102 | + uint64_t host_num_pages; |
| 103 | + int vcpu_id; |
| 104 | + struct timespec start; |
| 105 | + struct timespec ts_diff; |
| 106 | + struct timespec get_dirty_log_total = (struct timespec){0}; |
| 107 | + struct timespec vcpu_dirty_total = (struct timespec){0}; |
| 108 | + struct timespec avg; |
| 109 | +#ifdef USE_CLEAR_DIRTY_LOG |
| 110 | + struct kvm_enable_cap cap = {}; |
| 111 | + struct timespec clear_dirty_log_total = (struct timespec){0}; |
| 112 | +#endif |
| 113 | + |
| 114 | + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); |
| 115 | + |
| 116 | + perf_test_args.wr_fract = wr_fract; |
| 117 | + |
| 118 | + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); |
| 119 | + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); |
| 120 | + host_num_pages = vm_num_host_pages(mode, guest_num_pages); |
| 121 | + bmap = bitmap_alloc(host_num_pages); |
| 122 | + |
| 123 | +#ifdef USE_CLEAR_DIRTY_LOG |
| 124 | + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; |
| 125 | + cap.args[0] = dirty_log_manual_caps; |
| 126 | + vm_enable_cap(vm, &cap); |
| 127 | +#endif |
| 128 | + |
| 129 | + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); |
| 130 | + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); |
| 131 | + |
| 132 | + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); |
| 133 | + |
| 134 | + sync_global_to_guest(vm, perf_test_args); |
| 135 | + |
| 136 | + /* Start the iterations */ |
| 137 | + iteration = 0; |
| 138 | + host_quit = false; |
| 139 | + |
| 140 | + clock_gettime(CLOCK_MONOTONIC, &start); |
| 141 | + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { |
| 142 | + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, |
| 143 | + &perf_test_args.vcpu_args[vcpu_id]); |
| 144 | + } |
| 145 | + |
| 146 | + /* Allow the vCPU to populate memory */ |
| 147 | + pr_debug("Starting iteration %lu - Populating\n", iteration); |
| 148 | + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) |
| 149 | + pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", |
| 150 | + iteration); |
| 151 | + |
| 152 | + ts_diff = timespec_diff_now(start); |
| 153 | + pr_info("Populate memory time: %ld.%.9lds\n", |
| 154 | + ts_diff.tv_sec, ts_diff.tv_nsec); |
| 155 | + |
| 156 | + /* Enable dirty logging */ |
| 157 | + clock_gettime(CLOCK_MONOTONIC, &start); |
| 158 | + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, |
| 159 | + KVM_MEM_LOG_DIRTY_PAGES); |
| 160 | + ts_diff = timespec_diff_now(start); |
| 161 | + pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", |
| 162 | + ts_diff.tv_sec, ts_diff.tv_nsec); |
| 163 | + |
| 164 | + while (iteration < iterations) { |
| 165 | + /* |
| 166 | + * Incrementing the iteration number will start the vCPUs |
| 167 | + * dirtying memory again. |
| 168 | + */ |
| 169 | + clock_gettime(CLOCK_MONOTONIC, &start); |
| 170 | + iteration++; |
| 171 | + |
| 172 | + pr_debug("Starting iteration %lu\n", iteration); |
| 173 | + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { |
| 174 | + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) |
| 175 | + pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", |
| 176 | + vcpu_id, iteration); |
| 177 | + } |
| 178 | + |
| 179 | + ts_diff = timespec_diff_now(start); |
| 180 | + vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); |
| 181 | + pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", |
| 182 | + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); |
| 183 | + |
| 184 | + clock_gettime(CLOCK_MONOTONIC, &start); |
| 185 | + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); |
| 186 | + |
| 187 | + ts_diff = timespec_diff_now(start); |
| 188 | + get_dirty_log_total = timespec_add(get_dirty_log_total, |
| 189 | + ts_diff); |
| 190 | + pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", |
| 191 | + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); |
| 192 | + |
| 193 | +#ifdef USE_CLEAR_DIRTY_LOG |
| 194 | + clock_gettime(CLOCK_MONOTONIC, &start); |
| 195 | + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, |
| 196 | + host_num_pages); |
| 197 | + |
| 198 | + ts_diff = timespec_diff_now(start); |
| 199 | + clear_dirty_log_total = timespec_add(clear_dirty_log_total, |
| 200 | + ts_diff); |
| 201 | + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", |
| 202 | + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); |
| 203 | +#endif |
| 204 | + } |
| 205 | + |
| 206 | + /* Tell the vcpu thread to quit */ |
| 207 | + host_quit = true; |
| 208 | + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) |
| 209 | + pthread_join(vcpu_threads[vcpu_id], NULL); |
| 210 | + |
| 211 | + /* Disable dirty logging */ |
| 212 | + clock_gettime(CLOCK_MONOTONIC, &start); |
| 213 | + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); |
| 214 | + ts_diff = timespec_diff_now(start); |
| 215 | + pr_info("Disabling dirty logging time: %ld.%.9lds\n", |
| 216 | + ts_diff.tv_sec, ts_diff.tv_nsec); |
| 217 | + |
| 218 | + avg = timespec_div(get_dirty_log_total, iterations); |
| 219 | + pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", |
| 220 | + iterations, get_dirty_log_total.tv_sec, |
| 221 | + get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); |
| 222 | + |
| 223 | +#ifdef USE_CLEAR_DIRTY_LOG |
| 224 | + avg = timespec_div(clear_dirty_log_total, iterations); |
| 225 | + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", |
| 226 | + iterations, clear_dirty_log_total.tv_sec, |
| 227 | + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); |
| 228 | +#endif |
| 229 | + |
| 230 | + free(bmap); |
| 231 | + free(vcpu_threads); |
| 232 | + ucall_uninit(vm); |
| 233 | + kvm_vm_free(vm); |
| 234 | +} |
| 235 | + |
| 236 | +struct guest_mode { |
| 237 | + bool supported; |
| 238 | + bool enabled; |
| 239 | +}; |
| 240 | +static struct guest_mode guest_modes[NUM_VM_MODES]; |
| 241 | + |
| 242 | +#define guest_mode_init(mode, supported, enabled) ({ \ |
| 243 | + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ |
| 244 | +}) |
| 245 | + |
| 246 | +static void help(char *name) |
| 247 | +{ |
| 248 | + int i; |
| 249 | + |
| 250 | + puts(""); |
| 251 | + printf("usage: %s [-h] [-i iterations] [-p offset] " |
| 252 | + "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); |
| 253 | + puts(""); |
| 254 | + printf(" -i: specify iteration counts (default: %"PRIu64")\n", |
| 255 | + TEST_HOST_LOOP_N); |
| 256 | + printf(" -p: specify guest physical test memory offset\n" |
| 257 | + " Warning: a low offset can conflict with the loaded test code.\n"); |
| 258 | + printf(" -m: specify the guest mode ID to test " |
| 259 | + "(default: test all supported modes)\n" |
| 260 | + " This option may be used multiple times.\n" |
| 261 | + " Guest mode IDs:\n"); |
| 262 | + for (i = 0; i < NUM_VM_MODES; ++i) { |
| 263 | + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), |
| 264 | + guest_modes[i].supported ? " (supported)" : ""); |
| 265 | + } |
| 266 | + printf(" -b: specify the size of the memory region which should be\n" |
| 267 | + " dirtied by each vCPU. e.g. 10M or 3G.\n" |
| 268 | + " (default: 1G)\n"); |
| 269 | + printf(" -f: specify the fraction of pages which should be written to\n" |
| 270 | + " as opposed to simply read, in the form\n" |
| 271 | + " 1/<fraction of pages to write>.\n" |
| 272 | + " (default: 1 i.e. all pages are written to.)\n"); |
| 273 | + printf(" -v: specify the number of vCPUs to run.\n"); |
| 274 | + puts(""); |
| 275 | + exit(0); |
| 276 | +} |
| 277 | + |
| 278 | +int main(int argc, char *argv[]) |
| 279 | +{ |
| 280 | + unsigned long iterations = TEST_HOST_LOOP_N; |
| 281 | + bool mode_selected = false; |
| 282 | + uint64_t phys_offset = 0; |
| 283 | + unsigned int mode; |
| 284 | + int opt, i; |
| 285 | + int wr_fract = 1; |
| 286 | + |
| 287 | +#ifdef USE_CLEAR_DIRTY_LOG |
| 288 | + dirty_log_manual_caps = |
| 289 | + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); |
| 290 | + if (!dirty_log_manual_caps) { |
| 291 | + print_skip("KVM_CLEAR_DIRTY_LOG not available"); |
| 292 | + exit(KSFT_SKIP); |
| 293 | + } |
| 294 | + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | |
| 295 | + KVM_DIRTY_LOG_INITIALLY_SET); |
| 296 | +#endif |
| 297 | + |
| 298 | +#ifdef __x86_64__ |
| 299 | + guest_mode_init(VM_MODE_PXXV48_4K, true, true); |
| 300 | +#endif |
| 301 | +#ifdef __aarch64__ |
| 302 | + guest_mode_init(VM_MODE_P40V48_4K, true, true); |
| 303 | + guest_mode_init(VM_MODE_P40V48_64K, true, true); |
| 304 | + |
| 305 | + { |
| 306 | + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); |
| 307 | + |
| 308 | + if (limit >= 52) |
| 309 | + guest_mode_init(VM_MODE_P52V48_64K, true, true); |
| 310 | + if (limit >= 48) { |
| 311 | + guest_mode_init(VM_MODE_P48V48_4K, true, true); |
| 312 | + guest_mode_init(VM_MODE_P48V48_64K, true, true); |
| 313 | + } |
| 314 | + } |
| 315 | +#endif |
| 316 | +#ifdef __s390x__ |
| 317 | + guest_mode_init(VM_MODE_P40V48_4K, true, true); |
| 318 | +#endif |
| 319 | + |
| 320 | + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { |
| 321 | + switch (opt) { |
| 322 | + case 'i': |
| 323 | + iterations = strtol(optarg, NULL, 10); |
| 324 | + break; |
| 325 | + case 'p': |
| 326 | + phys_offset = strtoull(optarg, NULL, 0); |
| 327 | + break; |
| 328 | + case 'm': |
| 329 | + if (!mode_selected) { |
| 330 | + for (i = 0; i < NUM_VM_MODES; ++i) |
| 331 | + guest_modes[i].enabled = false; |
| 332 | + mode_selected = true; |
| 333 | + } |
| 334 | + mode = strtoul(optarg, NULL, 10); |
| 335 | + TEST_ASSERT(mode < NUM_VM_MODES, |
| 336 | + "Guest mode ID %d too big", mode); |
| 337 | + guest_modes[mode].enabled = true; |
| 338 | + break; |
| 339 | + case 'b': |
| 340 | + guest_percpu_mem_size = parse_size(optarg); |
| 341 | + break; |
| 342 | + case 'f': |
| 343 | + wr_fract = atoi(optarg); |
| 344 | + TEST_ASSERT(wr_fract >= 1, |
| 345 | + "Write fraction cannot be less than one"); |
| 346 | + break; |
| 347 | + case 'v': |
| 348 | + nr_vcpus = atoi(optarg); |
| 349 | + TEST_ASSERT(nr_vcpus > 0, |
| 350 | + "Must have a positive number of vCPUs"); |
| 351 | + TEST_ASSERT(nr_vcpus <= MAX_VCPUS, |
| 352 | + "This test does not currently support\n" |
| 353 | + "more than %d vCPUs.", MAX_VCPUS); |
| 354 | + break; |
| 355 | + case 'h': |
| 356 | + default: |
| 357 | + help(argv[0]); |
| 358 | + break; |
| 359 | + } |
| 360 | + } |
| 361 | + |
| 362 | + TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); |
| 363 | + |
| 364 | + pr_info("Test iterations: %"PRIu64"\n", iterations); |
| 365 | + |
| 366 | + for (i = 0; i < NUM_VM_MODES; ++i) { |
| 367 | + if (!guest_modes[i].enabled) |
| 368 | + continue; |
| 369 | + TEST_ASSERT(guest_modes[i].supported, |
| 370 | + "Guest mode ID %d (%s) not supported.", |
| 371 | + i, vm_guest_mode_string(i)); |
| 372 | + run_test(i, iterations, phys_offset, wr_fract); |
| 373 | + } |
| 374 | + |
| 375 | + return 0; |
| 376 | +} |
0 commit comments