diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f2ce5cd45a2ca984473cb40c26ada71e7591bb04..946ccb260be2449368ba54561a39d6f6fcd54923 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -45,6 +45,7 @@ #include "qemu/guest-random.h" #include "sysemu/hw_accel.h" #include "kvm-cpus.h" +#include "sysemu/dirtylimit.h" #include "hw/boards.h" @@ -493,6 +494,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) cpu->kvm_state = s; cpu->vcpu_dirty = true; cpu->dirty_pages = 0; + cpu->throttle_us_per_full = 0; mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { @@ -773,17 +775,20 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu) } /* Must be with slots_lock held */ -static uint64_t kvm_dirty_ring_reap_locked(KVMState *s) +static uint64_t kvm_dirty_ring_reap_locked(KVMState *s, CPUState* cpu) { int ret; - CPUState *cpu; uint64_t total = 0; int64_t stamp; stamp = get_clock(); - CPU_FOREACH(cpu) { - total += kvm_dirty_ring_reap_one(s, cpu); + if (cpu) { + total = kvm_dirty_ring_reap_one(s, cpu); + } else { + CPU_FOREACH(cpu) { + total += kvm_dirty_ring_reap_one(s, cpu); + } } if (total) { @@ -804,7 +809,7 @@ static uint64_t kvm_dirty_ring_reap_locked(KVMState *s) * Currently for simplicity, we must hold BQL before calling this. We can * consider to drop the BQL if we're clear with all the race conditions. */ -static uint64_t kvm_dirty_ring_reap(KVMState *s) +static uint64_t kvm_dirty_ring_reap(KVMState *s, CPUState *cpu) { uint64_t total; @@ -824,7 +829,7 @@ static uint64_t kvm_dirty_ring_reap(KVMState *s) * reset below. */ kvm_slots_lock(); - total = kvm_dirty_ring_reap_locked(s); + total = kvm_dirty_ring_reap_locked(s, cpu); kvm_slots_unlock(); return total; @@ -871,7 +876,7 @@ static void kvm_dirty_ring_flush(void) * vcpus out in a synchronous way. 
*/ kvm_cpu_synchronize_kick_all(); - kvm_dirty_ring_reap(kvm_state); + kvm_dirty_ring_reap(kvm_state, NULL); trace_kvm_dirty_ring_flush(1); } @@ -1415,7 +1420,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, * Not easy. Let's cross the fingers until it's fixed. */ if (kvm_state->kvm_dirty_ring_size) { - kvm_dirty_ring_reap_locked(kvm_state); + kvm_dirty_ring_reap_locked(kvm_state, NULL); } else { kvm_slot_get_dirty_log(kvm_state, mem); } @@ -1483,11 +1488,16 @@ static void *kvm_dirty_ring_reaper_thread(void *data) */ sleep(1); + /* keep sleeping so that dirtylimit not be interfered by reaper */ + if (dirtylimit_in_service()) { + continue; + } + trace_kvm_dirty_ring_reaper("wakeup"); r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; qemu_mutex_lock_iothread(); - kvm_dirty_ring_reap(s); + kvm_dirty_ring_reap(s, NULL); qemu_mutex_unlock_iothread(); r->reaper_iteration++; @@ -2329,6 +2339,11 @@ bool kvm_dirty_ring_enabled(void) return kvm_state->kvm_dirty_ring_size ? true : false; } +uint32_t kvm_dirty_ring_size(void) +{ + return kvm_state->kvm_dirty_ring_size; +} + static int kvm_init(MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); @@ -2957,8 +2972,19 @@ int kvm_cpu_exec(CPUState *cpu) */ trace_kvm_dirty_ring_full(cpu->cpu_index); qemu_mutex_lock_iothread(); - kvm_dirty_ring_reap(kvm_state); + /* + * We throttle vCPU by making it sleep once it exit from kernel + * due to dirty ring full. In the dirtylimit scenario, reaping + * all vCPUs after a single vCPU dirty ring get full result in + * the miss of sleep, so just reap the ring-fulled vCPU. 
+ */ + if (dirtylimit_in_service()) { + kvm_dirty_ring_reap(kvm_state, cpu); + } else { + kvm_dirty_ring_reap(kvm_state, NULL); + } qemu_mutex_unlock_iothread(); + dirtylimit_vcpu_execute(cpu); ret = 0; break; case KVM_EXIT_SYSTEM_EVENT: diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 5319573e0032a336612a822cb15a60ec113c9785..1128cb2928a0aa375b2f474665b99f2fc5ad7665 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -152,4 +152,9 @@ bool kvm_dirty_ring_enabled(void) { return false; } + +uint32_t kvm_dirty_ring_size(void) +{ + return 0; +} #endif diff --git a/cpus-common.c b/cpus-common.c index 6e73d3e58dac6b6f4e117d56f5020d8496f17439..31c6415f37c64ef62d4e9a9d41be6b0263e5716e 100644 --- a/cpus-common.c +++ b/cpus-common.c @@ -73,6 +73,12 @@ static int cpu_get_free_index(void) } CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); +static unsigned int cpu_list_generation_id; + +unsigned int cpu_list_generation_id_get(void) +{ + return cpu_list_generation_id; +} void cpu_list_add(CPUState *cpu) { @@ -84,6 +90,7 @@ void cpu_list_add(CPUState *cpu) assert(!cpu_index_auto_assigned); } QTAILQ_INSERT_TAIL_RCU(&cpus, cpu, node); + cpu_list_generation_id++; } void cpu_list_remove(CPUState *cpu) @@ -96,6 +103,7 @@ void cpu_list_remove(CPUState *cpu) QTAILQ_REMOVE_RCU(&cpus, cpu, node); cpu->cpu_index = UNASSIGNED_CPU_INDEX; + cpu_list_generation_id++; } CPUState *qemu_get_cpu(int index) diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index 407a1da800cfc19376dc48e1941de9346462e7f7..5dd3001af09ead0966391cc36b7b2fb2e4389e3d 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -863,6 +863,19 @@ SRST Display the vcpu dirty rate information. ERST + { + .name = "vcpu_dirty_limit", + .args_type = "", + .params = "", + .help = "show dirty page limit information of all vCPU", + .cmd = hmp_info_vcpu_dirty_limit, + }, + +SRST + ``info vcpu_dirty_limit`` + Display the vcpu dirty page limit information. 
+ERST + #if defined(TARGET_I386) { .name = "sgx", diff --git a/hmp-commands.hx b/hmp-commands.hx index 70a9136ac2935b0cb2b2f132266196b5df6cd556..5bedee2d49547acfcda639f7cb6fe8636d643e76 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1744,3 +1744,35 @@ ERST "\n\t\t\t -b to specify dirty bitmap as method of calculation)", .cmd = hmp_calc_dirty_rate, }, + +SRST +``set_vcpu_dirty_limit`` + Set dirty page rate limit on virtual CPU, the information about all the + virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit`` + command. +ERST + + { + .name = "set_vcpu_dirty_limit", + .args_type = "dirty_rate:l,cpu_index:l?", + .params = "dirty_rate [cpu_index]", + .help = "set dirty page rate limit, use cpu_index to set limit" + "\n\t\t\t\t\t on a specified virtual cpu", + .cmd = hmp_set_vcpu_dirty_limit, + }, + +SRST +``cancel_vcpu_dirty_limit`` + Cancel dirty page rate limit on virtual CPU, the information about all the + virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit`` + command. 
+ERST + + { + .name = "cancel_vcpu_dirty_limit", + .args_type = "cpu_index:l?", + .params = "[cpu_index]", + .help = "cancel dirty page rate limit, use cpu_index to cancel" + "\n\t\t\t\t\t limit on a specified virtual cpu", + .cmd = hmp_cancel_vcpu_dirty_limit, + }, diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 039d422bf4cb40d4bfd5f37296c0b4e3f699f248..cdee668f20da88e3c4ccb1b9574b4ec22f10e13b 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -11,6 +11,7 @@ void qemu_init_cpu_list(void); void cpu_list_lock(void); void cpu_list_unlock(void); +unsigned int cpu_list_generation_id_get(void); void tcg_flush_softmmu_tlb(CPUState *cs); diff --git a/include/exec/memory.h b/include/exec/memory.h index 3e84d62e40d0672ea63f3e9dcf83392f86c84330..4326d74b95d7249178edb606d58fd78d472e3c30 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -69,7 +69,10 @@ static inline void fuzz_dma_read_cb(size_t addr, /* Dirty tracking enabled because measuring dirty rate */ #define GLOBAL_DIRTY_DIRTY_RATE (1U << 1) -#define GLOBAL_DIRTY_MASK (0x3) +/* Dirty tracking enabled because dirty limit */ +#define GLOBAL_DIRTY_LIMIT (1U << 2) + +#define GLOBAL_DIRTY_MASK (0x7) extern unsigned int global_dirty_tracking; diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index e948e81f1a972898790f52ce274398fe591e1d55..9631c1e2f6da55ce44eddb2165c9c55ac7504632 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -411,6 +411,12 @@ struct CPUState { */ bool throttle_thread_scheduled; + /* + * Sleep throttle_us_per_full microseconds once dirty ring is full + * if dirty page rate limit is enabled. 
+ */ + int64_t throttle_us_per_full; + bool ignore_memory_transaction_failures; struct hax_vcpu_state *hax_vcpu; diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index 96d014826ada85ab9e9e743c5a7bffbcf50ab750..478820e54fe3023192003636ee651c8b2afa6211 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -131,6 +131,9 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict); void hmp_replay_seek(Monitor *mon, const QDict *qdict); void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict); void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict); +void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); +void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); +void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); void hmp_human_readable_text_helper(Monitor *mon, HumanReadableText *(*qmp_handler)(Error **)); diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h new file mode 100644 index 0000000000000000000000000000000000000000..8d2c1f3a6b9c66a3a3827623ed14686ad0a4f8af --- /dev/null +++ b/include/sysemu/dirtylimit.h @@ -0,0 +1,37 @@ +/* + * Dirty page rate limit common functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(黄勇) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef QEMU_DIRTYRLIMIT_H +#define QEMU_DIRTYRLIMIT_H + +#define DIRTYLIMIT_CALC_TIME_MS 1000 /* 1000ms */ + +int64_t vcpu_dirty_rate_get(int cpu_index); +void vcpu_dirty_rate_stat_start(void); +void vcpu_dirty_rate_stat_stop(void); +void vcpu_dirty_rate_stat_initialize(void); +void vcpu_dirty_rate_stat_finalize(void); + +void dirtylimit_state_lock(void); +void dirtylimit_state_unlock(void); +void dirtylimit_state_initialize(void); +void dirtylimit_state_finalize(void); +bool dirtylimit_in_service(void); +bool dirtylimit_vcpu_index_valid(int cpu_index); +void dirtylimit_process(void); +void dirtylimit_change(bool start); +void dirtylimit_set_vcpu(int cpu_index, + uint64_t quota, + bool enable); +void dirtylimit_set_all(uint64_t quota, + bool enable); +void dirtylimit_vcpu_execute(CPUState *cpu); +#endif diff --git a/include/sysemu/dirtyrate.h b/include/sysemu/dirtyrate.h new file mode 100644 index 0000000000000000000000000000000000000000..4d3b9a4902211a1f8d475054fc244dddae009bcf --- /dev/null +++ b/include/sysemu/dirtyrate.h @@ -0,0 +1,28 @@ +/* + * dirty page rate helper functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(黄勇) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef QEMU_DIRTYRATE_H +#define QEMU_DIRTYRATE_H + +typedef struct VcpuStat { + int nvcpu; /* number of vcpu */ + DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ +} VcpuStat; + +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, + VcpuStat *stat, + unsigned int flag, + bool one_shot); + +void global_dirty_log_change(unsigned int flag, + bool start); +#endif diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 2623775c27e436619b287191ba2d7c1ee8e17a02..19c5c8402ada0cd6688488d8235c1a06739a3136 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -549,4 +549,6 @@ bool kvm_cpu_check_are_resettable(void); bool kvm_arch_cpu_check_are_resettable(void); bool kvm_dirty_ring_enabled(void); + +uint32_t kvm_dirty_ring_size(void); #endif diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 8043bc794681d2f841bcdb996fc23fa466875d61..c449095fc31776acea96c737d5a499f3ea635656 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -46,7 +46,7 @@ static struct DirtyRateStat DirtyStat; static DirtyRateMeasureMode dirtyrate_mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING; -static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) +static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time) { int64_t current_time; @@ -60,6 +60,132 @@ static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) return msec; } +static inline void record_dirtypages(DirtyPageRecord *dirty_pages, + CPUState *cpu, bool start) +{ + if (start) { + dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages; + } else { + dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages; + } +} + +static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages, + int64_t calc_time_ms) +{ + uint64_t memory_size_MB; + uint64_t increased_dirty_pages = + dirty_pages.end_pages - dirty_pages.start_pages; + + memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20; + + return memory_size_MB * 1000 / calc_time_ms; +} 
+ +void global_dirty_log_change(unsigned int flag, bool start) +{ + qemu_mutex_lock_iothread(); + if (start) { + memory_global_dirty_log_start(flag); + } else { + memory_global_dirty_log_stop(flag); + } + qemu_mutex_unlock_iothread(); +} + +/* + * global_dirty_log_sync + * 1. sync dirty log from kvm + * 2. stop dirty tracking if needed. + */ +static void global_dirty_log_sync(unsigned int flag, bool one_shot) +{ + qemu_mutex_lock_iothread(); + memory_global_dirty_log_sync(); + if (one_shot) { + memory_global_dirty_log_stop(flag); + } + qemu_mutex_unlock_iothread(); +} + +static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat) +{ + CPUState *cpu; + DirtyPageRecord *records; + int nvcpu = 0; + + CPU_FOREACH(cpu) { + nvcpu++; + } + + stat->nvcpu = nvcpu; + stat->rates = g_malloc0(sizeof(DirtyRateVcpu) * nvcpu); + + records = g_malloc0(sizeof(DirtyPageRecord) * nvcpu); + + return records; +} + +static void vcpu_dirty_stat_collect(VcpuStat *stat, + DirtyPageRecord *records, + bool start) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + record_dirtypages(records, cpu, start); + } +} + +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, + VcpuStat *stat, + unsigned int flag, + bool one_shot) +{ + DirtyPageRecord *records; + int64_t init_time_ms; + int64_t duration; + int64_t dirtyrate; + int i = 0; + unsigned int gen_id; + +retry: + init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + cpu_list_lock(); + gen_id = cpu_list_generation_id_get(); + records = vcpu_dirty_stat_alloc(stat); + vcpu_dirty_stat_collect(stat, records, true); + cpu_list_unlock(); + + duration = dirty_stat_wait(calc_time_ms, init_time_ms); + + global_dirty_log_sync(flag, one_shot); + + cpu_list_lock(); + if (gen_id != cpu_list_generation_id_get()) { + g_free(records); + g_free(stat->rates); + cpu_list_unlock(); + goto retry; + } + vcpu_dirty_stat_collect(stat, records, false); + cpu_list_unlock(); + + for (i = 0; i < stat->nvcpu; i++) { + dirtyrate = do_calculate_dirtyrate(records[i], 
duration); + + stat->rates[i].id = i; + stat->rates[i].dirty_rate = dirtyrate; + + trace_dirtyrate_do_calculate_vcpu(i, dirtyrate); + } + + g_free(records); + + return duration; +} + static bool is_sample_period_valid(int64_t sec) { if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC || @@ -396,44 +522,6 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, return true; } -static inline void record_dirtypages(DirtyPageRecord *dirty_pages, - CPUState *cpu, bool start) -{ - if (start) { - dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages; - } else { - dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages; - } -} - -static void dirtyrate_global_dirty_log_start(void) -{ - qemu_mutex_lock_iothread(); - memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); - qemu_mutex_unlock_iothread(); -} - -static void dirtyrate_global_dirty_log_stop(void) -{ - qemu_mutex_lock_iothread(); - memory_global_dirty_log_sync(); - memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE); - qemu_mutex_unlock_iothread(); -} - -static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages) -{ - uint64_t memory_size_MB; - int64_t time_s; - uint64_t increased_dirty_pages = - dirty_pages.end_pages - dirty_pages.start_pages; - - memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20; - time_s = DirtyStat.calc_time; - - return memory_size_MB / time_s; -} - static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages, bool start) { @@ -444,11 +532,6 @@ static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages, } } -static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages) -{ - DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages); -} - static inline void dirtyrate_manual_reset_protect(void) { RAMBlock *block = NULL; @@ -492,71 +575,49 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) DirtyStat.start_time = start_time / 1000; msec = config.sample_period_seconds * 1000; - msec = 
set_sample_page_period(msec, start_time); + msec = dirty_stat_wait(msec, start_time); DirtyStat.calc_time = msec / 1000; /* - * dirtyrate_global_dirty_log_stop do two things. + * do two things. * 1. fetch dirty bitmap from kvm * 2. stop dirty tracking */ - dirtyrate_global_dirty_log_stop(); + global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true); record_dirtypages_bitmap(&dirty_pages, false); - do_calculate_dirtyrate_bitmap(dirty_pages); + DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages, msec); } static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config) { - CPUState *cpu; - int64_t msec = 0; - int64_t start_time; + int64_t duration; uint64_t dirtyrate = 0; uint64_t dirtyrate_sum = 0; - DirtyPageRecord *dirty_pages; - int nvcpu = 0; int i = 0; - CPU_FOREACH(cpu) { - nvcpu++; - } - - dirty_pages = g_new(DirtyPageRecord, nvcpu); - - DirtyStat.dirty_ring.nvcpu = nvcpu; - DirtyStat.dirty_ring.rates = g_new(DirtyRateVcpu, nvcpu); - - dirtyrate_global_dirty_log_start(); - - CPU_FOREACH(cpu) { - record_dirtypages(dirty_pages, cpu, true); - } - - start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - DirtyStat.start_time = start_time / 1000; + /* start log sync */ + global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true); - msec = config.sample_period_seconds * 1000; - msec = set_sample_page_period(msec, start_time); - DirtyStat.calc_time = msec / 1000; + DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; - dirtyrate_global_dirty_log_stop(); + /* calculate vcpu dirtyrate */ + duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000, + &DirtyStat.dirty_ring, + GLOBAL_DIRTY_DIRTY_RATE, + true); - CPU_FOREACH(cpu) { - record_dirtypages(dirty_pages, cpu, false); - } + DirtyStat.calc_time = duration / 1000; + /* calculate vm dirtyrate */ for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) { - dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]); - trace_dirtyrate_do_calculate_vcpu(i, dirtyrate); - - 
DirtyStat.dirty_ring.rates[i].id = i; + dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate; DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate; dirtyrate_sum += dirtyrate; } DirtyStat.dirty_rate = dirtyrate_sum; - g_free(dirty_pages); } static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config) @@ -574,7 +635,7 @@ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config) rcu_read_unlock(); msec = config.sample_period_seconds * 1000; - msec = set_sample_page_period(msec, initial_time); + msec = dirty_stat_wait(msec, initial_time); DirtyStat.start_time = initial_time / 1000; DirtyStat.calc_time = msec / 1000; diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 69d4c5b8655f5fa6ae7bd31a97e46e05cf260d27..594a5c0bb64c296bd294ed37949e095ddf9209cd 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -13,6 +13,8 @@ #ifndef QEMU_MIGRATION_DIRTYRATE_H #define QEMU_MIGRATION_DIRTYRATE_H +#include "sysemu/dirtyrate.h" + /* * Sample 512 pages per GB as default. */ @@ -65,11 +67,6 @@ typedef struct SampleVMStat { uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ } SampleVMStat; -typedef struct VcpuStat { - int nvcpu; /* number of vcpu */ - DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ -} VcpuStat; - /* * Store calculation statistics for each measure. */ diff --git a/qapi/migration.json b/qapi/migration.json index d4ebc5f02848b87bc5f5803820df6f13ad6f0e4b..6b3b86386093a51e89f280645d4dd288da2919d5 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1874,6 +1874,86 @@ ## { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } +## +# @DirtyLimitInfo: +# +# Dirty page rate limit information of a virtual CPU. +# +# @cpu-index: index of a virtual CPU. +# +# @limit-rate: upper limit of dirty page rate (MB/s) for a virtual +# CPU, 0 means unlimited. +# +# @current-rate: current dirty page rate (MB/s) for a virtual CPU. 
+# +# Since: 7.1 +# +## +{ 'struct': 'DirtyLimitInfo', + 'data': { 'cpu-index': 'int', + 'limit-rate': 'uint64', + 'current-rate': 'uint64' } } + +## +# @set-vcpu-dirty-limit: +# +# Set the upper limit of dirty page rate for virtual CPUs. +# +# Requires KVM with accelerator property "dirty-ring-size" set. +# A virtual CPU's dirty page rate is a measure of its memory load. +# To observe dirty page rates, use @calc-dirty-rate. +# +# @cpu-index: index of a virtual CPU, default is all. +# +# @dirty-rate: upper limit of dirty page rate (MB/s) for virtual CPUs. +# +# Since: 7.1 +# +# Example: +# {"execute": "set-vcpu-dirty-limit", +# "arguments": { "dirty-rate": 200, +# "cpu-index": 1 } } +# +## +{ 'command': 'set-vcpu-dirty-limit', + 'data': { '*cpu-index': 'int', + 'dirty-rate': 'uint64' } } + +## +# @cancel-vcpu-dirty-limit: +# +# Cancel the upper limit of dirty page rate for virtual CPUs. +# +# Cancel the dirty page limit for the vCPU which has been set with +# set-vcpu-dirty-limit command. Note that this command requires +# support from dirty ring, same as the "set-vcpu-dirty-limit". +# +# @cpu-index: index of a virtual CPU, default is all. +# +# Since: 7.1 +# +# Example: +# {"execute": "cancel-vcpu-dirty-limit", +# "arguments": { "cpu-index": 1 } } +# +## +{ 'command': 'cancel-vcpu-dirty-limit', + 'data': { '*cpu-index': 'int'} } + +## +# @query-vcpu-dirty-limit: +# +# Returns information about virtual CPU dirty page rate limits, if any. +# +# Since: 7.1 +# +# Example: +# {"execute": "query-vcpu-dirty-limit"} +# +## +{ 'command': 'query-vcpu-dirty-limit', + 'returns': [ 'DirtyLimitInfo' ] } + ## # @snapshot-save: # diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c new file mode 100644 index 0000000000000000000000000000000000000000..8d98cb7f2c1c5735410a42ec4dc820df5f370a34 --- /dev/null +++ b/softmmu/dirtylimit.c @@ -0,0 +1,601 @@ +/* + * Dirty page rate limit implementation code + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. 
+ * + * Authors: + * Hyman Huang(黄勇) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/main-loop.h" +#include "qapi/qapi-commands-migration.h" +#include "qapi/qmp/qdict.h" +#include "qapi/error.h" +#include "sysemu/dirtyrate.h" +#include "sysemu/dirtylimit.h" +#include "monitor/hmp.h" +#include "monitor/monitor.h" +#include "exec/memory.h" +#include "hw/boards.h" +#include "sysemu/kvm.h" +#include "trace.h" + +/* + * Dirtylimit stop working if dirty page rate error + * value less than DIRTYLIMIT_TOLERANCE_RANGE + */ +#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ +/* + * Plus or minus vcpu sleep time linearly if dirty + * page rate error value percentage over + * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. + * Otherwise, plus or minus a fixed vcpu sleep time. + */ +#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 +/* + * Max vcpu sleep time percentage during a cycle + * composed of dirty ring full and sleep time. + */ +#define DIRTYLIMIT_THROTTLE_PCT_MAX 99 + +struct { + VcpuStat stat; + bool running; + QemuThread thread; +} *vcpu_dirty_rate_stat; + +typedef struct VcpuDirtyLimitState { + int cpu_index; + bool enabled; + /* + * Quota dirty page rate, unit is MB/s + * zero if not enabled. 
+ */ + uint64_t quota; +} VcpuDirtyLimitState; + +struct { + VcpuDirtyLimitState *states; + /* Max cpus number configured by user */ + int max_cpus; + /* Number of vcpu under dirtylimit */ + int limited_nvcpu; +} *dirtylimit_state; + +/* protect dirtylimit_state */ +static QemuMutex dirtylimit_mutex; + +/* dirtylimit thread quit if dirtylimit_quit is true */ +static bool dirtylimit_quit; + +static void vcpu_dirty_rate_stat_collect(void) +{ + VcpuStat stat; + int i = 0; + + /* calculate vcpu dirtyrate */ + vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS, + &stat, + GLOBAL_DIRTY_LIMIT, + false); + + for (i = 0; i < stat.nvcpu; i++) { + vcpu_dirty_rate_stat->stat.rates[i].id = i; + vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = + stat.rates[i].dirty_rate; + } + + g_free(stat.rates); /* pairs with the g_malloc0() done while calculating */ +} + +static void *vcpu_dirty_rate_stat_thread(void *opaque) +{ + rcu_register_thread(); + + /* start log sync */ + global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); + + while (qatomic_read(&vcpu_dirty_rate_stat->running)) { + vcpu_dirty_rate_stat_collect(); + if (dirtylimit_in_service()) { + dirtylimit_process(); + } + } + + /* stop log sync */ + global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); + + rcu_unregister_thread(); + return NULL; +} + +int64_t vcpu_dirty_rate_get(int cpu_index) +{ + DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; + return qatomic_read_i64(&rates[cpu_index].dirty_rate); +} + +void vcpu_dirty_rate_stat_start(void) +{ + if (qatomic_read(&vcpu_dirty_rate_stat->running)) { + return; + } + + qatomic_set(&vcpu_dirty_rate_stat->running, 1); + qemu_thread_create(&vcpu_dirty_rate_stat->thread, + "dirtyrate-stat", + vcpu_dirty_rate_stat_thread, + NULL, + QEMU_THREAD_JOINABLE); +} + +void vcpu_dirty_rate_stat_stop(void) +{ + qatomic_set(&vcpu_dirty_rate_stat->running, 0); + dirtylimit_state_unlock(); + qemu_mutex_unlock_iothread(); + qemu_thread_join(&vcpu_dirty_rate_stat->thread); + qemu_mutex_lock_iothread(); + dirtylimit_state_lock(); +} + +void 
vcpu_dirty_rate_stat_initialize(void) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + int max_cpus = ms->smp.max_cpus; + + vcpu_dirty_rate_stat = + g_malloc0(sizeof(*vcpu_dirty_rate_stat)); + + vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; + vcpu_dirty_rate_stat->stat.rates = + g_malloc0(sizeof(DirtyRateVcpu) * max_cpus); + + vcpu_dirty_rate_stat->running = false; +} + +void vcpu_dirty_rate_stat_finalize(void) +{ + g_free(vcpu_dirty_rate_stat->stat.rates); + vcpu_dirty_rate_stat->stat.rates = NULL; + + g_free(vcpu_dirty_rate_stat); + vcpu_dirty_rate_stat = NULL; +} + +void dirtylimit_state_lock(void) +{ + qemu_mutex_lock(&dirtylimit_mutex); +} + +void dirtylimit_state_unlock(void) +{ + qemu_mutex_unlock(&dirtylimit_mutex); +} + +static void +__attribute__((__constructor__)) dirtylimit_mutex_init(void) +{ + qemu_mutex_init(&dirtylimit_mutex); +} + +static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) +{ + return &dirtylimit_state->states[cpu_index]; +} + +void dirtylimit_state_initialize(void) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + int max_cpus = ms->smp.max_cpus; + int i; + + dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); + + dirtylimit_state->states = + g_malloc0(sizeof(VcpuDirtyLimitState) * max_cpus); + + for (i = 0; i < max_cpus; i++) { + dirtylimit_state->states[i].cpu_index = i; + } + + dirtylimit_state->max_cpus = max_cpus; + trace_dirtylimit_state_initialize(max_cpus); +} + +void dirtylimit_state_finalize(void) +{ + g_free(dirtylimit_state->states); + dirtylimit_state->states = NULL; + + g_free(dirtylimit_state); + dirtylimit_state = NULL; + + trace_dirtylimit_state_finalize(); +} + +bool dirtylimit_in_service(void) +{ + return !!dirtylimit_state; +} + +bool dirtylimit_vcpu_index_valid(int cpu_index) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + + return !(cpu_index < 0 || + cpu_index >= ms->smp.max_cpus); +} + +static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) +{ + static 
uint64_t max_dirtyrate; + uint32_t dirty_ring_size = kvm_dirty_ring_size(); + uint64_t dirty_ring_size_meory_MB = + dirty_ring_size * TARGET_PAGE_SIZE >> 20; + + if (max_dirtyrate < dirtyrate) { + max_dirtyrate = dirtyrate; + } + + return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate; +} + +static inline bool dirtylimit_done(uint64_t quota, + uint64_t current) +{ + uint64_t min, max; + + min = MIN(quota, current); + max = MAX(quota, current); + + return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; +} + +static inline bool +dirtylimit_need_linear_adjustment(uint64_t quota, + uint64_t current) +{ + uint64_t min, max; + + min = MIN(quota, current); + max = MAX(quota, current); + + return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; +} + +static void dirtylimit_set_throttle(CPUState *cpu, + uint64_t quota, + uint64_t current) +{ + int64_t ring_full_time_us = 0; + uint64_t sleep_pct = 0; + uint64_t throttle_us = 0; + + if (current == 0) { + cpu->throttle_us_per_full = 0; + return; + } + + ring_full_time_us = dirtylimit_dirty_ring_full_time(current); + + if (dirtylimit_need_linear_adjustment(quota, current)) { + if (quota < current) { + sleep_pct = (current - quota) * 100 / current; + throttle_us = + ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); + cpu->throttle_us_per_full += throttle_us; + } else { + sleep_pct = (quota - current) * 100 / quota; + throttle_us = + ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); + cpu->throttle_us_per_full -= throttle_us; + } + + trace_dirtylimit_throttle_pct(cpu->cpu_index, + sleep_pct, + throttle_us); + } else { + if (quota < current) { + cpu->throttle_us_per_full += ring_full_time_us / 10; + } else { + cpu->throttle_us_per_full -= ring_full_time_us / 10; + } + } + + /* + * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), + * current dirty page rate may never reach the quota, we should stop + * increasing sleep time? 
+ */ + cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, + ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); + + cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); +} + +static void dirtylimit_adjust_throttle(CPUState *cpu) +{ + uint64_t quota = 0; + uint64_t current = 0; + int cpu_index = cpu->cpu_index; + + quota = dirtylimit_vcpu_get_state(cpu_index)->quota; + current = vcpu_dirty_rate_get(cpu_index); + + if (!dirtylimit_done(quota, current)) { + dirtylimit_set_throttle(cpu, quota, current); + } + + return; +} + +void dirtylimit_process(void) +{ + CPUState *cpu; + + if (!qatomic_read(&dirtylimit_quit)) { + dirtylimit_state_lock(); + + if (!dirtylimit_in_service()) { + dirtylimit_state_unlock(); + return; + } + + CPU_FOREACH(cpu) { + if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { + continue; + } + dirtylimit_adjust_throttle(cpu); + } + dirtylimit_state_unlock(); + } +} + +void dirtylimit_change(bool start) +{ + if (start) { + qatomic_set(&dirtylimit_quit, 0); + } else { + qatomic_set(&dirtylimit_quit, 1); + } +} + +void dirtylimit_set_vcpu(int cpu_index, + uint64_t quota, + bool enable) +{ + trace_dirtylimit_set_vcpu(cpu_index, quota); + + if (enable) { + dirtylimit_state->states[cpu_index].quota = quota; + if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { + dirtylimit_state->limited_nvcpu++; + } + } else { + dirtylimit_state->states[cpu_index].quota = 0; + if (dirtylimit_state->states[cpu_index].enabled) { + dirtylimit_state->limited_nvcpu--; + } + } + + dirtylimit_state->states[cpu_index].enabled = enable; +} + +void dirtylimit_set_all(uint64_t quota, + bool enable) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + int max_cpus = ms->smp.max_cpus; + int i; + + for (i = 0; i < max_cpus; i++) { + dirtylimit_set_vcpu(i, quota, enable); + } +} + +void dirtylimit_vcpu_execute(CPUState *cpu) +{ + if (dirtylimit_in_service() && + dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && + cpu->throttle_us_per_full) { + 
trace_dirtylimit_vcpu_execute(cpu->cpu_index, + cpu->throttle_us_per_full); + g_usleep(cpu->throttle_us_per_full); + } +} + +static void dirtylimit_init(void) +{ + dirtylimit_state_initialize(); + dirtylimit_change(true); + vcpu_dirty_rate_stat_initialize(); + vcpu_dirty_rate_stat_start(); +} + +static void dirtylimit_cleanup(void) +{ + vcpu_dirty_rate_stat_stop(); + vcpu_dirty_rate_stat_finalize(); + dirtylimit_change(false); + dirtylimit_state_finalize(); +} + +void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, + int64_t cpu_index, + Error **errp) +{ + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { + return; + } + + if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { + error_setg(errp, "incorrect cpu index specified"); + return; + } + + if (!dirtylimit_in_service()) { + return; + } + + dirtylimit_state_lock(); + + if (has_cpu_index) { + dirtylimit_set_vcpu(cpu_index, 0, false); + } else { + dirtylimit_set_all(0, false); + } + + if (!dirtylimit_state->limited_nvcpu) { + dirtylimit_cleanup(); + } + + dirtylimit_state_unlock(); +} + +void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) +{ + int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); + Error *err = NULL; + + qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); + if (err) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " + "dirty limit for virtual CPU]\n"); +} + +void qmp_set_vcpu_dirty_limit(bool has_cpu_index, + int64_t cpu_index, + uint64_t dirty_rate, + Error **errp) +{ + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { + error_setg(errp, "dirty page limit feature requires KVM with" + " accelerator property 'dirty-ring-size' set"); + return; + } + + if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { + error_setg(errp, "incorrect cpu index specified"); + return; + } + + if (!dirty_rate) { + qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); + return; + } + + 
+    dirtylimit_state_lock();
+
+    if (!dirtylimit_in_service()) {
+        dirtylimit_init();
+    }
+
+    if (has_cpu_index) {
+        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
+    } else {
+        dirtylimit_set_all(dirty_rate, true);
+    }
+
+    dirtylimit_state_unlock();
+}
+
+void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
+    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
+    Error *err = NULL;
+
+    qmp_set_vcpu_dirty_limit(cpu_index != -1, cpu_index, dirty_rate, &err);
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
+                   "dirty limit for virtual CPU]\n");
+}
+
+static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
+{
+    DirtyLimitInfo *info = NULL;
+
+    info = g_malloc0(sizeof(*info));    /* ownership passes to the caller */
+    info->cpu_index = cpu_index;
+    info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
+    info->current_rate = vcpu_dirty_rate_get(cpu_index);
+
+    return info;
+}
+
+static struct DirtyLimitInfoList *dirtylimit_query_all(void)
+{
+    int i, index;
+    DirtyLimitInfo *info = NULL;
+    DirtyLimitInfoList *head = NULL, **tail = &head;
+
+    dirtylimit_state_lock();
+
+    if (!dirtylimit_in_service()) {
+        dirtylimit_state_unlock();
+        return NULL;
+    }
+
+    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
+        index = dirtylimit_state->states[i].cpu_index;
+        if (dirtylimit_vcpu_get_state(index)->enabled) {
+            info = dirtylimit_query_vcpu(index);
+            QAPI_LIST_APPEND(tail, info);    /* only limited vCPUs are listed */
+        }
+    }
+
+    dirtylimit_state_unlock();
+
+    return head;
+}
+
+struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
+{
+    if (!dirtylimit_in_service()) {
+        return NULL;
+    }
+
+    return dirtylimit_query_all();
+}
+
+void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+    DirtyLimitInfoList *limit, *head, *info = NULL;
+    Error *err = NULL;
+
+    if (!dirtylimit_in_service()) {
+        monitor_printf(mon, "Dirty page limit not enabled!\n");
+        return;
+    }
+
+    info = qmp_query_vcpu_dirty_limit(&err);
+    if (err) {
+        hmp_handle_error(mon, err);
+        return;
+    }
+
+    head = info;
+    for (limit = head; limit != NULL; limit = limit->next) {
+        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
+                            " current rate %"PRIi64 " (MB/s)\n",
+                            limit->value->cpu_index,
+                            limit->value->limit_rate,
+                            limit->value->current_rate);
+    }
+
+    qapi_free_DirtyLimitInfoList(info);    /* g_free() leaked all but node 0 */
+}
diff --git a/softmmu/meson.build b/softmmu/meson.build
index d8e03018abf5e711079516b0fb48d5c8fa9f5cb1..95029a5db21ae0cc209356f1a76ed32eb7bb558d 100644
--- a/softmmu/meson.build
+++ b/softmmu/meson.build
@@ -15,6 +15,7 @@ specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files(
   'vl.c',
   'cpu-timers.c',
   'runstate-action.c',
+  'dirtylimit.c',
 )])
 
 specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files(
diff --git a/softmmu/trace-events b/softmmu/trace-events
index 9c88887b3c643e10cef84c312296a6a92b457a12..22606dc27b3107f8459d2268cf83b9436fa786d8 100644
--- a/softmmu/trace-events
+++ b/softmmu/trace-events
@@ -31,3 +31,10 @@ runstate_set(int current_state, const char *current_state_str, int new_state, co
 system_wakeup_request(int reason) "reason=%d"
 qemu_system_shutdown_request(int reason) "reason=%d"
 qemu_system_powerdown_request(void) ""
+
+#dirtylimit.c
+dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d"
+dirtylimit_state_finalize(void)
+dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
+dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
+dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index 4ee26014b78322df9a04ad7be2c368726b9f19d8..1e594f9cb1c65c0a5573ed1ae98e66f68db79851 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -75,6 +75,28 @@ QDict *wait_command(QTestState *who, const char *command, ...)
     return ret;
 }
 
+/*
+ * Execute a QMP command; the caller owns a reference to the returned dict
+ */
+QDict *qmp_command(QTestState *who, const char *command, ...)
+{
+    va_list ap;
+    QDict *resp, *ret;
+
+    va_start(ap, command);
+    resp = qtest_vqmp(who, command, ap);
+    va_end(ap);
+
+    g_assert(!qdict_haskey(resp, "error"));
+    g_assert(qdict_haskey(resp, "return"));
+
+    ret = qdict_get_qdict(resp, "return");
+    qobject_ref(ret);       /* keep "return" alive past the unref below */
+    qobject_unref(resp);
+
+    return ret;
+}
+
 /*
  * Send QMP command "migrate".
  * Arguments are built from @fmt... (formatted like
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index d63bba9630f9572fe4f33a4f606474d7ee63224c..9bc809fb7505dd3322b38949828339c22a2d57b1 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -22,6 +22,8 @@ QDict *wait_command_fd(QTestState *who, int fd, const char *command, ...);
 GCC_FMT_ATTR(2, 3)
 QDict *wait_command(QTestState *who, const char *command, ...);
 
+QDict *qmp_command(QTestState *who, const char *command, ...);
+
 GCC_FMT_ATTR(3, 4)
 void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);
 
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 7b42f6fd909bfbcd3ae8c6051bcaaff9016bed53..8fad247f6cd7d2f1259e692f9753d35591a48db7 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -23,6 +23,7 @@
 #include "qapi/qapi-visit-sockets.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"
+#include "qapi/qmp/qlist.h"
 
 #include "migration-helpers.h"
 #include "tests/migration/migration-test.h"
@@ -42,6 +43,12 @@ static bool uffd_feature_thread_id;
 /* A downtime where the test really should converge */
 #define CONVERGE_DOWNTIME 1000
 
+/*
+ * Dirtylimit stops adjusting once the dirty page rate error
+ * value is less than DIRTYLIMIT_TOLERANCE_RANGE
+ */
+#define DIRTYLIMIT_TOLERANCE_RANGE 25  /* MB/s */
+
 #if defined(__linux__)
 #include
 #include
@@ -1394,6 +1401,253 @@ static void test_multifd_tcp_cancel(void)
     test_migrate_end(from, to2, true);
 }
 
+static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
+{
+    qobject_unref(qmp_command(who,
+                  "{ 'execute': 'calc-dirty-rate',"
+                  "'arguments': { "
+                  "'calc-time': %" PRIu64 ","
+                  "'mode': 'dirty-ring' }}",
+                  calc_time));
+}
+
+static QDict *query_dirty_rate(QTestState *who)
+{
+    return qmp_command(who, "{ 'execute': 'query-dirty-rate' }");
+}
+
+static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate)
+{
+    qobject_unref(qmp_command(who,
+                  "{ 'execute': 'set-vcpu-dirty-limit',"
+                  "'arguments': { "
+                  "'dirty-rate': %" PRIu64 " } }",
+                  dirtyrate));
+}
+
+static void cancel_vcpu_dirty_limit(QTestState *who)
+{
+    qobject_unref(qmp_command(who,
+                  "{ 'execute': 'cancel-vcpu-dirty-limit' }"));
+}
+
+static QDict *query_vcpu_dirty_limit(QTestState *who)
+{
+    QDict *rsp;
+
+    rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }");
+    g_assert(!qdict_haskey(rsp, "error"));
+    g_assert(qdict_haskey(rsp, "return"));
+
+    return rsp;    /* whole response; the caller reads "return" and unrefs */
+}
+
+static bool calc_dirtyrate_ready(QTestState *who)
+{
+    QDict *rsp_return;
+    bool ready;
+
+    rsp_return = query_dirty_rate(who);
+    g_assert(rsp_return);
+
+    ready = g_strcmp0(qdict_get_str(rsp_return, "status"), "measuring");
+    qobject_unref(rsp_return);    /* was leaked on every poll */
+
+    return ready;
+}
+
+static void wait_for_calc_dirtyrate_complete(QTestState *who,
+                                             int64_t time_s)
+{
+    int max_try_count = 10000;
+    usleep(time_s * 1000000);
+
+    while (!calc_dirtyrate_ready(who) && --max_try_count) {
+        usleep(1000);
+    }
+
+    /*
+     * Set the timeout with 10 s(max_try_count * 1000us),
+     * if dirtyrate measurement not complete, fail test.
+     */
+    g_assert_cmpint(max_try_count, !=, 0);
+}
+
+static int64_t get_dirty_rate(QTestState *who)
+{
+    QDict *rsp_return;
+    const char *status;    /* borrowed from rsp_return, not a copy */
+    QList *rates;
+    const QListEntry *entry;
+    QDict *rate;
+    int64_t dirtyrate;
+
+    rsp_return = query_dirty_rate(who);
+    g_assert(rsp_return);
+
+    status = qdict_get_str(rsp_return, "status");
+    g_assert(status);
+    g_assert_cmpstr(status, ==, "measured");
+
+    rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate");
+    g_assert(rates && !qlist_empty(rates));
+
+    entry = qlist_first(rates);    /* only the first vCPU entry is read */
+    g_assert(entry);
+
+    rate = qobject_to(QDict, qlist_entry_obj(entry));
+    g_assert(rate);
+
+    dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1);
+
+    qobject_unref(rsp_return);
+    return dirtyrate;
+}
+
+static int64_t get_limit_rate(QTestState *who)
+{
+    QDict *rsp_return;
+    QList *rates;
+    const QListEntry *entry;
+    QDict *rate;
+    int64_t dirtyrate;
+
+    rsp_return = query_vcpu_dirty_limit(who);
+    g_assert(rsp_return);
+
+    rates = qdict_get_qlist(rsp_return, "return");
+    g_assert(rates && !qlist_empty(rates));
+
+    entry = qlist_first(rates);    /* only the first vCPU entry is read */
+    g_assert(entry);
+
+    rate = qobject_to(QDict, qlist_entry_obj(entry));
+    g_assert(rate);
+
+    dirtyrate = qdict_get_try_int(rate, "limit-rate", -1);
+
+    qobject_unref(rsp_return);
+    return dirtyrate;
+}
+
+static QTestState *dirtylimit_start_vm(void)
+{
+    QTestState *vm = NULL;
+    g_autofree gchar *cmd = NULL;
+    const char *arch = qtest_get_arch();
+    g_autofree char *bootpath = NULL;
+
+    assert((strcmp(arch, "x86_64") == 0));
+    bootpath = g_strdup_printf("%s/bootsect", tmpfs);
+    assert(sizeof(x86_bootsect) == 512);
+    init_bootfile(bootpath, x86_bootsect, sizeof(x86_bootsect));
+
+    cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 "
+                          "-name dirtylimit-test,debug-threads=on "
+                          "-m 150M -smp 1 "
+                          "-serial file:%s/vm_serial "
+                          "-drive file=%s,format=raw ",
+                          tmpfs, bootpath);
+
+    vm = qtest_init(cmd);
+    return vm;
+}
+
+static void dirtylimit_stop_vm(QTestState *vm)
+{
+    qtest_quit(vm);
+    cleanup("bootsect");
+    cleanup("vm_serial");
+}
+
+static void test_vcpu_dirty_limit(void)
+{
+    QTestState *vm;
+    int64_t origin_rate;
+    int64_t quota_rate;
+    int64_t rate;
+    int max_try_count = 20;
+    int hit = 0;
+
+    /* Start vm for vcpu dirtylimit test */
+    vm = dirtylimit_start_vm();
+
+    /* Wait for the first serial output from the vm */
+    wait_for_serial("vm_serial");
+
+    /* Do dirtyrate measurement with calc time equal to 1s */
+    calc_dirty_rate(vm, 1);
+
+    /* Sleep calc time and wait for calc dirtyrate to complete */
+    wait_for_calc_dirtyrate_complete(vm, 1);
+
+    /* Query original dirty page rate */
+    origin_rate = get_dirty_rate(vm);
+
+    /* VM booted from bootsect should dirty memory steadily; -1 = no value */
+    g_assert_cmpint(origin_rate, >, 0);
+
+    /* Setup quota dirty page rate at half of origin */
+    quota_rate = origin_rate / 2;
+
+    /* Set dirtylimit */
+    dirtylimit_set_all(vm, quota_rate);
+
+    /*
+     * Check that query-vcpu-dirty-limit reports exactly the
+     * value given to set-vcpu-dirty-limit
+     */
+    g_assert_cmpint(quota_rate, ==, get_limit_rate(vm));
+
+    /* Sleep a bit to check if it takes effect */
+    usleep(2000000);
+
+    /*
+     * Check if dirtylimit take effect realistically, set the
+     * timeout with 20 s(max_try_count * 1s), if dirtylimit
+     * doesn't take effect, fail test.
+     */
+    while (max_try_count--) {    /* exactly max_try_count attempts */
+        calc_dirty_rate(vm, 1);
+        wait_for_calc_dirtyrate_complete(vm, 1);
+        rate = get_dirty_rate(vm);
+
+        /*
+         * Assume the limit is hit if the current rate is less
+         * than the quota rate (within the accepted error)
+         */
+        if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
+            hit = 1;
+            break;
+        }
+    }
+
+    g_assert_cmpint(hit, ==, 1);
+
+    hit = 0;
+    max_try_count = 20;
+
+    /* Check if dirtylimit cancellation takes effect */
+    cancel_vcpu_dirty_limit(vm);
+    while (max_try_count--) {    /* exactly max_try_count attempts */
+        calc_dirty_rate(vm, 1);
+        wait_for_calc_dirtyrate_complete(vm, 1);
+        rate = get_dirty_rate(vm);
+
+        /*
+         * Assume dirtylimit is canceled if the current rate is
+         * greater than the quota rate (within the accepted error)
+         */
+        if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
+            hit = 1;
+            break;
+        }
+    }
+
+    g_assert_cmpint(hit, ==, 1);
+    dirtylimit_stop_vm(vm);
+}
+
 static bool kvm_dirty_ring_supported(void)
 {
 #if defined(__linux__) && defined(HOST_X86_64)
@@ -1483,6 +1737,8 @@ int main(int argc, char **argv)
     if (kvm_dirty_ring_supported()) {
         qtest_add_func("/migration/dirty_ring",
                        test_precopy_unix_dirty_ring);
+        qtest_add_func("/migration/vcpu_dirty_limit",
+                       test_vcpu_dirty_limit);
     }
 
     ret = g_test_run();
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
index 7f103ea3fd2ac5c9fd0b6ebb91ce4b66bac31a41..4b216a0435db47ee2e5d46efff289826b5435d22 100644
--- a/tests/qtest/qmp-cmd-test.c
+++ b/tests/qtest/qmp-cmd-test.c
@@ -110,6 +110,8 @@ static bool query_is_ignored(const char *cmd)
         "query-sev-capabilities",
         "query-sgx",
         "query-sgx-capabilities",
+        /* Success depends on the dirty page rate limit being enabled */
+        "query-vcpu-dirty-limit",
         NULL
     };
     int i;