diff --git a/Makefile.target b/Makefile.target
index 933b27453a1dbb9273e0a9ff2d586c84ddf38fae..5ea840964c0b0e1aab2ef2473443e964b2d0b499 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -161,6 +161,7 @@ obj-y += qapi/
 obj-y += memory.o
 obj-y += memory_mapping.o
 obj-y += migration/ram.o
+obj-y += migration/dirtyrate.o
 LIBS := $(libs_softmmu) $(LIBS)
 
 # Hardware support
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
new file mode 100644
index 0000000000000000000000000000000000000000..f65ea85620a6baa02b91d2162fd9cc3a09fecb74
--- /dev/null
+++ b/migration/dirtyrate.c
@@ -0,0 +1,428 @@
+/*
+ * Dirtyrate implementation
+ *
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * Authors:
+ *  Chuan Zheng
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <zlib.h>
+#include "qapi/error.h"
+#include "cpu.h"
+#include "exec/memory.h"
+#include "exec/ram_addr.h"
+#include "exec/target_page.h"
+#include "qemu/rcu_queue.h"
+#include "qapi/qapi-commands-migration.h"
+#include "ram.h"
+#include "trace.h"
+#include "dirtyrate.h"
+
+static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
+static struct DirtyRateStat DirtyStat;
+
+static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
+{
+    int64_t current_time;
+
+    current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    if ((current_time - initial_time) >= msec) {
+        msec = current_time - initial_time;
+    } else {
+        g_usleep((msec + initial_time - current_time) * 1000);
+    }
+
+    return msec;
+}
+
+static bool is_sample_period_valid(int64_t sec)
+{
+    if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
+        sec > MAX_FETCH_DIRTYRATE_TIME_SEC) {
+        return false;
+    }
+
+    return true;
+}
+
+static int dirtyrate_set_state(int *state, int old_state, int new_state)
+{
+    assert(new_state < DIRTY_RATE_STATUS__MAX);
+    trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
+    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
+        return 0;
+    } else {
+        return -1;
+    }
+}
+
+static struct DirtyRateInfo *query_dirty_rate_info(void)
+{
+    int64_t dirty_rate = DirtyStat.dirty_rate;
+    struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
+
+    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
+        info->has_dirty_rate = true;
+        info->dirty_rate = dirty_rate;
+    }
+
+    info->status = CalculatingState;
+    info->start_time = DirtyStat.start_time;
+    info->calc_time = DirtyStat.calc_time;
+
+    trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
+
+    return info;
+}
+
+static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
+{
+    DirtyStat.total_dirty_samples = 0;
+    DirtyStat.total_sample_count = 0;
+    DirtyStat.total_block_mem_MB = 0;
+    DirtyStat.dirty_rate = -1;
+    DirtyStat.start_time = start_time;
+    DirtyStat.calc_time = calc_time;
+}
+
+static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
+{
+    DirtyStat.total_dirty_samples += info->sample_dirty_count;
+    DirtyStat.total_sample_count += info->sample_pages_count;
+    /* size of total pages in MB */
+    DirtyStat.total_block_mem_MB += (info->ramblock_pages *
+                                     TARGET_PAGE_SIZE) >> 20;
+}
+
+static void update_dirtyrate(uint64_t msec)
+{
+    uint64_t dirtyrate;
+    uint64_t total_dirty_samples = DirtyStat.total_dirty_samples;
+    uint64_t total_sample_count = DirtyStat.total_sample_count;
+    uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB;
+
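+    /*
+     * The estimate scales the sampled dirty fraction
+     * (total_dirty_samples / total_sample_count) by the sampled memory
+     * size in MB and converts msec to seconds.  For example, if 128 of
+     * 2048 sampled pages changed across 4096 MB of sampled RAM within
+     * 1000 msec, the result is 128 * 4096 * 1000 / (2048 * 1000)
+     * = 256 MB/s.
+     */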
+    dirtyrate = total_dirty_samples * total_block_mem_MB *
+                1000 / (total_sample_count * msec);
+
+    DirtyStat.dirty_rate = dirtyrate;
+}
+
+/*
+ * get hash result for the sampled memory with length of TARGET_PAGE_SIZE
+ * in ramblock, which starts from ramblock base address.
+ */
+static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
+                                      uint64_t vfn)
+{
+    uint32_t crc;
+
+    crc = crc32(0, (info->ramblock_addr +
+                    vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE);
+
+    trace_get_ramblock_vfn_hash(info->idstr, vfn, crc);
+    return crc;
+}
+
+static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
+{
+    unsigned int sample_pages_count;
+    int i;
+    GRand *rand;
+
+    sample_pages_count = info->sample_pages_count;
+
+    /*
+     * ramblock size less than one page, return success to skip this
+     * ramblock
+     */
+    if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
+        return true;
+    }
+
+    info->hash_result = g_try_malloc0_n(sample_pages_count,
+                                        sizeof(uint32_t));
+    if (!info->hash_result) {
+        return false;
+    }
+
+    info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
+                                            sizeof(uint64_t));
+    if (!info->sample_page_vfn) {
+        g_free(info->hash_result);
+        return false;
+    }
+
+    rand = g_rand_new();
+    for (i = 0; i < sample_pages_count; i++) {
+        info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
+                                                    info->ramblock_pages - 1);
+        info->hash_result[i] = get_ramblock_vfn_hash(info,
+                                                     info->sample_page_vfn[i]);
+    }
+    g_rand_free(rand);
+
+    return true;
+}
+
+static void get_ramblock_dirty_info(RAMBlock *block,
+                                    struct RamblockDirtyInfo *info,
+                                    struct DirtyRateConfig *config)
+{
+    uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
+
+    /* Right shift 30 bits to calc ramblock size in GB */
+    info->sample_pages_count = (qemu_ram_get_used_length(block) *
+                                sample_pages_per_gigabytes) >> 30;
+    /* Right shift TARGET_PAGE_BITS to calc page count */
+    info->ramblock_pages = qemu_ram_get_used_length(block) >>
+                           TARGET_PAGE_BITS;
+    info->ramblock_addr = qemu_ram_get_host_addr(block);
+    strcpy(info->idstr, qemu_ram_get_idstr(block));
+}
+
+static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
+{
+    int i;
+
+    if (!infos) {
+        return;
+    }
+
+    for (i = 0; i < count; i++) {
+        g_free(infos[i].sample_page_vfn);
+        g_free(infos[i].hash_result);
+    }
+    g_free(infos);
+}
+
+static bool skip_sample_ramblock(RAMBlock *block)
+{
+    /*
+     * Sample only blocks with a used length of at least
+     * MIN_RAMBLOCK_SIZE << 10 bytes.
+     */
+    if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
+        trace_skip_sample_ramblock(block->idstr,
+                                   qemu_ram_get_used_length(block));
+        return true;
+    }
+
+    return false;
+}
+
+static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
+                                      struct DirtyRateConfig config,
+                                      int *block_count)
+{
+    struct RamblockDirtyInfo *info = NULL;
+    struct RamblockDirtyInfo *dinfo = NULL;
+    RAMBlock *block = NULL;
+    int total_count = 0;
+    int index = 0;
+    bool ret = false;
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        if (skip_sample_ramblock(block)) {
+            continue;
+        }
+        total_count++;
+    }
+
+    dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
+    if (dinfo == NULL) {
+        goto out;
+    }
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        if (skip_sample_ramblock(block)) {
+            continue;
+        }
+        if (index >= total_count) {
+            break;
+        }
+        info = &dinfo[index];
+        get_ramblock_dirty_info(block, info, &config);
+        if (!save_ramblock_hash(info)) {
+            goto out;
+        }
+        index++;
+    }
+    ret = true;
+
+out:
+    *block_count = index;
+    *block_dinfo = dinfo;
+    return ret;
+}
+
+static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
+{
+    uint32_t crc;
+    int i;
+
+    for (i = 0; i < info->sample_pages_count; i++) {
+        crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
+        if (crc != info->hash_result[i]) {
+            trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]);
+            info->sample_dirty_count++;
+        }
+    }
+}
+
+static struct RamblockDirtyInfo *
+find_block_matched(RAMBlock *block, int count,
+                   struct RamblockDirtyInfo *infos)
+{
+    int i;
+    struct RamblockDirtyInfo *matched;
+
+    for (i = 0; i < count; i++) {
+        if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
+            break;
+        }
+    }
+
+    if (i == count) {
+        return NULL;
+    }
+
+    if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
+        infos[i].ramblock_pages !=
+            (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) {
+        trace_find_page_matched(block->idstr);
+        return NULL;
+    }
+
+    matched = &infos[i];
+
+    return matched;
+}
+
+static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
+                                   int block_count)
+{
+    struct RamblockDirtyInfo *block_dinfo = NULL;
+    RAMBlock *block = NULL;
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        if (skip_sample_ramblock(block)) {
+            continue;
+        }
+        block_dinfo = find_block_matched(block, block_count, info);
+        if (block_dinfo == NULL) {
+            continue;
+        }
+        calc_page_dirty_rate(block_dinfo);
+        update_dirtyrate_stat(block_dinfo);
+    }
+
+    if (DirtyStat.total_sample_count == 0) {
+        return false;
+    }
+
+    return true;
+}
+
+static void calculate_dirtyrate(struct DirtyRateConfig config)
+{
+    struct RamblockDirtyInfo *block_dinfo = NULL;
+    int block_count = 0;
+    int64_t msec = 0;
+    int64_t initial_time;
+
+    rcu_register_thread();
+    rcu_read_lock();
+    initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
+        goto out;
+    }
+    rcu_read_unlock();
+
+    msec = config.sample_period_seconds * 1000;
+    msec = set_sample_page_period(msec, initial_time);
+    DirtyStat.start_time = initial_time / 1000;
+    DirtyStat.calc_time = msec / 1000;
+
+    rcu_read_lock();
+    if (!compare_page_hash_info(block_dinfo, block_count)) {
+        goto out;
+    }
+
+    update_dirtyrate(msec);
+
+out:
+    rcu_read_unlock();
+    free_ramblock_dirty_info(block_dinfo, block_count);
+    rcu_unregister_thread();
+}
+
+void *get_dirtyrate_thread(void *arg)
+{
+    struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
+    int ret;
+    int64_t start_time;
+    int64_t calc_time;
+
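+    /*
+     * Measurement states advance UNSTARTED -> MEASURING -> MEASURED.
+     * Each transition goes through the compare-and-swap in
+     * dirtyrate_set_state(), so a racing thread attempting the same
+     * transition fails instead of starting a second measurement.
+     */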
+    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
+                              DIRTY_RATE_STATUS_MEASURING);
+    if (ret == -1) {
+        error_report("change dirtyrate state failed.");
+        return NULL;
+    }
+
+    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+    calc_time = config.sample_period_seconds;
+    init_dirtyrate_stat(start_time, calc_time);
+
+    calculate_dirtyrate(config);
+
+    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
+                              DIRTY_RATE_STATUS_MEASURED);
+    if (ret == -1) {
+        error_report("change dirtyrate state failed.");
+    }
+    return NULL;
+}
+
+void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
+{
+    static struct DirtyRateConfig config;
+    QemuThread thread;
+    int ret;
+
+    /*
+     * If the dirty rate is already being measured, don't attempt to start.
+     */
+    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
+        error_setg(errp, "the dirty rate is already being measured.");
+        return;
+    }
+
+    if (!is_sample_period_valid(calc_time)) {
+        error_setg(errp, "calc-time is out of range[%d, %d].",
+                   MIN_FETCH_DIRTYRATE_TIME_SEC,
+                   MAX_FETCH_DIRTYRATE_TIME_SEC);
+        return;
+    }
+
+    /*
+     * Init calculation state as unstarted.
+     */
+    ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
+                              DIRTY_RATE_STATUS_UNSTARTED);
+    if (ret == -1) {
+        error_setg(errp, "init dirty rate calculation state failed.");
+        return;
+    }
+
+    config.sample_period_seconds = calc_time;
+    config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
+    qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
+                       (void *)&config, QEMU_THREAD_DETACHED);
+}
+
+struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp)
+{
+    return query_dirty_rate_info();
+}
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
new file mode 100644
index 0000000000000000000000000000000000000000..6ec429534d75fdf74f8b5947fd10bb88647d3caf
--- /dev/null
+++ b/migration/dirtyrate.h
@@ -0,0 +1,69 @@
+/*
+ * Dirtyrate common functions
+ *
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *  Chuan Zheng
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MIGRATION_DIRTYRATE_H
+#define QEMU_MIGRATION_DIRTYRATE_H
+
+/*
+ * Sample 512 pages per GB as default.
+ * TODO: Make it configurable.
+ */
+#define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512
+
+/*
+ * Record ramblock idstr
+ */
+#define RAMBLOCK_INFO_MAX_LEN 256
+
+/*
+ * Minimum RAMBlock size to sample; skip_sample_ramblock() compares
+ * the used length against MIN_RAMBLOCK_SIZE << 10 bytes (128 KiB).
+ */
+#define MIN_RAMBLOCK_SIZE 128
+
+/*
+ * Allowed range for the calculation duration: 1s minimum, 60s maximum.
+ */
+#define MIN_FETCH_DIRTYRATE_TIME_SEC 1
+#define MAX_FETCH_DIRTYRATE_TIME_SEC 60
+
+struct DirtyRateConfig {
+    uint64_t sample_pages_per_gigabytes; /* sample pages per GB */
+    int64_t sample_period_seconds; /* sampling period, in seconds */
+};
+
+/*
+ * Store dirty page info for each ramblock.
+ */
+struct RamblockDirtyInfo {
+    char idstr[RAMBLOCK_INFO_MAX_LEN]; /* idstr for each ramblock */
+    uint8_t *ramblock_addr; /* base address of ramblock we measure */
+    uint64_t ramblock_pages; /* ramblock size in units of TARGET_PAGE_SIZE */
+    uint64_t *sample_page_vfn; /* virtual frame number of each sampled page */
+    uint64_t sample_pages_count; /* count of sampled pages */
+    uint64_t sample_dirty_count; /* count of dirty pages we measure */
+    uint32_t *hash_result; /* array of hash results for the sampled pages */
+};
+
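+/*
+ * Worked example of the sampling density: with the default
+ * DIRTYRATE_DEFAULT_SAMPLE_PAGES (512 pages per GB), a RAMBlock with a
+ * used length of 4 GB gets sample_pages_count = 4 * 512 = 2048.
+ */
+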
+/*
+ * Store calculation statistics for each measure.
+ */
+struct DirtyRateStat {
+    uint64_t total_dirty_samples; /* total count of dirty sampled pages */
+    uint64_t total_sample_count; /* total count of sampled pages */
+    uint64_t total_block_mem_MB; /* size of total sampled pages in MB */
+    int64_t dirty_rate; /* dirty rate in MB/s */
+    int64_t start_time; /* calculation start time, in seconds */
+    int64_t calc_time; /* duration of the sampling period, in seconds */
+};
+
+void *get_dirtyrate_thread(void *arg);
+#endif
diff --git a/migration/ram.c b/migration/ram.c
index 848059d9fbbc9781054d6c443371f1d582cbe122..1a33c7b3e20da91c45a46c42a26317bbcee3c5b5 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -159,21 +159,12 @@ out:
     return ret;
 }
 
-static bool ramblock_is_ignored(RAMBlock *block)
+bool ramblock_is_ignored(RAMBlock *block)
 {
     return !qemu_ram_is_migratable(block) ||
            (migrate_ignore_shared() && qemu_ram_is_shared(block));
 }
 
-/* Should be holding either ram_list.mutex, or the RCU lock. */
-#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
-    INTERNAL_RAMBLOCK_FOREACH(block)                   \
-        if (ramblock_is_ignored(block)) {} else
-
-#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
-    INTERNAL_RAMBLOCK_FOREACH(block)                   \
-        if (!qemu_ram_is_migratable(block)) {} else
-
 #undef RAMBLOCK_FOREACH
 
 int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
diff --git a/migration/ram.h b/migration/ram.h
index a788ff0e8e97726d9b6d56b23c5fe293bb4d57a0..565ec86b1f505c9cd0f999b16432e63deb8274b4 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -37,6 +37,16 @@ extern MigrationStats ram_counters;
 extern XBZRLECacheStats xbzrle_counters;
 extern CompressionStats compression_counters;
 
+bool ramblock_is_ignored(RAMBlock *block);
+/* Should be holding either ram_list.mutex, or the RCU lock. */
+#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
+    INTERNAL_RAMBLOCK_FOREACH(block)                   \
+        if (ramblock_is_ignored(block)) {} else
+
+#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
+    INTERNAL_RAMBLOCK_FOREACH(block)                   \
+        if (!qemu_ram_is_migratable(block)) {} else
+
 int xbzrle_cache_resize(int64_t new_size, Error **errp);
 uint64_t ram_bytes_remaining(void);
 uint64_t ram_bytes_total(void);
diff --git a/migration/trace-events b/migration/trace-events
index d8e54c367a4b978baead3ea6d1836355d540fdbb..69620c43c2eba9414eecf5c9775e82fb12e29936 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -296,3 +296,11 @@ dirty_bitmap_load_bits_zeroes(void) ""
 dirty_bitmap_load_header(uint32_t flags) "flags 0x%x"
 dirty_bitmap_load_enter(void) ""
 dirty_bitmap_load_success(void) ""
+
+# dirtyrate.c
+dirtyrate_set_state(const char *new_state) "new state %s"
+query_dirty_rate_info(const char *new_state) "current state %s"
+get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock name: %s, vfn: %" PRIu64 ", crc: %" PRIu32
+calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32
+skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64
+find_page_matched(const char *idstr) "ramblock %s addr or size changed"
diff --git a/qapi/migration.json b/qapi/migration.json
index 9cfbaf8c6ca61bc874f351ee6d7402b6d72588e5..6844ddfab3a238b6d0aee08e8e738904473b9c78 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1445,3 +1445,68 @@
 # Since: 3.0
 ##
 { 'command': 'migrate-pause', 'allow-oob': true }
+
+##
+# @DirtyRateStatus:
+#
+# An enumeration of dirty page rate measurement states.
+#
+# @unstarted: the dirtyrate thread has not been started.
+#
+# @measuring: the dirtyrate thread is measuring.
+#
+# @measured: the dirtyrate thread has finished measuring; results are available.
+#
+# Since: 5.2
+#
+##
+{ 'enum': 'DirtyRateStatus',
+  'data': [ 'unstarted', 'measuring', 'measured'] }
+
+##
+# @DirtyRateInfo:
+#
+# Information about the current dirty page rate of the VM.
+#
+# @dirty-rate: an estimate of the dirty page rate of the VM in units of
+#              MB/s; only present when the measurement has completed.
+#
+# @status: current state of the measurement; one of 'unstarted',
+#          'measuring' or 'measured'.
+#
+# @start-time: start time of the calculation, in seconds.
+#
+# @calc-time: duration over which dirty pages were sampled, in seconds.
+#
+# Since: 5.2
+#
+##
+{ 'struct': 'DirtyRateInfo',
+  'data': {'*dirty-rate': 'int64',
+           'status': 'DirtyRateStatus',
+           'start-time': 'int64',
+           'calc-time': 'int64'} }
+
+##
+# @calc-dirty-rate:
+#
+# Start measuring the dirty page rate of the VM.
+#
+# @calc-time: duration, in seconds, over which to sample dirty pages.
+#
+# Since: 5.2
+#
+# Example:
+#   { "execute": "calc-dirty-rate", "arguments": { "calc-time": 1 } }
+#
+##
+{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
+
+##
+# @query-dirty-rate:
+#
+# Query the dirty page rate of the VM, in units of MB/s.
+#
+# Since: 5.2
+##
+{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
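
For reference, a QMP session exercising the two commands above would look
roughly as follows; the reply values are illustrative only, not output from
a real run:

  -> { "execute": "calc-dirty-rate", "arguments": { "calc-time": 1 } }
  <- { "return": {} }
  -> { "execute": "query-dirty-rate" }
  <- { "return": { "status": "measured", "dirty-rate": 108,
                   "start-time": 1602251158, "calc-time": 1 } }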