From 8577a6b59eac5566e3a2b39fa554ca93c550ffc9 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:56 +0800 Subject: [PATCH 01/31] migration/dirtyrate: setup up query-dirtyrate framwork Add get_dirtyrate_thread() functions to setup query-dirtyrate framework. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-2-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- Makefile.target | 1 + migration/dirtyrate.c | 38 ++++++++++++++++++++++++++++++++++++++ migration/dirtyrate.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 migration/dirtyrate.c create mode 100644 migration/dirtyrate.h diff --git a/Makefile.target b/Makefile.target index 933b27453a..5ea840964c 100644 --- a/Makefile.target +++ b/Makefile.target @@ -161,6 +161,7 @@ obj-y += qapi/ obj-y += memory.o obj-y += memory_mapping.o obj-y += migration/ram.o +obj-y += migration/dirtyrate.o LIBS := $(libs_softmmu) $(LIBS) # Hardware support diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c new file mode 100644 index 0000000000..29ef663acb --- /dev/null +++ b/migration/dirtyrate.c @@ -0,0 +1,38 @@ +/* + * Dirtyrate implement code + * + * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD. + * + * Authors: + * Chuan Zheng + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "cpu.h" +#include "qemu/config-file.h" +#include "exec/memory.h" +#include "exec/ram_addr.h" +#include "exec/target_page.h" +#include "qemu/rcu_queue.h" +#include "qapi/qapi-commands-migration.h" +#include "migration.h" +#include "dirtyrate.h" + +static void calculate_dirtyrate(struct DirtyRateConfig config) +{ + /* todo */ + return; +} + +void *get_dirtyrate_thread(void *arg) +{ + struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; + + calculate_dirtyrate(config); + + return NULL; +} diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h new file mode 100644 index 0000000000..84ab9409ac --- /dev/null +++ b/migration/dirtyrate.h @@ -0,0 +1,28 @@ +/* + * Dirtyrate common functions + * + * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Chuan Zheng + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_MIGRATION_DIRTYRATE_H +#define QEMU_MIGRATION_DIRTYRATE_H + +/* + * Sample 512 pages per GB as default. + * TODO: Make it configurable. + */ +#define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 + +struct DirtyRateConfig { + uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ + int64_t sample_period_seconds; /* time duration between two sampling */ +}; + +void *get_dirtyrate_thread(void *arg); +#endif -- Gitee From 8b2c7a2ce631ccd908eb9e9a4e46565024e60a1d Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:57 +0800 Subject: [PATCH 02/31] migration/dirtyrate: add DirtyRateStatus to denote calculation status add DirtyRateStatus to denote calculating status. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-3-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert atomic name fixup --- migration/dirtyrate.c | 26 ++++++++++++++++++++++++++ qapi/migration.json | 17 +++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 29ef663acb..44a60bf10d 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -22,6 +22,19 @@ #include "migration.h" #include "dirtyrate.h" +static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; + +static int dirtyrate_set_state(int *state, int old_state, int new_state) +{ + assert(new_state < DIRTY_RATE_STATUS__MAX); + if (atomic_cmpxchg(state, old_state, new_state) == old_state) { + return 0; + } else { + return -1; + } +} + + static void calculate_dirtyrate(struct DirtyRateConfig config) { /* todo */ @@ -31,8 +44,21 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) void *get_dirtyrate_thread(void *arg) { struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; + int ret; + + ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, + DIRTY_RATE_STATUS_MEASURING); + if (ret == -1) { + error_report("change dirtyrate state failed."); + return NULL; + } calculate_dirtyrate(config); + ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, + DIRTY_RATE_STATUS_MEASURED); + if (ret == -1) { + error_report("change dirtyrate state failed."); + } return NULL; } diff --git a/qapi/migration.json b/qapi/migration.json index 9cfbaf8c6c..fdddde0af7 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1445,3 +1445,20 @@ # Since: 3.0 ## { 'command': 'migrate-pause', 'allow-oob': true } + +## +# @DirtyRateStatus: +# +# An enumeration of dirtyrate status. +# +# @unstarted: the dirtyrate thread has not been started. +# +# @measuring: the dirtyrate thread is measuring. +# +# @measured: the dirtyrate thread has measured and results are available. 
+# +# Since: 5.2 +# +## +{ 'enum': 'DirtyRateStatus', + 'data': [ 'unstarted', 'measuring', 'measured'] } -- Gitee From 71772aab12eb70408c9aa94efd49b19b14ec7852 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:58 +0800 Subject: [PATCH 03/31] migration/dirtyrate: Add RamblockDirtyInfo to store sampled page info Add RamblockDirtyInfo to store sampled page info of each ramblock. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-4-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 84ab9409ac..8707df852d 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -19,10 +19,28 @@ */ #define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 +/* + * Record ramblock idstr + */ +#define RAMBLOCK_INFO_MAX_LEN 256 + struct DirtyRateConfig { uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ int64_t sample_period_seconds; /* time duration between two sampling */ }; +/* + * Store dirtypage info for each ramblock. 
+ */ +struct RamblockDirtyInfo { + char idstr[RAMBLOCK_INFO_MAX_LEN]; /* idstr for each ramblock */ + uint8_t *ramblock_addr; /* base address of ramblock we measure */ + uint64_t ramblock_pages; /* ramblock size in TARGET_PAGE_SIZE */ + uint64_t *sample_page_vfn; /* relative offset address for sampled page */ + uint64_t sample_pages_count; /* count of sampled pages */ + uint64_t sample_dirty_count; /* count of dirty pages we measure */ + uint32_t *hash_result; /* array of hash result for sampled pages */ +}; + void *get_dirtyrate_thread(void *arg); #endif -- Gitee From 16ae6c8e5cff5268df282d16c0f87c0578979853 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:59 +0800 Subject: [PATCH 04/31] migration/dirtyrate: Add dirtyrate statistics series functions Add dirtyrate statistics functions to record/update dirtyrate info. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-5-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert --- migration/dirtyrate.c | 32 ++++++++++++++++++++++++++++++++ migration/dirtyrate.h | 12 ++++++++++++ 2 files changed, 44 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 44a60bf10d..cbb323d6ec 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -23,6 +23,7 @@ #include "dirtyrate.h" static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; +static struct DirtyRateStat DirtyStat; static int dirtyrate_set_state(int *state, int old_state, int new_state) { @@ -34,6 +35,37 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) } } +static void reset_dirtyrate_stat(void) +{ + DirtyStat.total_dirty_samples = 0; + DirtyStat.total_sample_count = 0; + DirtyStat.total_block_mem_MB = 0; + DirtyStat.dirty_rate = -1; + DirtyStat.start_time = 0; + DirtyStat.calc_time = 0; +} + +static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) +{ + DirtyStat.total_dirty_samples += info->sample_dirty_count; + DirtyStat.total_sample_count += info->sample_pages_count; + /* size of total pages in MB */ + DirtyStat.total_block_mem_MB += (info->ramblock_pages * + TARGET_PAGE_SIZE) >> 20; +} + +static void update_dirtyrate(uint64_t msec) +{ + uint64_t dirtyrate; + uint64_t total_dirty_samples = DirtyStat.total_dirty_samples; + uint64_t total_sample_count = DirtyStat.total_sample_count; + uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB; + + dirtyrate = total_dirty_samples * total_block_mem_MB * + 1000 / (total_sample_count * msec); + + DirtyStat.dirty_rate = dirtyrate; +} static void calculate_dirtyrate(struct DirtyRateConfig config) { diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 8707df852d..312debca6f 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -42,5 +42,17 @@ struct RamblockDirtyInfo { uint32_t *hash_result; /* array of hash result for sampled pages */ }; +/* + * Store calculation statistics for each measure. 
+ */ +struct DirtyRateStat { + uint64_t total_dirty_samples; /* total dirty sampled page */ + uint64_t total_sample_count; /* total sampled pages */ + uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ + int64_t dirty_rate; /* dirty rate in MB/s */ + int64_t start_time; /* calculation start time in units of second */ + int64_t calc_time; /* time duration of two sampling in units of second */ +}; + void *get_dirtyrate_thread(void *arg); #endif -- Gitee From 49359b12589ce9fd0ffc689e62357ba7c6270fd1 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:00 +0800 Subject: [PATCH 05/31] migration/dirtyrate: move RAMBLOCK_FOREACH_MIGRATABLE into ram.h RAMBLOCK_FOREACH_MIGRATABLE is need in dirtyrate measure, move the existing definition up into migration/ram.h Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-6-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 1 + migration/ram.c | 11 +---------- migration/ram.h | 10 ++++++++++ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index cbb323d6ec..1ccc71077d 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -20,6 +20,7 @@ #include "qemu/rcu_queue.h" #include "qapi/qapi-commands-migration.h" #include "migration.h" +#include "ram.h" #include "dirtyrate.h" static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; diff --git a/migration/ram.c b/migration/ram.c index 840e35480b..8e80ee20ad 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -159,21 +159,12 @@ out: return ret; } -static bool ramblock_is_ignored(RAMBlock *block) +bool ramblock_is_ignored(RAMBlock *block) { return !qemu_ram_is_migratable(block) || (migrate_ignore_shared() && qemu_ram_is_shared(block)); } -/* Should be holding either ram_list.mutex, or the RCU lock. 
*/ -#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ - INTERNAL_RAMBLOCK_FOREACH(block) \ - if (ramblock_is_ignored(block)) {} else - -#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ - INTERNAL_RAMBLOCK_FOREACH(block) \ - if (!qemu_ram_is_migratable(block)) {} else - #undef RAMBLOCK_FOREACH int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) diff --git a/migration/ram.h b/migration/ram.h index a788ff0e8e..565ec86b1f 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -37,6 +37,16 @@ extern MigrationStats ram_counters; extern XBZRLECacheStats xbzrle_counters; extern CompressionStats compression_counters; +bool ramblock_is_ignored(RAMBlock *block); +/* Should be holding either ram_list.mutex, or the RCU lock. */ +#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ + INTERNAL_RAMBLOCK_FOREACH(block) \ + if (ramblock_is_ignored(block)) {} else + +#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ + INTERNAL_RAMBLOCK_FOREACH(block) \ + if (!qemu_ram_is_migratable(block)) {} else + int xbzrle_cache_resize(int64_t new_size, Error **errp); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_total(void); -- Gitee From 93b9704b1d2b1e8748ba303f11f9484b34f28d22 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:01 +0800 Subject: [PATCH 06/31] migration/dirtyrate: Record hash results for each sampled page Record hash results for each sampled page, crc32 is taken to calculate hash results for each sampled length in TARGET_PAGE_SIZE. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-7-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert --- migration/dirtyrate.c | 109 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 1ccc71077d..f93601f8ab 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory. */ +#include <zlib.h> #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" @@ -68,6 +69,114 @@ static void update_dirtyrate(uint64_t msec) DirtyStat.dirty_rate = dirtyrate; } +/* + * get hash result for the sampled memory with length of TARGET_PAGE_SIZE + * in ramblock, which starts from ramblock base address. + */ +static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, + uint64_t vfn) +{ + uint32_t crc; + + crc = crc32(0, (info->ramblock_addr + + vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); + + return crc; +} + +static bool save_ramblock_hash(struct RamblockDirtyInfo *info) +{ + unsigned int sample_pages_count; + int i; + GRand *rand; + + sample_pages_count = info->sample_pages_count; + + /* ramblock size less than one page, return success to skip this ramblock */ + if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) { + return true; + } + + info->hash_result = g_try_malloc0_n(sample_pages_count, + sizeof(uint32_t)); + if (!info->hash_result) { + return false; + } + + info->sample_page_vfn = g_try_malloc0_n(sample_pages_count, + sizeof(uint64_t)); + if (!info->sample_page_vfn) { + g_free(info->hash_result); + return false; + } + + rand = g_rand_new(); + for (i = 0; i < sample_pages_count; i++) { + info->sample_page_vfn[i] = g_rand_int_range(rand, 0, + info->ramblock_pages - 1); + info->hash_result[i] = get_ramblock_vfn_hash(info, + info->sample_page_vfn[i]); + } + g_rand_free(rand); + + return true; +} + +static void get_ramblock_dirty_info(RAMBlock *block, + struct RamblockDirtyInfo *info, + struct DirtyRateConfig *config) +{ + uint64_t sample_pages_per_gigabytes = 
config->sample_pages_per_gigabytes; + + /* Right shift 30 bits to calc ramblock size in GB */ + info->sample_pages_count = (qemu_ram_get_used_length(block) * + sample_pages_per_gigabytes) >> 30; + /* Right shift TARGET_PAGE_BITS to calc page count */ + info->ramblock_pages = qemu_ram_get_used_length(block) >> + TARGET_PAGE_BITS; + info->ramblock_addr = qemu_ram_get_host_addr(block); + strcpy(info->idstr, qemu_ram_get_idstr(block)); +} + +static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, + struct DirtyRateConfig config, + int *block_count) +{ + struct RamblockDirtyInfo *info = NULL; + struct RamblockDirtyInfo *dinfo = NULL; + RAMBlock *block = NULL; + int total_count = 0; + int index = 0; + bool ret = false; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + total_count++; + } + + dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo)); + if (dinfo == NULL) { + goto out; + } + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (index >= total_count) { + break; + } + info = &dinfo[index]; + get_ramblock_dirty_info(block, info, &config); + if (!save_ramblock_hash(info)) { + goto out; + } + index++; + } + ret = true; + +out: + *block_count = index; + *block_dinfo = dinfo; + return ret; +} + static void calculate_dirtyrate(struct DirtyRateConfig config) { /* todo */ -- Gitee From ee90e6bb58e22207ae7c45fc4bd2f77fc8c87830 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:02 +0800 Subject: [PATCH 07/31] migration/dirtyrate: Compare page hash results for recorded sampled page Compare page hash results for recorded sampled page. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-8-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert --- migration/dirtyrate.c | 63 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index f93601f8ab..0412f825dc 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -177,6 +177,69 @@ out: return ret; } +static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) +{ + uint32_t crc; + int i; + + for (i = 0; i < info->sample_pages_count; i++) { + crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); + if (crc != info->hash_result[i]) { + info->sample_dirty_count++; + } + } +} + +static struct RamblockDirtyInfo * +find_block_matched(RAMBlock *block, int count, + struct RamblockDirtyInfo *infos) +{ + int i; + struct RamblockDirtyInfo *matched; + + for (i = 0; i < count; i++) { + if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) { + break; + } + } + + if (i == count) { + return NULL; + } + + if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || + infos[i].ramblock_pages != + (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { + return NULL; + } + + matched = &infos[i]; + + return matched; +} + +static bool compare_page_hash_info(struct RamblockDirtyInfo *info, + int block_count) +{ + struct RamblockDirtyInfo *block_dinfo = NULL; + RAMBlock *block = NULL; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + block_dinfo = find_block_matched(block, block_count, info); + if (block_dinfo == NULL) { + continue; + } + calc_page_dirty_rate(block_dinfo); + update_dirtyrate_stat(block_dinfo); + } + + if (DirtyStat.total_sample_count == 0) { + return false; + } + + return true; +} + static void calculate_dirtyrate(struct DirtyRateConfig config) { /* todo */ -- Gitee From 0a079387a3176e5f05480f6cb14f970d43689986 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:03 +0800 Subject: [PATCH 08/31] migration/dirtyrate: skip sampling ramblock with size below MIN_RAMBLOCK_SIZE In order to sample real RAM, skip ramblock with size 
below MIN_RAMBLOCK_SIZE which is set as 128M. Signed-off-by: Chuan Zheng Reviewed-by: David Edmondson Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-9-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 21 +++++++++++++++++++++ migration/dirtyrate.h | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 0412f825dc..97bb883850 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -138,6 +138,18 @@ static void get_ramblock_dirty_info(RAMBlock *block, strcpy(info->idstr, qemu_ram_get_idstr(block)); } +static bool skip_sample_ramblock(RAMBlock *block) +{ + /* + * Sample only blocks larger than MIN_RAMBLOCK_SIZE. + */ + if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { + return true; + } + + return false; +} + static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, struct DirtyRateConfig config, int *block_count) @@ -150,6 +162,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, bool ret = false; RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (skip_sample_ramblock(block)) { + continue; + } total_count++; } @@ -159,6 +174,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, } RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (skip_sample_ramblock(block)) { + continue; + } if (index >= total_count) { break; } @@ -225,6 +243,9 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, RAMBlock *block = NULL; RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (skip_sample_ramblock(block)) { + continue; + } block_dinfo = find_block_matched(block, block_count, info); if (block_dinfo == NULL) { continue; diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 312debca6f..be5b8ec2b1 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -24,6 +24,11 @@ */ #define RAMBLOCK_INFO_MAX_LEN 256 +/* + * Minimum RAMBlock 
size to sample, in megabytes. + */ +#define MIN_RAMBLOCK_SIZE 128 + struct DirtyRateConfig { uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ int64_t sample_period_seconds; /* time duration between two sampling */ -- Gitee From ed999f31105526f5dc343c4f27ac761d299ebfd9 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:04 +0800 Subject: [PATCH 09/31] migration/dirtyrate: Implement set_sample_page_period() and is_sample_period_valid() Implement is_sample_period_valid() to check if the sample period is vaild and do set_sample_page_period() to sleep specific time between sample actions. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-10-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 24 ++++++++++++++++++++++++ migration/dirtyrate.h | 6 ++++++ 2 files changed, 30 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 97bb883850..485d6467c9 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -27,6 +27,30 @@ static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; static struct DirtyRateStat DirtyStat; +static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) +{ + int64_t current_time; + + current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + if ((current_time - initial_time) >= msec) { + msec = current_time - initial_time; + } else { + g_usleep((msec + initial_time - current_time) * 1000); + } + + return msec; +} + +static bool is_sample_period_valid(int64_t sec) +{ + if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC || + sec > MAX_FETCH_DIRTYRATE_TIME_SEC) { + return false; + } + + return true; +} + static int dirtyrate_set_state(int *state, int old_state, int new_state) { assert(new_state < DIRTY_RATE_STATUS__MAX); diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index be5b8ec2b1..6ec429534d 100644 --- a/migration/dirtyrate.h 
+++ b/migration/dirtyrate.h @@ -29,6 +29,12 @@ */ #define MIN_RAMBLOCK_SIZE 128 +/* + * Take 1s as minimum time for calculation duration + */ +#define MIN_FETCH_DIRTYRATE_TIME_SEC 1 +#define MAX_FETCH_DIRTYRATE_TIME_SEC 60 + struct DirtyRateConfig { uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ int64_t sample_period_seconds; /* time duration between two sampling */ -- Gitee From 0d08b43b5499e77bac3fb28bb8ff1a12b9e356ce Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:05 +0800 Subject: [PATCH 10/31] migration/dirtyrate: Implement calculate_dirtyrate() function Implement calculate_dirtyrate() function. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-11-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 45 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 485d6467c9..c7a389a527 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -162,6 +162,21 @@ static void get_ramblock_dirty_info(RAMBlock *block, strcpy(info->idstr, qemu_ram_get_idstr(block)); } +static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count) +{ + int i; + + if (!infos) { + return; + } + + for (i = 0; i < count; i++) { + g_free(infos[i].sample_page_vfn); + g_free(infos[i].hash_result); + } + g_free(infos); +} + static bool skip_sample_ramblock(RAMBlock *block) { /* @@ -287,8 +302,34 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, static void calculate_dirtyrate(struct DirtyRateConfig config) { - /* todo */ - return; + struct RamblockDirtyInfo *block_dinfo = NULL; + int block_count = 0; + int64_t msec = 0; + int64_t initial_time; + + rcu_register_thread(); + reset_dirtyrate_stat(); + rcu_read_lock(); + initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 
+ if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { + goto out; + } + rcu_read_unlock(); + + msec = config.sample_period_seconds * 1000; + msec = set_sample_page_period(msec, initial_time); + + rcu_read_lock(); + if (!compare_page_hash_info(block_dinfo, block_count)) { + goto out; + } + + update_dirtyrate(msec); + +out: + rcu_read_unlock(); + free_ramblock_dirty_info(block_dinfo, block_count); + rcu_unregister_thread(); } void *get_dirtyrate_thread(void *arg) -- Gitee From ee26843a852d9de4408812891ac3475eb6d1dcaa Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:06 +0800 Subject: [PATCH 11/31] migration/dirtyrate: Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function which could be called Signed-off-by: Chuan Zheng Message-Id: <1600237327-33618-12-git-send-email-zhengchuan@huawei.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert atomic function fixup Wording fixup in migration.json based on Eric's review --- migration/dirtyrate.c | 62 +++++++++++++++++++++++++++++++++++++++++++ qapi/migration.json | 50 ++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index c7a389a527..9d9155f8ab 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -61,6 +61,24 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) } } +static struct DirtyRateInfo *query_dirty_rate_info(void) +{ + int64_t dirty_rate = DirtyStat.dirty_rate; + struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); + + if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { + info->dirty_rate = dirty_rate; + } else { + info->dirty_rate = -1; + } + + info->status = CalculatingState; + info->start_time = DirtyStat.start_time; + info->calc_time = DirtyStat.calc_time; + + return info; +} + static void reset_dirtyrate_stat(void) { 
DirtyStat.total_dirty_samples = 0; @@ -318,6 +336,8 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) msec = config.sample_period_seconds * 1000; msec = set_sample_page_period(msec, initial_time); + DirtyStat.start_time = initial_time / 1000; + DirtyStat.calc_time = msec / 1000; rcu_read_lock(); if (!compare_page_hash_info(block_dinfo, block_count)) { @@ -353,3 +373,45 @@ void *get_dirtyrate_thread(void *arg) } return NULL; } + +void qmp_calc_dirty_rate(int64_t calc_time, Error **errp) +{ + static struct DirtyRateConfig config; + QemuThread thread; + int ret; + + /* + * If the dirty rate is already being measured, don't attempt to start. + */ + if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) { + error_setg(errp, "the dirty rate is already being measured."); + return; + } + + if (!is_sample_period_valid(calc_time)) { + error_setg(errp, "calc-time is out of range[%d, %d].", + MIN_FETCH_DIRTYRATE_TIME_SEC, + MAX_FETCH_DIRTYRATE_TIME_SEC); + return; + } + + /* + * Init calculation state as unstarted. + */ + ret = dirtyrate_set_state(&CalculatingState, CalculatingState, + DIRTY_RATE_STATUS_UNSTARTED); + if (ret == -1) { + error_setg(errp, "init dirty rate calculation state failed."); + return; + } + + config.sample_period_seconds = calc_time; + config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES; + qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread, + (void *)&config, QEMU_THREAD_DETACHED); +} + +struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp) +{ + return query_dirty_rate_info(); +} diff --git a/qapi/migration.json b/qapi/migration.json index fdddde0af7..76f5b42493 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1462,3 +1462,53 @@ ## { 'enum': 'DirtyRateStatus', 'data': [ 'unstarted', 'measuring', 'measured'] } + +## +# @DirtyRateInfo: +# +# Information about current dirty page rate of vm. +# +# @dirty-rate: @dirtyrate describing the dirty page rate of vm +# in units of MB/s. 
+# If this field returns '-1', it means querying has not +# yet started or completed. +# +# @status: status containing dirtyrate query status includes +# 'unstarted' or 'measuring' or 'measured' +# +# @start-time: start time in units of second for calculation +# +# @calc-time: time in units of second for sample dirty pages +# +# Since: 5.2 +# +## +{ 'struct': 'DirtyRateInfo', + 'data': {'dirty-rate': 'int64', + 'status': 'DirtyRateStatus', + 'start-time': 'int64', + 'calc-time': 'int64'} } + +## +# @calc-dirty-rate: +# +# start calculating dirty page rate for vm +# +# @calc-time: time in units of second for sample dirty pages +# +# Since: 5.2 +# +# Example: +# {"command": "calc-dirty-rate", "data": {"calc-time": 1} } +# +## +{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} } + +## +# @query-dirty-rate: +# +# query dirty page rate in units of MB/s for vm +# +# Since: 5.2 +## +{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } -- Gitee From 43c41bd0296af4698b031da4b2f3d29a419db0ff Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:07 +0800 Subject: [PATCH 12/31] migration/dirtyrate: Add trace_calls to make it easier to debug Add trace_calls to make it easier to debug Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Message-Id: <1600237327-33618-13-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert --- migration/dirtyrate.c | 9 +++++++++ migration/trace-events | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 9d9155f8ab..80936a4ca6 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -22,6 +22,7 @@ #include "qapi/qapi-commands-migration.h" #include "migration.h" #include "ram.h" +#include "trace.h" #include "dirtyrate.h" static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; @@ -54,6 +55,7 @@ static bool is_sample_period_valid(int64_t sec) static int dirtyrate_set_state(int *state, int old_state, int new_state) { assert(new_state < DIRTY_RATE_STATUS__MAX); + trace_dirtyrate_set_state(DirtyRateStatus_str(new_state)); if (atomic_cmpxchg(state, old_state, new_state) == old_state) { return 0; } else { @@ -76,6 +78,8 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) info->start_time = DirtyStat.start_time; info->calc_time = DirtyStat.calc_time; + trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState)); + return info; } @@ -123,6 +127,7 @@ static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, crc = crc32(0, (info->ramblock_addr + vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); + trace_get_ramblock_vfn_hash(info->idstr, vfn, crc); return crc; } @@ -201,6 +206,8 @@ static bool skip_sample_ramblock(RAMBlock *block) * Sample only blocks larger than MIN_RAMBLOCK_SIZE. 
*/ if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { + trace_skip_sample_ramblock(block->idstr, + qemu_ram_get_used_length(block)); return true; } @@ -260,6 +267,7 @@ static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) for (i = 0; i < info->sample_pages_count; i++) { crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); if (crc != info->hash_result[i]) { + trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]); info->sample_dirty_count++; } } @@ -285,6 +293,7 @@ find_block_matched(RAMBlock *block, int count, if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || infos[i].ramblock_pages != (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { + trace_find_page_matched(block->idstr); return NULL; } diff --git a/migration/trace-events b/migration/trace-events index d8e54c367a..69620c43c2 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -296,3 +296,11 @@ dirty_bitmap_load_bits_zeroes(void) "" dirty_bitmap_load_header(uint32_t flags) "flags 0x%x" dirty_bitmap_load_enter(void) "" dirty_bitmap_load_success(void) "" + +# dirtyrate.c +dirtyrate_set_state(const char *new_state) "new state %s" +query_dirty_rate_info(const char *new_state) "current state %s" +get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock name: %s, vfn: %"PRIu64 ", crc: %" PRIu32 +calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32 +skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64 +find_page_matched(const char *idstr) "ramblock %s addr or size changed" -- Gitee From ec54b40b9cef16e793a2cd323b3a16e3c08b1db5 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 29 Sep 2020 11:42:17 +0800 Subject: [PATCH 13/31] migration/dirtyrate: record start_time and calc_time while at the measuring state Querying could include both the start-time and the calc-time while at 
the measuring state, allowing a caller to determine when they should expect to come back looking for a result. Signed-off-by: Chuan Zheng Message-Id: <1601350938-128320-2-git-send-email-zhengchuan@huawei.com> Reviewed-by: David Edmondson Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 80936a4ca6..f1c007d569 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -83,14 +83,14 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) return info; } -static void reset_dirtyrate_stat(void) +static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time) { DirtyStat.total_dirty_samples = 0; DirtyStat.total_sample_count = 0; DirtyStat.total_block_mem_MB = 0; DirtyStat.dirty_rate = -1; - DirtyStat.start_time = 0; - DirtyStat.calc_time = 0; + DirtyStat.start_time = start_time; + DirtyStat.calc_time = calc_time; } static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) @@ -335,7 +335,6 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) int64_t initial_time; rcu_register_thread(); - reset_dirtyrate_stat(); rcu_read_lock(); initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { @@ -365,6 +364,8 @@ void *get_dirtyrate_thread(void *arg) { struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; int ret; + int64_t start_time; + int64_t calc_time; ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, DIRTY_RATE_STATUS_MEASURING); @@ -373,6 +374,10 @@ void *get_dirtyrate_thread(void *arg) return NULL; } + start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; + calc_time = config.sample_period_seconds; + init_dirtyrate_stat(start_time, calc_time); + calculate_dirtyrate(config); ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, -- Gitee From 
fd1c1f1f7bd07533a61f0a628dc9b956520eb700 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 29 Sep 2020 11:42:18 +0800 Subject: [PATCH 14/31] migration/dirtyrate: present dirty rate only when querying the rate has completed Make dirty_rate field optional, present dirty rate only when querying the rate has completed. The qmp results is shown as follow: @unstarted: {"return":{"status":"unstarted","start-time":0,"calc-time":0},"id":"libvirt-12"} @measuring: {"return":{"status":"measuring","start-time":102931,"calc-time":1},"id":"libvirt-85"} @measured: {"return":{"status":"measured","dirty-rate":4,"start-time":150146,"calc-time":1},"id":"libvirt-15"} Signed-off-by: Chuan Zheng Reviewed-by: David Edmondson Message-Id: <1601350938-128320-3-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 3 +-- qapi/migration.json | 8 +++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index f1c007d569..00c8085456 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -69,9 +69,8 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { + info->has_dirty_rate = true; info->dirty_rate = dirty_rate; - } else { - info->dirty_rate = -1; } info->status = CalculatingState; diff --git a/qapi/migration.json b/qapi/migration.json index 76f5b42493..6844ddfab3 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1468,10 +1468,8 @@ # # Information about current dirty page rate of vm. # -# @dirty-rate: @dirtyrate describing the dirty page rate of vm -# in units of MB/s. -# If this field returns '-1', it means querying has not -# yet started or completed. +# @dirty-rate: an estimate of the dirty page rate of the VM in units of +# MB/s, present only when estimating the rate has completed. 
# # @status: status containing dirtyrate query status includes # 'unstarted' or 'measuring' or 'measured' @@ -1484,7 +1482,7 @@ # ## { 'struct': 'DirtyRateInfo', - 'data': {'dirty-rate': 'int64', + 'data': {'*dirty-rate': 'int64', 'status': 'DirtyRateStatus', 'start-time': 'int64', 'calc-time': 'int64'} } -- Gitee From 7f6f248e2f245938c1fb7a9f7c979079050fc431 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Fri, 30 Oct 2020 11:58:01 +0800 Subject: [PATCH 15/31] migration/dirtyrate: simplify includes in dirtyrate.c Remove redundant blank line which is left by Commit 662770af7c6e8c, also take this opportunity to remove redundant includes in dirtyrate.c. Signed-off-by: Chuan Zheng Message-Id: <1604030281-112946-1-git-send-email-zhengchuan@huawei.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 00c8085456..9a6d0e2cc6 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -10,17 +10,16 @@ * See the COPYING file in the top-level directory. */ -#include #include "qemu/osdep.h" +#include #include "qapi/error.h" #include "cpu.h" -#include "qemu/config-file.h" #include "exec/memory.h" #include "exec/ram_addr.h" #include "exec/target_page.h" #include "qemu/rcu_queue.h" +#include "qemu/error-report.h" #include "qapi/qapi-commands-migration.h" -#include "migration.h" #include "ram.h" #include "trace.h" #include "dirtyrate.h" -- Gitee From 202ff21fd91eabffca41c15cc93b9483a0b8778c Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:03:57 +0800 Subject: [PATCH 16/31] migration/tls: save hostname into MigrationState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hostname is need in multifd-tls, save hostname into MigrationState. 
Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Message-Id: <1600139042-104593-2-git-send-email-zhengchuan@huawei.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/channel.c | 1 + migration/migration.c | 1 + migration/migration.h | 5 +++++ migration/tls.c | 2 ++ 4 files changed, 9 insertions(+) diff --git a/migration/channel.c b/migration/channel.c index 7462181484..46ed40b89c 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -99,5 +99,6 @@ void migration_channel_connect(MigrationState *s, } } migrate_fd_connect(s, error); + g_free(s->hostname); error_free(error); } diff --git a/migration/migration.c b/migration/migration.c index 8f2fc2b4ff..01740df471 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1710,6 +1710,7 @@ void migrate_init(MigrationState *s) s->migration_thread_running = false; error_free(s->error); s->error = NULL; + s->hostname = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); diff --git a/migration/migration.h b/migration/migration.h index feb344306a..e5aaf2ef70 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -259,6 +259,11 @@ struct MigrationState * (which is in 4M chunk). 
*/ uint8_t clear_bitmap_shift; + + /* + * This save hostname when out-going migration starts + */ + char *hostname; }; void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/tls.c b/migration/tls.c index 5171afc6c4..a0eb553e14 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -155,6 +155,8 @@ void migration_tls_channel_connect(MigrationState *s, return; } + /* Save hostname into MigrationState for handshake */ + s->hostname = g_strdup(hostname); trace_migration_tls_outgoing_handshake_start(hostname); qio_channel_set_name(QIO_CHANNEL(tioc), "migration-tls-outgoing"); qio_channel_tls_handshake(tioc, -- Gitee From 38f80174ada1076fe1dd365fdd95b782808c9039 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:03:58 +0800 Subject: [PATCH 17/31] migration/tls: extract migration_tls_client_create for common-use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migration_tls_client_create will be used in multifd-tls, let's extract it. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-3-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert --- migration/tls.c | 26 ++++++++++++++++++-------- migration/tls.h | 6 ++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/migration/tls.c b/migration/tls.c index a0eb553e14..1d5b571d8e 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -22,7 +22,6 @@ #include "channel.h" #include "migration.h" #include "tls.h" -#include "io/channel-tls.h" #include "crypto/tlscreds.h" #include "qemu/error-report.h" #include "qapi/error.h" @@ -126,11 +125,10 @@ static void migration_tls_outgoing_handshake(QIOTask *task, object_unref(OBJECT(ioc)); } - -void migration_tls_channel_connect(MigrationState *s, - QIOChannel *ioc, - const char *hostname, - Error **errp) +QIOChannelTLS *migration_tls_client_create(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error **errp) { QCryptoTLSCreds *creds; QIOChannelTLS *tioc; @@ -138,7 +136,7 @@ void migration_tls_channel_connect(MigrationState *s, creds = migration_tls_get_creds( s, QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, errp); if (!creds) { - return; + return NULL; } if (s->parameters.tls_hostname && *s->parameters.tls_hostname) { @@ -146,11 +144,23 @@ void migration_tls_channel_connect(MigrationState *s, } if (!hostname) { error_setg(errp, "No hostname available for TLS"); - return; + return NULL; } tioc = qio_channel_tls_new_client( ioc, creds, hostname, errp); + + return tioc; +} + +void migration_tls_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error **errp) +{ + QIOChannelTLS *tioc; + + tioc = migration_tls_client_create(s, ioc, hostname, errp); if (!tioc) { return; } diff --git a/migration/tls.h b/migration/tls.h index cdd70001ed..0cfbe368ba 100644 --- a/migration/tls.h +++ b/migration/tls.h @@ -22,11 +22,17 @@ #define QEMU_MIGRATION_TLS_H #include "io/channel.h" +#include "io/channel-tls.h" void migration_tls_channel_process_incoming(MigrationState *s, QIOChannel *ioc, Error **errp); +QIOChannelTLS *migration_tls_client_create(MigrationState *s, + 
QIOChannel *ioc, + const char *hostname, + Error **errp); + void migration_tls_channel_connect(MigrationState *s, QIOChannel *ioc, const char *hostname, -- Gitee From 07514c032bd20e6012b59ecbf07595eb41fd5ff5 Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 11:25:44 +0800 Subject: [PATCH 18/31] migration/tls: add tls_hostname into MultiFDSendParams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since multifd creation is async with migration_channel_connect, we should pass the hostname from MigrationState to MultiFDSendParams. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Message-Id: <1600139042-104593-4-git-send-email-zhengchuan@huawei.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index 8e80ee20ad..e9b25f78a6 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -621,6 +621,8 @@ typedef struct { uint8_t id; /* channel thread name */ char *name; + /* tls hostname */ + char *tls_hostname; /* channel thread id */ QemuThread thread; /* communication channel */ @@ -1035,6 +1037,8 @@ void multifd_save_cleanup(void) qemu_sem_destroy(&p->sem_sync); g_free(p->name); p->name = NULL; + g_free(p->tls_hostname); + p->tls_hostname = NULL; multifd_pages_clear(p->pages); p->pages = NULL; p->packet_len = 0; @@ -1222,10 +1226,12 @@ int multifd_save_setup(void) int thread_count; uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); uint8_t i; + MigrationState *s; if (!migrate_use_multifd()) { return 0; } + s = migrate_get_current(); thread_count = migrate_multifd_channels(); multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); @@ -1246,6 +1252,7 @@ int multifd_save_setup(void) + sizeof(ram_addr_t) * page_count; p->packet = g_malloc0(p->packet_len); p->name = g_strdup_printf("multifdsend_%d", i); 
+ p->tls_hostname = g_strdup(s->hostname); socket_send_channel_create(multifd_new_send_channel_async, p); } return 0; -- Gitee From c1eb9f8be8cc6e52c1634ec13fcc5a158ebf75eb Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 11:32:44 +0800 Subject: [PATCH 19/31] migration/tls: extract cleanup function for common-use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit multifd channel cleanup is need if multifd handshake failed, let's extract it. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-5-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index e9b25f78a6..3c79bc6017 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1193,6 +1193,23 @@ out: return NULL; } +static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, + QIOChannel *ioc, Error *err) +{ + migrate_set_error(migrate_get_current(), err); + /* Error happen, we need to tell who pay attention to me */ + qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_post(&p->sem_sync); + /* + * Although multifd_send_thread is not created, but main migration + * thread neet to judge whether it is running, so we need to mark + * its status. 
+ */ + p->quit = true; + object_unref(OBJECT(ioc)); + error_free(err); +} + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) { MultiFDSendParams *p = opaque; @@ -1200,25 +1217,18 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) Error *local_err = NULL; if (qio_task_propagate_error(task, &local_err)) { - migrate_set_error(migrate_get_current(), local_err); - /* Error happen, we need to tell who pay attention to me */ - qemu_sem_post(&multifd_send_state->channels_ready); - qemu_sem_post(&p->sem_sync); - /* - * Although multifd_send_thread is not created, but main migration - * thread neet to judge whether it is running, so we need to mark - * its status. - */ - p->quit = true; - object_unref(OBJECT(sioc)); - error_free(local_err); + goto cleanup; } else { p->c = QIO_CHANNEL(sioc); qio_channel_set_delay(p->c, false); p->running = true; qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, QEMU_THREAD_JOINABLE); + return; } + +cleanup: + multifd_new_send_channel_cleanup(p, sioc, local_err); } int multifd_save_setup(void) -- Gitee From 906a3b501ba85c6320141d376ef0b22cd387677e Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 11:38:37 +0800 Subject: [PATCH 20/31] migration/tls: add support for multifd tls-handshake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar like migration main thread, we need to do handshake for each multifd thread. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-6-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert --- migration/ram.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 3c79bc6017..88b7019df2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -38,6 +38,7 @@ #include "ram.h" #include "migration.h" #include "socket.h" +#include "tls.h" #include "migration/register.h" #include "migration/misc.h" #include "qemu-file.h" @@ -1193,6 +1194,77 @@ out: return NULL; } +static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error); + +static void multifd_tls_outgoing_handshake(QIOTask *task, + gpointer opaque) +{ + MultiFDSendParams *p = opaque; + QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); + Error *err = NULL; + + qio_task_propagate_error(task, &err); + multifd_channel_connect(p, ioc, err); +} + +static void multifd_tls_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error **errp) +{ + MigrationState *s = migrate_get_current(); + const char *hostname = p->tls_hostname; + QIOChannelTLS *tioc; + + tioc = migration_tls_client_create(s, ioc, hostname, errp); + if (!tioc) { + return; + } + + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); + qio_channel_tls_handshake(tioc, + multifd_tls_outgoing_handshake, + p, + NULL, + NULL); + +} + +static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error) +{ + MigrationState *s = migrate_get_current(); + + if (!error) { + if (s->parameters.tls_creds && + *s->parameters.tls_creds && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + multifd_tls_channel_connect(p, ioc, &error); + if (!error) { + /* + * tls_channel_connect will call back to this + * function after the TLS handshake, + * so we mustn't call multifd_send_thread until then + */ + return false; + } else { + return true; + } + } else { + /* update for tls qio channel */ + p->c = ioc; + qemu_thread_create(&p->thread, p->name, 
multifd_send_thread, p, + QEMU_THREAD_JOINABLE); + } + return false; + } + + return true; +} + static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, QIOChannel *ioc, Error *err) { @@ -1222,8 +1294,9 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) p->c = QIO_CHANNEL(sioc); qio_channel_set_delay(p->c, false); p->running = true; - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, - QEMU_THREAD_JOINABLE); + if (multifd_channel_connect(p, sioc, local_err)) { + goto cleanup; + } return; } -- Gitee From a801cd3ed905e18b51fcb3add82b826c099dc6a9 Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 13:56:11 +0800 Subject: [PATCH 21/31] migration/tls: add trace points for multifd-tls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add trace points for multifd-tls for debug. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-7-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. 
David Alan Gilbert --- migration/ram.c | 10 +++++++++- migration/trace-events | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 88b7019df2..30c95397c3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1205,7 +1205,11 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); Error *err = NULL; - qio_task_propagate_error(task, &err); + if (qio_task_propagate_error(task, &err)) { + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + } else { + trace_multifd_tls_outgoing_handshake_complete(ioc); + } multifd_channel_connect(p, ioc, err); } @@ -1222,6 +1226,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, return; } + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); qio_channel_tls_handshake(tioc, multifd_tls_outgoing_handshake, @@ -1237,6 +1242,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p, { MigrationState *s = migrate_get_current(); + trace_multifd_set_outgoing_channel( + ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + if (!error) { if (s->parameters.tls_creds && *s->parameters.tls_creds && diff --git a/migration/trace-events b/migration/trace-events index 69620c43c2..c0640cd424 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -93,6 +93,10 @@ multifd_send_sync_main_signal(uint8_t id) "channel %d" multifd_send_sync_main_wait(uint8_t id) "channel %d" multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 multifd_send_thread_start(uint8_t id) "%d" +multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" +multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" +multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" 
+multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname, void *err) "ioc=%p ioctype=%s hostname=%s err=%p" ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p" ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" -- Gitee From 6ff76ab1455c520d936e2b9badd2a346a7c71466 Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 14:25:13 +0800 Subject: [PATCH 22/31] qemu-file: Don't do IO after shutdown Be sure that we are not doing neither read/write after shutdown of the QEMUFile. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert --- migration/qemu-file.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 18f480529a..cd96d04e9a 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -51,6 +51,8 @@ struct QEMUFile { unsigned int iovcnt; int last_error; + /* has the file has been shutdown */ + bool shutdown; }; /* @@ -59,10 +61,18 @@ struct QEMUFile { */ int qemu_file_shutdown(QEMUFile *f) { + int ret; + + f->shutdown = true; if (!f->ops->shut_down) { return -ENOSYS; } - return f->ops->shut_down(f->opaque, true, true); + + ret = f->ops->shut_down(f->opaque, true, true); + if (!f->last_error) { + qemu_file_set_error(f, -EIO); + } + return ret; } /* @@ -181,6 +191,10 @@ void qemu_fflush(QEMUFile *f) return; } + if (f->shutdown) { + return; + } + if (f->iovcnt > 0) { expect = iov_size(f->iov, f->iovcnt); ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); @@ -293,6 +307,9 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) f->buf_index = 0; f->buf_size = pending; + if (f->shutdown) { + return 0; + } len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, IO_BUF_SIZE - pending); if (len > 0) { @@ -591,6 +608,9 @@ int64_t qemu_ftell(QEMUFile 
*f) int qemu_file_rate_limit(QEMUFile *f) { + if (f->shutdown) { + return 1; + } if (qemu_file_get_error(f)) { return 1; } -- Gitee From c1d1732010d97fb14bc661bc9cd1872699348501 Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 14:31:07 +0800 Subject: [PATCH 23/31] multifd: Make sure that we don't do any IO after an error Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert --- migration/ram.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 30c95397c3..c28cfabfcc 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3604,7 +3604,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) { RAMState **temp = opaque; RAMState *rs = *temp; - int ret; + int ret = 0; int i; int64_t t0; int done = 0; @@ -3673,12 +3673,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_ROUND); out: - multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - ram_counters.transferred += 8; + if (ret >= 0) { + multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_counters.transferred += 8; - ret = qemu_file_get_error(f); + ret = qemu_file_get_error(f); + } if (ret < 0) { return ret; } @@ -3732,9 +3734,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) rcu_read_unlock(); - multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); + if (ret >= 0) { + multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + } return ret; } -- Gitee From be4e43905885b465d4d1a64cea24590147d4be13 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 22 Jan 2020 11:36:12 +0100 Subject: [PATCH 24/31] migration: Don't send data if we have stopped If we do a cancel, we got out without one error, but we can't do the rest of the output as in a normal situation. Signed-off-by: Juan Quintela Reviewed-by: Dr. 
David Alan Gilbert --- migration/ram.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index c28cfabfcc..49f742757f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3673,7 +3673,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_ROUND); out: - if (ret >= 0) { + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { multifd_send_sync_main(rs); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); -- Gitee From 0f5e5a7492af2e0a53758cd898d8c205b80bfacd Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 14:39:46 +0800 Subject: [PATCH 25/31] migration: Create migration_is_running() This function returns true if we are in the middle of a migration. It is like migration_is_setup_or_active() with CANCELLING and COLO. Adapt all callers that are needed. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert --- migration/migration.c | 28 +++++++++++++++++++++++----- migration/migration.h | 1 + migration/savevm.c | 4 +--- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 01740df471..3aa2a2ca8e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -822,6 +822,26 @@ bool migration_is_setup_or_active(int state) } } +bool migration_is_running(int state) +{ + switch (state) { + case MIGRATION_STATUS_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER: + case MIGRATION_STATUS_SETUP: + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_COLO: + return true; + + default: + return false; + + } +} + static void populate_ram_info(MigrationInfo *info, MigrationState *s) { info->has_ram = true; @@ -1074,7 +1094,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, 
MigrationCapabilityStatusList *cap; bool cap_list[MIGRATION_CAPABILITY__MAX]; - if (migration_is_setup_or_active(s->state)) { + if (migration_is_running(s->state)) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; } @@ -1588,7 +1608,7 @@ static void migrate_fd_cancel(MigrationState *s) do { old_state = s->state; - if (!migration_is_setup_or_active(old_state)) { + if (!migration_is_running(old_state)) { break; } /* If the migration is paused, kick it out of the pause */ @@ -1873,9 +1893,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return true; } - if (migration_is_setup_or_active(s->state) || - s->state == MIGRATION_STATUS_CANCELLING || - s->state == MIGRATION_STATUS_COLO) { + if (migration_is_running(s->state)) { error_setg(errp, QERR_MIGRATION_ACTIVE); return false; } diff --git a/migration/migration.h b/migration/migration.h index e5aaf2ef70..f2bd4ebe33 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -282,6 +282,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); void migrate_fd_connect(MigrationState *s, Error *error_in); bool migration_is_setup_or_active(int state); +bool migration_is_running(int state); void migrate_init(MigrationState *s); bool migration_is_blocked(Error **errp); diff --git a/migration/savevm.c b/migration/savevm.c index 480c511b19..74b388da43 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1411,9 +1411,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) MigrationState *ms = migrate_get_current(); MigrationStatus status; - if (migration_is_setup_or_active(ms->state) || - ms->state == MIGRATION_STATUS_CANCELLING || - ms->state == MIGRATION_STATUS_COLO) { + if (migration_is_running(ms->state)) { error_setg(errp, QERR_MIGRATION_ACTIVE); return -EINVAL; } -- Gitee From e2e65d71494fb4c971cdc70db181a5ab242b183e Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 14:43:45 +0800 Subject: [PATCH 26/31] migration: fix COLO broken caused by a previous 
commit This commit "migration: Create migration_is_running()" broke COLO, because there is a process broken by this commit: colo_process_checkpoint ->colo_do_checkpoint_transaction ->migrate_set_block_enabled ->qmp_migrate_set_capabilities It can be fixed by making the COLO process an exception. Maybe we need a better way to fix it. Cc: Juan Quintela Signed-off-by: zhanghailiang Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 1 - 1 file changed, 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 3aa2a2ca8e..1b2b22f4dc 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -833,7 +833,6 @@ bool migration_is_running(int state) case MIGRATION_STATUS_PRE_SWITCHOVER: case MIGRATION_STATUS_DEVICE: case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_COLO: return true; default: -- Gitee From 6f1d0d80da4fa14de4b45d6fdc15449940e1b721 Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 14:50:12 +0800 Subject: [PATCH 27/31] migration/multifd: fix hangup with TLS-Multifd due to blocking handshake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The qemu main loop could hang up forever when we enable TLS+Multifd. The Src multifd_send_0 invokes the TLS handshake; it sends hello to the server and waits for the response. However, the Dst main qemu loop has been waiting in recvmsg() for multifd_recv_1. Both the Src and Dst main qemu loops are blocking and waiting for a response, which results in hanging up forever. Src: (multifd_send_0) Dst: (multifd_recv_1) multifd_channel_connect migration_channel_process_incoming multifd_tls_channel_connect migration_tls_channel_process_incoming multifd_tls_channel_connect qio_channel_tls_handshake_task qio_channel_tls_handshake gnutls_handshake qio_channel_tls_handshake_task ... qcrypto_tls_session_handshake ... gnutls_handshake ... ... ...
recvmsg (Blocking I/O waiting for response) recvmsg (Blocking I/O waiting for response) Fix this by offloading handshake work to a background thread. Reported-by: Yan Jin Suggested-by: Daniel P. Berrangé Signed-off-by: Chuan Zheng Message-Id: <1604643893-8223-1-git-send-email-zhengchuan@huawei.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 49f742757f..fe0b1601fe 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1213,6 +1213,19 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, multifd_channel_connect(p, ioc, err); } +static void *multifd_tls_handshake_thread(void *opaque) +{ + MultiFDSendParams *p = opaque; + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c); + + qio_channel_tls_handshake(tioc, + multifd_tls_outgoing_handshake, + p, + NULL, + NULL); + return NULL; +} + static void multifd_tls_channel_connect(MultiFDSendParams *p, QIOChannel *ioc, Error **errp) @@ -1228,12 +1241,10 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); - qio_channel_tls_handshake(tioc, - multifd_tls_outgoing_handshake, - p, - NULL, - NULL); - + p->c = QIO_CHANNEL(tioc); + qemu_thread_create(&p->thread, "multifd-tls-handshake-worker", + multifd_tls_handshake_thread, p, + QEMU_THREAD_JOINABLE); } static bool multifd_channel_connect(MultiFDSendParams *p, -- Gitee From 7e90a0c75ff4ecc444d34a90158dc4c5daf90692 Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Wed, 2 Dec 2020 14:51:51 +0800 Subject: [PATCH 28/31] multifd/tls: fix memoryleak of the QIOChannelSocket object when cancelling migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating new tls client, the tioc->master will be referenced which results in
socket leaking after multifd_save_cleanup if we cancel migration. Fix it by calling object_unref() after tls client creation. Suggested-by: Daniel P. Berrangé Signed-off-by: Chuan Zheng Message-Id: <1605104763-118687-1-git-send-email-zhengchuan@huawei.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/ram.c b/migration/ram.c index fe0b1601fe..7fc2b9108d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1239,6 +1239,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, return; } + object_unref(OBJECT(ioc)); trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); p->c = QIO_CHANNEL(tioc); -- Gitee From ae0a4a9f990fbe1c27df657f7af530cfb5439799 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Fri, 5 Mar 2021 16:06:52 +0800 Subject: [PATCH 29/31] migration: fix memory leak in qmp_migrate_set_parameters "tmp.tls_hostname" and "tmp.tls_creds" allocated by migrate_params_test_apply() are never freed at the end of qmp_migrate_set_parameters(). Fix that. 
The leak stack: Direct leak of 2 byte(s) in 2 object(s) allocated from: #0 0xffffb597c20b in __interceptor_malloc (/usr/lib64/libasan.so.4+0xd320b) #1 0xffffb52dcb1b in g_malloc (/usr/lib64/libglib-2.0.so.0+0x58b1b) #2 0xffffb52f8143 in g_strdup (/usr/lib64/libglib-2.0.so.0+0x74143) #3 0xaaaac52447fb in migrate_params_test_apply (/usr/src/debug/qemu-4.1.0/migration/migration.c:1377) #4 0xaaaac52fdca7 in qmp_migrate_set_parameters (/usr/src/debug/qemu-4.1.0/qapi/qapi-commands-migration.c:192) #5 0xaaaac551d543 in qmp_dispatch (/usr/src/debug/qemu-4.1.0/qapi/qmp-dispatch.c:165) #6 0xaaaac52a0a8f in qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:125) #7 0xaaaac52a1c7f in monitor_qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:214) #8 0xaaaac55cb0cf in aio_bh_call (/usr/src/debug/qemu-4.1.0/util/async.c:117) #9 0xaaaac55d4543 in aio_bh_poll (/usr/src/debug/qemu-4.1.0/util/aio-posix.c:459) #10 0xaaaac55cae0f in aio_dispatch (/usr/src/debug/qemu-4.1.0/util/async.c:268) #11 0xffffb52d6a7b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a7b) #12 0xaaaac55d1e3b(/usr/bin/qemu-kvm-4.1.0+0x1622e3b) #13 0xaaaac4e314bb(/usr/bin/qemu-kvm-4.1.0+0xe824bb) #14 0xaaaac47f45ef(/usr/bin/qemu-kvm-4.1.0+0x8455ef) #15 0xffffb4bfef3f in __libc_start_main (/usr/lib64/libc.so.6+0x23f3f) #16 0xaaaac47ffacb(/usr/bin/qemu-kvm-4.1.0+0x850acb) Direct leak of 2 byte(s) in 2 object(s) allocated from: #0 0xffffb597c20b in __interceptor_malloc (/usr/lib64/libasan.so.4+0xd320b) #1 0xffffb52dcb1b in g_malloc (/usr/lib64/libglib-2.0.so.0+0x58b1b) #2 0xffffb52f8143 in g_strdup (/usr/lib64/libglib-2.0.so.0+0x74143) #3 0xaaaac5244893 in migrate_params_test_apply (/usr/src/debug/qemu-4.1.0/migration/migration.c:1382) #4 0xaaaac52fdca7 in qmp_migrate_set_parameters (/usr/src/debug/qemu-4.1.0/qapi/qapi-commands-migration.c:192) #5 0xaaaac551d543 in qmp_dispatch (/usr/src/debug/qemu-4.1.0/qapi/qmp-dispatch.c) #6 0xaaaac52a0a8f in qmp_dispatch 
(/usr/src/debug/qemu-4.1.0/monitor/qmp.c:125) #7 0xaaaac52a1c7f in monitor_qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:214) #8 0xaaaac55cb0cf in aio_bh_call (/usr/src/debug/qemu-4.1.0/util/async.c:117) #9 0xaaaac55d4543 in aio_bh_poll (/usr/src/debug/qemu-4.1.0/util/aio-posix.c:459) #10 0xaaaac55cae0f in aio_dispatch (/usr/src/debug/qemu-4.1.0/util/async.c:268) #11 0xffffb52d6a7b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a7b) #12 0xaaaac55d1e3b(/usr/bin/qemu-kvm-4.1.0+0x1622e3b) #13 0xaaaac4e314bb(/usr/bin/qemu-kvm-4.1.0+0xe824bb) #14 0xaaaac47f45ef (/usr/bin/qemu-kvm-4.1.0+0x8455ef) #15 0xffffb4bfef3f in __libc_start_main (/usr/lib64/libc.so.6+0x23f3f) #16 0xaaaac47ffacb(/usr/bin/qemu-kvm-4.1.0+0x850acb) Signed-off-by: Chuan Zheng Reviewed-by: KeQian Zhu Reviewed-by: HaiLiang Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 1b2b22f4dc..ffa6d1875f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1260,12 +1260,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_tls_creds) { assert(params->tls_creds->type == QTYPE_QSTRING); - dest->tls_creds = g_strdup(params->tls_creds->u.s); + dest->tls_creds = params->tls_creds->u.s; } if (params->has_tls_hostname) { assert(params->tls_hostname->type == QTYPE_QSTRING); - dest->tls_hostname = g_strdup(params->tls_hostname->u.s); + dest->tls_hostname = params->tls_hostname->u.s; } if (params->has_max_bandwidth) { -- Gitee From bf463c0511fce696dc676e87fe040a467ed55c54 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Fri, 5 Mar 2021 16:09:29 +0800 Subject: [PATCH 30/31] migration/tls: fix inverted semantics in multifd_channel_connect Function multifd_channel_connect() returns "true" to indicate failure, which is rather confusing. Fix that. 
Signed-off-by: Hao Wang --- migration/ram.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 7fc2b9108d..5d4c7eb754 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1269,9 +1269,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p, * function after the TLS handshake, * so we mustn't call multifd_send_thread until then */ - return false; - } else { return true; + } else { + return false; } } else { /* update for tls qio channel */ @@ -1279,10 +1279,10 @@ static bool multifd_channel_connect(MultiFDSendParams *p, qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, QEMU_THREAD_JOINABLE); } - return false; + return true; } - return true; + return false; } static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, @@ -1314,7 +1314,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) p->c = QIO_CHANNEL(sioc); qio_channel_set_delay(p->c, false); p->running = true; - if (multifd_channel_connect(p, sioc, local_err)) { + if (!multifd_channel_connect(p, sioc, local_err)) { goto cleanup; } return; -- Gitee From 7e31bbdd4ee42fcd1af80c4c882b04184bf8b8bf Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Fri, 5 Mar 2021 16:10:57 +0800 Subject: [PATCH 31/31] migration/tls: add error handling in multifd_tls_handshake_thread If any error happens during multifd send thread creation (e.g. the channel broke because the new domain was destroyed by the dst), multifd_tls_handshake_thread may exit silently, leaving the main migration thread hanging (ram_save_setup -> multifd_send_sync_main -> qemu_sem_wait(&p->sem_sync)). Fix that by adding error handling in multifd_tls_handshake_thread. 
Signed-off-by: Hao Wang --- migration/ram.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 5d4c7eb754..c6cd5c0c7a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1210,7 +1210,16 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, } else { trace_multifd_tls_outgoing_handshake_complete(ioc); } - multifd_channel_connect(p, ioc, err); + + if (!multifd_channel_connect(p, ioc, err)) { + /* + * Error happen, mark multifd_send_thread status as 'quit' although it + * is not created, and then tell who pay attention to me. + */ + p->quit = true; + qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_post(&p->sem_sync); + } } static void *multifd_tls_handshake_thread(void *opaque) -- Gitee