From e9095b76ae8be40ed088bf155356ab11d61f9e3b Mon Sep 17 00:00:00 2001 From: yezengruan Date: Thu, 3 Nov 2022 19:44:12 +0800 Subject: [PATCH] Qemu update to version 6.2.0-55 - support dirty restraint on vCPU - support SPR AMX in Qemu - fix compilation errors of sw64 Signed-off-by: yezengruan --- ...nd-invalid-CPUID-0xD-9-info-on-some-.patch | 113 +++++ ...ve-the-unused-local-variable-records.patch | 25 + Remove-this-redundant-return.patch | 25 + ...l-Introduce-kvm_dirty_ring_size-func.patch | 66 +++ ...l-Refactor-per-vcpu-dirty-ring-reapi.patch | 106 ++++ cpus-Introduce-cpu_list_generation_id.patch | 73 +++ ...errors-of-sw64-architecture-on-x86-p.patch | 25 + ...that-no-bios-file-soft-link-was-crea.patch | 28 ++ ...k-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch | 34 ++ ...rs-include-missing-changes-from-5.17.patch | 65 +++ ...ate-Refactor-dirty-page-rate-calcula.patch | 399 +++++++++++++++ ...-dirtyrate-Replace-malloc-with-g_new.patch | 48 ++ qemu.spec | 31 +- ...imit-Implement-dirty-page-rate-limit.patch | 435 ++++++++++++++++ ...it-Implement-vCPU-dirtyrate-calculat.patch | 214 ++++++++ ...limit-Implement-virtual-CPU-throttle.patch | 469 ++++++++++++++++++ ...do-not-access-uninitialized-variable.patch | 77 +++ tests-Add-dirty-page-rate-limit-test.patch | 362 ++++++++++++++ x86-Add-AMX-CPUIDs-enumeration.patch | 138 ++++++ ...MX-XTILECFG-and-XTILEDATA-components.patch | 115 +++++ ...FD-faulting-bit-for-state-components.patch | 66 +++ ...yte-boundary-enumeration-for-extende.patch | 91 ++++ x86-Grant-AMX-permission-for-guest.patch | 218 ++++++++ ...ort-XFD-and-AMX-xsave-data-migration.patch | 182 +++++++ ...for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch | 186 +++++++ 25 files changed, 3590 insertions(+), 1 deletion(-) create mode 100644 KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch create mode 100644 Remove-the-unused-local-variable-records.patch create mode 100644 Remove-this-redundant-return.patch create mode 100644 
accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch create mode 100644 accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch create mode 100644 cpus-Introduce-cpu_list_generation_id.patch create mode 100644 fix-compilation-errors-of-sw64-architecture-on-x86-p.patch create mode 100644 fixed-the-error-that-no-bios-file-soft-link-was-crea.patch create mode 100644 hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch create mode 100644 linux-headers-include-missing-changes-from-5.17.patch create mode 100644 migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch create mode 100644 migration-dirtyrate-Replace-malloc-with-g_new.patch create mode 100644 softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch create mode 100644 softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch create mode 100644 softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch create mode 100644 target-i386-kvm-do-not-access-uninitialized-variable.patch create mode 100644 tests-Add-dirty-page-rate-limit-test.patch create mode 100644 x86-Add-AMX-CPUIDs-enumeration.patch create mode 100644 x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch create mode 100644 x86-Add-XFD-faulting-bit-for-state-components.patch create mode 100644 x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch create mode 100644 x86-Grant-AMX-permission-for-guest.patch create mode 100644 x86-Support-XFD-and-AMX-xsave-data-migration.patch create mode 100644 x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch diff --git a/KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch b/KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch new file mode 100644 index 0000000..bf01f17 --- /dev/null +++ b/KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch @@ -0,0 +1,113 @@ +From 49cb3c9f3cc3a567ce2e6159bf27328c64b6601d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 23 Mar 2022 12:33:25 +0100 +Subject: [PATCH 10/10] KVM: x86: workaround invalid CPUID[0xD,9] info on some + AMD 
processors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +from mainline-v7.0.0-rc2 +commit 58f7db26f21c690cf9a669c314cfd7371506084a +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit 58f7db26f21c ("KVM: x86: workaround invalid CPUID[0xD,9] info +on some AMD processors") + +---------------------------------------------------------------- + +KVM: x86: workaround invalid CPUID[0xD,9] info on some AMD processors + +Some AMD processors expose the PKRU extended save state even if they do not have +the related PKU feature in CPUID. Worse, when they do they report a size of +64, whereas the expected size of the PKRU extended save state is 8, therefore +the esa->size == eax assertion does not hold. + +The state is already ignored by KVM_GET_SUPPORTED_CPUID because it +was not enabled in the host XCR0. However, QEMU kvm_cpu_xsave_init() +runs before QEMU invokes arch_prctl() to enable dynamically-enabled +save states such as XTILEDATA, and KVM_GET_SUPPORTED_CPUID hides save +states that have yet to be enabled. Therefore, kvm_cpu_xsave_init() +needs to consult the host CPUID instead of KVM_GET_SUPPORTED_CPUID, +and dies with an assertion failure. + +When setting up the ExtSaveArea array to match the host, ignore features that +KVM does not report as supported. This will cause QEMU to skip the incorrect +CPUID leaf instead of tripping the assertion. + +Closes: https://gitlab.com/qemu-project/qemu/-/issues/916 +Reported-by: Daniel P. Berrangé +Analyzed-by: Yang Zhong +Reported-by: Peter Krempa +Tested-by: Daniel P. 
Berrangé +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 2 ++ + target/i386/kvm/kvm-cpu.c | 19 ++++++++++++------- + 3 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1bc03d3eef..551b47ab1e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4973,8 +4973,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + return cpu_list; + } + +-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, +- bool migratable_only) ++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, ++ bool migratable_only) + { + FeatureWordInfo *wi = &feature_word_info[w]; + uint64_t r = 0; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index eaa99c302f..290f1beaea 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -605,6 +605,8 @@ typedef enum FeatureWord { + } FeatureWord; + + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; ++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, ++ bool migratable_only); + + /* cpuid_features bits */ + #define CPUID_FP87 (1U << 0) +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index a35a1bf9fe..5eb955ce9a 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -99,13 +99,18 @@ static void kvm_cpu_xsave_init(void) + for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + +- if (esa->size) { +- host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); +- if (eax != 0) { +- assert(esa->size == eax); +- esa->offset = ebx; +- esa->ecx = ecx; +- } ++ if (!esa->size) { ++ continue; ++ } ++ if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits) ++ != esa->bits) { ++ continue; ++ } ++ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); ++ if (eax != 0) { ++ assert(esa->size == eax); ++ esa->offset = ebx; ++ esa->ecx = ecx; + } + } + } +-- +2.27.0 + diff --git 
a/Remove-the-unused-local-variable-records.patch b/Remove-the-unused-local-variable-records.patch new file mode 100644 index 0000000..d41ab54 --- /dev/null +++ b/Remove-the-unused-local-variable-records.patch @@ -0,0 +1,25 @@ +From 7b859a86cbdde8bf17619c43a6d4ae687a20f003 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Wed, 29 Jun 2022 16:26:17 +0800 +Subject: [PATCH] Remove the unused local variable "records". + +Signed-off-by: dinglimin +--- + tests/migration/guestperf/engine.py | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py +index 87a6ab2009..59fca2c70b 100644 +--- a/tests/migration/guestperf/engine.py ++++ b/tests/migration/guestperf/engine.py +@@ -65,7 +65,6 @@ def _vcpu_timing(self, pid, tid_list): + return records + + def _cpu_timing(self, pid): +- records = [] + now = time.time() + + jiffies_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK']) +-- +2.27.0 + diff --git a/Remove-this-redundant-return.patch b/Remove-this-redundant-return.patch new file mode 100644 index 0000000..4d028bd --- /dev/null +++ b/Remove-this-redundant-return.patch @@ -0,0 +1,25 @@ +From e7ef56975af8553690afb16f32fe74d62762b853 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Wed, 29 Jun 2022 14:02:59 +0800 +Subject: [PATCH] Remove this redundant return. 
+ +Signed-off-by: dinglimin +--- + scripts/vmstate-static-checker.py | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py +index 539ead62b4..6838bf7e7c 100755 +--- a/scripts/vmstate-static-checker.py ++++ b/scripts/vmstate-static-checker.py +@@ -367,7 +367,6 @@ def check_machine_type(s, d): + if s["Name"] != d["Name"]: + print("Warning: checking incompatible machine types:", end=' ') + print("\"" + s["Name"] + "\", \"" + d["Name"] + "\"") +- return + + + def main(): +-- +2.27.0 + diff --git a/accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch b/accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch new file mode 100644 index 0000000..6e9274e --- /dev/null +++ b/accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch @@ -0,0 +1,66 @@ +From 85583352f3bc28badd4cb336517f6a4eb440d5b0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:34 +0800 +Subject: [PATCH 2/3] accel/kvm/kvm-all: Introduce kvm_dirty_ring_size function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce kvm_dirty_ring_size util function to help calculate +dirty ring full time. + +Signed-off-by: Hyman Huang(黄勇) +Acked-by: Peter Xu +Message-Id: +Signed-off-by: Dr. David Alan Gilbert +--- + accel/kvm/kvm-all.c | 5 +++++ + accel/stubs/kvm-stub.c | 5 +++++ + include/sysemu/kvm.h | 2 ++ + 3 files changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 3bc6eb6294..d0c4310507 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2332,6 +2332,11 @@ bool kvm_dirty_ring_enabled(void) + return kvm_state->kvm_dirty_ring_size ? 
true : false; + } + ++uint32_t kvm_dirty_ring_size(void) ++{ ++ return kvm_state->kvm_dirty_ring_size; ++} ++ + static int kvm_init(MachineState *ms) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index 5319573e00..1128cb2928 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -152,4 +152,9 @@ bool kvm_dirty_ring_enabled(void) + { + return false; + } ++ ++uint32_t kvm_dirty_ring_size(void) ++{ ++ return 0; ++} + #endif +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 2623775c27..19c5c8402a 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -549,4 +549,6 @@ bool kvm_cpu_check_are_resettable(void); + bool kvm_arch_cpu_check_are_resettable(void); + + bool kvm_dirty_ring_enabled(void); ++ ++uint32_t kvm_dirty_ring_size(void); + #endif +-- +2.27.0 + diff --git a/accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch b/accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch new file mode 100644 index 0000000..bec0b4a --- /dev/null +++ b/accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch @@ -0,0 +1,106 @@ +From c6f781e50e75fc2e6b819291b6c5ce6c212f018b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:30 +0800 +Subject: [PATCH 1/3] accel/kvm/kvm-all: Refactor per-vcpu dirty ring reaping +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add a non-required argument 'CPUState' to kvm_dirty_ring_reap so +that it can cover single vcpu dirty-ring-reaping scenario. + +Signed-off-by: Hyman Huang(黄勇) +Reviewed-by: Peter Xu +Message-Id: +Signed-off-by: Dr. 
David Alan Gilbert +--- + accel/kvm/kvm-all.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index f2ce5cd45a..3bc6eb6294 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -773,17 +773,20 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu) + } + + /* Must be with slots_lock held */ +-static uint64_t kvm_dirty_ring_reap_locked(KVMState *s) ++static uint64_t kvm_dirty_ring_reap_locked(KVMState *s, CPUState* cpu) + { + int ret; +- CPUState *cpu; + uint64_t total = 0; + int64_t stamp; + + stamp = get_clock(); + +- CPU_FOREACH(cpu) { +- total += kvm_dirty_ring_reap_one(s, cpu); ++ if (cpu) { ++ total = kvm_dirty_ring_reap_one(s, cpu); ++ } else { ++ CPU_FOREACH(cpu) { ++ total += kvm_dirty_ring_reap_one(s, cpu); ++ } + } + + if (total) { +@@ -804,7 +807,7 @@ static uint64_t kvm_dirty_ring_reap_locked(KVMState *s) + * Currently for simplicity, we must hold BQL before calling this. We can + * consider to drop the BQL if we're clear with all the race conditions. + */ +-static uint64_t kvm_dirty_ring_reap(KVMState *s) ++static uint64_t kvm_dirty_ring_reap(KVMState *s, CPUState *cpu) + { + uint64_t total; + +@@ -824,7 +827,7 @@ static uint64_t kvm_dirty_ring_reap(KVMState *s) + * reset below. + */ + kvm_slots_lock(); +- total = kvm_dirty_ring_reap_locked(s); ++ total = kvm_dirty_ring_reap_locked(s, cpu); + kvm_slots_unlock(); + + return total; +@@ -871,7 +874,7 @@ static void kvm_dirty_ring_flush(void) + * vcpus out in a synchronous way. + */ + kvm_cpu_synchronize_kick_all(); +- kvm_dirty_ring_reap(kvm_state); ++ kvm_dirty_ring_reap(kvm_state, NULL); + trace_kvm_dirty_ring_flush(1); + } + +@@ -1415,7 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + * Not easy. Let's cross the fingers until it's fixed. 
+ */ + if (kvm_state->kvm_dirty_ring_size) { +- kvm_dirty_ring_reap_locked(kvm_state); ++ kvm_dirty_ring_reap_locked(kvm_state, NULL); + } else { + kvm_slot_get_dirty_log(kvm_state, mem); + } +@@ -1487,7 +1490,7 @@ static void *kvm_dirty_ring_reaper_thread(void *data) + r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; + + qemu_mutex_lock_iothread(); +- kvm_dirty_ring_reap(s); ++ kvm_dirty_ring_reap(s, NULL); + qemu_mutex_unlock_iothread(); + + r->reaper_iteration++; +@@ -2957,7 +2960,7 @@ int kvm_cpu_exec(CPUState *cpu) + */ + trace_kvm_dirty_ring_full(cpu->cpu_index); + qemu_mutex_lock_iothread(); +- kvm_dirty_ring_reap(kvm_state); ++ kvm_dirty_ring_reap(kvm_state, NULL); + qemu_mutex_unlock_iothread(); + ret = 0; + break; +-- +2.27.0 + diff --git a/cpus-Introduce-cpu_list_generation_id.patch b/cpus-Introduce-cpu_list_generation_id.patch new file mode 100644 index 0000000..23cc872 --- /dev/null +++ b/cpus-Introduce-cpu_list_generation_id.patch @@ -0,0 +1,73 @@ +From 6e057dd5df580f0e525d808f5476ee973280371d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:31 +0800 +Subject: [PATCH 2/3] cpus: Introduce cpu_list_generation_id +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce cpu_list_generation_id to track cpu list generation so +that cpu hotplug/unplug can be detected during measurement of +dirty page rate. + +cpu_list_generation_id could be used to detect changes of cpu +list, which is prepared for dirty page rate measurement. + +Signed-off-by: Hyman Huang(黄勇) +Reviewed-by: Peter Xu +Message-Id: <06e1f1362b2501a471dce796abb065b04f320fa5.1656177590.git.huangy81@chinatelecom.cn> +Signed-off-by: Dr. 
David Alan Gilbert +--- + cpus-common.c | 8 ++++++++ + include/exec/cpu-common.h | 1 + + 2 files changed, 9 insertions(+) + +diff --git a/cpus-common.c b/cpus-common.c +index 6e73d3e58d..31c6415f37 100644 +--- a/cpus-common.c ++++ b/cpus-common.c +@@ -73,6 +73,12 @@ static int cpu_get_free_index(void) + } + + CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); ++static unsigned int cpu_list_generation_id; ++ ++unsigned int cpu_list_generation_id_get(void) ++{ ++ return cpu_list_generation_id; ++} + + void cpu_list_add(CPUState *cpu) + { +@@ -84,6 +90,7 @@ void cpu_list_add(CPUState *cpu) + assert(!cpu_index_auto_assigned); + } + QTAILQ_INSERT_TAIL_RCU(&cpus, cpu, node); ++ cpu_list_generation_id++; + } + + void cpu_list_remove(CPUState *cpu) +@@ -96,6 +103,7 @@ void cpu_list_remove(CPUState *cpu) + + QTAILQ_REMOVE_RCU(&cpus, cpu, node); + cpu->cpu_index = UNASSIGNED_CPU_INDEX; ++ cpu_list_generation_id++; + } + + CPUState *qemu_get_cpu(int index) +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 039d422bf4..cdee668f20 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -11,6 +11,7 @@ + void qemu_init_cpu_list(void); + void cpu_list_lock(void); + void cpu_list_unlock(void); ++unsigned int cpu_list_generation_id_get(void); + + void tcg_flush_softmmu_tlb(CPUState *cs); + +-- +2.27.0 + diff --git a/fix-compilation-errors-of-sw64-architecture-on-x86-p.patch b/fix-compilation-errors-of-sw64-architecture-on-x86-p.patch new file mode 100644 index 0000000..8f2cdc1 --- /dev/null +++ b/fix-compilation-errors-of-sw64-architecture-on-x86-p.patch @@ -0,0 +1,25 @@ +From 58471cd8dcf8e6a66113ddf9bb4ac45c89bbd57b Mon Sep 17 00:00:00 2001 +From: lifeng 71117973 +Date: Wed, 2 Nov 2022 11:19:55 +0800 +Subject: [PATCH 1/2] fix compilation errors of sw64 architecture on x86 + platform + +--- + target/sw64/float_helper.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/target/sw64/float_helper.c b/target/sw64/float_helper.c +index 
ad1c3cce48..c8e0845afc 100644 +--- a/target/sw64/float_helper.c ++++ b/target/sw64/float_helper.c +@@ -653,7 +653,6 @@ void helper_ieee_input(CPUSW64State *env, uint64_t val) + { + #ifndef CONFIG_USER_ONLY + uint32_t exp = (uint32_t)(val >> 52) & 0x7ff; +- uint64_t frac = val & 0xfffffffffffffull; + + if (exp == 0x7ff) { + /* Infinity or NaN. */ +-- +2.27.0 + diff --git a/fixed-the-error-that-no-bios-file-soft-link-was-crea.patch b/fixed-the-error-that-no-bios-file-soft-link-was-crea.patch new file mode 100644 index 0000000..4bc85c4 --- /dev/null +++ b/fixed-the-error-that-no-bios-file-soft-link-was-crea.patch @@ -0,0 +1,28 @@ +From cf6be03a1f5b7595a2ecada71fa8aa30de744703 Mon Sep 17 00:00:00 2001 +From: lifeng 71117973 +Date: Wed, 2 Nov 2022 17:20:50 +0800 +Subject: [PATCH 2/2] fixed the error that no bios file soft link was created + in the build directory when compiling the sw64 architecture + +--- + configure | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/configure b/configure +index 9569d7a3d0..0ae7bcf065 100755 +--- a/configure ++++ b/configure +@@ -3861,7 +3861,9 @@ for bios_file in \ + $source_path/pc-bios/u-boot.* \ + $source_path/pc-bios/edk2-*.fd.bz2 \ + $source_path/pc-bios/palcode-* \ +- $source_path/pc-bios/qemu_vga.ndrv ++ $source_path/pc-bios/qemu_vga.ndrv \ ++ $source_path/pc-bios/core* \ ++ $source_path/pc-bios/uefi-bios-sw + + do + LINKS="$LINKS pc-bios/$(basename $bios_file)" +-- +2.27.0 + diff --git a/hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch b/hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch new file mode 100644 index 0000000..1c874a4 --- /dev/null +++ b/hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch @@ -0,0 +1,34 @@ +From 4f66d261c0f20189e387de57baca17167cc542ab Mon Sep 17 00:00:00 2001 +From: Andy Pei +Date: Mon, 3 Jan 2022 17:28:12 +0800 +Subject: [PATCH] hw/vhost-user-blk: turn on VIRTIO_BLK_F_SIZE_MAX feature for + virtio blk device + +Turn on pre-defined feature 
VIRTIO_BLK_F_SIZE_MAX for virtio blk device to +avoid guest DMA request sizes which are too large for hardware spec. + +Signed-off-by: dinglimin +Signed-off-by: Andy Pei +Message-Id: <1641202092-149677-1-git-send-email-andy.pei@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: Raphael Norwitz +--- + hw/block/vhost-user-blk.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index ba13cb87e5..eb1264afc7 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -252,6 +252,7 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + /* Turn on pre-defined features */ ++ virtio_add_feature(&features, VIRTIO_BLK_F_SIZE_MAX); + virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); + virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); + virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); +-- +2.27.0 + diff --git a/linux-headers-include-missing-changes-from-5.17.patch b/linux-headers-include-missing-changes-from-5.17.patch new file mode 100644 index 0000000..1461d59 --- /dev/null +++ b/linux-headers-include-missing-changes-from-5.17.patch @@ -0,0 +1,65 @@ +From d6398243714a7a775c64e74dbd63c00863cb7e83 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 22 Feb 2022 17:58:11 +0100 +Subject: [PATCH 01/10] linux-headers: include missing changes from 5.17 + +mainline inclusion +from mainline-v7.0.0-rc0 +commit 1ea5208febcc068449b63282d72bb719ab67a466 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit 1ea5208febcc ("linux-headers: include missing changes from 5.17") + +------------------------------------------------ + +linux-headers: include missing changes from 5.17 + +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + linux-headers/asm-x86/kvm.h | 3 +++ + linux-headers/linux/kvm.h | 7 +++++++ + 2 
files changed, 10 insertions(+) + +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index a6c327f8ad..2ab4f1818a 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -437,6 +437,9 @@ struct kvm_sync_regs { + + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + ++/* attributes for system fd (group 0) */ ++#define KVM_X86_XCOMP_GUEST_SUPP 0 ++ + struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 5d8e42b8f8..7870cd0280 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1112,6 +1112,10 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_BINARY_STATS_FD 203 + #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 + #define KVM_CAP_ARM_MTE 205 ++#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 ++#define KVM_CAP_VM_GPA_BITS 207 ++#define KVM_CAP_XSAVE2 208 ++#define KVM_CAP_SYS_ATTRIBUTES 209 + + #define KVM_CAP_ARM_CPU_FEATURE 555 + +@@ -2006,4 +2010,7 @@ struct kvm_stats_desc { + + #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + ++/* Available with KVM_CAP_XSAVE2 */ ++#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) ++ + #endif /* __LINUX_KVM_H */ +-- +2.27.0 + diff --git a/migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch b/migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch new file mode 100644 index 0000000..28097ef --- /dev/null +++ b/migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch @@ -0,0 +1,399 @@ +From b6d1e022b7bb06faf2dcad3062b7061b59ef68a9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:32 +0800 +Subject: [PATCH 3/3] migration/dirtyrate: Refactor dirty page rate calculation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +abstract out dirty log change logic into function 
+global_dirty_log_change. + +abstract out dirty page rate calculation logic via +dirty-ring into function vcpu_calculate_dirtyrate. + +abstract out mathematical dirty page rate calculation +into do_calculate_dirtyrate, decouple it from DirtyStat. + +rename set_sample_page_period to dirty_stat_wait, which +is well-understood and will be reused in dirtylimit. + +handle cpu hotplug/unplug scenario during measurement of +dirty page rate. + +export util functions outside migration. + +Signed-off-by: Hyman Huang(黄勇) +Reviewed-by: Peter Xu +Message-Id: <7b6f6f4748d5b3d017b31a0429e630229ae97538.1656177590.git.huangy81@chinatelecom.cn> +Signed-off-by: Dr. David Alan Gilbert +--- + include/sysemu/dirtyrate.h | 28 +++++ + migration/dirtyrate.c | 227 +++++++++++++++++++++++-------------- + migration/dirtyrate.h | 7 +- + 3 files changed, 174 insertions(+), 88 deletions(-) + create mode 100644 include/sysemu/dirtyrate.h + +diff --git a/include/sysemu/dirtyrate.h b/include/sysemu/dirtyrate.h +new file mode 100644 +index 0000000000..4d3b9a4902 +--- /dev/null ++++ b/include/sysemu/dirtyrate.h +@@ -0,0 +1,28 @@ ++/* ++ * dirty page rate helper functions ++ * ++ * Copyright (c) 2022 CHINA TELECOM CO.,LTD. ++ * ++ * Authors: ++ * Hyman Huang(黄勇) ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#ifndef QEMU_DIRTYRATE_H ++#define QEMU_DIRTYRATE_H ++ ++typedef struct VcpuStat { ++ int nvcpu; /* number of vcpu */ ++ DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ ++} VcpuStat; ++ ++int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, ++ VcpuStat *stat, ++ unsigned int flag, ++ bool one_shot); ++ ++void global_dirty_log_change(unsigned int flag, ++ bool start); ++#endif +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 8043bc7946..c449095fc3 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -46,7 +46,7 @@ static struct DirtyRateStat DirtyStat; + static DirtyRateMeasureMode dirtyrate_mode = + DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING; + +-static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) ++static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time) + { + int64_t current_time; + +@@ -60,6 +60,132 @@ static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) + return msec; + } + ++static inline void record_dirtypages(DirtyPageRecord *dirty_pages, ++ CPUState *cpu, bool start) ++{ ++ if (start) { ++ dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages; ++ } else { ++ dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages; ++ } ++} ++ ++static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages, ++ int64_t calc_time_ms) ++{ ++ uint64_t memory_size_MB; ++ uint64_t increased_dirty_pages = ++ dirty_pages.end_pages - dirty_pages.start_pages; ++ ++ memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20; ++ ++ return memory_size_MB * 1000 / calc_time_ms; ++} ++ ++void global_dirty_log_change(unsigned int flag, bool start) ++{ ++ qemu_mutex_lock_iothread(); ++ if (start) { ++ memory_global_dirty_log_start(flag); ++ } else { ++ memory_global_dirty_log_stop(flag); ++ } ++ qemu_mutex_unlock_iothread(); ++} ++ ++/* ++ * global_dirty_log_sync ++ * 1. sync dirty log from kvm ++ * 2. stop dirty tracking if needed. 
++ */ ++static void global_dirty_log_sync(unsigned int flag, bool one_shot) ++{ ++ qemu_mutex_lock_iothread(); ++ memory_global_dirty_log_sync(); ++ if (one_shot) { ++ memory_global_dirty_log_stop(flag); ++ } ++ qemu_mutex_unlock_iothread(); ++} ++ ++static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat) ++{ ++ CPUState *cpu; ++ DirtyPageRecord *records; ++ int nvcpu = 0; ++ ++ CPU_FOREACH(cpu) { ++ nvcpu++; ++ } ++ ++ stat->nvcpu = nvcpu; ++ stat->rates = g_malloc0(sizeof(DirtyRateVcpu) * nvcpu); ++ ++ records = g_malloc0(sizeof(DirtyPageRecord) * nvcpu); ++ ++ return records; ++} ++ ++static void vcpu_dirty_stat_collect(VcpuStat *stat, ++ DirtyPageRecord *records, ++ bool start) ++{ ++ CPUState *cpu; ++ ++ CPU_FOREACH(cpu) { ++ record_dirtypages(records, cpu, start); ++ } ++} ++ ++int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, ++ VcpuStat *stat, ++ unsigned int flag, ++ bool one_shot) ++{ ++ DirtyPageRecord *records; ++ int64_t init_time_ms; ++ int64_t duration; ++ int64_t dirtyrate; ++ int i = 0; ++ unsigned int gen_id; ++ ++retry: ++ init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ ++ cpu_list_lock(); ++ gen_id = cpu_list_generation_id_get(); ++ records = vcpu_dirty_stat_alloc(stat); ++ vcpu_dirty_stat_collect(stat, records, true); ++ cpu_list_unlock(); ++ ++ duration = dirty_stat_wait(calc_time_ms, init_time_ms); ++ ++ global_dirty_log_sync(flag, one_shot); ++ ++ cpu_list_lock(); ++ if (gen_id != cpu_list_generation_id_get()) { ++ g_free(records); ++ g_free(stat->rates); ++ cpu_list_unlock(); ++ goto retry; ++ } ++ vcpu_dirty_stat_collect(stat, records, false); ++ cpu_list_unlock(); ++ ++ for (i = 0; i < stat->nvcpu; i++) { ++ dirtyrate = do_calculate_dirtyrate(records[i], duration); ++ ++ stat->rates[i].id = i; ++ stat->rates[i].dirty_rate = dirtyrate; ++ ++ trace_dirtyrate_do_calculate_vcpu(i, dirtyrate); ++ } ++ ++ g_free(records); ++ ++ return duration; ++} ++ + static bool is_sample_period_valid(int64_t sec) + { + if (sec < 
MIN_FETCH_DIRTYRATE_TIME_SEC || +@@ -396,44 +522,6 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, + return true; + } + +-static inline void record_dirtypages(DirtyPageRecord *dirty_pages, +- CPUState *cpu, bool start) +-{ +- if (start) { +- dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages; +- } else { +- dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages; +- } +-} +- +-static void dirtyrate_global_dirty_log_start(void) +-{ +- qemu_mutex_lock_iothread(); +- memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); +- qemu_mutex_unlock_iothread(); +-} +- +-static void dirtyrate_global_dirty_log_stop(void) +-{ +- qemu_mutex_lock_iothread(); +- memory_global_dirty_log_sync(); +- memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE); +- qemu_mutex_unlock_iothread(); +-} +- +-static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages) +-{ +- uint64_t memory_size_MB; +- int64_t time_s; +- uint64_t increased_dirty_pages = +- dirty_pages.end_pages - dirty_pages.start_pages; +- +- memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20; +- time_s = DirtyStat.calc_time; +- +- return memory_size_MB / time_s; +-} +- + static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages, + bool start) + { +@@ -444,11 +532,6 @@ static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages, + } + } + +-static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages) +-{ +- DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages); +-} +- + static inline void dirtyrate_manual_reset_protect(void) + { + RAMBlock *block = NULL; +@@ -492,71 +575,49 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) + DirtyStat.start_time = start_time / 1000; + + msec = config.sample_period_seconds * 1000; +- msec = set_sample_page_period(msec, start_time); ++ msec = dirty_stat_wait(msec, start_time); + DirtyStat.calc_time = msec / 1000; + + /* +- * dirtyrate_global_dirty_log_stop do two 
things. ++ * do two things. + * 1. fetch dirty bitmap from kvm + * 2. stop dirty tracking + */ +- dirtyrate_global_dirty_log_stop(); ++ global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true); + + record_dirtypages_bitmap(&dirty_pages, false); + +- do_calculate_dirtyrate_bitmap(dirty_pages); ++ DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages, msec); + } + + static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config) + { +- CPUState *cpu; +- int64_t msec = 0; +- int64_t start_time; ++ int64_t duration; + uint64_t dirtyrate = 0; + uint64_t dirtyrate_sum = 0; +- DirtyPageRecord *dirty_pages; +- int nvcpu = 0; + int i = 0; + +- CPU_FOREACH(cpu) { +- nvcpu++; +- } +- +- dirty_pages = g_new(DirtyPageRecord, nvcpu); +- +- DirtyStat.dirty_ring.nvcpu = nvcpu; +- DirtyStat.dirty_ring.rates = g_new(DirtyRateVcpu, nvcpu); +- +- dirtyrate_global_dirty_log_start(); +- +- CPU_FOREACH(cpu) { +- record_dirtypages(dirty_pages, cpu, true); +- } +- +- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +- DirtyStat.start_time = start_time / 1000; ++ /* start log sync */ ++ global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true); + +- msec = config.sample_period_seconds * 1000; +- msec = set_sample_page_period(msec, start_time); +- DirtyStat.calc_time = msec / 1000; ++ DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; + +- dirtyrate_global_dirty_log_stop(); ++ /* calculate vcpu dirtyrate */ ++ duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000, ++ &DirtyStat.dirty_ring, ++ GLOBAL_DIRTY_DIRTY_RATE, ++ true); + +- CPU_FOREACH(cpu) { +- record_dirtypages(dirty_pages, cpu, false); +- } ++ DirtyStat.calc_time = duration / 1000; + ++ /* calculate vm dirtyrate */ + for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) { +- dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]); +- trace_dirtyrate_do_calculate_vcpu(i, dirtyrate); +- +- DirtyStat.dirty_ring.rates[i].id = i; ++ dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate; + 
DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate; + dirtyrate_sum += dirtyrate; + } + + DirtyStat.dirty_rate = dirtyrate_sum; +- g_free(dirty_pages); + } + + static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config) +@@ -574,7 +635,7 @@ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config) + rcu_read_unlock(); + + msec = config.sample_period_seconds * 1000; +- msec = set_sample_page_period(msec, initial_time); ++ msec = dirty_stat_wait(msec, initial_time); + DirtyStat.start_time = initial_time / 1000; + DirtyStat.calc_time = msec / 1000; + +diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h +index 69d4c5b865..594a5c0bb6 100644 +--- a/migration/dirtyrate.h ++++ b/migration/dirtyrate.h +@@ -13,6 +13,8 @@ + #ifndef QEMU_MIGRATION_DIRTYRATE_H + #define QEMU_MIGRATION_DIRTYRATE_H + ++#include "sysemu/dirtyrate.h" ++ + /* + * Sample 512 pages per GB as default. + */ +@@ -65,11 +67,6 @@ typedef struct SampleVMStat { + uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ + } SampleVMStat; + +-typedef struct VcpuStat { +- int nvcpu; /* number of vcpu */ +- DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ +-} VcpuStat; +- + /* + * Store calculation statistics for each measure. + */ +-- +2.27.0 + diff --git a/migration-dirtyrate-Replace-malloc-with-g_new.patch b/migration-dirtyrate-Replace-malloc-with-g_new.patch new file mode 100644 index 0000000..9e146a4 --- /dev/null +++ b/migration-dirtyrate-Replace-malloc-with-g_new.patch @@ -0,0 +1,48 @@ +From 7cb2d342b9073ec9548202df6e1fb25fa4997d71 Mon Sep 17 00:00:00 2001 +From: jianchunfu +Date: Thu, 30 Jun 2022 11:34:50 +0000 +Subject: [PATCH] migration/dirtyrate: Replace malloc with g_new Using macro + g_new() to handling potential memory allocation failures in dirtyrate. 
+ +--- + migration/dirtyrate.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index d65e744af9..8043bc7946 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -157,7 +157,7 @@ static void cleanup_dirtyrate_stat(struct DirtyRateConfig config) + { + /* last calc-dirty-rate qmp use dirty ring mode */ + if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) { +- free(DirtyStat.dirty_ring.rates); ++ g_free(DirtyStat.dirty_ring.rates); + DirtyStat.dirty_ring.rates = NULL; + } + } +@@ -522,10 +522,10 @@ static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config) + nvcpu++; + } + +- dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu); ++ dirty_pages = g_new(DirtyPageRecord, nvcpu); + + DirtyStat.dirty_ring.nvcpu = nvcpu; +- DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu); ++ DirtyStat.dirty_ring.rates = g_new(DirtyRateVcpu, nvcpu); + + dirtyrate_global_dirty_log_start(); + +@@ -556,7 +556,7 @@ static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config) + } + + DirtyStat.dirty_rate = dirtyrate_sum; +- free(dirty_pages); ++ g_free(dirty_pages); + } + + static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config) +-- +2.27.0 + diff --git a/qemu.spec b/qemu.spec index 65cde5f..cb108f2 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,6 +1,6 @@ Name: qemu Version: 6.2.0 -Release: 54 +Release: 55 Epoch: 10 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -317,6 +317,30 @@ Patch0304: pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch Patch0305: acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch Patch0306: hw-display-ati_2d-Fix-buffer-overflow-in-ati_2d_blt-.patch Patch0307: ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch +Patch0308: Remove-the-unused-local-variable-records.patch +Patch0309: Remove-this-redundant-return.patch +Patch0310: 
hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch +Patch0311: migration-dirtyrate-Replace-malloc-with-g_new.patch +Patch0312: accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch +Patch0313: cpus-Introduce-cpu_list_generation_id.patch +Patch0314: migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch +Patch0315: softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch +Patch0316: accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch +Patch0317: softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch +Patch0318: softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch +Patch0319: tests-Add-dirty-page-rate-limit-test.patch +Patch0320: linux-headers-include-missing-changes-from-5.17.patch +Patch0321: x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch +Patch0322: x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch +Patch0323: x86-Grant-AMX-permission-for-guest.patch +Patch0324: x86-Add-XFD-faulting-bit-for-state-components.patch +Patch0325: x86-Add-AMX-CPUIDs-enumeration.patch +Patch0326: x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch +Patch0327: x86-Support-XFD-and-AMX-xsave-data-migration.patch +Patch0328: target-i386-kvm-do-not-access-uninitialized-variable.patch +Patch0329: KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch +Patch0330: fix-compilation-errors-of-sw64-architecture-on-x86-p.patch +Patch0331: fixed-the-error-that-no-bios-file-soft-link-was-crea.patch BuildRequires: flex BuildRequires: gcc @@ -831,6 +855,11 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Thu Nov 03 2022 yezengruan - 10:6.2.0-55 +- support dirty restraint on vCPU +- support SPR AMX in Qemu +- fix compilation errors of sw64 + * Mon Oct 24 2022 fushanqing - 10:6.2.0-54 - add '--enable-slirp' compilation options diff --git a/softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch b/softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch new file mode 100644 index 0000000..3c56ed3 --- /dev/null +++ 
b/softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch @@ -0,0 +1,435 @@ +From 39d9c1f6de01abf003980f4c2fe3c08f9e6cd60c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:36 +0800 +Subject: [PATCH] softmmu/dirtylimit: Implement dirty page rate limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement dirtyrate calculation periodically based on +dirty-ring and throttle virtual CPU until it reaches the quota +dirty page rate given by user. + +Introduce qmp commands "set-vcpu-dirty-limit", +"cancel-vcpu-dirty-limit", "query-vcpu-dirty-limit" +to enable, disable, query dirty page limit for virtual CPU. + +Meanwhile, introduce corresponding hmp commands +"set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit", +"info vcpu_dirty_limit" so the feature can be more usable. + +"query-vcpu-dirty-limit" success depends on enabling dirty +page rate limit, so just add it to the list of skipped +commands to ensure qmp-cmd-test runs successfully. + +Signed-off-by: Hyman Huang(黄勇) +Acked-by: Markus Armbruster +Reviewed-by: Peter Xu +Message-Id: <4143f26706d413dd29db0b672fe58b3d3fbe34bc.1656177590.git.huangy81@chinatelecom.cn> +Signed-off-by: Dr. David Alan Gilbert +--- + hmp-commands-info.hx | 13 +++ + hmp-commands.hx | 32 ++++++ + include/monitor/hmp.h | 3 + + qapi/migration.json | 80 +++++++++++++++ + softmmu/dirtylimit.c | 194 +++++++++++++++++++++++++++++++++++++ + tests/qtest/qmp-cmd-test.c | 2 + + 6 files changed, 324 insertions(+) + +diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx +index 407a1da800..5dd3001af0 100644 +--- a/hmp-commands-info.hx ++++ b/hmp-commands-info.hx +@@ -863,6 +863,19 @@ SRST + Display the vcpu dirty rate information.
+ ERST + ++ { ++ .name = "vcpu_dirty_limit", ++ .args_type = "", ++ .params = "", ++ .help = "show dirty page limit information of all vCPU", ++ .cmd = hmp_info_vcpu_dirty_limit, ++ }, ++ ++SRST ++ ``info vcpu_dirty_limit`` ++ Display the vcpu dirty page limit information. ++ERST ++ + #if defined(TARGET_I386) + { + .name = "sgx", +diff --git a/hmp-commands.hx b/hmp-commands.hx +index 70a9136ac2..5bedee2d49 100644 +--- a/hmp-commands.hx ++++ b/hmp-commands.hx +@@ -1744,3 +1744,35 @@ ERST + "\n\t\t\t -b to specify dirty bitmap as method of calculation)", + .cmd = hmp_calc_dirty_rate, + }, ++ ++SRST ++``set_vcpu_dirty_limit`` ++ Set dirty page rate limit on virtual CPU, the information about all the ++ virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit`` ++ command. ++ERST ++ ++ { ++ .name = "set_vcpu_dirty_limit", ++ .args_type = "dirty_rate:l,cpu_index:l?", ++ .params = "dirty_rate [cpu_index]", ++ .help = "set dirty page rate limit, use cpu_index to set limit" ++ "\n\t\t\t\t\t on a specified virtual cpu", ++ .cmd = hmp_set_vcpu_dirty_limit, ++ }, ++ ++SRST ++``cancel_vcpu_dirty_limit`` ++ Cancel dirty page rate limit on virtual CPU, the information about all the ++ virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit`` ++ command. 
++ERST ++ ++ { ++ .name = "cancel_vcpu_dirty_limit", ++ .args_type = "cpu_index:l?", ++ .params = "[cpu_index]", ++ .help = "cancel dirty page rate limit, use cpu_index to cancel" ++ "\n\t\t\t\t\t limit on a specified virtual cpu", ++ .cmd = hmp_cancel_vcpu_dirty_limit, ++ }, +diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h +index 96d014826a..478820e54f 100644 +--- a/include/monitor/hmp.h ++++ b/include/monitor/hmp.h +@@ -131,6 +131,9 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict); + void hmp_replay_seek(Monitor *mon, const QDict *qdict); + void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict); + void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict); ++void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); ++void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); ++void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); + void hmp_human_readable_text_helper(Monitor *mon, + HumanReadableText *(*qmp_handler)(Error **)); + +diff --git a/qapi/migration.json b/qapi/migration.json +index d4ebc5f028..fee266017d 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1874,6 +1874,86 @@ + ## + { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } + ++## ++# @DirtyLimitInfo: ++# ++# Dirty page rate limit information of a virtual CPU. ++# ++# @cpu-index: index of a virtual CPU. ++# ++# @limit-rate: upper limit of dirty page rate (MB/s) for a virtual ++# CPU, 0 means unlimited. ++# ++# @current-rate: current dirty page rate (MB/s) for a virtual CPU. ++# ++# Since: 6.2 ++# ++## ++{ 'struct': 'DirtyLimitInfo', ++ 'data': { 'cpu-index': 'int', ++ 'limit-rate': 'uint64', ++ 'current-rate': 'uint64' } } ++ ++## ++# @set-vcpu-dirty-limit: ++# ++# Set the upper limit of dirty page rate for virtual CPUs. ++# ++# Requires KVM with accelerator property "dirty-ring-size" set. ++# A virtual CPU's dirty page rate is a measure of its memory load. ++# To observe dirty page rates, use @calc-dirty-rate. 
++# ++# @cpu-index: index of a virtual CPU, default is all. ++# ++# @dirty-rate: upper limit of dirty page rate (MB/s) for virtual CPUs. ++# ++# Since: 6.2 ++# ++# Example: ++# {"execute": "set-vcpu-dirty-limit", ++# "arguments": { "dirty-rate": 200, ++# "cpu-index": 1 } } ++# ++## ++{ 'command': 'set-vcpu-dirty-limit', ++ 'data': { '*cpu-index': 'int', ++ 'dirty-rate': 'uint64' } } ++ ++## ++# @cancel-vcpu-dirty-limit: ++# ++# Cancel the upper limit of dirty page rate for virtual CPUs. ++# ++# Cancel the dirty page limit for the vCPU which has been set with ++# set-vcpu-dirty-limit command. Note that this command requires ++# support from dirty ring, same as the "set-vcpu-dirty-limit". ++# ++# @cpu-index: index of a virtual CPU, default is all. ++# ++# Since: 6.2 ++# ++# Example: ++# {"execute": "cancel-vcpu-dirty-limit", ++# "arguments": { "cpu-index": 1 } } ++# ++## ++{ 'command': 'cancel-vcpu-dirty-limit', ++ 'data': { '*cpu-index': 'int'} } ++ ++## ++# @query-vcpu-dirty-limit: ++# ++# Returns information about virtual CPU dirty page rate limits, if any.
++# ++# Since: 6.2 ++# ++# Example: ++# {"execute": "query-vcpu-dirty-limit"} ++# ++## ++{ 'command': 'query-vcpu-dirty-limit', ++ 'returns': [ 'DirtyLimitInfo' ] } ++ + ## + # @snapshot-save: + # +diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c +index e5a4f970bd..8d98cb7f2c 100644 +--- a/softmmu/dirtylimit.c ++++ b/softmmu/dirtylimit.c +@@ -14,8 +14,12 @@ + #include "qapi/error.h" + #include "qemu/main-loop.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qmp/qdict.h" ++#include "qapi/error.h" + #include "sysemu/dirtyrate.h" + #include "sysemu/dirtylimit.h" ++#include "monitor/hmp.h" ++#include "monitor/monitor.h" + #include "exec/memory.h" + #include "hw/boards.h" + #include "sysemu/kvm.h" +@@ -405,3 +409,193 @@ void dirtylimit_vcpu_execute(CPUState *cpu) + usleep(cpu->throttle_us_per_full); + } + } ++ ++static void dirtylimit_init(void) ++{ ++ dirtylimit_state_initialize(); ++ dirtylimit_change(true); ++ vcpu_dirty_rate_stat_initialize(); ++ vcpu_dirty_rate_stat_start(); ++} ++ ++static void dirtylimit_cleanup(void) ++{ ++ vcpu_dirty_rate_stat_stop(); ++ vcpu_dirty_rate_stat_finalize(); ++ dirtylimit_change(false); ++ dirtylimit_state_finalize(); ++} ++ ++void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, ++ int64_t cpu_index, ++ Error **errp) ++{ ++ if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { ++ return; ++ } ++ ++ if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { ++ error_setg(errp, "incorrect cpu index specified"); ++ return; ++ } ++ ++ if (!dirtylimit_in_service()) { ++ return; ++ } ++ ++ dirtylimit_state_lock(); ++ ++ if (has_cpu_index) { ++ dirtylimit_set_vcpu(cpu_index, 0, false); ++ } else { ++ dirtylimit_set_all(0, false); ++ } ++ ++ if (!dirtylimit_state->limited_nvcpu) { ++ dirtylimit_cleanup(); ++ } ++ ++ dirtylimit_state_unlock(); ++} ++ ++void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) ++{ ++ int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); ++ Error *err = NULL; ++ ++ 
qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); ++ if (err) { ++ hmp_handle_error(mon, err); ++ return; ++ } ++ ++ monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " ++ "dirty limit for virtual CPU]\n"); ++} ++ ++void qmp_set_vcpu_dirty_limit(bool has_cpu_index, ++ int64_t cpu_index, ++ uint64_t dirty_rate, ++ Error **errp) ++{ ++ if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { ++ error_setg(errp, "dirty page limit feature requires KVM with" ++ " accelerator property 'dirty-ring-size' set'"); ++ return; ++ } ++ ++ if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { ++ error_setg(errp, "incorrect cpu index specified"); ++ return; ++ } ++ ++ if (!dirty_rate) { ++ qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); ++ return; ++ } ++ ++ dirtylimit_state_lock(); ++ ++ if (!dirtylimit_in_service()) { ++ dirtylimit_init(); ++ } ++ ++ if (has_cpu_index) { ++ dirtylimit_set_vcpu(cpu_index, dirty_rate, true); ++ } else { ++ dirtylimit_set_all(dirty_rate, true); ++ } ++ ++ dirtylimit_state_unlock(); ++} ++ ++void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) ++{ ++ int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); ++ int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); ++ Error *err = NULL; ++ ++ qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); ++ if (err) { ++ hmp_handle_error(mon, err); ++ return; ++ } ++ ++ monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " ++ "dirty limit for virtual CPU]\n"); ++} ++ ++static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) ++{ ++ DirtyLimitInfo *info = NULL; ++ ++ info = g_malloc0(sizeof(*info)); ++ info->cpu_index = cpu_index; ++ info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; ++ info->current_rate = vcpu_dirty_rate_get(cpu_index); ++ ++ return info; ++} ++ ++static struct DirtyLimitInfoList *dirtylimit_query_all(void) ++{ ++ int i, index; ++ DirtyLimitInfo *info = NULL; ++ 
DirtyLimitInfoList *head = NULL, **tail = &head; ++ ++ dirtylimit_state_lock(); ++ ++ if (!dirtylimit_in_service()) { ++ dirtylimit_state_unlock(); ++ return NULL; ++ } ++ ++ for (i = 0; i < dirtylimit_state->max_cpus; i++) { ++ index = dirtylimit_state->states[i].cpu_index; ++ if (dirtylimit_vcpu_get_state(index)->enabled) { ++ info = dirtylimit_query_vcpu(index); ++ QAPI_LIST_APPEND(tail, info); ++ } ++ } ++ ++ dirtylimit_state_unlock(); ++ ++ return head; ++} ++ ++struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) ++{ ++ if (!dirtylimit_in_service()) { ++ return NULL; ++ } ++ ++ return dirtylimit_query_all(); ++} ++ ++void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) ++{ ++ DirtyLimitInfoList *limit, *head, *info = NULL; ++ Error *err = NULL; ++ ++ if (!dirtylimit_in_service()) { ++ monitor_printf(mon, "Dirty page limit not enabled!\n"); ++ return; ++ } ++ ++ info = qmp_query_vcpu_dirty_limit(&err); ++ if (err) { ++ hmp_handle_error(mon, err); ++ return; ++ } ++ ++ head = info; ++ for (limit = head; limit != NULL; limit = limit->next) { ++ monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," ++ " current rate %"PRIi64 " (MB/s)\n", ++ limit->value->cpu_index, ++ limit->value->limit_rate, ++ limit->value->current_rate); ++ } ++ ++ g_free(info); ++} +diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c +index 7f103ea3fd..4b216a0435 100644 +--- a/tests/qtest/qmp-cmd-test.c ++++ b/tests/qtest/qmp-cmd-test.c +@@ -110,6 +110,8 @@ static bool query_is_ignored(const char *cmd) + "query-sev-capabilities", + "query-sgx", + "query-sgx-capabilities", ++ /* Success depends on enabling dirty page rate limit */ ++ "query-vcpu-dirty-limit", + NULL + }; + int i; +-- +2.27.0 + diff --git a/softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch b/softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch new file mode 100644 index 0000000..f408a51 --- /dev/null +++ 
b/softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch @@ -0,0 +1,214 @@ +From 1c1049bda8e91cc6015c32fc7cc9d0f16ad46b58 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:33 +0800 +Subject: [PATCH 1/3] softmmu/dirtylimit: Implement vCPU dirtyrate calculation + periodically +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce the third method GLOBAL_DIRTY_LIMIT of dirty +tracking to calculate dirtyrate periodically for dirty page +rate limit. + +Add dirtylimit.c to implement dirtyrate calculation periodically, +which will be used for dirty page rate limit. + +Add dirtylimit.h to export util functions for dirty page rate +limit implementation. + +Signed-off-by: Hyman Huang(黄勇) +Reviewed-by: Peter Xu +Message-Id: <5d0d641bffcb9b1c4cc3e323b6dfecb36050d948.1656177590.git.huangy81@chinatelecom.cn> +Signed-off-by: Dr. David Alan Gilbert +--- + include/exec/memory.h | 5 +- + include/sysemu/dirtylimit.h | 22 +++++++ + softmmu/dirtylimit.c | 116 ++++++++++++++++++++++++++++++++++++ + softmmu/meson.build | 1 + + 4 files changed, 143 insertions(+), 1 deletion(-) + create mode 100644 include/sysemu/dirtylimit.h + create mode 100644 softmmu/dirtylimit.c + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 3e84d62e40..4326d74b95 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -69,7 +69,10 @@ static inline void fuzz_dma_read_cb(size_t addr, + /* Dirty tracking enabled because measuring dirty rate */ + #define GLOBAL_DIRTY_DIRTY_RATE (1U << 1) + +-#define GLOBAL_DIRTY_MASK (0x3) ++/* Dirty tracking enabled because dirty limit */ ++#define GLOBAL_DIRTY_LIMIT (1U << 2) ++ ++#define GLOBAL_DIRTY_MASK (0x7) + + extern unsigned int global_dirty_tracking; + +diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h +new file mode 100644 +index 0000000000..da459f03d6 +--- /dev/null ++++ b/include/sysemu/dirtylimit.h +@@ -0,0
+1,22 @@ ++/* ++ * Dirty page rate limit common functions ++ * ++ * Copyright (c) 2022 CHINA TELECOM CO.,LTD. ++ * ++ * Authors: ++ * Hyman Huang(黄勇) ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef QEMU_DIRTYRLIMIT_H ++#define QEMU_DIRTYRLIMIT_H ++ ++#define DIRTYLIMIT_CALC_TIME_MS 1000 /* 1000ms */ ++ ++int64_t vcpu_dirty_rate_get(int cpu_index); ++void vcpu_dirty_rate_stat_start(void); ++void vcpu_dirty_rate_stat_stop(void); ++void vcpu_dirty_rate_stat_initialize(void); ++void vcpu_dirty_rate_stat_finalize(void); ++#endif +diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c +new file mode 100644 +index 0000000000..ebdc064c9d +--- /dev/null ++++ b/softmmu/dirtylimit.c +@@ -0,0 +1,116 @@ ++/* ++ * Dirty page rate limit implementation code ++ * ++ * Copyright (c) 2022 CHINA TELECOM CO.,LTD. ++ * ++ * Authors: ++ * Hyman Huang(黄勇) ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "qemu/main-loop.h" ++#include "qapi/qapi-commands-migration.h" ++#include "sysemu/dirtyrate.h" ++#include "sysemu/dirtylimit.h" ++#include "exec/memory.h" ++#include "hw/boards.h" ++ ++struct { ++ VcpuStat stat; ++ bool running; ++ QemuThread thread; ++} *vcpu_dirty_rate_stat; ++ ++static void vcpu_dirty_rate_stat_collect(void) ++{ ++ VcpuStat stat; ++ int i = 0; ++ ++ /* calculate vcpu dirtyrate */ ++ vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS, ++ &stat, ++ GLOBAL_DIRTY_LIMIT, ++ false); ++ ++ for (i = 0; i < stat.nvcpu; i++) { ++ vcpu_dirty_rate_stat->stat.rates[i].id = i; ++ vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = ++ stat.rates[i].dirty_rate; ++ } ++ ++ free(stat.rates); ++} ++ ++static void *vcpu_dirty_rate_stat_thread(void *opaque) ++{ ++ rcu_register_thread(); ++ ++ /* start log sync */ ++ global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); ++ ++ while (qatomic_read(&vcpu_dirty_rate_stat->running)) { ++ vcpu_dirty_rate_stat_collect(); ++ } ++ ++ /* stop log sync */ ++ global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); ++ ++ rcu_unregister_thread(); ++ return NULL; ++} ++ ++int64_t vcpu_dirty_rate_get(int cpu_index) ++{ ++ DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; ++ return qatomic_read_i64(&rates[cpu_index].dirty_rate); ++} ++ ++void vcpu_dirty_rate_stat_start(void) ++{ ++ if (qatomic_read(&vcpu_dirty_rate_stat->running)) { ++ return; ++ } ++ ++ qatomic_set(&vcpu_dirty_rate_stat->running, 1); ++ qemu_thread_create(&vcpu_dirty_rate_stat->thread, ++ "dirtyrate-stat", ++ vcpu_dirty_rate_stat_thread, ++ NULL, ++ QEMU_THREAD_JOINABLE); ++} ++ ++void vcpu_dirty_rate_stat_stop(void) ++{ ++ qatomic_set(&vcpu_dirty_rate_stat->running, 0); ++ qemu_mutex_unlock_iothread(); ++ qemu_thread_join(&vcpu_dirty_rate_stat->thread); ++ qemu_mutex_lock_iothread(); ++} ++ ++void vcpu_dirty_rate_stat_initialize(void) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ int 
max_cpus = ms->smp.max_cpus; ++ ++ vcpu_dirty_rate_stat = ++ g_malloc0(sizeof(*vcpu_dirty_rate_stat)); ++ ++ vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; ++ vcpu_dirty_rate_stat->stat.rates = ++ g_malloc0(sizeof(DirtyRateVcpu) * max_cpus); ++ ++ vcpu_dirty_rate_stat->running = false; ++} ++ ++void vcpu_dirty_rate_stat_finalize(void) ++{ ++ free(vcpu_dirty_rate_stat->stat.rates); ++ vcpu_dirty_rate_stat->stat.rates = NULL; ++ ++ free(vcpu_dirty_rate_stat); ++ vcpu_dirty_rate_stat = NULL; ++} +diff --git a/softmmu/meson.build b/softmmu/meson.build +index d8e03018ab..95029a5db2 100644 +--- a/softmmu/meson.build ++++ b/softmmu/meson.build +@@ -15,6 +15,7 @@ specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files( + 'vl.c', + 'cpu-timers.c', + 'runstate-action.c', ++ 'dirtylimit.c', + )]) + + specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files( +-- +2.27.0 + diff --git a/softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch b/softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch new file mode 100644 index 0000000..b515a73 --- /dev/null +++ b/softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch @@ -0,0 +1,469 @@ +From 7b6ab56e68fb5031ea13b82743415413b1e70e71 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:35 +0800 +Subject: [PATCH 3/3] softmmu/dirtylimit: Implement virtual CPU throttle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Setup a negative feedback system when vCPU thread +handling KVM_EXIT_DIRTY_RING_FULL exit by introducing +throttle_us_per_full field in struct CPUState. Sleep +throttle_us_per_full microseconds to throttle vCPU +if dirtylimit is in service. + +Signed-off-by: Hyman Huang(黄勇) +Reviewed-by: Peter Xu +Message-Id: <977e808e03a1cef5151cae75984658b6821be618.1656177590.git.huangy81@chinatelecom.cn> +Signed-off-by: Dr. 
David Alan Gilbert +--- + accel/kvm/kvm-all.c | 20 ++- + include/hw/core/cpu.h | 6 + + include/sysemu/dirtylimit.h | 15 ++ + softmmu/dirtylimit.c | 291 ++++++++++++++++++++++++++++++++++++ + softmmu/trace-events | 7 + + 5 files changed, 338 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index d0c4310507..946ccb260b 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -45,6 +45,7 @@ + #include "qemu/guest-random.h" + #include "sysemu/hw_accel.h" + #include "kvm-cpus.h" ++#include "sysemu/dirtylimit.h" + + #include "hw/boards.h" + +@@ -493,6 +494,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) + cpu->kvm_state = s; + cpu->vcpu_dirty = true; + cpu->dirty_pages = 0; ++ cpu->throttle_us_per_full = 0; + + mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size < 0) { +@@ -1486,6 +1488,11 @@ static void *kvm_dirty_ring_reaper_thread(void *data) + */ + sleep(1); + ++ /* keep sleeping so that dirtylimit not be interfered by reaper */ ++ if (dirtylimit_in_service()) { ++ continue; ++ } ++ + trace_kvm_dirty_ring_reaper("wakeup"); + r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; + +@@ -2965,8 +2972,19 @@ int kvm_cpu_exec(CPUState *cpu) + */ + trace_kvm_dirty_ring_full(cpu->cpu_index); + qemu_mutex_lock_iothread(); +- kvm_dirty_ring_reap(kvm_state, NULL); ++ /* ++ * We throttle vCPU by making it sleep once it exit from kernel ++ * due to dirty ring full. In the dirtylimit scenario, reaping ++ * all vCPUs after a single vCPU dirty ring get full result in ++ * the miss of sleep, so just reap the ring-fulled vCPU. 
++ */ ++ if (dirtylimit_in_service()) { ++ kvm_dirty_ring_reap(kvm_state, cpu); ++ } else { ++ kvm_dirty_ring_reap(kvm_state, NULL); ++ } + qemu_mutex_unlock_iothread(); ++ dirtylimit_vcpu_execute(cpu); + ret = 0; + break; + case KVM_EXIT_SYSTEM_EVENT: +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index e948e81f1a..9631c1e2f6 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -411,6 +411,12 @@ struct CPUState { + */ + bool throttle_thread_scheduled; + ++ /* ++ * Sleep throttle_us_per_full microseconds once dirty ring is full ++ * if dirty page rate limit is enabled. ++ */ ++ int64_t throttle_us_per_full; ++ + bool ignore_memory_transaction_failures; + + struct hax_vcpu_state *hax_vcpu; +diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h +index da459f03d6..8d2c1f3a6b 100644 +--- a/include/sysemu/dirtylimit.h ++++ b/include/sysemu/dirtylimit.h +@@ -19,4 +19,19 @@ void vcpu_dirty_rate_stat_start(void); + void vcpu_dirty_rate_stat_stop(void); + void vcpu_dirty_rate_stat_initialize(void); + void vcpu_dirty_rate_stat_finalize(void); ++ ++void dirtylimit_state_lock(void); ++void dirtylimit_state_unlock(void); ++void dirtylimit_state_initialize(void); ++void dirtylimit_state_finalize(void); ++bool dirtylimit_in_service(void); ++bool dirtylimit_vcpu_index_valid(int cpu_index); ++void dirtylimit_process(void); ++void dirtylimit_change(bool start); ++void dirtylimit_set_vcpu(int cpu_index, ++ uint64_t quota, ++ bool enable); ++void dirtylimit_set_all(uint64_t quota, ++ bool enable); ++void dirtylimit_vcpu_execute(CPUState *cpu); + #endif +diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c +index ebdc064c9d..e5a4f970bd 100644 +--- a/softmmu/dirtylimit.c ++++ b/softmmu/dirtylimit.c +@@ -18,6 +18,26 @@ + #include "sysemu/dirtylimit.h" + #include "exec/memory.h" + #include "hw/boards.h" ++#include "sysemu/kvm.h" ++#include "trace.h" ++ ++/* ++ * Dirtylimit stop working if dirty page rate error ++ * value less than 
DIRTYLIMIT_TOLERANCE_RANGE ++ */ ++#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ ++/* ++ * Plus or minus vcpu sleep time linearly if dirty ++ * page rate error value percentage over ++ * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. ++ * Otherwise, plus or minus a fixed vcpu sleep time. ++ */ ++#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 ++/* ++ * Max vcpu sleep time percentage during a cycle ++ * composed of dirty ring full and sleep time. ++ */ ++#define DIRTYLIMIT_THROTTLE_PCT_MAX 99 + + struct { + VcpuStat stat; +@@ -25,6 +45,30 @@ struct { + QemuThread thread; + } *vcpu_dirty_rate_stat; + ++typedef struct VcpuDirtyLimitState { ++ int cpu_index; ++ bool enabled; ++ /* ++ * Quota dirty page rate, unit is MB/s ++ * zero if not enabled. ++ */ ++ uint64_t quota; ++} VcpuDirtyLimitState; ++ ++struct { ++ VcpuDirtyLimitState *states; ++ /* Max cpus number configured by user */ ++ int max_cpus; ++ /* Number of vcpu under dirtylimit */ ++ int limited_nvcpu; ++} *dirtylimit_state; ++ ++/* protect dirtylimit_state */ ++static QemuMutex dirtylimit_mutex; ++ ++/* dirtylimit thread quit if dirtylimit_quit is true */ ++static bool dirtylimit_quit; ++ + static void vcpu_dirty_rate_stat_collect(void) + { + VcpuStat stat; +@@ -54,6 +98,9 @@ static void *vcpu_dirty_rate_stat_thread(void *opaque) + + while (qatomic_read(&vcpu_dirty_rate_stat->running)) { + vcpu_dirty_rate_stat_collect(); ++ if (dirtylimit_in_service()) { ++ dirtylimit_process(); ++ } + } + + /* stop log sync */ +@@ -86,9 +133,11 @@ void vcpu_dirty_rate_stat_start(void) + void vcpu_dirty_rate_stat_stop(void) + { + qatomic_set(&vcpu_dirty_rate_stat->running, 0); ++ dirtylimit_state_unlock(); + qemu_mutex_unlock_iothread(); + qemu_thread_join(&vcpu_dirty_rate_stat->thread); + qemu_mutex_lock_iothread(); ++ dirtylimit_state_lock(); + } + + void vcpu_dirty_rate_stat_initialize(void) +@@ -114,3 +163,245 @@ void vcpu_dirty_rate_stat_finalize(void) + free(vcpu_dirty_rate_stat); + vcpu_dirty_rate_stat = NULL; + } ++ ++void 
dirtylimit_state_lock(void) ++{ ++ qemu_mutex_lock(&dirtylimit_mutex); ++} ++ ++void dirtylimit_state_unlock(void) ++{ ++ qemu_mutex_unlock(&dirtylimit_mutex); ++} ++ ++static void ++__attribute__((__constructor__)) dirtylimit_mutex_init(void) ++{ ++ qemu_mutex_init(&dirtylimit_mutex); ++} ++ ++static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) ++{ ++ return &dirtylimit_state->states[cpu_index]; ++} ++ ++void dirtylimit_state_initialize(void) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ int max_cpus = ms->smp.max_cpus; ++ int i; ++ ++ dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); ++ ++ dirtylimit_state->states = ++ g_malloc0(sizeof(VcpuDirtyLimitState) * max_cpus); ++ ++ for (i = 0; i < max_cpus; i++) { ++ dirtylimit_state->states[i].cpu_index = i; ++ } ++ ++ dirtylimit_state->max_cpus = max_cpus; ++ trace_dirtylimit_state_initialize(max_cpus); ++} ++ ++void dirtylimit_state_finalize(void) ++{ ++ free(dirtylimit_state->states); ++ dirtylimit_state->states = NULL; ++ ++ free(dirtylimit_state); ++ dirtylimit_state = NULL; ++ ++ trace_dirtylimit_state_finalize(); ++} ++ ++bool dirtylimit_in_service(void) ++{ ++ return !!dirtylimit_state; ++} ++ ++bool dirtylimit_vcpu_index_valid(int cpu_index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ++ return !(cpu_index < 0 || ++ cpu_index >= ms->smp.max_cpus); ++} ++ ++static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) ++{ ++ static uint64_t max_dirtyrate; ++ uint32_t dirty_ring_size = kvm_dirty_ring_size(); ++ uint64_t dirty_ring_size_meory_MB = ++ dirty_ring_size * TARGET_PAGE_SIZE >> 20; ++ ++ if (max_dirtyrate < dirtyrate) { ++ max_dirtyrate = dirtyrate; ++ } ++ ++ return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate; ++} ++ ++static inline bool dirtylimit_done(uint64_t quota, ++ uint64_t current) ++{ ++ uint64_t min, max; ++ ++ min = MIN(quota, current); ++ max = MAX(quota, current); ++ ++ return ((max - min) <= 
DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; ++} ++ ++static inline bool ++dirtylimit_need_linear_adjustment(uint64_t quota, ++ uint64_t current) ++{ ++ uint64_t min, max; ++ ++ min = MIN(quota, current); ++ max = MAX(quota, current); ++ ++ return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; ++} ++ ++static void dirtylimit_set_throttle(CPUState *cpu, ++ uint64_t quota, ++ uint64_t current) ++{ ++ int64_t ring_full_time_us = 0; ++ uint64_t sleep_pct = 0; ++ uint64_t throttle_us = 0; ++ ++ if (current == 0) { ++ cpu->throttle_us_per_full = 0; ++ return; ++ } ++ ++ ring_full_time_us = dirtylimit_dirty_ring_full_time(current); ++ ++ if (dirtylimit_need_linear_adjustment(quota, current)) { ++ if (quota < current) { ++ sleep_pct = (current - quota) * 100 / current; ++ throttle_us = ++ ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); ++ cpu->throttle_us_per_full += throttle_us; ++ } else { ++ sleep_pct = (quota - current) * 100 / quota; ++ throttle_us = ++ ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); ++ cpu->throttle_us_per_full -= throttle_us; ++ } ++ ++ trace_dirtylimit_throttle_pct(cpu->cpu_index, ++ sleep_pct, ++ throttle_us); ++ } else { ++ if (quota < current) { ++ cpu->throttle_us_per_full += ring_full_time_us / 10; ++ } else { ++ cpu->throttle_us_per_full -= ring_full_time_us / 10; ++ } ++ } ++ ++ /* ++ * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), ++ * current dirty page rate may never reach the quota, we should stop ++ * increasing sleep time? 
++ */ ++ cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, ++ ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); ++ ++ cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); ++} ++ ++static void dirtylimit_adjust_throttle(CPUState *cpu) ++{ ++ uint64_t quota = 0; ++ uint64_t current = 0; ++ int cpu_index = cpu->cpu_index; ++ ++ quota = dirtylimit_vcpu_get_state(cpu_index)->quota; ++ current = vcpu_dirty_rate_get(cpu_index); ++ ++ if (!dirtylimit_done(quota, current)) { ++ dirtylimit_set_throttle(cpu, quota, current); ++ } ++ ++ return; ++} ++ ++void dirtylimit_process(void) ++{ ++ CPUState *cpu; ++ ++ if (!qatomic_read(&dirtylimit_quit)) { ++ dirtylimit_state_lock(); ++ ++ if (!dirtylimit_in_service()) { ++ dirtylimit_state_unlock(); ++ return; ++ } ++ ++ CPU_FOREACH(cpu) { ++ if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { ++ continue; ++ } ++ dirtylimit_adjust_throttle(cpu); ++ } ++ dirtylimit_state_unlock(); ++ } ++} ++ ++void dirtylimit_change(bool start) ++{ ++ if (start) { ++ qatomic_set(&dirtylimit_quit, 0); ++ } else { ++ qatomic_set(&dirtylimit_quit, 1); ++ } ++} ++ ++void dirtylimit_set_vcpu(int cpu_index, ++ uint64_t quota, ++ bool enable) ++{ ++ trace_dirtylimit_set_vcpu(cpu_index, quota); ++ ++ if (enable) { ++ dirtylimit_state->states[cpu_index].quota = quota; ++ if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { ++ dirtylimit_state->limited_nvcpu++; ++ } ++ } else { ++ dirtylimit_state->states[cpu_index].quota = 0; ++ if (dirtylimit_state->states[cpu_index].enabled) { ++ dirtylimit_state->limited_nvcpu--; ++ } ++ } ++ ++ dirtylimit_state->states[cpu_index].enabled = enable; ++} ++ ++void dirtylimit_set_all(uint64_t quota, ++ bool enable) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ int max_cpus = ms->smp.max_cpus; ++ int i; ++ ++ for (i = 0; i < max_cpus; i++) { ++ dirtylimit_set_vcpu(i, quota, enable); ++ } ++} ++ ++void dirtylimit_vcpu_execute(CPUState *cpu) ++{ ++ if (dirtylimit_in_service() && ++ 
dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && ++ cpu->throttle_us_per_full) { ++ trace_dirtylimit_vcpu_execute(cpu->cpu_index, ++ cpu->throttle_us_per_full); ++ usleep(cpu->throttle_us_per_full); ++ } ++} +diff --git a/softmmu/trace-events b/softmmu/trace-events +index 9c88887b3c..22606dc27b 100644 +--- a/softmmu/trace-events ++++ b/softmmu/trace-events +@@ -31,3 +31,10 @@ runstate_set(int current_state, const char *current_state_str, int new_state, co + system_wakeup_request(int reason) "reason=%d" + qemu_system_shutdown_request(int reason) "reason=%d" + qemu_system_powerdown_request(void) "" ++ ++#dirtylimit.c ++dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d" ++dirtylimit_state_finalize(void) ++dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us" ++dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64 ++dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us" +-- +2.27.0 + diff --git a/target-i386-kvm-do-not-access-uninitialized-variable.patch b/target-i386-kvm-do-not-access-uninitialized-variable.patch new file mode 100644 index 0000000..3af5ef7 --- /dev/null +++ b/target-i386-kvm-do-not-access-uninitialized-variable.patch @@ -0,0 +1,77 @@ +From 550d43a946b61bdadb418e0f8bef8b98e646276d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 18 Mar 2022 16:23:47 +0100 +Subject: [PATCH 09/10] target/i386: kvm: do not access uninitialized variable + on older kernels + +from mainline-v7.0.0-rc1 +commit 3ec5ad40081b14af28496198b4d08dbe13386790 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit 3ec5ad40081b ("target/i386: kvm: do not access +uninitialized variable on older kernels") + +--------------------------------------------------------- + +target/i386: kvm: do not 
access uninitialized variable on older kernels + +KVM support for AMX includes a new system attribute, KVM_X86_XCOMP_GUEST_SUPP. +Commit 19db68ca68 ("x86: Grant AMX permission for guest", 2022-03-15) however +did not fully consider the behavior on older kernels. First, it warns +too aggressively. Second, it invokes the KVM_GET_DEVICE_ATTR ioctl +unconditionally and then uses the "bitmask" variable, which remains +uninitialized if the ioctl fails. Third, kvm_ioctl returns -errno rather +than -1 on errors. + +While at it, explain why the ioctl is needed and KVM_GET_SUPPORTED_CPUID +is not enough. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 49fca5ea88..20e418463d 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -409,6 +409,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + } + } else if (function == 0xd && index == 0 && + (reg == R_EAX || reg == R_EDX)) { ++ /* ++ * The value returned by KVM_GET_SUPPORTED_CPUID does not include ++ * features that still have to be enabled with the arch_prctl ++ * system call. QEMU needs the full value, which is retrieved ++ * with KVM_GET_DEVICE_ATTR. 
++ */ + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, +@@ -417,13 +423,16 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); + if (!sys_attr) { +- warn_report("cannot get sys attribute capabilities %d", sys_attr); ++ return ret; + } + + int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); +- if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { +- warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " +- "error: %d", rc); ++ if (rc < 0) { ++ if (rc != -ENXIO) { ++ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " ++ "error: %d", rc); ++ } ++ return ret; + } + ret = (reg == R_EAX) ? bitmask : bitmask >> 32; + } else if (function == 0x80000001 && reg == R_ECX) { +-- +2.27.0 + diff --git a/tests-Add-dirty-page-rate-limit-test.patch b/tests-Add-dirty-page-rate-limit-test.patch new file mode 100644 index 0000000..12ae236 --- /dev/null +++ b/tests-Add-dirty-page-rate-limit-test.patch @@ -0,0 +1,362 @@ +From 8a0f4dcf94b280d5b7db7f604c42d088c928ac0d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= + +Date: Sun, 26 Jun 2022 01:38:37 +0800 +Subject: [PATCH] tests: Add dirty page rate limit test +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add dirty page rate limit test if kernel support dirty ring, + +The following qmp commands are covered by this test case: +"calc-dirty-rate", "query-dirty-rate", "set-vcpu-dirty-limit", +"cancel-vcpu-dirty-limit" and "query-vcpu-dirty-limit". + +Signed-off-by: Hyman Huang(黄勇) +Acked-by: Peter Xu +Message-Id: +Signed-off-by: Dr. 
David Alan Gilbert +--- + tests/qtest/migration-helpers.c | 22 +++ + tests/qtest/migration-helpers.h | 2 + + tests/qtest/migration-test.c | 256 ++++++++++++++++++++++++++++++++ + 3 files changed, 280 insertions(+) + +diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c +index 4ee26014b7..1e594f9cb1 100644 +--- a/tests/qtest/migration-helpers.c ++++ b/tests/qtest/migration-helpers.c +@@ -75,6 +75,28 @@ QDict *wait_command(QTestState *who, const char *command, ...) + return ret; + } + ++/* ++ * Execute the qmp command only ++ */ ++QDict *qmp_command(QTestState *who, const char *command, ...) ++{ ++ va_list ap; ++ QDict *resp, *ret; ++ ++ va_start(ap, command); ++ resp = qtest_vqmp(who, command, ap); ++ va_end(ap); ++ ++ g_assert(!qdict_haskey(resp, "error")); ++ g_assert(qdict_haskey(resp, "return")); ++ ++ ret = qdict_get_qdict(resp, "return"); ++ qobject_ref(ret); ++ qobject_unref(resp); ++ ++ return ret; ++} ++ + /* + * Send QMP command "migrate". + * Arguments are built from @fmt... 
(formatted like +diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h +index d63bba9630..9bc809fb75 100644 +--- a/tests/qtest/migration-helpers.h ++++ b/tests/qtest/migration-helpers.h +@@ -22,6 +22,8 @@ QDict *wait_command_fd(QTestState *who, int fd, const char *command, ...); + GCC_FMT_ATTR(2, 3) + QDict *wait_command(QTestState *who, const char *command, ...); + ++QDict *qmp_command(QTestState *who, const char *command, ...); ++ + GCC_FMT_ATTR(3, 4) + void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...); + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 7b42f6fd90..8fad247f6c 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -23,6 +23,7 @@ + #include "qapi/qapi-visit-sockets.h" + #include "qapi/qobject-input-visitor.h" + #include "qapi/qobject-output-visitor.h" ++#include "qapi/qmp/qlist.h" + + #include "migration-helpers.h" + #include "tests/migration/migration-test.h" +@@ -42,6 +43,12 @@ static bool uffd_feature_thread_id; + /* A downtime where the test really should converge */ + #define CONVERGE_DOWNTIME 1000 + ++/* ++ * Dirtylimit stop working if dirty page rate error ++ * value less than DIRTYLIMIT_TOLERANCE_RANGE ++ */ ++#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ ++ + #if defined(__linux__) + #include + #include +@@ -1394,6 +1401,253 @@ static void test_multifd_tcp_cancel(void) + test_migrate_end(from, to2, true); + } + ++static void calc_dirty_rate(QTestState *who, uint64_t calc_time) ++{ ++ qobject_unref(qmp_command(who, ++ "{ 'execute': 'calc-dirty-rate'," ++ "'arguments': { " ++ "'calc-time': %ld," ++ "'mode': 'dirty-ring' }}", ++ calc_time)); ++} ++ ++static QDict *query_dirty_rate(QTestState *who) ++{ ++ return qmp_command(who, "{ 'execute': 'query-dirty-rate' }"); ++} ++ ++static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate) ++{ ++ qobject_unref(qmp_command(who, ++ "{ 'execute': 'set-vcpu-dirty-limit'," ++ 
"'arguments': { " ++ "'dirty-rate': %ld } }", ++ dirtyrate)); ++} ++ ++static void cancel_vcpu_dirty_limit(QTestState *who) ++{ ++ qobject_unref(qmp_command(who, ++ "{ 'execute': 'cancel-vcpu-dirty-limit' }")); ++} ++ ++static QDict *query_vcpu_dirty_limit(QTestState *who) ++{ ++ QDict *rsp; ++ ++ rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }"); ++ g_assert(!qdict_haskey(rsp, "error")); ++ g_assert(qdict_haskey(rsp, "return")); ++ ++ return rsp; ++} ++ ++static bool calc_dirtyrate_ready(QTestState *who) ++{ ++ QDict *rsp_return; ++ gchar *status; ++ ++ rsp_return = query_dirty_rate(who); ++ g_assert(rsp_return); ++ ++ status = g_strdup(qdict_get_str(rsp_return, "status")); ++ g_assert(status); ++ ++ return g_strcmp0(status, "measuring"); ++} ++ ++static void wait_for_calc_dirtyrate_complete(QTestState *who, ++ int64_t time_s) ++{ ++ int max_try_count = 10000; ++ usleep(time_s * 1000000); ++ ++ while (!calc_dirtyrate_ready(who) && max_try_count--) { ++ usleep(1000); ++ } ++ ++ /* ++ * Set the timeout with 10 s(max_try_count * 1000us), ++ * if dirtyrate measurement not complete, fail test. 
++ */ ++ g_assert_cmpint(max_try_count, !=, 0); ++} ++ ++static int64_t get_dirty_rate(QTestState *who) ++{ ++ QDict *rsp_return; ++ gchar *status; ++ QList *rates; ++ const QListEntry *entry; ++ QDict *rate; ++ int64_t dirtyrate; ++ ++ rsp_return = query_dirty_rate(who); ++ g_assert(rsp_return); ++ ++ status = g_strdup(qdict_get_str(rsp_return, "status")); ++ g_assert(status); ++ g_assert_cmpstr(status, ==, "measured"); ++ ++ rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate"); ++ g_assert(rates && !qlist_empty(rates)); ++ ++ entry = qlist_first(rates); ++ g_assert(entry); ++ ++ rate = qobject_to(QDict, qlist_entry_obj(entry)); ++ g_assert(rate); ++ ++ dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1); ++ ++ qobject_unref(rsp_return); ++ return dirtyrate; ++} ++ ++static int64_t get_limit_rate(QTestState *who) ++{ ++ QDict *rsp_return; ++ QList *rates; ++ const QListEntry *entry; ++ QDict *rate; ++ int64_t dirtyrate; ++ ++ rsp_return = query_vcpu_dirty_limit(who); ++ g_assert(rsp_return); ++ ++ rates = qdict_get_qlist(rsp_return, "return"); ++ g_assert(rates && !qlist_empty(rates)); ++ ++ entry = qlist_first(rates); ++ g_assert(entry); ++ ++ rate = qobject_to(QDict, qlist_entry_obj(entry)); ++ g_assert(rate); ++ ++ dirtyrate = qdict_get_try_int(rate, "limit-rate", -1); ++ ++ qobject_unref(rsp_return); ++ return dirtyrate; ++} ++ ++static QTestState *dirtylimit_start_vm(void) ++{ ++ QTestState *vm = NULL; ++ g_autofree gchar *cmd = NULL; ++ const char *arch = qtest_get_arch(); ++ g_autofree char *bootpath = NULL; ++ ++ assert((strcmp(arch, "x86_64") == 0)); ++ bootpath = g_strdup_printf("%s/bootsect", tmpfs); ++ assert(sizeof(x86_bootsect) == 512); ++ init_bootfile(bootpath, x86_bootsect, sizeof(x86_bootsect)); ++ ++ cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " ++ "-name dirtylimit-test,debug-threads=on " ++ "-m 150M -smp 1 " ++ "-serial file:%s/vm_serial " ++ "-drive file=%s,format=raw ", ++ tmpfs, bootpath); ++ ++ vm = qtest_init(cmd); ++ 
return vm; ++} ++ ++static void dirtylimit_stop_vm(QTestState *vm) ++{ ++ qtest_quit(vm); ++ cleanup("bootsect"); ++ cleanup("vm_serial"); ++} ++ ++static void test_vcpu_dirty_limit(void) ++{ ++ QTestState *vm; ++ int64_t origin_rate; ++ int64_t quota_rate; ++ int64_t rate ; ++ int max_try_count = 20; ++ int hit = 0; ++ ++ /* Start vm for vcpu dirtylimit test */ ++ vm = dirtylimit_start_vm(); ++ ++ /* Wait for the first serial output from the vm*/ ++ wait_for_serial("vm_serial"); ++ ++ /* Do dirtyrate measurement with calc time equals 1s */ ++ calc_dirty_rate(vm, 1); ++ ++ /* Sleep calc time and wait for calc dirtyrate complete */ ++ wait_for_calc_dirtyrate_complete(vm, 1); ++ ++ /* Query original dirty page rate */ ++ origin_rate = get_dirty_rate(vm); ++ ++ /* VM booted from bootsect should dirty memory steadily */ ++ assert(origin_rate != 0); ++ ++ /* Setup quota dirty page rate at half of origin */ ++ quota_rate = origin_rate / 2; ++ ++ /* Set dirtylimit */ ++ dirtylimit_set_all(vm, quota_rate); ++ ++ /* ++ * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit ++ * works literally ++ */ ++ g_assert_cmpint(quota_rate, ==, get_limit_rate(vm)); ++ ++ /* Sleep a bit to check if it take effect */ ++ usleep(2000000); ++ ++ /* ++ * Check if dirtylimit take effect realistically, set the ++ * timeout with 20 s(max_try_count * 1s), if dirtylimit ++ * doesn't take effect, fail test. 
++ */ ++ while (--max_try_count) { ++ calc_dirty_rate(vm, 1); ++ wait_for_calc_dirtyrate_complete(vm, 1); ++ rate = get_dirty_rate(vm); ++ ++ /* ++ * Assume hitting if current rate is less ++ * than quota rate (within accepting error) ++ */ ++ if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { ++ hit = 1; ++ break; ++ } ++ } ++ ++ g_assert_cmpint(hit, ==, 1); ++ ++ hit = 0; ++ max_try_count = 20; ++ ++ /* Check if dirtylimit cancellation take effect */ ++ cancel_vcpu_dirty_limit(vm); ++ while (--max_try_count) { ++ calc_dirty_rate(vm, 1); ++ wait_for_calc_dirtyrate_complete(vm, 1); ++ rate = get_dirty_rate(vm); ++ ++ /* ++ * Assume dirtylimit be canceled if current rate is ++ * greater than quota rate (within accepting error) ++ */ ++ if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { ++ hit = 1; ++ break; ++ } ++ } ++ ++ g_assert_cmpint(hit, ==, 1); ++ dirtylimit_stop_vm(vm); ++} ++ + static bool kvm_dirty_ring_supported(void) + { + #if defined(__linux__) && defined(HOST_X86_64) +@@ -1483,6 +1737,8 @@ int main(int argc, char **argv) + if (kvm_dirty_ring_supported()) { + qtest_add_func("/migration/dirty_ring", + test_precopy_unix_dirty_ring); ++ qtest_add_func("/migration/vcpu_dirty_limit", ++ test_vcpu_dirty_limit); + } + + ret = g_test_run(); +-- +2.27.0 + diff --git a/x86-Add-AMX-CPUIDs-enumeration.patch b/x86-Add-AMX-CPUIDs-enumeration.patch new file mode 100644 index 0000000..ef7d5ef --- /dev/null +++ b/x86-Add-AMX-CPUIDs-enumeration.patch @@ -0,0 +1,138 @@ +From 42f96b9e73ff4a23fad56bc8fefea5e477ee95b9 Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:31 -0800 +Subject: [PATCH 06/10] x86: Add AMX CPUIDs enumeration + +from mainline-v7.0.0-rc0 +commit f21a48171cf3fa39532fc8553fd82e81b88b6474 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit f21a48171cf3 ("x86: Add AMX CPUIDs enumeration") + +---------------------------------------------- + 
+x86: Add AMX CPUIDs enumeration + +Add AMX primary feature bits XFD and AMX_TILE to +enumerate the CPU's AMX capability. Meanwhile, add +AMX TILE and TMUL CPUID leaf and subleaves which +exist when AMX TILE is present to provide the maximum +capability of TILE and TMUL. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-6-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 55 ++++++++++++++++++++++++++++++++++++++++--- + target/i386/kvm/kvm.c | 4 +++- + 2 files changed, 55 insertions(+), 4 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index da81e47dc3..1bc03d3eef 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -574,6 +574,18 @@ static CPUCacheInfo legacy_l3_cache = { + #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ + #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ + ++/* CPUID Leaf 0x1D constants: */ ++#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1 ++#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000 ++#define INTEL_AMX_BYTES_PER_TILE 0x400 ++#define INTEL_AMX_BYTES_PER_ROW 0x40 ++#define INTEL_AMX_TILE_MAX_NAMES 0x8 ++#define INTEL_AMX_TILE_MAX_ROWS 0x10 ++ ++/* CPUID Leaf 0x1E constants: */ ++#define INTEL_AMX_TMUL_MAX_K 0x10 ++#define INTEL_AMX_TMUL_MAX_N 0x40 ++ + void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + uint32_t vendor2, uint32_t vendor3) + { +@@ -843,8 +855,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, +- NULL, NULL, NULL, "avx512-fp16", +- NULL, NULL, "spec-ctrl", "stibp", ++ NULL, NULL, "amx-bf16", "avx512-fp16", ++ "amx-tile", "amx-int8", "spec-ctrl", "stibp", + NULL, "arch-capabilities", "core-capability", "ssbd", + }, + .cpuid = { +@@ -909,7 +921,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = CPUID_FEATURE_WORD, + 
.feat_names = { + "xsaveopt", "xsavec", "xgetbv1", "xsaves", +- NULL, NULL, NULL, NULL, ++ "xfd", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +@@ -5605,6 +5617,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + break; + } ++ case 0x1D: { ++ /* AMX TILE */ ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { ++ break; ++ } ++ ++ if (count == 0) { ++ /* Highest numbered palette subleaf */ ++ *eax = INTEL_AMX_TILE_MAX_SUBLEAF; ++ } else if (count == 1) { ++ *eax = INTEL_AMX_TOTAL_TILE_BYTES | ++ (INTEL_AMX_BYTES_PER_TILE << 16); ++ *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16); ++ *ecx = INTEL_AMX_TILE_MAX_ROWS; ++ } ++ break; ++ } ++ case 0x1E: { ++ /* AMX TMUL */ ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { ++ break; ++ } ++ ++ if (count == 0) { ++ /* Highest numbered palette subleaf */ ++ *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8); ++ } ++ break; ++ } + case 0x40000000: + /* + * CPUID code in kvm_arch_init_vcpu() ignores stuff +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index e7f57d05a2..60ccdec5e8 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; +- case 0x14: { ++ case 0x14: ++ case 0x1d: ++ case 0x1e: { + uint32_t times; + + c->function = i; +-- +2.27.0 + diff --git a/x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch b/x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch new file mode 100644 index 0000000..d47f736 --- /dev/null +++ b/x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch @@ -0,0 +1,115 @@ +From 98f5dbc3fd8390728401528786ac94b39f0581ee Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:28 -0800 +Subject: [PATCH 03/10] x86: Add AMX 
XTILECFG and XTILEDATA components + +from mainline-v7.0.0-rc0 +commit 1f16764f7d4515bfd5e4ae0aae814fa280a7d0c8 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit 1f16764f7d45 ("x86: Add AMX XTILECFG and XTILEDATA components") + +------------------------------------------------------------- + +x86: Add AMX XTILECFG and XTILEDATA components + +The AMX TILECFG register and the TMMx tile data registers are +saved/restored via XSAVE, respectively in state component 17 +(64 bytes) and state component 18 (8192 bytes). + +Add AMX feature bits to x86_ext_save_areas array to set +up AMX components. Add structs that define the layout of +AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the +structs sizes. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-3-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 8 ++++++++ + target/i386/cpu.h | 18 +++++++++++++++++- + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 532ca45015..31d63be081 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { + [XSTATE_PKRU_BIT] = + { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, + .size = sizeof(XSavePKRU) }, ++ [XSTATE_XTILE_CFG_BIT] = { ++ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, ++ .size = sizeof(XSaveXTILECFG), ++ }, ++ [XSTATE_XTILE_DATA_BIT] = { ++ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, ++ .size = sizeof(XSaveXTILEDATA) ++ }, + }; + + static uint32_t xsave_area_size(uint64_t mask) +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 52330d1112..cc431b1d76 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -538,6 +538,8 @@ typedef enum X86Seg { + #define XSTATE_ZMM_Hi256_BIT 6 + #define XSTATE_Hi16_ZMM_BIT 7 + 
#define XSTATE_PKRU_BIT 9 ++#define XSTATE_XTILE_CFG_BIT 17 ++#define XSTATE_XTILE_DATA_BIT 18 + + #define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT) + #define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT) +@@ -846,6 +848,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) + /* AVX512_FP16 instruction */ + #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) ++/* AMX tile (two-dimensional register) */ ++#define CPUID_7_0_EDX_AMX_TILE (1U << 24) + /* Speculation Control */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ +@@ -1349,6 +1353,16 @@ typedef struct XSavePKRU { + uint32_t padding; + } XSavePKRU; + ++/* Ext. save area 17: AMX XTILECFG state */ ++typedef struct XSaveXTILECFG { ++ uint8_t xtilecfg[64]; ++} XSaveXTILECFG; ++ ++/* Ext. save area 18: AMX XTILEDATA state */ ++typedef struct XSaveXTILEDATA { ++ uint8_t xtiledata[8][1024]; ++} XSaveXTILEDATA; ++ + QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100); + QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40); + QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40); +@@ -1356,6 +1370,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40); + QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200); + QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400); + QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); ++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40); ++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000); + + typedef struct ExtSaveArea { + uint32_t feature, bits; +@@ -1363,7 +1379,7 @@ typedef struct ExtSaveArea { + uint32_t ecx; + } ExtSaveArea; + +-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) ++#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1) + + extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT]; + +-- +2.27.0 + diff --git a/x86-Add-XFD-faulting-bit-for-state-components.patch b/x86-Add-XFD-faulting-bit-for-state-components.patch new file mode 100644 index 0000000..4b2edc2 --- /dev/null +++ 
b/x86-Add-XFD-faulting-bit-for-state-components.patch @@ -0,0 +1,66 @@ +From 52eed626a2200da02e67aa93c2a8d59cb529737b Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:30 -0800 +Subject: [PATCH 05/10] x86: Add XFD faulting bit for state components + +from mainline-v7.0.0-rc0 +commit 0f17f6b30f3b051f0f96ccc98c9f7f395713699f +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit 0f17f6b30f3b ("x86: Add XFD faulting bit for state +components") + +------------------------------------------------- + +x86: Add XFD faulting bit for state components + +Intel introduces XFD faulting mechanism for extended +XSAVE features to dynamically enable the features in +runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set +as 1, it indicates support for XFD faulting of this +state component. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 3 ++- + target/i386/cpu.h | 2 ++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index fb6b4c86de..da81e47dc3 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5515,7 +5515,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + *eax = esa->size; + *ebx = esa->offset; +- *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK; ++ *ecx = esa->ecx & ++ (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); + } + } + break; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 93d1c60ac1..09c725ee13 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -556,8 +556,10 @@ typedef enum X86Seg { + #define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + + #define ESA_FEATURE_ALIGN64_BIT 1 ++#define ESA_FEATURE_XFD_BIT 2 + + #define ESA_FEATURE_ALIGN64_MASK (1U << 
ESA_FEATURE_ALIGN64_BIT) ++#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) + + + /* CPUID feature words */ +-- +2.27.0 + diff --git a/x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch b/x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch new file mode 100644 index 0000000..e0aede1 --- /dev/null +++ b/x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch @@ -0,0 +1,91 @@ +From ab183c656a2bee466e7c609224cddb75b80d9d6f Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:27 -0800 +Subject: [PATCH 02/10] x86: Fix the 64-byte boundary enumeration for extended + state + +from mainline-v7.0.0-rc0 +commit 131266b7565bd437127bd231563572696bb27235 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit 131266b7565b ("x86: Fix the 64-byte boundary enumeration for extended state") + +----------------------------------------------------------- + +x86: Fix the 64-byte boundary enumeration for extended state + +The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1] +indicate whether the extended state component locates +on the next 64-byte boundary following the preceding state +component when the compacted format of an XSAVE area is +used. + +Right now, they are all zero because no supported component +needed the bit to be set, but the upcoming AMX feature will +use it. Fix the subleaves value according to KVM's supported +cpuid. 
+ +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 1 + + target/i386/cpu.h | 6 ++++++ + target/i386/kvm/kvm-cpu.c | 1 + + 3 files changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index d9dca1dafb..532ca45015 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5507,6 +5507,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + *eax = esa->size; + *ebx = esa->offset; ++ *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK; + } + } + break; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index d9296a9abc..52330d1112 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -549,6 +549,11 @@ typedef enum X86Seg { + #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) + #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) + ++#define ESA_FEATURE_ALIGN64_BIT 1 ++ ++#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) ++ ++ + /* CPUID feature words */ + typedef enum FeatureWord { + FEAT_1_EDX, /* CPUID[1].EDX */ +@@ -1355,6 +1360,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); + typedef struct ExtSaveArea { + uint32_t feature, bits; + uint32_t offset, size; ++ uint32_t ecx; + } ExtSaveArea; + + #define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index d95028018e..ce27d3b1df 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void) + if (sz != 0) { + assert(esa->size == sz); + esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); ++ esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX); + } + } + } +-- +2.27.0 + diff --git a/x86-Grant-AMX-permission-for-guest.patch b/x86-Grant-AMX-permission-for-guest.patch new file mode 100644 index 
0000000..9ecbc46 --- /dev/null +++ b/x86-Grant-AMX-permission-for-guest.patch @@ -0,0 +1,218 @@ +From b7e588a4506ce61c13e78175c2da5b69b60af128 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Wed, 16 Feb 2022 22:04:29 -0800 +Subject: [PATCH 04/10] x86: Grant AMX permission for guest + +from mainline-v7.0.0-rc0 +commit 19db68ca68a78fa033a21d419036b6e416554564 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit 19db68ca68a7 ("x86: Grant AMX permission for guest") + +-------------------------------------------------------- + +x86: Grant AMX permission for guest + +Kernel allocates 4K xstate buffer by default. For XSAVE features +which require large state component (e.g. AMX), Linux kernel +dynamically expands the xstate buffer only after the process has +acquired the necessary permissions. Those are called dynamically- +enabled XSAVE features (or dynamic xfeatures). + +There are separate permissions for native tasks and guests. + +Qemu should request the guest permissions for dynamic xfeatures +which will be exposed to the guest. This only needs to be done +once before the first vcpu is created. + +KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to +get host side supported_xcr0 and Qemu can decide if it can request +dynamically enabled XSAVE features permission. 
+https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/ + +Suggested-by: Paolo Bonzini +Signed-off-by: Yang Zhong +Signed-off-by: Jing Liu +Message-Id: <20220217060434.52460-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 7 +++++ + target/i386/cpu.h | 4 +++ + target/i386/kvm/kvm-cpu.c | 12 ++++---- + target/i386/kvm/kvm.c | 57 ++++++++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 1 + + 5 files changed, 75 insertions(+), 6 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 31d63be081..fb6b4c86de 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6048,6 +6048,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + CPUX86State *env = &cpu->env; + int i; + uint64_t mask; ++ static bool request_perm; + + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + env->features[FEAT_XSAVE_COMP_LO] = 0; +@@ -6063,6 +6064,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + } + } + ++ /* Only request permission for first vcpu */ ++ if (kvm_enabled() && !request_perm) { ++ kvm_request_xsave_components(cpu, mask); ++ request_perm = true; ++ } ++ + env->features[FEAT_XSAVE_COMP_LO] = mask; + env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; + } +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index cc431b1d76..93d1c60ac1 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -550,6 +550,10 @@ typedef enum X86Seg { + #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) + #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) + #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) ++#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) ++#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) ++ ++#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + + #define ESA_FEATURE_ALIGN64_BIT 1 + +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index ce27d3b1df..a35a1bf9fe 100644 +--- 
a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) + static void kvm_cpu_xsave_init(void) + { + static bool first = true; +- KVMState *s = kvm_state; ++ uint32_t eax, ebx, ecx, edx; + int i; + + if (!first) { +@@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void) + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (esa->size) { +- int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX); +- if (sz != 0) { +- assert(esa->size == sz); +- esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); +- esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX); ++ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); ++ if (eax != 0) { ++ assert(esa->size == eax); ++ esa->offset = ebx; ++ esa->ecx = ecx; + } + } + } +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5a698bde19..e7f57d05a2 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -17,6 +17,7 @@ + #include "qapi/error.h" + #include + #include ++#include + + #include + #include "standard-headers/asm-x86/kvm_para.h" +@@ -347,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + struct kvm_cpuid2 *cpuid; + uint32_t ret = 0; + uint32_t cpuid_1_edx; ++ uint64_t bitmask; + + cpuid = get_supported_cpuid(s); + +@@ -404,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + if (!has_msr_arch_capabs) { + ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; + } ++ } else if (function == 0xd && index == 0 && ++ (reg == R_EAX || reg == R_EDX)) { ++ struct kvm_device_attr attr = { ++ .group = 0, ++ .attr = KVM_X86_XCOMP_GUEST_SUPP, ++ .addr = (unsigned long) &bitmask ++ }; ++ ++ bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); ++ if (!sys_attr) { ++ warn_report("cannot get sys attribute capabilities %d", sys_attr); ++ } ++ ++ int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); ++ if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { ++ 
warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " ++ "error: %d", rc); ++ } ++ ret = (reg == R_EAX) ? bitmask : bitmask >> 32; + } else if (function == 0x80000001 && reg == R_ECX) { + /* + * It's safe to enable TOPOEXT even if it's not returned by +@@ -5050,3 +5071,39 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return !sev_es_enabled(); + } ++ ++#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 ++ ++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) ++{ ++ KVMState *s = kvm_state; ++ uint64_t supported; ++ ++ mask &= XSTATE_DYNAMIC_MASK; ++ if (!mask) { ++ return; ++ } ++ /* ++ * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0]. ++ * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned ++ * about them already because they are not supported features. ++ */ ++ supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); ++ supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32; ++ mask &= supported; ++ ++ while (mask) { ++ int bit = ctz64(mask); ++ int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); ++ if (rc) { ++ /* ++ * Older kernel version (<5.17) do not support ++ * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return ++ * any dynamic feature from kvm_arch_get_supported_cpuid. 
++ */ ++ warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " ++ "for feature bit %d", bit); ++ } ++ mask &= ~BIT_ULL(bit); ++ } ++} +diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index a978509d50..4124912c20 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); + + bool kvm_enable_sgx_provisioning(KVMState *s); ++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + + #endif +-- +2.27.0 + diff --git a/x86-Support-XFD-and-AMX-xsave-data-migration.patch b/x86-Support-XFD-and-AMX-xsave-data-migration.patch new file mode 100644 index 0000000..a33ad5b --- /dev/null +++ b/x86-Support-XFD-and-AMX-xsave-data-migration.patch @@ -0,0 +1,182 @@ +From bb1b53e5d0b67d97042ea3c33b5c4c80e33809f2 Mon Sep 17 00:00:00 2001 +From: Zeng Guang +Date: Wed, 16 Feb 2022 22:04:33 -0800 +Subject: [PATCH 08/10] x86: Support XFD and AMX xsave data migration + +from mainline-v7.0.0-rc0 +commit cdec2b753b487d9e8aab028231c35d87789ea083 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit cdec2b753b48 ("x86: Support XFD and AMX xsave data +migration") + +------------------------------------------------ + +x86: Support XFD and AMX xsave data migration + +XFD (eXtended Feature Disable) allows a feature to be +enabled in the xsave state while preventing specific +user threads from using the feature. + +Save and restore the XFD MSRs if CPUID.D.1.EAX[4] +is enumerated. Likewise migrate the MSRs and the +related xsave state as needed. 
+ +Signed-off-by: Zeng Guang +Signed-off-by: Wei Wang +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-8-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.h | 9 +++++++++ + target/i386/kvm/kvm.c | 18 +++++++++++++++++ + target/i386/machine.c | 46 +++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 73 insertions(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 74e66c352c..eaa99c302f 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -506,6 +506,9 @@ typedef enum X86Seg { + + #define MSR_VM_HSAVE_PA 0xc0010117 + ++#define MSR_IA32_XFD 0x000001c4 ++#define MSR_IA32_XFD_ERR 0x000001c5 ++ + #define MSR_IA32_BNDCFGS 0x00000d90 + #define MSR_IA32_XSS 0x00000da0 + #define MSR_IA32_UMWAIT_CONTROL 0xe1 +@@ -871,6 +874,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) + /* AVX512 BFloat16 Instruction */ + #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) ++/* XFD Extend Feature Disabled */ ++#define CPUID_D_1_EAX_XFD (1U << 4) + + /* Packets which contain IP payload have LIP values */ + #define CPUID_14_0_ECX_LIP (1U << 31) +@@ -1612,6 +1617,10 @@ typedef struct CPUX86State { + uint64_t msr_rtit_cr3_match; + uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS]; + ++ /* Per-VCPU XFD MSRs */ ++ uint64_t msr_xfd; ++ uint64_t msr_xfd_err; ++ + /* exception/interrupt handling */ + int error_code; + int exception_is_int; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index b0b22dcf7c..49fca5ea88 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3219,6 +3219,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + env->msr_ia32_sgxlepubkeyhash[3]); + } + ++ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD, ++ env->msr_xfd); ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, ++ env->msr_xfd_err); ++ } ++ + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * 
kvm_put_msr_feature_control. */ + } +@@ -3570,6 +3577,11 @@ static int kvm_get_msrs(X86CPU *cpu) + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); + } + ++ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); ++ } ++ + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); + if (ret < 0) { + return ret; +@@ -3866,6 +3878,12 @@ static int kvm_get_msrs(X86CPU *cpu) + env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = + msrs[i].data; + break; ++ case MSR_IA32_XFD: ++ env->msr_xfd = msrs[i].data; ++ break; ++ case MSR_IA32_XFD_ERR: ++ env->msr_xfd_err = msrs[i].data; ++ break; + } + } + +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 83c2b91529..3977e9d8f8 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -1455,6 +1455,48 @@ static const VMStateDescription vmstate_msr_intel_sgx = { + } + }; + ++static bool xfd_msrs_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD); ++} ++ ++static const VMStateDescription vmstate_msr_xfd = { ++ .name = "cpu/msr_xfd", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = xfd_msrs_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.msr_xfd, X86CPU), ++ VMSTATE_UINT64(env.msr_xfd_err, X86CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++#ifdef TARGET_X86_64 ++static bool amx_xtile_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE); ++} ++ ++static const VMStateDescription vmstate_amx_xtile = { ++ .name = "cpu/intel_amx_xtile", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = amx_xtile_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64), ++ VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192), ++ VMSTATE_END_OF_LIST() ++ } ++}; 
++#endif ++ + const VMStateDescription vmstate_x86_cpu = { + .name = "cpu", + .version_id = 12, +@@ -1593,6 +1635,10 @@ const VMStateDescription vmstate_x86_cpu = { + #endif + &vmstate_msr_tsx_ctrl, + &vmstate_msr_intel_sgx, ++ &vmstate_msr_xfd, ++#ifdef TARGET_X86_64 ++ &vmstate_amx_xtile, ++#endif + NULL + } + }; +-- +2.27.0 + diff --git a/x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch b/x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch new file mode 100644 index 0000000..7331af7 --- /dev/null +++ b/x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch @@ -0,0 +1,186 @@ +From e98958c23ea5b15a8e84642c373336a8898cd63f Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:32 -0800 +Subject: [PATCH 07/10] x86: add support for KVM_CAP_XSAVE2 and AMX state + migration + +from mainline-v7.0.0-rc0 +commit e56dd3c70abb31893c61ac834109fa7a38841330 +category: feature +feature: SPR AMX support for Qemu +bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB + +Intel-SIG: commit e56dd3c70abb ("x86: add support for KVM_CAP_XSAVE2 and +AMX state migration") + +------------------------------------------------------- + +x86: add support for KVM_CAP_XSAVE2 and AMX state migration + +When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave +area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE +under KVM_CAP_XSAVE2 work with an xsave buffer larger than 4KB. +Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it. 
+ +Signed-off-by: Jing Liu +Signed-off-by: Zeng Guang +Signed-off-by: Wei Wang +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-7-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.h | 4 ++++ + target/i386/kvm/kvm.c | 42 ++++++++++++++++++++++++-------------- + target/i386/xsave_helper.c | 28 +++++++++++++++++++++++++ + 3 files changed, 59 insertions(+), 15 deletions(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 09c725ee13..74e66c352c 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1523,6 +1523,10 @@ typedef struct CPUX86State { + uint64_t opmask_regs[NB_OPMASK_REGS]; + YMMReg zmmh_regs[CPU_NB_REGS]; + ZMMReg hi16_zmm_regs[CPU_NB_REGS]; ++#ifdef TARGET_X86_64 ++ uint8_t xtilecfg[64]; ++ uint8_t xtiledata[8192]; ++#endif + + /* sysenter registers */ + uint32_t sysenter_cs; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 60ccdec5e8..b0b22dcf7c 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -123,6 +123,7 @@ static uint32_t num_architectural_pmu_gp_counters; + static uint32_t num_architectural_pmu_fixed_counters; + + static int has_xsave; ++static int has_xsave2; + static int has_xcrs; + static int has_pit_state2; + static int has_exception_payload; +@@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker; + + #define KVM_MAX_CPUID_ENTRIES 100 + ++static void kvm_init_xsave(CPUX86State *env) ++{ ++ if (has_xsave2) { ++ env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096); ++ } else if (has_xsave) { ++ env->xsave_buf_len = sizeof(struct kvm_xsave); ++ } else { ++ return; ++ } ++ ++ env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); ++ memset(env->xsave_buf, 0, env->xsave_buf_len); ++ /* ++ * The allocated storage must be large enough for all of the ++ * possible XSAVE state components. 
++ */ ++ assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <= ++ env->xsave_buf_len); ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + struct { +@@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + + cpuid_i = 0; + ++ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); ++ + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; +@@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + if (r) { + goto fail; + } +- +- if (has_xsave) { +- env->xsave_buf_len = sizeof(struct kvm_xsave); +- env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); +- memset(env->xsave_buf, 0, env->xsave_buf_len); +- +- /* +- * The allocated storage must be large enough for all of the +- * possible XSAVE state components. +- */ +- assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) +- <= env->xsave_buf_len); +- } ++ kvm_init_xsave(env); + + max_nested_state_len = kvm_max_nested_state_length(); + if (max_nested_state_len > 0) { +@@ -3263,13 +3274,14 @@ static int kvm_get_xsave(X86CPU *cpu) + { + CPUX86State *env = &cpu->env; + void *xsave = env->xsave_buf; +- int ret; ++ int type, ret; + + if (!has_xsave) { + return kvm_get_fpu(cpu); + } + +- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); ++ type = has_xsave2 ? 
KVM_GET_XSAVE2 : KVM_GET_XSAVE; ++ ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave); + if (ret < 0) { + return ret; + } +diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c +index ac61a96344..996e9f3bfe 100644 +--- a/target/i386/xsave_helper.c ++++ b/target/i386/xsave_helper.c +@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) + + memcpy(pkru, &env->pkru, sizeof(env->pkru)); + } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; ++ if (e->size && e->offset) { ++ XSaveXTILECFG *tilecfg = buf + e->offset; ++ ++ memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); ++ } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; ++ if (e->size && e->offset && buflen >= e->size + e->offset) { ++ XSaveXTILEDATA *tiledata = buf + e->offset; ++ ++ memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); ++ } + #endif + } + +@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen) + pkru = buf + e->offset; + memcpy(&env->pkru, pkru, sizeof(env->pkru)); + } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; ++ if (e->size && e->offset) { ++ const XSaveXTILECFG *tilecfg = buf + e->offset; ++ ++ memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg)); ++ } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; ++ if (e->size && e->offset && buflen >= e->size + e->offset) { ++ const XSaveXTILEDATA *tiledata = buf + e->offset; ++ ++ memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); ++ } + #endif + } +-- +2.27.0 + -- Gitee