From e9095b76ae8be40ed088bf155356ab11d61f9e3b Mon Sep 17 00:00:00 2001
From: yezengruan <yezengruan@huawei.com>
Date: Thu, 3 Nov 2022 19:44:12 +0800
Subject: [PATCH] Qemu update to version 6.2.0-55

- support dirty restraint on vCPU
- support SPR AMX in Qemu
- fix compilation errors of sw64

Signed-off-by: yezengruan <yezengruan@huawei.com>
---
 ...nd-invalid-CPUID-0xD-9-info-on-some-.patch | 113 +++++
 ...ve-the-unused-local-variable-records.patch |  25 +
 Remove-this-redundant-return.patch            |  25 +
 ...l-Introduce-kvm_dirty_ring_size-func.patch |  66 +++
 ...l-Refactor-per-vcpu-dirty-ring-reapi.patch | 106 ++++
 cpus-Introduce-cpu_list_generation_id.patch   |  73 +++
 ...errors-of-sw64-architecture-on-x86-p.patch |  25 +
 ...that-no-bios-file-soft-link-was-crea.patch |  28 ++
 ...k-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch |  34 ++
 ...rs-include-missing-changes-from-5.17.patch |  65 +++
 ...ate-Refactor-dirty-page-rate-calcula.patch | 399 +++++++++++++++
 ...-dirtyrate-Replace-malloc-with-g_new.patch |  48 ++
 qemu.spec                                     |  31 +-
 ...imit-Implement-dirty-page-rate-limit.patch | 435 ++++++++++++++++
 ...it-Implement-vCPU-dirtyrate-calculat.patch | 214 ++++++++
 ...limit-Implement-virtual-CPU-throttle.patch | 469 ++++++++++++++++++
 ...do-not-access-uninitialized-variable.patch |  77 +++
 tests-Add-dirty-page-rate-limit-test.patch    | 362 ++++++++++++++
 x86-Add-AMX-CPUIDs-enumeration.patch          | 138 ++++++
 ...MX-XTILECFG-and-XTILEDATA-components.patch | 115 +++++
 ...FD-faulting-bit-for-state-components.patch |  66 +++
 ...yte-boundary-enumeration-for-extende.patch |  91 ++++
 x86-Grant-AMX-permission-for-guest.patch      | 218 ++++++++
 ...ort-XFD-and-AMX-xsave-data-migration.patch | 182 +++++++
 ...for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch | 186 +++++++
 25 files changed, 3590 insertions(+), 1 deletion(-)
 create mode 100644 KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
 create mode 100644 Remove-the-unused-local-variable-records.patch
 create mode 100644 Remove-this-redundant-return.patch
 create mode 100644 accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch
 create mode 100644 accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch
 create mode 100644 cpus-Introduce-cpu_list_generation_id.patch
 create mode 100644 fix-compilation-errors-of-sw64-architecture-on-x86-p.patch
 create mode 100644 fixed-the-error-that-no-bios-file-soft-link-was-crea.patch
 create mode 100644 hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch
 create mode 100644 linux-headers-include-missing-changes-from-5.17.patch
 create mode 100644 migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch
 create mode 100644 migration-dirtyrate-Replace-malloc-with-g_new.patch
 create mode 100644 softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch
 create mode 100644 softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch
 create mode 100644 softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch
 create mode 100644 target-i386-kvm-do-not-access-uninitialized-variable.patch
 create mode 100644 tests-Add-dirty-page-rate-limit-test.patch
 create mode 100644 x86-Add-AMX-CPUIDs-enumeration.patch
 create mode 100644 x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
 create mode 100644 x86-Add-XFD-faulting-bit-for-state-components.patch
 create mode 100644 x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
 create mode 100644 x86-Grant-AMX-permission-for-guest.patch
 create mode 100644 x86-Support-XFD-and-AMX-xsave-data-migration.patch
 create mode 100644 x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch

diff --git a/KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch b/KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
new file mode 100644
index 0000000..bf01f17
--- /dev/null
+++ b/KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
@@ -0,0 +1,113 @@
+From 49cb3c9f3cc3a567ce2e6159bf27328c64b6601d Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Wed, 23 Mar 2022 12:33:25 +0100
+Subject: [PATCH 10/10] KVM: x86: workaround invalid CPUID[0xD,9] info on some
+ AMD processors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+from mainline-v7.0.0-rc2
+commit 58f7db26f21c690cf9a669c314cfd7371506084a
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit 58f7db26f21c ("KVM: x86: workaround invalid CPUID[0xD,9] info
+on some AMD processors")
+
+----------------------------------------------------------------
+
+KVM: x86: workaround invalid CPUID[0xD,9] info on some AMD processors
+
+Some AMD processors expose the PKRU extended save state even if they do not have
+the related PKU feature in CPUID.  Worse, when they do they report a size of
+64, whereas the expected size of the PKRU extended save state is 8, therefore
+the esa->size == eax assertion does not hold.
+
+The state is already ignored by KVM_GET_SUPPORTED_CPUID because it
+was not enabled in the host XCR0.  However, QEMU kvm_cpu_xsave_init()
+runs before QEMU invokes arch_prctl() to enable dynamically-enabled
+save states such as XTILEDATA, and KVM_GET_SUPPORTED_CPUID hides save
+states that have yet to be enabled.  Therefore, kvm_cpu_xsave_init()
+needs to consult the host CPUID instead of KVM_GET_SUPPORTED_CPUID,
+and dies with an assertion failure.
+
+When setting up the ExtSaveArea array to match the host, ignore features that
+KVM does not report as supported.  This will cause QEMU to skip the incorrect
+CPUID leaf instead of tripping the assertion.
+
+Closes: https://gitlab.com/qemu-project/qemu/-/issues/916
+Reported-by: Daniel P. Berrangé <berrange@redhat.com>
+Analyzed-by: Yang Zhong <yang.zhong@intel.com>
+Reported-by: Peter Krempa <pkrempa@redhat.com>
+Tested-by: Daniel P. Berrangé <berrange@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.c         |  4 ++--
+ target/i386/cpu.h         |  2 ++
+ target/i386/kvm/kvm-cpu.c | 19 ++++++++++++-------
+ 3 files changed, 16 insertions(+), 9 deletions(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index 1bc03d3eef..551b47ab1e 100644
+--- a/target/i386/cpu.c
++++ b/target/i386/cpu.c
+@@ -4973,8 +4973,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
+     return cpu_list;
+ }
+ 
+-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+-                                                   bool migratable_only)
++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
++                                            bool migratable_only)
+ {
+     FeatureWordInfo *wi = &feature_word_info[w];
+     uint64_t r = 0;
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index eaa99c302f..290f1beaea 100644
+--- a/target/i386/cpu.h
++++ b/target/i386/cpu.h
+@@ -605,6 +605,8 @@ typedef enum FeatureWord {
+ } FeatureWord;
+ 
+ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
++                                            bool migratable_only);
+ 
+ /* cpuid_features bits */
+ #define CPUID_FP87 (1U << 0)
+diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
+index a35a1bf9fe..5eb955ce9a 100644
+--- a/target/i386/kvm/kvm-cpu.c
++++ b/target/i386/kvm/kvm-cpu.c
+@@ -99,13 +99,18 @@ static void kvm_cpu_xsave_init(void)
+     for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) {
+         ExtSaveArea *esa = &x86_ext_save_areas[i];
+ 
+-        if (esa->size) {
+-            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+-            if (eax != 0) {
+-                assert(esa->size == eax);
+-                esa->offset = ebx;
+-                esa->ecx = ecx;
+-            }
++        if (!esa->size) {
++            continue;
++        }
++        if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits)
++            != esa->bits) {
++            continue;
++        }
++        host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
++        if (eax != 0) {
++            assert(esa->size == eax);
++            esa->offset = ebx;
++            esa->ecx = ecx;
+         }
+     }
+ }
+-- 
+2.27.0
+
diff --git a/Remove-the-unused-local-variable-records.patch b/Remove-the-unused-local-variable-records.patch
new file mode 100644
index 0000000..d41ab54
--- /dev/null
+++ b/Remove-the-unused-local-variable-records.patch
@@ -0,0 +1,25 @@
+From 7b859a86cbdde8bf17619c43a6d4ae687a20f003 Mon Sep 17 00:00:00 2001
+From: dinglimin <dinglimin@cmss.chinamobile.com>
+Date: Wed, 29 Jun 2022 16:26:17 +0800
+Subject: [PATCH] Remove the unused local variable "records".
+
+Signed-off-by: dinglimin <dinglimin@cmss.chinamobile.com>
+---
+ tests/migration/guestperf/engine.py | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py
+index 87a6ab2009..59fca2c70b 100644
+--- a/tests/migration/guestperf/engine.py
++++ b/tests/migration/guestperf/engine.py
+@@ -65,7 +65,6 @@ def _vcpu_timing(self, pid, tid_list):
+         return records
+ 
+     def _cpu_timing(self, pid):
+-        records = []
+         now = time.time()
+ 
+         jiffies_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
+-- 
+2.27.0
+
diff --git a/Remove-this-redundant-return.patch b/Remove-this-redundant-return.patch
new file mode 100644
index 0000000..4d028bd
--- /dev/null
+++ b/Remove-this-redundant-return.patch
@@ -0,0 +1,25 @@
+From e7ef56975af8553690afb16f32fe74d62762b853 Mon Sep 17 00:00:00 2001
+From: dinglimin <dinglimin@cmss.chinamobile.com>
+Date: Wed, 29 Jun 2022 14:02:59 +0800
+Subject: [PATCH] Remove this redundant return.
+
+Signed-off-by: dinglimin <dinglimin@cmss.chinamobile.com>
+---
+ scripts/vmstate-static-checker.py | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
+index 539ead62b4..6838bf7e7c 100755
+--- a/scripts/vmstate-static-checker.py
++++ b/scripts/vmstate-static-checker.py
+@@ -367,7 +367,6 @@ def check_machine_type(s, d):
+     if s["Name"] != d["Name"]:
+         print("Warning: checking incompatible machine types:", end=' ')
+         print("\"" + s["Name"] + "\", \"" + d["Name"] + "\"")
+-    return
+ 
+ 
+ def main():
+-- 
+2.27.0
+
diff --git a/accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch b/accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch
new file mode 100644
index 0000000..6e9274e
--- /dev/null
+++ b/accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch
@@ -0,0 +1,66 @@
+From 85583352f3bc28badd4cb336517f6a4eb440d5b0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:34 +0800
+Subject: [PATCH 2/3] accel/kvm/kvm-all: Introduce kvm_dirty_ring_size function
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce kvm_dirty_ring_size util function to help calculate
+dirty ring full time.
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Acked-by: Peter Xu <peterx@redhat.com>
+Message-Id: <f9ce1f550bfc0e3a1f711e17b1dbc8f701700e56.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ accel/kvm/kvm-all.c    | 5 +++++
+ accel/stubs/kvm-stub.c | 5 +++++
+ include/sysemu/kvm.h   | 2 ++
+ 3 files changed, 12 insertions(+)
+
+diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
+index 3bc6eb6294..d0c4310507 100644
+--- a/accel/kvm/kvm-all.c
++++ b/accel/kvm/kvm-all.c
+@@ -2332,6 +2332,11 @@ bool kvm_dirty_ring_enabled(void)
+     return kvm_state->kvm_dirty_ring_size ? true : false;
+ }
+ 
++uint32_t kvm_dirty_ring_size(void)
++{
++    return kvm_state->kvm_dirty_ring_size;
++}
++
+ static int kvm_init(MachineState *ms)
+ {
+     MachineClass *mc = MACHINE_GET_CLASS(ms);
+diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
+index 5319573e00..1128cb2928 100644
+--- a/accel/stubs/kvm-stub.c
++++ b/accel/stubs/kvm-stub.c
+@@ -152,4 +152,9 @@ bool kvm_dirty_ring_enabled(void)
+ {
+     return false;
+ }
++
++uint32_t kvm_dirty_ring_size(void)
++{
++    return 0;
++}
+ #endif
+diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
+index 2623775c27..19c5c8402a 100644
+--- a/include/sysemu/kvm.h
++++ b/include/sysemu/kvm.h
+@@ -549,4 +549,6 @@ bool kvm_cpu_check_are_resettable(void);
+ bool kvm_arch_cpu_check_are_resettable(void);
+ 
+ bool kvm_dirty_ring_enabled(void);
++
++uint32_t kvm_dirty_ring_size(void);
+ #endif
+-- 
+2.27.0
+
diff --git a/accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch b/accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch
new file mode 100644
index 0000000..bec0b4a
--- /dev/null
+++ b/accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch
@@ -0,0 +1,106 @@
+From c6f781e50e75fc2e6b819291b6c5ce6c212f018b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:30 +0800
+Subject: [PATCH 1/3] accel/kvm/kvm-all: Refactor per-vcpu dirty ring reaping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add an optional 'CPUState' argument to kvm_dirty_ring_reap so
+that it can also cover the single-vcpu dirty-ring-reaping scenario.
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <c32001242875e83b0d9f78f396fe2dcd380ba9e8.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ accel/kvm/kvm-all.c | 23 +++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
+index f2ce5cd45a..3bc6eb6294 100644
+--- a/accel/kvm/kvm-all.c
++++ b/accel/kvm/kvm-all.c
+@@ -773,17 +773,20 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu)
+ }
+ 
+ /* Must be with slots_lock held */
+-static uint64_t kvm_dirty_ring_reap_locked(KVMState *s)
++static uint64_t kvm_dirty_ring_reap_locked(KVMState *s, CPUState* cpu)
+ {
+     int ret;
+-    CPUState *cpu;
+     uint64_t total = 0;
+     int64_t stamp;
+ 
+     stamp = get_clock();
+ 
+-    CPU_FOREACH(cpu) {
+-        total += kvm_dirty_ring_reap_one(s, cpu);
++    if (cpu) {
++        total = kvm_dirty_ring_reap_one(s, cpu);
++    } else {
++        CPU_FOREACH(cpu) {
++            total += kvm_dirty_ring_reap_one(s, cpu);
++        }
+     }
+ 
+     if (total) {
+@@ -804,7 +807,7 @@ static uint64_t kvm_dirty_ring_reap_locked(KVMState *s)
+  * Currently for simplicity, we must hold BQL before calling this.  We can
+  * consider to drop the BQL if we're clear with all the race conditions.
+  */
+-static uint64_t kvm_dirty_ring_reap(KVMState *s)
++static uint64_t kvm_dirty_ring_reap(KVMState *s, CPUState *cpu)
+ {
+     uint64_t total;
+ 
+@@ -824,7 +827,7 @@ static uint64_t kvm_dirty_ring_reap(KVMState *s)
+      *     reset below.
+      */
+     kvm_slots_lock();
+-    total = kvm_dirty_ring_reap_locked(s);
++    total = kvm_dirty_ring_reap_locked(s, cpu);
+     kvm_slots_unlock();
+ 
+     return total;
+@@ -871,7 +874,7 @@ static void kvm_dirty_ring_flush(void)
+      * vcpus out in a synchronous way.
+      */
+     kvm_cpu_synchronize_kick_all();
+-    kvm_dirty_ring_reap(kvm_state);
++    kvm_dirty_ring_reap(kvm_state, NULL);
+     trace_kvm_dirty_ring_flush(1);
+ }
+ 
+@@ -1415,7 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
+                  * Not easy.  Let's cross the fingers until it's fixed.
+                  */
+                 if (kvm_state->kvm_dirty_ring_size) {
+-                    kvm_dirty_ring_reap_locked(kvm_state);
++                    kvm_dirty_ring_reap_locked(kvm_state, NULL);
+                 } else {
+                     kvm_slot_get_dirty_log(kvm_state, mem);
+                 }
+@@ -1487,7 +1490,7 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
+         r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING;
+ 
+         qemu_mutex_lock_iothread();
+-        kvm_dirty_ring_reap(s);
++        kvm_dirty_ring_reap(s, NULL);
+         qemu_mutex_unlock_iothread();
+ 
+         r->reaper_iteration++;
+@@ -2957,7 +2960,7 @@ int kvm_cpu_exec(CPUState *cpu)
+              */
+             trace_kvm_dirty_ring_full(cpu->cpu_index);
+             qemu_mutex_lock_iothread();
+-            kvm_dirty_ring_reap(kvm_state);
++            kvm_dirty_ring_reap(kvm_state, NULL);
+             qemu_mutex_unlock_iothread();
+             ret = 0;
+             break;
+-- 
+2.27.0
+
diff --git a/cpus-Introduce-cpu_list_generation_id.patch b/cpus-Introduce-cpu_list_generation_id.patch
new file mode 100644
index 0000000..23cc872
--- /dev/null
+++ b/cpus-Introduce-cpu_list_generation_id.patch
@@ -0,0 +1,73 @@
+From 6e057dd5df580f0e525d808f5476ee973280371d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:31 +0800
+Subject: [PATCH 2/3] cpus: Introduce cpu_list_generation_id
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce cpu_list_generation_id to track cpu list generation so
+that cpu hotplug/unplug can be detected during measurement of
+dirty page rate.
+
+cpu_list_generation_id could be used to detect changes of cpu
+list, which is prepared for dirty page rate measurement.
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <06e1f1362b2501a471dce796abb065b04f320fa5.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ cpus-common.c             | 8 ++++++++
+ include/exec/cpu-common.h | 1 +
+ 2 files changed, 9 insertions(+)
+
+diff --git a/cpus-common.c b/cpus-common.c
+index 6e73d3e58d..31c6415f37 100644
+--- a/cpus-common.c
++++ b/cpus-common.c
+@@ -73,6 +73,12 @@ static int cpu_get_free_index(void)
+ }
+ 
+ CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
++static unsigned int cpu_list_generation_id;
++
++unsigned int cpu_list_generation_id_get(void)
++{
++    return cpu_list_generation_id;
++}
+ 
+ void cpu_list_add(CPUState *cpu)
+ {
+@@ -84,6 +90,7 @@ void cpu_list_add(CPUState *cpu)
+         assert(!cpu_index_auto_assigned);
+     }
+     QTAILQ_INSERT_TAIL_RCU(&cpus, cpu, node);
++    cpu_list_generation_id++;
+ }
+ 
+ void cpu_list_remove(CPUState *cpu)
+@@ -96,6 +103,7 @@ void cpu_list_remove(CPUState *cpu)
+ 
+     QTAILQ_REMOVE_RCU(&cpus, cpu, node);
+     cpu->cpu_index = UNASSIGNED_CPU_INDEX;
++    cpu_list_generation_id++;
+ }
+ 
+ CPUState *qemu_get_cpu(int index)
+diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
+index 039d422bf4..cdee668f20 100644
+--- a/include/exec/cpu-common.h
++++ b/include/exec/cpu-common.h
+@@ -11,6 +11,7 @@
+ void qemu_init_cpu_list(void);
+ void cpu_list_lock(void);
+ void cpu_list_unlock(void);
++unsigned int cpu_list_generation_id_get(void);
+ 
+ void tcg_flush_softmmu_tlb(CPUState *cs);
+ 
+-- 
+2.27.0
+
diff --git a/fix-compilation-errors-of-sw64-architecture-on-x86-p.patch b/fix-compilation-errors-of-sw64-architecture-on-x86-p.patch
new file mode 100644
index 0000000..8f2cdc1
--- /dev/null
+++ b/fix-compilation-errors-of-sw64-architecture-on-x86-p.patch
@@ -0,0 +1,25 @@
+From 58471cd8dcf8e6a66113ddf9bb4ac45c89bbd57b Mon Sep 17 00:00:00 2001
+From: lifeng 71117973 <lif121@chinatelecom.cn>
+Date: Wed, 2 Nov 2022 11:19:55 +0800
+Subject: [PATCH 1/2] fix compilation errors of sw64 architecture on x86
+ platform
+
+---
+ target/sw64/float_helper.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/target/sw64/float_helper.c b/target/sw64/float_helper.c
+index ad1c3cce48..c8e0845afc 100644
+--- a/target/sw64/float_helper.c
++++ b/target/sw64/float_helper.c
+@@ -653,7 +653,6 @@ void helper_ieee_input(CPUSW64State *env, uint64_t val)
+ {
+ #ifndef CONFIG_USER_ONLY
+     uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+-    uint64_t frac = val & 0xfffffffffffffull;
+ 
+     if (exp == 0x7ff) {
+         /* Infinity or NaN.  */
+-- 
+2.27.0
+
diff --git a/fixed-the-error-that-no-bios-file-soft-link-was-crea.patch b/fixed-the-error-that-no-bios-file-soft-link-was-crea.patch
new file mode 100644
index 0000000..4bc85c4
--- /dev/null
+++ b/fixed-the-error-that-no-bios-file-soft-link-was-crea.patch
@@ -0,0 +1,28 @@
+From cf6be03a1f5b7595a2ecada71fa8aa30de744703 Mon Sep 17 00:00:00 2001
+From: lifeng 71117973 <lif121@chinatelecom.cn>
+Date: Wed, 2 Nov 2022 17:20:50 +0800
+Subject: [PATCH 2/2] fixed the error that no bios file soft link was created
+ in the build directory when compiling the sw64 architecture
+
+---
+ configure | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/configure b/configure
+index 9569d7a3d0..0ae7bcf065 100755
+--- a/configure
++++ b/configure
+@@ -3861,7 +3861,9 @@ for bios_file in \
+     $source_path/pc-bios/u-boot.* \
+     $source_path/pc-bios/edk2-*.fd.bz2 \
+     $source_path/pc-bios/palcode-* \
+-    $source_path/pc-bios/qemu_vga.ndrv
++    $source_path/pc-bios/qemu_vga.ndrv \
++    $source_path/pc-bios/core* \
++    $source_path/pc-bios/uefi-bios-sw
+ 
+ do
+     LINKS="$LINKS pc-bios/$(basename $bios_file)"
+-- 
+2.27.0
+
diff --git a/hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch b/hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch
new file mode 100644
index 0000000..1c874a4
--- /dev/null
+++ b/hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch
@@ -0,0 +1,34 @@
+From 4f66d261c0f20189e387de57baca17167cc542ab Mon Sep 17 00:00:00 2001
+From: Andy Pei <andy.pei@intel.com>
+Date: Mon, 3 Jan 2022 17:28:12 +0800
+Subject: [PATCH] hw/vhost-user-blk: turn on VIRTIO_BLK_F_SIZE_MAX feature for
+ virtio blk device
+
+Turn on pre-defined feature VIRTIO_BLK_F_SIZE_MAX for virtio blk device to
+avoid guest DMA request sizes which are too large for hardware spec.
+
+Signed-off-by: dinglimin <dinglimin@cmss.chinamobile.com>
+Signed-off-by: Andy Pei <andy.pei@intel.com>
+Message-Id: <1641202092-149677-1-git-send-email-andy.pei@intel.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
+---
+ hw/block/vhost-user-blk.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
+index ba13cb87e5..eb1264afc7 100644
+--- a/hw/block/vhost-user-blk.c
++++ b/hw/block/vhost-user-blk.c
+@@ -252,6 +252,7 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
+     VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ 
+     /* Turn on pre-defined features */
++    virtio_add_feature(&features, VIRTIO_BLK_F_SIZE_MAX);
+     virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
+     virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
+     virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
+-- 
+2.27.0
+
diff --git a/linux-headers-include-missing-changes-from-5.17.patch b/linux-headers-include-missing-changes-from-5.17.patch
new file mode 100644
index 0000000..1461d59
--- /dev/null
+++ b/linux-headers-include-missing-changes-from-5.17.patch
@@ -0,0 +1,65 @@
+From d6398243714a7a775c64e74dbd63c00863cb7e83 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 22 Feb 2022 17:58:11 +0100
+Subject: [PATCH 01/10] linux-headers: include missing changes from 5.17
+
+mainline inclusion
+from mainline-v7.0.0-rc0
+commit 1ea5208febcc068449b63282d72bb719ab67a466
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit 1ea5208febcc ("linux-headers: include missing changes from 5.17")
+
+------------------------------------------------
+
+linux-headers: include missing changes from 5.17
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ linux-headers/asm-x86/kvm.h | 3 +++
+ linux-headers/linux/kvm.h   | 7 +++++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
+index a6c327f8ad..2ab4f1818a 100644
+--- a/linux-headers/asm-x86/kvm.h
++++ b/linux-headers/asm-x86/kvm.h
+@@ -437,6 +437,9 @@ struct kvm_sync_regs {
+ 
+ #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE	0x00000001
+ 
++/* attributes for system fd (group 0) */
++#define KVM_X86_XCOMP_GUEST_SUPP	0
++
+ struct kvm_vmx_nested_state_data {
+ 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
+index 5d8e42b8f8..7870cd0280 100644
+--- a/linux-headers/linux/kvm.h
++++ b/linux-headers/linux/kvm.h
+@@ -1112,6 +1112,10 @@ struct kvm_ppc_resize_hpt {
+ #define KVM_CAP_BINARY_STATS_FD 203
+ #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
+ #define KVM_CAP_ARM_MTE 205
++#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
++#define KVM_CAP_VM_GPA_BITS 207
++#define KVM_CAP_XSAVE2 208
++#define KVM_CAP_SYS_ATTRIBUTES 209
+ 
+ #define KVM_CAP_ARM_CPU_FEATURE 555
+ 
+@@ -2006,4 +2010,7 @@ struct kvm_stats_desc {
+ 
+ #define KVM_GET_STATS_FD  _IO(KVMIO,  0xce)
+ 
++/* Available with KVM_CAP_XSAVE2 */
++#define KVM_GET_XSAVE2		  _IOR(KVMIO,  0xcf, struct kvm_xsave)
++
+ #endif /* __LINUX_KVM_H */
+-- 
+2.27.0
+
diff --git a/migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch b/migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch
new file mode 100644
index 0000000..28097ef
--- /dev/null
+++ b/migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch
@@ -0,0 +1,399 @@
+From b6d1e022b7bb06faf2dcad3062b7061b59ef68a9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:32 +0800
+Subject: [PATCH 3/3] migration/dirtyrate: Refactor dirty page rate calculation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+abstract out dirty log change logic into function
+global_dirty_log_change.
+
+abstract out dirty page rate calculation logic via
+dirty-ring into function vcpu_calculate_dirtyrate.
+
+abstract out mathematical dirty page rate calculation
+into do_calculate_dirtyrate, decouple it from DirtyStat.
+
+rename set_sample_page_period to dirty_stat_wait, which
+is easier to understand and will be reused in dirtylimit.
+
+handle cpu hotplug/unplug scenario during measurement of
+dirty page rate.
+
+export util functions outside migration.
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <7b6f6f4748d5b3d017b31a0429e630229ae97538.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ include/sysemu/dirtyrate.h |  28 +++++
+ migration/dirtyrate.c      | 227 +++++++++++++++++++++++--------------
+ migration/dirtyrate.h      |   7 +-
+ 3 files changed, 174 insertions(+), 88 deletions(-)
+ create mode 100644 include/sysemu/dirtyrate.h
+
+diff --git a/include/sysemu/dirtyrate.h b/include/sysemu/dirtyrate.h
+new file mode 100644
+index 0000000000..4d3b9a4902
+--- /dev/null
++++ b/include/sysemu/dirtyrate.h
+@@ -0,0 +1,28 @@
++/*
++ * dirty page rate helper functions
++ *
++ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
++ *
++ * Authors:
++ *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or later.
++ * See the COPYING file in the top-level directory.
++ */
++
++#ifndef QEMU_DIRTYRATE_H
++#define QEMU_DIRTYRATE_H
++
++typedef struct VcpuStat {
++    int nvcpu; /* number of vcpu */
++    DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
++} VcpuStat;
++
++int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
++                                 VcpuStat *stat,
++                                 unsigned int flag,
++                                 bool one_shot);
++
++void global_dirty_log_change(unsigned int flag,
++                             bool start);
++#endif
+diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
+index 8043bc7946..c449095fc3 100644
+--- a/migration/dirtyrate.c
++++ b/migration/dirtyrate.c
+@@ -46,7 +46,7 @@ static struct DirtyRateStat DirtyStat;
+ static DirtyRateMeasureMode dirtyrate_mode =
+                 DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
+ 
+-static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
++static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
+ {
+     int64_t current_time;
+ 
+@@ -60,6 +60,132 @@ static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
+     return msec;
+ }
+ 
++static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
++                                     CPUState *cpu, bool start)
++{
++    if (start) {
++        dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
++    } else {
++        dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
++    }
++}
++
++static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
++                                      int64_t calc_time_ms)
++{
++    uint64_t memory_size_MB;
++    uint64_t increased_dirty_pages =
++        dirty_pages.end_pages - dirty_pages.start_pages;
++
++    memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
++
++    return memory_size_MB * 1000 / calc_time_ms;
++}
++
++void global_dirty_log_change(unsigned int flag, bool start)
++{
++    qemu_mutex_lock_iothread();
++    if (start) {
++        memory_global_dirty_log_start(flag);
++    } else {
++        memory_global_dirty_log_stop(flag);
++    }
++    qemu_mutex_unlock_iothread();
++}
++
++/*
++ * global_dirty_log_sync
++ * 1. sync dirty log from kvm
++ * 2. stop dirty tracking if needed.
++ */
++static void global_dirty_log_sync(unsigned int flag, bool one_shot)
++{
++    qemu_mutex_lock_iothread();
++    memory_global_dirty_log_sync();
++    if (one_shot) {
++        memory_global_dirty_log_stop(flag);
++    }
++    qemu_mutex_unlock_iothread();
++}
++
++static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
++{
++    CPUState *cpu;
++    DirtyPageRecord *records;
++    int nvcpu = 0;
++
++    CPU_FOREACH(cpu) {
++        nvcpu++;
++    }
++
++    stat->nvcpu = nvcpu;
++    stat->rates = g_malloc0(sizeof(DirtyRateVcpu) * nvcpu);
++
++    records = g_malloc0(sizeof(DirtyPageRecord) * nvcpu);
++
++    return records;
++}
++
++static void vcpu_dirty_stat_collect(VcpuStat *stat,
++                                    DirtyPageRecord *records,
++                                    bool start)
++{
++    CPUState *cpu;
++
++    CPU_FOREACH(cpu) {
++        record_dirtypages(records, cpu, start);
++    }
++}
++
++int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
++                                 VcpuStat *stat,
++                                 unsigned int flag,
++                                 bool one_shot)
++{
++    DirtyPageRecord *records;
++    int64_t init_time_ms;
++    int64_t duration;
++    int64_t dirtyrate;
++    int i = 0;
++    unsigned int gen_id;
++
++retry:
++    init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
++
++    cpu_list_lock();
++    gen_id = cpu_list_generation_id_get();
++    records = vcpu_dirty_stat_alloc(stat);
++    vcpu_dirty_stat_collect(stat, records, true);
++    cpu_list_unlock();
++
++    duration = dirty_stat_wait(calc_time_ms, init_time_ms);
++
++    global_dirty_log_sync(flag, one_shot);
++
++    cpu_list_lock();
++    if (gen_id != cpu_list_generation_id_get()) {
++        g_free(records);
++        g_free(stat->rates);
++        cpu_list_unlock();
++        goto retry;
++    }
++    vcpu_dirty_stat_collect(stat, records, false);
++    cpu_list_unlock();
++
++    for (i = 0; i < stat->nvcpu; i++) {
++        dirtyrate = do_calculate_dirtyrate(records[i], duration);
++
++        stat->rates[i].id = i;
++        stat->rates[i].dirty_rate = dirtyrate;
++
++        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
++    }
++
++    g_free(records);
++
++    return duration;
++}
++
+ static bool is_sample_period_valid(int64_t sec)
+ {
+     if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
+@@ -396,44 +522,6 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
+     return true;
+ }
+ 
+-static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
+-                                     CPUState *cpu, bool start)
+-{
+-    if (start) {
+-        dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
+-    } else {
+-        dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
+-    }
+-}
+-
+-static void dirtyrate_global_dirty_log_start(void)
+-{
+-    qemu_mutex_lock_iothread();
+-    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+-    qemu_mutex_unlock_iothread();
+-}
+-
+-static void dirtyrate_global_dirty_log_stop(void)
+-{
+-    qemu_mutex_lock_iothread();
+-    memory_global_dirty_log_sync();
+-    memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
+-    qemu_mutex_unlock_iothread();
+-}
+-
+-static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
+-{
+-    uint64_t memory_size_MB;
+-    int64_t time_s;
+-    uint64_t increased_dirty_pages =
+-        dirty_pages.end_pages - dirty_pages.start_pages;
+-
+-    memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
+-    time_s = DirtyStat.calc_time;
+-
+-    return memory_size_MB / time_s;
+-}
+-
+ static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
+                                             bool start)
+ {
+@@ -444,11 +532,6 @@ static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
+     }
+ }
+ 
+-static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages)
+-{
+-    DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages);
+-}
+-
+ static inline void dirtyrate_manual_reset_protect(void)
+ {
+     RAMBlock *block = NULL;
+@@ -492,71 +575,49 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
+     DirtyStat.start_time = start_time / 1000;
+ 
+     msec = config.sample_period_seconds * 1000;
+-    msec = set_sample_page_period(msec, start_time);
++    msec = dirty_stat_wait(msec, start_time);
+     DirtyStat.calc_time = msec / 1000;
+ 
+     /*
+-     * dirtyrate_global_dirty_log_stop do two things.
++     * do two things.
+      * 1. fetch dirty bitmap from kvm
+      * 2. stop dirty tracking
+      */
+-    dirtyrate_global_dirty_log_stop();
++    global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
+ 
+     record_dirtypages_bitmap(&dirty_pages, false);
+ 
+-    do_calculate_dirtyrate_bitmap(dirty_pages);
++    DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages, msec);
+ }
+ 
+ static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
+ {
+-    CPUState *cpu;
+-    int64_t msec = 0;
+-    int64_t start_time;
++    int64_t duration;
+     uint64_t dirtyrate = 0;
+     uint64_t dirtyrate_sum = 0;
+-    DirtyPageRecord *dirty_pages;
+-    int nvcpu = 0;
+     int i = 0;
+ 
+-    CPU_FOREACH(cpu) {
+-        nvcpu++;
+-    }
+-
+-    dirty_pages = g_new(DirtyPageRecord, nvcpu);
+-
+-    DirtyStat.dirty_ring.nvcpu = nvcpu;
+-    DirtyStat.dirty_ring.rates = g_new(DirtyRateVcpu, nvcpu);
+-
+-    dirtyrate_global_dirty_log_start();
+-
+-    CPU_FOREACH(cpu) {
+-        record_dirtypages(dirty_pages, cpu, true);
+-    }
+-
+-    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+-    DirtyStat.start_time = start_time / 1000;
++    /* start log sync */
++    global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
+ 
+-    msec = config.sample_period_seconds * 1000;
+-    msec = set_sample_page_period(msec, start_time);
+-    DirtyStat.calc_time = msec / 1000;
++    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+ 
+-    dirtyrate_global_dirty_log_stop();
++    /* calculate vcpu dirtyrate */
++    duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000,
++                                        &DirtyStat.dirty_ring,
++                                        GLOBAL_DIRTY_DIRTY_RATE,
++                                        true);
+ 
+-    CPU_FOREACH(cpu) {
+-        record_dirtypages(dirty_pages, cpu, false);
+-    }
++    DirtyStat.calc_time = duration / 1000;
+ 
++    /* calculate vm dirtyrate */
+     for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
+-        dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
+-        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
+-
+-        DirtyStat.dirty_ring.rates[i].id = i;
++        dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
+         DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
+         dirtyrate_sum += dirtyrate;
+     }
+ 
+     DirtyStat.dirty_rate = dirtyrate_sum;
+-    g_free(dirty_pages);
+ }
+ 
+ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
+@@ -574,7 +635,7 @@ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
+     rcu_read_unlock();
+ 
+     msec = config.sample_period_seconds * 1000;
+-    msec = set_sample_page_period(msec, initial_time);
++    msec = dirty_stat_wait(msec, initial_time);
+     DirtyStat.start_time = initial_time / 1000;
+     DirtyStat.calc_time = msec / 1000;
+ 
+diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
+index 69d4c5b865..594a5c0bb6 100644
+--- a/migration/dirtyrate.h
++++ b/migration/dirtyrate.h
+@@ -13,6 +13,8 @@
+ #ifndef QEMU_MIGRATION_DIRTYRATE_H
+ #define QEMU_MIGRATION_DIRTYRATE_H
+ 
++#include "sysemu/dirtyrate.h"
++
+ /*
+  * Sample 512 pages per GB as default.
+  */
+@@ -65,11 +67,6 @@ typedef struct SampleVMStat {
+     uint64_t total_block_mem_MB; /* size of total sampled pages in MB */
+ } SampleVMStat;
+ 
+-typedef struct VcpuStat {
+-    int nvcpu; /* number of vcpu */
+-    DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
+-} VcpuStat;
+-
+ /*
+  * Store calculation statistics for each measure.
+  */
+-- 
+2.27.0
+
diff --git a/migration-dirtyrate-Replace-malloc-with-g_new.patch b/migration-dirtyrate-Replace-malloc-with-g_new.patch
new file mode 100644
index 0000000..9e146a4
--- /dev/null
+++ b/migration-dirtyrate-Replace-malloc-with-g_new.patch
@@ -0,0 +1,48 @@
+From 7cb2d342b9073ec9548202df6e1fb25fa4997d71 Mon Sep 17 00:00:00 2001
+From: jianchunfu <jianchunfu_yewu@cmss.chinamobile.com>
+Date: Thu, 30 Jun 2022 11:34:50 +0000
+Subject: [PATCH] migration/dirtyrate: Replace malloc with g_new Using macro
+ g_new() to handling potential memory allocation failures in dirtyrate.
+
+---
+ migration/dirtyrate.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
+index d65e744af9..8043bc7946 100644
+--- a/migration/dirtyrate.c
++++ b/migration/dirtyrate.c
+@@ -157,7 +157,7 @@ static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
+ {
+     /* last calc-dirty-rate qmp use dirty ring mode */
+     if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+-        free(DirtyStat.dirty_ring.rates);
++        g_free(DirtyStat.dirty_ring.rates);
+         DirtyStat.dirty_ring.rates = NULL;
+     }
+ }
+@@ -522,10 +522,10 @@ static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
+         nvcpu++;
+     }
+ 
+-    dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);
++    dirty_pages = g_new(DirtyPageRecord, nvcpu);
+ 
+     DirtyStat.dirty_ring.nvcpu = nvcpu;
+-    DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);
++    DirtyStat.dirty_ring.rates = g_new(DirtyRateVcpu, nvcpu);
+ 
+     dirtyrate_global_dirty_log_start();
+ 
+@@ -556,7 +556,7 @@ static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
+     }
+ 
+     DirtyStat.dirty_rate = dirtyrate_sum;
+-    free(dirty_pages);
++    g_free(dirty_pages);
+ }
+ 
+ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
+-- 
+2.27.0
+
diff --git a/qemu.spec b/qemu.spec
index 65cde5f..cb108f2 100644
--- a/qemu.spec
+++ b/qemu.spec
@@ -1,6 +1,6 @@
 Name: qemu
 Version: 6.2.0
-Release: 54
+Release: 55
 Epoch: 10
 Summary: QEMU is a generic and open source machine emulator and virtualizer
 License: GPLv2 and BSD and MIT and CC-BY-SA-4.0
@@ -317,6 +317,30 @@ Patch0304: pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch
 Patch0305: acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch
 Patch0306: hw-display-ati_2d-Fix-buffer-overflow-in-ati_2d_blt-.patch
 Patch0307: ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch
+Patch0308: Remove-the-unused-local-variable-records.patch
+Patch0309: Remove-this-redundant-return.patch
+Patch0310: hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch
+Patch0311: migration-dirtyrate-Replace-malloc-with-g_new.patch
+Patch0312: accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch
+Patch0313: cpus-Introduce-cpu_list_generation_id.patch
+Patch0314: migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch
+Patch0315: softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch
+Patch0316: accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch
+Patch0317: softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch
+Patch0318: softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch
+Patch0319: tests-Add-dirty-page-rate-limit-test.patch
+Patch0320: linux-headers-include-missing-changes-from-5.17.patch
+Patch0321: x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
+Patch0322: x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
+Patch0323: x86-Grant-AMX-permission-for-guest.patch
+Patch0324: x86-Add-XFD-faulting-bit-for-state-components.patch
+Patch0325: x86-Add-AMX-CPUIDs-enumeration.patch
+Patch0326: x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
+Patch0327: x86-Support-XFD-and-AMX-xsave-data-migration.patch
+Patch0328: target-i386-kvm-do-not-access-uninitialized-variable.patch
+Patch0329: KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
+Patch0330: fix-compilation-errors-of-sw64-architecture-on-x86-p.patch
+Patch0331: fixed-the-error-that-no-bios-file-soft-link-was-crea.patch
 
 BuildRequires: flex
 BuildRequires: gcc
@@ -831,6 +855,11 @@ getent passwd qemu >/dev/null || \
 %endif
 
 %changelog
+* Thu Nov 03 2022 yezengruan <yezengruan@huawei.com> - 10:6.2.0-55
+- support dirty restraint on vCPU
+- support SPR AMX in Qemu
+- fix compilation errors of sw64
+
 * Mon Oct 24 2022 fushanqing <fushanqing@kylinos.cn> - 10:6.2.0-54
 - add '--enable-slirp' compilation options
 
diff --git a/softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch b/softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch
new file mode 100644
index 0000000..3c56ed3
--- /dev/null
+++ b/softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch
@@ -0,0 +1,435 @@
+From 39d9c1f6de01abf003980f4c2fe3c08f9e6cd60c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:36 +0800
+Subject: [PATCH] softmmu/dirtylimit: Implement dirty page rate limit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Implement dirtyrate calculation periodically basing on
+dirty-ring and throttle virtual CPU until it reaches the quota
+dirty page rate given by user.
+
+Introduce qmp commands "set-vcpu-dirty-limit",
+"cancel-vcpu-dirty-limit", "query-vcpu-dirty-limit"
+to enable, disable, query dirty page limit for virtual CPU.
+
+Meanwhile, introduce corresponding hmp commands
+"set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit",
+"info vcpu_dirty_limit" so the feature can be more usable.
+
+"query-vcpu-dirty-limit" success depends on enabling dirty
+page rate limit, so just add it to the list of skipped
+command to ensure qmp-cmd-test run successfully.
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Acked-by: Markus Armbruster <armbru@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <4143f26706d413dd29db0b672fe58b3d3fbe34bc.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ hmp-commands-info.hx       |  13 +++
+ hmp-commands.hx            |  32 ++++++
+ include/monitor/hmp.h      |   3 +
+ qapi/migration.json        |  80 +++++++++++++++
+ softmmu/dirtylimit.c       | 194 +++++++++++++++++++++++++++++++++++++
+ tests/qtest/qmp-cmd-test.c |   2 +
+ 6 files changed, 324 insertions(+)
+
+diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
+index 407a1da800..5dd3001af0 100644
+--- a/hmp-commands-info.hx
++++ b/hmp-commands-info.hx
+@@ -863,6 +863,19 @@ SRST
+     Display the vcpu dirty rate information.
+ ERST
+ 
++    {
++        .name       = "vcpu_dirty_limit",
++        .args_type  = "",
++        .params     = "",
++        .help       = "show dirty page limit information of all vCPU",
++        .cmd        = hmp_info_vcpu_dirty_limit,
++    },
++
++SRST
++  ``info vcpu_dirty_limit``
++    Display the vcpu dirty page limit information.
++ERST
++
+ #if defined(TARGET_I386)
+     {
+         .name       = "sgx",
+diff --git a/hmp-commands.hx b/hmp-commands.hx
+index 70a9136ac2..5bedee2d49 100644
+--- a/hmp-commands.hx
++++ b/hmp-commands.hx
+@@ -1744,3 +1744,35 @@ ERST
+                       "\n\t\t\t -b to specify dirty bitmap as method of calculation)",
+         .cmd        = hmp_calc_dirty_rate,
+     },
++
++SRST
++``set_vcpu_dirty_limit``
++  Set dirty page rate limit on virtual CPU, the information about all the
++  virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit``
++  command.
++ERST
++
++    {
++        .name       = "set_vcpu_dirty_limit",
++        .args_type  = "dirty_rate:l,cpu_index:l?",
++        .params     = "dirty_rate [cpu_index]",
++        .help       = "set dirty page rate limit, use cpu_index to set limit"
++                      "\n\t\t\t\t\t on a specified virtual cpu",
++        .cmd        = hmp_set_vcpu_dirty_limit,
++    },
++
++SRST
++``cancel_vcpu_dirty_limit``
++  Cancel dirty page rate limit on virtual CPU, the information about all the
++  virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit``
++  command.
++ERST
++
++    {
++        .name       = "cancel_vcpu_dirty_limit",
++        .args_type  = "cpu_index:l?",
++        .params     = "[cpu_index]",
++        .help       = "cancel dirty page rate limit, use cpu_index to cancel"
++                      "\n\t\t\t\t\t limit on a specified virtual cpu",
++        .cmd        = hmp_cancel_vcpu_dirty_limit,
++    },
+diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
+index 96d014826a..478820e54f 100644
+--- a/include/monitor/hmp.h
++++ b/include/monitor/hmp.h
+@@ -131,6 +131,9 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
+ void hmp_replay_seek(Monitor *mon, const QDict *qdict);
+ void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict);
+ void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict);
++void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
++void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
++void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
+ void hmp_human_readable_text_helper(Monitor *mon,
+                                     HumanReadableText *(*qmp_handler)(Error **));
+ 
+diff --git a/qapi/migration.json b/qapi/migration.json
+index d4ebc5f028..fee266017d 100644
+--- a/qapi/migration.json
++++ b/qapi/migration.json
+@@ -1874,6 +1874,86 @@
+ ##
+ { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
+ 
++##
++# @DirtyLimitInfo:
++#
++# Dirty page rate limit information of a virtual CPU.
++#
++# @cpu-index: index of a virtual CPU.
++#
++# @limit-rate: upper limit of dirty page rate (MB/s) for a virtual
++#              CPU, 0 means unlimited.
++#
++# @current-rate: current dirty page rate (MB/s) for a virtual CPU.
++#
++# Since: 6.2
++#
++##
++{ 'struct': 'DirtyLimitInfo',
++  'data': { 'cpu-index': 'int',
++            'limit-rate': 'uint64',
++            'current-rate': 'uint64' } }
++
++##
++# @set-vcpu-dirty-limit:
++#
++# Set the upper limit of dirty page rate for virtual CPUs.
++#
++# Requires KVM with accelerator property "dirty-ring-size" set.
++# A virtual CPU's dirty page rate is a measure of its memory load.
++# To observe dirty page rates, use @calc-dirty-rate.
++#
++# @cpu-index: index of a virtual CPU, default is all.
++#
++# @dirty-rate: upper limit of dirty page rate (MB/s) for virtual CPUs.
++#
++# Since: 6.2
++#
++# Example:
++#   {"execute": "set-vcpu-dirty-limit",
++#    "arguments": { "dirty-rate": 200,
++#                   "cpu-index": 1 } }
++#
++##
++{ 'command': 'set-vcpu-dirty-limit',
++  'data': { '*cpu-index': 'int',
++            'dirty-rate': 'uint64' } }
++
++##
++# @cancel-vcpu-dirty-limit:
++#
++# Cancel the upper limit of dirty page rate for virtual CPUs.
++#
++# Cancel the dirty page limit for the vCPU which has been set with
++# set-vcpu-dirty-limit command. Note that this command requires
++# support from dirty ring, same as the "set-vcpu-dirty-limit".
++#
++# @cpu-index: index of a virtual CPU, default is all.
++#
++# Since: 6.2
++#
++# Example:
++#   {"execute": "cancel-vcpu-dirty-limit",
++#    "arguments": { "cpu-index": 1 } }
++#
++##
++{ 'command': 'cancel-vcpu-dirty-limit',
++  'data': { '*cpu-index': 'int'} }
++
++##
++# @query-vcpu-dirty-limit:
++#
++# Returns information about virtual CPU dirty page rate limits, if any.
++#
++# Since: 6.2
++#
++# Example:
++#   {"execute": "query-vcpu-dirty-limit"}
++#
++##
++{ 'command': 'query-vcpu-dirty-limit',
++  'returns': [ 'DirtyLimitInfo' ] }
++
+ ##
+ # @snapshot-save:
+ #
+diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
+index e5a4f970bd..8d98cb7f2c 100644
+--- a/softmmu/dirtylimit.c
++++ b/softmmu/dirtylimit.c
+@@ -14,8 +14,12 @@
+ #include "qapi/error.h"
+ #include "qemu/main-loop.h"
+ #include "qapi/qapi-commands-migration.h"
++#include "qapi/qmp/qdict.h"
++#include "qapi/error.h"
+ #include "sysemu/dirtyrate.h"
+ #include "sysemu/dirtylimit.h"
++#include "monitor/hmp.h"
++#include "monitor/monitor.h"
+ #include "exec/memory.h"
+ #include "hw/boards.h"
+ #include "sysemu/kvm.h"
+@@ -405,3 +409,193 @@ void dirtylimit_vcpu_execute(CPUState *cpu)
+         usleep(cpu->throttle_us_per_full);
+     }
+ }
++
++static void dirtylimit_init(void)
++{
++    dirtylimit_state_initialize();
++    dirtylimit_change(true);
++    vcpu_dirty_rate_stat_initialize();
++    vcpu_dirty_rate_stat_start();
++}
++
++static void dirtylimit_cleanup(void)
++{
++    vcpu_dirty_rate_stat_stop();
++    vcpu_dirty_rate_stat_finalize();
++    dirtylimit_change(false);
++    dirtylimit_state_finalize();
++}
++
++void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
++                                 int64_t cpu_index,
++                                 Error **errp)
++{
++    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
++        return;
++    }
++
++    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
++        error_setg(errp, "incorrect cpu index specified");
++        return;
++    }
++
++    if (!dirtylimit_in_service()) {
++        return;
++    }
++
++    dirtylimit_state_lock();
++
++    if (has_cpu_index) {
++        dirtylimit_set_vcpu(cpu_index, 0, false);
++    } else {
++        dirtylimit_set_all(0, false);
++    }
++
++    if (!dirtylimit_state->limited_nvcpu) {
++        dirtylimit_cleanup();
++    }
++
++    dirtylimit_state_unlock();
++}
++
++void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
++{
++    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
++    Error *err = NULL;
++
++    qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
++    if (err) {
++        hmp_handle_error(mon, err);
++        return;
++    }
++
++    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
++                   "dirty limit for virtual CPU]\n");
++}
++
++void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
++                              int64_t cpu_index,
++                              uint64_t dirty_rate,
++                              Error **errp)
++{
++    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
++        error_setg(errp, "dirty page limit feature requires KVM with"
++                   " accelerator property 'dirty-ring-size' set");
++        return;
++    }
++
++    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
++        error_setg(errp, "incorrect cpu index specified");
++        return;
++    }
++
++    if (!dirty_rate) { /* a zero rate is handled as a cancel request */
++        qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
++        return;
++    }
++
++    dirtylimit_state_lock();
++
++    if (!dirtylimit_in_service()) { /* first limit: bring the service up */
++        dirtylimit_init();
++    }
++
++    if (has_cpu_index) {
++        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
++    } else {
++        dirtylimit_set_all(dirty_rate, true);
++    }
++
++    dirtylimit_state_unlock();
++}
++
++void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
++{
++    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
++    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
++    Error *err = NULL;
++
++    qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
++    if (err) {
++        hmp_handle_error(mon, err);
++        return;
++    }
++
++    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
++                   "dirty limit for virtual CPU]\n");
++}
++
++static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
++{
++    DirtyLimitInfo *info = NULL;
++
++    info = g_malloc0(sizeof(*info));
++    info->cpu_index = cpu_index;
++    info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
++    info->current_rate = vcpu_dirty_rate_get(cpu_index);
++
++    return info;
++}
++
++static struct DirtyLimitInfoList *dirtylimit_query_all(void)
++{
++    int i, index;
++    DirtyLimitInfo *info = NULL;
++    DirtyLimitInfoList *head = NULL, **tail = &head;
++
++    dirtylimit_state_lock();
++
++    if (!dirtylimit_in_service()) {
++        dirtylimit_state_unlock();
++        return NULL;
++    }
++
++    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
++        index = dirtylimit_state->states[i].cpu_index;
++        if (dirtylimit_vcpu_get_state(index)->enabled) {
++            info = dirtylimit_query_vcpu(index);
++            QAPI_LIST_APPEND(tail, info);
++        }
++    }
++
++    dirtylimit_state_unlock();
++
++    return head;
++}
++
++struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
++{
++    if (!dirtylimit_in_service()) {
++        return NULL;
++    }
++
++    return dirtylimit_query_all();
++}
++
++void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
++{
++    DirtyLimitInfoList *limit, *head, *info = NULL;
++    Error *err = NULL;
++
++    if (!dirtylimit_in_service()) {
++        monitor_printf(mon, "Dirty page limit not enabled!\n");
++        return;
++    }
++
++    info = qmp_query_vcpu_dirty_limit(&err);
++    if (err) {
++        hmp_handle_error(mon, err);
++        return;
++    }
++
++    head = info;
++    for (limit = head; limit != NULL; limit = limit->next) {
++        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIu64 " (MB/s),"
++                            " current rate %"PRIu64 " (MB/s)\n",
++                            limit->value->cpu_index,
++                            limit->value->limit_rate,
++                            limit->value->current_rate);
++    }
++    /* free every list node and its payload; g_free() only freed the head */
++    qapi_free_DirtyLimitInfoList(info);
++}
+diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
+index 7f103ea3fd..4b216a0435 100644
+--- a/tests/qtest/qmp-cmd-test.c
++++ b/tests/qtest/qmp-cmd-test.c
+@@ -110,6 +110,8 @@ static bool query_is_ignored(const char *cmd)
+         "query-sev-capabilities",
+         "query-sgx",
+         "query-sgx-capabilities",
++        /* Success depends on enabling dirty page rate limit */
++        "query-vcpu-dirty-limit",
+         NULL
+     };
+     int i;
+-- 
+2.27.0
+
diff --git a/softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch b/softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch
new file mode 100644
index 0000000..f408a51
--- /dev/null
+++ b/softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch
@@ -0,0 +1,214 @@
+From 1c1049bda8e91cc6015c32fc7cc9d0f16ad46b58 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:33 +0800
+Subject: [PATCH 1/3] softmmu/dirtylimit: Implement vCPU dirtyrate calculation
+ periodically
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce the third method GLOBAL_DIRTY_LIMIT of dirty
+tracking to calculate dirtyrate periodically for dirty page
+rate limit.
+
+Add dirtylimit.c to implement dirtyrate calculation periodically,
+which will be used for dirty page rate limit.
+
+Add dirtylimit.h to export util functions for dirty page rate
+limit implementation.
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <5d0d641bffcb9b1c4cc3e323b6dfecb36050d948.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ include/exec/memory.h       |   5 +-
+ include/sysemu/dirtylimit.h |  22 +++++++
+ softmmu/dirtylimit.c        | 116 ++++++++++++++++++++++++++++++++++++
+ softmmu/meson.build         |   1 +
+ 4 files changed, 143 insertions(+), 1 deletion(-)
+ create mode 100644 include/sysemu/dirtylimit.h
+ create mode 100644 softmmu/dirtylimit.c
+
+diff --git a/include/exec/memory.h b/include/exec/memory.h
+index 3e84d62e40..4326d74b95 100644
+--- a/include/exec/memory.h
++++ b/include/exec/memory.h
+@@ -69,7 +69,10 @@ static inline void fuzz_dma_read_cb(size_t addr,
+ /* Dirty tracking enabled because measuring dirty rate */
+ #define GLOBAL_DIRTY_DIRTY_RATE (1U << 1)
+ 
+-#define GLOBAL_DIRTY_MASK  (0x3)
++/* Dirty tracking enabled because dirty limit */
++#define GLOBAL_DIRTY_LIMIT      (1U << 2)
++
++#define GLOBAL_DIRTY_MASK  (0x7)
+ 
+ extern unsigned int global_dirty_tracking;
+ 
+diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
+new file mode 100644
+index 0000000000..da459f03d6
+--- /dev/null
++++ b/include/sysemu/dirtylimit.h
+@@ -0,0 +1,22 @@
++/*
++ * Dirty page rate limit common functions
++ *
++ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
++ *
++ * Authors:
++ *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or later.
++ * See the COPYING file in the top-level directory.
++ */
++#ifndef QEMU_DIRTYRLIMIT_H
++#define QEMU_DIRTYRLIMIT_H
++
++#define DIRTYLIMIT_CALC_TIME_MS         1000    /* 1000ms */
++
++int64_t vcpu_dirty_rate_get(int cpu_index);
++void vcpu_dirty_rate_stat_start(void);
++void vcpu_dirty_rate_stat_stop(void);
++void vcpu_dirty_rate_stat_initialize(void);
++void vcpu_dirty_rate_stat_finalize(void);
++#endif
+diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
+new file mode 100644
+index 0000000000..ebdc064c9d
+--- /dev/null
++++ b/softmmu/dirtylimit.c
+@@ -0,0 +1,116 @@
++/*
++ * Dirty page rate limit implementation code
++ *
++ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
++ *
++ * Authors:
++ *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or later.
++ * See the COPYING file in the top-level directory.
++ */
++
++#include "qemu/osdep.h"
++#include "qapi/error.h"
++#include "qemu/main-loop.h"
++#include "qapi/qapi-commands-migration.h"
++#include "sysemu/dirtyrate.h"
++#include "sysemu/dirtylimit.h"
++#include "exec/memory.h"
++#include "hw/boards.h"
++
++struct {
++    VcpuStat stat;
++    bool running;
++    QemuThread thread;
++} *vcpu_dirty_rate_stat;
++
++static void vcpu_dirty_rate_stat_collect(void)
++{
++    VcpuStat stat;
++    int i = 0;
++
++    /* sample per-vCPU dirty rates; the callee fills stat.nvcpu/stat.rates */
++    vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
++                             &stat,
++                             GLOBAL_DIRTY_LIMIT,
++                             false);
++
++    for (i = 0; i < stat.nvcpu; i++) {
++        vcpu_dirty_rate_stat->stat.rates[i].id = i;
++        vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
++            stat.rates[i].dirty_rate;
++    }
++
++    g_free(stat.rates); /* g_malloc0()'ed inside vcpu_calculate_dirtyrate() */
++}
++
++static void *vcpu_dirty_rate_stat_thread(void *opaque)
++{
++    rcu_register_thread();
++
++    /* start log sync */
++    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
++
++    while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
++        vcpu_dirty_rate_stat_collect();
++    }
++
++    /* stop log sync */
++    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
++
++    rcu_unregister_thread();
++    return NULL;
++}
++
++int64_t vcpu_dirty_rate_get(int cpu_index)
++{
++    DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
++    return qatomic_read_i64(&rates[cpu_index].dirty_rate);
++}
++
++void vcpu_dirty_rate_stat_start(void)
++{
++    if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
++        return;
++    }
++
++    qatomic_set(&vcpu_dirty_rate_stat->running, 1);
++    qemu_thread_create(&vcpu_dirty_rate_stat->thread,
++                       "dirtyrate-stat",
++                       vcpu_dirty_rate_stat_thread,
++                       NULL,
++                       QEMU_THREAD_JOINABLE);
++}
++
++void vcpu_dirty_rate_stat_stop(void)
++{
++    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
++    qemu_mutex_unlock_iothread();
++    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
++    qemu_mutex_lock_iothread();
++}
++
++void vcpu_dirty_rate_stat_initialize(void)
++{
++    MachineState *ms = MACHINE(qdev_get_machine());
++    int max_cpus = ms->smp.max_cpus;
++
++    vcpu_dirty_rate_stat =
++        g_malloc0(sizeof(*vcpu_dirty_rate_stat));
++
++    vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
++    vcpu_dirty_rate_stat->stat.rates =
++        g_malloc0(sizeof(DirtyRateVcpu) * max_cpus);
++
++    vcpu_dirty_rate_stat->running = false;
++}
++
++void vcpu_dirty_rate_stat_finalize(void)
++{
++    g_free(vcpu_dirty_rate_stat->stat.rates); /* was g_malloc0()'ed */
++    vcpu_dirty_rate_stat->stat.rates = NULL;
++
++    g_free(vcpu_dirty_rate_stat); /* was g_malloc0()'ed */
++    vcpu_dirty_rate_stat = NULL;
++}
+diff --git a/softmmu/meson.build b/softmmu/meson.build
+index d8e03018ab..95029a5db2 100644
+--- a/softmmu/meson.build
++++ b/softmmu/meson.build
+@@ -15,6 +15,7 @@ specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files(
+   'vl.c',
+   'cpu-timers.c',
+   'runstate-action.c',
++  'dirtylimit.c',
+ )])
+ 
+ specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files(
+-- 
+2.27.0
+
diff --git a/softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch b/softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch
new file mode 100644
index 0000000..b515a73
--- /dev/null
+++ b/softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch
@@ -0,0 +1,469 @@
+From 7b6ab56e68fb5031ea13b82743415413b1e70e71 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:35 +0800
+Subject: [PATCH 3/3] softmmu/dirtylimit: Implement virtual CPU throttle
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Set up a negative feedback system that runs when a vCPU
+thread handles a KVM_EXIT_DIRTY_RING_FULL exit, by
+introducing a throttle_us_per_full field in struct CPUState.
+The vCPU sleeps throttle_us_per_full microseconds to be
+throttled if dirtylimit is in service.
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <977e808e03a1cef5151cae75984658b6821be618.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ accel/kvm/kvm-all.c         |  20 ++-
+ include/hw/core/cpu.h       |   6 +
+ include/sysemu/dirtylimit.h |  15 ++
+ softmmu/dirtylimit.c        | 291 ++++++++++++++++++++++++++++++++++++
+ softmmu/trace-events        |   7 +
+ 5 files changed, 338 insertions(+), 1 deletion(-)
+
+diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
+index d0c4310507..946ccb260b 100644
+--- a/accel/kvm/kvm-all.c
++++ b/accel/kvm/kvm-all.c
+@@ -45,6 +45,7 @@
+ #include "qemu/guest-random.h"
+ #include "sysemu/hw_accel.h"
+ #include "kvm-cpus.h"
++#include "sysemu/dirtylimit.h"
+ 
+ #include "hw/boards.h"
+ 
+@@ -493,6 +494,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
+     cpu->kvm_state = s;
+     cpu->vcpu_dirty = true;
+     cpu->dirty_pages = 0;
++    cpu->throttle_us_per_full = 0;
+ 
+     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
+     if (mmap_size < 0) {
+@@ -1486,6 +1488,11 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
+          */
+         sleep(1);
+ 
++        /* keep sleeping so that dirtylimit not be interfered by reaper */
++        if (dirtylimit_in_service()) {
++            continue;
++        }
++
+         trace_kvm_dirty_ring_reaper("wakeup");
+         r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING;
+ 
+@@ -2965,8 +2972,19 @@ int kvm_cpu_exec(CPUState *cpu)
+              */
+             trace_kvm_dirty_ring_full(cpu->cpu_index);
+             qemu_mutex_lock_iothread();
+-            kvm_dirty_ring_reap(kvm_state, NULL);
++            /*
++             * We throttle vCPU by making it sleep once it exit from kernel
++             * due to dirty ring full. In the dirtylimit scenario, reaping
++             * all vCPUs after a single vCPU dirty ring get full result in
++             * the miss of sleep, so just reap the ring-fulled vCPU.
++             */
++            if (dirtylimit_in_service()) {
++                kvm_dirty_ring_reap(kvm_state, cpu);
++            } else {
++                kvm_dirty_ring_reap(kvm_state, NULL);
++            }
+             qemu_mutex_unlock_iothread();
++            dirtylimit_vcpu_execute(cpu);
+             ret = 0;
+             break;
+         case KVM_EXIT_SYSTEM_EVENT:
+diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
+index e948e81f1a..9631c1e2f6 100644
+--- a/include/hw/core/cpu.h
++++ b/include/hw/core/cpu.h
+@@ -411,6 +411,12 @@ struct CPUState {
+      */
+     bool throttle_thread_scheduled;
+ 
++    /*
++     * Sleep throttle_us_per_full microseconds once dirty ring is full
++     * if dirty page rate limit is enabled.
++     */
++    int64_t throttle_us_per_full;
++
+     bool ignore_memory_transaction_failures;
+ 
+     struct hax_vcpu_state *hax_vcpu;
+diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
+index da459f03d6..8d2c1f3a6b 100644
+--- a/include/sysemu/dirtylimit.h
++++ b/include/sysemu/dirtylimit.h
+@@ -19,4 +19,19 @@ void vcpu_dirty_rate_stat_start(void);
+ void vcpu_dirty_rate_stat_stop(void);
+ void vcpu_dirty_rate_stat_initialize(void);
+ void vcpu_dirty_rate_stat_finalize(void);
++
++void dirtylimit_state_lock(void);
++void dirtylimit_state_unlock(void);
++void dirtylimit_state_initialize(void);
++void dirtylimit_state_finalize(void);
++bool dirtylimit_in_service(void);
++bool dirtylimit_vcpu_index_valid(int cpu_index);
++void dirtylimit_process(void);
++void dirtylimit_change(bool start);
++void dirtylimit_set_vcpu(int cpu_index,
++                         uint64_t quota,
++                         bool enable);
++void dirtylimit_set_all(uint64_t quota,
++                        bool enable);
++void dirtylimit_vcpu_execute(CPUState *cpu);
+ #endif
+diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
+index ebdc064c9d..e5a4f970bd 100644
+--- a/softmmu/dirtylimit.c
++++ b/softmmu/dirtylimit.c
+@@ -18,6 +18,26 @@
+ #include "sysemu/dirtylimit.h"
+ #include "exec/memory.h"
+ #include "hw/boards.h"
++#include "sysemu/kvm.h"
++#include "trace.h"
++
++/*
++ * Dirtylimit stop working if dirty page rate error
++ * value less than DIRTYLIMIT_TOLERANCE_RANGE
++ */
++#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
++/*
++ * Plus or minus vcpu sleep time linearly if dirty
++ * page rate error value percentage over
++ * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
++ * Otherwise, plus or minus a fixed vcpu sleep time.
++ */
++#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
++/*
++ * Max vcpu sleep time percentage during a cycle
++ * composed of dirty ring full and sleep time.
++ */
++#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
+ 
+ struct {
+     VcpuStat stat;
+@@ -25,6 +45,30 @@ struct {
+     QemuThread thread;
+ } *vcpu_dirty_rate_stat;
+ 
++typedef struct VcpuDirtyLimitState {
++    int cpu_index;
++    bool enabled;
++    /*
++     * Quota dirty page rate, unit is MB/s
++     * zero if not enabled.
++     */
++    uint64_t quota;
++} VcpuDirtyLimitState;
++
++struct {
++    VcpuDirtyLimitState *states;
++    /* Max cpus number configured by user */
++    int max_cpus;
++    /* Number of vcpu under dirtylimit */
++    int limited_nvcpu;
++} *dirtylimit_state;
++
++/* protect dirtylimit_state */
++static QemuMutex dirtylimit_mutex;
++
++/* dirtylimit thread quit if dirtylimit_quit is true */
++static bool dirtylimit_quit;
++
+ static void vcpu_dirty_rate_stat_collect(void)
+ {
+     VcpuStat stat;
+@@ -54,6 +98,9 @@ static void *vcpu_dirty_rate_stat_thread(void *opaque)
+ 
+     while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
+         vcpu_dirty_rate_stat_collect();
++        if (dirtylimit_in_service()) {
++            dirtylimit_process();
++        }
+     }
+ 
+     /* stop log sync */
+@@ -86,9 +133,11 @@ void vcpu_dirty_rate_stat_start(void)
+ void vcpu_dirty_rate_stat_stop(void)
+ {
+     qatomic_set(&vcpu_dirty_rate_stat->running, 0);
++    dirtylimit_state_unlock();
+     qemu_mutex_unlock_iothread();
+     qemu_thread_join(&vcpu_dirty_rate_stat->thread);
+     qemu_mutex_lock_iothread();
++    dirtylimit_state_lock();
+ }
+ 
+ void vcpu_dirty_rate_stat_initialize(void)
+@@ -114,3 +163,245 @@ void vcpu_dirty_rate_stat_finalize(void)
+     free(vcpu_dirty_rate_stat);
+     vcpu_dirty_rate_stat = NULL;
+ }
++
++void dirtylimit_state_lock(void)
++{
++    qemu_mutex_lock(&dirtylimit_mutex);
++}
++
++void dirtylimit_state_unlock(void)
++{
++    qemu_mutex_unlock(&dirtylimit_mutex);
++}
++
++static void
++__attribute__((__constructor__)) dirtylimit_mutex_init(void)
++{
++    qemu_mutex_init(&dirtylimit_mutex);
++}
++
++static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
++{
++    return &dirtylimit_state->states[cpu_index];
++}
++
++void dirtylimit_state_initialize(void)
++{
++    MachineState *ms = MACHINE(qdev_get_machine());
++    int max_cpus = ms->smp.max_cpus;
++    int i;
++
++    dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
++
++    dirtylimit_state->states =
++            g_malloc0(sizeof(VcpuDirtyLimitState) * max_cpus);
++
++    for (i = 0; i < max_cpus; i++) {
++        dirtylimit_state->states[i].cpu_index = i;
++    }
++
++    dirtylimit_state->max_cpus = max_cpus;
++    trace_dirtylimit_state_initialize(max_cpus);
++}
++
++void dirtylimit_state_finalize(void)
++{
++    free(dirtylimit_state->states);
++    dirtylimit_state->states = NULL;
++
++    free(dirtylimit_state);
++    dirtylimit_state = NULL;
++
++    trace_dirtylimit_state_finalize();
++}
++
++bool dirtylimit_in_service(void)
++{
++    return !!dirtylimit_state;
++}
++
++bool dirtylimit_vcpu_index_valid(int cpu_index)
++{
++    MachineState *ms = MACHINE(qdev_get_machine());
++
++    return !(cpu_index < 0 ||
++             cpu_index >= ms->smp.max_cpus);
++}
++
++static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
++{
++    static uint64_t max_dirtyrate;
++    uint32_t dirty_ring_size = kvm_dirty_ring_size();
++    uint64_t dirty_ring_size_meory_MB =
++        dirty_ring_size * TARGET_PAGE_SIZE >> 20;
++
++    if (max_dirtyrate < dirtyrate) {
++        max_dirtyrate = dirtyrate;
++    }
++
++    return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
++}
++
++static inline bool dirtylimit_done(uint64_t quota,
++                                   uint64_t current)
++{
++    uint64_t min, max;
++
++    min = MIN(quota, current);
++    max = MAX(quota, current);
++
++    return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
++}
++
++static inline bool
++dirtylimit_need_linear_adjustment(uint64_t quota,
++                                  uint64_t current)
++{
++    uint64_t min, max;
++
++    min = MIN(quota, current);
++    max = MAX(quota, current);
++
++    return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
++}
++
++static void dirtylimit_set_throttle(CPUState *cpu,
++                                    uint64_t quota,
++                                    uint64_t current)
++{
++    int64_t ring_full_time_us = 0;
++    uint64_t sleep_pct = 0;
++    uint64_t throttle_us = 0;
++
++    if (current == 0) {
++        cpu->throttle_us_per_full = 0;
++        return;
++    }
++
++    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
++
++    if (dirtylimit_need_linear_adjustment(quota, current)) {
++        if (quota < current) {
++            sleep_pct = (current - quota) * 100 / current;
++            throttle_us =
++                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
++            cpu->throttle_us_per_full += throttle_us;
++        } else {
++            sleep_pct = (quota - current) * 100 / quota;
++            throttle_us =
++                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
++            cpu->throttle_us_per_full -= throttle_us;
++        }
++
++        trace_dirtylimit_throttle_pct(cpu->cpu_index,
++                                      sleep_pct,
++                                      throttle_us);
++    } else {
++        if (quota < current) {
++            cpu->throttle_us_per_full += ring_full_time_us / 10;
++        } else {
++            cpu->throttle_us_per_full -= ring_full_time_us / 10;
++        }
++    }
++
++    /*
++     * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
++     *       current dirty page rate may never reach the quota, we should stop
++     *       increasing sleep time?
++     */
++    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
++        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
++
++    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
++}
++
++static void dirtylimit_adjust_throttle(CPUState *cpu)
++{
++    uint64_t quota = 0;
++    uint64_t current = 0;
++    int cpu_index = cpu->cpu_index;
++
++    quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
++    current = vcpu_dirty_rate_get(cpu_index);
++
++    if (!dirtylimit_done(quota, current)) {
++        dirtylimit_set_throttle(cpu, quota, current);
++    }
++
++    return;
++}
++
++void dirtylimit_process(void)
++{
++    CPUState *cpu;
++
++    if (!qatomic_read(&dirtylimit_quit)) {
++        dirtylimit_state_lock();
++
++        if (!dirtylimit_in_service()) {
++            dirtylimit_state_unlock();
++            return;
++        }
++
++        CPU_FOREACH(cpu) {
++            if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
++                continue;
++            }
++            dirtylimit_adjust_throttle(cpu);
++        }
++        dirtylimit_state_unlock();
++    }
++}
++
++void dirtylimit_change(bool start)
++{
++    if (start) {
++        qatomic_set(&dirtylimit_quit, 0);
++    } else {
++        qatomic_set(&dirtylimit_quit, 1);
++    }
++}
++
++void dirtylimit_set_vcpu(int cpu_index,
++                         uint64_t quota,
++                         bool enable)
++{
++    trace_dirtylimit_set_vcpu(cpu_index, quota);
++
++    if (enable) {
++        dirtylimit_state->states[cpu_index].quota = quota;
++        if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
++            dirtylimit_state->limited_nvcpu++;
++        }
++    } else {
++        dirtylimit_state->states[cpu_index].quota = 0;
++        if (dirtylimit_state->states[cpu_index].enabled) {
++            dirtylimit_state->limited_nvcpu--;
++        }
++    }
++
++    dirtylimit_state->states[cpu_index].enabled = enable;
++}
++
++void dirtylimit_set_all(uint64_t quota,
++                        bool enable)
++{
++    MachineState *ms = MACHINE(qdev_get_machine());
++    int max_cpus = ms->smp.max_cpus;
++    int i;
++
++    for (i = 0; i < max_cpus; i++) {
++        dirtylimit_set_vcpu(i, quota, enable);
++    }
++}
++
++void dirtylimit_vcpu_execute(CPUState *cpu)
++{
++    if (dirtylimit_in_service() &&
++        dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
++        cpu->throttle_us_per_full) {
++        trace_dirtylimit_vcpu_execute(cpu->cpu_index,
++                cpu->throttle_us_per_full);
++        usleep(cpu->throttle_us_per_full);
++    }
++}
+diff --git a/softmmu/trace-events b/softmmu/trace-events
+index 9c88887b3c..22606dc27b 100644
+--- a/softmmu/trace-events
++++ b/softmmu/trace-events
+@@ -31,3 +31,10 @@ runstate_set(int current_state, const char *current_state_str, int new_state, co
+ system_wakeup_request(int reason) "reason=%d"
+ qemu_system_shutdown_request(int reason) "reason=%d"
+ qemu_system_powerdown_request(void) ""
++
++#dirtylimit.c
++dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d"
++dirtylimit_state_finalize(void)
++dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
++dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
++dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
+-- 
+2.27.0
+
diff --git a/target-i386-kvm-do-not-access-uninitialized-variable.patch b/target-i386-kvm-do-not-access-uninitialized-variable.patch
new file mode 100644
index 0000000..3af5ef7
--- /dev/null
+++ b/target-i386-kvm-do-not-access-uninitialized-variable.patch
@@ -0,0 +1,77 @@
+From 550d43a946b61bdadb418e0f8bef8b98e646276d Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 18 Mar 2022 16:23:47 +0100
+Subject: [PATCH 09/10] target/i386: kvm: do not access uninitialized variable
+ on older kernels
+
+from mainline-v7.0.0-rc1
+commit 3ec5ad40081b14af28496198b4d08dbe13386790
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit 3ec5ad40081b ("target/i386: kvm: do not access
+uninitialized variable on older kernels")
+
+---------------------------------------------------------
+
+target/i386: kvm: do not access uninitialized variable on older kernels
+
+KVM support for AMX includes a new system attribute, KVM_X86_XCOMP_GUEST_SUPP.
+Commit 19db68ca68 ("x86: Grant AMX permission for guest", 2022-03-15) however
+did not fully consider the behavior on older kernels.  First, it warns
+too aggressively.  Second, it invokes the KVM_GET_DEVICE_ATTR ioctl
+unconditionally and then uses the "bitmask" variable, which remains
+uninitialized if the ioctl fails.  Third, kvm_ioctl returns -errno rather
+than -1 on errors.
+
+While at it, explain why the ioctl is needed and KVM_GET_SUPPORTED_CPUID
+is not enough.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/kvm/kvm.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index 49fca5ea88..20e418463d 100644
+--- a/target/i386/kvm/kvm.c
++++ b/target/i386/kvm/kvm.c
+@@ -409,6 +409,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+         }
+     } else if (function == 0xd && index == 0 &&
+                (reg == R_EAX || reg == R_EDX)) {
++        /*
++         * The value returned by KVM_GET_SUPPORTED_CPUID does not include
++         * features that still have to be enabled with the arch_prctl
++         * system call.  QEMU needs the full value, which is retrieved
++         * with KVM_GET_DEVICE_ATTR.
++         */
+         struct kvm_device_attr attr = {
+             .group = 0,
+             .attr = KVM_X86_XCOMP_GUEST_SUPP,
+@@ -417,13 +423,16 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+ 
+         bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
+         if (!sys_attr) {
+-            warn_report("cannot get sys attribute capabilities %d", sys_attr);
++            return ret;
+         }
+ 
+         int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
+-        if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
+-            warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+-                        "error: %d", rc);
++        if (rc < 0) {
++            if (rc != -ENXIO) {
++                warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
++                            "error: %d", rc);
++            }
++            return ret;
+         }
+         ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
+     } else if (function == 0x80000001 && reg == R_ECX) {
+-- 
+2.27.0
+
diff --git a/tests-Add-dirty-page-rate-limit-test.patch b/tests-Add-dirty-page-rate-limit-test.patch
new file mode 100644
index 0000000..12ae236
--- /dev/null
+++ b/tests-Add-dirty-page-rate-limit-test.patch
@@ -0,0 +1,362 @@
+From 8a0f4dcf94b280d5b7db7f604c42d088c928ac0d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
+ <huangy81@chinatelecom.cn>
+Date: Sun, 26 Jun 2022 01:38:37 +0800
+Subject: [PATCH] tests: Add dirty page rate limit test
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add a dirty page rate limit test, run only if the kernel supports dirty ring.
+
+The following qmp commands are covered by this test case:
+"calc-dirty-rate", "query-dirty-rate", "set-vcpu-dirty-limit",
+"cancel-vcpu-dirty-limit" and "query-vcpu-dirty-limit".
+
+Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+Acked-by: Peter Xu <peterx@redhat.com>
+Message-Id: <eed5b847a6ef0a9c02a36383dbdd7db367dd1e7e.1656177590.git.huangy81@chinatelecom.cn>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+---
+ tests/qtest/migration-helpers.c |  22 +++
+ tests/qtest/migration-helpers.h |   2 +
+ tests/qtest/migration-test.c    | 256 ++++++++++++++++++++++++++++++++
+ 3 files changed, 280 insertions(+)
+
+diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
+index 4ee26014b7..1e594f9cb1 100644
+--- a/tests/qtest/migration-helpers.c
++++ b/tests/qtest/migration-helpers.c
+@@ -75,6 +75,28 @@ QDict *wait_command(QTestState *who, const char *command, ...)
+     return ret;
+ }
+ 
++/*
++ * Execute the qmp command only
++ */
++QDict *qmp_command(QTestState *who, const char *command, ...)
++{
++    va_list ap;
++    QDict *resp, *ret;
++
++    va_start(ap, command);
++    resp = qtest_vqmp(who, command, ap);
++    va_end(ap);
++
++    g_assert(!qdict_haskey(resp, "error"));
++    g_assert(qdict_haskey(resp, "return"));
++
++    ret = qdict_get_qdict(resp, "return");
++    qobject_ref(ret);
++    qobject_unref(resp);
++
++    return ret;
++}
++
+ /*
+  * Send QMP command "migrate".
+  * Arguments are built from @fmt... (formatted like
+diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
+index d63bba9630..9bc809fb75 100644
+--- a/tests/qtest/migration-helpers.h
++++ b/tests/qtest/migration-helpers.h
+@@ -22,6 +22,8 @@ QDict *wait_command_fd(QTestState *who, int fd, const char *command, ...);
+ GCC_FMT_ATTR(2, 3)
+ QDict *wait_command(QTestState *who, const char *command, ...);
+ 
++QDict *qmp_command(QTestState *who, const char *command, ...);
++
+ GCC_FMT_ATTR(3, 4)
+ void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);
+ 
+diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
+index 7b42f6fd90..8fad247f6c 100644
+--- a/tests/qtest/migration-test.c
++++ b/tests/qtest/migration-test.c
+@@ -23,6 +23,7 @@
+ #include "qapi/qapi-visit-sockets.h"
+ #include "qapi/qobject-input-visitor.h"
+ #include "qapi/qobject-output-visitor.h"
++#include "qapi/qmp/qlist.h"
+ 
+ #include "migration-helpers.h"
+ #include "tests/migration/migration-test.h"
+@@ -42,6 +43,12 @@ static bool uffd_feature_thread_id;
+ /* A downtime where the test really should converge */
+ #define CONVERGE_DOWNTIME 1000
+ 
++/*
++ * Dirtylimit stop working if dirty page rate error
++ * value less than DIRTYLIMIT_TOLERANCE_RANGE
++ */
++#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
++
+ #if defined(__linux__)
+ #include <sys/syscall.h>
+ #include <sys/vfs.h>
+@@ -1394,6 +1401,253 @@ static void test_multifd_tcp_cancel(void)
+     test_migrate_end(from, to2, true);
+ }
+ 
++static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
++{
++    qobject_unref(qmp_command(who,
++                  "{ 'execute': 'calc-dirty-rate',"
++                  "'arguments': { "
++                  "'calc-time': %" PRIu64 ","
++                  "'mode': 'dirty-ring' }}",
++                  calc_time));
++}
++
++static QDict *query_dirty_rate(QTestState *who)
++{
++    return qmp_command(who, "{ 'execute': 'query-dirty-rate' }");
++}
++
++static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate)
++{
++    qobject_unref(qmp_command(who,
++                  "{ 'execute': 'set-vcpu-dirty-limit',"
++                  "'arguments': { "
++                  "'dirty-rate': %" PRIu64 " } }",
++                  dirtyrate));
++}
++
++static void cancel_vcpu_dirty_limit(QTestState *who)
++{
++    qobject_unref(qmp_command(who,
++                  "{ 'execute': 'cancel-vcpu-dirty-limit' }"));
++}
++
++static QDict *query_vcpu_dirty_limit(QTestState *who)
++{
++    QDict *rsp;
++
++    rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }");
++    g_assert(!qdict_haskey(rsp, "error"));
++    g_assert(qdict_haskey(rsp, "return"));
++
++    return rsp;
++}
++
++/* Return true once query-dirty-rate no longer reports "measuring" */
++static bool calc_dirtyrate_ready(QTestState *who)
++{
++    QDict *rsp_return;
++    bool ready;
++
++    rsp_return = query_dirty_rate(who);
++    g_assert(rsp_return);
++    /* compare before the unref: the status string is owned by the reply */
++    ready = g_strcmp0(qdict_get_str(rsp_return, "status"), "measuring") != 0;
++    qobject_unref(rsp_return);
++    return ready;
++}
++
++static void wait_for_calc_dirtyrate_complete(QTestState *who,
++                                             int64_t time_s)
++{
++    int max_try_count = 10000;
++    usleep(time_s * 1000000);
++
++    while (!calc_dirtyrate_ready(who) && --max_try_count) {
++        usleep(1000);
++    }
++
++    /*
++     * Poll for at most 10 s (max_try_count * 1000us); the counter only
++     * reaches zero when every poll was used up, so fail the test then.
++     */
++    g_assert_cmpint(max_try_count, !=, 0);
++}
++
++static int64_t get_dirty_rate(QTestState *who)
++{
++    QDict *rsp_return;
++    const char *status;
++    QList *rates;
++    const QListEntry *entry;
++    QDict *rate;
++    int64_t dirtyrate;
++
++    rsp_return = query_dirty_rate(who);
++    g_assert(rsp_return);
++    /* status is borrowed from rsp_return and is only used before the unref */
++    status = qdict_get_str(rsp_return, "status");
++    g_assert(status);
++    g_assert_cmpstr(status, ==, "measured");
++
++    rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate");
++    g_assert(rates && !qlist_empty(rates));
++
++    entry = qlist_first(rates);
++    g_assert(entry);
++
++    rate = qobject_to(QDict, qlist_entry_obj(entry));
++    g_assert(rate);
++
++    dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1);
++
++    qobject_unref(rsp_return);
++    return dirtyrate;
++}
++
++static int64_t get_limit_rate(QTestState *who)
++{
++    QDict *rsp_return;
++    QList *rates;
++    const QListEntry *entry;
++    QDict *rate;
++    int64_t dirtyrate;
++
++    rsp_return = query_vcpu_dirty_limit(who);
++    g_assert(rsp_return);
++
++    rates = qdict_get_qlist(rsp_return, "return");
++    g_assert(rates && !qlist_empty(rates));
++
++    entry = qlist_first(rates);
++    g_assert(entry);
++
++    rate = qobject_to(QDict, qlist_entry_obj(entry));
++    g_assert(rate);
++
++    dirtyrate = qdict_get_try_int(rate, "limit-rate", -1);
++
++    qobject_unref(rsp_return);
++    return dirtyrate;
++}
++
++static QTestState *dirtylimit_start_vm(void)
++{
++    QTestState *vm = NULL;
++    g_autofree gchar *cmd = NULL;
++    const char *arch = qtest_get_arch();
++    g_autofree char *bootpath = NULL;
++
++    assert((strcmp(arch, "x86_64") == 0));
++    bootpath = g_strdup_printf("%s/bootsect", tmpfs);
++    assert(sizeof(x86_bootsect) == 512);
++    init_bootfile(bootpath, x86_bootsect, sizeof(x86_bootsect));
++
++    cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 "
++                          "-name dirtylimit-test,debug-threads=on "
++                          "-m 150M -smp 1 "
++                          "-serial file:%s/vm_serial "
++                          "-drive file=%s,format=raw ",
++                          tmpfs, bootpath);
++
++    vm = qtest_init(cmd);
++    return vm;
++}
++
++static void dirtylimit_stop_vm(QTestState *vm)
++{
++    qtest_quit(vm);
++    cleanup("bootsect");
++    cleanup("vm_serial");
++}
++
++static void test_vcpu_dirty_limit(void)
++{
++    QTestState *vm;
++    int64_t origin_rate;
++    int64_t quota_rate;
++    int64_t rate ;
++    int max_try_count = 20;
++    int hit = 0;
++
++    /* Start vm for vcpu dirtylimit test */
++    vm = dirtylimit_start_vm();
++
++    /* Wait for the first serial output from the vm*/
++    wait_for_serial("vm_serial");
++
++    /* Do dirtyrate measurement with calc time equals 1s */
++    calc_dirty_rate(vm, 1);
++
++    /* Sleep calc time and wait for calc dirtyrate complete */
++    wait_for_calc_dirtyrate_complete(vm, 1);
++
++    /* Query original dirty page rate */
++    origin_rate = get_dirty_rate(vm);
++
++    /* VM booted from bootsect should dirty memory steadily */
++    assert(origin_rate != 0);
++
++    /* Setup quota dirty page rate at half of origin */
++    quota_rate = origin_rate / 2;
++
++    /* Set dirtylimit */
++    dirtylimit_set_all(vm, quota_rate);
++
++    /*
++     * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit
++     * works literally
++     */
++    g_assert_cmpint(quota_rate, ==, get_limit_rate(vm));
++
++    /* Sleep a bit to check if it take effect */
++    usleep(2000000);
++
++    /*
++     * Check if dirtylimit take effect realistically, set the
++     * timeout with 20 s(max_try_count * 1s), if dirtylimit
++     * doesn't take effect, fail test.
++     */
++    while (--max_try_count) {
++        calc_dirty_rate(vm, 1);
++        wait_for_calc_dirtyrate_complete(vm, 1);
++        rate = get_dirty_rate(vm);
++
++        /*
++         * Assume hitting if current rate is less
++         * than quota rate (within accepting error)
++         */
++        if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
++            hit = 1;
++            break;
++        }
++    }
++
++    g_assert_cmpint(hit, ==, 1);
++
++    hit = 0;
++    max_try_count = 20;
++
++    /* Check if dirtylimit cancellation take effect */
++    cancel_vcpu_dirty_limit(vm);
++    while (--max_try_count) {
++        calc_dirty_rate(vm, 1);
++        wait_for_calc_dirtyrate_complete(vm, 1);
++        rate = get_dirty_rate(vm);
++
++        /*
++         * Assume dirtylimit be canceled if current rate is
++         * greater than quota rate (within accepting error)
++         */
++        if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
++            hit = 1;
++            break;
++        }
++    }
++
++    g_assert_cmpint(hit, ==, 1);
++    dirtylimit_stop_vm(vm);
++}
++
+ static bool kvm_dirty_ring_supported(void)
+ {
+ #if defined(__linux__) && defined(HOST_X86_64)
+@@ -1483,6 +1737,8 @@ int main(int argc, char **argv)
+     if (kvm_dirty_ring_supported()) {
+         qtest_add_func("/migration/dirty_ring",
+                        test_precopy_unix_dirty_ring);
++        qtest_add_func("/migration/vcpu_dirty_limit",
++                       test_vcpu_dirty_limit);
+     }
+ 
+     ret = g_test_run();
+-- 
+2.27.0
+
diff --git a/x86-Add-AMX-CPUIDs-enumeration.patch b/x86-Add-AMX-CPUIDs-enumeration.patch
new file mode 100644
index 0000000..ef7d5ef
--- /dev/null
+++ b/x86-Add-AMX-CPUIDs-enumeration.patch
@@ -0,0 +1,138 @@
+From 42f96b9e73ff4a23fad56bc8fefea5e477ee95b9 Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:31 -0800
+Subject: [PATCH 06/10] x86: Add AMX CPUIDs enumeration
+
+from mainline-v7.0.0-rc0
+commit f21a48171cf3fa39532fc8553fd82e81b88b6474
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit f21a48171cf3 ("x86: Add AMX CPUIDs enumeration")
+
+----------------------------------------------
+
+x86: Add AMX CPUIDs enumeration
+
+Add AMX primary feature bits XFD and AMX_TILE to
+enumerate the CPU's AMX capability. Meanwhile, add
+AMX TILE and TMUL CPUID leaf and subleaves which
+exist when AMX TILE is present to provide the maximum
+capability of TILE and TMUL.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-6-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.c     | 55 ++++++++++++++++++++++++++++++++++++++++---
+ target/i386/kvm/kvm.c |  4 +++-
+ 2 files changed, 55 insertions(+), 4 deletions(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index da81e47dc3..1bc03d3eef 100644
+--- a/target/i386/cpu.c
++++ b/target/i386/cpu.c
+@@ -574,6 +574,18 @@ static CPUCacheInfo legacy_l3_cache = {
+ #define INTEL_PT_CYCLE_BITMAP    0x1fff         /* Support 0,2^(0~11) */
+ #define INTEL_PT_PSB_BITMAP      (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
+ 
++/* CPUID Leaf 0x1D constants: */
++#define INTEL_AMX_TILE_MAX_SUBLEAF     0x1
++#define INTEL_AMX_TOTAL_TILE_BYTES     0x2000
++#define INTEL_AMX_BYTES_PER_TILE       0x400
++#define INTEL_AMX_BYTES_PER_ROW        0x40
++#define INTEL_AMX_TILE_MAX_NAMES       0x8
++#define INTEL_AMX_TILE_MAX_ROWS        0x10
++
++/* CPUID Leaf 0x1E constants: */
++#define INTEL_AMX_TMUL_MAX_K           0x10
++#define INTEL_AMX_TMUL_MAX_N           0x40
++
+ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
+                               uint32_t vendor2, uint32_t vendor3)
+ {
+@@ -843,8 +855,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
+             "avx512-vp2intersect", NULL, "md-clear", NULL,
+             NULL, NULL, "serialize", NULL,
+             "tsx-ldtrk", NULL, NULL /* pconfig */, NULL,
+-            NULL, NULL, NULL, "avx512-fp16",
+-            NULL, NULL, "spec-ctrl", "stibp",
++            NULL, NULL, "amx-bf16", "avx512-fp16",
++            "amx-tile", "amx-int8", "spec-ctrl", "stibp",
+             NULL, "arch-capabilities", "core-capability", "ssbd",
+         },
+         .cpuid = {
+@@ -909,7 +921,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
+         .type = CPUID_FEATURE_WORD,
+         .feat_names = {
+             "xsaveopt", "xsavec", "xgetbv1", "xsaves",
+-            NULL, NULL, NULL, NULL,
++            "xfd", NULL, NULL, NULL,
+             NULL, NULL, NULL, NULL,
+             NULL, NULL, NULL, NULL,
+             NULL, NULL, NULL, NULL,
+@@ -5605,6 +5617,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+         }
+         break;
+     }
++    case 0x1D: {
++        /* AMX TILE */
++        *eax = 0;
++        *ebx = 0;
++        *ecx = 0;
++        *edx = 0;
++        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
++            break;
++        }
++
++        if (count == 0) {
++            /* Highest numbered palette subleaf */
++            *eax = INTEL_AMX_TILE_MAX_SUBLEAF;
++        } else if (count == 1) {
++            *eax = INTEL_AMX_TOTAL_TILE_BYTES |
++                   (INTEL_AMX_BYTES_PER_TILE << 16);
++            *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
++            *ecx = INTEL_AMX_TILE_MAX_ROWS;
++        }
++        break;
++    }
++    case 0x1E: {
++        /* AMX TMUL */
++        *eax = 0;
++        *ebx = 0;
++        *ecx = 0;
++        *edx = 0;
++        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
++            break;
++        }
++
++        if (count == 0) {
++            /* EBX: TMUL max K in bits 7:0, max N starting at bit 8 */
++            *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
++        }
++        break;
++    }
+     case 0x40000000:
+         /*
+          * CPUID code in kvm_arch_init_vcpu() ignores stuff
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index e7f57d05a2..60ccdec5e8 100644
+--- a/target/i386/kvm/kvm.c
++++ b/target/i386/kvm/kvm.c
+@@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
+                 c = &cpuid_data.entries[cpuid_i++];
+             }
+             break;
+-        case 0x14: {
++        case 0x14:
++        case 0x1d:
++        case 0x1e: {
+             uint32_t times;
+ 
+             c->function = i;
+-- 
+2.27.0
+
diff --git a/x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch b/x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
new file mode 100644
index 0000000..d47f736
--- /dev/null
+++ b/x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
@@ -0,0 +1,115 @@
+From 98f5dbc3fd8390728401528786ac94b39f0581ee Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:28 -0800
+Subject: [PATCH 03/10] x86: Add AMX XTILECFG and XTILEDATA components
+
+from mainline-v7.0.0-rc0
+commit 1f16764f7d4515bfd5e4ae0aae814fa280a7d0c8
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit 1f16764f7d45 ("x86: Add AMX XTILECFG and XTILEDATA components")
+
+-------------------------------------------------------------
+
+x86: Add AMX XTILECFG and XTILEDATA components
+
+The AMX TILECFG register and the TMMx tile data registers are
+saved/restored via XSAVE, respectively in state component 17
+(64 bytes) and state component 18 (8192 bytes).
+
+Add AMX feature bits to x86_ext_save_areas array to set
+up AMX components. Add structs that define the layout of
+AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the
+structs sizes.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-3-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.c |  8 ++++++++
+ target/i386/cpu.h | 18 +++++++++++++++++-
+ 2 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index 532ca45015..31d63be081 100644
+--- a/target/i386/cpu.c
++++ b/target/i386/cpu.c
+@@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
+     [XSTATE_PKRU_BIT] =
+           { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU,
+             .size = sizeof(XSavePKRU) },
++    [XSTATE_XTILE_CFG_BIT] = {
++        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
++        .size = sizeof(XSaveXTILECFG),
++    },
++    [XSTATE_XTILE_DATA_BIT] = {
++        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
++        .size = sizeof(XSaveXTILEDATA)
++    },
+ };
+ 
+ static uint32_t xsave_area_size(uint64_t mask)
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 52330d1112..cc431b1d76 100644
+--- a/target/i386/cpu.h
++++ b/target/i386/cpu.h
+@@ -538,6 +538,8 @@ typedef enum X86Seg {
+ #define XSTATE_ZMM_Hi256_BIT            6
+ #define XSTATE_Hi16_ZMM_BIT             7
+ #define XSTATE_PKRU_BIT                 9
++#define XSTATE_XTILE_CFG_BIT            17
++#define XSTATE_XTILE_DATA_BIT           18
+ 
+ #define XSTATE_FP_MASK                  (1ULL << XSTATE_FP_BIT)
+ #define XSTATE_SSE_MASK                 (1ULL << XSTATE_SSE_BIT)
+@@ -846,6 +848,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
+ #define CPUID_7_0_EDX_TSX_LDTRK         (1U << 16)
+ /* AVX512_FP16 instruction */
+ #define CPUID_7_0_EDX_AVX512_FP16       (1U << 23)
++/* AMX tile (two-dimensional register) */
++#define CPUID_7_0_EDX_AMX_TILE          (1U << 24)
+ /* Speculation Control */
+ #define CPUID_7_0_EDX_SPEC_CTRL         (1U << 26)
+ /* Single Thread Indirect Branch Predictors */
+@@ -1349,6 +1353,16 @@ typedef struct XSavePKRU {
+     uint32_t padding;
+ } XSavePKRU;
+ 
++/* Ext. save area 17: AMX XTILECFG state */
++typedef struct XSaveXTILECFG {
++    uint8_t xtilecfg[64];
++} XSaveXTILECFG;
++
++/* Ext. save area 18: AMX XTILEDATA state */
++typedef struct XSaveXTILEDATA {
++    uint8_t xtiledata[8][1024];
++} XSaveXTILEDATA;
++
+ QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
+@@ -1356,6 +1370,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
+ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
+ 
+ typedef struct ExtSaveArea {
+     uint32_t feature, bits;
+@@ -1363,7 +1379,7 @@ typedef struct ExtSaveArea {
+     uint32_t ecx;
+ } ExtSaveArea;
+ 
+-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
++#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
+ 
+ extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
+ 
+-- 
+2.27.0
+
diff --git a/x86-Add-XFD-faulting-bit-for-state-components.patch b/x86-Add-XFD-faulting-bit-for-state-components.patch
new file mode 100644
index 0000000..4b2edc2
--- /dev/null
+++ b/x86-Add-XFD-faulting-bit-for-state-components.patch
@@ -0,0 +1,66 @@
+From 52eed626a2200da02e67aa93c2a8d59cb529737b Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:30 -0800
+Subject: [PATCH 05/10] x86: Add XFD faulting bit for state components
+
+from mainline-v7.0.0-rc0
+commit 0f17f6b30f3b051f0f96ccc98c9f7f395713699f
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit 0f17f6b30f3b ("x86: Add XFD faulting bit for state
+components")
+
+-------------------------------------------------
+
+x86: Add XFD faulting bit for state components
+
+Intel introduces XFD faulting mechanism for extended
+XSAVE features to dynamically enable the features in
+runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set
+as 1, it indicates support for XFD faulting of this
+state component.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-5-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.c | 3 ++-
+ target/i386/cpu.h | 2 ++
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index fb6b4c86de..da81e47dc3 100644
+--- a/target/i386/cpu.c
++++ b/target/i386/cpu.c
+@@ -5515,7 +5515,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+                 const ExtSaveArea *esa = &x86_ext_save_areas[count];
+                 *eax = esa->size;
+                 *ebx = esa->offset;
+-                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
++                *ecx = esa->ecx &
++                       (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK);
+             }
+         }
+         break;
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 93d1c60ac1..09c725ee13 100644
+--- a/target/i386/cpu.h
++++ b/target/i386/cpu.h
+@@ -556,8 +556,10 @@ typedef enum X86Seg {
+ #define XSTATE_DYNAMIC_MASK             (XSTATE_XTILE_DATA_MASK)
+ 
+ #define ESA_FEATURE_ALIGN64_BIT         1
++#define ESA_FEATURE_XFD_BIT             2
+ 
+ #define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
++#define ESA_FEATURE_XFD_MASK            (1U << ESA_FEATURE_XFD_BIT)
+ 
+ 
+ /* CPUID feature words */
+-- 
+2.27.0
+
diff --git a/x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch b/x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
new file mode 100644
index 0000000..e0aede1
--- /dev/null
+++ b/x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
@@ -0,0 +1,91 @@
+From ab183c656a2bee466e7c609224cddb75b80d9d6f Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:27 -0800
+Subject: [PATCH 02/10] x86: Fix the 64-byte boundary enumeration for extended
+ state
+
+from mainline-v7.0.0-rc0
+commit 131266b7565bd437127bd231563572696bb27235
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit 131266b7565b ("x86: Fix the 64-byte boundary enumeration for extended state")
+
+-----------------------------------------------------------
+
+x86: Fix the 64-byte boundary enumeration for extended state
+
+The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1]
+indicate whether the extended state component locates
+on the next 64-byte boundary following the preceding state
+component when the compacted format of an XSAVE area is
+used.
+
+Right now, they are all zero because no supported component
+needed the bit to be set, but the upcoming AMX feature will
+use it.  Fix the subleaves value according to KVM's supported
+cpuid.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-2-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.c         | 1 +
+ target/i386/cpu.h         | 6 ++++++
+ target/i386/kvm/kvm-cpu.c | 1 +
+ 3 files changed, 8 insertions(+)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index d9dca1dafb..532ca45015 100644
+--- a/target/i386/cpu.c
++++ b/target/i386/cpu.c
+@@ -5507,6 +5507,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+                 const ExtSaveArea *esa = &x86_ext_save_areas[count];
+                 *eax = esa->size;
+                 *ebx = esa->offset;
++                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
+             }
+         }
+         break;
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index d9296a9abc..52330d1112 100644
+--- a/target/i386/cpu.h
++++ b/target/i386/cpu.h
+@@ -549,6 +549,11 @@ typedef enum X86Seg {
+ #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
+ #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
+ 
++#define ESA_FEATURE_ALIGN64_BIT         1
++
++#define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
++
++
+ /* CPUID feature words */
+ typedef enum FeatureWord {
+     FEAT_1_EDX,         /* CPUID[1].EDX */
+@@ -1355,6 +1360,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
+ typedef struct ExtSaveArea {
+     uint32_t feature, bits;
+     uint32_t offset, size;
++    uint32_t ecx;
+ } ExtSaveArea;
+ 
+ #define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
+diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
+index d95028018e..ce27d3b1df 100644
+--- a/target/i386/kvm/kvm-cpu.c
++++ b/target/i386/kvm/kvm-cpu.c
+@@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void)
+             if (sz != 0) {
+                 assert(esa->size == sz);
+                 esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
++                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
+             }
+         }
+     }
+-- 
+2.27.0
+
diff --git a/x86-Grant-AMX-permission-for-guest.patch b/x86-Grant-AMX-permission-for-guest.patch
new file mode 100644
index 0000000..9ecbc46
--- /dev/null
+++ b/x86-Grant-AMX-permission-for-guest.patch
@@ -0,0 +1,218 @@
+From b7e588a4506ce61c13e78175c2da5b69b60af128 Mon Sep 17 00:00:00 2001
+From: Yang Zhong <yang.zhong@intel.com>
+Date: Wed, 16 Feb 2022 22:04:29 -0800
+Subject: [PATCH 04/10] x86: Grant AMX permission for guest
+
+from mainline-v7.0.0-rc0
+commit 19db68ca68a78fa033a21d419036b6e416554564
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit 19db68ca68a7 ("x86: Grant AMX permission for guest")
+
+--------------------------------------------------------
+
+x86: Grant AMX permission for guest
+
+Kernel allocates 4K xstate buffer by default. For XSAVE features
+which require large state component (e.g. AMX), Linux kernel
+dynamically expands the xstate buffer only after the process has
+acquired the necessary permissions. Those are called dynamically-
+enabled XSAVE features (or dynamic xfeatures).
+
+There are separate permissions for native tasks and guests.
+
+Qemu should request the guest permissions for dynamic xfeatures
+which will be exposed to the guest. This only needs to be done
+once before the first vcpu is created.
+
+KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to
+get host side supported_xcr0 and Qemu can decide if it can request
+dynamically enabled XSAVE features permission.
+https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/
+
+Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Message-Id: <20220217060434.52460-4-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.c          |  7 +++++
+ target/i386/cpu.h          |  4 +++
+ target/i386/kvm/kvm-cpu.c  | 12 ++++----
+ target/i386/kvm/kvm.c      | 57 ++++++++++++++++++++++++++++++++++++++
+ target/i386/kvm/kvm_i386.h |  1 +
+ 5 files changed, 75 insertions(+), 6 deletions(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index 31d63be081..fb6b4c86de 100644
+--- a/target/i386/cpu.c
++++ b/target/i386/cpu.c
+@@ -6048,6 +6048,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
+     CPUX86State *env = &cpu->env;
+     int i;
+     uint64_t mask;
++    static bool request_perm;
+ 
+     if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
+         env->features[FEAT_XSAVE_COMP_LO] = 0;
+@@ -6063,6 +6064,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
+         }
+     }
+ 
++    /* Only request permission for first vcpu */
++    if (kvm_enabled() && !request_perm) {
++        kvm_request_xsave_components(cpu, mask);
++        request_perm = true;
++    }
++
+     env->features[FEAT_XSAVE_COMP_LO] = mask;
+     env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
+ }
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index cc431b1d76..93d1c60ac1 100644
+--- a/target/i386/cpu.h
++++ b/target/i386/cpu.h
+@@ -550,6 +550,10 @@ typedef enum X86Seg {
+ #define XSTATE_ZMM_Hi256_MASK           (1ULL << XSTATE_ZMM_Hi256_BIT)
+ #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
+ #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
++#define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
++#define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
++
++#define XSTATE_DYNAMIC_MASK             (XSTATE_XTILE_DATA_MASK)
+ 
+ #define ESA_FEATURE_ALIGN64_BIT         1
+ 
+diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
+index ce27d3b1df..a35a1bf9fe 100644
+--- a/target/i386/kvm/kvm-cpu.c
++++ b/target/i386/kvm/kvm-cpu.c
+@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
+ static void kvm_cpu_xsave_init(void)
+ {
+     static bool first = true;
+-    KVMState *s = kvm_state;
++    uint32_t eax, ebx, ecx, edx;
+     int i;
+ 
+     if (!first) {
+@@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void)
+         ExtSaveArea *esa = &x86_ext_save_areas[i];
+ 
+         if (esa->size) {
+-            int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
+-            if (sz != 0) {
+-                assert(esa->size == sz);
+-                esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
+-                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
++            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
++            if (eax != 0) {
++                assert(esa->size == eax);
++                esa->offset = ebx;
++                esa->ecx = ecx;
+             }
+         }
+     }
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index 5a698bde19..e7f57d05a2 100644
+--- a/target/i386/kvm/kvm.c
++++ b/target/i386/kvm/kvm.c
+@@ -17,6 +17,7 @@
+ #include "qapi/error.h"
+ #include <sys/ioctl.h>
+ #include <sys/utsname.h>
++#include <sys/syscall.h>
+ 
+ #include <linux/kvm.h>
+ #include "standard-headers/asm-x86/kvm_para.h"
+@@ -347,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+     struct kvm_cpuid2 *cpuid;
+     uint32_t ret = 0;
+     uint32_t cpuid_1_edx;
++    uint64_t bitmask;
+ 
+     cpuid = get_supported_cpuid(s);
+ 
+@@ -404,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+         if (!has_msr_arch_capabs) {
+             ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
+         }
++    } else if (function == 0xd && index == 0 &&
++               (reg == R_EAX || reg == R_EDX)) {
++        struct kvm_device_attr attr = {
++            .group = 0,
++            .attr = KVM_X86_XCOMP_GUEST_SUPP,
++            .addr = (unsigned long) &bitmask
++        };
++
++        bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
++        if (!sys_attr) {
++            warn_report("cannot get sys attribute capabilities %d", sys_attr);
++        }
++
++        int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
++        if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
++            warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
++                        "error: %d", rc);
++        }
++        ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
+     } else if (function == 0x80000001 && reg == R_ECX) {
+         /*
+          * It's safe to enable TOPOEXT even if it's not returned by
+@@ -5050,3 +5071,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
+ {
+     return !sev_es_enabled();
+ }
++
++#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
++
++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
++{
++    KVMState *s = kvm_state;
++    uint64_t supported;
++
++    mask &= XSTATE_DYNAMIC_MASK;
++    if (!mask) {
++        return;
++    }
++    /*
++     * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
++     * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
++     * about them already because they are not supported features.
++     */
++    supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
++    supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
++    mask &= supported;
++
++    while (mask) {
++        int bit = ctz64(mask);
++        int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
++        if (rc) {
++            /*
++             * Older kernel versions (<5.17) do not support
++             * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
++             * any dynamic feature from kvm_arch_get_supported_cpuid.
++             */
++            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
++                        "for feature bit %d", bit);
++        }
++        mask &= ~BIT_ULL(bit);
++    }
++}
+diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
+index a978509d50..4124912c20 100644
+--- a/target/i386/kvm/kvm_i386.h
++++ b/target/i386/kvm/kvm_i386.h
+@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
+ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
+ 
+ bool kvm_enable_sgx_provisioning(KVMState *s);
++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
+ 
+ #endif
+-- 
+2.27.0
+
diff --git a/x86-Support-XFD-and-AMX-xsave-data-migration.patch b/x86-Support-XFD-and-AMX-xsave-data-migration.patch
new file mode 100644
index 0000000..a33ad5b
--- /dev/null
+++ b/x86-Support-XFD-and-AMX-xsave-data-migration.patch
@@ -0,0 +1,182 @@
+From bb1b53e5d0b67d97042ea3c33b5c4c80e33809f2 Mon Sep 17 00:00:00 2001
+From: Zeng Guang <guang.zeng@intel.com>
+Date: Wed, 16 Feb 2022 22:04:33 -0800
+Subject: [PATCH 08/10] x86: Support XFD and AMX xsave data migration
+
+from mainline-v7.0.0-rc0
+commit cdec2b753b487d9e8aab028231c35d87789ea083
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit cdec2b753b48 ("x86: Support XFD and AMX xsave data
+migration")
+
+------------------------------------------------
+
+x86: Support XFD and AMX xsave data migration
+
+XFD (eXtended Feature Disable) allows a feature to be
+enabled in the xsave state while preventing specific
+user threads from using it.
+
+Save and restore the XFD MSRs if CPUID.D.1.EAX[4]
+enumerates XFD support. Likewise, migrate the MSRs and
+the related xsave state when needed.
+
+Signed-off-by: Zeng Guang <guang.zeng@intel.com>
+Signed-off-by: Wei Wang <wei.w.wang@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-8-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.h     |  9 +++++++++
+ target/i386/kvm/kvm.c | 18 +++++++++++++++++
+ target/i386/machine.c | 46 +++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 73 insertions(+)
+
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 74e66c352c..eaa99c302f 100644
+--- a/target/i386/cpu.h
++++ b/target/i386/cpu.h
+@@ -506,6 +506,9 @@ typedef enum X86Seg {
+ 
+ #define MSR_VM_HSAVE_PA                 0xc0010117
+ 
++#define MSR_IA32_XFD                    0x000001c4
++#define MSR_IA32_XFD_ERR                0x000001c5
++
+ #define MSR_IA32_BNDCFGS                0x00000d90
+ #define MSR_IA32_XSS                    0x00000da0
+ #define MSR_IA32_UMWAIT_CONTROL         0xe1
+@@ -871,6 +874,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
+ #define CPUID_7_1_EAX_AVX_VNNI          (1U << 4)
+ /* AVX512 BFloat16 Instruction */
+ #define CPUID_7_1_EAX_AVX512_BF16       (1U << 5)
++/* XFD (eXtended Feature Disable) */
++#define CPUID_D_1_EAX_XFD               (1U << 4)
+ 
+ /* Packets which contain IP payload have LIP values */
+ #define CPUID_14_0_ECX_LIP              (1U << 31)
+@@ -1612,6 +1617,10 @@ typedef struct CPUX86State {
+     uint64_t msr_rtit_cr3_match;
+     uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
+ 
++    /* Per-VCPU XFD MSRs */
++    uint64_t msr_xfd;
++    uint64_t msr_xfd_err;
++
+     /* exception/interrupt handling */
+     int error_code;
+     int exception_is_int;
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index b0b22dcf7c..49fca5ea88 100644
+--- a/target/i386/kvm/kvm.c
++++ b/target/i386/kvm/kvm.c
+@@ -3219,6 +3219,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
+                               env->msr_ia32_sgxlepubkeyhash[3]);
+         }
+ 
++        if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
++            kvm_msr_entry_add(cpu, MSR_IA32_XFD,
++                              env->msr_xfd);
++            kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
++                              env->msr_xfd_err);
++        }
++
+         /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
+          *       kvm_put_msr_feature_control. */
+     }
+@@ -3570,6 +3577,11 @@ static int kvm_get_msrs(X86CPU *cpu)
+         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
+     }
+ 
++    if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
++        kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
++        kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
++    }
++
+     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
+     if (ret < 0) {
+         return ret;
+@@ -3866,6 +3878,12 @@ static int kvm_get_msrs(X86CPU *cpu)
+             env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
+                            msrs[i].data;
+             break;
++        case MSR_IA32_XFD:
++            env->msr_xfd = msrs[i].data;
++            break;
++        case MSR_IA32_XFD_ERR:
++            env->msr_xfd_err = msrs[i].data;
++            break;
+         }
+     }
+ 
+diff --git a/target/i386/machine.c b/target/i386/machine.c
+index 83c2b91529..3977e9d8f8 100644
+--- a/target/i386/machine.c
++++ b/target/i386/machine.c
+@@ -1455,6 +1455,48 @@ static const VMStateDescription vmstate_msr_intel_sgx = {
+     }
+ };
+ 
++static bool xfd_msrs_needed(void *opaque)
++{
++    X86CPU *cpu = opaque;
++    CPUX86State *env = &cpu->env;
++
++    return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
++}
++
++static const VMStateDescription vmstate_msr_xfd = {
++    .name = "cpu/msr_xfd",
++    .version_id = 1,
++    .minimum_version_id = 1,
++    .needed = xfd_msrs_needed,
++    .fields = (VMStateField[]) {
++        VMSTATE_UINT64(env.msr_xfd, X86CPU),
++        VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
++        VMSTATE_END_OF_LIST()
++    }
++};
++
++#ifdef TARGET_X86_64
++static bool amx_xtile_needed(void *opaque)
++{
++    X86CPU *cpu = opaque;
++    CPUX86State *env = &cpu->env;
++
++    return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
++}
++
++static const VMStateDescription vmstate_amx_xtile = {
++    .name = "cpu/intel_amx_xtile",
++    .version_id = 1,
++    .minimum_version_id = 1,
++    .needed = amx_xtile_needed,
++    .fields = (VMStateField[]) {
++        VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
++        VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
++        VMSTATE_END_OF_LIST()
++    }
++};
++#endif
++
+ const VMStateDescription vmstate_x86_cpu = {
+     .name = "cpu",
+     .version_id = 12,
+@@ -1593,6 +1635,10 @@ const VMStateDescription vmstate_x86_cpu = {
+ #endif
+         &vmstate_msr_tsx_ctrl,
+         &vmstate_msr_intel_sgx,
++        &vmstate_msr_xfd,
++#ifdef TARGET_X86_64
++        &vmstate_amx_xtile,
++#endif
+         NULL
+     }
+ };
+-- 
+2.27.0
+
diff --git a/x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch b/x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
new file mode 100644
index 0000000..7331af7
--- /dev/null
+++ b/x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
@@ -0,0 +1,186 @@
+From e98958c23ea5b15a8e84642c373336a8898cd63f Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:32 -0800
+Subject: [PATCH 07/10] x86: add support for KVM_CAP_XSAVE2 and AMX state
+ migration
+
+from mainline-v7.0.0-rc0
+commit e56dd3c70abb31893c61ac834109fa7a38841330
+category: feature
+feature: SPR AMX support for Qemu
+bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
+
+Intel-SIG: commit e56dd3c70abb ("x86: add support for KVM_CAP_XSAVE2 and
+AMX state migration")
+
+-------------------------------------------------------
+
+x86: add support for KVM_CAP_XSAVE2 and AMX state migration
+
+When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave
+area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE
+under KVM_CAP_XSAVE2 work with an xsave buffer larger than 4KB.
+Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Zeng Guang <guang.zeng@intel.com>
+Signed-off-by: Wei Wang <wei.w.wang@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-7-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Jason Zeng <jason.zeng@intel.com>
+---
+ target/i386/cpu.h          |  4 ++++
+ target/i386/kvm/kvm.c      | 42 ++++++++++++++++++++++++--------------
+ target/i386/xsave_helper.c | 28 +++++++++++++++++++++++++
+ 3 files changed, 59 insertions(+), 15 deletions(-)
+
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 09c725ee13..74e66c352c 100644
+--- a/target/i386/cpu.h
++++ b/target/i386/cpu.h
+@@ -1523,6 +1523,10 @@ typedef struct CPUX86State {
+     uint64_t opmask_regs[NB_OPMASK_REGS];
+     YMMReg zmmh_regs[CPU_NB_REGS];
+     ZMMReg hi16_zmm_regs[CPU_NB_REGS];
++#ifdef TARGET_X86_64
++    uint8_t xtilecfg[64];
++    uint8_t xtiledata[8192];
++#endif
+ 
+     /* sysenter registers */
+     uint32_t sysenter_cs;
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index 60ccdec5e8..b0b22dcf7c 100644
+--- a/target/i386/kvm/kvm.c
++++ b/target/i386/kvm/kvm.c
+@@ -123,6 +123,7 @@ static uint32_t num_architectural_pmu_gp_counters;
+ static uint32_t num_architectural_pmu_fixed_counters;
+ 
+ static int has_xsave;
++static int has_xsave2;
+ static int has_xcrs;
+ static int has_pit_state2;
+ static int has_exception_payload;
+@@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker;
+ 
+ #define KVM_MAX_CPUID_ENTRIES  100
+ 
++static void kvm_init_xsave(CPUX86State *env)
++{
++    if (has_xsave2) {
++        env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
++    } else if (has_xsave) {
++        env->xsave_buf_len = sizeof(struct kvm_xsave);
++    } else {
++        return;
++    }
++
++    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
++    memset(env->xsave_buf, 0, env->xsave_buf_len);
++    /*
++     * The allocated storage must be large enough for all of the
++     * possible XSAVE state components.
++     */
++    assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
++           env->xsave_buf_len);
++}
++
+ int kvm_arch_init_vcpu(CPUState *cs)
+ {
+     struct {
+@@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
+ 
+     cpuid_i = 0;
+ 
++    has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
++
+     r = kvm_arch_set_tsc_khz(cs);
+     if (r < 0) {
+         return r;
+@@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
+     if (r) {
+         goto fail;
+     }
+-
+-    if (has_xsave) {
+-        env->xsave_buf_len = sizeof(struct kvm_xsave);
+-        env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+-        memset(env->xsave_buf, 0, env->xsave_buf_len);
+-
+-        /*
+-         * The allocated storage must be large enough for all of the
+-         * possible XSAVE state components.
+-         */
+-        assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
+-               <= env->xsave_buf_len);
+-    }
++    kvm_init_xsave(env);
+ 
+     max_nested_state_len = kvm_max_nested_state_length();
+     if (max_nested_state_len > 0) {
+@@ -3263,13 +3274,14 @@ static int kvm_get_xsave(X86CPU *cpu)
+ {
+     CPUX86State *env = &cpu->env;
+     void *xsave = env->xsave_buf;
+-    int ret;
++    int type, ret;
+ 
+     if (!has_xsave) {
+         return kvm_get_fpu(cpu);
+     }
+ 
+-    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
++    type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
++    ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
+     if (ret < 0) {
+         return ret;
+     }
+diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
+index ac61a96344..996e9f3bfe 100644
+--- a/target/i386/xsave_helper.c
++++ b/target/i386/xsave_helper.c
+@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
+ 
+         memcpy(pkru, &env->pkru, sizeof(env->pkru));
+     }
++
++    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
++    if (e->size && e->offset) {
++        XSaveXTILECFG *tilecfg = buf + e->offset;
++
++        memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
++    }
++
++    e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
++    if (e->size && e->offset && buflen >= e->size + e->offset) {
++        XSaveXTILEDATA *tiledata = buf + e->offset;
++
++        memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
++    }
+ #endif
+ }
+ 
+@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
+         pkru = buf + e->offset;
+         memcpy(&env->pkru, pkru, sizeof(env->pkru));
+     }
++
++    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
++    if (e->size && e->offset) {
++        const XSaveXTILECFG *tilecfg = buf + e->offset;
++
++        memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
++    }
++
++    e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
++    if (e->size && e->offset && buflen >= e->size + e->offset) {
++        const XSaveXTILEDATA *tiledata = buf + e->offset;
++
++        memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
++    }
+ #endif
+ }
+-- 
+2.27.0
+
-- 
Gitee