diff --git a/Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch b/Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch new file mode 100644 index 0000000000000000000000000000000000000000..97726fce67283fe9b7b5c4d9f069592da0081130 --- /dev/null +++ b/Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch @@ -0,0 +1,38 @@ +From 08a4aa240587fed26c17271bf9af87f0a5997f4a Mon Sep 17 00:00:00 2001 +From: libai +Date: Wed, 26 Mar 2025 18:59:33 +0800 +Subject: [PATCH] Kconfig/iommufd/VDPA: Update IOMMUFD module configuration + dependencies The vDPA module can also use IOMMUFD like the VFIO module. + Therefore, adjust Kconfig to remove the dependency of IOMMUFD on VFIO and add + a reverse dependency on IOMMUFD for vDPA + +Signed-off-by: libai +--- + Kconfig.host | 1 + + backends/Kconfig | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Kconfig.host b/Kconfig.host +index f496475f8e..faf58d9af5 100644 +--- a/Kconfig.host ++++ b/Kconfig.host +@@ -28,6 +28,7 @@ config VHOST_USER + + config VHOST_VDPA + bool ++ select IOMMUFD + + config VHOST_KERNEL + bool +diff --git a/backends/Kconfig b/backends/Kconfig +index 2cb23f62fa..8d0be5a263 100644 +--- a/backends/Kconfig ++++ b/backends/Kconfig +@@ -2,4 +2,3 @@ source tpm/Kconfig + + config IOMMUFD + bool +- depends on VFIO +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch b/backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch new file mode 100644 index 0000000000000000000000000000000000000000..8685d1200499ea5152bc661f1c842ed0df8efc6c --- /dev/null +++ b/backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch @@ -0,0 +1,59 @@ +From 88006385c8e58b2aa612bf5aa184263f0d4245de Mon Sep 17 00:00:00 2001 +From: Zhao Liu +Date: Mon, 11 Mar 2024 11:37:55 +0800 +Subject: [PATCH] backends/iommufd: Fix missing ERRP_GUARD() for + error_prepend() + +As the comment in qapi/error, passing @errp to error_prepend() requires +ERRP_GUARD(): + +* = Why, when and how 
to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +... +* - It should not be passed to error_prepend(), error_vprepend() or +* error_append_hint(), because that doesn't work with &error_fatal. +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function. +* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. + +ERRP_GUARD() could avoid the case when @errp is &error_fatal, the user +can't see this additional information, because exit() happens in +error_setg earlier than information is added [1]. + +The iommufd_backend_set_fd() passes @errp to error_prepend(), to avoid +the above issue, add missing ERRP_GUARD() at the beginning of this +function. + +[1]: Issue description in the commit message of commit ae7c80a7bd73 + ("error: New macro ERRP_GUARD()"). + +Cc: Yi Liu +Cc: Eric Auger +Cc: Zhenzhong Duan +Signed-off-by: Zhao Liu +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Message-ID: <20240311033822.3142585-3-zhao1.liu@linux.intel.com> +Signed-off-by: Thomas Huth +--- + backends/iommufd.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 3cbf11fc8b..f061b6869a 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -44,6 +44,7 @@ static void iommufd_backend_finalize(Object *obj) + + static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) + { ++ ERRP_GUARD(); + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + int fd = -1; + +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Get-rid-of-qemu_open_old.patch b/backends-iommufd-Get-rid-of-qemu_open_old.patch new file mode 100644 index 0000000000000000000000000000000000000000..184d44ddd2edf4e28d5b6df53375e913f3b53155 --- /dev/null +++ b/backends-iommufd-Get-rid-of-qemu_open_old.patch @@ -0,0 +1,45 @@ +From 959b91b9b45b3ec649c6de0e268a4dcd603ce8af Mon Sep 17 00:00:00 2001 +From: Zhao Liu +Date: Mon, 15 
Jul 2024 16:21:54 +0800 +Subject: [PATCH] backends/iommufd: Get rid of qemu_open_old() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For qemu_open_old(), osdep.h said: + +> Don't introduce new usage of this function, prefer the following +> qemu_open/qemu_create that take an "Error **errp". + +So replace qemu_open_old() with qemu_open(). + +Cc: Yi Liu +Cc: Eric Auger +Cc: Zhenzhong Duan +Signed-off-by: Zhao Liu +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Yi Liu +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +--- + backends/iommufd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index fad580fdcb..62df6e41f0 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -79,9 +79,8 @@ bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + int fd; + + if (be->owned && !be->users) { +- fd = qemu_open_old("/dev/iommu", O_RDWR); ++ fd = qemu_open("/dev/iommu", O_RDWR, errp); + if (fd < 0) { +- error_setg_errno(errp, errno, "/dev/iommu opening failed"); + return false; + } + be->fd = fd; +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Make-iommufd_backend_-return-bool.patch b/backends-iommufd-Make-iommufd_backend_-return-bool.patch new file mode 100644 index 0000000000000000000000000000000000000000..134f5c15f35c886ca23eb3b5ab393f6330a2c92e --- /dev/null +++ b/backends-iommufd-Make-iommufd_backend_-return-bool.patch @@ -0,0 +1,140 @@ +From c9a107b1f73bddb4c9844c12444e3802e5f576b4 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 7 May 2024 14:42:52 +0800 +Subject: [PATCH] backends/iommufd: Make iommufd_backend_*() return bool +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is to follow the coding standard to return bool if 'Error **' +is used to pass error. 
+ +The changed functions include: + +iommufd_backend_connect +iommufd_backend_alloc_ioas + +By this chance, simplify the functions a bit by avoiding duplicate +recordings, e.g., log through either error interface or trace, not +both. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +--- + backends/iommufd.c | 29 +++++++++++++---------------- + backends/trace-events | 4 ++-- + include/sysemu/iommufd.h | 6 +++--- + 3 files changed, 18 insertions(+), 21 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index f061b6869a..fad580fdcb 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -74,24 +74,22 @@ static void iommufd_backend_class_init(ObjectClass *oc, void *data) + object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); + } + +-int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) ++bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + { +- int fd, ret = 0; ++ int fd; + + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "/dev/iommu opening failed"); +- ret = fd; +- goto out; ++ return false; + } + be->fd = fd; + } + be->users++; +-out: +- trace_iommufd_backend_connect(be->fd, be->owned, +- be->users, ret); +- return ret; ++ ++ trace_iommufd_backend_connect(be->fd, be->owned, be->users); ++ return true; + } + + void iommufd_backend_disconnect(IOMMUFDBackend *be) +@@ -108,25 +106,24 @@ out: + trace_iommufd_backend_disconnect(be->fd, be->users); + } + +-int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +- Error **errp) ++bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp) + { +- int ret, fd = be->fd; ++ int fd = be->fd; + struct iommu_ioas_alloc alloc_data = { + .size = sizeof(alloc_data), + .flags = 0, + }; + +- ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); +- if (ret) { ++ if (ioctl(fd, 
IOMMU_IOAS_ALLOC, &alloc_data)) { + error_setg_errno(errp, errno, "Failed to allocate ioas"); +- return ret; ++ return false; + } + + *ioas_id = alloc_data.out_ioas_id; +- trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); ++ trace_iommufd_backend_alloc_ioas(fd, *ioas_id); + +- return ret; ++ return true; + } + + void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) +diff --git a/backends/trace-events b/backends/trace-events +index f8592a2711..8fe77149b2 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -7,13 +7,13 @@ dbus_vmstate_loading(const char *id) "id: %s" + dbus_vmstate_saving(const char *id) "id: %s" + + # iommufd.c +-iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" ++iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d" + iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" + iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" + iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" + iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" + iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" +-iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" ++iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d" + iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)" + 
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" + iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 908c94d811..0531a4ad98 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -43,11 +43,11 @@ typedef struct IOMMUFDViommu { + uint32_t viommu_id; + } IOMMUFDViommu; + +-int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); ++bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); + void iommufd_backend_disconnect(IOMMUFDBackend *be); + +-int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +- Error **errp); ++bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp); + void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); + int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Remove-check-on-number-of-backend-u.patch b/backends-iommufd-Remove-check-on-number-of-backend-u.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ec96e2aa1d7b51a5dc9646732e9add93f61c2a3 --- /dev/null +++ b/backends-iommufd-Remove-check-on-number-of-backend-u.patch @@ -0,0 +1,37 @@ +From e2bc395c5db34111faf2adcecdb385e5a4e8d23d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 22 Dec 2023 08:55:23 +0100 +Subject: [PATCH] backends/iommufd: Remove check on number of backend users +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +QOM already has a ref count on objects and it will assert much +earlier, when INT_MAX is reached. 
+ +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +--- + backends/iommufd.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 4f5df63331..f17a846aab 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -81,11 +81,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + int fd, ret = 0; + + qemu_mutex_lock(&be->lock); +- if (be->users == UINT32_MAX) { +- error_setg(errp, "too many connections"); +- ret = -E2BIG; +- goto out; +- } + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Remove-mutex.patch b/backends-iommufd-Remove-mutex.patch new file mode 100644 index 0000000000000000000000000000000000000000..db4217e8e10b12d4471c208bf9226ebe8bc9244e --- /dev/null +++ b/backends-iommufd-Remove-mutex.patch @@ -0,0 +1,103 @@ +From 1e6734af14b3223a7d7e304262c96051ddf8637f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Thu, 21 Dec 2023 16:58:41 +0100 +Subject: [PATCH] backends/iommufd: Remove mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Coverity reports a concurrent data access violation because be->users +is being accessed in iommufd_backend_can_be_deleted() without holding +the mutex. + +However, these routines are called from the QEMU main thread when a +device is created. In this case, the code paths should be protected by +the BQL lock and it should be safe to drop the IOMMUFD backend mutex. +Simply remove it. 
+ +Fixes: CID 1531550 +Fixes: CID 1531549 +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +--- + backends/iommufd.c | 7 ------- + include/sysemu/iommufd.h | 2 -- + 2 files changed, 9 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index f17a846aab..3cbf11fc8b 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -30,7 +30,6 @@ static void iommufd_backend_init(Object *obj) + be->fd = -1; + be->users = 0; + be->owned = true; +- qemu_mutex_init(&be->lock); + } + + static void iommufd_backend_finalize(Object *obj) +@@ -53,10 +52,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) + error_prepend(errp, "Could not parse remote object fd %s:", str); + return; + } +- qemu_mutex_lock(&be->lock); + be->fd = fd; + be->owned = false; +- qemu_mutex_unlock(&be->lock); + trace_iommu_backend_set_fd(be->fd); + } + +@@ -80,7 +77,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + { + int fd, ret = 0; + +- qemu_mutex_lock(&be->lock); + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +@@ -94,13 +90,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + out: + trace_iommufd_backend_connect(be->fd, be->owned, + be->users, ret); +- qemu_mutex_unlock(&be->lock); + return ret; + } + + void iommufd_backend_disconnect(IOMMUFDBackend *be) + { +- qemu_mutex_lock(&be->lock); + if (!be->users) { + goto out; + } +@@ -111,7 +105,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) + } + out: + trace_iommufd_backend_disconnect(be->fd, be->users); +- qemu_mutex_unlock(&be->lock); + } + + int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 29afaa429d..908c94d811 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -15,7 +15,6 @@ + #define SYSEMU_IOMMUFD_H + + #include "qom/object.h" +-#include "qemu/thread.h" + 
#include "exec/hwaddr.h" + #include "exec/cpu-common.h" + #include "sysemu/host_iommu_device.h" +@@ -33,7 +32,6 @@ struct IOMMUFDBackend { + /*< protected >*/ + int fd; /* /dev/iommu file descriptor */ + bool owned; /* is the /dev/iommu opened internally */ +- QemuMutex lock; + uint32_t users; + + /*< public >*/ +-- +2.41.0.windows.1 + diff --git a/docs-Add-GNR-SRF-and-CWF-CPU-models.patch b/docs-Add-GNR-SRF-and-CWF-CPU-models.patch new file mode 100644 index 0000000000000000000000000000000000000000..f59f818c17d48eb158df6ea08b2be8b906111e30 --- /dev/null +++ b/docs-Add-GNR-SRF-and-CWF-CPU-models.patch @@ -0,0 +1,119 @@ +From 2753607e8768002debb4608dacafe1309420a4dd Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan 2025 10:06:50 +0800 +Subject: [PATCH] docs: Add GNR, SRF and CWF CPU models + +commit 0a6dec6d11e5e392dcd6299548bf1514f1201707 upstream. + +Update GraniteRapids, SierraForest and ClearwaterForest CPU models in +section "Preferred CPU models for Intel x86 hosts". + +Also introduce bhi-no, gds-no and rfds-no in doc. + +Intel-SIG: commit 0a6dec6d11e5 docs: Add GNR, SRF and CWF CPU models. + +Suggested-by: Zhao Liu +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-5-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + docs/system/cpu-models-x86.rst.inc | 50 +++++++++++++++++++++++++++--- + 1 file changed, 46 insertions(+), 4 deletions(-) + +diff --git a/docs/system/cpu-models-x86.rst.inc b/docs/system/cpu-models-x86.rst.inc +index 7f6368f999..37fe1d0ac8 100644 +--- a/docs/system/cpu-models-x86.rst.inc ++++ b/docs/system/cpu-models-x86.rst.inc +@@ -71,6 +71,16 @@ mixture of host CPU models between machines, if live migration + compatibility is required, use the newest CPU model that is compatible + across all desired hosts. 
+ ++``ClearwaterForest`` ++ Intel Xeon Processor (ClearwaterForest, 2025) ++ ++``SierraForest``, ``SierraForest-v2`` ++ Intel Xeon Processor (SierraForest, 2024), SierraForest-v2 mitigates ++ the GDS and RFDS vulnerabilities with stepping 3. ++ ++``GraniteRapids``, ``GraniteRapids-v2`` ++ Intel Xeon Processor (GraniteRapids, 2024) ++ + ``Cascadelake-Server``, ``Cascadelake-Server-noTSX`` + Intel Xeon Processor (Cascade Lake, 2019), with "stepping" levels 6 + or 7 only. (The Cascade Lake Xeon processor with *stepping 5 is +@@ -181,7 +191,7 @@ features are included if using "Host passthrough" or "Host model". + CVE-2018-12127, [MSBDS] CVE-2018-12126). + + This is an MSR (Model-Specific Register) feature rather than a CPUID feature, +- so it will not appear in the Linux ``/proc/cpuinfo`` in the host or ++ therefore it will not appear in the Linux ``/proc/cpuinfo`` in the host or + guest. Instead, the host kernel uses it to populate the MDS + vulnerability file in ``sysfs``. + +@@ -189,10 +199,10 @@ features are included if using "Host passthrough" or "Host model". + affected} in the ``/sys/devices/system/cpu/vulnerabilities/mds`` file. + + ``taa-no`` +- Recommended to inform that the guest that the host is ``not`` ++ Recommended to inform the guest that the host is ``not`` + vulnerable to CVE-2019-11135, TSX Asynchronous Abort (TAA). + +- This too is an MSR feature, so it does not show up in the Linux ++ This is also an MSR feature, therefore it does not show up in the Linux + ``/proc/cpuinfo`` in the host or guest. + + It should only be enabled for VMs if the host reports ``Not affected`` +@@ -214,7 +224,7 @@ features are included if using "Host passthrough" or "Host model". + By disabling TSX, KVM-based guests can avoid paying the price of + mitigating TSX-based attacks. 
+ +- Note that ``tsx-ctrl`` too is an MSR feature, so it does not show ++ Note that ``tsx-ctrl`` is also an MSR feature, therefore it does not show + up in the Linux ``/proc/cpuinfo`` in the host or guest. + + To validate that Intel TSX is indeed disabled for the guest, there are +@@ -223,6 +233,38 @@ features are included if using "Host passthrough" or "Host model". + ``/sys/devices/system/cpu/vulnerabilities/tsx_async_abort`` file in + the guest should report ``Mitigation: TSX disabled``. + ++``bhi-no`` ++ Recommended to inform the guest that the host is ``not`` ++ vulnerable to CVE-2022-0001, Branch History Injection (BHI). ++ ++ This is also an MSR feature, therefore it does not show up in the Linux ++ ``/proc/cpuinfo`` in the host or guest. ++ ++ It should only be enabled for VMs if the host reports ++ ``BHI: Not affected`` in the ++ ``/sys/devices/system/cpu/vulnerabilities/spectre_v2`` file. ++ ++``gds-no`` ++ Recommended to inform the guest that the host is ``not`` ++ vulnerable to CVE-2022-40982, Gather Data Sampling (GDS). ++ ++ This is also an MSR feature, therefore it does not show up in the Linux ++ ``/proc/cpuinfo`` in the host or guest. ++ ++ It should only be enabled for VMs if the host reports ``Not affected`` ++ in the ``/sys/devices/system/cpu/vulnerabilities/gather_data_sampling`` ++ file. ++ ++``rfds-no`` ++ Recommended to inform the guest that the host is ``not`` ++ vulnerable to CVE-2023-28746, Register File Data Sampling (RFDS). ++ ++ This is also an MSR feature, therefore it does not show up in the Linux ++ ``/proc/cpuinfo`` in the host or guest. ++ ++ It should only be enabled for VMs if the host reports ``Not affected`` ++ in the ``/sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling`` ++ file. 
+ + Preferred CPU models for AMD x86 hosts + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +-- +2.41.0.windows.1 + diff --git a/hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch b/hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f026bf5734f19a3cbb11f5ce0c44c8587be139c --- /dev/null +++ b/hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch @@ -0,0 +1,204 @@ +From d6f75f9e532a4a4b6bb4610049f4fa7f26160733 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Thu, 20 Feb 2025 19:24:18 +0800 +Subject: [PATCH] hw/intc: Add extioi ability of 256 vcpu interrupt routing + +Add the feature field for the CPU-encoded interrupt +route to extioi and the corresponding mechanism for +backup recovery. + +Signed-off-by: Xianglai Li +--- + hw/intc/loongarch_extioi_kvm.c | 65 ++++++++++++++++++++++++++++-- + hw/loongarch/virt.c | 2 + + include/hw/intc/loongarch_extioi.h | 4 ++ + linux-headers/asm-loongarch/kvm.h | 10 +++++ + 4 files changed, 77 insertions(+), 4 deletions(-) + +diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c +index f5bbc33255..2e7c764b7c 100644 +--- a/hw/intc/loongarch_extioi_kvm.c ++++ b/hw/intc/loongarch_extioi_kvm.c +@@ -18,8 +18,32 @@ + static void kvm_extioi_access_regs(int fd, uint64_t addr, + void *val, int is_write) + { +- kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS, +- addr, val, is_write, &error_abort); ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS, ++ addr, val, is_write, &error_abort); ++} ++ ++static void kvm_extioi_access_sw_status(int fd, uint64_t addr, ++ void *val, bool is_write) ++{ ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS, ++ addr, val, is_write, &error_abort); ++} ++ ++static void kvm_extioi_save_load_sw_status(void *opaque, bool is_write) ++{ ++ KVMLoongArchExtIOI *s = (KVMLoongArchExtIOI *)opaque; ++ KVMLoongArchExtIOIClass *class = KVM_LOONGARCH_EXTIOI_GET_CLASS(s); ++ int fd = 
class->dev_fd; ++ int addr; ++ ++ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU; ++ kvm_extioi_access_sw_status(fd, addr, (void *)&s->num_cpu, is_write); ++ ++ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE; ++ kvm_extioi_access_sw_status(fd, addr, (void *)&s->features, is_write); ++ ++ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE; ++ kvm_extioi_access_sw_status(fd, addr, (void *)&s->status, is_write); + } + + static int kvm_loongarch_extioi_pre_save(void *opaque) +@@ -41,6 +65,8 @@ static int kvm_loongarch_extioi_pre_save(void *opaque) + kvm_extioi_access_regs(fd, EXTIOI_COREISR_START, + (void *)s->coreisr, false); + ++ kvm_extioi_save_load_sw_status(opaque, false); ++ + return 0; + } + +@@ -61,12 +87,19 @@ static int kvm_loongarch_extioi_post_load(void *opaque, int version_id) + (void *)s->sw_coremap, true); + kvm_extioi_access_regs(fd, EXTIOI_COREISR_START, (void *)s->coreisr, true); + ++ kvm_extioi_save_load_sw_status(opaque, true); ++ ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, ++ KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED, ++ NULL, true, &error_abort); ++ + return 0; + } + + static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + { + KVMLoongArchExtIOIClass *extioi_class = KVM_LOONGARCH_EXTIOI_GET_CLASS(dev); ++ KVMLoongArchExtIOI *s = KVM_LOONGARCH_EXTIOI(dev); + struct kvm_create_device cd = {0}; + Error *err = NULL; + int ret,i; +@@ -77,6 +110,10 @@ static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + return; + } + ++ if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) { ++ s->features |= EXTIOI_VIRT_HAS_FEATURES; ++ } ++ + if (!extioi_class->is_created) { + cd.type = KVM_DEV_TYPE_LA_EXTIOI; + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); +@@ -87,6 +124,15 @@ static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + } + extioi_class->is_created = true; + extioi_class->dev_fd = cd.fd; ++ ++ kvm_device_access(cd.fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, ++ 
KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU, ++ &s->num_cpu, true, NULL); ++ ++ kvm_device_access(cd.fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, ++ KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE, ++ &s->features, true, NULL); ++ + fprintf(stdout, "Create LoongArch extioi irqchip in KVM done!\n"); + } + +@@ -102,8 +148,8 @@ static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + + static const VMStateDescription vmstate_kvm_extioi_core = { + .name = "kvm-extioi-single", +- .version_id = 1, +- .minimum_version_id = 1, ++ .version_id = 2, ++ .minimum_version_id = 2, + .pre_save = kvm_loongarch_extioi_pre_save, + .post_load = kvm_loongarch_extioi_post_load, + .fields = (VMStateField[]) { +@@ -119,10 +165,20 @@ static const VMStateDescription vmstate_kvm_extioi_core = { + EXTIOI_IRQS_IPMAP_SIZE / 4), + VMSTATE_UINT32_ARRAY(coremap, KVMLoongArchExtIOI, EXTIOI_IRQS / 4), + VMSTATE_UINT8_ARRAY(sw_coremap, KVMLoongArchExtIOI, EXTIOI_IRQS), ++ VMSTATE_UINT32(num_cpu, KVMLoongArchExtIOI), ++ VMSTATE_UINT32(features, KVMLoongArchExtIOI), ++ VMSTATE_UINT32(status, KVMLoongArchExtIOI), + VMSTATE_END_OF_LIST() + } + }; + ++static Property extioi_properties[] = { ++ DEFINE_PROP_UINT32("num-cpu", KVMLoongArchExtIOI, num_cpu, 1), ++ DEFINE_PROP_BIT("has-virtualization-extension", KVMLoongArchExtIOI, ++ features, EXTIOI_HAS_VIRT_EXTENSION, 0), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ + static void kvm_loongarch_extioi_class_init(ObjectClass *oc, void *data) + { + DeviceClass *dc = DEVICE_CLASS(oc); +@@ -131,6 +187,7 @@ static void kvm_loongarch_extioi_class_init(ObjectClass *oc, void *data) + extioi_class->parent_realize = dc->realize; + dc->realize = kvm_loongarch_extioi_realize; + extioi_class->is_created = false; ++ device_class_set_props(dc, extioi_properties); + dc->vmsd = &vmstate_kvm_extioi_core; + } + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index ce026a4c3c..233297d78f 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -874,6 +874,8 @@ 
static void virt_irq_init(LoongArchVirtMachineState *lvms) + /* Create EXTIOI device */ + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + extioi = qdev_new(TYPE_KVM_LOONGARCH_EXTIOI); ++ qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.max_cpus); ++ qdev_prop_set_bit(extioi, "has-virtualization-extension", true); + sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); + } else { + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index 9966cd98d3..92b38d5c38 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -94,6 +94,10 @@ struct LoongArchExtIOI { + + struct KVMLoongArchExtIOI { + SysBusDevice parent_obj; ++ uint32_t num_cpu; ++ uint32_t features; ++ uint32_t status; ++ + /* hardware state */ + uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT]; +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 13c1280662..34abd65939 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -141,6 +141,16 @@ struct kvm_iocsr_entry { + + #define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 + ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS 0x40000006 ++#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU 0x0 ++#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE 0x1 ++#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE 0x2 ++ ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL 0x40000007 ++#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU 0x0 ++#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE 0x1 ++#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED 0x3 ++ + #define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000004 + #define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 + +-- +2.41.0.windows.1 + diff --git a/hw-loongarch-boot-Adjust-the-loading-position-of-the.patch b/hw-loongarch-boot-Adjust-the-loading-position-of-the.patch new file mode 100644 index 
0000000000000000000000000000000000000000..8a10afd591917cb63988dfc9fe9ef975dacc627d --- /dev/null +++ b/hw-loongarch-boot-Adjust-the-loading-position-of-the.patch @@ -0,0 +1,95 @@ +From 16670675cbf7fc4db147a698ba7787d2e2fa675b Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Wed, 26 Mar 2025 17:02:37 +0800 +Subject: [PATCH] hw/loongarch/boot: Adjust the loading position of the initrd + +When only the -kernel parameter is used to load the elf kernel, +the initrd is loaded in the ram. If the initrd size is too large, +the loading fails, resulting in a VM startup failure. +This patch first loads initrd near the kernel. +When the nearby memory space of the kernel is insufficient, +it tries to load it to the starting position of high memory. +If there is still not enough, qemu will report an error +and ask the user to increase the memory space for the +virtual machine to boot. + +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 53 +++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 44 insertions(+), 9 deletions(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 53dcefbb55..39c4a6d8c6 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -171,6 +171,48 @@ static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) + return addr & MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS); + } + ++static void find_initrd_loadoffset(struct loongarch_boot_info *info, ++ uint64_t kernel_high, ssize_t kernel_size) ++{ ++ hwaddr base, size, gap, low_end; ++ ram_addr_t initrd_end, initrd_start; ++ ++ base = VIRT_LOWMEM_BASE; ++ gap = VIRT_LOWMEM_SIZE; ++ initrd_start = ROUND_UP(kernel_high + 4 * kernel_size, 64 * KiB); ++ initrd_end = initrd_start + initrd_size; ++ ++ size = info->ram_size; ++ low_end = base + MIN(size, gap); ++ if (initrd_end <= low_end) { ++ initrd_offset = initrd_start; ++ return; ++ } ++ ++ if (size <= gap) { ++ error_report("The low memory too small for initial ram disk '%s'," ++ "You need to expand the 
memory space", ++ info->initrd_filename); ++ exit(1); ++ } ++ ++ /* ++ * Try to load initrd in the high memory ++ */ ++ size -= gap; ++ base = VIRT_HIGHMEM_BASE; ++ initrd_start = ROUND_UP(base, 64 * KiB); ++ if (initrd_size <= size) { ++ initrd_offset = initrd_start; ++ return; ++ } ++ ++ error_report("The high memory too small for initial ram disk '%s'," ++ "You need to expand the memory space", ++ info->initrd_filename); ++ exit(1); ++} ++ + static int64_t load_kernel_info(struct loongarch_boot_info *info) + { + uint64_t kernel_entry, kernel_low, kernel_high; +@@ -192,16 +234,9 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) + if (info->initrd_filename) { + initrd_size = get_image_size(info->initrd_filename); + if (initrd_size > 0) { +- initrd_offset = ROUND_UP(kernel_high + 4 * kernel_size, 64 * KiB); +- +- if (initrd_offset + initrd_size > info->ram_size) { +- error_report("memory too small for initial ram disk '%s'", +- info->initrd_filename); +- exit(1); +- } +- ++ find_initrd_loadoffset(info, kernel_high, kernel_size); + initrd_size = load_image_targphys(info->initrd_filename, initrd_offset, +- info->ram_size - initrd_offset); ++ initrd_size); + } + + if (initrd_size == (target_ulong)-1) { +-- +2.41.0.windows.1 + diff --git a/hw-rtc-Fixed-loongson-rtc-emulation-errors.patch b/hw-rtc-Fixed-loongson-rtc-emulation-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a759ac44790e0908326fee49402a13827a6de53 --- /dev/null +++ b/hw-rtc-Fixed-loongson-rtc-emulation-errors.patch @@ -0,0 +1,137 @@ +From 4044284b230182cbaeb401bdb1b65dcbd11c7550 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 7 Apr 2025 18:59:42 +0800 +Subject: [PATCH] hw/rtc: Fixed loongson rtc emulation errors + +The expire time is sent to the timer only +when the expire Time is greater than 0 or +greater than now. Otherwise, the timer +will trigger interruption continuously. + +Timer interrupts are sent using pulse functions. 
+ +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 9 +++++++-- + hw/rtc/ls7a_rtc.c | 22 +++++++++++++--------- + 2 files changed, 20 insertions(+), 11 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 0c24e632bb..ce026a4c3c 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -51,6 +51,11 @@ + #include "qemu/error-report.h" + #include "qemu/guest-random.h" + ++#define FDT_IRQ_FLAGS_EDGE_LO_HI 1 ++#define FDT_IRQ_FLAGS_EDGE_HI_LO 2 ++#define FDT_IRQ_FLAGS_LEVEL_HI 4 ++#define FDT_IRQ_FLAGS_LEVEL_LO 8 ++ + static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms) + { + if (lvms->veiointc == ON_OFF_AUTO_OFF) { +@@ -275,7 +280,7 @@ static void fdt_add_rtc_node(LoongArchVirtMachineState *lvms, + "loongson,ls7a-rtc"); + qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 2, size); + qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", +- VIRT_RTC_IRQ - VIRT_GSI_BASE , 0x4); ++ VIRT_RTC_IRQ - VIRT_GSI_BASE , FDT_IRQ_FLAGS_EDGE_LO_HI); + qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", + *pch_pic_phandle); + g_free(nodename); +@@ -334,7 +339,7 @@ static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, + qemu_fdt_setprop_cell(ms->fdt, nodename, "clock-frequency", 100000000); + if (chosen) + qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename); +- qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, 0x4); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", + *pch_pic_phandle); + g_free(nodename); +diff --git a/hw/rtc/ls7a_rtc.c b/hw/rtc/ls7a_rtc.c +index 1f9e38a735..be9546c850 100644 +--- a/hw/rtc/ls7a_rtc.c ++++ b/hw/rtc/ls7a_rtc.c +@@ -145,20 +145,22 @@ static void toymatch_write(LS7ARtcState *s, uint64_t val, int num) + now = qemu_clock_get_ms(rtc_clock); + toymatch_val_to_time(s, val, &tm); + expire_time = now + (qemu_timedate_diff(&tm) - s->offset_toy) * 
1000; +- timer_mod(s->toy_timer[num], expire_time); ++ if (expire_time > now) ++ timer_mod(s->toy_timer[num], expire_time); + } + } + + static void rtcmatch_write(LS7ARtcState *s, uint64_t val, int num) + { +- uint64_t expire_ns; ++ int64_t expire_ns; + + /* it do not support write when toy disabled */ + if (rtc_enabled(s)) { + s->rtcmatch[num] = val; + /* calculate expire time */ + expire_ns = ticks_to_ns(val) - ticks_to_ns(s->offset_rtc); +- timer_mod_ns(s->rtc_timer[num], expire_ns); ++ if (expire_ns > 0) ++ timer_mod_ns(s->rtc_timer[num], expire_ns); + } + } + +@@ -185,7 +187,7 @@ static void ls7a_rtc_stop(LS7ARtcState *s) + static void ls7a_toy_start(LS7ARtcState *s) + { + int i; +- uint64_t expire_time, now; ++ int64_t expire_time, now; + struct tm tm = {}; + + now = qemu_clock_get_ms(rtc_clock); +@@ -194,19 +196,21 @@ static void ls7a_toy_start(LS7ARtcState *s) + for (i = 0; i < TIMER_NUMS; i++) { + toymatch_val_to_time(s, s->toymatch[i], &tm); + expire_time = now + (qemu_timedate_diff(&tm) - s->offset_toy) * 1000; +- timer_mod(s->toy_timer[i], expire_time); ++ if (expire_time > now) ++ timer_mod(s->toy_timer[i], expire_time); + } + } + + static void ls7a_rtc_start(LS7ARtcState *s) + { + int i; +- uint64_t expire_time; ++ int64_t expire_time; + + /* recalculate expire time and enable timer */ + for (i = 0; i < TIMER_NUMS; i++) { + expire_time = ticks_to_ns(s->rtcmatch[i]) - ticks_to_ns(s->offset_rtc); +- timer_mod_ns(s->rtc_timer[i], expire_time); ++ if (expire_time > 0) ++ timer_mod_ns(s->rtc_timer[i], expire_time); + } + } + +@@ -370,7 +374,7 @@ static void toy_timer_cb(void *opaque) + LS7ARtcState *s = opaque; + + if (toy_enabled(s)) { +- qemu_irq_raise(s->irq); ++ qemu_irq_pulse(s->irq); + } + } + +@@ -379,7 +383,7 @@ static void rtc_timer_cb(void *opaque) + LS7ARtcState *s = opaque; + + if (rtc_enabled(s)) { +- qemu_irq_raise(s->irq); ++ qemu_irq_pulse(s->irq); + } + } + +-- +2.41.0.windows.1 + diff --git a/qemu.spec b/qemu.spec index 
80dffeb630504122bf64cc3acb51782ac1a19616..675f081af246fe673702e650bfffcbe6adc5d03b 100644 --- a/qemu.spec +++ b/qemu.spec @@ -3,7 +3,7 @@ Name: qemu Version: 8.2.0 -Release: 31 +Release: 32 Epoch: 11 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -796,6 +796,27 @@ Patch0779: gpex-acpi-Remove-duplicate-DSM-5.patch Patch0780: Revert-linux-user-Print-tid-not-pid-with-strace.patch Patch0781: fw_cfg-Don-t-set-callback_opaque-NULL-in-fw_cfg_modi.patch Patch0782: target-arm-Change-arm_cpu_mp_affinity-when-enabled-I.patch +Patch0783: vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch +Patch0784: target-loongarch-fix-vcpu-reset-command-word-issue.patch +Patch0785: target-loongarch-Fix-the-cpu-unplug-resource-leak.patch +Patch0786: hw-loongarch-boot-Adjust-the-loading-position-of-the.patch +Patch0787: hw-rtc-Fixed-loongson-rtc-emulation-errors.patch +Patch0788: hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch +Patch0789: backends-iommufd-Remove-check-on-number-of-backend-u.patch +Patch0790: backends-iommufd-Remove-mutex.patch +Patch0791: backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch +Patch0792: backends-iommufd-Make-iommufd_backend_-return-bool.patch +Patch0793: backends-iommufd-Get-rid-of-qemu_open_old.patch +Patch0794: Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch +Patch0795: vdpa-iommufd-support-associating-iommufd-backend-for.patch +Patch0796: vdpa-iommufd-Introduce-vdpa-iommufd-module.patch +Patch0797: vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch +Patch0798: target-i386-Introduce-SierraForest-v2-model.patch +Patch0799: target-i386-Export-BHI_NO-bit-to-guests.patch +Patch0800: docs-Add-GNR-SRF-and-CWF-CPU-models.patch +Patch0801: target-i386-add-sha512-sm3-sm4-feature-bits.patch +Patch0802: target-i386-Add-new-CPU-model-ClearwaterForest.patch +Patch0803: target-i386-csv-Release-CSV3-shared-pages-after-unma.patch BuildRequires: flex 
BuildRequires: gcc @@ -1394,6 +1415,29 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Thu May 15 2025 Jiabo Feng - 11:8.2.0-32 +- target/i386: csv: Release CSV3 shared pages after unmapping DMA +- target/i386: Add new CPU model ClearwaterForest +- target/i386: add sha512, sm3, sm4 feature bits +- docs: Add GNR, SRF and CWF CPU models +- target/i386: Export BHI_NO bit to guests +- target/i386: Introduce SierraForest-v2 model +- vdpa/iommufd:Implement DMA mapping through the iommufd interface +- vdpa/iommufd:Introduce vdpa-iommufd module +- vdpa/iommufd:support associating iommufd backend for vDPA devices +- Kconfig/iommufd/VDPA: Update IOMMUFD module configuration dependencies. The vDPA module can also use IOMMUFD like the VFIO module. +- backends/iommufd: Get rid of qemu_open_old() +- backends/iommufd: Make iommufd_backend_*() return bool +- backends/iommufd: Fix missing ERRP_GUARD() for error_prepend() +- backends/iommufd: Remove mutex +- backends/iommufd: Remove check on number of backend users +- hw/intc: Add extioi ability of 256 vcpu interrupt routing +- hw/rtc: Fixed loongson rtc emulation errors +- hw/loongarch/boot: Adjust the loading position of the initrd +- target/loongarch: Fix the cpu unplug resource leak +- target/loongarch: fix vcpu reset command word issue +- vdpa:Fix dirty page bitmap synchronization not done after suspend for vdpa devices + * Thu Apr 24 2025 Jiabo Feng - 11:8.2.0-31 - target/arm: Change arm_cpu_mp_affinity when enabled IPIV feature - fw_cfg: Don't set callback_opaque NULL in fw_cfg_modify_bytes_read() diff --git a/target-i386-Add-new-CPU-model-ClearwaterForest.patch b/target-i386-Add-new-CPU-model-ClearwaterForest.patch new file mode 100644 index 0000000000000000000000000000000000000000..35ee518b8f7a533ebf9d01db37c9226787bea70c --- /dev/null +++ b/target-i386-Add-new-CPU-model-ClearwaterForest.patch @@ -0,0 +1,272 @@ +From e6464174c2261e809764ed63f8a064913a108446 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan
2025 10:06:49 +0800 +Subject: [PATCH] target/i386: Add new CPU model ClearwaterForest + +commit 56e84d898f17606b5d88778726466540af96b234 upstream. + +According to table 1-2 in Intel Architecture Instruction Set Extensions +and Future Features (rev 056) [1], ClearwaterForest has the following new +features which have already been virtualized: + + - AVX-VNNI-INT16 CPUID.(EAX=7,ECX=1):EDX[bit 10] + - SHA512 CPUID.(EAX=7,ECX=1):EAX[bit 0] + - SM3 CPUID.(EAX=7,ECX=1):EAX[bit 1] + - SM4 CPUID.(EAX=7,ECX=1):EAX[bit 2] + +Add above features to new CPU model ClearwaterForest. Comparing with +SierraForest, ClearwaterForest bare-metal contains all features of +SierraForest-v2 CPU model and adds: + + - PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] + - DDPD_U CPUID.(EAX=7,ECX=2):EDX[bit 3] + - BHI_NO IA32_ARCH_CAPABILITIES[bit 20] + +Add above and all features of SierraForest-v2 CPU model to new CPU model +ClearwaterForest. + +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Intel-SIG: commit 56e84d898f17 target/i386: Add new CPU model ClearwaterForest. 
+ +Tested-by: Xuelian Guo +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-4-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++ + target/i386/cpu.h | 35 +++++++++--- + 2 files changed, 164 insertions(+), 6 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6ed4e84b5c..f79d0c9abf 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4337,6 +4337,141 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "ClearwaterForest", ++ .level = 0x23, ++ .xlevel = 0x80000008, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 221, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature. ++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2 | CPUID_SS, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE 
| CPUID_7_0_EBX_TSC_ADJUST | ++ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | ++ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | ++ CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | ++ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT | ++ CPUID_7_0_ECX_CLDEMOTE | CPUID_7_0_ECX_MOVDIRI | ++ CPUID_7_0_ECX_MOVDIR64B, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_BHI_NO | MSR_ARCH_CAP_PBRSB_NO | ++ MSR_ARCH_CAP_GDS_NO | MSR_ARCH_CAP_RFDS_NO, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_SHA512 | CPUID_7_1_EAX_SM3 | CPUID_7_1_EAX_SM4 | ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA | ++ CPUID_7_1_EAX_LAM, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT | ++ CPUID_7_1_EDX_AVX_VNNI_INT16 | CPUID_7_1_EDX_PREFETCHITI, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | ++ CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_DDPD_U | ++ CPUID_7_2_EDX_BHI_CTRL | CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 
| VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ 
.features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .model_id = "Intel Xeon Processor (ClearwaterForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index b883e5e1d6..4424e58d1b 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -801,6 +801,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + + /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ + #define CPUID_7_0_EBX_FSGSBASE (1U << 0) ++/* Support TSC adjust MSR */ ++#define CPUID_7_0_EBX_TSC_ADJUST (1U << 1) + /* Support SGX */ + #define CPUID_7_0_EBX_SGX (1U << 2) + /* 1st Group of Advanced Bit Manipulation Extensions */ +@@ -934,6 +936,12 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + /* Speculative Store Bypass Disable */ + #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) + ++/* SHA512 Instruction */ ++#define CPUID_7_1_EAX_SHA512 (1U << 0) ++/* SM3 Instruction */ ++#define CPUID_7_1_EAX_SM3 (1U << 1) ++/* SM4 Instruction */ ++#define CPUID_7_1_EAX_SM4 (1U << 2) + /* AVX VNNI Instruction */ + #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) + /* AVX512 BFloat16 Instruction */ +@@ -946,6 +954,12 @@ uint64_t 
x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define CPUID_7_1_EAX_FSRS (1U << 11) + /* Fast Short REP CMPS/SCAS */ + #define CPUID_7_1_EAX_FSRC (1U << 12) ++/* Flexible return and event delivery (FRED) */ ++#define CPUID_7_1_EAX_FRED (1U << 17) ++/* Load into IA32_KERNEL_GS_BASE (LKGS) */ ++#define CPUID_7_1_EAX_LKGS (1U << 18) ++/* Non-Serializing Write to Model Specific Register (WRMSRNS) */ ++#define CPUID_7_1_EAX_WRMSRNS (1U << 19) + /* Support Tile Computational Operations on FP16 Numbers */ + #define CPUID_7_1_EAX_AMX_FP16 (1U << 21) + /* Support for VPMADD52[H,L]UQ */ +@@ -957,17 +971,23 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) + /* AVX NE CONVERT Instructions */ + #define CPUID_7_1_EDX_AVX_NE_CONVERT (1U << 5) ++/* AVX-VNNI-INT16 Instructions */ ++#define CPUID_7_1_EDX_AVX_VNNI_INT16 (1U << 10) + /* AMX COMPLEX Instructions */ + #define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) + /* PREFETCHIT0/1 Instructions */ + #define CPUID_7_1_EDX_PREFETCHITI (1U << 14) +-/* Flexible return and event delivery (FRED) */ +-#define CPUID_7_1_EAX_FRED (1U << 17) +-/* Load into IA32_KERNEL_GS_BASE (LKGS) */ +-#define CPUID_7_1_EAX_LKGS (1U << 18) +-/* Non-Serializing Write to Model Specific Register (WRMSRNS) */ +-#define CPUID_7_1_EAX_WRMSRNS (1U << 19) + ++/* Indicate bit 7 of the IA32_SPEC_CTRL MSR is supported */ ++#define CPUID_7_2_EDX_PSFD (1U << 0) ++/* Indicate bits 3 and 4 of the IA32_SPEC_CTRL MSR are supported */ ++#define CPUID_7_2_EDX_IPRED_CTRL (1U << 1) ++/* Indicate bits 5 and 6 of the IA32_SPEC_CTRL MSR are supported */ ++#define CPUID_7_2_EDX_RRSBA_CTRL (1U << 2) ++/* Indicate bit 8 of the IA32_SPEC_CTRL MSR is supported */ ++#define CPUID_7_2_EDX_DDPD_U (1U << 3) ++/* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */ ++#define CPUID_7_2_EDX_BHI_CTRL (1U << 4) + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ + #define 
CPUID_7_2_EDX_MCDT_NO (1U << 5) + +@@ -1061,7 +1081,10 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define MSR_ARCH_CAP_FBSDP_NO (1U << 14) + #define MSR_ARCH_CAP_PSDP_NO (1U << 15) + #define MSR_ARCH_CAP_FB_CLEAR (1U << 17) ++#define MSR_ARCH_CAP_BHI_NO (1U << 20) + #define MSR_ARCH_CAP_PBRSB_NO (1U << 24) ++#define MSR_ARCH_CAP_GDS_NO (1U << 26) ++#define MSR_ARCH_CAP_RFDS_NO (1U << 27) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +2.41.0.windows.1 + diff --git a/target-i386-Export-BHI_NO-bit-to-guests.patch b/target-i386-Export-BHI_NO-bit-to-guests.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f24a14d0556ca036325c2ef94121bfb05e98ae9 --- /dev/null +++ b/target-i386-Export-BHI_NO-bit-to-guests.patch @@ -0,0 +1,44 @@ +From bd65b82f94b07c90f856a34cb10d535b5301d9d9 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan 2025 10:06:48 +0800 +Subject: [PATCH] target/i386: Export BHI_NO bit to guests + +commit b611931d4f70b9a3e49e39c405c63b3b5e9c0df1 upstream. + +Branch History Injection (BHI) is a CPU side-channel vulnerability, where +an attacker may manipulate branch history before transitioning from user +to supervisor mode or from VMX non-root/guest to root mode. CPUs that set +BHI_NO bit in MSR IA32_ARCH_CAPABILITIES to indicate no additional +mitigation is required to prevent BHI. + +Make BHI_NO bit available to guests. + +Intel-SIG: commit b611931d4f70 target/i386: Export BHI_NO bit to guests. 
+ +Tested-by: Xuelian Guo +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-3-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index bad30581ce..b5231432e7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1157,7 +1157,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "taa-no", NULL, NULL, NULL, + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, +- NULL, NULL, NULL, NULL, ++ "bhi-no", NULL, NULL, NULL, + "pbrsb-no", NULL, "gds-no", "rfds-no", + "rfds-clear", NULL, NULL, NULL, + }, +-- +2.41.0.windows.1 + diff --git a/target-i386-Introduce-SierraForest-v2-model.patch b/target-i386-Introduce-SierraForest-v2-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..09d04d938f733f57cd5e0108353454a01b381985 --- /dev/null +++ b/target-i386-Introduce-SierraForest-v2-model.patch @@ -0,0 +1,62 @@ +From 79a6baa688a19242512a753ab240a2238bb7ed7e Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan 2025 10:06:47 +0800 +Subject: [PATCH] target/i386: Introduce SierraForest-v2 model + +commit c597ff5339a9918b00d9f4160126db0ac2a423cc upstream. + +Update SierraForest CPU model to add LAM, 4 bits indicating certain bits +of IA32_SPEC_CTR are supported(intel-psfd, ipred-ctrl, rrsba-ctrl, +bhi-ctrl) and the missing features(ss, tsc-adjust, cldemote, movdiri, +movdir64b) + +Also add GDS-NO and RFDS-NO to indicate the related vulnerabilities are +mitigated in stepping 3. + +Intel-SIG: commit c597ff5339a9 target/i386: Introduce SierraForest-v2 model. 
+Add SRF CPU model support + +Tested-by: Xuelian Guo +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-2-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 20358ffa91..bad30581ce 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4315,6 +4315,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Intel Xeon Processor (SierraForest)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, ++ { ++ .version = 2, ++ .props = (PropValue[]) { ++ { "ss", "on" }, ++ { "tsc-adjust", "on" }, ++ { "cldemote", "on" }, ++ { "movdiri", "on" }, ++ { "movdir64b", "on" }, ++ { "gds-no", "on" }, ++ { "rfds-no", "on" }, ++ { "lam", "on" }, ++ { "intel-psfd", "on"}, ++ { "ipred-ctrl", "on"}, ++ { "rrsba-ctrl", "on"}, ++ { "bhi-ctrl", "on"}, ++ { "stepping", "3" }, ++ { /* end of list */ } ++ } ++ }, + { /* end of list */ }, + }, + }, +-- +2.41.0.windows.1 + diff --git a/target-i386-add-sha512-sm3-sm4-feature-bits.patch b/target-i386-add-sha512-sm3-sm4-feature-bits.patch new file mode 100644 index 0000000000000000000000000000000000000000..21bf96c24a11ff48e7574802cdae10027d8583e8 --- /dev/null +++ b/target-i386-add-sha512-sm3-sm4-feature-bits.patch @@ -0,0 +1,40 @@ +From 87871b854241cc52f967805e005bdd66a923c555 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 3 Jul 2024 13:42:49 +0200 +Subject: [PATCH] target/i386: add sha512, sm3, sm4 feature bits + +commit 78be258c0eeba3d5613c37888889e84f2ba9bd94 upstream. + +SHA512, SM3, SM4 (CPUID[EAX=7,ECX=1).EAX bits 0 to 2) is supported by +Clearwater Forest processor, add it to QEMU as it does not need any +specific enablement. + +See https://lore.kernel.org/kvm/20241105054825.870939-1-tao1.su@linux.intel.com/ +for reference. 
+ +Intel-SIG: commit 78be258c0eeb target/i386: add sha512, sm3, sm4 feature bits. + +Reviewed-by: Tao Su +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index b5231432e7..6ed4e84b5c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -962,7 +962,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + [FEAT_7_1_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +- NULL, NULL, NULL, NULL, ++ "sha512", "sm3", "sm4", NULL, + "avx-vnni", "avx512-bf16", NULL, "cmpccxadd", + NULL, NULL, "fzrm", "fsrs", + "fsrc", NULL, NULL, NULL, +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Release-CSV3-shared-pages-after-unma.patch b/target-i386-csv-Release-CSV3-shared-pages-after-unma.patch new file mode 100644 index 0000000000000000000000000000000000000000..51ed89e887e308c78cb4c57ee8991a62135f7501 --- /dev/null +++ b/target-i386-csv-Release-CSV3-shared-pages-after-unma.patch @@ -0,0 +1,134 @@ +From ee97f42ea46a2527d19a3e87f33994d350959a90 Mon Sep 17 00:00:00 2001 +From: eastmoutain <14304864+eastmoutain@user.noreply.gitee.com> +Date: Mon, 20 May 2024 21:12:23 +0800 +Subject: [PATCH] target/i386: csv: Release CSV3 shared pages after unmapping + DMA + +The shared pages are created for Device DMA access, release them +once DMA mapping is removed. 
+ +Signed-off-by: yangwencheng +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 9 +++++++++ + target/i386/csv-sysemu-stub.c | 5 +++++ + target/i386/csv.c | 34 ++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 1 + + target/i386/kvm/kvm.c | 1 + + 5 files changed, 50 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 44a326fddc..a19683f1e9 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2142,6 +2142,7 @@ enum csv3_cmd_id { + KVM_CSV3_SEND_ENCRYPT_CONTEXT, + KVM_CSV3_RECEIVE_ENCRYPT_DATA, + KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT, ++ KVM_CSV3_HANDLE_MEMORY, + + KVM_CSV3_SET_GUEST_PRIVATE_MEMORY = 0xc8, + +@@ -2190,6 +2191,14 @@ struct kvm_csv3_receive_encrypt_context { + __u32 trans_len; + }; + ++#define KVM_CSV3_RELEASE_SHARED_MEMORY (0x0001) ++ ++struct kvm_csv3_handle_memory { ++ __u64 gpa; ++ __u32 num_pages; ++ __u32 opcode; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +index e49755da5c..735cce0e4b 100644 +--- a/target/i386/csv-sysemu-stub.c ++++ b/target/i386/csv-sysemu-stub.c +@@ -40,6 +40,11 @@ void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end) + + } + ++void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages) ++{ ++ ++} ++ + int csv3_set_guest_private_memory(Error **errp) + { + g_assert_not_reached(); +diff --git a/target/i386/csv.c b/target/i386/csv.c +index d9b50040a3..b229f7c317 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -270,6 +270,40 @@ end: + return ret; + } + ++void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages) ++{ ++ struct kvm_csv3_handle_memory mem = { 0 }; ++ MemoryRegion *mr = NULL; ++ void *hva; ++ int ret; ++ ++ if (!csv3_enabled()) ++ return; ++ ++ if (!gpa || !num_pages) ++ return; ++ ++ mem.gpa = (__u64)gpa; ++ mem.num_pages = 
(__u32)num_pages; ++ mem.opcode = (__u32)KVM_CSV3_RELEASE_SHARED_MEMORY; ++ ++ /* unpin the pages */ ++ ret = csv3_ioctl(KVM_CSV3_HANDLE_MEMORY, &mem, NULL); ++ if (ret <= 0) { ++ if (ret < 0) ++ error_report("%s: CSV3 unpin failed ret %d", __func__, ret); ++ return; ++ } ++ ++ /* drop the pages */ ++ hva = gpa2hva(&mr, gpa, num_pages << TARGET_PAGE_BITS, NULL); ++ if (hva) { ++ ret = madvise(hva, num_pages << TARGET_PAGE_BITS, MADV_DONTNEED); ++ if (ret) ++ error_report("%s: madvise failed %d", __func__, ret); ++ } ++} ++ + void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end) + { + MemoryRegionSection section; +diff --git a/target/i386/csv.h b/target/i386/csv.h +index fb669279a8..70f9933d3b 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -124,6 +124,7 @@ int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + + int csv3_shared_region_dma_map(uint64_t start, uint64_t end); + void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); ++void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages); + int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr); + int csv3_load_incoming_context(QEMUFile *f); + int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a867512822..2df3ff99c3 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5099,6 +5099,7 @@ static int kvm_handle_exit_hypercall(X86CPU *cpu, struct kvm_run *run) + if (enc) { + sev_remove_shared_regions_list(gfn_start, gfn_end); + csv3_shared_region_dma_unmap(gpa, gfn_end << TARGET_PAGE_BITS); ++ csv3_shared_region_release(gpa, npages); + } else { + sev_add_shared_regions_list(gfn_start, gfn_end); + csv3_shared_region_dma_map(gpa, gfn_end << TARGET_PAGE_BITS); +-- +2.41.0.windows.1 + diff --git a/target-loongarch-Fix-the-cpu-unplug-resource-leak.patch b/target-loongarch-Fix-the-cpu-unplug-resource-leak.patch new file mode 100644 index 
0000000000000000000000000000000000000000..ade56ca4b9d13ce3d05b9f674e76a3d72158a78c --- /dev/null +++ b/target-loongarch-Fix-the-cpu-unplug-resource-leak.patch @@ -0,0 +1,76 @@ +From 2a51f062a46c2e3fbd96a1d75f9d53cab449f4ac Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Fri, 21 Mar 2025 20:40:37 +0800 +Subject: [PATCH] target/loongarch: Fix the cpu unplug resource leak + +When the cpu is created, qemu_add_vm_change_state_handler +is called in the kvm_arch_init_vcpu function to create +the VMChangeStateEntry resource. + +However, the resource is not released when the cpu is destroyed. +This results in a qemu process segment error when the virtual +machine restarts after the cpu is unplugged. + +This patch solves the problem by adding the corresponding resource +release process to the kvm_arch_destroy_vcpu function. + +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 2 +- + target/loongarch/cpu.h | 1 + + target/loongarch/kvm/kvm.c | 5 ++++- + 3 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 570ce8be3b..561566f3a0 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -573,7 +573,7 @@ static void loongarch_cpu_reset_hold(Object *obj) + env->CSR_ECFG = FIELD_DP64(env->CSR_ECFG, CSR_ECFG, VS, 0); + env->CSR_ECFG = FIELD_DP64(env->CSR_ECFG, CSR_ECFG, LIE, 0); + +- env->CSR_ESTAT = env->CSR_ESTAT & (~MAKE_64BIT_MASK(0, 2)); ++ env->CSR_ESTAT = 0; + env->CSR_RVACFG = FIELD_DP64(env->CSR_RVACFG, CSR_RVACFG, RBITS, 0); + env->CSR_CPUID = cs->cpu_index; + env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 9af622aba5..6cc717c5ea 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -427,6 +427,7 @@ struct ArchCPU { + const char *dtb_compatible; + /* used by KVM_REG_LOONGARCH_COUNTER ioctl to access guest time counters */ + uint64_t kvm_state_counter; ++ VMChangeStateEntry *vmsentry; + 
}; + + /** +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 277210ca04..f6e008a517 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -905,9 +905,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + uint64_t val; + int ret; + Error *local_err = NULL; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); + + ret = 0; +- qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); ++ cpu->vmsentry = qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); + + if (!kvm_get_one_reg(cs, KVM_REG_LOONGARCH_DEBUG_INST, &val)) { + brk_insn = val; +@@ -928,6 +929,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + + int kvm_arch_destroy_vcpu(CPUState *cs) + { ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ qemu_del_vm_change_state_handler(cpu->vmsentry); + return 0; + } + +-- +2.41.0.windows.1 + diff --git a/target-loongarch-fix-vcpu-reset-command-word-issue.patch b/target-loongarch-fix-vcpu-reset-command-word-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..9e5ef3b15867877936ee6c68a6e757252eaf00fe --- /dev/null +++ b/target-loongarch-fix-vcpu-reset-command-word-issue.patch @@ -0,0 +1,56 @@ +From 655073e4e179e601e35a444f585d8e2049df97f5 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Wed, 5 Feb 2025 19:56:54 +0800 +Subject: [PATCH] target/loongarch: fix vcpu reset command word issue + +When the KVM_REG_LOONGARCH_VCPU_RESET command word +is sent to the kernel through the kvm_set_one_reg interface, +the parameter source needs to be a legal address, +otherwise the kernel will return an error and the command word +will fail to be sent. 
+ +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 2 +- + target/loongarch/kvm/kvm.c | 9 ++++++++- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index ee764f0bc7..570ce8be3b 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -638,8 +638,8 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error **errp) + + loongarch_cpu_register_gdb_regs_for_features(cs); + +- cpu_reset(cs); + qemu_init_vcpu(cs); ++ cpu_reset(cs); + + lacc->parent_realize(dev, errp); + } +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 0acdd5c4c1..277210ca04 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -590,9 +590,16 @@ static int kvm_loongarch_get_lbt(CPUState *cs) + void kvm_arch_reset_vcpu(CPUState *cs) + { + CPULoongArchState *env = cpu_env(cs); ++ int ret = 0; ++ uint64_t unused = 0; + + env->mp_state = KVM_MP_STATE_RUNNABLE; +- kvm_set_one_reg(cs, KVM_REG_LOONGARCH_VCPU_RESET, 0); ++ ret = kvm_set_one_reg(cs, KVM_REG_LOONGARCH_VCPU_RESET, &unused); ++ if (ret) { ++ error_report("Failed to set KVM_REG_LOONGARCH_VCPU_RESET: %s", ++ strerror(errno)); ++ exit(EXIT_FAILURE); ++ } + } + + static int kvm_loongarch_get_mpstate(CPUState *cs) +-- +2.41.0.windows.1 + diff --git a/vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch b/vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2b1f66101c80676f464727a00710ae9c6d3a11c --- /dev/null +++ b/vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch @@ -0,0 +1,39 @@ +From 257ffabb9c06b476a3a42bf679db6fbc61c19459 Mon Sep 17 00:00:00 2001 +From: Adttil <2429917001@qq.com> +Date: Fri, 25 Apr 2025 09:41:59 +0800 +Subject: [PATCH] vdpa:Fix dirty page bitmap synchronization not done after + suspend for vdpa devices + +Change the flag for vdpa device to determine whether to perform log_sync +from dev->start to 
dev->log, and do not release dev->log after vdpa device +suspend, and release it uniformly by vhost_dev_stop. + +Signed-off-by: Adttil <2429917001@qq.com> +--- + hw/virtio/vhost.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index d29075aa04..bec6e63fc7 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -252,7 +252,7 @@ static void vhost_log_sync(MemoryListener *listener, + memory_listener); + MigrationState *ms = migrate_get_current(); + +- if (!dev->log_enabled || !dev->started) { ++ if (!dev->log_enabled || !dev->log) { + return; + } + +@@ -2624,7 +2624,6 @@ int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + memory_listener_unregister(&hdev->iommu_listener); + } + vhost_stop_config_intr(hdev); +- vhost_log_put(hdev, true); + hdev->started = false; + vdev->vhost_started = false; + hdev->vdev = NULL; +-- +2.41.0.windows.1 + diff --git a/vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch b/vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch new file mode 100644 index 0000000000000000000000000000000000000000..c166f38f11d2672b6db9591f57e33b93870a3e46 --- /dev/null +++ b/vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch @@ -0,0 +1,288 @@ +From b88b03c84aa695b96a91329e2d01fffad551c34d Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 27 Mar 2025 19:24:53 +0800 +Subject: [PATCH] vdpa/iommufd:Implement DMA mapping through the iommufd + interface + +Change the owner of memorylistener from the independent vDPA device to VDPAIOMMUFDContainer + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-iommufd.c | 137 +++++++++++++++++++++++++++ + hw/virtio/vdpa-dev.c | 4 +- + hw/virtio/vhost-vdpa.c | 13 +-- + include/hw/virtio/vdpa-dev-iommufd.h | 1 + + include/hw/virtio/vhost-vdpa.h | 7 ++ + 5 files changed, 154 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-iommufd.c b/hw/virtio/vdpa-dev-iommufd.c +index d72f56d52f..668c6a1cb1 100644 
+--- a/hw/virtio/vdpa-dev-iommufd.c ++++ b/hw/virtio/vdpa-dev-iommufd.c +@@ -9,11 +9,124 @@ + #include + #include + #include "qapi/error.h" ++#include "exec/target_page.h" ++#include "exec/address-spaces.h" + #include "hw/virtio/vdpa-dev-iommufd.h" + + static QLIST_HEAD(, VDPAIOMMUFDContainer) vdpa_container_list = + QLIST_HEAD_INITIALIZER(vdpa_container_list); + ++static int vhost_vdpa_iommufd_container_dma_map(VDPAIOMMUFDContainer *container, hwaddr iova, ++ hwaddr size, void *vaddr, bool readonly) ++{ ++ return iommufd_backend_map_dma(container->iommufd, container->ioas_id, iova, size, vaddr, readonly); ++ ++} ++static int vhost_vdpa_iommufd_container_dma_unmap(VDPAIOMMUFDContainer *container, ++ hwaddr iova, hwaddr size) ++{ ++ return iommufd_backend_unmap_dma(container->iommufd, container->ioas_id, iova, size); ++} ++ ++static void vhost_vdpa_iommufd_container_region_add(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VDPAIOMMUFDContainer *container = container_of(listener, VDPAIOMMUFDContainer, listener); ++ hwaddr iova; ++ Int128 llend, llsize; ++ void *vaddr; ++ int page_size = qemu_target_page_size(); ++ int page_mask = -page_size; ++ int ret; ++ ++ if (vhost_vdpa_listener_skipped_section(section, 0, ULLONG_MAX, page_mask)) { ++ return; ++ } ++ ++ if (unlikely((section->offset_within_address_space & ~page_mask) != ++ (section->offset_within_region & ~page_mask))) { ++ return; ++ } ++ ++ iova = ROUND_UP(section->offset_within_address_space, page_size); ++ llend = vhost_vdpa_section_end(section, page_mask); ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ ++ memory_region_ref(section->mr); ++ vaddr = memory_region_get_ram_ptr(section->mr) + ++ section->offset_within_region + ++ (iova - section->offset_within_address_space); ++ ++ llsize = int128_sub(llend, int128_make64(iova)); ++ ++ ret = vhost_vdpa_iommufd_container_dma_map(container, iova, int128_get64(llsize), ++ vaddr, section->readonly); ++ if (ret) { ++ 
qemu_log("vhost vdpa iommufd container dma map failed: %d\n", ret); ++ } ++} ++ ++static void vhost_vdpa_iommufd_container_region_del(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VDPAIOMMUFDContainer *container = container_of(listener, VDPAIOMMUFDContainer, listener); ++ hwaddr iova; ++ Int128 llend, llsize; ++ int page_size = qemu_target_page_size(); ++ int page_mask = -page_size; ++ int ret; ++ ++ if (vhost_vdpa_listener_skipped_section(section, 0, ULLONG_MAX, page_mask)) { ++ return; ++ } ++ ++ if (unlikely((section->offset_within_address_space & ~page_mask) != ++ (section->offset_within_region & ~page_mask))) { ++ return; ++ } ++ ++ iova = ROUND_UP(section->offset_within_address_space, page_size); ++ llend = vhost_vdpa_section_end(section, page_mask); ++ ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ ++ llsize = int128_sub(llend, int128_make64(iova)); ++ /* ++ * The unmap ioctl doesn't accept a full 64-bit. need to check it ++ */ ++ if (int128_eq(llsize, int128_2_64())) { ++ llsize = int128_rshift(llsize, 1); ++ ret = vhost_vdpa_iommufd_container_dma_unmap(container, iova, int128_get64(llsize)); ++ ++ if (ret) { ++ qemu_log("vhost vdpa iommufd container unmap failed(0x%" HWADDR_PRIx ", " ++ "0x%" HWADDR_PRIx ") = %d (%m)", iova, int128_get64(llsize), ret); ++ } ++ iova += int128_get64(llsize); ++ } ++ ret = vhost_vdpa_iommufd_container_dma_unmap(container, iova, int128_get64(llsize)); ++ ++ if (ret) { ++ qemu_log("vhost vdpa iommufd container unmap failed(0x%" HWADDR_PRIx ", " ++ "0x%" HWADDR_PRIx ") = %d (%m)", iova, int128_get64(llsize), ret); ++ } ++ ++ memory_region_unref(section->mr); ++} ++ ++/* ++ * IOTLB API used by vhost vdpa iommufd container ++ */ ++const MemoryListener vhost_vdpa_iommufd_container_listener = { ++ .name = "vhost-vdpa-iommufd-container", ++ .region_add = vhost_vdpa_iommufd_container_region_add, ++ .region_del = vhost_vdpa_iommufd_container_region_del, ++}; ++ + static int 
vhost_vdpa_container_connect_iommufd(VDPAIOMMUFDContainer *container) + { + IOMMUFDBackend *iommufd = container->iommufd; +@@ -87,6 +200,7 @@ static VDPAIOMMUFDContainer *vhost_vdpa_create_container(VhostVdpaDevice *vdev) + + container = g_new0(VDPAIOMMUFDContainer, 1); + container->iommufd = vdev->iommufd; ++ container->listener = vhost_vdpa_iommufd_container_listener; + QLIST_INIT(&container->hwpt_list); + + QLIST_INSERT_HEAD(&vdpa_container_list, container, next); +@@ -213,11 +327,27 @@ static void vhost_vdpa_container_detach_device(VDPAIOMMUFDContainer *container, + } + } + ++static int vhost_vdpa_container_get_dev_count(VDPAIOMMUFDContainer *container) ++{ ++ IOMMUFDHWPT *hwpt; ++ VhostVdpaDevice *dev; ++ int dev_count = 0; ++ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ QLIST_FOREACH(dev, &hwpt->device_list, next) { ++ dev_count++; ++ } ++ } ++ ++ return dev_count; ++} ++ + int vhost_vdpa_attach_container(VhostVdpaDevice *vdev) + { + VDPAIOMMUFDContainer *container = NULL; + IOMMUFDBackend *iommufd = vdev->iommufd; + bool new_container = false; ++ int dev_count = 0; + int ret = 0; + + if (!iommufd) { +@@ -251,6 +381,12 @@ int vhost_vdpa_attach_container(VhostVdpaDevice *vdev) + goto unbind; + } + ++ /* register the container memory listener when attaching the first device */ ++ dev_count = vhost_vdpa_container_get_dev_count(container); ++ if (dev_count == 1) { ++ memory_listener_register(&container->listener, &address_space_memory); ++ } ++ + return 0; + + unbind: +@@ -288,6 +424,7 @@ void vhost_vdpa_detach_container(VhostVdpaDevice *vdev) + return; + } + /* No HWPT in this container, destroy it */ ++ memory_listener_unregister(&container->listener); + vhost_vdpa_container_disconnect_iommufd(container); + + vhost_vdpa_destroy_container(container); +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index a6bd695724..b256ad540c 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -136,9 +136,9 @@ static void 
vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + strerror(-ret)); + goto free_vqs; + } ++ } else { ++ memory_listener_register(&v->vdpa.listener, &address_space_memory); + } +- +- memory_listener_register(&v->vdpa.listener, &address_space_memory); + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_CONFIG_SIZE, + errp); +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 4a8fc37851..b5fb89b98e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -26,13 +26,14 @@ + #include "qemu/main-loop.h" + #include "trace.h" + #include "qapi/error.h" ++#include "hw/virtio/vdpa-dev-iommufd.h" + + /* + * Return one past the end of the end of section. Be careful with uint64_t + * conversions! + */ +-static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, +- int page_mask) ++Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, ++ int page_mask) + { + Int128 llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); +@@ -41,10 +42,10 @@ static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, + return llend; + } + +-static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, +- uint64_t iova_min, +- uint64_t iova_max, +- int page_mask) ++bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, ++ uint64_t iova_min, ++ uint64_t iova_max, ++ int page_mask) + { + Int128 llend; + +diff --git a/include/hw/virtio/vdpa-dev-iommufd.h b/include/hw/virtio/vdpa-dev-iommufd.h +index dc14d9dd15..8e56647690 100644 +--- a/include/hw/virtio/vdpa-dev-iommufd.h ++++ b/include/hw/virtio/vdpa-dev-iommufd.h +@@ -23,6 +23,7 @@ typedef struct IOMMUFDHWPT { + } IOMMUFDHWPT; + + typedef struct VDPAIOMMUFDContainer { ++ MemoryListener listener; + struct IOMMUFDBackend *iommufd; + uint32_t ioas_id; + QLIST_HEAD(, IOMMUFDHWPT) hwpt_list; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 
ee255bc1bd..e32effc6e1 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -57,6 +57,13 @@ typedef struct vhost_vdpa { + int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); + int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); + ++Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, ++ int page_mask); ++bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, ++ uint64_t iova_min, ++ uint64_t iova_max, ++ int page_mask); ++ + int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, + hwaddr size, void *vaddr, bool readonly); + int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +-- +2.41.0.windows.1 + diff --git a/vdpa-iommufd-Introduce-vdpa-iommufd-module.patch b/vdpa-iommufd-Introduce-vdpa-iommufd-module.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7a365c8e9c3182e557ee0a30c87e6f6674b46e5 --- /dev/null +++ b/vdpa-iommufd-Introduce-vdpa-iommufd-module.patch @@ -0,0 +1,495 @@ +From 9cdd7c19a08c773f1f8a2d314bb94d61bd08fd77 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 27 Mar 2025 16:51:03 +0800 +Subject: [PATCH] vdpa/iommufd:Introduce vdpa-iommufd module + +The purpose of the vdpa-iommufd module is to share +the DMA mapping of multiple vdpa through the kernel iommufd interface. +The VDPA devices can share the same DMA mapping by +associating with the same IOMMUFD backend. +This can avoid VDPA devices from repeatedly establishing DMA mappings, +reduce the time required for hot plugging and unplugging VDPA devices, +and minimize duplicate IOMMU TLB. +The vDPA devices that need to be isolated can also be divided into +different groups by associating them with different iommufds. +Each iommufd backend is associated with a VDPAIOMMUFDContainer to +establish contact with multiple vDPA devices. 
+To improve availability, even if vDPA devices encounter problems when +sharing page tables, they can still complete DMA mapping by applying for a separate HWPT. + +Signed-off-by: libai +--- + hw/virtio/meson.build | 2 +- + hw/virtio/vdpa-dev-iommufd.c | 294 +++++++++++++++++++++++++++ + hw/virtio/vdpa-dev.c | 17 ++ + include/hw/virtio/vdpa-dev-iommufd.h | 40 ++++ + include/hw/virtio/vdpa-dev.h | 2 + + linux-headers/linux/vhost.h | 28 +++ + 6 files changed, 382 insertions(+), 1 deletion(-) + create mode 100644 hw/virtio/vdpa-dev-iommufd.c + create mode 100644 include/hw/virtio/vdpa-dev-iommufd.h + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index 596651d113..67291563d3 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -5,7 +5,7 @@ system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c') + system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) + system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +-system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c')) ++system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c', 'vdpa-dev-iommufd.c')) + + specific_virtio_ss = ss.source_set() + specific_virtio_ss.add(files('virtio.c')) +diff --git a/hw/virtio/vdpa-dev-iommufd.c b/hw/virtio/vdpa-dev-iommufd.c +new file mode 100644 +index 0000000000..d72f56d52f +--- /dev/null ++++ b/hw/virtio/vdpa-dev-iommufd.c +@@ -0,0 +1,294 @@ ++/* ++ * vhost vdpa device iommufd backend ++ * ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All Rights Reserved. 
++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/log.h" ++#include ++#include ++#include "qapi/error.h" ++#include "hw/virtio/vdpa-dev-iommufd.h" ++ ++static QLIST_HEAD(, VDPAIOMMUFDContainer) vdpa_container_list = ++ QLIST_HEAD_INITIALIZER(vdpa_container_list); ++ ++static int vhost_vdpa_container_connect_iommufd(VDPAIOMMUFDContainer *container) ++{ ++ IOMMUFDBackend *iommufd = container->iommufd; ++ uint32_t ioas_id; ++ Error *err = NULL; ++ ++ if (!iommufd) { ++ return -1; ++ } ++ ++ if (!iommufd_backend_connect(iommufd, &err)) { ++ error_report_err(err); ++ return -1; ++ } ++ ++ if (!iommufd_backend_alloc_ioas(iommufd, &ioas_id, &err)) { ++ error_report_err(err); ++ iommufd_backend_disconnect(iommufd); ++ return -1; ++ } ++ container->ioas_id = ioas_id; ++ return 0; ++} ++ ++static void vhost_vdpa_container_disconnect_iommufd(VDPAIOMMUFDContainer *container) ++{ ++ IOMMUFDBackend *iommufd = container->iommufd; ++ uint32_t ioas_id = container->ioas_id; ++ ++ if (!iommufd) { ++ return; ++ } ++ ++ iommufd_backend_free_id(iommufd, ioas_id); ++ iommufd_backend_disconnect(iommufd); ++} ++ ++static IOMMUFDHWPT *vhost_vdpa_find_hwpt(VDPAIOMMUFDContainer *container, ++ VhostVdpaDevice *vdev) ++{ ++ IOMMUFDHWPT *hwpt = NULL; ++ VhostVdpaDevice *tmp = NULL; ++ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ QLIST_FOREACH(tmp, &hwpt->device_list, next) { ++ if (tmp == vdev) { ++ return hwpt; ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++static VDPAIOMMUFDContainer *vhost_vdpa_find_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ ++ QLIST_FOREACH(container, &vdpa_container_list, next) { ++ if (container->iommufd == vdev->iommufd) { ++ return container; ++ } ++ } ++ ++ return NULL; ++} ++ ++static VDPAIOMMUFDContainer *vhost_vdpa_create_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ ++ container = g_new0(VDPAIOMMUFDContainer, 1); ++ container->iommufd = vdev->iommufd; ++ 
QLIST_INIT(&container->hwpt_list); ++ ++ QLIST_INSERT_HEAD(&vdpa_container_list, container, next); ++ ++ return container; ++} ++ ++static void vhost_vdpa_destroy_container(VDPAIOMMUFDContainer *container) ++{ ++ if (!container) { ++ return; ++ } ++ ++ container->iommufd = NULL; ++ QLIST_SAFE_REMOVE(container, next); ++ g_free(container); ++} ++ ++static void vhost_vdpa_device_unbind_iommufd(VhostVdpaDevice *vdev) ++{ ++ int ret; ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_UNBIND_IOMMUFD, 0); ++ if (ret) { ++ qemu_log("vhost vdpa device unbind iommufd failed: %d, devid: %d\n", ++ ret, vdev->iommufd_devid); ++ } ++} ++ ++static int vhost_vdpa_device_bind_iommufd(VhostVdpaDevice *vdev) ++{ ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ struct vdpa_dev_bind_iommufd bind = { ++ .iommufd = iommufd->fd, ++ .out_devid = -1, ++ }; ++ int ret; ++ ++ /* iommufd auto unbind when vdev->vhostfd close */ ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_BIND_IOMMUFD, &bind); ++ if (ret) { ++ qemu_log("vhost vdpa device bind iommufd failed: %d\n", ret); ++ return ret; ++ } ++ vdev->iommufd_devid = bind.out_devid; ++ return 0; ++} ++ ++static int vhost_vdpa_container_attach_device(VDPAIOMMUFDContainer *container, VhostVdpaDevice *vdev) ++{ ++ IOMMUFDBackend *iommufd = NULL; ++ IOMMUFDHWPT *hwpt = NULL; ++ Error *err = NULL; ++ uint32_t pt_id; ++ int ret; ++ ++ if (!container || !container->iommufd || container->iommufd != vdev->iommufd) { ++ return -1; ++ } ++ ++ iommufd = container->iommufd; ++ ++ /* try to find an available hwpt */ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ pt_id = hwpt->hwpt_id; ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_ATTACH_IOMMUFD_PT, &pt_id); ++ if (ret == 0) { ++ QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); ++ return 0; ++ } ++ } ++ ++ /* available hwpt not found in the container, create a new one */ ++ hwpt = g_new0(IOMMUFDHWPT, 1); ++ QLIST_INIT(&hwpt->device_list); ++ ++ if (!iommufd_backend_alloc_hwpt(iommufd, vdev->iommufd_devid, ++ 
container->ioas_id, 0, 0, 0, NULL, ++ &pt_id, NULL, &err)) { ++ error_report_err(err); ++ ret = -1; ++ goto free_mem; ++ } ++ ++ hwpt->hwpt_id = pt_id; ++ ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_ATTACH_IOMMUFD_PT, &pt_id); ++ if (ret) { ++ qemu_log("vhost vdpa device attach iommufd pt failed: %d\n", ret); ++ goto free_hwpt; ++ } ++ ++ QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); ++ QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); ++ ++ return 0; ++ ++free_hwpt: ++ iommufd_backend_free_id(iommufd, hwpt->hwpt_id); ++free_mem: ++ g_free(hwpt); ++ return ret; ++} ++ ++static void vhost_vdpa_container_detach_device(VDPAIOMMUFDContainer *container, VhostVdpaDevice *vdev) ++{ ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ IOMMUFDHWPT *hwpt = NULL; ++ ++ /* find the hwpt using by this device */ ++ hwpt = vhost_vdpa_find_hwpt(container, vdev); ++ if (!hwpt) { ++ return; ++ } ++ ++ ioctl(vdev->vhostfd, VHOST_VDPA_DETACH_IOMMUFD_PT, &hwpt->hwpt_id); ++ ++ QLIST_SAFE_REMOVE(vdev, next); ++ ++ /* No device using this hwpt, free it */ ++ if (QLIST_EMPTY(&hwpt->device_list)) { ++ iommufd_backend_free_id(iommufd, hwpt->hwpt_id); ++ QLIST_SAFE_REMOVE(hwpt, next); ++ g_free(hwpt); ++ } ++} ++ ++int vhost_vdpa_attach_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ bool new_container = false; ++ int ret = 0; ++ ++ if (!iommufd) { ++ return 0; ++ } ++ ++ container = vhost_vdpa_find_container(vdev); ++ if (!container) { ++ container = vhost_vdpa_create_container(vdev); ++ if (!container) { ++ qemu_log("vdpa create container failed\n"); ++ return -1; ++ } ++ ret = vhost_vdpa_container_connect_iommufd(container); ++ if (ret) { ++ qemu_log("vdpa container connect iommufd failed\n"); ++ goto destroy; ++ } ++ new_container = true; ++ } ++ ++ ret = vhost_vdpa_device_bind_iommufd(vdev); ++ if (ret) { ++ qemu_log("vdpa device bind iommufd failed\n"); ++ goto disconnect; ++ } ++ ++ ret = 
vhost_vdpa_container_attach_device(container, vdev); ++ if (ret) { ++ qemu_log("vdpa container attach device failed\n"); ++ goto unbind; ++ } ++ ++ return 0; ++ ++unbind: ++ vhost_vdpa_device_unbind_iommufd(vdev); ++disconnect: ++ if (!new_container) { ++ return ret; ++ } ++ vhost_vdpa_container_disconnect_iommufd(container); ++destroy: ++ vhost_vdpa_destroy_container(container); ++ ++ return ret; ++} ++ ++void vhost_vdpa_detach_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ ++ if (!iommufd) { ++ return; ++ } ++ ++ container = vhost_vdpa_find_container(vdev); ++ if (!container) { ++ return; ++ } ++ ++ vhost_vdpa_container_detach_device(container, vdev); ++ ++ vhost_vdpa_device_unbind_iommufd(vdev); ++ ++ if (!QLIST_EMPTY(&container->hwpt_list)) { ++ return; ++ } ++ /* No HWPT in this container, destroy it */ ++ vhost_vdpa_container_disconnect_iommufd(container); ++ ++ vhost_vdpa_destroy_container(container); ++} +\ No newline at end of file +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 9ce7ed7eae..a6bd695724 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -32,6 +32,7 @@ + #include "migration/migration.h" + #include "exec/address-spaces.h" + #include "standard-headers/linux/virtio_ids.h" ++#include "hw/virtio/vdpa-dev-iommufd.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -127,6 +128,16 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + goto free_vqs; + } + ++ /* If the vdpa device is associated with an iommufd, attach device to container */ ++ if (v->iommufd) { ++ ret = vhost_vdpa_attach_container(v); ++ if (ret < 0) { ++ error_setg(errp, "vhost vdpa device attach container failed: %s", ++ strerror(-ret)); ++ goto free_vqs; ++ } ++ } ++ + memory_listener_register(&v->vdpa.listener, &address_space_memory); + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, + 
VHOST_VDPA_GET_CONFIG_SIZE, +@@ -168,6 +179,9 @@ free_config: + vhost_cleanup: + memory_listener_unregister(&v->vdpa.listener); + vhost_dev_cleanup(&v->dev); ++ if (v->iommufd) { ++ vhost_vdpa_detach_container(v); ++ } + free_vqs: + g_free(vqs); + out: +@@ -194,6 +208,9 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) + g_free(s->dev.vqs); + memory_listener_unregister(&s->vdpa.listener); + vhost_dev_cleanup(&s->dev); ++ if (s->iommufd) { ++ vhost_vdpa_detach_container(s); ++ } + qemu_close(s->vhostfd); + s->vhostfd = -1; + } +diff --git a/include/hw/virtio/vdpa-dev-iommufd.h b/include/hw/virtio/vdpa-dev-iommufd.h +new file mode 100644 +index 0000000000..dc14d9dd15 +--- /dev/null ++++ b/include/hw/virtio/vdpa-dev-iommufd.h +@@ -0,0 +1,40 @@ ++/* ++ * vhost vDPA device support iommufd header ++ * ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All Rights Reserved. ++ */ ++ ++#ifndef _VHOST_VDPA_IOMMUFD_H ++#define _VHOST_VDPA_IOMMUFD_H ++ ++#include "hw/virtio/vdpa-dev.h" ++ ++/* ++ * A HW pagetable is called an iommu_domain inside the kernel. ++ * This user object allows directly creating an inspecting the ++ * domains. Domains that have kernel owned page tables will be ++ * associated with an iommufd_ioas that provides the IOVA to ++ * PFN map. 
++ */ ++typedef struct IOMMUFDHWPT { ++ uint32_t hwpt_id; ++ QLIST_HEAD(, VhostVdpaDevice) device_list; ++ QLIST_ENTRY(IOMMUFDHWPT) next; ++} IOMMUFDHWPT; ++ ++typedef struct VDPAIOMMUFDContainer { ++ struct IOMMUFDBackend *iommufd; ++ uint32_t ioas_id; ++ QLIST_HEAD(, IOMMUFDHWPT) hwpt_list; ++ QLIST_ENTRY(VDPAIOMMUFDContainer) next; ++} VDPAIOMMUFDContainer; ++ ++struct vdpa_dev_bind_iommufd { ++ __s32 iommufd; ++ __u32 out_devid; ++}; ++ ++int vhost_vdpa_attach_container(VhostVdpaDevice *vdev); ++void vhost_vdpa_detach_container(VhostVdpaDevice *vdev); ++ ++#endif /* _VHOST_VDPA_IOMMUFD_H */ +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index accdb7fa28..872e630546 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -43,6 +43,8 @@ struct VhostVdpaDevice { + VMChangeStateEntry *vmstate; + Notifier migration_state; + IOMMUFDBackend *iommufd; ++ uint32_t iommufd_devid; ++ QLIST_ENTRY(VhostVdpaDevice) next; + }; + + #endif +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index a08e980a1e..f5c05abe8b 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -232,6 +232,34 @@ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) + ++/* Bind a vDPA device to the specified iommufd ++ * ++ * After the return of this ioctl, the vDPA device is binded to the specified ++ * iommufd, and the device id is also returned. ++ */ ++#define VHOST_VDPA_BIND_IOMMUFD _IO(VHOST_VIRTIO, 0x90) ++ ++/* Unbind a vDPA device from the specified iommufd ++ * ++ * After the return of this ioctl, the vDPA device is unbinded from the specified ++ * iommufd. ++ */ ++#define VHOST_VDPA_UNBIND_IOMMUFD _IO(VHOST_VIRTIO, 0x91) ++ ++/* Associate the vDPA device with an address space within the bound iommufd ++ * ++ * After the return of this ioctl, the vDPA device is attached to the bound ++ * iommufd. 
++ */ ++#define VHOST_VDPA_ATTACH_IOMMUFD_PT _IO(VHOST_VIRTIO, 0x92) ++ ++/* Detach the vDPA device from an address space within the bound iommufd. ++ * ++ * After the return of this ioctl, the vDPA device is detached from the address ++ * space within the bound iommufd. ++ */ ++#define VHOST_VDPA_DETACH_IOMMUFD_PT _IO(VHOST_VIRTIO, 0x93) ++ + /* set and get device buffer */ + #define VHOST_GET_DEV_BUFFER _IOR(VHOST_VIRTIO, 0xb0, struct vhost_vdpa_config) + #define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) +-- +2.41.0.windows.1 + diff --git a/vdpa-iommufd-support-associating-iommufd-backend-for.patch b/vdpa-iommufd-support-associating-iommufd-backend-for.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a0b14d324027dc2dbad2b1d5be7dbb3cea7d2d3 --- /dev/null +++ b/vdpa-iommufd-support-associating-iommufd-backend-for.patch @@ -0,0 +1,57 @@ +From 184e5195a815d57701cd9358f4b0537025729833 Mon Sep 17 00:00:00 2001 +From: libai +Date: Wed, 26 Mar 2025 20:44:40 +0800 +Subject: [PATCH] vdpa/iommufd:support associating iommufd backend for vDPA + devices + +The following parameters can associate the iommufd object with the vdpa device: + +-object iommufd,id=iommufd1 +-device '{ + "driver":"vhost-vdpa-device-pci", + "id":"vhostdev0", + "vhostdev":"/dev/vhost-vdpa-1", + "iommufd":"iommufd1" +}' + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 1 + + include/hw/virtio/vdpa-dev.h | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index bd787cf39c..9ce7ed7eae 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -356,6 +356,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + static Property vhost_vdpa_device_properties[] = { + DEFINE_PROP_STRING("vhostdev", VhostVdpaDevice, vhostdev), + DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0), ++ DEFINE_PROP_LINK("iommufd", VhostVdpaDevice, iommufd,
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 60e9c3f3fe..accdb7fa28 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -18,6 +18,7 @@ + #include "hw/virtio/vhost.h" + #include "hw/virtio/vhost-vdpa.h" + #include "qom/object.h" ++#include "sysemu/iommufd.h" + + + #define TYPE_VHOST_VDPA_DEVICE "vhost-vdpa-device" +@@ -41,6 +42,7 @@ struct VhostVdpaDevice { + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; + Notifier migration_state; ++ IOMMUFDBackend *iommufd; + }; + + #endif +-- +2.41.0.windows.1 +