From 1ded815698707756787aafa308257a9318bbe3c3 Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Sun, 14 Mar 2021 13:15:34 -0700 Subject: [PATCH 01/20] iommu/vt-d: Disable SVM when ATS/PRI/PASID are not enabled in the device mainline inclusion from mainline-v5.13-rc1 commit 028e0f9de7c42d32dd88652c59c494652b5010ec category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 028e0f9de7c4 iommu/vt-d: Disable SVM when ATS/PRI/PASID are not Backport for intel SPR ENQCMD Instruction support -------------------------------- Currently, the Intel VT-d supports Shared Virtual Memory (SVM) only when IO page fault is supported. Otherwise, shared memory pages can not be swapped out and need to be pinned. The device needs the Address Translation Service (ATS), Page Request Interface (PRI) and Process Address Space Identifier (PASID) capabilities to be enabled to support IO page fault. Disable SVM when ATS, PRI and PASID are not enabled in the device. Signed-off-by: Kyung Min Park Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210314201534.918-1-kyung.min.park@intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8b6d10c2add5..d953619bd819 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6197,6 +6197,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) return 0; } -- Gitee From 5508b14fe6a461f1cd0be8023ce9c4dca8e504c4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:57 +0800 Subject: [PATCH 02/20] iommu/vt-d: Remove svm_dev_ops mainline inclusion from mainline-v5.13-rc1 commit 03744d588567aae6c3ae0dfc8fcb81a89ac21f55 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 03744d588567 iommu/vt-d: Remove svm_dev_ops Backport for intel SPR ENQCMD Instruction support -------------------------------- The svm_dev_ops has never been referenced in the tree, and there's no plan to have anything to use it. Remove it to make the code neat. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 15 +-------------- include/linux/intel-iommu.h | 3 --- include/linux/intel-svm.h | 7 ------- 3 files changed, 1 insertion(+), 24 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index aaf9f9589bdc..18fa4bdab92c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -462,7 +462,6 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) /* Caller must hold pasid_mutex, mm reference */ static int intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct svm_dev_ops *ops, struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); @@ -512,10 +511,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Find the matching device in svm list */ for_each_svm_dev(sdev, svm, dev) { - if (sdev->ops != ops) { - ret = -EBUSY; - goto out; - } sdev->users++; goto success; } @@ -550,7 +545,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Finish the setup now we know we're keeping it */ sdev->users = 1; - sdev->ops = ops; init_rcu_head(&sdev->rcu); if (!svm) { @@ -1006,13 +1000,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) mmap_read_unlock(svm->mm); mmput(svm->mm); bad_req: - WARN_ON(!sdev); - if (sdev && sdev->ops && sdev->ops->fault_cb) { - int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->pm_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, - req->priv_data, rwxp, result); - } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! */ sdev = NULL; @@ -1087,7 +1074,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) if (drvdata) flags = *(unsigned int *)drvdata; mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + ret = intel_svm_bind_mm(dev, flags, mm, &sdev); if (ret) sva = ERR_PTR(ret); else if (sdev) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 805e748f7eae..0e157779c977 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -761,14 +761,11 @@ u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); -struct svm_dev_ops; - struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; int users; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 39d368a810b8..6c9d10c0fb1e 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,13 +8,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -struct device; - -struct svm_dev_ops { - void (*fault_cb)(struct device *dev, u32 pasid, u64 address, - void *private, int rwxp, int response); -}; - /* Values for rxwp in fault_cb callback */ #define SVM_REQ_READ (1<<3) #define SVM_REQ_WRITE (1<<2) -- Gitee From 784f1bd7f7307acf080f1f88e810df59e1572142 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:58 +0800 Subject: [PATCH 03/20] iommu/vt-d: Remove SVM_FLAG_PRIVATE_PASID mainline inclusion from mainline-v5.13-rc1 commit 072df4e47af508ef272f0d0ca8fd29a67df782ce category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 072df4e47af5 iommu/vt-d: Remove SVM_FLAG_PRIVATE_PASID Backport for intel SPR ENQCMD Instruction support -------------------------------- The SVM_FLAG_PRIVATE_PASID has never been referenced in the tree, and there's no plan to have anything to use it. So cleanup it. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 40 ++++++++++++++++++--------------------- include/linux/intel-svm.h | 16 +++------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 18fa4bdab92c..069b539db81a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -465,9 +465,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct intel_svm *svm = NULL, *t; struct device_domain_info *info; struct intel_svm_dev *sdev; - struct intel_svm *svm = NULL; unsigned long iflags; int pasid_max; int ret; @@ -493,30 +493,26 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - if (!(flags & SVM_FLAG_PRIVATE_PASID)) { - struct intel_svm *t; - - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) - continue; - - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; - } + list_for_each_entry(t, &global_svm_list, list) { + if (t->mm != mm) + continue; - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; - } + svm = t; + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } + + break; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 6c9d10c0fb1e..10fa80eef13a 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -14,16 +14,6 @@ #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) -/* - * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" - * PASID for the current process. Even if a PASID already exists, a new one - * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID - * will not be given to subsequent callers. This facility allows a driver to - * disambiguate between multiple device contexts which access the same MM, - * if there is no other way to do so. It should be used sparingly, if at all. - */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) - /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. No IOTLB flushes are automatically done @@ -35,18 +25,18 @@ * It is unlikely that we will ever hook into flush_tlb_kernel_range() to * do such IOTLB flushes automatically. */ -#define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#define SVM_FLAG_SUPERVISOR_MODE BIT(0) /* * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest * processes. Compared to the host bind, the primary differences are: * 1. mm life cycle management * 2. fault reporting */ -#define SVM_FLAG_GUEST_MODE (1<<2) +#define SVM_FLAG_GUEST_MODE BIT(1) /* * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, * which requires guest and host PASID translation at both directions. */ -#define SVM_FLAG_GUEST_PASID (1<<3) +#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ -- Gitee From 8551de6b98b086a33b5482ac90cdf16e1bbf5075 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:58 +0800 Subject: [PATCH 04/20] iommu/vt-d: Add pasid private data helpers mainline inclusion from mainline-v5.14-rc1 commit 2516f1b5a449b38946a4a1b146e8d3b1d8f13102 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 2516f1b5a449 iommu/vt-d: Add pasid private data helpers Backport for intel SPR ENQCMD Instruction support -------------------------------- We are about to use iommu_sva_alloc/free_pasid() helpers in iommu core. That means the pasid life cycle will be managed by iommu core. Use a local array to save the per pasid private data instead of attaching it the real pasid. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 62 ++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 069b539db81a..4e036e71b8c2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,23 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid); #define PRQ_ORDER 0 +static DEFINE_XARRAY_ALLOC(pasid_private_array); +static int pasid_private_add(ioasid_t pasid, void *priv) +{ + return xa_alloc(&pasid_private_array, &pasid, priv, + XA_LIMIT(pasid, pasid), GFP_ATOMIC); +} + +static void pasid_private_remove(ioasid_t pasid) +{ + xa_erase(&pasid_private_array, pasid); +} + +static void *pasid_private_find(ioasid_t pasid) +{ + return xa_load(&pasid_private_array, pasid); +} + int intel_svm_enable_prq(struct intel_iommu *iommu) { struct page *pages; @@ -224,7 +242,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (pasid == INVALID_IOASID || pasid >= PASID_MAX) return -EINVAL; - svm = ioasid_find(NULL, pasid, NULL); + svm = pasid_private_find(pasid); if (IS_ERR(svm)) return PTR_ERR(svm); @@ -334,7 +352,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, svm->gpasid = data->gpasid; svm->flags |= SVM_FLAG_GUEST_PASID; } - ioasid_set_data(data->hpasid, svm); + pasid_private_add(data->hpasid, svm); INIT_LIST_HEAD_RCU(&svm->devs); mmput(svm->mm); } @@ -388,7 +406,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, list_add_rcu(&sdev->list, &svm->devs); out: if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - ioasid_set_data(data->hpasid, NULL); + pasid_private_remove(data->hpasid); kfree(svm); } @@ -431,7 +449,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) * the unbind, IOMMU driver will get notified * and perform cleanup. */ - ioasid_set_data(pasid, NULL); + pasid_private_remove(pasid); kfree(svm); } } @@ -547,8 +565,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) { ret = -ENOMEM; - kfree(sdev); - goto out; + goto sdev_err; } if (pasid_max > intel_pasid_max_id) @@ -556,13 +573,16 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Do not use PASID 0, reserved for RID to PASID */ svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, svm); + pasid_max - 1, NULL); if (svm->pasid == INVALID_IOASID) { - kfree(svm); - kfree(sdev); ret = -ENOSPC; - goto out; + goto svm_err; } + + ret = pasid_private_add(svm->pasid, svm); + if (ret) + goto pasid_err; + svm->notifier.ops = &intel_mmuops; svm->mm = mm; svm->flags = flags; @@ -571,12 +591,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, ret = -ENOMEM; if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } + if (ret) + goto priv_err; } spin_lock_irqsave(&iommu->lock, iflags); @@ -590,8 +606,13 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); +priv_err: + pasid_private_remove(svm->pasid); +pasid_err: ioasid_put(svm->pasid); +svm_err: kfree(svm); +sdev_err: kfree(sdev); goto out; } @@ -614,10 +635,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - kfree(sdev); - goto out; - } + if (ret) + goto sdev_err; } list_add_rcu(&sdev->list, &svm->devs); success: @@ -670,6 +689,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) load_pasid(svm->mm, PASID_DISABLED); } list_del(&svm->list); + pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -925,7 +945,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) } if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = ioasid_find(NULL, req->pasid, NULL); + svm = pasid_private_find(req->pasid); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ -- Gitee From 731d9df38d1761309c5f40f9776dd2cbc038d28d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:59 +0800 Subject: [PATCH 05/20] iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers mainline inclusion from mainline-v5.14-rc1 commit 761fed4ec0c575a77f93ae6eafe76fb43566cb18 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 761fed4ec0c5 iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers Backport for intel SPR ENQCMD Instruction support -------------------------------- Align the pasid alloc/free code with the generic helpers defined in the iommu core. This also refactored the SVA binding code to improve the readability. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 3 + drivers/iommu/intel/svm.c | 278 +++++++++++++++--------------------- include/linux/intel-iommu.h | 1 - 4 files changed, 120 insertions(+), 163 deletions(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 5337ee1584b0..4a6919f95094 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -40,6 +40,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID + select IOMMU_SVA_LIB help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d953619bd819..89bb473fa11f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6197,6 +6197,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (intel_iommu_enable_pasid(info->iommu, dev)) + return -ENODEV; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) return -EINVAL; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4e036e71b8c2..7675de59e60b 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -23,9 +23,11 @@ #include #include "pasid.h" +#include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) #define PRQ_ORDER 0 @@ -222,7 +224,6 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); -static LIST_HEAD(global_svm_list); #define for_each_svm_dev(sdev, svm, d) \ list_for_each_entry((sdev), &(svm)->devs, list) \ @@ -477,79 +478,80 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) mutex_unlock(&mm->context.lock); } -/* Caller must hold pasid_mutex, mm reference */ -static int -intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct mm_struct *mm, struct intel_svm_dev **sd) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, + unsigned int flags) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm *svm = NULL, *t; - struct device_domain_info *info; - struct intel_svm_dev *sdev; - unsigned long iflags; - int pasid_max; - int ret; + ioasid_t max_pasid = dev_is_pci(dev) ? + pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - if (!iommu || dmar_disabled) - return -EINVAL; + return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); +} - if (!intel_svm_capable(iommu)) - return -ENOTSUPP; +static void intel_svm_free_pasid(struct mm_struct *mm) +{ + iommu_sva_free_pasid(mm); +} - if (dev_is_pci(dev)) { - pasid_max = pci_max_pasids(to_pci_dev(dev)); - if (pasid_max < 0) - return -EINVAL; - } else - pasid_max = 1 << 20; +static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, + struct device *dev, + struct mm_struct *mm, + unsigned int flags) +{ + struct device_domain_info *info = get_domain_info(dev); + unsigned long iflags, sflags; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; - /* Bind supervisor PASID shuld have mm = NULL */ - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap) || mm) { - pr_err("Supervisor PASID with user provided mm.\n"); - return -EINVAL; - } - } + svm = pasid_private_find(mm->pasid); + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) + return ERR_PTR(-ENOMEM); - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm) - continue; + svm->pasid = mm->pasid; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; + if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); + } } - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; + ret = pasid_private_add(svm->pasid, svm); + if (ret) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + kfree(svm); + return ERR_PTR(ret); } + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; - goto out; + goto free_svm; } + sdev->dev = dev; sdev->iommu = iommu; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) { - kfree(sdev); - goto out; - } - - info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->users = 1; + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; @@ -557,96 +559,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } - /* Finish the setup now we know we're keeping it */ - sdev->users = 1; - init_rcu_head(&sdev->rcu); - - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - goto sdev_err; - } - - if (pasid_max > intel_pasid_max_id) - pasid_max = intel_pasid_max_id; - - /* Do not use PASID 0, reserved for RID to PASID */ - svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, NULL); - if (svm->pasid == INVALID_IOASID) { - ret = -ENOSPC; - goto svm_err; - } - - ret = pasid_private_add(svm->pasid, svm); - if (ret) - goto pasid_err; + /* Setup the pasid table: */ + sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? + PASID_FLAG_SUPERVISOR_MODE : 0; + sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; + spin_lock_irqsave(&iommu->lock, iflags); + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + FLPT_DEFAULT_DID, sflags); + spin_unlock_irqrestore(&iommu->lock, iflags); - svm->notifier.ops = &intel_mmuops; - svm->mm = mm; - svm->flags = flags; - INIT_LIST_HEAD_RCU(&svm->devs); - INIT_LIST_HEAD(&svm->list); - ret = -ENOMEM; - if (mm) { - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) - goto priv_err; - } + if (ret) + goto free_sdev; - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - if (mm) - mmu_notifier_unregister(&svm->notifier, mm); -priv_err: - pasid_private_remove(svm->pasid); -pasid_err: - ioasid_put(svm->pasid); -svm_err: - kfree(svm); -sdev_err: - kfree(sdev); - goto out; - } + /* The newly allocated pasid is loaded to the mm. */ + if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) + load_pasid(mm, svm->pasid); - list_add_tail(&svm->list, &global_svm_list); - if (mm) { - /* The newly allocated pasid is loaded to the mm. */ - load_pasid(mm, svm->pasid); - } - } else { - /* - * Binding a new device with existing PASID, need to setup - * the PASID entry. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) - goto sdev_err; - } list_add_rcu(&sdev->list, &svm->devs); success: - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; - if (sd) - *sd = sdev; - ret = 0; -out: - return ret; + return &sdev->sva; + +free_sdev: + kfree(sdev); +free_svm: + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(mm->pasid); + kfree(svm); + } + + return ERR_PTR(ret); } /* Caller must hold pasid_mutex */ @@ -655,6 +598,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; + struct mm_struct *mm; int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); @@ -664,6 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); if (ret) goto out; + mm = svm->mm; if (sdev) { sdev->users--; @@ -682,13 +627,12 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - ioasid_put(svm->pasid); - if (svm->mm) { - mmu_notifier_unregister(&svm->notifier, svm->mm); + intel_svm_free_pasid(mm); + if (svm->notifier.ops) { + mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ - load_pasid(svm->mm, PASID_DISABLED); + load_pasid(mm, PASID_DISABLED); } - list_del(&svm->list); pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. @@ -1073,31 +1017,42 @@ static irqreturn_t prq_event_thread(int irq, void *d) return IRQ_RETVAL(handled); } -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -struct iommu_sva * -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { - struct iommu_sva *sva = ERR_PTR(-EINVAL); - struct intel_svm_dev *sdev = NULL; + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); unsigned int flags = 0; + struct iommu_sva *sva; int ret; - /* - * TODO: Consolidate with generic iommu-sva bind after it is merged. - * It will require shared SVM data structures, i.e. combine io_mm - * and intel_svm etc. - */ if (drvdata) flags = *(unsigned int *)drvdata; + + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + dev_err(dev, "%s: Supervisor PASID not supported\n", + iommu->name); + return ERR_PTR(-EOPNOTSUPP); + } + + if (mm) { + dev_err(dev, "%s: Supervisor PASID with user provided mm\n", + iommu->name); + return ERR_PTR(-EINVAL); + } + + mm = &init_mm; + } + mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, mm, &sdev); - if (ret) - sva = ERR_PTR(ret); - else if (sdev) - sva = &sdev->sva; - else - WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + ret = intel_svm_alloc_pasid(dev, mm, flags); + if (ret) { + mutex_unlock(&pasid_mutex); + return ERR_PTR(ret); + } + sva = intel_svm_bind_mm(iommu, dev, mm, flags); + if (IS_ERR_OR_NULL(sva)) + intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; @@ -1105,10 +1060,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) void intel_svm_unbind(struct iommu_sva *sva) { - struct intel_svm_dev *sdev; + struct intel_svm_dev *sdev = to_intel_svm_dev(sva); mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); intel_svm_unbind_mm(sdev->dev, sdev->pasid); mutex_unlock(&pasid_mutex); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0e157779c977..3f54def870b8 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -782,7 +782,6 @@ struct intel_svm { u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; - struct list_head list; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} -- Gitee From ca90ab9287f988738cb326936f348613e0a139c6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 7 Jul 2021 18:08:22 -0700 Subject: [PATCH 06/20] mm: add setup_initial_init_mm() helper mainline inclusion from mainline-v5.14-rc1 commit 8d0c7adaee0899a8b4c2e789fc8c237bb359afe1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 8d0c7adaee08 mm: add setup_initial_init_mm() helper Backport for intel SPR ENQCMD Instruction support -------------------------------- Patch series "init_mm: cleanup ARCH's text/data/brk setup code", v3. Add setup_initial_init_mm() helper, then use it to cleanup the text, data and brk setup code. This patch (of 15): Add setup_initial_init_mm() helper to setup kernel text, data and brk. Link: https://lkml.kernel.org/r/20210608083418.137226-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20210608083418.137226-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Souptick Joarder Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guo Ren Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonas Bonn Cc: Ley Foon Tan Cc: Michael Ellerman Cc: Nick Hu Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Rich Felker Cc: Russell King (Oracle) Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jun Yang --- include/linux/mm.h | 3 +++ mm/init-mm.c | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index a886f48b6a0e..e73f98751680 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -246,6 +246,9 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +void setup_initial_init_mm(void *start_code, void *end_code, + void *end_data, void *brk); + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way diff --git a/mm/init-mm.c b/mm/init-mm.c index 153162669f80..b4a6f38fb51d 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -40,3 +40,12 @@ struct mm_struct init_mm = { .cpu_bitmap = CPU_BITS_NONE, INIT_MM_CONTEXT(init_mm) }; + +void setup_initial_init_mm(void *start_code, void *end_code, + void *end_data, void *brk) +{ + init_mm.start_code = (unsigned long)start_code; + init_mm.end_code = (unsigned long)end_code; + init_mm.end_data = (unsigned long)end_data; + init_mm.brk = (unsigned long)brk; +} -- Gitee From 4258bb956495c845189d5d52ea7f4386a678a9bd Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 7 Jul 2021 18:09:03 -0700 Subject: [PATCH 07/20] x86: convert to setup_initial_init_mm() mainline inclusion from mainline-v5.14-rc1 commit cccb423b3ee1c03cde5fd9ed4b43d1ea0a862525 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit cccb423b3ee1 x86: convert to setup_initial_init_mm() Backport for intel SPR ENQCMD Instruction support -------------------------------- Use setup_initial_init_mm() helper to simplify code. Link: https://lkml.kernel.org/r/20210608083418.137226-16-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jun Yang --- arch/x86/kernel/setup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 85979c1a404e..8f1e85832eae 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -748,10 +748,7 @@ void __init setup_arch(char **cmdline_p) if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = _brk_end; + setup_initial_init_mm(_text, _etext, _edata, (void *)_brk_end); code_resource.start = __pa_symbol(_text); code_resource.end = __pa_symbol(_etext)-1; -- Gitee From e8bec2100df40c633bd09025504c9ea803042110 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 Aug 2021 20:43:20 +0800 Subject: [PATCH 08/20] iommu/vt-d: Fix PASID reference leak mainline inclusion from mainline-v5.14-rc7 commit fdfc71c26760521e81bbae095336fb32ac087e3b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit fdfc71c26760 iommu/vt-d: Fix PASID reference leak Backport for intel SPR ENQCMD Instruction support -------------------------------- A PASID reference is increased whenever a device is bound to an mm (and its PASID) successfully (i.e. the device's sdev user count is increased). But the reference is not dropped every time the device is unbound successfully from the mm (i.e. the device's sdev user count is decreased). The reference is dropped only once by calling intel_svm_free_pasid() when there isn't any device bound to the mm. intel_svm_free_pasid() drops the reference and only frees the PASID on zero reference. Fix the issue by dropping the PASID reference and freeing the PASID when no reference on successful unbinding the device by calling intel_svm_free_pasid() . Fixes: 4048377414162 ("iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers") Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20210813181345.1870742-1-fenghua.yu@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210817124321.1517985-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7675de59e60b..8825574dcef2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -627,7 +627,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - intel_svm_free_pasid(mm); if (svm->notifier.ops) { mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ @@ -642,6 +641,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } + /* Drop a PASID reference and free it if no reference. */ + intel_svm_free_pasid(mm); } out: return ret; -- Gitee From b1dbc93d8278c7d9a0d8b72fb7c9538b25073490 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Jan 2022 18:44:33 +0800 Subject: [PATCH 09/20] iommu: Fix some W=1 warnings mainline inclusion from mainline-v5.17-rc3 commit 2c3b4f8d87877da9ae8281e1fcba64ec1663b381 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 2c3b4f8d8787 iommu: Fix some W=1 warnings Backport for intel SPR ENQCMD Instruction support -------------------------------- The code is mostly free of W=1 warning, so fix the following: drivers/iommu/iommu.c:996: warning: expecting prototype for iommu_group_for_each_dev(). Prototype was for __iommu_group_for_each_dev() instead drivers/iommu/iommu.c:3048: warning: Function parameter or member 'drvdata' not described in 'iommu_sva_bind_device' drivers/iommu/ioasid.c:354: warning: Function parameter or member 'ioasid' not described in 'ioasid_get' drivers/iommu/omap-iommu.c:1098: warning: expecting prototype for omap_iommu_suspend_prepare(). Prototype was for omap_iommu_prepare() instead Signed-off-by: John Garry Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1643366673-26803-1-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/ioasid.c | 1 + drivers/iommu/iommu.c | 24 ++++++++++++------------ drivers/iommu/omap-iommu.c | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 50ee27bbd04e..06fee7416816 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -349,6 +349,7 @@ EXPORT_SYMBOL_GPL(ioasid_alloc); /** * ioasid_get - obtain a reference to the IOASID + * @ioasid: the ID to get */ void ioasid_get(ioasid_t ioasid) { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9116c93945d0..5257bb4da271 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -968,17 +968,6 @@ static int iommu_group_device_count(struct iommu_group *group) return ret; } -/** - * iommu_group_for_each_dev - iterate over each device in the group - * @group: the group - * @data: caller opaque data to be passed to callback function - * @fn: caller supplied callback function - * - * This function is called by group users to iterate over group devices. - * Callers should hold a reference count to the group during callback. - * The group->mutex is held across callbacks, which will block calls to - * iommu_group_add/remove_device. - */ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -993,7 +982,17 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, return ret; } - +/** + * iommu_group_for_each_dev - iterate over each device in the group + * @group: the group + * @data: caller opaque data to be passed to callback function + * @fn: caller supplied callback function + * + * This function is called by group users to iterate over group devices. + * Callers should hold a reference count to the group during callback. + * The group->mutex is held across callbacks, which will block calls to + * iommu_group_add/remove_device. + */ int iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -3485,6 +3484,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_group); * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device * @mm: the mm to bind, caller must hold a reference to it + * @drvdata: opaque data pointer to pass to bind callback * * Create a bond between device and address space, allowing the device to access * the mm using the returned PASID. If a bond already exists between @device and diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 71f29c0927fc..cf5e9cb2cb26 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1086,7 +1086,7 @@ static __maybe_unused int omap_iommu_runtime_resume(struct device *dev) } /** - * omap_iommu_suspend_prepare - prepare() dev_pm_ops implementation + * omap_iommu_prepare - prepare() dev_pm_ops implementation * @dev: iommu device * * This function performs the necessary checks to determine if the IOMMU -- Gitee From aa06fd540f64622d0ca5ff8f22c8815d5eb2d868 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:44 -0800 Subject: [PATCH 10/20] iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA mainline inclusion from mainline-v5.18-rc1 commit 8e71ebc3c8dfbc1bac3876bfb1e5f5264fabe47b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 8e71ebc3c8df iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA Backport for intel SPR ENQCMD Instruction support -------------------------------- This CONFIG option originally only referred to the Shared Virtual Address (SVA) library. But it is now also used for non-library portions of code. Drop the "_LIB" suffix so that there is just one configuration option for all code relating to SVA. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-2-fenghua.yu@intel.com Signed-off-by: Jun Yang --- drivers/iommu/Kconfig | 6 +++--- drivers/iommu/Makefile | 3 +-- drivers/iommu/intel/Kconfig | 2 +- drivers/iommu/iommu-sva-lib.h | 6 +++--- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d97e38bfbe4b..a0d1e6da9aa0 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -103,8 +103,8 @@ config IOMMU_DMA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH -# Shared Virtual Addressing library -config IOMMU_SVA_LIB +# Shared Virtual Addressing +config IOMMU_SVA bool select IOASID @@ -318,7 +318,7 @@ config ARM_SMMU_V3 config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" depends on ARM_SMMU_V3 - select IOMMU_SVA_LIB + select IOMMU_SVA select MMU_NOTIFIER help Support for sharing process address spaces with devices using the diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 1e7519f56afb..ae38d815537e 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,5 +27,4 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o -obj-$(CONFIG_IOMMU_SVA_LIB) += io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 4a6919f95094..cecdad7f2aba 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -40,7 +40,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID - select IOMMU_SVA_LIB + select IOMMU_SVA help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 031155010ca8..95dc3ebc1928 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -17,7 +17,7 @@ struct device; struct iommu_fault; struct iopf_queue; -#ifdef CONFIG_IOMMU_SVA_LIB +#ifdef CONFIG_IOMMU_SVA int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); @@ -28,7 +28,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); -#else /* CONFIG_IOMMU_SVA_LIB */ +#else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) { return -ENODEV; @@ -64,5 +64,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } -#endif /* CONFIG_IOMMU_SVA_LIB */ +#endif /* CONFIG_IOMMU_SVA */ #endif /* _IOMMU_SVA_LIB_H */ -- Gitee From 0ea808b290ea4281f580324d9d359c925a015500 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:45 -0800 Subject: [PATCH 11/20] mm: Change CONFIG option for mm->pasid field mainline inclusion from mainline-v5.18-rc1 commit a5acc88f1ab7b60ff450665a9bd7011671724193 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit a5acc88f1ab7 mm: Change CONFIG option for mm->pasid field Backport for intel SPR ENQCMD Instruction support -------------------------------- This currently depends on CONFIG_IOMMU_SUPPORT. But it is only needed when CONFIG_IOMMU_SVA option is enabled. Change the CONFIG guards around definition and initialization of mm->pasid field. YES, refreshed. Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-3-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/mm_types.h | 2 +- kernel/fork.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c22e294f083..d9d10990661b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -582,7 +582,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif diff --git a/kernel/fork.c b/kernel/fork.c index 0fb86b65ae60..e73539a6b54a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -997,7 +997,7 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) static void mm_init_pasid(struct mm_struct *mm) { -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA mm->pasid = INIT_PASID; #endif } -- Gitee From c82862c9f7b424c47250b91d0d391d8ab17ef00a Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:46 -0800 Subject: [PATCH 12/20] iommu/ioasid: Introduce a helper to check for valid PASIDs mainline inclusion from mainline-v5.18-rc1 commit 35436e33a37d7686ef2a7a36706ba367ff82587e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 35436e33a37d iommu/ioasid: Introduce a helper to check for valid PASIDs Backport for intel SPR ENQCMD Instruction support -------------------------------- Define a pasid_valid() helper to check if a given PASID is valid. [ bp: Massage commit message. ] Suggested-by: Ashok Raj Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-4-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/ioasid.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6b..2237f64dbaae 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -41,6 +41,10 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -78,5 +82,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ -- Gitee From 94bb7e5646e5e5fb9ea49d44ed63c78544842823 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:47 -0800 Subject: [PATCH 13/20] kernel/fork: Initialize mm's PASID mainline inclusion from mainline-v5.18-rc1 commit 5a5686b57471cd4b0a03c4aa89d40de5ff3845ac category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 5a5686b57471 kernel/fork: Initialize mm's PASID Backport for intel SPR ENQCMD Instruction support -------------------------------- A new mm doesn't have a PASID yet when it's created. Initialize the mm's PASID on fork() or for init_mm to INVALID_IOASID (-1). INIT_PASID (0) is reserved for kernel legacy DMA PASID. It cannot be allocated to a user process. Initializing the process's PASID to 0 may cause confusion that's why the process uses the reserved kernel legacy DMA PASID. Initializing the PASID to INVALID_IOASID (-1) explicitly tells the process doesn't have a valid PASID yet. Even though the only user of mm_pasid_init() is in fork.c, define it in as the first of three mm/pasid life cycle functions (init/set/drop) to keep these all together. Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-5-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/sched/mm.h | 10 ++++++++++ kernel/fork.c | 11 ++--------- mm/init-mm.c | 4 ++++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index dc1f4dcd9a82..b697833b4362 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Routines for handling mm_structs @@ -364,4 +365,13 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index e73539a6b54a..87a77c3061d4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,8 +97,8 @@ #include #include #include +#include -#include #include #include #include @@ -995,13 +995,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static void mm_init_pasid(struct mm_struct *mm) -{ -#ifdef CONFIG_IOMMU_SVA - mm->pasid = INIT_PASID; -#endif -} - static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1032,7 +1025,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); - mm_init_pasid(mm); + mm_pasid_init(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); diff --git a/mm/init-mm.c b/mm/init-mm.c index b4a6f38fb51d..fbe7844d0912 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -10,6 +10,7 @@ #include #include +#include #include #ifndef INIT_MM_CONTEXT @@ -38,6 +39,9 @@ struct mm_struct init_mm = { .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, +#ifdef CONFIG_IOMMU_SVA + .pasid = INVALID_IOASID, +#endif INIT_MM_CONTEXT(init_mm) }; -- Gitee From cac73397c75775dc0e85205d2cde64ceaa234d63 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:48 -0800 Subject: [PATCH 14/20] iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit mainline inclusion from mainline-v5.18-rc1 commit bb851179d340e4779a498dc9380a9fafdb726dfd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit bb851179d340 iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit Backport for intel SPR ENQCMD Instruction support -------------------------------- PASIDs are process-wide. It was attempted to use refcounted PASIDs to free them when the last thread drops the refcount. This turned out to be complex and error prone. Given the fact that the PASID space is 20 bits, which allows up to 1M processes to have a PASID associated concurrently, PASID resource exhaustion is not a realistic concern. Therefore, it was decided to simplify the approach and stick with lazy on demand PASID allocation, but drop the eager free approach and make an allocated PASID's lifetime bound to the lifetime of the process. Get rid of the refcounting mechanisms and replace/rename the interfaces to reflect this new approach. [ bp: Massage commit message. ] Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Lu Baolu Reviewed-by: Jacob Pan Reviewed-by: Thomas Gleixner Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20220207230254.3342514-6-fenghua.yu@intel.com Signed-off-by: Jun Yang --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 5 +-- drivers/iommu/intel/iommu.c | 4 +- drivers/iommu/intel/svm.c | 9 ----- drivers/iommu/ioasid.c | 39 ++----------------- drivers/iommu/iommu-sva-lib.c | 39 ++++++------------- drivers/iommu/iommu-sva-lib.h | 1 - include/linux/ioasid.h | 12 +----- include/linux/sched/mm.h | 16 ++++++++ kernel/fork.c | 1 + 9 files changed, 38 insertions(+), 88 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index d4e1238347a6..a8e8fd9d7c8b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -351,15 +351,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { ret = PTR_ERR(bond->smmu_mn); - goto err_free_pasid; + goto err_free_bond; } list_add(&bond->list, &master->bonds); trace_smmu_bind_alloc(dev, mm->pasid); return &bond->sva; -err_free_pasid: - iommu_sva_free_pasid(mm); err_free_bond: kfree(bond); return ERR_PTR(ret); @@ -403,7 +401,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle) trace_smmu_unbind_free(handle->dev, bond->mm->pasid); list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); - iommu_sva_free_pasid(bond->mm); kfree(bond); } else { trace_smmu_unbind_put(handle->dev, bond->mm->pasid); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 89bb473fa11f..6622489e8fa4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5450,7 +5450,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain, link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); return ret; } @@ -5480,7 +5480,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 8825574dcef2..d886b494dddb 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -487,11 +487,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); } -static void intel_svm_free_pasid(struct mm_struct *mm) -{ - iommu_sva_free_pasid(mm); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, struct mm_struct *mm, @@ -641,8 +636,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } - /* Drop a PASID reference and free it if no reference. */ - intel_svm_free_pasid(mm); } out: return ret; @@ -1052,8 +1045,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void } sva = intel_svm_bind_mm(iommu, dev, mm, flags); - if (IS_ERR_OR_NULL(sva)) - intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 06fee7416816..a786c034907c 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -2,7 +2,7 @@ /* * I/O Address Space ID allocator. There is one global IOASID space, split into * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and - * free IOASIDs with ioasid_alloc and ioasid_put. + * free IOASIDs with ioasid_alloc() and ioasid_free(). */ #include #include @@ -15,7 +15,6 @@ struct ioasid_data { struct ioasid_set *set; void *private; struct rcu_head rcu; - refcount_t refs; }; /* @@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, data->set = set; data->private = private; - refcount_set(&data->refs, 1); /* * Custom allocator needs allocator data to perform platform specific @@ -348,35 +346,11 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, EXPORT_SYMBOL_GPL(ioasid_alloc); /** - * ioasid_get - obtain a reference to the IOASID - * @ioasid: the ID to get - */ -void ioasid_get(ioasid_t ioasid) -{ - struct ioasid_data *ioasid_data; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (ioasid_data) - refcount_inc(&ioasid_data->refs); - else - WARN_ON(1); - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_get); - -/** - * ioasid_put - Release a reference to an ioasid + * ioasid_free - Free an ioasid * @ioasid: the ID to remove - * - * Put a reference to the IOASID, free it when the number of references drops to - * zero. - * - * Return: %true if the IOASID was freed, %false otherwise. */ -bool ioasid_put(ioasid_t ioasid) +void ioasid_free(ioasid_t ioasid) { - bool free = false; struct ioasid_data *ioasid_data; spin_lock(&ioasid_allocator_lock); @@ -386,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid) goto exit_unlock; } - free = refcount_dec_and_test(&ioasid_data->refs); - if (!free) - goto exit_unlock; - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); /* Custom allocator needs additional steps to free the xa element */ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { @@ -399,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid) exit_unlock: spin_unlock(&ioasid_allocator_lock); - return free; } -EXPORT_SYMBOL_GPL(ioasid_put); +EXPORT_SYMBOL_GPL(ioasid_free); /** * ioasid_find - Find IOASID data diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index bd41405d34e9..106506143896 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid); * * Try to allocate a PASID for this mm, or take a reference to the existing one * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid, and must be released with iommu_sva_free_pasid(). - * @min must be greater than 0, because 0 indicates an unused mm->pasid. + * available in mm->pasid and will be available for the lifetime of the mm. * * Returns 0 on success and < 0 on error. */ @@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) return -EINVAL; mutex_lock(&iommu_sva_lock); - if (mm->pasid) { - if (mm->pasid >= min && mm->pasid <= max) - ioasid_get(mm->pasid); - else + /* Is a PASID already associated with this mm? */ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) ret = -EOVERFLOW; - } else { - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (pasid == INVALID_IOASID) - ret = -ENOMEM; - else - mm->pasid = pasid; + goto out; } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: mutex_unlock(&iommu_sva_lock); return ret; } EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); -/** - * iommu_sva_free_pasid - Release the mm's PASID - * @mm: the mm - * - * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid() - */ -void iommu_sva_free_pasid(struct mm_struct *mm) -{ - mutex_lock(&iommu_sva_lock); - if (ioasid_put(mm->pasid)) - mm->pasid = 0; - mutex_unlock(&iommu_sva_lock); -} -EXPORT_SYMBOL_GPL(iommu_sva_free_pasid); - /* ioasid_find getter() requires a void * argument */ static bool __mmget_not_zero(void *mm) { diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 95dc3ebc1928..8909ea1094e3 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -9,7 +9,6 @@ #include int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); -void iommu_sva_free_pasid(struct mm_struct *mm); struct mm_struct *iommu_sva_find(ioasid_t pasid); /* I/O Page fault */ diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 2237f64dbaae..af1c9d62e642 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,8 +34,7 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); @@ -53,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index b697833b4362..88adf3a686bd 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -370,8 +370,24 @@ static inline void mm_pasid_init(struct mm_struct *mm) { mm->pasid = INVALID_IOASID; } + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} #else static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 87a77c3061d4..422379890a74 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1098,6 +1098,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + mm_pasid_drop(mm); mmdrop(mm); } -- Gitee From 40973e5a7516246499ecec81b0e9ff6e5c64cda5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:49 -0800 Subject: [PATCH 15/20] x86/fpu: Clear PASID when copying fpstate mainline inclusion from mainline-v5.18-rc1 commit dc7507ddce593cbd9c93330024a5658db6f8ef73 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit dc7507ddce59 x86/fpu: Clear PASID when copying fpstate Backport for intel SPR ENQCMD Instruction support -------------------------------- The kernel must allocate a Process Address Space ID (PASID) on behalf of each process which will use ENQCMD and program it into the new MSR to communicate the process identity to platform hardware. ENQCMD uses the PASID stored in this MSR to tag requests from this process. The PASID state must be cleared on fork() since fork creates a new address space. For clone(), it would be functionally OK to copy the PASID. However, clearing it is _also_ functionally OK since any PASID use will trigger the #GP handler to populate the MSR. Copying the PASID state has two main downsides: * It requires differentiating fork() and clone() in the code, both in the FPU code and keeping tsk->pasid_activated consistent. * It guarantees that the PASID is out of its init state, which incurs small but non-zero cost on every XSAVE/XRSTOR. The main downside of clearing the PASID at fpstate copy is the future, one-time #GP for the thread. Use the simplest approach: clear the PASID state both on clone() and fork(). Rely on the #GP handler for MSR population in children. Also, just clear the PASID bit from xfeatures if XSAVE is supported. This will have no effect on systems that do not have PASID support. It is virtually zero overhead because 'dst_fpu' was just written and the whole thing is cache hot. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-7-fenghua.yu@intel.com Signed-off-by: Jun Yang --- arch/x86/kernel/fpu/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 571220ac8bea..7782e9d2e284 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -247,6 +247,13 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src) set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); + /* + * Children never inherit PASID state. + * Force it to have its init value: + */ + if (use_xsave()) + dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; + trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); -- Gitee From e27b978a7ccc886b1047f0dbb00bcafb53b159d0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Feb 2022 15:02:50 -0800 Subject: [PATCH 16/20] sched: Define and initialize a flag to identify valid PASID in the task mainline inclusion from mainline-v5.18-rc1 commit b1b9cd41cfd58c81626dd3d7f15f1dad96b4a994 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit b1b9cd41cfd5 sched: Define and initialize a flag to identify valid PASID in the task Backport for intel SPR ENQCMD Instruction support -------------------------------- Add a new single bit field to the task structure to track whether this task has initialized the IA32_PASID MSR to the mm's PASID. Initialize the field to zero when creating a new task with fork/clone. Signed-off-by: Peter Zijlstra Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-8-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/sched.h | 3 +++ kernel/fork.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 47f462040f4d..c763bd95a7ea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -865,6 +865,9 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/kernel/fork.c b/kernel/fork.c index 422379890a74..5bfe124bcd7b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -943,6 +943,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif +#ifdef CONFIG_IOMMU_SVA + tsk->pasid_activated = 0; +#endif + #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif -- Gitee From 87552e28644f2a695cc42c4ce65fc2b7df85555b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:51 -0800 Subject: [PATCH 17/20] x86/traps: Demand-populate PASID MSR via #GP mainline inclusion from mainline-v5.18-rc1 commit 51ace79ac5d9544cdd9e1094bdebe260661d5704 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 51ace79ac5d9 x86/traps: Demand-populate PASID MSR via #GP Backport for intel SPR ENQCMD Instruction support -------------------------------- All tasks start with PASID state disabled. This means that the first time they execute an ENQCMD instruction they will take a #GP fault. Modify the #GP fault handler to check if the "mm" for the task has already been allocated a PASID. If so, try to fix the #GP fault by loading the IA32_PASID MSR. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-9-fenghua.yu@intel.com Signed-off-by: Jun Yang --- arch/x86/kernel/traps.c | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 696ec85164e6..924ca49c0050 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -562,6 +563,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. + */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. + */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -570,6 +622,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { -- Gitee From 669c6b1f256874c6aed4de2c071094f486d1e889 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:52 -0800 Subject: [PATCH 18/20] x86/cpufeatures: Re-enable ENQCMD mainline inclusion from mainline-v5.18-rc1 commit d6e9c1ec98dc4b0aef6f78e3266913dfa1adad4f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit d6e9c1ec98dc x86/cpufeatures: Re-enable ENQCMD Backport for intel SPR ENQCMD Instruction support -------------------------------- The ENQCMD feature can only be used if CONFIG_INTEL_IOMMU_SVM is set. Add X86_FEATURE_ENQCMD to the disabled features mask as appropriate so that cpu_feature_enabled() can be used to check the feature. [ bp: Massage commit message. ] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-10-fenghua.yu@intel.com Signed-off-by: Jun Yang --- arch/x86/include/asm/disabled-features.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 018654e65c60..73aaf1e0cbb6 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,8 +56,11 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 -- Gitee From ad03116a350ae52b0fbb69ec8624e2b4136ad153 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:54 -0800 Subject: [PATCH 19/20] Documentation/x86: Update documentation for SVA (Shared Virtual Addressing) mainline inclusion from mainline-v5.18-rc1 commit 294b9a3dd092fb90c5439987c8df8b0c5dd21bc1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 294b9a3dd092 Documentation/x86: Update documentation for SVA (Shared Virtual Addressing) Backport for intel SPR ENQCMD Instruction support -------------------------------- Adjust the documentation to the new way how a PASID is being allocated, freed and fixed up. Based on a patch by Ashok Raj [ bp: Massage commit message, fix htmldocs build warning ] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-12-fenghua.yu@intel.com Signed-off-by: Jun Yang --- Documentation/x86/sva.rst | 53 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst index 076efd51ef1f..2e9b8b0f9a0f 100644 --- a/Documentation/x86/sva.rst +++ b/Documentation/x86/sva.rst @@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application thread can interact with a device. Threads that belong to the same process share the same page tables, thus the same MSR value. -PASID is cleared when a process is created. The PASID allocation and MSR -programming may occur long after a process and its threads have been created. -One thread must call iommu_sva_bind_device() to allocate the PASID for the -process. If a thread uses ENQCMD without the MSR first being populated, a #GP -will be raised. The kernel will update the PASID MSR with the PASID for all -threads in the process. A single process PASID can be used simultaneously -with multiple devices since they all share the same address space. - -One thread can call iommu_sva_unbind_device() to free the allocated PASID. -The kernel will clear the PASID MSR for all threads belonging to the process. - -New threads inherit the MSR value from the parent. +PASID Life Cycle Management +=========================== + +PASID is initialized as INVALID_IOASID (-1) when a process is created. + +Only processes that access SVA-capable devices need to have a PASID +allocated. This allocation happens when a process opens/binds an SVA-capable +device but finds no PASID for this process. Subsequent binds of the same, or +other devices will share the same PASID. + +Although the PASID is allocated to the process by opening a device, +it is not active in any of the threads of that process. It's loaded to the +IA32_PASID MSR lazily when a thread tries to submit a work descriptor +to a device using the ENQCMD. + +That first access will trigger a #GP fault because the IA32_PASID MSR +has not been initialized with the PASID value assigned to the process +when the device was opened. The Linux #GP handler notes that a PASID has +been allocated for the process, and so initializes the IA32_PASID MSR +and returns so that the ENQCMD instruction is re-executed. + +On fork(2) or exec(2) the PASID is removed from the process as it no +longer has the same address space that it had when the device was opened. + +On clone(2) the new task shares the same address space, so will be +able to use the PASID allocated to the process. The IA32_PASID is not +preemptively initialized as the PASID value might not be allocated yet or +the kernel does not know whether this thread is going to access the device +and the cleared IA32_PASID MSR reduces context switch overhead by xstate +init optimization. Since #GP faults have to be handled on any threads that +were created before the PASID was assigned to the mm of the process, newly +created threads might as well be treated in a consistent way. + +Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in +all threads in unbind, free the PASID lazily only on mm exit. + +If a process does a close(2) of the device file descriptor and munmap(2) +of the device MMIO portal, then the driver will unbind the device. The +PASID is still marked VALID in the PASID_MSR for any threads in the +process that accessed the device. But this is harmless as without the +MMIO portal they cannot submit new work to the device. Relationships ============= -- Gitee From 8f41fbb4061dd04990ca10e7d81eee9e39cf8875 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:53 -0800 Subject: [PATCH 20/20] tools/objtool: Check for use of the ENQCMD instruction in the kernel mainline inclusion from mainline-v5.18-rc1 commit dcef0d39f0a0f6918628ba7de480af4431b93dc6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit dcef0d39f0a0 tools/objtool: Check for use of the ENQCMD instruction in the kernel Backport for intel SPR ENQCMD Instruction support -------------------------------- The ENQCMD instruction implicitly accesses the PASID_MSR to fill in the pasid field of the descriptor being submitted to an accelerator. But there is no precise (and stable across kernel changes) point at which the PASID_MSR is updated from the value for one task to the next. Kernel code that uses accelerators must always use the ENQCMDS instruction which does not access the PASID_MSR. Check for use of the ENQCMD instruction in the kernel and warn on its usage. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220207230254.3342514-11-fenghua.yu@intel.com Signed-off-by: Peter Zijlstra Signed-off-by: Jun Yang --- tools/objtool/arch/x86/decode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cde9c36e40ae..dac78f7c8ad0 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -91,7 +91,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, { struct insn insn; int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, + unsigned char op1, op2, op3, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0; struct stack_op *op = NULL; @@ -117,6 +117,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + op3 = insn.opcode.bytes[2]; if (insn.rex_prefix.nbytes) { rex = insn.rex_prefix.bytes[0]; @@ -423,6 +424,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x38 && op3 == 0xf8) { + if (insn.prefixes.nbytes == 1 && + insn.prefixes.bytes[0] == 0xf2) { + /* ENQCMD cannot be used in the kernel. */ + WARN("ENQCMD instruction at %s:%lx", sec->name, + offset); + } + } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */ -- Gitee