diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 55285c136cf06a4fda159de104ef091561b3fed9..c67c63bacd3c9c70f88f16c1cc472873e3855173 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -128,6 +128,8 @@ Date: Aug 28, 2020 KernelVersion: 5.10.0 Contact: dmaengine@vger.kernel.org Description: The last executed device administrative command's status/error. + Also last configuration error overloaded. + Writing to it will clear the status. What: /sys/bus/dsa/devices/wq./block_on_fault Date: Oct 27, 2020 @@ -211,6 +213,13 @@ Contact: dmaengine@vger.kernel.org Description: Indicate whether ATS disable is turned on for the workqueue. 0 indicates ATS is on, and 1 indicates ATS is off for the workqueue. +What: /sys/bus/dsa/devices/wq./occupancy +Date May 25, 2021 +KernelVersion: 5.14.0 +Contact: dmaengine@vger.kernel.org +Description: Show the current number of entries in this WQ if WQ Occupancy + Support bit WQ capabilities is 1. + What: /sys/bus/dsa/devices/engine./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa new file mode 100644 index 0000000000000000000000000000000000000000..3c7d132281b03a88346ea4538cacac7f870db783 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa @@ -0,0 +1,30 @@ +What: /sys/bus/event_source/devices/dsa*/format +Date: April 2021 +KernelVersion: 5.13 +Contact: Tom Zanussi +Description: Read-only. Attribute group to describe the magic bits + that go into perf_event_attr.config or + perf_event_attr.config1 for the IDXD DSA pmu. (See also + ABI/testing/sysfs-bus-event_source-devices-format). + + Each attribute in this group defines a bit range in + perf_event_attr.config or perf_event_attr.config1. + All supported attributes are listed below (See the + IDXD DSA Spec for possible attribute values):: + + event_category = "config:0-3" - event category + event = "config:4-31" - event ID + + filter_wq = "config1:0-31" - workqueue filter + filter_tc = "config1:32-39" - traffic class filter + filter_pgsz = "config1:40-43" - page size filter + filter_sz = "config1:44-51" - transfer size filter + filter_eng = "config1:52-59" - engine filter + +What: /sys/bus/event_source/devices/dsa*/cpumask +Date: April 2021 +KernelVersion: 5.13 +Contact: Tom Zanussi +Description: Read-only. This file always returns the cpu to which the + IDXD DSA pmu is bound for access to all dsa pmu + performance monitoring events. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a734227e8512cb58104cb9e9c66031e1a0b15d93..b8e1c0acf4b71d8310204c6766d227a1075cf225 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1687,6 +1687,11 @@ support for the idxd driver. By default it is set to true (1). + idxd.tc_override= [HW] + Format: + Allow override of default traffic class configuration + for the device. By default it is set to false (0). + ieee754= [MIPS] Select IEEE Std 754 conformance mode Format: { strict | legacy | 2008 | relaxed } Default: strict diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 1d3cbaef4bb7187988eeee330476eee5af807b30..1f5b1a9dd6af87a365dbae3fb3df1743a6d6a8b5 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -287,7 +287,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } __iomem *__dst = dst; - int zf; + bool zf; /* * ENQCMDS %(rdx), rax diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 35ee4a775c311fe3af3c4c2613a31a0718ff9b1b..99a31b51f01e77da4992297f787d5d5161ad232a 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -283,10 +283,15 @@ config INTEL_IDMA64 Enable DMA support for Intel Low Power Subsystem such as found on Intel Skylake PCH. +config INTEL_IDXD_BUS + tristate + default INTEL_IDXD + config INTEL_IDXD tristate "Intel Data Accelerators support" - depends on PCI && X86_64 + depends on PCI && X86_64 && !UML depends on PCI_MSI + depends on PCI_PASID depends on SBITMAP select DMA_ENGINE help @@ -297,6 +302,23 @@ config INTEL_IDXD If unsure, say N. +config INTEL_IDXD_COMPAT + bool "Legacy behavior for idxd driver" + depends on PCI && X86_64 + select INTEL_IDXD_BUS + help + Compatible driver to support old /sys/bus/dsa/drivers/dsa behavior. + The old behavior performed driver bind/unbind for device and wq + devices all under the dsa driver. The compat driver will emulate + the legacy behavior in order to allow existing support apps (i.e. + accel-config) to continue function. It is expected that accel-config + v3.2 and earlier will need the compat mode. A distro with later + accel-config version can disable this compat config. + + Say Y if you have old applications that require such behavior. + + If unsure, say N. + # Config symbol that collects all the dependencies that's necessary to # support shared virtual memory for the devices supported by idxd. config INTEL_IDXD_SVM @@ -307,6 +329,18 @@ config INTEL_IDXD_SVM depends on PCI_PASID depends on PCI_IOV +config INTEL_IDXD_PERFMON + bool "Intel Data Accelerators performance monitor support" + depends on INTEL_IDXD + help + Enable performance monitor (pmu) support for the Intel(R) + data accelerators present in Intel Xeon CPU. With this + enabled, perf can be used to monitor the DSA (Intel Data + Streaming Accelerator) events described in the Intel DSA + spec. + + If unsure, say N. + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86_64 diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 948a8da05f8b6829c498d9961b9c3e8dfc665586..8928816a4f304b6c04e8c09b36e2b5a24c2eac29 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_INTEL_IDMA64) += idma64.o obj-$(CONFIG_INTEL_IOATDMA) += ioat/ -obj-$(CONFIG_INTEL_IDXD) += idxd/ +obj-y += idxd/ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_K3_DMA) += k3dma.o obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index 8978b898d777075b36da88868a997f2c09f39a22..a1e9f2b3a37ccac9c78e3bd5cc3e52e96a0982a9 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,2 +1,12 @@ +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD + obj-$(CONFIG_INTEL_IDXD) += idxd.o idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o + +idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o + +obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o +idxd_bus-y := bus.o + +obj-$(CONFIG_INTEL_IDXD_COMPAT) += idxd_compat.o +idxd_compat-y := compat.o diff --git a/drivers/dma/idxd/bus.c b/drivers/dma/idxd/bus.c new file mode 100644 index 0000000000000000000000000000000000000000..02837f0fb3e4ec5294af08045269d90f057b6df7 --- /dev/null +++ b/drivers/dma/idxd/bus.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include "idxd.h" + + +int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &idxd_drv->drv; + + if (!idxd_drv->type) { + pr_debug("driver type not set (%ps)\n", __builtin_return_address(0)); + return -EINVAL; + } + + drv->name = idxd_drv->name; + drv->bus = &dsa_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL_GPL(__idxd_driver_register); + +void idxd_driver_unregister(struct idxd_device_driver *idxd_drv) +{ + driver_unregister(&idxd_drv->drv); +} +EXPORT_SYMBOL_GPL(idxd_driver_unregister); + +static int idxd_config_bus_match(struct device *dev, + struct device_driver *drv) +{ + struct idxd_device_driver *idxd_drv = + container_of(drv, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + int i = 0; + + while (idxd_drv->type[i] != IDXD_DEV_NONE) { + if (idxd_dev->type == idxd_drv->type[i]) + return 1; + i++; + } + + return 0; +} + +static int idxd_config_bus_probe(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return idxd_drv->probe(idxd_dev); +} + +static int idxd_config_bus_remove(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + idxd_drv->remove(idxd_dev); + return 0; +} + +struct bus_type dsa_bus_type = { + .name = "dsa", + .match = idxd_config_bus_match, + .probe = idxd_config_bus_probe, + .remove = idxd_config_bus_remove, +}; +EXPORT_SYMBOL_GPL(dsa_bus_type); + +static int __init dsa_bus_init(void) +{ + return bus_register(&dsa_bus_type); +} +module_init(dsa_bus_init); + +static void __exit dsa_bus_exit(void) +{ + bus_unregister(&dsa_bus_type); +} +module_exit(dsa_bus_exit); + +MODULE_DESCRIPTION("IDXD driver dsa_bus_type driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 1d8a3876b74520d172ee4518bc471efcf7f06234..b9b2b4a4124eee75dd04720c082023cd8a875177 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -41,11 +41,11 @@ struct idxd_user_context { static void idxd_cdev_dev_release(struct device *dev) { - struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev); + struct idxd_cdev *idxd_cdev = dev_to_cdev(dev); struct idxd_cdev_context *cdev_ctx; struct idxd_wq *wq = idxd_cdev->wq; - cdev_ctx = &ictx[wq->idxd->type]; + cdev_ctx = &ictx[wq->idxd->data->type]; ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor); kfree(idxd_cdev); } @@ -110,6 +110,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) pasid = iommu_sva_get_pasid(sva); if (pasid == IOMMU_PASID_INVALID) { iommu_sva_unbind_device(sva); + rc = -EINVAL; goto failed; } @@ -217,14 +218,13 @@ static __poll_t idxd_cdev_poll(struct file *filp, struct idxd_user_context *ctx = filp->private_data; struct idxd_wq *wq = ctx->wq; struct idxd_device *idxd = wq->idxd; - unsigned long flags; __poll_t out = 0; poll_wait(filp, &wq->err_queue, wait); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); if (idxd->sw_err.valid) out = EPOLLIN | EPOLLRDNORM; - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); return out; } @@ -239,7 +239,7 @@ static const struct file_operations idxd_cdev_fops = { int idxd_cdev_get_major(struct idxd_device *idxd) { - return MAJOR(ictx[idxd->type].devt); + return MAJOR(ictx[idxd->data->type].devt); } int idxd_wq_add_cdev(struct idxd_wq *wq) @@ -255,10 +255,11 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) if (!idxd_cdev) return -ENOMEM; + idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV; idxd_cdev->wq = wq; cdev = &idxd_cdev->cdev; - dev = &idxd_cdev->dev; - cdev_ctx = &ictx[wq->idxd->type]; + dev = cdev_dev(idxd_cdev); + cdev_ctx = &ictx[wq->idxd->data->type]; minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL); if (minor < 0) { kfree(idxd_cdev); @@ -267,13 +268,12 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) idxd_cdev->minor = minor; device_initialize(dev); - dev->parent = &wq->conf_dev; - dev->bus = idxd_get_bus_type(idxd); + dev->parent = wq_confdev(wq); + dev->bus = &dsa_bus_type; dev->type = &idxd_cdev_device_type; dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor); - rc = dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd), - idxd->id, wq->id); + rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id); if (rc < 0) goto err; @@ -296,15 +296,70 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) void idxd_wq_del_cdev(struct idxd_wq *wq) { struct idxd_cdev *idxd_cdev; - struct idxd_cdev_context *cdev_ctx; - cdev_ctx = &ictx[wq->idxd->type]; idxd_cdev = wq->idxd_cdev; wq->idxd_cdev = NULL; - cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev); - put_device(&idxd_cdev->dev); + cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); + put_device(cdev_dev(idxd_cdev)); } +static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + + mutex_lock(&wq->wq_lock); + wq->type = IDXD_WQT_USER; + rc = __drv_enable_wq(wq); + if (rc < 0) + goto err; + + rc = idxd_wq_add_cdev(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_CDEV_ERR; + goto err_cdev; + } + + idxd->cmd_status = 0; + mutex_unlock(&wq->wq_lock); + return 0; + +err_cdev: + __drv_disable_wq(wq); +err: + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); + return rc; +} + +static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + + mutex_lock(&wq->wq_lock); + idxd_wq_del_cdev(wq); + __drv_disable_wq(wq); + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_WQ, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_user_drv = { + .probe = idxd_user_drv_probe, + .remove = idxd_user_drv_remove, + .name = "user", + .type = dev_types, +}; +EXPORT_SYMBOL_GPL(idxd_user_drv); + int idxd_cdev_register(void) { int rc, i; diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c new file mode 100644 index 0000000000000000000000000000000000000000..3df21615f8883454d04ae7d84c6e18f4d4c6364d --- /dev/null +++ b/drivers/dma/idxd/compat.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include "idxd.h" + +extern int device_driver_attach(struct device_driver *drv, struct device *dev); +extern void device_driver_detach(struct device *dev); + +#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = \ + __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) + +static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) +{ + struct bus_type *bus = drv->bus; + struct device *dev; + int rc = -ENODEV; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (dev && dev->driver) { + device_driver_detach(dev); + rc = count; + } + + return rc; +} +static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); + +static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) +{ + struct bus_type *bus = drv->bus; + struct device *dev; + struct device_driver *alt_drv = NULL; + int rc = -ENODEV; + struct idxd_dev *idxd_dev; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (!dev || dev->driver || drv != &dsa_drv.drv) + return -ENODEV; + + idxd_dev = confdev_to_idxd_dev(dev); + if (is_idxd_dev(idxd_dev)) { + alt_drv = driver_find("idxd", bus); + } else if (is_idxd_wq_dev(idxd_dev)) { + struct idxd_wq *wq = confdev_to_wq(dev); + + if (is_idxd_wq_kernel(wq)) + alt_drv = driver_find("dmaengine", bus); + else if (is_idxd_wq_user(wq)) + alt_drv = driver_find("user", bus); + } + if (!alt_drv) + return -ENODEV; + + rc = device_driver_attach(alt_drv, dev); + if (rc < 0) + return rc; + + return count; +} +static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store); + +static struct attribute *dsa_drv_compat_attrs[] = { + &driver_attr_bind.attr, + &driver_attr_unbind.attr, + NULL, +}; + +static const struct attribute_group dsa_drv_compat_attr_group = { + .attrs = dsa_drv_compat_attrs, +}; + +static const struct attribute_group *dsa_drv_compat_groups[] = { + &dsa_drv_compat_attr_group, + NULL, +}; + +static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) +{ + return -ENODEV; +} + +static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) +{ +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_NONE, +}; + +struct idxd_device_driver dsa_drv = { + .name = "dsa", + .probe = idxd_dsa_drv_probe, + .remove = idxd_dsa_drv_remove, + .type = dev_types, + .drv = { + .suppress_bind_attrs = true, + .groups = dsa_drv_compat_groups, + }, +}; + +module_idxd_driver(dsa_drv); +MODULE_IMPORT_NS(IDXD); diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 97cc1653b5ffb6e16367c141adc9e06f6c821889..fab412349f7feb96bfefba70341ccb26b5b0cd6a 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -15,11 +15,13 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, u32 *status); +static void idxd_device_wqs_clear_state(struct idxd_device *idxd); +static void idxd_wq_disable_cleanup(struct idxd_wq *wq); /* Interrupt control bits */ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector); + struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); pci_msi_mask_irq(data); } @@ -36,7 +38,7 @@ void idxd_mask_msix_vectors(struct idxd_device *idxd) void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector); + struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); pci_msi_unmask_irq(data); } @@ -47,6 +49,7 @@ void idxd_unmask_error_interrupts(struct idxd_device *idxd) genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); genctrl.softerr_int_en = 1; + genctrl.halt_int_en = 1; iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); } @@ -56,6 +59,7 @@ void idxd_mask_error_interrupts(struct idxd_device *idxd) genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); genctrl.softerr_int_en = 0; + genctrl.halt_int_en = 0; iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); } @@ -131,40 +135,24 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; int rc, num_descs, i; - int align; - u64 tmp; if (wq->type != IDXD_WQT_KERNEL) return 0; - wq->num_descs = wq->size; - num_descs = wq->size; + num_descs = wq_dedicated(wq) ? wq->size : wq->threshold; + wq->num_descs = num_descs; rc = alloc_hw_descs(wq, num_descs); if (rc < 0) return rc; - if (idxd->type == IDXD_TYPE_DSA) - align = 32; - else if (idxd->type == IDXD_TYPE_IAX) - align = 64; - else - return -ENODEV; - - wq->compls_size = num_descs * idxd->compl_size + align; - wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size, - &wq->compls_addr_raw, GFP_KERNEL); - if (!wq->compls_raw) { + wq->compls_size = num_descs * idxd->data->compl_size; + wq->compls = dma_alloc_coherent(dev, wq->compls_size, &wq->compls_addr, GFP_KERNEL); + if (!wq->compls) { rc = -ENOMEM; goto fail_alloc_compls; } - /* Adjust alignment */ - wq->compls_addr = (wq->compls_addr_raw + (align - 1)) & ~(align - 1); - tmp = (u64)wq->compls_raw; - tmp = (tmp + (align - 1)) & ~(align - 1); - wq->compls = (struct dsa_completion_record *)tmp; - rc = alloc_descs(wq, num_descs); if (rc < 0) goto fail_alloc_descs; @@ -178,11 +166,11 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) struct idxd_desc *desc = wq->descs[i]; desc->hw = wq->hw_descs[i]; - if (idxd->type == IDXD_TYPE_DSA) + if (idxd->data->type == IDXD_TYPE_DSA) desc->completion = &wq->compls[i]; - else if (idxd->type == IDXD_TYPE_IAX) + else if (idxd->data->type == IDXD_TYPE_IAX) desc->iax_completion = &wq->iax_compls[i]; - desc->compl_dma = wq->compls_addr + idxd->compl_size * i; + desc->compl_dma = wq->compls_addr + idxd->data->compl_size * i; desc->id = i; desc->wq = wq; desc->cpu = -1; @@ -193,8 +181,7 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) fail_sbitmap_init: free_descs(wq); fail_alloc_descs: - dma_free_coherent(dev, wq->compls_size, wq->compls_raw, - wq->compls_addr_raw); + dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); fail_alloc_compls: free_hw_descs(wq); return rc; @@ -209,8 +196,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq) free_hw_descs(wq); free_descs(wq); - dma_free_coherent(dev, wq->compls_size, wq->compls_raw, - wq->compls_addr_raw); + dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); sbitmap_queue_free(&wq->sbq); } @@ -238,7 +224,7 @@ int idxd_wq_enable(struct idxd_wq *wq) return 0; } -int idxd_wq_disable(struct idxd_wq *wq) +int idxd_wq_disable(struct idxd_wq *wq, bool reset_config) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; @@ -259,6 +245,8 @@ int idxd_wq_disable(struct idxd_wq *wq) return -ENXIO; } + if (reset_config) + idxd_wq_disable_cleanup(wq); wq->state = IDXD_WQ_DISABLED; dev_dbg(dev, "WQ %d disabled\n", wq->id); return 0; @@ -293,6 +281,7 @@ void idxd_wq_reset(struct idxd_wq *wq) operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL); + idxd_wq_disable_cleanup(wq); wq->state = IDXD_WQ_DISABLED; } @@ -318,6 +307,20 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq) struct device *dev = &wq->idxd->pdev->dev; devm_iounmap(dev, wq->portal); + wq->portal = NULL; + wq->portal_offset = 0; +} + +void idxd_wqs_unmap_portal(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + if (wq->portal) + idxd_wq_unmap_portal(wq); + } } int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) @@ -326,19 +329,18 @@ int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) int rc; union wqcfg wqcfg; unsigned int offset; - unsigned long flags; - rc = idxd_wq_disable(wq); + rc = idxd_wq_disable(wq, false); if (rc < 0) return rc; offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); wqcfg.pasid_en = 1; wqcfg.pasid = pasid; iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); rc = idxd_wq_enable(wq); if (rc < 0) @@ -353,19 +355,18 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq) int rc; union wqcfg wqcfg; unsigned int offset; - unsigned long flags; - rc = idxd_wq_disable(wq); + rc = idxd_wq_disable(wq, false); if (rc < 0) return rc; offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); wqcfg.pasid_en = 0; wqcfg.pasid = 0; iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); rc = idxd_wq_enable(wq); if (rc < 0) @@ -374,11 +375,11 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq) return 0; } -void idxd_wq_disable_cleanup(struct idxd_wq *wq) +static void idxd_wq_disable_cleanup(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; - lockdep_assert_held(&idxd->dev_lock); + lockdep_assert_held(&wq->wq_lock); memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; wq->size = 0; @@ -387,9 +388,35 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->priority = 0; wq->ats_dis = 0; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); } +static void idxd_wq_ref_release(struct percpu_ref *ref) +{ + struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active); + + complete(&wq->wq_dead); +} + +int idxd_wq_init_percpu_ref(struct idxd_wq *wq) +{ + int rc; + + memset(&wq->wq_active, 0, sizeof(wq->wq_active)); + rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL); + if (rc < 0) + return rc; + reinit_completion(&wq->wq_dead); + return 0; +} + +void idxd_wq_quiesce(struct idxd_wq *wq) +{ + percpu_ref_kill(&wq->wq_active); + wait_for_completion(&wq->wq_dead); +} + /* Device control bits */ static inline bool idxd_is_enabled(struct idxd_device *idxd) { @@ -420,7 +447,6 @@ int idxd_device_init_reset(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; union idxd_command_reg cmd; - unsigned long flags; if (idxd_device_is_halted(idxd)) { dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); @@ -430,13 +456,13 @@ int idxd_device_init_reset(struct idxd_device *idxd) memset(&cmd, 0, sizeof(cmd)); cmd.cmd = IDXD_CMD_RESET_DEVICE; dev_dbg(dev, "%s: sending reset for init.\n", __func__); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->cmd_lock); iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) cpu_relax(); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->cmd_lock); return 0; } @@ -445,7 +471,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, { union idxd_command_reg cmd; DECLARE_COMPLETION_ONSTACK(done); - unsigned long flags; + u32 stat; if (idxd_device_is_halted(idxd)) { dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); @@ -459,10 +485,10 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, cmd.operand = operand; cmd.int_req = 1; - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->cmd_lock); wait_event_lock_irq(idxd->cmd_waitq, !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), - idxd->dev_lock); + idxd->cmd_lock); dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n", __func__, cmd_code, operand); @@ -476,18 +502,18 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, * After command submitted, release lock and go to sleep until * the command completes via interrupt. */ - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->cmd_lock); wait_for_completion(&done); - spin_lock_irqsave(&idxd->dev_lock, flags); - if (status) { - *status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); - idxd->cmd_status = *status & GENMASK(7, 0); - } + stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); + spin_lock(&idxd->cmd_lock); + if (status) + *status = stat; + idxd->cmd_status = stat & GENMASK(7, 0); __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); /* Wake up other pending commands */ wake_up(&idxd->cmd_waitq); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->cmd_lock); } int idxd_device_enable(struct idxd_device *idxd) @@ -513,27 +539,10 @@ int idxd_device_enable(struct idxd_device *idxd) return 0; } -void idxd_device_wqs_clear_state(struct idxd_device *idxd) -{ - int i; - - lockdep_assert_held(&idxd->dev_lock); - - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; - - if (wq->state == IDXD_WQ_ENABLED) { - idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; - } - } -} - int idxd_device_disable(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; u32 status; - unsigned long flags; if (!idxd_is_enabled(idxd)) { dev_dbg(dev, "Device is not enabled\n"); @@ -549,22 +558,22 @@ int idxd_device_disable(struct idxd_device *idxd) return -ENXIO; } - spin_lock_irqsave(&idxd->dev_lock, flags); - idxd_device_wqs_clear_state(idxd); - idxd->state = IDXD_DEV_CONF_READY; - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); + idxd_device_clear_state(idxd); + idxd->state = IDXD_DEV_DISABLED; + spin_unlock(&idxd->dev_lock); return 0; } void idxd_device_reset(struct idxd_device *idxd) { - unsigned long flags; - idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL); - spin_lock_irqsave(&idxd->dev_lock, flags); - idxd_device_wqs_clear_state(idxd); - idxd->state = IDXD_DEV_CONF_READY; - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); + idxd_device_clear_state(idxd); + idxd->state = IDXD_DEV_DISABLED; + idxd_unmask_error_interrupts(idxd); + idxd_msix_perm_setup(idxd); + spin_unlock(&idxd->dev_lock); } void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) @@ -578,7 +587,130 @@ void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) dev_dbg(dev, "pasid %d drained\n", pasid); } +int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle, + enum idxd_interrupt_type irq_type) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand, status; + + if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))) + return -EOPNOTSUPP; + + dev_dbg(dev, "get int handle, idx %d\n", idx); + + operand = idx & GENMASK(15, 0); + if (irq_type == IDXD_IRQ_IMS) + operand |= CMD_INT_HANDLE_IMS; + + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_REQUEST_INT_HANDLE, operand); + + idxd_cmd_exec(idxd, IDXD_CMD_REQUEST_INT_HANDLE, operand, &status); + + if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { + dev_dbg(dev, "request int handle failed: %#x\n", status); + return -ENXIO; + } + + *handle = (status >> IDXD_CMDSTS_RES_SHIFT) & GENMASK(15, 0); + + dev_dbg(dev, "int handle acquired: %u\n", *handle); + return 0; +} + +int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, + enum idxd_interrupt_type irq_type) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand, status; + union idxd_command_reg cmd; + + if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))) + return -EOPNOTSUPP; + + dev_dbg(dev, "release int handle, handle %d\n", handle); + + memset(&cmd, 0, sizeof(cmd)); + operand = handle & GENMASK(15, 0); + + if (irq_type == IDXD_IRQ_IMS) + operand |= CMD_INT_HANDLE_IMS; + + cmd.cmd = IDXD_CMD_RELEASE_INT_HANDLE; + cmd.operand = operand; + + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand); + + spin_lock(&idxd->cmd_lock); + iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); + + while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) + cpu_relax(); + status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); + spin_unlock(&idxd->cmd_lock); + + if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { + dev_dbg(dev, "release int handle failed: %#x\n", status); + return -ENXIO; + } + + dev_dbg(dev, "int handle released.\n"); + return 0; +} + /* Device configuration bits */ +static void idxd_engines_clear_state(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + engine->group = NULL; + } +} + +static void idxd_groups_clear_state(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + memset(&group->grpcfg, 0, sizeof(group->grpcfg)); + group->num_engines = 0; + group->num_wqs = 0; + group->use_token_limit = false; + group->tokens_allowed = 0; + group->tokens_reserved = 0; + group->tc_a = -1; + group->tc_b = -1; + } +} + +static void idxd_device_wqs_clear_state(struct idxd_device *idxd) +{ + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + if (wq->state == IDXD_WQ_ENABLED) { + idxd_wq_disable_cleanup(wq); + wq->state = IDXD_WQ_DISABLED; + } + } +} + +void idxd_device_clear_state(struct idxd_device *idxd) +{ + idxd_groups_clear_state(idxd); + idxd_engines_clear_state(idxd); + idxd_device_wqs_clear_state(idxd); +} + void idxd_msix_perm_setup(struct idxd_device *idxd) { union msix_perm mperm; @@ -619,24 +751,22 @@ static void idxd_group_config_write(struct idxd_group *group) dev_dbg(dev, "Writing group %d cfg registers\n", group->id); /* setup GRPWQCFG */ - for (i = 0; i < 4; i++) { - grpcfg_offset = idxd->grpcfg_offset + - group->id * 64 + i * sizeof(u64); - iowrite64(group->grpcfg.wqs[i], - idxd->reg_base + grpcfg_offset); + for (i = 0; i < GRPWQCFG_STRIDES; i++) { + grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i); + iowrite64(group->grpcfg.wqs[i], idxd->reg_base + grpcfg_offset); dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n", group->id, i, grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset)); } /* setup GRPENGCFG */ - grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 32; + grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id); iowrite64(group->grpcfg.engines, idxd->reg_base + grpcfg_offset); dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id, grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset)); /* setup GRPFLAGS */ - grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 40; + grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", group->id, grpcfg_offset, @@ -651,7 +781,7 @@ static int idxd_groups_config_write(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; /* Setup bandwidth token limit */ - if (idxd->token_limit) { + if (idxd->hw.gen_cap.config_en && idxd->token_limit) { reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); reg.token_limit = idxd->token_limit; iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); @@ -661,7 +791,7 @@ static int idxd_groups_config_write(struct idxd_device *idxd) ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET)); for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; + struct idxd_group *group = idxd->groups[i]; idxd_group_config_write(group); } @@ -669,6 +799,15 @@ static int idxd_groups_config_write(struct idxd_device *idxd) return 0; } +static bool idxd_device_pasid_priv_enabled(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + + if (pdev->pasid_enabled && (pdev->pasid_features & PCI_PASID_CAP_PRIV)) + return true; + return false; +} + static int idxd_wq_config_write(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; @@ -692,6 +831,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->wq_size = wq->size; if (wq->size == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE; dev_warn(dev, "Incorrect work queue size: 0\n"); return -EINVAL; } @@ -700,7 +840,6 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->wq_thresh = wq->threshold; /* byte 8-11 */ - wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); if (wq_dedicated(wq)) wq->wqcfg->mode = 1; @@ -710,6 +849,25 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->pasid = idxd->pasid; } + /* + * Here the priv bit is set depending on the WQ type. priv = 1 if the + * WQ type is kernel to indicate privileged access. This setting only + * matters for dedicated WQ. According to the DSA spec: + * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the + * Privileged Mode Enable field of the PCI Express PASID capability + * is 0, this field must be 0. + * + * In the case of a dedicated kernel WQ that is not able to support + * the PASID cap, then the configuration will be rejected. + */ + wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); + if (wq_dedicated(wq) && wq->wqcfg->pasid_en && + !idxd_device_pasid_priv_enabled(idxd) && + wq->type == IDXD_WQT_KERNEL) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_PRIV; + return -EOPNOTSUPP; + } + wq->wqcfg->priority = wq->priority; if (idxd->hw.gen_cap.block_on_fault && @@ -740,7 +898,7 @@ static int idxd_wqs_config_write(struct idxd_device *idxd) int i, rc; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; rc = idxd_wq_config_write(wq); if (rc < 0) @@ -756,7 +914,7 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) /* TC-A 0 and TC-B 1 should be defaults */ for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; + struct idxd_group *group = idxd->groups[i]; if (group->tc_a == -1) group->tc_a = group->grpcfg.flags.tc_a = 0; @@ -783,12 +941,12 @@ static int idxd_engines_setup(struct idxd_device *idxd) struct idxd_group *group; for (i = 0; i < idxd->max_groups; i++) { - group = &idxd->groups[i]; + group = idxd->groups[i]; group->grpcfg.engines = 0; } for (i = 0; i < idxd->max_engines; i++) { - eng = &idxd->engines[i]; + eng = idxd->engines[i]; group = eng->group; if (!group) @@ -812,13 +970,13 @@ static int idxd_wqs_setup(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; for (i = 0; i < idxd->max_groups; i++) { - group = &idxd->groups[i]; + group = idxd->groups[i]; for (j = 0; j < 4; j++) group->grpcfg.wqs[j] = 0; } for (i = 0; i < idxd->max_wqs; i++) { - wq = &idxd->wqs[i]; + wq = idxd->wqs[i]; group = wq->group; if (!wq->group) @@ -827,6 +985,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd) continue; if (wq_shared(wq) && !device_swq_supported(idxd)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; dev_warn(dev, "No shared wq support but configured.\n"); return -EINVAL; } @@ -835,8 +994,10 @@ static int idxd_wqs_setup(struct idxd_device *idxd) configured++; } - if (configured == 0) + if (configured == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NONE_CONFIGURED; return -EINVAL; + } return 0; } @@ -866,3 +1027,317 @@ int idxd_device_config(struct idxd_device *idxd) return 0; } + +static int idxd_wq_load_config(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + int wqcfg_offset; + int i; + + wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, 0); + memcpy_fromio(wq->wqcfg, idxd->reg_base + wqcfg_offset, idxd->wqcfg_size); + + wq->size = wq->wqcfg->wq_size; + wq->threshold = wq->wqcfg->wq_thresh; + + /* The driver does not support shared WQ mode in read-only config yet */ + if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en) + return -EOPNOTSUPP; + + set_bit(WQ_FLAG_DEDICATED, &wq->flags); + + wq->priority = wq->wqcfg->priority; + + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i); + dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]); + } + + return 0; +} + +static void idxd_group_load_config(struct idxd_group *group) +{ + struct idxd_device *idxd = group->idxd; + struct device *dev = &idxd->pdev->dev; + int i, j, grpcfg_offset; + + /* + * Load WQS bit fields + * Iterate through all 256 bits 64 bits at a time + */ + for (i = 0; i < GRPWQCFG_STRIDES; i++) { + struct idxd_wq *wq; + + grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i); + group->grpcfg.wqs[i] = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n", + group->id, i, grpcfg_offset, group->grpcfg.wqs[i]); + + if (i * 64 >= idxd->max_wqs) + break; + + /* Iterate through all 64 bits and check for wq set */ + for (j = 0; j < 64; j++) { + int id = i * 64 + j; + + /* No need to check beyond max wqs */ + if (id >= idxd->max_wqs) + break; + + /* Set group assignment for wq if wq bit is set */ + if (group->grpcfg.wqs[i] & BIT(j)) { + wq = idxd->wqs[id]; + wq->group = group; + } + } + } + + grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id); + group->grpcfg.engines = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id, + grpcfg_offset, group->grpcfg.engines); + + /* Iterate through all 64 bits to check engines set */ + for (i = 0; i < 64; i++) { + if (i >= idxd->max_engines) + break; + + if (group->grpcfg.engines & BIT(i)) { + struct idxd_engine *engine = idxd->engines[i]; + + engine->group = group; + } + } + + grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); + group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + group->id, grpcfg_offset, group->grpcfg.flags.bits); +} + +int idxd_device_load_config(struct idxd_device *idxd) +{ + union gencfg_reg reg; + int i, rc; + + reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + idxd->token_limit = reg.token_limit; + + for (i = 0; i < idxd->max_groups; i++) { + struct idxd_group *group = idxd->groups[i]; + + idxd_group_load_config(group); + } + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + rc = idxd_wq_load_config(wq); + if (rc < 0) + return rc; + } + + return 0; +} + +int __drv_enable_wq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + int rc = -ENXIO; + + lockdep_assert_held(&wq->wq_lock); + + if (idxd->state != IDXD_DEV_ENABLED) { + idxd->cmd_status = IDXD_SCMD_DEV_NOT_ENABLED; + goto err; + } + + if (wq->state != IDXD_WQ_DISABLED) { + dev_dbg(dev, "wq %d already enabled.\n", wq->id); + idxd->cmd_status = IDXD_SCMD_WQ_ENABLED; + rc = -EBUSY; + goto err; + } + + if (!wq->group) { + dev_dbg(dev, "wq %d not attached to group.\n", wq->id); + idxd->cmd_status = IDXD_SCMD_WQ_NO_GRP; + goto err; + } + + if (strlen(wq->name) == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_NAME; + dev_dbg(dev, "wq %d name not set.\n", wq->id); + goto err; + } + + /* Shared WQ checks */ + if (wq_shared(wq)) { + if (!device_swq_supported(idxd)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM; + dev_dbg(dev, "PASID not enabled and shared wq.\n"); + goto err; + } + /* + * Shared wq with the threshold set to 0 means the user + * did not set the threshold or transitioned from a + * dedicated wq but did not set threshold. A value + * of 0 would effectively disable the shared wq. The + * driver does not allow a value of 0 to be set for + * threshold via sysfs. + */ + if (wq->threshold == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_THRESH; + dev_dbg(dev, "Shared wq and threshold 0.\n"); + goto err; + } + } + + rc = 0; + spin_lock(&idxd->dev_lock); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); + spin_unlock(&idxd->dev_lock); + if (rc < 0) { + dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc); + goto err; + } + + rc = idxd_wq_enable(wq); + if (rc < 0) { + dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); + goto err; + } + + rc = idxd_wq_map_portal(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_PORTAL_ERR; + dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc); + goto err_map_portal; + } + + wq->client_count = 0; + return 0; + +err_map_portal: + rc = idxd_wq_disable(wq, false); + if (rc < 0) + dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq))); +err: + return rc; +} + +int drv_enable_wq(struct idxd_wq *wq) +{ + int rc; + + mutex_lock(&wq->wq_lock); + rc = __drv_enable_wq(wq); + mutex_unlock(&wq->wq_lock); + return rc; +} + +void __drv_disable_wq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + + lockdep_assert_held(&wq->wq_lock); + + if (idxd_wq_refcount(wq)) + dev_warn(dev, "Clients has claim on wq %d: %d\n", + wq->id, idxd_wq_refcount(wq)); + + idxd_wq_unmap_portal(wq); + + idxd_wq_drain(wq); + idxd_wq_reset(wq); + + wq->client_count = 0; +} + +void drv_disable_wq(struct idxd_wq *wq) +{ + mutex_lock(&wq->wq_lock); + __drv_disable_wq(wq); + mutex_unlock(&wq->wq_lock); +} + +int idxd_device_drv_probe(struct idxd_dev *idxd_dev) +{ + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); + int rc = 0; + + /* + * Device should be in disabled state for the idxd_drv to load. If it's in + * enabled state, then the device was altered outside of driver's control. + * If the state is in halted state, then we don't want to proceed. + */ + if (idxd->state != IDXD_DEV_DISABLED) { + idxd->cmd_status = IDXD_SCMD_DEV_ENABLED; + return -ENXIO; + } + + /* Device configuration */ + spin_lock(&idxd->dev_lock); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); + spin_unlock(&idxd->dev_lock); + if (rc < 0) + return -ENXIO; + + /* Start device */ + rc = idxd_device_enable(idxd); + if (rc < 0) + return rc; + + /* Setup DMA device without channels */ + rc = idxd_register_dma_device(idxd); + if (rc < 0) { + idxd_device_disable(idxd); + idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR; + return rc; + } + + idxd->cmd_status = 0; + return 0; +} + +void idxd_device_drv_remove(struct idxd_dev *idxd_dev) +{ + struct device *dev = &idxd_dev->conf_dev; + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + struct device *wq_dev = wq_confdev(wq); + + if (wq->state == IDXD_WQ_DISABLED) + continue; + dev_warn(dev, "Active wq %d on disable %s.\n", i, dev_name(wq_dev)); + device_release_driver(wq_dev); + } + + idxd_unregister_dma_device(idxd); + idxd_device_disable(idxd); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + idxd_device_reset(idxd); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_DSA, + IDXD_DEV_IAX, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_drv = { + .type = dev_types, + .probe = idxd_device_drv_probe, + .remove = idxd_device_drv_remove, + .name = "idxd", +}; +EXPORT_SYMBOL_GPL(idxd_drv); diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 77439b6450448d81e81d9010ca69874fac84c58f..c39e9483206adec9b7917dbf192f3989ddcf1567 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -69,7 +69,11 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq, hw->src_addr = addr_f1; hw->dst_addr = addr_f2; hw->xfer_size = len; - hw->priv = !!(wq->type == IDXD_WQT_KERNEL); + /* + * For dedicated WQ, this field is ignored and HW will use the WQCFG.priv + * field instead. This field should be set to 1 for kernel descriptors. + */ + hw->priv = 1; hw->completion_addr = compl; } @@ -149,10 +153,8 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); rc = idxd_submit_desc(wq, desc); - if (rc < 0) { - idxd_free_desc(wq, desc); + if (rc < 0) return rc; - } return cookie; } @@ -245,7 +247,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq) wq->idxd_chan = idxd_chan; idxd_chan->wq = wq; - get_device(&wq->conf_dev); + get_device(wq_confdev(wq)); return 0; } @@ -260,5 +262,88 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq) list_del(&chan->device_node); kfree(wq->idxd_chan); wq->idxd_chan = NULL; - put_device(&wq->conf_dev); + put_device(wq_confdev(wq)); } + +static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) +{ + struct device *dev = &idxd_dev->conf_dev; + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + + mutex_lock(&wq->wq_lock); + wq->type = IDXD_WQT_KERNEL; + rc = __drv_enable_wq(wq); + if (rc < 0) { + dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); + rc = -ENXIO; + goto err; + } + + rc = idxd_wq_alloc_resources(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR; + dev_dbg(dev, "WQ resource alloc failed\n"); + goto err_res_alloc; + } + + rc = idxd_wq_init_percpu_ref(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_PERCPU_ERR; + dev_dbg(dev, "percpu_ref setup failed\n"); + goto err_ref; + } + + rc = idxd_register_dma_channel(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR; + dev_dbg(dev, "Failed to register dma channel\n"); + goto err_dma; + } + + idxd->cmd_status = 0; + mutex_unlock(&wq->wq_lock); + return 0; + +err_dma: + idxd_wq_quiesce(wq); + percpu_ref_exit(&wq->wq_active); +err_ref: + idxd_wq_free_resources(wq); +err_res_alloc: + __drv_disable_wq(wq); +err: + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); + return rc; +} + +static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + + mutex_lock(&wq->wq_lock); + idxd_wq_quiesce(wq); + idxd_unregister_dma_channel(wq); + idxd_wq_free_resources(wq); + __drv_disable_wq(wq); + percpu_ref_exit(&wq->wq_active); + mutex_unlock(&wq->wq_lock); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_WQ, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_dmaengine_drv = { + .probe = idxd_dmaengine_drv_probe, + .remove = idxd_dmaengine_drv_remove, + .name = "dmaengine", + .type = dev_types, +}; +EXPORT_SYMBOL_GPL(idxd_dmaengine_drv); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index a0371035abd71f6220146e058912bf4606726501..0cf8d3145870afd007dcd4e5b1bc179dd64f93ab 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -8,14 +8,35 @@ #include #include #include +#include +#include +#include +#include #include "registers.h" #define IDXD_DRIVER_VERSION "1.00" extern struct kmem_cache *idxd_desc_pool; +extern bool tc_override; -struct idxd_device; struct idxd_wq; +struct idxd_dev; + +enum idxd_dev_type { + IDXD_DEV_NONE = -1, + IDXD_DEV_DSA = 0, + IDXD_DEV_IAX, + IDXD_DEV_WQ, + IDXD_DEV_GROUP, + IDXD_DEV_ENGINE, + IDXD_DEV_CDEV, + IDXD_DEV_MAX_TYPE, +}; + +struct idxd_dev { + struct device conf_dev; + enum idxd_dev_type type; +}; #define IDXD_REG_TIMEOUT 50 #define IDXD_DRAIN_TIMEOUT 5000 @@ -28,14 +49,25 @@ enum idxd_type { }; #define IDXD_NAME_SIZE 128 +#define IDXD_PMU_EVENT_MAX 64 struct idxd_device_driver { + const char *name; + enum idxd_dev_type *type; + int (*probe)(struct idxd_dev *idxd_dev); + void (*remove)(struct idxd_dev *idxd_dev); struct device_driver drv; }; +extern struct idxd_device_driver dsa_drv; +extern struct idxd_device_driver idxd_drv; +extern struct idxd_device_driver idxd_dmaengine_drv; +extern struct idxd_device_driver idxd_user_drv; + struct idxd_irq_entry { struct idxd_device *idxd; int id; + int vector; struct llist_head pending_llist; struct list_head work_list; /* @@ -46,7 +78,7 @@ struct idxd_irq_entry { }; struct idxd_group { - struct device conf_dev; + struct idxd_dev idxd_dev; struct idxd_device *idxd; struct grpcfg grpcfg; int id; @@ -59,6 +91,31 @@ struct idxd_group { int tc_b; }; +struct idxd_pmu { + struct idxd_device *idxd; + + struct perf_event *event_list[IDXD_PMU_EVENT_MAX]; + int n_events; + + DECLARE_BITMAP(used_mask, IDXD_PMU_EVENT_MAX); + + struct pmu pmu; + char name[IDXD_NAME_SIZE]; + int cpu; + + int n_counters; + int counter_width; + int n_event_categories; + + bool per_counter_caps_supported; + unsigned long supported_event_categories; + + unsigned long supported_filters; + int n_filters; + + struct hlist_node cpuhp_node; +}; + #define IDXD_MAX_PRIORITY 0xf enum idxd_wq_state { @@ -80,7 +137,7 @@ enum idxd_wq_type { struct idxd_cdev { struct idxd_wq *wq; struct cdev cdev; - struct device dev; + struct idxd_dev idxd_dev; int minor; }; @@ -106,7 +163,10 @@ struct idxd_dma_chan { struct idxd_wq { void __iomem *portal; - struct device conf_dev; + u32 portal_offset; + struct percpu_ref wq_active; + struct completion wq_dead; + struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; struct idxd_device *idxd; @@ -121,16 +181,13 @@ struct idxd_wq { enum idxd_wq_state state; unsigned long flags; union wqcfg *wqcfg; - u32 vec_ptr; /* interrupt steering */ struct dsa_hw_desc **hw_descs; int num_descs; union { struct dsa_completion_record *compls; struct iax_completion_record *iax_compls; }; - void *compls_raw; dma_addr_t compls_addr; - dma_addr_t compls_addr_raw; int compls_size; struct idxd_desc **descs; struct sbitmap_queue sbq; @@ -142,7 +199,7 @@ struct idxd_wq { }; struct idxd_engine { - struct device conf_dev; + struct idxd_dev idxd_dev; int id; struct idxd_group *group; struct idxd_device *idxd; @@ -156,12 +213,12 @@ struct idxd_hw { union group_cap_reg group_cap; union engine_cap_reg engine_cap; struct opcap opcap; + u32 cmd_cap; }; enum idxd_device_state { IDXD_DEV_HALTED = -1, IDXD_DEV_DISABLED = 0, - IDXD_DEV_CONF_READY, IDXD_DEV_ENABLED, }; @@ -176,25 +233,34 @@ struct idxd_dma_dev { struct dma_device dma; }; -struct idxd_device { +struct idxd_driver_data { + const char *name_prefix; enum idxd_type type; - struct device conf_dev; + struct device_type *dev_type; + int compl_size; + int align; +}; + +struct idxd_device { + struct idxd_dev idxd_dev; + struct idxd_driver_data *data; struct list_head list; struct idxd_hw hw; enum idxd_device_state state; unsigned long flags; int id; int major; - u8 cmd_status; + u32 cmd_status; struct pci_dev *pdev; void __iomem *reg_base; spinlock_t dev_lock; /* spinlock for device */ + spinlock_t cmd_lock; /* spinlock for device commands */ struct completion *cmd_done; - struct idxd_group *groups; - struct idxd_wq *wqs; - struct idxd_engine *engines; + struct idxd_group **groups; + struct idxd_wq **wqs; + struct idxd_engine **engines; struct iommu_sva *sva; unsigned int pasid; @@ -216,17 +282,19 @@ struct idxd_device { int token_limit; int nr_tokens; /* non-reserved tokens */ unsigned int wqcfg_size; - int compl_size; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; - struct msix_entry *msix_entries; int num_wq_irqs; struct idxd_irq_entry *irq_entries; struct idxd_dma_dev *idxd_dma; struct workqueue_struct *wq; struct work_struct work; + + int *int_handles; + + struct idxd_pmu *idxd_pmu; }; /* IDXD software descriptor */ @@ -249,13 +317,115 @@ struct idxd_desc { struct idxd_wq *wq; }; -#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev) -#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev) +/* + * This is software defined error for the completion status. We overload the error code + * that will never appear in completion status and only SWERR register. + */ +enum idxd_completion_status { + IDXD_COMP_DESC_ABORT = 0xff, +}; + +#define idxd_confdev(idxd) &idxd->idxd_dev.conf_dev +#define wq_confdev(wq) &wq->idxd_dev.conf_dev +#define engine_confdev(engine) &engine->idxd_dev.conf_dev +#define group_confdev(group) &group->idxd_dev.conf_dev +#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev + +#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev) +#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev) +#define idxd_dev_to_wq(idxd_dev) container_of(idxd_dev, struct idxd_wq, idxd_dev) + +static inline struct idxd_device *confdev_to_idxd(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return idxd_dev_to_idxd(idxd_dev); +} + +static inline struct idxd_wq *confdev_to_wq(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return idxd_dev_to_wq(idxd_dev); +} + +static inline struct idxd_engine *confdev_to_engine(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_engine, idxd_dev); +} + +static inline struct idxd_group *confdev_to_group(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_group, idxd_dev); +} + +static inline struct idxd_cdev *dev_to_cdev(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_cdev, idxd_dev); +} + +static inline void idxd_dev_set_type(struct idxd_dev *idev, int type) +{ + if (type >= IDXD_DEV_MAX_TYPE) { + idev->type = IDXD_DEV_NONE; + return; + } + + idev->type = type; +} extern struct bus_type dsa_bus_type; -extern struct bus_type iax_bus_type; extern bool support_enqcmd; +extern struct ida idxd_ida; +extern struct device_type dsa_device_type; +extern struct device_type iax_device_type; +extern struct device_type idxd_wq_device_type; +extern struct device_type idxd_engine_device_type; +extern struct device_type idxd_group_device_type; + +static inline bool is_dsa_dev(struct idxd_dev *idxd_dev) +{ + return idxd_dev->type == IDXD_DEV_DSA; +} + +static inline bool is_iax_dev(struct idxd_dev *idxd_dev) +{ + return idxd_dev->type == IDXD_DEV_IAX; +} + +static inline bool is_idxd_dev(struct idxd_dev *idxd_dev) +{ + return is_dsa_dev(idxd_dev) || is_iax_dev(idxd_dev); +} + +static inline bool is_idxd_wq_dev(struct idxd_dev *idxd_dev) +{ + return idxd_dev->type == IDXD_DEV_WQ; +} + +static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) +{ + if (wq->type == IDXD_WQT_KERNEL && strcmp(wq->name, "dmaengine") == 0) + return true; + return false; +} + +static inline bool is_idxd_wq_user(struct idxd_wq *wq) +{ + return wq->type == IDXD_WQT_USER; +} + +static inline bool is_idxd_wq_kernel(struct idxd_wq *wq) +{ + return wq->type == IDXD_WQT_KERNEL; +} static inline bool wq_dedicated(struct idxd_wq *wq) { @@ -282,6 +452,11 @@ enum idxd_portal_prot { IDXD_PORTAL_LIMITED, }; +enum idxd_interrupt_type { + IDXD_IRQ_MSIX = 0, + IDXD_IRQ_IMS, +}; + static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot) { return prot * 0x1000; @@ -293,16 +468,22 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id, return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot); } -static inline void idxd_set_type(struct idxd_device *idxd) +#define IDXD_PORTAL_MASK (PAGE_SIZE - 1) + +/* + * Even though this function can be accessed by multiple threads, it is safe to use. + * At worst the address gets used more than once before it gets incremented. We don't + * hit a threshold until iops becomes many million times a second. So the occasional + * reuse of the same address is tolerable compare to using an atomic variable. This is + * safe on a system that has atomic load/store for 32bit integers. Given that this is an + * Intel iEP device, that should not be a problem. + */ +static inline void __iomem *idxd_wq_portal_addr(struct idxd_wq *wq) { - struct pci_dev *pdev = idxd->pdev; - - if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0) - idxd->type = IDXD_TYPE_DSA; - else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0) - idxd->type = IDXD_TYPE_IAX; - else - idxd->type = IDXD_TYPE_UNKNOWN; + int ofs = wq->portal_offset; + + wq->portal_offset = (ofs + sizeof(struct dsa_raw_desc)) & IDXD_PORTAL_MASK; + return wq->portal + ofs; } static inline void idxd_wq_get(struct idxd_wq *wq) @@ -320,19 +501,27 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq) return wq->client_count; }; -const char *idxd_get_dev_name(struct idxd_device *idxd); +int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv, + struct module *module, const char *mod_name); +#define idxd_driver_register(driver) \ + __idxd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) + +void idxd_driver_unregister(struct idxd_device_driver *idxd_drv); + +#define module_idxd_driver(__idxd_driver) \ + module_driver(__idxd_driver, idxd_driver_register, idxd_driver_unregister) + int idxd_register_bus_type(void); void idxd_unregister_bus_type(void); -int idxd_setup_sysfs(struct idxd_device *idxd); -void idxd_cleanup_sysfs(struct idxd_device *idxd); +int idxd_register_devices(struct idxd_device *idxd); +void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); -struct bus_type *idxd_get_bus_type(struct idxd_device *idxd); +void idxd_wqs_quiesce(struct idxd_device *idxd); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); void idxd_msix_perm_clear(struct idxd_device *idxd); -irqreturn_t idxd_irq_handler(int vec, void *data); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); void idxd_mask_error_interrupts(struct idxd_device *idxd); @@ -342,27 +531,41 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ +int idxd_register_idxd_drv(void); +void idxd_unregister_idxd_drv(void); +int idxd_device_drv_probe(struct idxd_dev *idxd_dev); +void idxd_device_drv_remove(struct idxd_dev *idxd_dev); +int drv_enable_wq(struct idxd_wq *wq); +int __drv_enable_wq(struct idxd_wq *wq); +void drv_disable_wq(struct idxd_wq *wq); +void __drv_disable_wq(struct idxd_wq *wq); int idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); void idxd_device_reset(struct idxd_device *idxd); -void idxd_device_cleanup(struct idxd_device *idxd); +void idxd_device_clear_state(struct idxd_device *idxd); int idxd_device_config(struct idxd_device *idxd); -void idxd_device_wqs_clear_state(struct idxd_device *idxd); void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid); +int idxd_device_load_config(struct idxd_device *idxd); +int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle, + enum idxd_interrupt_type irq_type); +int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, + enum idxd_interrupt_type irq_type); /* work queue control */ +void idxd_wqs_unmap_portal(struct idxd_device *idxd); int idxd_wq_alloc_resources(struct idxd_wq *wq); void idxd_wq_free_resources(struct idxd_wq *wq); int idxd_wq_enable(struct idxd_wq *wq); -int idxd_wq_disable(struct idxd_wq *wq); +int idxd_wq_disable(struct idxd_wq *wq, bool reset_config); void idxd_wq_drain(struct idxd_wq *wq); void idxd_wq_reset(struct idxd_wq *wq); int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); -void idxd_wq_disable_cleanup(struct idxd_wq *wq); int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); int idxd_wq_disable_pasid(struct idxd_wq *wq); +void idxd_wq_quiesce(struct idxd_wq *wq); +int idxd_wq_init_percpu_ref(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); @@ -385,4 +588,25 @@ int idxd_cdev_get_major(struct idxd_device *idxd); int idxd_wq_add_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq); +/* perfmon */ +#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) +int perfmon_pmu_init(struct idxd_device *idxd); +void perfmon_pmu_remove(struct idxd_device *idxd); +void perfmon_counter_overflow(struct idxd_device *idxd); +void perfmon_init(void); +void perfmon_exit(void); +#else +static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; } +static inline void perfmon_pmu_remove(struct idxd_device *idxd) {} +static inline void perfmon_counter_overflow(struct idxd_device *idxd) {} +static inline void perfmon_init(void) {} +static inline void perfmon_exit(void) {} +#endif + +static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) +{ + idxd_dma_complete_txd(desc, reason); + idxd_free_desc(desc->wq, desc); +} + #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 40ea8a1903406b03c1e7089d7a9f2ea1aebfb096..7bf03f371ce191c6bc6cf05d226d4afec912e926 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -21,47 +21,57 @@ #include "../dmaengine.h" #include "registers.h" #include "idxd.h" +#include "perfmon.h" MODULE_VERSION(IDXD_DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); +MODULE_IMPORT_NS(IDXD); static bool sva = true; module_param(sva, bool, 0644); MODULE_PARM_DESC(sva, "Toggle SVA support on/off"); +bool tc_override; +module_param(tc_override, bool, 0644); +MODULE_PARM_DESC(tc_override, "Override traffic class defaults"); + #define DRV_NAME "idxd" bool support_enqcmd; - -static struct idr idxd_idrs[IDXD_TYPE_MAX]; -static DEFINE_MUTEX(idxd_idr_lock); +DEFINE_IDA(idxd_ida); + +static struct idxd_driver_data idxd_driver_data[] = { + [IDXD_TYPE_DSA] = { + .name_prefix = "dsa", + .type = IDXD_TYPE_DSA, + .compl_size = sizeof(struct dsa_completion_record), + .align = 32, + .dev_type = &dsa_device_type, + }, + [IDXD_TYPE_IAX] = { + .name_prefix = "iax", + .type = IDXD_TYPE_IAX, + .compl_size = sizeof(struct iax_completion_record), + .align = 64, + .dev_type = &iax_device_type, + }, +}; static struct pci_device_id idxd_pci_tbl[] = { /* DSA ver 1.0 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) }, + { PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) }, /* IAX ver 1.0 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IAX_SPR0) }, + { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) }, { 0, } }; MODULE_DEVICE_TABLE(pci, idxd_pci_tbl); -static char *idxd_name[] = { - "dsa", - "iax" -}; - -const char *idxd_get_dev_name(struct idxd_device *idxd) -{ - return idxd_name[idxd->type]; -} - static int idxd_setup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct device *dev = &pdev->dev; - struct msix_entry *msix; struct idxd_irq_entry *irq_entry; int i, msixcnt; int rc = 0; @@ -69,23 +79,13 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) msixcnt = pci_msix_vec_count(pdev); if (msixcnt < 0) { dev_err(dev, "Not MSI-X interrupt capable.\n"); - goto err_no_irq; - } - - idxd->msix_entries = devm_kzalloc(dev, sizeof(struct msix_entry) * - msixcnt, GFP_KERNEL); - if (!idxd->msix_entries) { - rc = -ENOMEM; - goto err_no_irq; + return -ENOSPC; } - for (i = 0; i < msixcnt; i++) - idxd->msix_entries[i].entry = i; - - rc = pci_enable_msix_exact(pdev, idxd->msix_entries, msixcnt); - if (rc) { - dev_err(dev, "Failed enabling %d MSIX entries.\n", msixcnt); - goto err_no_irq; + rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX); + if (rc != msixcnt) { + dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc); + return -ENOSPC; } dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); @@ -93,119 +93,331 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) * We implement 1 completion list per MSI-X entry except for * entry 0, which is for errors and others. */ - idxd->irq_entries = devm_kcalloc(dev, msixcnt, - sizeof(struct idxd_irq_entry), - GFP_KERNEL); + idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry), + GFP_KERNEL, dev_to_node(dev)); if (!idxd->irq_entries) { rc = -ENOMEM; - goto err_no_irq; + goto err_irq_entries; } for (i = 0; i < msixcnt; i++) { idxd->irq_entries[i].id = i; idxd->irq_entries[i].idxd = idxd; + idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); spin_lock_init(&idxd->irq_entries[i].list_lock); } - msix = &idxd->msix_entries[0]; + idxd_msix_perm_setup(idxd); + irq_entry = &idxd->irq_entries[0]; - rc = devm_request_threaded_irq(dev, msix->vector, idxd_irq_handler, - idxd_misc_thread, 0, "idxd-misc", - irq_entry); + rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, + 0, "idxd-misc", irq_entry); if (rc < 0) { dev_err(dev, "Failed to allocate misc interrupt.\n"); - goto err_no_irq; + goto err_misc_irq; } - dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", - msix->vector); + dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector); /* first MSI-X entry is not for wq interrupts */ idxd->num_wq_irqs = msixcnt - 1; for (i = 1; i < msixcnt; i++) { - msix = &idxd->msix_entries[i]; irq_entry = &idxd->irq_entries[i]; init_llist_head(&idxd->irq_entries[i].pending_llist); INIT_LIST_HEAD(&idxd->irq_entries[i].work_list); - rc = devm_request_threaded_irq(dev, msix->vector, - idxd_irq_handler, - idxd_wq_thread, 0, - "idxd-portal", irq_entry); + rc = request_threaded_irq(irq_entry->vector, NULL, + idxd_wq_thread, 0, "idxd-portal", irq_entry); if (rc < 0) { - dev_err(dev, "Failed to allocate irq %d.\n", - msix->vector); - goto err_no_irq; + dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector); + goto err_wq_irqs; + } + + dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { + /* + * The MSIX vector enumeration starts at 1 with vector 0 being the + * misc interrupt that handles non I/O completion events. The + * interrupt handles are for IMS enumeration on guest. The misc + * interrupt vector does not require a handle and therefore we start + * the int_handles at index 0. Since 'i' starts at 1, the first + * int_handles index will be 0. + */ + rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1], + IDXD_IRQ_MSIX); + if (rc < 0) { + free_irq(irq_entry->vector, irq_entry); + goto err_wq_irqs; + } + dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]); } - dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", - i, msix->vector); } idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); return 0; - err_no_irq: + err_wq_irqs: + while (--i >= 0) { + irq_entry = &idxd->irq_entries[i]; + free_irq(irq_entry->vector, irq_entry); + if (i != 0) + idxd_device_release_int_handle(idxd, + idxd->int_handles[i], IDXD_IRQ_MSIX); + } + err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); - pci_disable_msix(pdev); + idxd_msix_perm_clear(idxd); + err_irq_entries: + pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); return rc; } -static int idxd_setup_internals(struct idxd_device *idxd) +static void idxd_cleanup_interrupts(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - int i; + struct pci_dev *pdev = idxd->pdev; + struct idxd_irq_entry *irq_entry; + int i, msixcnt; - init_waitqueue_head(&idxd->cmd_waitq); - idxd->groups = devm_kcalloc(dev, idxd->max_groups, - sizeof(struct idxd_group), GFP_KERNEL); - if (!idxd->groups) - return -ENOMEM; + msixcnt = pci_msix_vec_count(pdev); + if (msixcnt <= 0) + return; - for (i = 0; i < idxd->max_groups; i++) { - idxd->groups[i].idxd = idxd; - idxd->groups[i].id = i; - idxd->groups[i].tc_a = -1; - idxd->groups[i].tc_b = -1; + irq_entry = &idxd->irq_entries[0]; + free_irq(irq_entry->vector, irq_entry); + + for (i = 1; i < msixcnt; i++) { + + irq_entry = &idxd->irq_entries[i]; + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) + idxd_device_release_int_handle(idxd, idxd->int_handles[i], + IDXD_IRQ_MSIX); + free_irq(irq_entry->vector, irq_entry); } - idxd->wqs = devm_kcalloc(dev, idxd->max_wqs, sizeof(struct idxd_wq), - GFP_KERNEL); - if (!idxd->wqs) - return -ENOMEM; + idxd_mask_error_interrupts(idxd); + pci_free_irq_vectors(pdev); +} - idxd->engines = devm_kcalloc(dev, idxd->max_engines, - sizeof(struct idxd_engine), GFP_KERNEL); - if (!idxd->engines) +static int idxd_setup_wqs(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_wq *wq; + struct device *conf_dev; + int i, rc; + + idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->wqs) return -ENOMEM; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev)); + if (!wq) { + rc = -ENOMEM; + goto err; + } + idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ); + conf_dev = wq_confdev(wq); wq->id = i; wq->idxd = idxd; + device_initialize(wq_confdev(wq)); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_wq_device_type; + rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); + if (rc < 0) { + put_device(conf_dev); + goto err; + } + mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); + init_completion(&wq->wq_dead); wq->max_xfer_bytes = idxd->max_xfer_bytes; wq->max_batch_size = idxd->max_batch_size; - wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL); - if (!wq->wqcfg) - return -ENOMEM; + wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); + if (!wq->wqcfg) { + put_device(conf_dev); + rc = -ENOMEM; + goto err; + } + idxd->wqs[i] = wq; } + return 0; + + err: + while (--i >= 0) { + wq = idxd->wqs[i]; + conf_dev = wq_confdev(wq); + put_device(conf_dev); + } + return rc; +} + +static int idxd_setup_engines(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + struct device *dev = &idxd->pdev->dev; + struct device *conf_dev; + int i, rc; + + idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->engines) + return -ENOMEM; + for (i = 0; i < idxd->max_engines; i++) { - idxd->engines[i].idxd = idxd; - idxd->engines[i].id = i; + engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev)); + if (!engine) { + rc = -ENOMEM; + goto err; + } + + idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE); + conf_dev = engine_confdev(engine); + engine->id = i; + engine->idxd = idxd; + device_initialize(conf_dev); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_engine_device_type; + rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id); + if (rc < 0) { + put_device(conf_dev); + goto err; + } + + idxd->engines[i] = engine; } - idxd->wq = create_workqueue(dev_name(dev)); - if (!idxd->wq) + return 0; + + err: + while (--i >= 0) { + engine = idxd->engines[i]; + conf_dev = engine_confdev(engine); + put_device(conf_dev); + } + return rc; +} + +static int idxd_setup_groups(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct device *conf_dev; + struct idxd_group *group; + int i, rc; + + idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->groups) return -ENOMEM; + for (i = 0; i < idxd->max_groups; i++) { + group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev)); + if (!group) { + rc = -ENOMEM; + goto err; + } + + idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP); + conf_dev = group_confdev(group); + group->id = i; + group->idxd = idxd; + device_initialize(conf_dev); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_group_device_type; + rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id); + if (rc < 0) { + put_device(conf_dev); + goto err; + } + + idxd->groups[i] = group; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) { + group->tc_a = 1; + group->tc_b = 1; + } else { + group->tc_a = -1; + group->tc_b = -1; + } + } + return 0; + + err: + while (--i >= 0) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + } + return rc; +} + +static void idxd_cleanup_internals(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_groups; i++) + put_device(group_confdev(idxd->groups[i])); + for (i = 0; i < idxd->max_engines; i++) + put_device(engine_confdev(idxd->engines[i])); + for (i = 0; i < idxd->max_wqs; i++) + put_device(wq_confdev(idxd->wqs[i])); + destroy_workqueue(idxd->wq); +} + +static int idxd_setup_internals(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + int rc, i; + + init_waitqueue_head(&idxd->cmd_waitq); + + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { + idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL, + dev_to_node(dev)); + if (!idxd->int_handles) + return -ENOMEM; + } + + rc = idxd_setup_wqs(idxd); + if (rc < 0) + goto err_wqs; + + rc = idxd_setup_engines(idxd); + if (rc < 0) + goto err_engine; + + rc = idxd_setup_groups(idxd); + if (rc < 0) + goto err_group; + + idxd->wq = create_workqueue(dev_name(dev)); + if (!idxd->wq) { + rc = -ENOMEM; + goto err_wkq_create; + } + + return 0; + + err_wkq_create: + for (i = 0; i < idxd->max_groups; i++) + put_device(group_confdev(idxd->groups[i])); + err_group: + for (i = 0; i < idxd->max_engines; i++) + put_device(engine_confdev(idxd->engines[i])); + err_engine: + for (i = 0; i < idxd->max_wqs; i++) + put_device(wq_confdev(idxd->wqs[i])); + err_wqs: + kfree(idxd->int_handles); + return rc; } static void idxd_read_table_offsets(struct idxd_device *idxd) @@ -233,6 +445,12 @@ static void idxd_read_caps(struct idxd_device *idxd) /* reading generic capabilities */ idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET); dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits); + + if (idxd->hw.gen_cap.cmd_cap) { + idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET); + dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap); + } + idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift; @@ -275,17 +493,37 @@ static void idxd_read_caps(struct idxd_device *idxd) } } -static struct idxd_device *idxd_alloc(struct pci_dev *pdev) +static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) { struct device *dev = &pdev->dev; + struct device *conf_dev; struct idxd_device *idxd; + int rc; - idxd = devm_kzalloc(dev, sizeof(struct idxd_device), GFP_KERNEL); + idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev)); if (!idxd) return NULL; + conf_dev = idxd_confdev(idxd); idxd->pdev = pdev; + idxd->data = data; + idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type); + idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); + if (idxd->id < 0) + return NULL; + + device_initialize(conf_dev); + conf_dev->parent = dev; + conf_dev->bus = &dsa_bus_type; + conf_dev->type = idxd->data->dev_type; + rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); + if (rc < 0) { + put_device(conf_dev); + return NULL; + } + spin_lock_init(&idxd->dev_lock); + spin_lock_init(&idxd->cmd_lock); return idxd; } @@ -338,11 +576,18 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - rc = idxd_enable_system_pasid(idxd); - if (rc < 0) - dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc); - else - set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA); + if (rc == 0) { + rc = idxd_enable_system_pasid(idxd); + if (rc < 0) { + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc); + } else { + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } + } else { + dev_warn(dev, "Unable to turn on SVA feature.\n"); + } } else if (!sva) { dev_warn(dev, "User forced SVA off via module param.\n"); } @@ -352,80 +597,83 @@ static int idxd_probe(struct idxd_device *idxd) rc = idxd_setup_internals(idxd); if (rc) - goto err_setup; + goto err; + + /* If the configs are readonly, then load them from device */ + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + dev_dbg(dev, "Loading RO device config\n"); + rc = idxd_device_load_config(idxd); + if (rc < 0) + goto err_config; + } rc = idxd_setup_interrupts(idxd); if (rc) - goto err_setup; + goto err_config; dev_dbg(dev, "IDXD interrupt setup complete.\n"); - mutex_lock(&idxd_idr_lock); - idxd->id = idr_alloc(&idxd_idrs[idxd->type], idxd, 0, 0, GFP_KERNEL); - mutex_unlock(&idxd_idr_lock); - if (idxd->id < 0) { - rc = -ENOMEM; - goto err_idr_fail; - } - idxd->major = idxd_cdev_get_major(idxd); + rc = perfmon_pmu_init(idxd); + if (rc < 0) + dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc); + dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); return 0; - err_idr_fail: - idxd_mask_error_interrupts(idxd); - idxd_mask_msix_vectors(idxd); - err_setup: + err_config: + idxd_cleanup_internals(idxd); + err: if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); return rc; } -static void idxd_type_init(struct idxd_device *idxd) +static void idxd_cleanup(struct idxd_device *idxd) { - if (idxd->type == IDXD_TYPE_DSA) - idxd->compl_size = sizeof(struct dsa_completion_record); - else if (idxd->type == IDXD_TYPE_IAX) - idxd->compl_size = sizeof(struct iax_completion_record); + struct device *dev = &idxd->pdev->dev; + + perfmon_pmu_remove(idxd); + idxd_cleanup_interrupts(idxd); + idxd_cleanup_internals(idxd); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); } static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; struct idxd_device *idxd; + struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data; int rc; - rc = pcim_enable_device(pdev); + rc = pci_enable_device(pdev); if (rc) return rc; dev_dbg(dev, "Alloc IDXD context\n"); - idxd = idxd_alloc(pdev); - if (!idxd) - return -ENOMEM; + idxd = idxd_alloc(pdev, data); + if (!idxd) { + rc = -ENOMEM; + goto err_idxd_alloc; + } dev_dbg(dev, "Mapping BARs\n"); - idxd->reg_base = pcim_iomap(pdev, IDXD_MMIO_BAR, 0); - if (!idxd->reg_base) - return -ENOMEM; + idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0); + if (!idxd->reg_base) { + rc = -ENOMEM; + goto err_iomap; + } dev_dbg(dev, "Set DMA masks\n"); - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (rc) - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) - return rc; - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) - return rc; - - idxd_set_type(idxd); - - idxd_type_init(idxd); + goto err; dev_dbg(dev, "Set PCI master\n"); pci_set_master(pdev); @@ -435,21 +683,29 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) rc = idxd_probe(idxd); if (rc) { dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n"); - return -ENODEV; + goto err; } - rc = idxd_setup_sysfs(idxd); + rc = idxd_register_devices(idxd); if (rc) { dev_err(dev, "IDXD sysfs setup failed\n"); - return -ENODEV; + goto err_dev_register; } - idxd->state = IDXD_DEV_CONF_READY; - dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); return 0; + + err_dev_register: + idxd_cleanup(idxd); + err: + pci_iounmap(pdev, idxd->reg_base); + err_iomap: + put_device(idxd_confdev(idxd)); + err_idxd_alloc: + pci_disable_device(pdev); + return rc; } static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) @@ -478,6 +734,36 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie) } } +void idxd_wqs_quiesce(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL) + idxd_wq_quiesce(wq); + } +} + +static void idxd_release_int_handles(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + int i, rc; + + for (i = 0; i < idxd->num_wq_irqs; i++) { + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) { + rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i], + IDXD_IRQ_MSIX); + if (rc < 0) + dev_warn(dev, "irq handle %d release failed\n", + idxd->int_handles[i]); + else + dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]); + } + } +} + static void idxd_shutdown(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); @@ -495,29 +781,49 @@ static void idxd_shutdown(struct pci_dev *pdev) for (i = 0; i < msixcnt; i++) { irq_entry = &idxd->irq_entries[i]; - synchronize_irq(idxd->msix_entries[i].vector); + synchronize_irq(irq_entry->vector); if (i == 0) continue; idxd_flush_pending_llist(irq_entry); idxd_flush_work_list(irq_entry); } - - idxd_msix_perm_clear(idxd); - destroy_workqueue(idxd->wq); + flush_workqueue(idxd->wq); } static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); + struct idxd_irq_entry *irq_entry; + int msixcnt = pci_msix_vec_count(pdev); + int i; - dev_dbg(&pdev->dev, "%s called\n", __func__); - idxd_cleanup_sysfs(idxd); + idxd_unregister_devices(idxd); + /* + * When ->release() is called for the idxd->conf_dev, it frees all the memory related + * to the idxd context. The driver still needs those bits in order to do the rest of + * the cleanup. However, we do need to unbound the idxd sub-driver. So take a ref + * on the device here to hold off the freeing while allowing the idxd sub-driver + * to unbind. + */ + get_device(idxd_confdev(idxd)); + device_unregister(idxd_confdev(idxd)); idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - mutex_lock(&idxd_idr_lock); - idr_remove(&idxd_idrs[idxd->type], idxd->id); - mutex_unlock(&idxd_idr_lock); + + for (i = 0; i < msixcnt; i++) { + irq_entry = &idxd->irq_entries[i]; + free_irq(irq_entry->vector, irq_entry); + } + idxd_msix_perm_clear(idxd); + idxd_release_int_handles(idxd); + pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + pci_disable_device(pdev); + destroy_workqueue(idxd->wq); + perfmon_pmu_remove(idxd); + put_device(idxd_confdev(idxd)); } static struct pci_driver idxd_pci_driver = { @@ -530,7 +836,7 @@ static struct pci_driver idxd_pci_driver = { static int __init idxd_init_module(void) { - int err, i; + int err; /* * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in @@ -546,16 +852,19 @@ static int __init idxd_init_module(void) else support_enqcmd = true; - for (i = 0; i < IDXD_TYPE_MAX; i++) - idr_init(&idxd_idrs[i]); + perfmon_init(); + + err = idxd_driver_register(&idxd_drv); + if (err < 0) + goto err_idxd_driver_register; - err = idxd_register_bus_type(); + err = idxd_driver_register(&idxd_dmaengine_drv); if (err < 0) - return err; + goto err_idxd_dmaengine_driver_register; - err = idxd_register_driver(); + err = idxd_driver_register(&idxd_user_drv); if (err < 0) - goto err_idxd_driver_register; + goto err_idxd_user_driver_register; err = idxd_cdev_register(); if (err) @@ -570,18 +879,23 @@ static int __init idxd_init_module(void) err_pci_register: idxd_cdev_remove(); err_cdev_register: - idxd_unregister_driver(); + idxd_driver_unregister(&idxd_user_drv); +err_idxd_user_driver_register: + idxd_driver_unregister(&idxd_dmaengine_drv); +err_idxd_dmaengine_driver_register: + idxd_driver_unregister(&idxd_drv); err_idxd_driver_register: - idxd_unregister_bus_type(); return err; } module_init(idxd_init_module); static void __exit idxd_exit_module(void) { - idxd_unregister_driver(); + idxd_driver_unregister(&idxd_user_drv); + idxd_driver_unregister(&idxd_dmaengine_drv); + idxd_driver_unregister(&idxd_drv); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); - idxd_unregister_bus_type(); + perfmon_exit(); } module_exit(idxd_exit_module); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index f287233fff65ce53e14aee1d81c871269bec3196..17f2f8a31b6303638f0e29db18b980b3f8b961e4 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -22,13 +22,6 @@ struct idxd_fault { struct idxd_device *idxd; }; -static int irq_process_work_list(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, - int *processed, u64 data); -static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, - int *processed, u64 data); - static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -45,13 +38,13 @@ static void idxd_device_reinit(struct work_struct *work) goto out; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (wq->state == IDXD_WQ_ENABLED) { rc = idxd_wq_enable(wq); if (rc < 0) { dev_warn(dev, "Unable to re-enable wq %s\n", - dev_name(&wq->conf_dev)); + dev_name(wq_confdev(wq))); } } } @@ -59,56 +52,7 @@ static void idxd_device_reinit(struct work_struct *work) return; out: - idxd_device_wqs_clear_state(idxd); -} - -static void idxd_device_fault_work(struct work_struct *work) -{ - struct idxd_fault *fault = container_of(work, struct idxd_fault, work); - struct idxd_irq_entry *ie; - int i; - int processed; - int irqcnt = fault->idxd->num_wq_irqs + 1; - - for (i = 1; i < irqcnt; i++) { - ie = &fault->idxd->irq_entries[i]; - irq_process_work_list(ie, IRQ_WORK_PROCESS_FAULT, - &processed, fault->addr); - if (processed) - break; - - irq_process_pending_llist(ie, IRQ_WORK_PROCESS_FAULT, - &processed, fault->addr); - if (processed) - break; - } - - kfree(fault); -} - -static int idxd_device_schedule_fault_process(struct idxd_device *idxd, - u64 fault_addr) -{ - struct idxd_fault *fault; - - fault = kmalloc(sizeof(*fault), GFP_ATOMIC); - if (!fault) - return -ENOMEM; - - fault->addr = fault_addr; - fault->idxd = idxd; - INIT_WORK(&fault->work, idxd_device_fault_work); - queue_work(idxd->wq, &fault->work); - return 0; -} - -irqreturn_t idxd_irq_handler(int vec, void *data) -{ - struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = irq_entry->idxd; - - idxd_mask_msix_vector(idxd, irq_entry->id); - return IRQ_WAKE_THREAD; + idxd_device_clear_state(idxd); } static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) @@ -119,8 +63,11 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) int i; bool err = false; + if (cause & IDXD_INTC_HALT_STATE) + goto halt; + if (cause & IDXD_INTC_ERR) { - spin_lock_bh(&idxd->dev_lock); + spin_lock(&idxd->dev_lock); for (i = 0; i < 4; i++) idxd->sw_err.bits[i] = ioread64(idxd->reg_base + IDXD_SWERR_OFFSET + i * sizeof(u64)); @@ -130,7 +77,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { int id = idxd->sw_err.wq_idx; - struct idxd_wq *wq = &idxd->wqs[id]; + struct idxd_wq *wq = idxd->wqs[id]; if (wq->type == IDXD_WQT_USER) wake_up_interruptible(&wq->err_queue); @@ -138,14 +85,14 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) int i; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (wq->type == IDXD_WQT_USER) wake_up_interruptible(&wq->err_queue); } } - spin_unlock_bh(&idxd->dev_lock); + spin_unlock(&idxd->dev_lock); val |= IDXD_INTC_ERR; for (i = 0; i < 4; i++) @@ -165,11 +112,8 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) } if (cause & IDXD_INTC_PERFMON_OVFL) { - /* - * Driver does not utilize perfmon counter overflow interrupt - * yet. - */ val |= IDXD_INTC_PERFMON_OVFL; + perfmon_counter_overflow(idxd); } val ^= cause; @@ -180,15 +124,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) if (!err) return 0; - /* - * This case should rarely happen and typically is due to software - * programming error by the driver. - */ - if (idxd->sw_err.valid && - idxd->sw_err.desc_valid && - idxd->sw_err.fault_addr) - idxd_device_schedule_fault_process(idxd, idxd->sw_err.fault_addr); - +halt: gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); if (gensts.state == IDXD_DEVICE_STATE_HALT) { idxd->state = IDXD_DEV_HALTED; @@ -201,13 +137,16 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) INIT_WORK(&idxd->work, idxd_device_reinit); queue_work(idxd->wq, &idxd->work); } else { - spin_lock_bh(&idxd->dev_lock); - idxd_device_wqs_clear_state(idxd); + spin_lock(&idxd->dev_lock); + idxd->state = IDXD_DEV_HALTED; + idxd_wqs_quiesce(idxd); + idxd_wqs_unmap_portal(idxd); + idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? "FLR" : "system reset"); - spin_unlock_bh(&idxd->dev_lock); + spin_unlock(&idxd->dev_lock); return -ENXIO; } } @@ -235,122 +174,81 @@ irqreturn_t idxd_misc_thread(int vec, void *data) iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); } - idxd_unmask_msix_vector(idxd, irq_entry->id); return IRQ_HANDLED; } -static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) -{ - /* - * Completion address can be bad as well. Check fault address match for descriptor - * and completion address. - */ - if ((u64)desc->hw == fault_addr || (u64)desc->completion == fault_addr) { - struct idxd_device *idxd = desc->wq->idxd; - struct device *dev = &idxd->pdev->dev; - - dev_warn(dev, "desc with fault address: %#llx\n", fault_addr); - return true; - } - - return false; -} - -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - -static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, - int *processed, u64 data) +static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) { struct idxd_desc *desc, *t; struct llist_node *head; - int queued = 0; - unsigned long flags; - enum idxd_complete_type reason; - *processed = 0; head = llist_del_all(&irq_entry->pending_llist); if (!head) - goto out; - - if (wtype == IRQ_WORK_NORMAL) - reason = IDXD_COMPLETE_NORMAL; - else - reason = IDXD_COMPLETE_DEV_FAIL; + return; llist_for_each_entry_safe(desc, t, head, llnode) { - if (desc->completion->status) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) - match_fault(desc, data); - complete_desc(desc, reason); - (*processed)++; + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (status) { + /* + * Check against the original status as ABORT is software defined + * and 0xff, which DSA_COMP_STATUS_MASK can mask out. + */ + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + continue; + } + + complete_desc(desc, IDXD_COMPLETE_NORMAL); } else { - spin_lock_irqsave(&irq_entry->list_lock, flags); + spin_lock(&irq_entry->list_lock); list_add_tail(&desc->list, &irq_entry->work_list); - spin_unlock_irqrestore(&irq_entry->list_lock, flags); - queued++; + spin_unlock(&irq_entry->list_lock); } } - - out: - return queued; } -static int irq_process_work_list(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, - int *processed, u64 data) +static void irq_process_work_list(struct idxd_irq_entry *irq_entry) { - int queued = 0; - unsigned long flags; LIST_HEAD(flist); struct idxd_desc *desc, *n; - enum idxd_complete_type reason; - - *processed = 0; - if (wtype == IRQ_WORK_NORMAL) - reason = IDXD_COMPLETE_NORMAL; - else - reason = IDXD_COMPLETE_DEV_FAIL; /* * This lock protects list corruption from access of list outside of the irq handler * thread. */ - spin_lock_irqsave(&irq_entry->list_lock, flags); + spin_lock(&irq_entry->list_lock); if (list_empty(&irq_entry->work_list)) { - spin_unlock_irqrestore(&irq_entry->list_lock, flags); - return 0; + spin_unlock(&irq_entry->list_lock); + return; } list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { if (desc->completion->status) { - list_del(&desc->list); - (*processed)++; - list_add_tail(&desc->list, &flist); - } else { - queued++; + list_move_tail(&desc->list, &flist); } } - spin_unlock_irqrestore(&irq_entry->list_lock, flags); + spin_unlock(&irq_entry->list_lock); list_for_each_entry(desc, &flist, list) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) - match_fault(desc, data); - complete_desc(desc, reason); - } + /* + * Check against the original status as ABORT is software defined + * and 0xff, which DSA_COMP_STATUS_MASK can mask out. + */ + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + continue; + } - return queued; + complete_desc(desc, IDXD_COMPLETE_NORMAL); + } } -static int idxd_desc_process(struct idxd_irq_entry *irq_entry) +irqreturn_t idxd_wq_thread(int irq, void *data) { - int rc, processed, total = 0; + struct idxd_irq_entry *irq_entry = data; /* * There are two lists we are processing. The pending_llist is where @@ -369,33 +267,9 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry) * and process the completed entries. * 4. If the entry is still waiting on hardware, list_add_tail() to * the work_list. - * 5. Repeat until no more descriptors. */ - do { - rc = irq_process_work_list(irq_entry, IRQ_WORK_NORMAL, - &processed, 0); - total += processed; - if (rc != 0) - continue; - - rc = irq_process_pending_llist(irq_entry, IRQ_WORK_NORMAL, - &processed, 0); - total += processed; - } while (rc != 0); - - return total; -} - -irqreturn_t idxd_wq_thread(int irq, void *data) -{ - struct idxd_irq_entry *irq_entry = data; - int processed; - - processed = idxd_desc_process(irq_entry); - idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id); - - if (processed == 0) - return IRQ_NONE; + irq_process_work_list(irq_entry); + irq_process_pending_llist(irq_entry); return IRQ_HANDLED; } diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c new file mode 100644 index 0000000000000000000000000000000000000000..d73004f47cf4b40f0cddf8a57907500884b69e29 --- /dev/null +++ b/drivers/dma/idxd/perfmon.c @@ -0,0 +1,662 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */ + +#include +#include +#include "idxd.h" +#include "perfmon.h" + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf); + +static cpumask_t perfmon_dsa_cpu_mask; +static bool cpuhp_set_up; +static enum cpuhp_state cpuhp_slot; + +/* + * perf userspace reads this attribute to determine which cpus to open + * counters on. It's connected to perfmon_dsa_cpu_mask, which is + * maintained by the cpu hotplug handlers. + */ +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *perfmon_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = perfmon_cpumask_attrs, +}; + +/* + * These attributes specify the bits in the config word that the perf + * syscall uses to pass the event ids and categories to perfmon. + */ +DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3"); +DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31"); + +/* + * These attributes specify the bits in the config1 word that the perf + * syscall uses to pass filter data to perfmon. + */ +DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31"); +DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39"); +DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43"); +DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51"); +DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59"); + +#define PERFMON_FILTERS_START 2 +#define PERFMON_FILTERS_MAX 5 + +static struct attribute *perfmon_format_attrs[] = { + &format_attr_idxd_event_category.attr, + &format_attr_idxd_event.attr, + &format_attr_idxd_filter_wq.attr, + &format_attr_idxd_filter_tc.attr, + &format_attr_idxd_filter_pgsz.attr, + &format_attr_idxd_filter_sz.attr, + &format_attr_idxd_filter_eng.attr, + NULL, +}; + +static struct attribute_group perfmon_format_attr_group = { + .name = "format", + .attrs = perfmon_format_attrs, +}; + +static const struct attribute_group *perfmon_attr_groups[] = { + &perfmon_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask); +} + +static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event) +{ + return &idxd_pmu->pmu == event->pmu; +} + +static int perfmon_collect_events(struct idxd_pmu *idxd_pmu, + struct perf_event *leader, + bool do_grp) +{ + struct perf_event *event; + int n, max_count; + + max_count = idxd_pmu->n_counters; + n = idxd_pmu->n_events; + + if (n >= max_count) + return -EINVAL; + + if (is_idxd_event(idxd_pmu, leader)) { + idxd_pmu->event_list[n] = leader; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + if (!do_grp) + return n; + + for_each_sibling_event(event, leader) { + if (!is_idxd_event(idxd_pmu, event) || + event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -EINVAL; + + idxd_pmu->event_list[n] = event; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + return n; +} + +static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event, int idx) +{ + struct idxd_device *idxd = idxd_pmu->idxd; + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = idx; + hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx)); + hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx)); +} + +static int perfmon_assign_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event) +{ + int i; + + for (i = 0; i < IDXD_PMU_EVENT_MAX; i++) + if (!test_and_set_bit(i, idxd_pmu->used_mask)) + return i; + + return -EINVAL; +} + +/* + * Check whether there are enough counters to satisfy that all the + * events in the group can actually be scheduled at the same time. + * + * To do this, create a fake idxd_pmu object so the event collection + * and assignment functions can be used without affecting the internal + * state of the real idxd_pmu object. + */ +static int perfmon_validate_group(struct idxd_pmu *pmu, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + struct idxd_pmu *fake_pmu; + int i, ret = 0, n, idx; + + fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL); + if (!fake_pmu) + return -ENOMEM; + + fake_pmu->pmu.name = pmu->pmu.name; + fake_pmu->n_counters = pmu->n_counters; + + n = perfmon_collect_events(fake_pmu, leader, true); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + n = perfmon_collect_events(fake_pmu, event, false); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + + for (i = 0; i < n; i++) { + event = fake_pmu->event_list[i]; + + idx = perfmon_assign_event(fake_pmu, event); + if (idx < 0) { + ret = idx; + goto out; + } + } +out: + kfree(fake_pmu); + + return ret; +} + +static int perfmon_pmu_event_init(struct perf_event *event) +{ + struct idxd_device *idxd; + int ret = 0; + + idxd = event_to_idxd(event); + event->hw.idx = -1; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* sampling not supported */ + if (event->attr.sample_period) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->pmu != &idxd->idxd_pmu->pmu) + return -EINVAL; + + event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd)); + event->cpu = idxd->idxd_pmu->cpu; + event->hw.config = event->attr.config; + + if (event->group_leader != event) + /* non-group events have themselves as leader */ + ret = perfmon_validate_group(idxd->idxd_pmu, event); + + return ret; +} + +static inline u64 perfmon_pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int cntr = hwc->idx; + + idxd = event_to_idxd(event); + + return ioread64(CNTRDATA_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_update(struct perf_event *event) +{ + struct idxd_device *idxd = event_to_idxd(event); + u64 prev_raw_count, new_raw_count, delta, p, n; + int shift = 64 - idxd->idxd_pmu->counter_width; + struct hw_perf_event *hwc = &event->hw; + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = perfmon_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count); + + n = (new_raw_count << shift); + p = (prev_raw_count << shift); + + delta = ((n - p) >> shift); + + local64_add(delta, &event->count); +} + +void perfmon_counter_overflow(struct idxd_device *idxd) +{ + int i, n_counters, max_loop = OVERFLOW_SIZE; + struct perf_event *event; + unsigned long ovfstatus; + + n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE); + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + + /* + * While updating overflowed counters, other counters behind + * them could overflow and be missed in a given pass. + * Normally this could happen at most n_counters times, but in + * theory a tiny counter width could result in continual + * overflows and endless looping. max_loop provides a + * failsafe in that highly unlikely case. + */ + while (ovfstatus && max_loop--) { + /* Figure out which counter(s) overflowed */ + for_each_set_bit(i, &ovfstatus, n_counters) { + unsigned long ovfstatus_clear = 0; + + /* Update event->count for overflowed counter */ + event = idxd->idxd_pmu->event_list[i]; + perfmon_pmu_event_update(event); + /* Writing 1 to OVFSTATUS bit clears it */ + set_bit(i, &ovfstatus_clear); + iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd)); + } + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + } + + /* + * Should never happen. If so, it means a counter(s) looped + * around twice while this handler was running. + */ + WARN_ON_ONCE(ovfstatus); +} + +static inline void perfmon_reset_config(struct idxd_device *idxd) +{ + iowrite32(CONFIG_RESET, PERFRST_REG(idxd)); + iowrite32(0, OVFSTATUS_REG(idxd)); + iowrite32(0, PERFFRZ_REG(idxd)); +} + +static inline void perfmon_reset_counters(struct idxd_device *idxd) +{ + iowrite32(CNTR_RESET, PERFRST_REG(idxd)); +} + +static inline void perfmon_reset(struct idxd_device *idxd) +{ + perfmon_reset_config(idxd); + perfmon_reset_counters(idxd); +} + +static void perfmon_pmu_event_start(struct perf_event *event, int mode) +{ + u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0; + u64 cntr_cfg, cntrdata, event_enc, event_cat = 0; + struct hw_perf_event *hwc = &event->hw; + union filter_cfg flt_cfg; + union event_cfg event_cfg; + struct idxd_device *idxd; + int cntr; + + idxd = event_to_idxd(event); + + event->hw.idx = hwc->idx; + cntr = hwc->idx; + + /* Obtain event category and event value from user space */ + event_cfg.val = event->attr.config; + flt_cfg.val = event->attr.config1; + event_cat = event_cfg.event_cat; + event_enc = event_cfg.event_enc; + + /* Obtain filter configuration from user space */ + flt_wq = flt_cfg.wq; + flt_tc = flt_cfg.tc; + flt_pg_sz = flt_cfg.pg_sz; + flt_xfer_sz = flt_cfg.xfer_sz; + flt_eng = flt_cfg.eng; + + if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ)); + if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC)); + if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ)); + if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ)); + if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG)); + + /* Read the start value */ + cntrdata = ioread64(CNTRDATA_REG(idxd, cntr)); + local64_set(&event->hw.prev_count, cntrdata); + + /* Set counter to event/category */ + cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT; + cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT; + /* Set interrupt on overflow and counter enable bits */ + cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE); + + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_stop(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int i, cntr = hwc->idx; + u64 cntr_cfg; + + idxd = event_to_idxd(event); + + /* remove this event from event list */ + for (i = 0; i < idxd->idxd_pmu->n_events; i++) { + if (event != idxd->idxd_pmu->event_list[i]) + continue; + + for (++i; i < idxd->idxd_pmu->n_events; i++) + idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i]; + --idxd->idxd_pmu->n_events; + break; + } + + cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr)); + cntr_cfg &= ~CNTRCFG_ENABLE; + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); + + if (mode == PERF_EF_UPDATE) + perfmon_pmu_event_update(event); + + event->hw.idx = -1; + clear_bit(cntr, idxd->idxd_pmu->used_mask); +} + +static void perfmon_pmu_event_del(struct perf_event *event, int mode) +{ + perfmon_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int perfmon_pmu_event_add(struct perf_event *event, int flags) +{ + struct idxd_device *idxd = event_to_idxd(event); + struct idxd_pmu *idxd_pmu = idxd->idxd_pmu; + struct hw_perf_event *hwc = &event->hw; + int idx, n; + + n = perfmon_collect_events(idxd_pmu, event, false); + if (n < 0) + return n; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + idx = perfmon_assign_event(idxd_pmu, event); + if (idx < 0) + return idx; + + perfmon_assign_hw_event(idxd_pmu, event, idx); + + if (flags & PERF_EF_START) + perfmon_pmu_event_start(event, 0); + + idxd_pmu->n_events = n; + + return 0; +} + +static void enable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd)); +} + +static void disable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd)); +} + +static void perfmon_pmu_enable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + enable_perfmon_pmu(idxd); +} + +static void perfmon_pmu_disable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + disable_perfmon_pmu(idxd); +} + +static void skip_filter(int i) +{ + int j; + + for (j = i; j < PERFMON_FILTERS_MAX; j++) + perfmon_format_attrs[PERFMON_FILTERS_START + j] = + perfmon_format_attrs[PERFMON_FILTERS_START + j + 1]; +} + +static void idxd_pmu_init(struct idxd_pmu *idxd_pmu) +{ + int i; + + for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) { + if (!test_bit(i, &idxd_pmu->supported_filters)) + skip_filter(i); + } + + idxd_pmu->pmu.name = idxd_pmu->name; + idxd_pmu->pmu.attr_groups = perfmon_attr_groups; + idxd_pmu->pmu.task_ctx_nr = perf_invalid_context; + idxd_pmu->pmu.event_init = perfmon_pmu_event_init; + idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable, + idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable, + idxd_pmu->pmu.add = perfmon_pmu_event_add; + idxd_pmu->pmu.del = perfmon_pmu_event_del; + idxd_pmu->pmu.start = perfmon_pmu_event_start; + idxd_pmu->pmu.stop = perfmon_pmu_event_stop; + idxd_pmu->pmu.read = perfmon_pmu_event_update; + idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + idxd_pmu->pmu.module = THIS_MODULE; +} + +void perfmon_pmu_remove(struct idxd_device *idxd) +{ + if (!idxd->idxd_pmu) + return; + + cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node); + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + kfree(idxd->idxd_pmu); + idxd->idxd_pmu = NULL; +} + +static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + /* select the first online CPU as the designated reader */ + if (cpumask_empty(&perfmon_dsa_cpu_mask)) { + cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask); + idxd_pmu->cpu = cpu; + } + + return 0; +} + +static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + unsigned int target; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + + /* migrate events if there is a valid target */ + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); + else + target = -1; + + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + + return 0; +} + +int perfmon_pmu_init(struct idxd_device *idxd) +{ + union idxd_perfcap perfcap; + struct idxd_pmu *idxd_pmu; + int rc = -ENODEV; + + /* + * perfmon module initialization failed, nothing to do + */ + if (!cpuhp_set_up) + return -ENODEV; + + /* + * If perfmon_offset or num_counters is 0, it means perfmon is + * not supported on this hardware. + */ + if (idxd->perfmon_offset == 0) + return -ENODEV; + + idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL); + if (!idxd_pmu) + return -ENOMEM; + + idxd_pmu->idxd = idxd; + idxd->idxd_pmu = idxd_pmu; + + if (idxd->data->type == IDXD_TYPE_DSA) { + rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id); + if (rc < 0) + goto free; + } else if (idxd->data->type == IDXD_TYPE_IAX) { + rc = sprintf(idxd_pmu->name, "iax%d", idxd->id); + if (rc < 0) + goto free; + } else { + goto free; + } + + perfmon_reset(idxd); + + perfcap.bits = ioread64(PERFCAP_REG(idxd)); + + /* + * If total perf counter is 0, stop further registration. + * This is necessary in order to support driver running on + * guest which does not have pmon support. + */ + if (perfcap.num_perf_counter == 0) + goto free; + + /* A counter width of 0 means it can't count */ + if (perfcap.counter_width == 0) + goto free; + + /* Overflow interrupt and counter freeze support must be available */ + if (!perfcap.overflow_interrupt || !perfcap.counter_freeze) + goto free; + + /* Number of event categories cannot be 0 */ + if (perfcap.num_event_category == 0) + goto free; + + /* + * We don't support per-counter capabilities for now. + */ + if (perfcap.cap_per_counter) + goto free; + + idxd_pmu->n_event_categories = perfcap.num_event_category; + idxd_pmu->supported_event_categories = perfcap.global_event_category; + idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter; + + /* check filter capability. If 0, then filters are not supported */ + idxd_pmu->supported_filters = perfcap.filter; + if (perfcap.filter) + idxd_pmu->n_filters = hweight8(perfcap.filter); + + /* Store the total number of counters categories, and counter width */ + idxd_pmu->n_counters = perfcap.num_perf_counter; + idxd_pmu->counter_width = perfcap.counter_width; + + idxd_pmu_init(idxd_pmu); + + rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1); + if (rc) + goto free; + + rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node); + if (rc) { + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + goto free; + } +out: + return rc; +free: + kfree(idxd_pmu); + idxd->idxd_pmu = NULL; + + goto out; +} + +void __init perfmon_init(void) +{ + int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "driver/dma/idxd/perf:online", + perf_event_cpu_online, + perf_event_cpu_offline); + if (WARN_ON(rc < 0)) + return; + + cpuhp_slot = rc; + cpuhp_set_up = true; +} + +void __exit perfmon_exit(void) +{ + if (cpuhp_set_up) + cpuhp_remove_multi_state(cpuhp_slot); +} diff --git a/drivers/dma/idxd/perfmon.h b/drivers/dma/idxd/perfmon.h new file mode 100644 index 0000000000000000000000000000000000000000..9a081a1bc60587a3ee42ec9b63ad1e3921183f32 --- /dev/null +++ b/drivers/dma/idxd/perfmon.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */ + +#ifndef _PERFMON_H_ +#define _PERFMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registers.h" + +static inline struct idxd_pmu *event_to_pmu(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu; +} + +static inline struct idxd_device *event_to_idxd(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +static inline struct idxd_device *pmu_to_idxd(struct pmu *pmu) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +enum dsa_perf_events { + DSA_PERF_EVENT_WQ = 0, + DSA_PERF_EVENT_ENGINE, + DSA_PERF_EVENT_ADDR_TRANS, + DSA_PERF_EVENT_OP, + DSA_PERF_EVENT_COMPL, + DSA_PERF_EVENT_MAX, +}; + +enum filter_enc { + FLT_WQ = 0, + FLT_TC, + FLT_PG_SZ, + FLT_XFER_SZ, + FLT_ENG, + FLT_MAX, +}; + +#define CONFIG_RESET 0x0000000000000001 +#define CNTR_RESET 0x0000000000000002 +#define CNTR_ENABLE 0x0000000000000001 +#define INTR_OVFL 0x0000000000000002 + +#define COUNTER_FREEZE 0x00000000FFFFFFFF +#define COUNTER_UNFREEZE 0x0000000000000000 +#define OVERFLOW_SIZE 32 + +#define CNTRCFG_ENABLE BIT(0) +#define CNTRCFG_IRQ_OVERFLOW BIT(1) +#define CNTRCFG_CATEGORY_SHIFT 8 +#define CNTRCFG_EVENT_SHIFT 32 + +#define PERFMON_TABLE_OFFSET(_idxd) \ +({ \ + typeof(_idxd) __idxd = (_idxd); \ + ((__idxd)->reg_base + (__idxd)->perfmon_offset); \ +}) +#define PERFMON_REG_OFFSET(idxd, offset) \ + (PERFMON_TABLE_OFFSET(idxd) + (offset)) + +#define PERFCAP_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFCAP_OFFSET)) +#define PERFRST_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFRST_OFFSET)) +#define OVFSTATUS_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_OVFSTATUS_OFFSET)) +#define PERFFRZ_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFFRZ_OFFSET)) + +#define FLTCFG_REG(idxd, cntr, flt) \ + (PERFMON_REG_OFFSET(idxd, IDXD_FLTCFG_OFFSET) + ((cntr) * 32) + ((flt) * 4)) + +#define CNTRCFG_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCFG_OFFSET) + ((cntr) * 8)) +#define CNTRDATA_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRDATA_OFFSET) + ((cntr) * 8)) +#define CNTRCAP_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCAP_OFFSET) + ((cntr) * 8)) + +#define EVNTCAP_REG(idxd, category) \ + (PERFMON_REG_OFFSET(idxd, IDXD_EVNTCAP_OFFSET) + ((category) * 8)) + +#define DEFINE_PERFMON_FORMAT_ATTR(_name, _format) \ +static ssize_t __perfmon_idxd_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_idxd_##_name = \ + __ATTR(_name, 0444, __perfmon_idxd_##_name##_show, NULL) + +#endif diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 20628b958509355ec1956450b74ae6680573602e..262c8220adbdad76289a9b9d2c72a25fa4233fec 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -7,6 +7,9 @@ #define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 #define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe +#define DEVICE_VERSION_1 0x100 +#define DEVICE_VERSION_2 0x200 + #define IDXD_MMIO_BAR 0 #define IDXD_WQ_BAR 2 #define IDXD_PORTAL_SIZE PAGE_SIZE @@ -24,8 +27,8 @@ union gen_cap_reg { u64 overlap_copy:1; u64 cache_control_mem:1; u64 cache_control_cache:1; + u64 cmd_cap:1; u64 rsvd:3; - u64 int_handle_req:1; u64 dest_readback:1; u64 drain_readback:1; u64 rsvd2:6; @@ -33,8 +36,7 @@ union gen_cap_reg { u64 max_batch_shift:4; u64 max_ims_mult:6; u64 config_en:1; - u64 max_descs_per_engine:8; - u64 rsvd3:24; + u64 rsvd3:32; }; u64 bits; } __packed; @@ -120,7 +122,8 @@ union gencfg_reg { union genctrl_reg { struct { u32 softerr_int_en:1; - u32 rsvd:31; + u32 halt_int_en:1; + u32 rsvd:30; }; u32 bits; } __packed; @@ -154,6 +157,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_CMD 0x02 #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 +#define IDXD_INTC_HALT_STATE 0x10 #define IDXD_CMD_OFFSET 0xa0 union idxd_command_reg { @@ -180,8 +184,11 @@ enum idxd_cmd { IDXD_CMD_DRAIN_PASID, IDXD_CMD_ABORT_PASID, IDXD_CMD_REQUEST_INT_HANDLE, + IDXD_CMD_RELEASE_INT_HANDLE, }; +#define CMD_INT_HANDLE_IMS 0x10000 + #define IDXD_CMDSTS_OFFSET 0xa8 union cmdsts_reg { struct { @@ -193,6 +200,8 @@ union cmdsts_reg { u32 bits; } __packed; #define IDXD_CMDSTS_ACTIVE 0x80000000 +#define IDXD_CMDSTS_ERR_MASK 0xff +#define IDXD_CMDSTS_RES_SHIFT 8 enum idxd_cmdsts_err { IDXD_CMDSTS_SUCCESS = 0, @@ -228,6 +237,8 @@ enum idxd_cmdsts_err { IDXD_CMDSTS_ERR_NO_HANDLE, }; +#define IDXD_CMDCAP_OFFSET 0xb0 + #define IDXD_SWERR_OFFSET 0xc0 #define IDXD_SWERR_VALID 0x00000001 #define IDXD_SWERR_OVERFLOW 0x00000002 @@ -341,6 +352,9 @@ union wqcfg { } __packed; #define WQCFG_PASID_IDX 2 +#define WQCFG_OCCUP_IDX 6 + +#define WQCFG_OCCUP_MASK 0xffff /* * This macro calculates the offset into the WQCFG register @@ -360,4 +374,130 @@ union wqcfg { #define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32)) +#define GRPCFG_SIZE 64 +#define GRPWQCFG_STRIDES 4 + +/* + * This macro calculates the offset into the GRPCFG register + * idxd - struct idxd * + * n - wq id + * ofs - the index of the 32b dword for the config register + * + * The WQCFG register block is divided into groups per each wq. The n index + * allows us to move to the register group that's for that particular wq. + * Each register is 32bits. The ofs gives us the number of register to access. + */ +#define GRPWQCFG_OFFSET(idxd_dev, n, ofs) ((idxd_dev)->grpcfg_offset +\ + (n) * GRPCFG_SIZE + sizeof(u64) * (ofs)) +#define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32) +#define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40) + +/* Following is performance monitor registers */ +#define IDXD_PERFCAP_OFFSET 0x0 +union idxd_perfcap { + struct { + u64 num_perf_counter:6; + u64 rsvd1:2; + u64 counter_width:8; + u64 num_event_category:4; + u64 global_event_category:16; + u64 filter:8; + u64 rsvd2:8; + u64 cap_per_counter:1; + u64 writeable_counter:1; + u64 counter_freeze:1; + u64 overflow_interrupt:1; + u64 rsvd3:8; + }; + u64 bits; +} __packed; + +#define IDXD_EVNTCAP_OFFSET 0x80 +union idxd_evntcap { + struct { + u64 events:28; + u64 rsvd:36; + }; + u64 bits; +} __packed; + +struct idxd_event { + union { + struct { + u32 event_category:4; + u32 events:28; + }; + u32 val; + }; +} __packed; + +#define IDXD_CNTRCAP_OFFSET 0x800 +struct idxd_cntrcap { + union { + struct { + u32 counter_width:8; + u32 rsvd:20; + u32 num_events:4; + }; + u32 val; + }; + struct idxd_event events[]; +} __packed; + +#define IDXD_PERFRST_OFFSET 0x10 +union idxd_perfrst { + struct { + u32 perfrst_config:1; + u32 perfrst_counter:1; + u32 rsvd:30; + }; + u32 val; +} __packed; + +#define IDXD_OVFSTATUS_OFFSET 0x30 +#define IDXD_PERFFRZ_OFFSET 0x20 +#define IDXD_CNTRCFG_OFFSET 0x100 +union idxd_cntrcfg { + struct { + u64 enable:1; + u64 interrupt_ovf:1; + u64 global_freeze_ovf:1; + u64 rsvd1:5; + u64 event_category:4; + u64 rsvd2:20; + u64 events:28; + u64 rsvd3:4; + }; + u64 val; +} __packed; + +#define IDXD_FLTCFG_OFFSET 0x300 + +#define IDXD_CNTRDATA_OFFSET 0x200 +union idxd_cntrdata { + struct { + u64 event_count_value; + }; + u64 val; +} __packed; + +union event_cfg { + struct { + u64 event_cat:4; + u64 event_enc:28; + }; + u64 val; +} __packed; + +union filter_cfg { + struct { + u64 wq:32; + u64 tc:8; + u64 pg_sz:4; + u64 xfer_sz:8; + u64 eng:8; + }; + u64 val; +} __packed; + #endif diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index a7a61bcc17d58ff45b88db9e47f00c19c2ddfbb4..de76fb4abac24af94f143f5255ff51aa8fe93c37 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -15,18 +15,21 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) desc = wq->descs[idx]; memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); - memset(desc->completion, 0, idxd->compl_size); + memset(desc->completion, 0, idxd->data->compl_size); desc->cpu = cpu; if (device_pasid_enabled(idxd)) desc->hw->pasid = idxd->pasid; /* - * Descriptor completion vectors are 1-8 for MSIX. We will round - * robin through the 8 vectors. + * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match + * vector 1:1 to the WQ id, we need to add 1 */ - wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; - desc->hw->int_handle = wq->vec_ptr; + if (!idxd->int_handles) + desc->hw->int_handle = wq->id + 1; + else + desc->hw->int_handle = idxd->int_handles[wq->id]; + return desc; } @@ -56,7 +59,7 @@ struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) if (signal_pending_state(TASK_INTERRUPTIBLE, current)) break; idx = sbitmap_queue_get(sbq, &cpu); - if (idx > 0) + if (idx >= 0) break; schedule(); } @@ -76,17 +79,77 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) sbitmap_queue_clear(&wq->sbq, desc->id, cpu); } +static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *n; + + lockdep_assert_held(&ie->list_lock); + list_for_each_entry_safe(d, n, &ie->work_list, list) { + if (d == desc) { + list_del(&d->list); + return d; + } + } + + /* + * At this point, the desc needs to be aborted is held by the completion + * handler where it has taken it off the pending list but has not added to the + * work list. It will be cleaned up by the interrupt handler when it sees the + * IDXD_COMP_DESC_ABORT for completion status. + */ + return NULL; +} + +static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *t, *found = NULL; + struct llist_node *head; + + desc->completion->status = IDXD_COMP_DESC_ABORT; + /* + * Grab the list lock so it will block the irq thread handler. This allows the + * abort code to locate the descriptor need to be aborted. + */ + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) { + if (d == desc) { + found = desc; + continue; + } + list_add_tail(&desc->list, &ie->work_list); + } + } + + if (!found) + found = list_abort_desc(wq, ie, desc); + spin_unlock(&ie->list_lock); + + if (found) + complete_desc(found, IDXD_COMPLETE_ABORT); +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; - int vec = desc->hw->int_handle; + struct idxd_irq_entry *ie = NULL; void __iomem *portal; int rc; - if (idxd->state != IDXD_DEV_ENABLED) + if (idxd->state != IDXD_DEV_ENABLED) { + idxd_free_desc(wq, desc); return -EIO; + } + + if (!percpu_ref_tryget_live(&wq->wq_active)) { + idxd_free_desc(wq, desc); + return -ENXIO; + } - portal = wq->portal; + portal = idxd_wq_portal_addr(wq); /* * The wmb() flushes writes to coherent DMA data before @@ -94,6 +157,16 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * even on UP because the recipient is a device. */ wmb(); + + /* + * Pending the descriptor to the lockless list for the irq_entry + * that we designated the descriptor to. + */ + if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + ie = &idxd->irq_entries[wq->id + 1]; + llist_add(&desc->llnode, &ie->pending_llist); + } + if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { @@ -104,17 +177,17 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * device is not accepting descriptor at all. */ rc = enqcmds(portal, desc->hw); - if (rc < 0) + if (rc < 0) { + percpu_ref_put(&wq->wq_active); + /* abort operation frees the descriptor */ + if (ie) + llist_abort_desc(wq, ie, desc); + else + idxd_free_desc(wq, desc); return rc; + } } - /* - * Pending the descriptor to the lockless list for the irq_entry - * that we designated the descriptor to. - */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) - llist_add(&desc->llnode, - &idxd->irq_entries[vec].pending_llist); - + percpu_ref_put(&wq->wq_active); return 0; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 7a08f0d63b430b6074d1990b89fab31e110f242e..a9025be940db2a964ef6c3da6472d0611383d0f7 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -16,451 +16,23 @@ static char *idxd_wq_type_names[] = { [IDXD_WQT_USER] = "user", }; -static void idxd_conf_device_release(struct device *dev) -{ - dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev)); -} - -static struct device_type idxd_group_device_type = { - .name = "group", - .release = idxd_conf_device_release, -}; - -static struct device_type idxd_wq_device_type = { - .name = "wq", - .release = idxd_conf_device_release, -}; - -static struct device_type idxd_engine_device_type = { - .name = "engine", - .release = idxd_conf_device_release, -}; - -static struct device_type dsa_device_type = { - .name = "dsa", - .release = idxd_conf_device_release, -}; - -static struct device_type iax_device_type = { - .name = "iax", - .release = idxd_conf_device_release, -}; - -static inline bool is_dsa_dev(struct device *dev) -{ - return dev ? dev->type == &dsa_device_type : false; -} - -static inline bool is_iax_dev(struct device *dev) -{ - return dev ? dev->type == &iax_device_type : false; -} - -static inline bool is_idxd_dev(struct device *dev) -{ - return is_dsa_dev(dev) || is_iax_dev(dev); -} - -static inline bool is_idxd_wq_dev(struct device *dev) -{ - return dev ? dev->type == &idxd_wq_device_type : false; -} - -static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) -{ - if (wq->type == IDXD_WQT_KERNEL && - strcmp(wq->name, "dmaengine") == 0) - return true; - return false; -} - -static inline bool is_idxd_wq_cdev(struct idxd_wq *wq) -{ - return wq->type == IDXD_WQT_USER; -} - -static int idxd_config_bus_match(struct device *dev, - struct device_driver *drv) -{ - int matched = 0; - - if (is_idxd_dev(dev)) { - struct idxd_device *idxd = confdev_to_idxd(dev); - - if (idxd->state != IDXD_DEV_CONF_READY) - return 0; - matched = 1; - } else if (is_idxd_wq_dev(dev)) { - struct idxd_wq *wq = confdev_to_wq(dev); - struct idxd_device *idxd = wq->idxd; - - if (idxd->state < IDXD_DEV_CONF_READY) - return 0; - - if (wq->state != IDXD_WQ_DISABLED) { - dev_dbg(dev, "%s not disabled\n", dev_name(dev)); - return 0; - } - matched = 1; - } - - if (matched) - dev_dbg(dev, "%s matched\n", dev_name(dev)); - - return matched; -} - -static int idxd_config_bus_probe(struct device *dev) -{ - int rc; - unsigned long flags; - - dev_dbg(dev, "%s called\n", __func__); - - if (is_idxd_dev(dev)) { - struct idxd_device *idxd = confdev_to_idxd(dev); - - if (idxd->state != IDXD_DEV_CONF_READY) { - dev_warn(dev, "Device not ready for config\n"); - return -EBUSY; - } - - if (!try_module_get(THIS_MODULE)) - return -ENXIO; - - /* Perform IDXD configuration and enabling */ - spin_lock_irqsave(&idxd->dev_lock, flags); - rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); - if (rc < 0) { - module_put(THIS_MODULE); - dev_warn(dev, "Device config failed: %d\n", rc); - return rc; - } - - /* start device */ - rc = idxd_device_enable(idxd); - if (rc < 0) { - module_put(THIS_MODULE); - dev_warn(dev, "Device enable failed: %d\n", rc); - return rc; - } - - dev_info(dev, "Device %s enabled\n", dev_name(dev)); - - rc = idxd_register_dma_device(idxd); - if (rc < 0) { - module_put(THIS_MODULE); - dev_dbg(dev, "Failed to register dmaengine device\n"); - return rc; - } - return 0; - } else if (is_idxd_wq_dev(dev)) { - struct idxd_wq *wq = confdev_to_wq(dev); - struct idxd_device *idxd = wq->idxd; - - mutex_lock(&wq->wq_lock); - - if (idxd->state != IDXD_DEV_ENABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Enabling while device not enabled.\n"); - return -EPERM; - } - - if (wq->state != IDXD_WQ_DISABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d already enabled.\n", wq->id); - return -EBUSY; - } - - if (!wq->group) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ not attached to group.\n"); - return -EINVAL; - } - - if (strlen(wq->name) == 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ name not set.\n"); - return -EINVAL; - } - - /* Shared WQ checks */ - if (wq_shared(wq)) { - if (!device_swq_supported(idxd)) { - dev_warn(dev, - "PASID not enabled and shared WQ.\n"); - mutex_unlock(&wq->wq_lock); - return -ENXIO; - } - /* - * Shared wq with the threshold set to 0 means the user - * did not set the threshold or transitioned from a - * dedicated wq but did not set threshold. A value - * of 0 would effectively disable the shared wq. The - * driver does not allow a value of 0 to be set for - * threshold via sysfs. - */ - if (wq->threshold == 0) { - dev_warn(dev, - "Shared WQ and threshold 0.\n"); - mutex_unlock(&wq->wq_lock); - return -EINVAL; - } - } - - rc = idxd_wq_alloc_resources(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ resource alloc failed\n"); - return rc; - } - - spin_lock_irqsave(&idxd->dev_lock, flags); - rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Writing WQ %d config failed: %d\n", - wq->id, rc); - return rc; - } - - rc = idxd_wq_enable(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d enabling failed: %d\n", - wq->id, rc); - return rc; - } - - rc = idxd_wq_map_portal(wq); - if (rc < 0) { - dev_warn(dev, "wq portal mapping failed: %d\n", rc); - rc = idxd_wq_disable(wq); - if (rc < 0) - dev_warn(dev, "IDXD wq disable failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - - wq->client_count = 0; - - dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev)); - - if (is_idxd_wq_dmaengine(wq)) { - rc = idxd_register_dma_channel(wq); - if (rc < 0) { - dev_dbg(dev, "DMA channel register failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } else if (is_idxd_wq_cdev(wq)) { - rc = idxd_wq_add_cdev(wq); - if (rc < 0) { - dev_dbg(dev, "Cdev creation failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } - - mutex_unlock(&wq->wq_lock); - return 0; - } - - return -ENODEV; -} - -static void disable_wq(struct idxd_wq *wq) -{ - struct idxd_device *idxd = wq->idxd; - struct device *dev = &idxd->pdev->dev; - - mutex_lock(&wq->wq_lock); - dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev)); - if (wq->state == IDXD_WQ_DISABLED) { - mutex_unlock(&wq->wq_lock); - return; - } - - if (is_idxd_wq_dmaengine(wq)) - idxd_unregister_dma_channel(wq); - else if (is_idxd_wq_cdev(wq)) - idxd_wq_del_cdev(wq); - - if (idxd_wq_refcount(wq)) - dev_warn(dev, "Clients has claim on wq %d: %d\n", - wq->id, idxd_wq_refcount(wq)); - - idxd_wq_unmap_portal(wq); - - idxd_wq_drain(wq); - idxd_wq_reset(wq); - - idxd_wq_free_resources(wq); - wq->client_count = 0; - mutex_unlock(&wq->wq_lock); - - dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); -} - -static int idxd_config_bus_remove(struct device *dev) -{ - int rc; - - dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev)); - - /* disable workqueue here */ - if (is_idxd_wq_dev(dev)) { - struct idxd_wq *wq = confdev_to_wq(dev); - - disable_wq(wq); - } else if (is_idxd_dev(dev)) { - struct idxd_device *idxd = confdev_to_idxd(dev); - int i; - - dev_dbg(dev, "%s removing dev %s\n", __func__, - dev_name(&idxd->conf_dev)); - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; - - if (wq->state == IDXD_WQ_DISABLED) - continue; - dev_warn(dev, "Active wq %d on disable %s.\n", i, - dev_name(&idxd->conf_dev)); - device_release_driver(&wq->conf_dev); - } - - idxd_unregister_dma_device(idxd); - rc = idxd_device_disable(idxd); - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; - - mutex_lock(&wq->wq_lock); - idxd_wq_disable_cleanup(wq); - mutex_unlock(&wq->wq_lock); - } - module_put(THIS_MODULE); - if (rc < 0) - dev_warn(dev, "Device disable failed\n"); - else - dev_info(dev, "Device %s disabled\n", dev_name(dev)); - - } - - return 0; -} - -static void idxd_config_bus_shutdown(struct device *dev) -{ - dev_dbg(dev, "%s called\n", __func__); -} - -struct bus_type dsa_bus_type = { - .name = "dsa", - .match = idxd_config_bus_match, - .probe = idxd_config_bus_probe, - .remove = idxd_config_bus_remove, - .shutdown = idxd_config_bus_shutdown, -}; - -struct bus_type iax_bus_type = { - .name = "iax", - .match = idxd_config_bus_match, - .probe = idxd_config_bus_probe, - .remove = idxd_config_bus_remove, - .shutdown = idxd_config_bus_shutdown, -}; - -static struct bus_type *idxd_bus_types[] = { - &dsa_bus_type, - &iax_bus_type -}; - -static struct idxd_device_driver dsa_drv = { - .drv = { - .name = "dsa", - .bus = &dsa_bus_type, - .owner = THIS_MODULE, - .mod_name = KBUILD_MODNAME, - }, -}; - -static struct idxd_device_driver iax_drv = { - .drv = { - .name = "iax", - .bus = &iax_bus_type, - .owner = THIS_MODULE, - .mod_name = KBUILD_MODNAME, - }, -}; - -static struct idxd_device_driver *idxd_drvs[] = { - &dsa_drv, - &iax_drv -}; - -struct bus_type *idxd_get_bus_type(struct idxd_device *idxd) -{ - return idxd_bus_types[idxd->type]; -} - -static struct device_type *idxd_get_device_type(struct idxd_device *idxd) -{ - if (idxd->type == IDXD_TYPE_DSA) - return &dsa_device_type; - else if (idxd->type == IDXD_TYPE_IAX) - return &iax_device_type; - else - return NULL; -} - -/* IDXD generic driver setup */ -int idxd_register_driver(void) -{ - int i, rc; - - for (i = 0; i < IDXD_TYPE_MAX; i++) { - rc = driver_register(&idxd_drvs[i]->drv); - if (rc < 0) - goto drv_fail; - } - - return 0; - -drv_fail: - while (--i >= 0) - driver_unregister(&idxd_drvs[i]->drv); - return rc; -} - -void idxd_unregister_driver(void) -{ - int i; - - for (i = 0; i < IDXD_TYPE_MAX; i++) - driver_unregister(&idxd_drvs[i]->drv); -} - /* IDXD engine attributes */ static ssize_t engine_group_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_engine *engine = - container_of(dev, struct idxd_engine, conf_dev); + struct idxd_engine *engine = confdev_to_engine(dev); if (engine->group) - return sprintf(buf, "%d\n", engine->group->id); + return sysfs_emit(buf, "%d\n", engine->group->id); else - return sprintf(buf, "%d\n", -1); + return sysfs_emit(buf, "%d\n", -1); } static ssize_t engine_group_id_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_engine *engine = - container_of(dev, struct idxd_engine, conf_dev); + struct idxd_engine *engine = confdev_to_engine(dev); struct idxd_device *idxd = engine->idxd; long id; int rc; @@ -488,7 +60,7 @@ static ssize_t engine_group_id_store(struct device *dev, if (prevg) prevg->num_engines--; - engine->group = &idxd->groups[id]; + engine->group = idxd->groups[id]; engine->group->num_engines++; return count; @@ -512,6 +84,19 @@ static const struct attribute_group *idxd_engine_attribute_groups[] = { NULL, }; +static void idxd_conf_engine_release(struct device *dev) +{ + struct idxd_engine *engine = confdev_to_engine(dev); + + kfree(engine); +} + +struct device_type idxd_engine_device_type = { + .name = "engine", + .release = idxd_conf_engine_release, + .groups = idxd_engine_attribute_groups, +}; + /* Group attributes */ static void idxd_set_free_tokens(struct idxd_device *idxd) @@ -519,7 +104,7 @@ static void idxd_set_free_tokens(struct idxd_device *idxd) int i, tokens; for (i = 0, tokens = 0; i < idxd->max_groups; i++) { - struct idxd_group *g = &idxd->groups[i]; + struct idxd_group *g = idxd->groups[i]; tokens += g->tokens_reserved; } @@ -531,18 +116,16 @@ static ssize_t group_tokens_reserved_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); - return sprintf(buf, "%u\n", group->tokens_reserved); + return sysfs_emit(buf, "%u\n", group->tokens_reserved); } static ssize_t group_tokens_reserved_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; unsigned long val; int rc; @@ -551,7 +134,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -579,18 +162,16 @@ static ssize_t group_tokens_allowed_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); - return sprintf(buf, "%u\n", group->tokens_allowed); + return sysfs_emit(buf, "%u\n", group->tokens_allowed); } static ssize_t group_tokens_allowed_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; unsigned long val; int rc; @@ -599,7 +180,7 @@ static ssize_t group_tokens_allowed_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -624,18 +205,16 @@ static ssize_t group_use_token_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); - return sprintf(buf, "%u\n", group->use_token_limit); + return sysfs_emit(buf, "%u\n", group->use_token_limit); } static ssize_t group_use_token_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; unsigned long val; int rc; @@ -644,7 +223,7 @@ static ssize_t group_use_token_limit_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -667,25 +246,24 @@ static struct device_attribute dev_attr_group_use_token_limit = static ssize_t group_engines_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); int i, rc = 0; - char *tmp = buf; struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; + struct idxd_engine *engine = idxd->engines[i]; if (!engine->group) continue; if (engine->group->id == group->id) - rc += sprintf(tmp + rc, "engine%d.%d ", - idxd->id, engine->id); + rc += sysfs_emit_at(buf, rc, "engine%d.%d ", idxd->id, engine->id); } + if (!rc) + return 0; rc--; - rc += sprintf(tmp + rc, "\n"); + rc += sysfs_emit_at(buf, rc, "\n"); return rc; } @@ -696,25 +274,24 @@ static struct device_attribute dev_attr_group_engines = static ssize_t group_work_queues_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); int i, rc = 0; - char *tmp = buf; struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (!wq->group) continue; if (wq->group->id == group->id) - rc += sprintf(tmp + rc, "wq%d.%d ", - idxd->id, wq->id); + rc += sysfs_emit_at(buf, rc, "wq%d.%d ", idxd->id, wq->id); } + if (!rc) + return 0; rc--; - rc += sprintf(tmp + rc, "\n"); + rc += sysfs_emit_at(buf, rc, "\n"); return rc; } @@ -726,18 +303,16 @@ static ssize_t group_traffic_class_a_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); - return sprintf(buf, "%d\n", group->tc_a); + return sysfs_emit(buf, "%d\n", group->tc_a); } static ssize_t group_traffic_class_a_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; long val; int rc; @@ -752,6 +327,9 @@ static ssize_t group_traffic_class_a_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) + return -EPERM; + if (val < 0 || val > 7) return -EINVAL; @@ -767,18 +345,16 @@ static ssize_t group_traffic_class_b_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); - return sprintf(buf, "%d\n", group->tc_b); + return sysfs_emit(buf, "%d\n", group->tc_b); } static ssize_t group_traffic_class_b_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; long val; int rc; @@ -793,6 +369,9 @@ static ssize_t group_traffic_class_b_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) + return -EPERM; + if (val < 0 || val > 7) return -EINVAL; @@ -824,13 +403,26 @@ static const struct attribute_group *idxd_group_attribute_groups[] = { NULL, }; +static void idxd_conf_group_release(struct device *dev) +{ + struct idxd_group *group = confdev_to_group(dev); + + kfree(group); +} + +struct device_type idxd_group_device_type = { + .name = "group", + .release = idxd_conf_group_release, + .groups = idxd_group_attribute_groups, +}; + /* IDXD work queue attribs */ static ssize_t wq_clients_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%d\n", wq->client_count); + return sysfs_emit(buf, "%d\n", wq->client_count); } static struct device_attribute dev_attr_wq_clients = @@ -839,16 +431,16 @@ static struct device_attribute dev_attr_wq_clients = static ssize_t wq_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); switch (wq->state) { case IDXD_WQ_DISABLED: - return sprintf(buf, "disabled\n"); + return sysfs_emit(buf, "disabled\n"); case IDXD_WQ_ENABLED: - return sprintf(buf, "enabled\n"); + return sysfs_emit(buf, "enabled\n"); } - return sprintf(buf, "unknown\n"); + return sysfs_emit(buf, "unknown\n"); } static struct device_attribute dev_attr_wq_state = @@ -857,19 +449,19 @@ static struct device_attribute dev_attr_wq_state = static ssize_t wq_group_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); if (wq->group) - return sprintf(buf, "%u\n", wq->group->id); + return sysfs_emit(buf, "%u\n", wq->group->id); else - return sprintf(buf, "-1\n"); + return sysfs_emit(buf, "-1\n"); } static ssize_t wq_group_id_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; long id; int rc; @@ -896,7 +488,7 @@ static ssize_t wq_group_id_store(struct device *dev, return count; } - group = &idxd->groups[id]; + group = idxd->groups[id]; prevg = wq->group; if (prevg) @@ -912,17 +504,16 @@ static struct device_attribute dev_attr_wq_group_id = static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%s\n", - wq_dedicated(wq) ? "dedicated" : "shared"); + return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared"); } static ssize_t wq_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -949,9 +540,9 @@ static struct device_attribute dev_attr_wq_mode = static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%u\n", wq->size); + return sysfs_emit(buf, "%u\n", wq->size); } static int total_claimed_wq_size(struct idxd_device *idxd) @@ -960,7 +551,7 @@ static int total_claimed_wq_size(struct idxd_device *idxd) int wq_size = 0; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; wq_size += wq->size; } @@ -972,7 +563,7 @@ static ssize_t wq_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); unsigned long size; struct idxd_device *idxd = wq->idxd; int rc; @@ -1000,16 +591,16 @@ static struct device_attribute dev_attr_wq_size = static ssize_t wq_priority_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%u\n", wq->priority); + return sysfs_emit(buf, "%u\n", wq->priority); } static ssize_t wq_priority_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); unsigned long prio; struct idxd_device *idxd = wq->idxd; int rc; @@ -1037,21 +628,23 @@ static struct device_attribute dev_attr_wq_priority = static ssize_t wq_block_on_fault_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%u\n", - test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); } static ssize_t wq_block_on_fault_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; bool bof; int rc; + if (!idxd->hw.gen_cap.block_on_fault) + return -EOPNOTSUPP; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; @@ -1077,16 +670,16 @@ static struct device_attribute dev_attr_wq_block_on_fault = static ssize_t wq_threshold_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%u\n", wq->threshold); + return sysfs_emit(buf, "%u\n", wq->threshold); } static ssize_t wq_threshold_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; unsigned int val; int rc; @@ -1118,19 +711,16 @@ static struct device_attribute dev_attr_wq_threshold = static ssize_t wq_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); switch (wq->type) { case IDXD_WQT_KERNEL: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_KERNEL]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_KERNEL]); case IDXD_WQT_USER: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_USER]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_USER]); case IDXD_WQT_NONE: default: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_NONE]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_NONE]); } return -EINVAL; @@ -1140,7 +730,7 @@ static ssize_t wq_type_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); enum idxd_wq_type old_type; if (wq->state != IDXD_WQ_DISABLED) @@ -1169,16 +759,16 @@ static struct device_attribute dev_attr_wq_type = static ssize_t wq_name_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%s\n", wq->name); + return sysfs_emit(buf, "%s\n", wq->name); } static ssize_t wq_name_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); if (wq->state != IDXD_WQ_DISABLED) return -EPERM; @@ -1205,7 +795,7 @@ static struct device_attribute dev_attr_wq_name = static ssize_t wq_cdev_minor_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); int minor = -1; mutex_lock(&wq->wq_lock); @@ -1239,15 +829,15 @@ static int __get_sysfs_u64(const char *buf, u64 *val) static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%llu\n", wq->max_xfer_bytes); + return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes); } static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; u64 xfer_size; int rc; @@ -1273,15 +863,15 @@ static struct device_attribute dev_attr_wq_max_transfer_size = static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%u\n", wq->max_batch_size); + return sysfs_emit(buf, "%u\n", wq->max_batch_size); } static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; u64 batch_size; int rc; @@ -1306,15 +896,15 @@ static struct device_attribute dev_attr_wq_max_batch_size = static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); - return sprintf(buf, "%u\n", wq->ats_dis); + return sysfs_emit(buf, "%u\n", wq->ats_dis); } static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; bool ats_dis; int rc; @@ -1337,6 +927,24 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute static struct device_attribute dev_attr_wq_ats_disable = __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store); +static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + u32 occup, offset; + + if (!idxd->hw.wq_cap.occupancy) + return -EOPNOTSUPP; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_OCCUP_IDX); + occup = ioread32(idxd->reg_base + offset) & WQCFG_OCCUP_MASK; + + return sysfs_emit(buf, "%u\n", occup); +} + +static struct device_attribute dev_attr_wq_occupancy = + __ATTR(occupancy, 0444, wq_occupancy_show, NULL); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -1352,6 +960,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_transfer_size.attr, &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, + &dev_attr_wq_occupancy.attr, NULL, }; @@ -1364,14 +973,27 @@ static const struct attribute_group *idxd_wq_attribute_groups[] = { NULL, }; +static void idxd_conf_wq_release(struct device *dev) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + kfree(wq->wqcfg); + kfree(wq); +} + +struct device_type idxd_wq_device_type = { + .name = "wq", + .release = idxd_conf_wq_release, + .groups = idxd_wq_attribute_groups, +}; + /* IDXD device attribs */ static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%#x\n", idxd->hw.version); + return sysfs_emit(buf, "%#x\n", idxd->hw.version); } static DEVICE_ATTR_RO(version); @@ -1379,60 +1001,54 @@ static ssize_t max_work_queues_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->max_wq_size); + return sysfs_emit(buf, "%u\n", idxd->max_wq_size); } static DEVICE_ATTR_RO(max_work_queues_size); static ssize_t max_groups_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->max_groups); + return sysfs_emit(buf, "%u\n", idxd->max_groups); } static DEVICE_ATTR_RO(max_groups); static ssize_t max_work_queues_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->max_wqs); + return sysfs_emit(buf, "%u\n", idxd->max_wqs); } static DEVICE_ATTR_RO(max_work_queues); static ssize_t max_engines_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->max_engines); + return sysfs_emit(buf, "%u\n", idxd->max_engines); } static DEVICE_ATTR_RO(max_engines); static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); + return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); } static DEVICE_ATTR_RO(numa_node); static ssize_t max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->max_batch_size); + return sysfs_emit(buf, "%u\n", idxd->max_batch_size); } static DEVICE_ATTR_RO(max_batch_size); @@ -1440,18 +1056,16 @@ static ssize_t max_transfer_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%llu\n", idxd->max_xfer_bytes); + return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes); } static DEVICE_ATTR_RO(max_transfer_size); static ssize_t op_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); int i, rc = 0; for (i = 0; i < 4; i++) @@ -1466,88 +1080,78 @@ static DEVICE_ATTR_RO(op_cap); static ssize_t gen_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%#llx\n", idxd->hw.gen_cap.bits); + return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits); } static DEVICE_ATTR_RO(gen_cap); static ssize_t configurable_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", - test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); + return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); } static DEVICE_ATTR_RO(configurable); static ssize_t clients_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); - unsigned long flags; + struct idxd_device *idxd = confdev_to_idxd(dev); int count = 0, i; - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; count += wq->client_count; } - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); - return sprintf(buf, "%d\n", count); + return sysfs_emit(buf, "%d\n", count); } static DEVICE_ATTR_RO(clients); static ssize_t pasid_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", device_pasid_enabled(idxd)); + return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd)); } static DEVICE_ATTR_RO(pasid_enabled); static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); switch (idxd->state) { case IDXD_DEV_DISABLED: - case IDXD_DEV_CONF_READY: - return sprintf(buf, "disabled\n"); + return sysfs_emit(buf, "disabled\n"); case IDXD_DEV_ENABLED: - return sprintf(buf, "enabled\n"); + return sysfs_emit(buf, "enabled\n"); case IDXD_DEV_HALTED: - return sprintf(buf, "halted\n"); + return sysfs_emit(buf, "halted\n"); } - return sprintf(buf, "unknown\n"); + return sysfs_emit(buf, "unknown\n"); } static DEVICE_ATTR_RO(state); static ssize_t errors_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); int i, out = 0; - unsigned long flags; - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); for (i = 0; i < 4; i++) - out += sprintf(buf + out, "%#018llx ", idxd->sw_err.bits[i]); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]); + spin_unlock(&idxd->dev_lock); out--; - out += sprintf(buf + out, "\n"); + out += sysfs_emit_at(buf, out, "\n"); return out; } static DEVICE_ATTR_RO(errors); @@ -1555,28 +1159,25 @@ static DEVICE_ATTR_RO(errors); static ssize_t max_tokens_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->max_tokens); + return sysfs_emit(buf, "%u\n", idxd->max_tokens); } static DEVICE_ATTR_RO(max_tokens); static ssize_t token_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->token_limit); + return sysfs_emit(buf, "%u\n", idxd->token_limit); } static ssize_t token_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); unsigned long val; int rc; @@ -1604,21 +1205,29 @@ static DEVICE_ATTR_RW(token_limit); static ssize_t cdev_major_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%u\n", idxd->major); + return sysfs_emit(buf, "%u\n", idxd->major); } static DEVICE_ATTR_RO(cdev_major); static ssize_t cmd_status_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); - return sprintf(buf, "%#x\n", idxd->cmd_status); + return sysfs_emit(buf, "%#x\n", idxd->cmd_status); } -static DEVICE_ATTR_RO(cmd_status); + +static ssize_t cmd_status_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + idxd->cmd_status = 0; + return count; +} +static DEVICE_ATTR_RW(cmd_status); static struct attribute *idxd_device_attributes[] = { &dev_attr_version.attr, @@ -1652,212 +1261,184 @@ static const struct attribute_group *idxd_attribute_groups[] = { NULL, }; -static int idxd_setup_engine_sysfs(struct idxd_device *idxd) +static void idxd_conf_device_release(struct device *dev) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + struct idxd_device *idxd = confdev_to_idxd(dev); + + kfree(idxd->groups); + kfree(idxd->wqs); + kfree(idxd->engines); + kfree(idxd->irq_entries); + kfree(idxd->int_handles); + ida_free(&idxd_ida, idxd->id); + kfree(idxd); +} + +struct device_type dsa_device_type = { + .name = "dsa", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + +struct device_type iax_device_type = { + .name = "iax", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + +static int idxd_register_engine_devices(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + int i, j, rc; for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; - - engine->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&engine->conf_dev, "engine%d.%d", - idxd->id, engine->id); - engine->conf_dev.bus = idxd_get_bus_type(idxd); - engine->conf_dev.groups = idxd_engine_attribute_groups; - engine->conf_dev.type = &idxd_engine_device_type; - dev_dbg(dev, "Engine device register: %s\n", - dev_name(&engine->conf_dev)); - rc = device_register(&engine->conf_dev); - if (rc < 0) { - put_device(&engine->conf_dev); + engine = idxd->engines[i]; + rc = device_add(engine_confdev(engine)); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_engine *engine = &idxd->engines[i]; + j = i - 1; + for (; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + put_device(engine_confdev(engine)); + } - device_unregister(&engine->conf_dev); + while (j--) { + engine = idxd->engines[j]; + device_unregister(engine_confdev(engine)); } return rc; } -static int idxd_setup_group_sysfs(struct idxd_device *idxd) +static int idxd_register_group_devices(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + struct idxd_group *group; + int i, j, rc; for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; - - group->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&group->conf_dev, "group%d.%d", - idxd->id, group->id); - group->conf_dev.bus = idxd_get_bus_type(idxd); - group->conf_dev.groups = idxd_group_attribute_groups; - group->conf_dev.type = &idxd_group_device_type; - dev_dbg(dev, "Group device register: %s\n", - dev_name(&group->conf_dev)); - rc = device_register(&group->conf_dev); - if (rc < 0) { - put_device(&group->conf_dev); + group = idxd->groups[i]; + rc = device_add(group_confdev(group)); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_group *group = &idxd->groups[i]; + j = i - 1; + for (; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + } - device_unregister(&group->conf_dev); + while (j--) { + group = idxd->groups[j]; + device_unregister(group_confdev(group)); } return rc; } -static int idxd_setup_wq_sysfs(struct idxd_device *idxd) +static int idxd_register_wq_devices(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + struct idxd_wq *wq; + int i, rc, j; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; - - wq->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id); - wq->conf_dev.bus = idxd_get_bus_type(idxd); - wq->conf_dev.groups = idxd_wq_attribute_groups; - wq->conf_dev.type = &idxd_wq_device_type; - dev_dbg(dev, "WQ device register: %s\n", - dev_name(&wq->conf_dev)); - rc = device_register(&wq->conf_dev); - if (rc < 0) { - put_device(&wq->conf_dev); + wq = idxd->wqs[i]; + rc = device_add(wq_confdev(wq)); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_wq *wq = &idxd->wqs[i]; + j = i - 1; + for (; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + put_device(wq_confdev(wq)); + } - device_unregister(&wq->conf_dev); + while (j--) { + wq = idxd->wqs[j]; + device_unregister(wq_confdev(wq)); } return rc; } -static int idxd_setup_device_sysfs(struct idxd_device *idxd) +int idxd_register_devices(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc; - char devname[IDXD_NAME_SIZE]; + int rc, i; - sprintf(devname, "%s%d", idxd_get_dev_name(idxd), idxd->id); - idxd->conf_dev.parent = dev; - dev_set_name(&idxd->conf_dev, "%s", devname); - idxd->conf_dev.bus = idxd_get_bus_type(idxd); - idxd->conf_dev.groups = idxd_attribute_groups; - idxd->conf_dev.type = idxd_get_device_type(idxd); - - dev_dbg(dev, "IDXD device register: %s\n", dev_name(&idxd->conf_dev)); - rc = device_register(&idxd->conf_dev); - if (rc < 0) { - put_device(&idxd->conf_dev); + rc = device_add(idxd_confdev(idxd)); + if (rc < 0) return rc; - } - return 0; -} - -int idxd_setup_sysfs(struct idxd_device *idxd) -{ - struct device *dev = &idxd->pdev->dev; - int rc; - - rc = idxd_setup_device_sysfs(idxd); + rc = idxd_register_wq_devices(idxd); if (rc < 0) { - dev_dbg(dev, "Device sysfs registering failed: %d\n", rc); - return rc; + dev_dbg(dev, "WQ devices registering failed: %d\n", rc); + goto err_wq; } - rc = idxd_setup_wq_sysfs(idxd); + rc = idxd_register_engine_devices(idxd); if (rc < 0) { - /* unregister conf dev */ - dev_dbg(dev, "Work Queue sysfs registering failed: %d\n", rc); - return rc; + dev_dbg(dev, "Engine devices registering failed: %d\n", rc); + goto err_engine; } - rc = idxd_setup_group_sysfs(idxd); + rc = idxd_register_group_devices(idxd); if (rc < 0) { - /* unregister conf dev */ - dev_dbg(dev, "Group sysfs registering failed: %d\n", rc); - return rc; - } - - rc = idxd_setup_engine_sysfs(idxd); - if (rc < 0) { - /* unregister conf dev */ - dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc); - return rc; + dev_dbg(dev, "Group device registering failed: %d\n", rc); + goto err_group; } return 0; + + err_group: + for (i = 0; i < idxd->max_engines; i++) + device_unregister(engine_confdev(idxd->engines[i])); + err_engine: + for (i = 0; i < idxd->max_wqs; i++) + device_unregister(wq_confdev(idxd->wqs[i])); + err_wq: + device_del(idxd_confdev(idxd)); + return rc; } -void idxd_cleanup_sysfs(struct idxd_device *idxd) +void idxd_unregister_devices(struct idxd_device *idxd) { int i; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; - device_unregister(&wq->conf_dev); + device_unregister(wq_confdev(wq)); } for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; + struct idxd_engine *engine = idxd->engines[i]; - device_unregister(&engine->conf_dev); + device_unregister(engine_confdev(engine)); } for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; + struct idxd_group *group = idxd->groups[i]; - device_unregister(&group->conf_dev); + device_unregister(group_confdev(group)); } - - device_unregister(&idxd->conf_dev); } int idxd_register_bus_type(void) { - int i, rc; - - for (i = 0; i < IDXD_TYPE_MAX; i++) { - rc = bus_register(idxd_bus_types[i]); - if (rc < 0) - goto bus_err; - } - - return 0; - -bus_err: - while (--i >= 0) - bus_unregister(idxd_bus_types[i]); - return rc; + return bus_register(&dsa_bus_type); } void idxd_unregister_bus_type(void) { - int i; - - for (i = 0; i < IDXD_TYPE_MAX; i++) - bus_unregister(idxd_bus_types[i]); + bus_unregister(&dsa_bus_type); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 295c29108a2cb2f75288b9452825711e73539d30..a54fa0f7006fdf39ecc8ccf8de93d49793bce6de 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_RAPL_ONLINE, CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, + CPUHP_AP_PERF_X86_IDXD_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750e9ca474306cadbf637b2975f9594..c750eac09fc9c446d6abcc60bde63253818e11c8 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ @@ -9,6 +9,30 @@ #include #endif +/* Driver command error status */ +enum idxd_scmd_stat { + IDXD_SCMD_DEV_ENABLED = 0x80000010, + IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020, + IDXD_SCMD_WQ_ENABLED = 0x80000021, + IDXD_SCMD_DEV_DMA_ERR = 0x80020000, + IDXD_SCMD_WQ_NO_GRP = 0x80030000, + IDXD_SCMD_WQ_NO_NAME = 0x80040000, + IDXD_SCMD_WQ_NO_SVM = 0x80050000, + IDXD_SCMD_WQ_NO_THRESH = 0x80060000, + IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000, + IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000, + IDXD_SCMD_PERCPU_ERR = 0x80090000, + IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000, + IDXD_SCMD_CDEV_ERR = 0x800b0000, + IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, + IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, + IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, + IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, +}; + +#define IDXD_SCMD_SOFTERR_MASK 0x80000000 +#define IDXD_SCMD_SOFTERR_SHIFT 16 + /* Descriptor flags */ #define IDXD_OP_FLAG_FENCE 0x0001 #define IDXD_OP_FLAG_BOF 0x0002