diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index f253a27e1a1bcadf921c02559fc5902b260ce491..e09b9c13b7458c2007c74853e7d9f7af50ba77cb 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -471,14 +471,13 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
/* Unmap the whole notifier's range */
static void smmu_unmap_notifier_range(IOMMUNotifier *n)
{
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
event.type = IOMMU_NOTIFIER_UNMAP;
event.entry.target_as = &address_space_memory;
event.entry.iova = n->start;
event.entry.perm = IOMMU_NONE;
event.entry.addr_mask = n->end - n->start;
- event.entry.granularity = IOMMU_INV_GRAN_DOMAIN;
memory_region_notify_iommu_one(n, &event);
}
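For reference, the event populated above has the following shape in this era of QEMU (a sketch based on include/exec/memory.h; treat the exact layout as an assumption). Dropping the "= {}" initializer is safe on this path because every field an UNMAP consumer reads is assigned explicitly; translated_addr is left untouched but only matters for MAP events.

    /* Sketch of the notifier event built above. */
    struct IOMMUTLBEntry {
        AddressSpace    *target_as;
        hwaddr           iova;
        hwaddr           translated_addr; /* only meaningful for MAP events */
        hwaddr           addr_mask;       /* span is [iova, iova + addr_mask] */
        IOMMUAccessFlags perm;            /* IOMMU_NONE marks an unmap */
    };

    typedef struct IOMMUTLBEvent {
        IOMMUNotifierFlag type;           /* e.g. IOMMU_NOTIFIER_UNMAP */
        IOMMUTLBEntry     entry;
    } IOMMUTLBEvent;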
diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h
index 5ef8c598c61f242108b8d2c1fc55d4c1bbc12223..2d75b31953149227345d62809822b2670a99a880 100644
--- a/hw/arm/smmu-internal.h
+++ b/hw/arm/smmu-internal.h
@@ -105,7 +105,6 @@ typedef struct SMMUIOTLBPageInvInfo {
} SMMUIOTLBPageInvInfo;
typedef struct SMMUSIDRange {
- SMMUState *state;
uint32_t start;
uint32_t end;
} SMMUSIDRange;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 2bd9f220558ba219785ee0c46f6bf43bfeeb2cf4..3b43368be0fac2bc50137fa340ba0f21d6f3bd1c 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -16,10 +16,6 @@
 * with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#ifdef __linux__
-#include "linux/iommu.h"
-#endif
-
#include "qemu/osdep.h"
#include "qemu/bitops.h"
#include "hw/irq.h"
@@ -366,7 +362,6 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
"SMMUv3 S1 stalling fault model not allowed yet\n");
goto bad_ste;
}
- cfg->s1ctxptr = STE_CTXPTR(ste);
return 0;
bad_ste:
@@ -808,10 +803,10 @@ epilogue:
static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
IOMMUNotifier *n,
int asid, dma_addr_t iova,
- uint8_t tg, uint64_t num_pages, bool leaf)
+ uint8_t tg, uint64_t num_pages)
{
SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
uint8_t granule;
if (!tg) {
@@ -841,41 +836,13 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
event.entry.iova = iova;
event.entry.addr_mask = num_pages * (1 << granule) - 1;
event.entry.perm = IOMMU_NONE;
- event.entry.flags = IOMMU_INV_FLAGS_ARCHID;
- event.entry.arch_id = asid;
- event.entry.leaf = leaf;
-
- memory_region_notify_iommu_one(n, &event);
-}
-
-/**
- * smmuv3_notify_asid - call the notifier @n for a given asid
- *
- * @mr: IOMMU mr region handle
- * @n: notifier to be called
- * @asid: address space ID or negative value if we don't care
- */
-static void smmuv3_notify_asid(IOMMUMemoryRegion *mr,
- IOMMUNotifier *n, int asid)
-{
- IOMMUTLBEvent event = {};
-
- event.type = IOMMU_NOTIFIER_UNMAP;
- event.entry.target_as = &address_space_memory;
- event.entry.perm = IOMMU_NONE;
- event.entry.granularity = IOMMU_INV_GRAN_PASID;
- event.entry.flags = IOMMU_INV_FLAGS_ARCHID;
- event.entry.arch_id = asid;
- event.entry.iova = n->start;
- event.entry.addr_mask = n->end - n->start;
memory_region_notify_iommu_one(n, &event);
}
-
/* invalidate an asid/iova range tuple in all mr's */
static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova,
- uint8_t tg, uint64_t num_pages, bool leaf)
+ uint8_t tg, uint64_t num_pages)
{
SMMUDevice *sdev;
@@ -887,7 +854,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova,
tg, num_pages);
IOMMU_NOTIFIER_FOREACH(n, mr) {
- smmuv3_notify_iova(mr, n, asid, iova, tg, num_pages, leaf);
+ smmuv3_notify_iova(mr, n, asid, iova, tg, num_pages);
}
}
}
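The addr_mask arithmetic in smmuv3_notify_iova() follows the SMMUv3 TG encoding (granule shift = tg * 2 + 10, so TG=1/2/3 select 4KB/16KB/64KB pages). A self-contained illustration, with values chosen arbitrarily:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t tg = 1;                        /* TG=1: 4KB granule */
        uint8_t granule = tg * 2 + 10;         /* shift of 12 */
        uint64_t num_pages = 16;
        uint64_t addr_mask = num_pages * (1ULL << granule) - 1;

        /* prints addr_mask = 0xffff, i.e. a 64KB naturally aligned span */
        printf("addr_mask = 0x%" PRIx64 "\n", addr_mask);
        return 0;
    }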
@@ -912,7 +879,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
if (!tg) {
trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
- smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1, leaf);
+ smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1);
smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
return;
}
@@ -930,71 +897,12 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
num_pages = (mask + 1) >> granule;
trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
- smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages, leaf);
+ smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);
addr += mask + 1;
}
}
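The loop above carves the invalidated range into naturally aligned power-of-two spans so each notification describes a span a single event can encode. A minimal sketch of that splitting step, mirroring what QEMU's dma_aligned_pow2_mask() helper computes (an illustrative re-implementation, not the actual helper, which also caps the mask at a maximum address width):

    #include <stdint.h>

    /* Largest mask = 2^k - 1 such that addr is 2^k-aligned and the span
     * [addr, addr + mask] stays within [addr, end]. */
    static uint64_t aligned_pow2_mask(uint64_t addr, uint64_t end)
    {
        uint64_t mask = UINT64_MAX;

        while (mask && ((addr & mask) || (end - addr < mask))) {
            mask >>= 1;
        }
        return mask;
    }

    /* Caller pattern, as in smmuv3_s1_range_inval():
     *     mask = aligned_pow2_mask(addr, end_addr);
     *     num_pages = (mask + 1) >> granule;
     *     ...notify and invalidate...
     *     addr += mask + 1;
     */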
-static int smmuv3_notify_config_change(SMMUState *bs, uint32_t sid)
-{
-#ifdef __linux__
- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid);
- SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
- .inval_ste_allowed = true};
- IOMMUConfig iommu_config = {};
- SMMUTransCfg *cfg;
- SMMUDevice *sdev;
- int ret;
-
- if (!mr) {
- return 0;
- }
-
- sdev = container_of(mr, SMMUDevice, iommu);
-
- /* flush QEMU config cache */
- smmuv3_flush_config(sdev);
-
- if (!pci_device_is_pasid_ops_set(sdev->bus, sdev->devfn)) {
- return 0;
- }
-
- cfg = smmuv3_get_config(sdev, &event);
-
- if (!cfg) {
- return 0;
- }
-
- iommu_config.pasid_cfg.argsz = sizeof(struct iommu_pasid_table_config);
- iommu_config.pasid_cfg.version = PASID_TABLE_CFG_VERSION_1;
- iommu_config.pasid_cfg.format = IOMMU_PASID_FORMAT_SMMUV3;
- iommu_config.pasid_cfg.base_ptr = cfg->s1ctxptr;
- iommu_config.pasid_cfg.pasid_bits = 0;
- iommu_config.pasid_cfg.vendor_data.smmuv3.version = PASID_TABLE_SMMUV3_CFG_VERSION_1;
-
- if (cfg->disabled || cfg->bypassed) {
- iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_BYPASS;
- } else if (cfg->aborted) {
- iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_ABORT;
- } else {
- iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_TRANSLATE;
- }
-
- trace_smmuv3_notify_config_change(mr->parent_obj.name,
- iommu_config.pasid_cfg.config,
- iommu_config.pasid_cfg.base_ptr);
-
- ret = pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config);
- if (ret) {
- error_report("Failed to pass PASID table to host for iommu mr %s (%m)",
- mr->parent_obj.name);
- }
-
- return ret;
-#endif
-}
-
static gboolean
smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
{
@@ -1005,27 +913,10 @@ smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
if (sid < sid_range->start || sid > sid_range->end) {
return false;
}
- smmuv3_notify_config_change(sid_range->state, sid);
trace_smmuv3_config_cache_inv(sid);
return true;
}
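For context on how this predicate is driven (sketched from the CFGI_STE_RANGE handling further down): cached configs whose StreamID falls inside [start, end] are dropped in a single pass over the hash table.

    SMMUSIDRange sid_range = { .start = start, .end = end };

    /* removes every config cache entry matched by smmuv3_invalidate_ste() */
    g_hash_table_foreach_remove(bs->configs, smmuv3_invalidate_ste, &sid_range);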
-static void smmuv3_s1_asid_inval(SMMUState *s, uint16_t asid)
-{
- SMMUDevice *sdev;
-
- trace_smmuv3_s1_asid_inval(asid);
- QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) {
- IOMMUMemoryRegion *mr = &sdev->iommu;
- IOMMUNotifier *n;
-
- IOMMU_NOTIFIER_FOREACH(n, mr) {
- smmuv3_notify_asid(mr, n, asid);
- }
- }
- smmu_iotlb_inv_asid(s, asid);
-}
-
static int smmuv3_cmdq_consume(SMMUv3State *s)
{
SMMUState *bs = ARM_SMMU(s);
@@ -1076,14 +967,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
case SMMU_CMD_CFGI_STE:
{
uint32_t sid = CMD_SID(&cmd);
+ IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid);
+ SMMUDevice *sdev;
if (CMD_SSEC(&cmd)) {
cmd_error = SMMU_CERROR_ILL;
break;
}
+ if (!mr) {
+ break;
+ }
+
trace_smmuv3_cmdq_cfgi_ste(sid);
- smmuv3_notify_config_change(bs, sid);
+ sdev = container_of(mr, SMMUDevice, iommu);
+ smmuv3_flush_config(sdev);
+
break;
}
case SMMU_CMD_CFGI_STE_RANGE: /* same as SMMU_CMD_CFGI_ALL */
@@ -1098,7 +997,6 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
}
mask = (1ULL << (range + 1)) - 1;
- sid_range.state = bs;
sid_range.start = sid & ~mask;
sid_range.end = sid_range.start + mask;
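A concrete instance of the StreamID range arithmetic above (values chosen purely for illustration):

    uint64_t mask  = (1ULL << (3 + 1)) - 1;  /* range = 3 -> block of 16 SIDs */
    uint32_t start = 0x1234 & ~mask;         /* 0x1230 */
    uint32_t end   = start + mask;           /* 0x123f */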
@@ -1133,7 +1031,8 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
uint16_t asid = CMD_ASID(&cmd);
trace_smmuv3_cmdq_tlbi_nh_asid(asid);
- smmuv3_s1_asid_inval(bs, asid);
+ smmu_inv_notifiers_all(&s->smmu_state);
+ smmu_iotlb_inv_asid(bs, asid);
break;
}
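smmu_inv_notifiers_all() is the coarse replacement for the removed per-ASID notification: it unmaps every registered notifier range on every device. A sketch of the hw/arm/smmu-common.c helper of this period (details assumed):

    void smmu_inv_notifiers_all(SMMUState *s)
    {
        SMMUDevice *sdev;

        QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) {
            /* smmu_inv_notifiers_mr() walks IOMMU_NOTIFIER_FOREACH and
             * calls smmu_unmap_notifier_range() on each notifier */
            smmu_inv_notifiers_mr(&sdev->iommu);
        }
    }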
case SMMU_CMD_TLBI_NH_ALL:
@@ -1561,24 +1460,6 @@ static void smmu_realize(DeviceState *d, Error **errp)
smmu_init_irq(s, dev);
}
-static int smmuv3_post_load(void *opaque, int version_id)
-{
- SMMUv3State *s3 = opaque;
- SMMUState *s = &(s3->smmu_state);
- SMMUDevice *sdev;
- int ret = 0;
-
- QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) {
- uint32_t sid = smmu_get_sid(sdev);
- ret = smmuv3_notify_config_change(s, sid);
- if (ret) {
- break;
- }
- }
-
- return ret;
-}
-
static const VMStateDescription vmstate_smmuv3_queue = {
.name = "smmuv3_queue",
.version_id = 1,
@@ -1597,7 +1478,6 @@ static const VMStateDescription vmstate_smmuv3 = {
.version_id = 1,
.minimum_version_id = 1,
.priority = MIG_PRI_IOMMU,
- .post_load = smmuv3_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT32(features, SMMUv3State),
VMSTATE_UINT8(sid_size, SMMUv3State),
@@ -1655,6 +1535,14 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
return -EINVAL;
}
+ if (new & IOMMU_NOTIFIER_MAP) {
+ error_setg(errp,
+ "device %02x.%02x.%x requires iommu MAP notifier which is "
+ "not currently supported", pci_bus_num(sdev->bus),
+ PCI_SLOT(sdev->devfn), PCI_FUNC(sdev->devfn));
+ return -EINVAL;
+ }
+
if (old == IOMMU_NOTIFIER_NONE) {
trace_smmuv3_notify_flag_add(iommu->parent_obj.name);
QLIST_INSERT_HEAD(&s->devices_with_notifiers, sdev, next);
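The MAP rejection added above is what makes VFIO passthrough fail cleanly behind the vSMMU once nesting is gone: vfio (see hw/vfio/common.c below) registers with IOMMU_NOTIFIER_IOTLB_EVENTS, which includes MAP. Sketch of the relevant flag definitions from include/exec/memory.h (exact values assumed):

    typedef enum {
        IOMMU_NOTIFIER_NONE  = 0,
        IOMMU_NOTIFIER_UNMAP = 0x1,   /* entries going away */
        IOMMU_NOTIFIER_MAP   = 0x2,   /* new entries appearing */
        /* ... */
    } IOMMUNotifierFlag;

    #define IOMMU_NOTIFIER_IOTLB_EVENTS (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP)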
@@ -1665,90 +1553,6 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
return 0;
}
-static int smmuv3_get_attr(IOMMUMemoryRegion *iommu,
- enum IOMMUMemoryRegionAttr attr,
- void *data)
-{
- if (attr == IOMMU_ATTR_VFIO_NESTED) {
- *(bool *) data = true;
- return 0;
- } else if (attr == IOMMU_ATTR_MSI_TRANSLATE) {
- *(bool *) data = true;
- return 0;
- }
- return -EINVAL;
-}
-
-struct iommu_fault;
-
-static inline int
-smmuv3_inject_faults(IOMMUMemoryRegion *iommu_mr, int count,
- struct iommu_fault *buf)
-{
-#ifdef __linux__
- SMMUDevice *sdev = container_of(iommu_mr, SMMUDevice, iommu);
- SMMUv3State *s3 = sdev->smmu;
- uint32_t sid = smmu_get_sid(sdev);
- int i;
-
- for (i = 0; i < count; i++) {
- SMMUEventInfo info = {};
- struct iommu_fault_unrecoverable *record;
-
- if (buf[i].type != IOMMU_FAULT_DMA_UNRECOV) {
- continue;
- }
-
- info.sid = sid;
- record = &buf[i].event;
-
- switch (record->reason) {
- case IOMMU_FAULT_REASON_PASID_INVALID:
- info.type = SMMU_EVT_C_BAD_SUBSTREAMID;
- /* TODO further fill info.u.c_bad_substream */
- break;
- case IOMMU_FAULT_REASON_PASID_FETCH:
- info.type = SMMU_EVT_F_CD_FETCH;
- break;
- case IOMMU_FAULT_REASON_BAD_PASID_ENTRY:
- info.type = SMMU_EVT_C_BAD_CD;
- /* TODO further fill info.u.c_bad_cd */
- break;
- case IOMMU_FAULT_REASON_WALK_EABT:
- info.type = SMMU_EVT_F_WALK_EABT;
- info.u.f_walk_eabt.addr = record->addr;
- info.u.f_walk_eabt.addr2 = record->fetch_addr;
- break;
- case IOMMU_FAULT_REASON_PTE_FETCH:
- info.type = SMMU_EVT_F_TRANSLATION;
- info.u.f_translation.addr = record->addr;
- break;
- case IOMMU_FAULT_REASON_OOR_ADDRESS:
- info.type = SMMU_EVT_F_ADDR_SIZE;
- info.u.f_addr_size.addr = record->addr;
- break;
- case IOMMU_FAULT_REASON_ACCESS:
- info.type = SMMU_EVT_F_ACCESS;
- info.u.f_access.addr = record->addr;
- break;
- case IOMMU_FAULT_REASON_PERMISSION:
- info.type = SMMU_EVT_F_PERMISSION;
- info.u.f_permission.addr = record->addr;
- break;
- default:
- warn_report("%s Unexpected fault reason received from host: %d",
- __func__, record->reason);
- continue;
- }
-
- smmuv3_record_event(s3, &info);
- }
- return 0;
-#else
- return -1;
-#endif
-}
-
static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
void *data)
{
@@ -1756,8 +1560,6 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
imrc->translate = smmuv3_translate;
imrc->notify_flag_changed = smmuv3_notify_flag_changed;
- imrc->get_attr = smmuv3_get_attr;
- imrc->inject_faults = smmuv3_inject_faults;
}
static const TypeInfo smmuv3_type_info = {
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index d9851d663e1bc2d2aa6073038891be87b69cb135..2dee296c8fbca1a8d95b21f30df470c47986489e 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -46,12 +46,10 @@ smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x"
smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d"
-smmuv3_s1_asid_inval(int asid) "asid=%d"
smmuv3_cmdq_tlbi_nh(void) ""
smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d"
smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
-smmuv3_notify_config_change(const char *name, uint8_t config, uint64_t s1ctxptr) "iommu mr=%s config=%d s1ctxptr=0x%"PRIx64
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 6501d93f7e48c4272cbe7681cdeda2e2f1b5c880..5b865ac08c0797b981fcb761807337747ed1bc3a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1197,7 +1197,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
uint32_t offset;
uint64_t slpte;
uint64_t subpage_size, subpage_mask;
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
uint64_t iova = start;
uint64_t iova_next;
int ret = 0;
@@ -2431,7 +2431,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
VTDAddressSpace *vtd_dev_as;
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
struct VTDBus *vtd_bus;
hwaddr addr;
uint64_t sz;
@@ -3487,7 +3487,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
size = remain = end - start + 1;
while (remain >= VTD_PAGE_SIZE) {
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
uint64_t mask = dma_aligned_pow2_mask(start, end, s->aw_bits);
uint64_t size = mask + 1;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 0743dc7c42be9c7ffa9aa6fc550576803af83667..40e2516d99c5a6ae00809df5f8a0df1210719d57 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2763,56 +2763,6 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
bus->iommu_opaque = opaque;
}
-void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
-{
- assert(ops && !dev->pasid_ops);
- dev->pasid_ops = ops;
-}
-
-bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn)
-{
- PCIDevice *dev;
-
- if (!bus) {
- return false;
- }
-
- dev = bus->devices[devfn];
- return !!(dev && dev->pasid_ops);
-}
-
-int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn,
- IOMMUConfig *config)
-{
- PCIDevice *dev;
-
- if (!bus) {
- return -EINVAL;
- }
-
- dev = bus->devices[devfn];
- if (dev && dev->pasid_ops && dev->pasid_ops->set_pasid_table) {
- return dev->pasid_ops->set_pasid_table(bus, devfn, config);
- }
- return -ENOENT;
-}
-
-int pci_device_return_page_response(PCIBus *bus, int32_t devfn,
- IOMMUPageResponse *resp)
-{
- PCIDevice *dev;
-
- if (!bus) {
- return -EINVAL;
- }
-
- dev = bus->devices[devfn];
- if (dev && dev->pasid_ops && dev->pasid_ops->return_page_response) {
- return dev->pasid_ops->return_page_response(bus, devfn, resp);
- }
- return -ENOENT;
-}
-
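What is removed here is the bridge the vSMMU used to push its guest PASID table into VFIO. The ops structure came from the series being reverted (its two callbacks are visible in vfio_pci_pasid_ops further down in hw/vfio/pci.c); a sketch reconstructed from those call sites:

    typedef struct PCIPASIDOps {
        int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config);
        int (*return_page_response)(PCIBus *bus, int32_t devfn,
                                    IOMMUPageResponse *resp);
    } PCIPASIDOps;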
static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
{
Range *range = opaque;
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 454df25d441094029ea33181d48ef1d69939d00f..db010718589b0b384efa7049b86f0a61de162211 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -449,7 +449,7 @@ static void spapr_tce_reset(DeviceState *dev)
static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba,
target_ulong tce)
{
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 4d45c2b6257faa53c1374486cf94f82189280ff5..6cb91e7ffd264434e7b696ce35558e9d27efcf8c 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -707,132 +707,6 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
return true;
}
-/* Propagate a guest IOTLB invalidation to the host (nested mode) */
-static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
-{
- VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
- struct vfio_iommu_type1_cache_invalidate ustruct = {};
- VFIOContainer *container = giommu->container;
- int ret;
-
- assert(iotlb->perm == IOMMU_NONE);
-
- ustruct.argsz = sizeof(ustruct);
- ustruct.flags = 0;
- ustruct.info.argsz = sizeof(struct iommu_cache_invalidate_info);
- ustruct.info.version = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1;
- ustruct.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB;
-
- switch (iotlb->granularity) {
- case IOMMU_INV_GRAN_DOMAIN:
- ustruct.info.granularity = IOMMU_INV_GRANU_DOMAIN;
- break;
- case IOMMU_INV_GRAN_PASID:
- {
- struct iommu_inv_pasid_info *pasid_info;
- int archid = -1;
-
- pasid_info = &ustruct.info.granu.pasid_info;
- ustruct.info.granularity = IOMMU_INV_GRANU_PASID;
- if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) {
- pasid_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID;
- archid = iotlb->arch_id;
- }
- pasid_info->archid = archid;
- trace_vfio_iommu_asid_inv_iotlb(archid);
- break;
- }
- case IOMMU_INV_GRAN_ADDR:
- {
- hwaddr start = iotlb->iova + giommu->iommu_offset;
- struct iommu_inv_addr_info *addr_info;
- size_t size = iotlb->addr_mask + 1;
- int archid = -1;
-
- addr_info = &ustruct.info.granu.addr_info;
- ustruct.info.granularity = IOMMU_INV_GRANU_ADDR;
- if (iotlb->leaf) {
- addr_info->flags |= IOMMU_INV_ADDR_FLAGS_LEAF;
- }
- if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) {
- addr_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID;
- archid = iotlb->arch_id;
- }
- addr_info->archid = archid;
- addr_info->addr = start;
- addr_info->granule_size = size;
- addr_info->nb_granules = 1;
- trace_vfio_iommu_addr_inv_iotlb(archid, start, size,
- 1, iotlb->leaf);
- break;
- }
- }
-
- ret = ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, &ustruct);
- if (ret) {
- error_report("%p: failed to invalidate CACHE (%d)", container, ret);
- }
-}
-
-int vfio_iommu_set_msi_binding(VFIOContainer *container, int n,
- IOMMUTLBEntry *iotlb)
-{
- struct vfio_iommu_type1_set_msi_binding ustruct;
- VFIOMSIBinding *binding;
- int ret;
-
- QLIST_FOREACH(binding, &container->msibinding_list, next) {
- if (binding->index == n) {
- return 0;
- }
- }
-
- ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding);
- ustruct.iova = iotlb->iova;
- ustruct.flags = VFIO_IOMMU_BIND_MSI;
- ustruct.gpa = iotlb->translated_addr;
- ustruct.size = iotlb->addr_mask + 1;
- ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct);
- if (ret) {
- error_report("%s: failed to register the stage1 MSI binding (%m)",
- __func__);
- return ret;
- }
- binding = g_new0(VFIOMSIBinding, 1);
- binding->iova = ustruct.iova;
- binding->gpa = ustruct.gpa;
- binding->size = ustruct.size;
- binding->index = n;
-
- QLIST_INSERT_HEAD(&container->msibinding_list, binding, next);
- return 0;
-}
-
-int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n)
-{
- struct vfio_iommu_type1_set_msi_binding ustruct;
- VFIOMSIBinding *binding, *tmp;
- int ret;
-
- ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding);
- QLIST_FOREACH_SAFE(binding, &container->msibinding_list, next, tmp) {
- if (binding->index != n) {
- continue;
- }
- ustruct.flags = VFIO_IOMMU_UNBIND_MSI;
- ustruct.iova = binding->iova;
- ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct);
- if (ret) {
- error_report("Failed to unregister the stage1 MSI binding "
- "for iova=0x%"PRIx64" (%m)", binding->iova);
- }
- QLIST_REMOVE(binding, next);
- g_free(binding);
- return ret;
- }
- return 0;
-}
-
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
@@ -1035,174 +909,16 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
g_free(vrdl);
}
-static VFIOHostDMAWindow *
-hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end)
-{
- VFIOHostDMAWindow *hostwin;
-
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
- return hostwin;
- }
- }
- return NULL;
-}
-
-static int vfio_dma_map_ram_section(VFIOContainer *container,
- MemoryRegionSection *section, Error **err)
-{
- VFIOHostDMAWindow *hostwin;
- Int128 llend, llsize;
- hwaddr iova, end;
- void *vaddr;
- int ret;
-
- assert(memory_region_is_ram(section->mr));
-
- iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
- llend = int128_make64(section->offset_within_address_space);
- llend = int128_add(llend, section->size);
- llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
- end = int128_get64(int128_sub(llend, int128_one()));
-
- vaddr = memory_region_get_ram_ptr(section->mr) +
- section->offset_within_region +
- (iova - section->offset_within_address_space);
-
- hostwin = hostwin_from_range(container, iova, end);
- if (!hostwin) {
- error_setg(err, "Container %p can't map guest IOVA region"
- " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
- return -EFAULT;
- }
-
- trace_vfio_dma_map_ram(iova, end, vaddr);
-
- llsize = int128_sub(llend, int128_make64(iova));
-
- if (memory_region_is_ram_device(section->mr)) {
- hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
-
- if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
- trace_vfio_listener_region_add_no_dma_map(
- memory_region_name(section->mr),
- section->offset_within_address_space,
- int128_getlo(section->size),
- pgmask + 1);
- return 0;
- }
- }
-
- ret = vfio_dma_map(container, iova, int128_get64(llsize),
- vaddr, section->readonly);
- if (ret) {
- error_setg(err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iova, int128_get64(llsize), vaddr, ret);
- if (memory_region_is_ram_device(section->mr)) {
- /* Allow unexpected mappings not to be fatal for RAM devices */
- error_report_err(*err);
- return 0;
- }
- return ret;
- }
- return 0;
-}
-
-static void vfio_dma_unmap_ram_section(VFIOContainer *container,
- MemoryRegionSection *section)
-{
- Int128 llend, llsize;
- hwaddr iova, end;
- bool try_unmap = true;
- int ret;
-
- iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
- llend = int128_make64(section->offset_within_address_space);
- llend = int128_add(llend, section->size);
- llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
-
- if (int128_ge(int128_make64(iova), llend)) {
- return;
- }
- end = int128_get64(int128_sub(llend, int128_one()));
-
- llsize = int128_sub(llend, int128_make64(iova));
-
- trace_vfio_dma_unmap_ram(iova, end);
-
- if (memory_region_is_ram_device(section->mr)) {
- hwaddr pgmask;
- VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
-
- assert(hostwin); /* or region_add() would have failed */
-
- pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
- try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
- } else if (memory_region_has_ram_discard_manager(section->mr)) {
- vfio_unregister_ram_discard_listener(container, section);
- /* Unregistering will trigger an unmap. */
- try_unmap = false;
- }
-
- if (try_unmap) {
- if (int128_eq(llsize, int128_2_64())) {
- /* The unmap ioctl doesn't accept a full 64-bit span. */
- llsize = int128_rshift(llsize, 1);
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
- if (ret) {
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova, int128_get64(llsize), ret);
- }
- iova += int128_get64(llsize);
- }
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
- if (ret) {
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova, int128_get64(llsize), ret);
- }
- }
-}
-
-static void vfio_prereg_listener_region_add(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- VFIOContainer *container =
- container_of(listener, VFIOContainer, prereg_listener);
- Error *err = NULL;
-
- if (!memory_region_is_ram(section->mr)) {
- return;
- }
-
- vfio_dma_map_ram_section(container, section, &err);
- if (err) {
- error_report_err(err);
- }
-}
-static void vfio_prereg_listener_region_del(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- VFIOContainer *container =
- container_of(listener, VFIOContainer, prereg_listener);
-
- if (!memory_region_is_ram(section->mr)) {
- return;
- }
-
- vfio_dma_unmap_ram_section(container, section);
-}
-
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
hwaddr iova, end;
- Int128 llend;
+ Int128 llend, llsize;
+ void *vaddr;
int ret;
VFIOHostDMAWindow *hostwin;
+ bool hostwin_found;
Error *err = NULL;
if (vfio_listener_skipped_section(section)) {
@@ -1295,8 +1011,15 @@ static void vfio_listener_region_add(MemoryListener *listener,
#endif
}
- hostwin = hostwin_from_range(container, iova, end);
- if (!hostwin) {
+ hostwin_found = false;
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
+ hostwin_found = true;
+ break;
+ }
+ }
+
+ if (!hostwin_found) {
error_setg(&err, "Container %p can't map guest IOVA region"
" 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
goto fail;
@@ -1305,10 +1028,9 @@ static void vfio_listener_region_add(MemoryListener *listener,
memory_region_ref(section->mr);
if (memory_region_is_iommu(section->mr)) {
- IOMMUNotify notify;
VFIOGuestIOMMU *giommu;
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
- int iommu_idx, flags;
+ int iommu_idx;
trace_vfio_listener_region_add_iommu(iova, end);
/*
@@ -1327,18 +1049,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
llend = int128_sub(llend, int128_one());
iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
MEMTXATTRS_UNSPECIFIED);
-
- if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
- /* IOTLB unmap notifier to propagate guest IOTLB invalidations */
- flags = IOMMU_NOTIFIER_UNMAP;
- notify = vfio_iommu_unmap_notify;
- } else {
- /* MAP/UNMAP IOTLB notifier */
- flags = IOMMU_NOTIFIER_IOTLB_EVENTS;
- notify = vfio_iommu_map_notify;
- }
-
- iommu_notifier_init(&giommu->n, notify, flags,
+ iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
+ IOMMU_NOTIFIER_IOTLB_EVENTS,
section->offset_within_region,
int128_get64(llend),
iommu_idx);
@@ -1358,9 +1070,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
goto fail;
}
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
- if (flags & IOMMU_NOTIFIER_MAP) {
- memory_region_iommu_replay(giommu->iommu, &giommu->n);
- }
+ memory_region_iommu_replay(giommu->iommu, &giommu->n);
return;
}
@@ -1377,7 +1087,38 @@ static void vfio_listener_region_add(MemoryListener *listener,
return;
}
- if (vfio_dma_map_ram_section(container, section, &err)) {
+ vaddr = memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region +
+ (iova - section->offset_within_address_space);
+
+ trace_vfio_listener_region_add_ram(iova, end, vaddr);
+
+ llsize = int128_sub(llend, int128_make64(iova));
+
+ if (memory_region_is_ram_device(section->mr)) {
+ hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+
+ if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
+ trace_vfio_listener_region_add_no_dma_map(
+ memory_region_name(section->mr),
+ section->offset_within_address_space,
+ int128_getlo(section->size),
+ pgmask + 1);
+ return;
+ }
+ }
+
+ ret = vfio_dma_map(container, iova, int128_get64(llsize),
+ vaddr, section->readonly);
+ if (ret) {
+ error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx", %p) = %d (%m)",
+ container, iova, int128_get64(llsize), vaddr, ret);
+ if (memory_region_is_ram_device(section->mr)) {
+ /* Allow unexpected mappings not to be fatal for RAM devices */
+ error_report_err(err);
+ return;
+ }
goto fail;
}
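To make the alignment check above concrete (illustrative host window values): iova_pgsizes is a bitmap of supported page sizes, so its lowest set bit gives the smallest mappable page.

    /* host supporting 4KB, 2MB and 1GB pages */
    uint64_t iova_pgsizes = (1ULL << 12) | (1ULL << 21) | (1ULL << 30);
    hwaddr pgmask = (1ULL << ctz64(iova_pgsizes)) - 1;   /* 0xfff */

    /* a ram-device section whose IOVA or size is not 4KB-aligned is
     * skipped (and traced) instead of being DMA-mapped */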
@@ -1411,8 +1152,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- hwaddr iova;
- Int128 llend;
+ hwaddr iova, end;
+ Int128 llend, llsize;
+ int ret;
+ bool try_unmap = true;
if (vfio_listener_skipped_section(section)) {
trace_vfio_listener_region_del_skip(
@@ -1443,16 +1186,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
}
- /*
- * In nested mode, stage 2 (gpa->hpa) and the stage 1
- * (giova->gpa) are set separately. The ram section
- * will be unmapped in vfio_prereg_listener_region_del().
- * Hence it doesn't need to unmap ram section here.
- */
- if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
- return;
- }
-
/*
* FIXME: We assume the one big unmap below is adequate to
* remove any individual page mappings in the IOMMU which
@@ -1466,11 +1199,56 @@ static void vfio_listener_region_del(MemoryListener *listener,
llend = int128_make64(section->offset_within_address_space);
llend = int128_add(llend, section->size);
llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
+
if (int128_ge(int128_make64(iova), llend)) {
return;
}
+ end = int128_get64(int128_sub(llend, int128_one()));
- vfio_dma_unmap_ram_section(container, section);
+ llsize = int128_sub(llend, int128_make64(iova));
+
+ trace_vfio_listener_region_del(iova, end);
+
+ if (memory_region_is_ram_device(section->mr)) {
+ hwaddr pgmask;
+ VFIOHostDMAWindow *hostwin;
+ bool hostwin_found = false;
+
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
+ hostwin_found = true;
+ break;
+ }
+ }
+ assert(hostwin_found); /* or region_add() would have failed */
+
+ pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+ try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
+ } else if (memory_region_has_ram_discard_manager(section->mr)) {
+ vfio_unregister_ram_discard_listener(container, section);
+ /* Unregistering will trigger an unmap. */
+ try_unmap = false;
+ }
+
+ if (try_unmap) {
+ if (int128_eq(llsize, int128_2_64())) {
+ /* The unmap ioctl doesn't accept a full 64-bit span. */
+ llsize = int128_rshift(llsize, 1);
+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
+ if (ret) {
+ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%m)",
+ container, iova, int128_get64(llsize), ret);
+ }
+ iova += int128_get64(llsize);
+ }
+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
+ if (ret) {
+ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%m)",
+ container, iova, int128_get64(llsize), ret);
+ }
+ }
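The int128_2_64() special case above exists because the unmap ioctl's 64-bit size field cannot encode a full 2^64-byte span, so the listener issues two 2^63 unmaps instead. The kernel-side request, from linux/vfio.h (fields abridged):

    struct vfio_iommu_type1_dma_unmap {
        __u32 argsz;
        __u32 flags;
        __u64 iova;   /* IO virtual address to start unmapping */
        __u64 size;   /* bytes to unmap; cannot represent 2^64 */
    };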
memory_region_unref(section->mr);
@@ -1511,17 +1289,6 @@ static void vfio_listener_log_global_start(MemoryListener *listener)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- /* For nested mode, vfio_prereg_listener is used to start dirty tracking */
- if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) {
- vfio_set_dirty_page_tracking(container, true);
- }
-}
-
-static void vfio_prereg_listener_log_global_start(MemoryListener *listener)
-{
- VFIOContainer *container =
- container_of(listener, VFIOContainer, prereg_listener);
-
vfio_set_dirty_page_tracking(container, true);
}
@@ -1529,17 +1296,6 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- /* For nested mode, vfio_prereg_listener is used to stop dirty tracking */
- if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) {
- vfio_set_dirty_page_tracking(container, false);
- }
-}
-
-static void vfio_prereg_listener_log_global_stop(MemoryListener *listener)
-{
- VFIOContainer *container =
- container_of(listener, VFIOContainer, prereg_listener);
-
vfio_set_dirty_page_tracking(container, false);
}
@@ -1598,35 +1354,6 @@ err_out:
return ret;
}
-static int vfio_dma_sync_ram_section_dirty_bitmap(VFIOContainer *container,
- MemoryRegionSection *section)
-{
- ram_addr_t ram_addr;
-
- ram_addr = memory_region_get_ram_addr(section->mr) +
- section->offset_within_region;
-
- return vfio_get_dirty_bitmap(container,
- REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
- int128_get64(section->size), ram_addr);
-}
-
-static void vfio_prereg_listener_log_sync(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- VFIOContainer *container =
- container_of(listener, VFIOContainer, prereg_listener);
-
- if (!memory_region_is_ram(section->mr) ||
- !container->dirty_pages_supported) {
- return;
- }
-
- if (vfio_devices_all_dirty_tracking(container)) {
- vfio_dma_sync_ram_section_dirty_bitmap(container, section);
- }
-}
-
typedef struct {
IOMMUNotifier n;
VFIOGuestIOMMU *giommu;
@@ -1711,19 +1438,11 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
MemoryRegionSection *section)
{
+ ram_addr_t ram_addr;
+
if (memory_region_is_iommu(section->mr)) {
VFIOGuestIOMMU *giommu;
- /*
- * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are
- * set up separately. It is inappropriate to pass 'giova' to kernel
- * to get dirty pages. We only need to focus on stage 2 mapping when
- * marking dirty pages.
- */
- if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
- return 0;
- }
-
QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
if (MEMORY_REGION(giommu->iommu) == section->mr &&
giommu->n.start == section->offset_within_region) {
@@ -1751,7 +1470,12 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
}
- return vfio_dma_sync_ram_section_dirty_bitmap(container, section);
+ ram_addr = memory_region_get_ram_addr(section->mr) +
+ section->offset_within_region;
+
+ return vfio_get_dirty_bitmap(container,
+ REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
+ int128_get64(section->size), ram_addr);
}
static void vfio_listener_log_sync(MemoryListener *listener,
@@ -1889,43 +1613,6 @@ static int vfio_physical_log_clear(VFIOContainer *container,
return ret;
}
-static void vfio_prereg_listener_log_clear(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- VFIOContainer *container =
- container_of(listener, VFIOContainer, prereg_listener);
-
- if (!memory_region_is_ram(section->mr)) {
- return;
- }
-
- vfio_physical_log_clear(container, section);
-}
-
-static int vfio_clear_dirty_bitmap(VFIOContainer *container,
- MemoryRegionSection *section)
-{
- if (memory_region_is_iommu(section->mr)) {
- /*
- * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are
- * set up separately. It is inappropriate to pass 'giova' to kernel
- * to get dirty pages. We only need to focus on stage 2 mapping when
- * marking dirty pages.
- */
- if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
- return 0;
- }
-
- /*
-     * TODO: x86. With the log_clear() interface added, x86 may implement
- * its own method.
- */
- }
-
- /* Here we assume that memory_region_is_ram(section->mr) == true */
- return vfio_physical_log_clear(container, section);
-}
-
static void vfio_listener_log_clear(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -1937,7 +1624,7 @@ static void vfio_listener_log_clear(MemoryListener *listener,
}
if (vfio_devices_all_dirty_tracking(container)) {
- vfio_clear_dirty_bitmap(container, section);
+ vfio_physical_log_clear(container, section);
}
}
@@ -1951,20 +1638,10 @@ static const MemoryListener vfio_memory_listener = {
.log_clear = vfio_listener_log_clear,
};
-static MemoryListener vfio_memory_prereg_listener = {
- .region_add = vfio_prereg_listener_region_add,
- .region_del = vfio_prereg_listener_region_del,
- .log_global_start = vfio_prereg_listener_log_global_start,
- .log_global_stop = vfio_prereg_listener_log_global_stop,
- .log_sync = vfio_prereg_listener_log_sync,
- .log_clear = vfio_prereg_listener_log_clear,
-};
-
static void vfio_listener_release(VFIOContainer *container)
{
memory_listener_unregister(&container->listener);
- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
- container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
memory_listener_unregister(&container->prereg_listener);
}
}
@@ -2034,25 +1711,6 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
return true;
}
-struct vfio_info_cap_header *
-vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id)
-{
- struct vfio_info_cap_header *hdr;
- void *ptr = info;
-
- if (!(info->flags & VFIO_IRQ_INFO_FLAG_CAPS)) {
- return NULL;
- }
-
- for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
- if (hdr->id == id) {
- return hdr;
- }
- }
-
- return NULL;
-}
-
static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
struct vfio_region_info *info)
{
@@ -2387,38 +2045,27 @@ static void vfio_put_address_space(VFIOAddressSpace *space)
* vfio_get_iommu_type - selects the richest iommu_type (v2 first)
*/
static int vfio_get_iommu_type(VFIOContainer *container,
- bool want_nested,
Error **errp)
{
- int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
- VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+ int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
- int i, ret = -EINVAL;
+ int i;
for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
- if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
- continue;
- }
- ret = iommu_types[i];
- break;
+ return iommu_types[i];
}
}
- if (ret < 0) {
- error_setg(errp, "No available IOMMU models");
- } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
- error_setg(errp, "Nested mode requested but not supported");
- ret = -EINVAL;
- }
- return ret;
+ error_setg(errp, "No available IOMMU models");
+ return -EINVAL;
}
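The selection above relies on VFIO_CHECK_EXTENSION, which returns a positive value when the container's backend supports the queried IOMMU model, so the first hit in the richest-first list wins. A hypothetical standalone probe:

    #include <stdbool.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    static bool container_supports(int container_fd, int iommu_type)
    {
        /* e.g. iommu_type == VFIO_TYPE1v2_IOMMU */
        return ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_type) > 0;
    }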
static int vfio_init_container(VFIOContainer *container, int group_fd,
- bool want_nested, Error **errp)
+ Error **errp)
{
int iommu_type, dirty_log_manual_clear, ret;
- iommu_type = vfio_get_iommu_type(container, want_nested, errp);
+ iommu_type = vfio_get_iommu_type(container, errp);
if (iommu_type < 0) {
return iommu_type;
}
@@ -2530,14 +2177,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
VFIOContainer *container;
int ret, fd;
VFIOAddressSpace *space;
- IOMMUMemoryRegion *iommu_mr;
- bool nested = false;
-
- if (memory_region_is_iommu(as->root)) {
- iommu_mr = IOMMU_MEMORY_REGION(as->root);
- memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED,
- (void *)&nested);
- }
space = vfio_get_address_space(as);
@@ -2618,7 +2257,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
QLIST_INIT(&container->vrdl_list);
QLIST_INIT(&container->dma_list);
- ret = vfio_init_container(container, group->fd, nested, errp);
+ ret = vfio_init_container(container, group->fd, errp);
if (ret) {
goto free_container_exit;
}
@@ -2630,7 +2269,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
}
switch (container->iommu_type) {
- case VFIO_TYPE1_NESTING_IOMMU:
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_IOMMU:
{
@@ -2659,20 +2297,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
vfio_get_iommu_info_migration(container, info);
}
g_free(info);
-
- if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
- container->prereg_listener = vfio_memory_prereg_listener;
- memory_listener_register(&container->prereg_listener,
- &address_space_memory);
- if (container->error) {
- memory_listener_unregister(&container->prereg_listener);
- ret = -1;
- error_propagate_prepend(errp, container->error,
- "RAM memory listener initialization failed "
- "for container");
- goto free_container_exit;
- }
- }
break;
}
case VFIO_SPAPR_TCE_v2_IOMMU:
@@ -3021,33 +2645,6 @@ retry:
return 0;
}
-int vfio_get_irq_info(VFIODevice *vbasedev, int index,
- struct vfio_irq_info **info)
-{
- size_t argsz = sizeof(struct vfio_irq_info);
-
- *info = g_malloc0(argsz);
-
- (*info)->index = index;
-retry:
- (*info)->argsz = argsz;
-
- if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, *info)) {
- g_free(*info);
- *info = NULL;
- return -errno;
- }
-
- if ((*info)->argsz > argsz) {
- argsz = (*info)->argsz;
- *info = g_realloc(*info, argsz);
-
- goto retry;
- }
-
- return 0;
-}
-
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info)
{
@@ -3083,42 +2680,6 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
return -ENODEV;
}
-int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type,
- uint32_t subtype, struct vfio_irq_info **info)
-{
- int i;
-
- for (i = 0; i < vbasedev->num_irqs; i++) {
- struct vfio_info_cap_header *hdr;
- struct vfio_irq_info_cap_type *cap_type;
-
- if (vfio_get_irq_info(vbasedev, i, info)) {
- continue;
- }
-
- hdr = vfio_get_irq_info_cap(*info, VFIO_IRQ_INFO_CAP_TYPE);
- if (!hdr) {
- g_free(*info);
- continue;
- }
-
- cap_type = container_of(hdr, struct vfio_irq_info_cap_type, header);
-
- trace_vfio_get_dev_irq(vbasedev->name, i,
- cap_type->type, cap_type->subtype);
-
- if (cap_type->type == type && cap_type->subtype == subtype) {
- return 0;
- }
-
- g_free(*info);
- }
-
- *info = NULL;
- return -ENODEV;
-}
-
-
bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
struct vfio_region_info *info = NULL;
@@ -3134,21 +2695,6 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
return ret;
}
-bool vfio_has_irq_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
-{
- struct vfio_region_info *info = NULL;
- bool ret = false;
-
- if (!vfio_get_region_info(vbasedev, region, &info)) {
- if (vfio_get_region_info_cap(info, cap_type)) {
- ret = true;
- }
- g_free(info);
- }
-
- return ret;
-}
-
/*
* Interfaces for IBM EEH (Enhanced Error Handling)
*/
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8e24f9c7d1bd576e6aa5db575dafd7c6edddd2f9..7b45353ce27ffb17d2ef4bcd7bdea18535f06d19 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -365,65 +365,6 @@ static void vfio_msi_interrupt(void *opaque)
notify(&vdev->pdev, nr);
}
-static bool vfio_iommu_require_msi_binding(IOMMUMemoryRegion *iommu_mr)
-{
- bool msi_translate = false, nested = false;
-
- memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_MSI_TRANSLATE,
- (void *)&msi_translate);
- memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED,
- (void *)&nested);
- if (!nested || !msi_translate) {
- return false;
- }
- return true;
-}
-
-static int vfio_register_msi_binding(VFIOPCIDevice *vdev,
- int vector_n, bool set)
-{
- VFIOContainer *container = vdev->vbasedev.group->container;
- PCIDevice *dev = &vdev->pdev;
- AddressSpace *as = pci_device_iommu_address_space(dev);
- IOMMUMemoryRegionClass *imrc;
- IOMMUMemoryRegion *iommu_mr;
- IOMMUTLBEntry entry;
- MSIMessage msg;
-
- if (as == &address_space_memory) {
- return 0;
- }
-
- iommu_mr = IOMMU_MEMORY_REGION(as->root);
- if (!vfio_iommu_require_msi_binding(iommu_mr)) {
- return 0;
- }
-
- /* MSI doorbell address is translated by an IOMMU */
-
- if (!set) { /* unregister */
- trace_vfio_unregister_msi_binding(vdev->vbasedev.name, vector_n);
-
- return vfio_iommu_unset_msi_binding(container, vector_n);
- }
-
- msg = pci_get_msi_message(dev, vector_n);
- imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
-
- rcu_read_lock();
- entry = imrc->translate(iommu_mr, msg.address, IOMMU_WO, 0);
- rcu_read_unlock();
-
- if (entry.perm == IOMMU_NONE) {
- return -ENOENT;
- }
-
- trace_vfio_register_msi_binding(vdev->vbasedev.name, vector_n,
- msg.address, entry.translated_addr);
-
- return vfio_iommu_set_msi_binding(container, vector_n, &entry);
-}
-
static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
{
struct vfio_irq_set *irq_set;
@@ -441,7 +382,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
fds = (int32_t *)&irq_set->data;
for (i = 0; i < vdev->nr_vectors; i++) {
- int ret, fd = -1;
+ int fd = -1;
/*
* MSI vs MSI-X - The guest has direct access to MSI mask and pending
@@ -450,12 +391,6 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
* KVM signaling path only when configured and unmasked.
*/
if (vdev->msi_vectors[i].use) {
- ret = vfio_register_msi_binding(vdev, i, true);
- if (ret) {
- error_report("%s failed to register S1 MSI binding "
- "for vector %d(%d)", vdev->vbasedev.name, i, ret);
- goto out;
- }
if (vdev->msi_vectors[i].virq < 0 ||
(msix && msix_is_masked(&vdev->pdev, i))) {
fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
@@ -469,7 +404,6 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
-out:
g_free(irq_set);
return ret;
@@ -784,8 +718,7 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
static void vfio_msix_disable(VFIOPCIDevice *vdev)
{
- int ret, i;
-
+ int i;
msix_unset_vector_notifiers(&vdev->pdev);
@@ -797,11 +730,6 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
if (vdev->msi_vectors[i].use) {
vfio_msix_vector_release(&vdev->pdev, i);
msix_vector_unuse(&vdev->pdev, i);
- ret = vfio_register_msi_binding(vdev, i, false);
- if (ret) {
- error_report("%s: failed to unregister S1 MSI binding "
- "for vector %d(%d)", vdev->vbasedev.name, i, ret);
- }
}
}
@@ -2638,122 +2566,11 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
return 0;
}
-static void vfio_init_fault_regions(VFIOPCIDevice *vdev, Error **errp)
-{
- struct vfio_region_info *fault_region_info = NULL;
- struct vfio_region_info_cap_fault *cap_fault;
- VFIODevice *vbasedev = &vdev->vbasedev;
- struct vfio_info_cap_header *hdr;
- char *fault_region_name;
- int ret;
-
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
- VFIO_REGION_TYPE_NESTED,
- VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
- &fault_region_info);
- if (ret) {
- goto out;
- }
-
- hdr = vfio_get_region_info_cap(fault_region_info,
- VFIO_REGION_INFO_CAP_DMA_FAULT);
- if (!hdr) {
- error_setg(errp, "failed to retrieve DMA FAULT capability");
- goto out;
- }
- cap_fault = container_of(hdr, struct vfio_region_info_cap_fault,
- header);
- if (cap_fault->version != 1) {
- error_setg(errp, "Unsupported DMA FAULT API version %d",
- cap_fault->version);
- goto out;
- }
-
- fault_region_name = g_strdup_printf("%s DMA FAULT %d",
- vbasedev->name,
- fault_region_info->index);
-
- ret = vfio_region_setup(OBJECT(vdev), vbasedev,
- &vdev->dma_fault_region,
- fault_region_info->index,
- fault_region_name);
- g_free(fault_region_name);
- if (ret) {
- error_setg_errno(errp, -ret,
- "failed to set up the DMA FAULT region %d",
- fault_region_info->index);
- goto out;
- }
-
- ret = vfio_region_mmap(&vdev->dma_fault_region);
- if (ret) {
- error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT queue");
- }
-out:
- g_free(fault_region_info);
-}
-
-static void vfio_init_fault_response_regions(VFIOPCIDevice *vdev, Error **errp)
-{
- struct vfio_region_info *fault_region_info = NULL;
- struct vfio_region_info_cap_fault *cap_fault;
- VFIODevice *vbasedev = &vdev->vbasedev;
- struct vfio_info_cap_header *hdr;
- char *fault_region_name;
- int ret;
-
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
- VFIO_REGION_TYPE_NESTED,
- VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE,
- &fault_region_info);
- if (ret) {
- goto out;
- }
-
- hdr = vfio_get_region_info_cap(fault_region_info,
- VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE);
- if (!hdr) {
- error_setg(errp, "failed to retrieve DMA FAULT RESPONSE capability");
- goto out;
- }
- cap_fault = container_of(hdr, struct vfio_region_info_cap_fault,
- header);
- if (cap_fault->version != 1) {
- error_setg(errp, "Unsupported DMA FAULT RESPONSE API version %d",
- cap_fault->version);
- goto out;
- }
-
- fault_region_name = g_strdup_printf("%s DMA FAULT RESPONSE %d",
- vbasedev->name,
- fault_region_info->index);
-
- ret = vfio_region_setup(OBJECT(vdev), vbasedev,
- &vdev->dma_fault_response_region,
- fault_region_info->index,
- fault_region_name);
- g_free(fault_region_name);
- if (ret) {
- error_setg_errno(errp, -ret,
- "failed to set up the DMA FAULT RESPONSE region %d",
- fault_region_info->index);
- goto out;
- }
-
- ret = vfio_region_mmap(&vdev->dma_fault_response_region);
- if (ret) {
- error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT RESPONSE queue");
- }
-out:
- g_free(fault_region_info);
-}
-
static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
struct vfio_region_info *reg_info;
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
- Error *err = NULL;
int i, ret = -1;
/* Sanity check device */
@@ -2817,18 +2634,6 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
}
}
- vfio_init_fault_regions(vdev, &err);
- if (err) {
- error_propagate(errp, err);
- return;
- }
-
- vfio_init_fault_response_regions(vdev, &err);
- if (err) {
- error_propagate(errp, err);
- return;
- }
-
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
@@ -2992,205 +2797,6 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
-static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn,
- IOMMUConfig *config)
-{
- PCIDevice *pdev = bus->devices[devfn];
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
- VFIOContainer *container = vdev->vbasedev.group->container;
- struct vfio_iommu_type1_set_pasid_table info;
-
- info.argsz = sizeof(info);
- info.flags = VFIO_PASID_TABLE_FLAG_SET;
- memcpy(&info.config, &config->pasid_cfg, sizeof(config->pasid_cfg));
-
- return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info);
-}
-
-static int vfio_iommu_return_page_response(PCIBus *bus, int32_t devfn,
- IOMMUPageResponse *resp)
-{
- PCIDevice *pdev = bus->devices[devfn];
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
- struct iommu_page_response *response = &resp->resp;
- struct vfio_region_dma_fault_response header;
- struct iommu_page_response *queue;
- char *queue_buffer = NULL;
- ssize_t bytes;
-
- if (!vdev->dma_fault_response_region.mem) {
- return -EINVAL;
- }
-
- /* read the header */
- bytes = pread(vdev->vbasedev.fd, &header, sizeof(header),
- vdev->dma_fault_response_region.fd_offset);
- if (bytes != sizeof(header)) {
- error_report("%s unable to read the fault region header (0x%lx)",
- __func__, bytes);
- return -1;
- }
-
- /* Normally the fault queue is mmapped */
- queue = (struct iommu_page_response *)vdev->dma_fault_response_region.mmaps[0].mmap;
- if (!queue) {
- size_t queue_size = header.nb_entries * header.entry_size;
-
- error_report("%s: fault queue not mmapped: slower fault handling",
- vdev->vbasedev.name);
-
- queue_buffer = g_malloc(queue_size);
- bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size,
- vdev->dma_fault_response_region.fd_offset + header.offset);
- if (bytes != queue_size) {
- error_report("%s unable to read the fault queue (0x%lx)",
- __func__, bytes);
- return -1;
- }
-
- queue = (struct iommu_page_response *)queue_buffer;
- }
- /* deposit the new response in the queue and increment the head */
- memcpy(queue + header.head, response, header.entry_size);
-
- vdev->fault_response_head_index =
- (vdev->fault_response_head_index + 1) % header.nb_entries;
- bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index, 4,
- vdev->dma_fault_response_region.fd_offset);
- if (bytes != 4) {
- error_report("%s unable to write the fault response region head index (0x%lx)",
- __func__, bytes);
- }
- g_free(queue_buffer);
-
- return 0;
-}
-
-static PCIPASIDOps vfio_pci_pasid_ops = {
- .set_pasid_table = vfio_iommu_set_pasid_table,
- .return_page_response = vfio_iommu_return_page_response,
-};
-
-static void vfio_dma_fault_notifier_handler(void *opaque)
-{
- VFIOPCIExtIRQ *ext_irq = opaque;
- VFIOPCIDevice *vdev = ext_irq->vdev;
- PCIDevice *pdev = &vdev->pdev;
- AddressSpace *as = pci_device_iommu_address_space(pdev);
- IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(as->root);
- struct vfio_region_dma_fault header;
- struct iommu_fault *queue;
- char *queue_buffer = NULL;
- ssize_t bytes;
-
- if (!event_notifier_test_and_clear(&ext_irq->notifier)) {
- return;
- }
-
- bytes = pread(vdev->vbasedev.fd, &header, sizeof(header),
- vdev->dma_fault_region.fd_offset);
- if (bytes != sizeof(header)) {
- error_report("%s unable to read the fault region header (0x%lx)",
- __func__, bytes);
- return;
- }
-
- /* Normally the fault queue is mmapped */
- queue = (struct iommu_fault *)vdev->dma_fault_region.mmaps[0].mmap;
- if (!queue) {
- size_t queue_size = header.nb_entries * header.entry_size;
-
- error_report("%s: fault queue not mmapped: slower fault handling",
- vdev->vbasedev.name);
-
- queue_buffer = g_malloc(queue_size);
- bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size,
- vdev->dma_fault_region.fd_offset + header.offset);
- if (bytes != queue_size) {
- error_report("%s unable to read the fault queue (0x%lx)",
- __func__, bytes);
- return;
- }
-
- queue = (struct iommu_fault *)queue_buffer;
- }
-
- while (vdev->fault_tail_index != header.head) {
- memory_region_inject_faults(iommu_mr, 1,
- &queue[vdev->fault_tail_index]);
- vdev->fault_tail_index =
- (vdev->fault_tail_index + 1) % header.nb_entries;
- }
- bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_tail_index, 4,
- vdev->dma_fault_region.fd_offset);
- if (bytes != 4) {
- error_report("%s unable to write the fault region tail index (0x%lx)",
- __func__, bytes);
- }
- g_free(queue_buffer);
-}
-
-static int vfio_register_ext_irq_handler(VFIOPCIDevice *vdev,
- uint32_t type, uint32_t subtype,
- IOHandler *handler)
-{
- int32_t fd, ext_irq_index, index;
- struct vfio_irq_info *irq_info;
- Error *err = NULL;
- EventNotifier *n;
- int ret;
-
- ret = vfio_get_dev_irq_info(&vdev->vbasedev, type, subtype, &irq_info);
- if (ret) {
- return ret;
- }
- index = irq_info->index;
- ext_irq_index = irq_info->index - VFIO_PCI_NUM_IRQS;
- g_free(irq_info);
-
- vdev->ext_irqs[ext_irq_index].vdev = vdev;
- vdev->ext_irqs[ext_irq_index].index = index;
- n = &vdev->ext_irqs[ext_irq_index].notifier;
-
- ret = event_notifier_init(n, 0);
- if (ret) {
- error_report("vfio: Unable to init event notifier for ext irq %d(%d)",
- ext_irq_index, ret);
- return ret;
- }
-
- fd = event_notifier_get_fd(n);
- qemu_set_fd_handler(fd, vfio_dma_fault_notifier_handler, NULL,
- &vdev->ext_irqs[ext_irq_index]);
-
- ret = vfio_set_irq_signaling(&vdev->vbasedev, index, 0,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err);
- if (ret) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(n);
- }
- return ret;
-}
-
-static void vfio_unregister_ext_irq_notifiers(VFIOPCIDevice *vdev)
-{
- VFIODevice *vbasedev = &vdev->vbasedev;
- Error *err = NULL;
- int i;
-
- for (i = 0; i < vbasedev->num_irqs - VFIO_PCI_NUM_IRQS; i++) {
- if (vfio_set_irq_signaling(vbasedev, i + VFIO_PCI_NUM_IRQS , 0,
- VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
- qemu_set_fd_handler(event_notifier_get_fd(&vdev->ext_irqs[i].notifier),
- NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->ext_irqs[i].notifier);
- }
- g_free(vdev->ext_irqs);
-}
-
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -3201,7 +2807,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
ssize_t len;
struct stat st;
int groupid;
- int i, ret, nb_ext_irqs;
+ int i, ret;
bool is_mdev;
if (!vdev->vbasedev.sysfsdev) {
@@ -3289,11 +2895,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- nb_ext_irqs = vdev->vbasedev.num_irqs - VFIO_PCI_NUM_IRQS;
- if (nb_ext_irqs > 0) {
- vdev->ext_irqs = g_new0(VFIOPCIExtIRQ, nb_ext_irqs);
- }
-
vfio_populate_device(vdev, &err);
if (err) {
error_propagate(errp, err);
@@ -3505,13 +3106,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
- vfio_register_ext_irq_handler(vdev, VFIO_IRQ_TYPE_NESTED,
- VFIO_IRQ_SUBTYPE_DMA_FAULT,
- vfio_dma_fault_notifier_handler);
vfio_setup_resetfn_quirk(vdev);
- pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops);
-
return;
out_deregister:
@@ -3531,8 +3127,6 @@ static void vfio_instance_finalize(Object *obj)
vfio_display_finalize(vdev);
vfio_bars_finalize(vdev);
- vfio_region_finalize(&vdev->dma_fault_region);
- vfio_region_finalize(&vdev->dma_fault_response_region);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
/*
@@ -3552,9 +3146,6 @@ static void vfio_exitfn(PCIDevice *pdev)
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
- vfio_unregister_ext_irq_notifiers(vdev);
- vfio_region_exit(&vdev->dma_fault_region);
- vfio_region_exit(&vdev->dma_fault_response_region);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 61b3bf1303bfa8518d2154adead8015067388a7e..64777516d16d2cce5b20f151784b9c42d71b5349 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -114,12 +114,6 @@ typedef struct VFIOMSIXInfo {
unsigned long *pending;
} VFIOMSIXInfo;
-typedef struct VFIOPCIExtIRQ {
- struct VFIOPCIDevice *vdev;
- EventNotifier notifier;
- uint32_t index;
-} VFIOPCIExtIRQ;
-
#define TYPE_VFIO_PCI "vfio-pci"
OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
@@ -144,11 +138,6 @@ struct VFIOPCIDevice {
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
- VFIOPCIExtIRQ *ext_irqs;
- VFIORegion dma_fault_region;
- uint32_t fault_tail_index;
- VFIORegion dma_fault_response_region;
- uint32_t fault_response_head_index;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t vendor_id;
uint32_t device_id;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index f5fe201ab5e0a9ee3cc8142fac4581371262f925..0ef1b5f4a65ff38171380c91877002d13035bc34 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -99,10 +99,10 @@ vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i
vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64
vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
-vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
+vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
-vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
+vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
vfio_disconnect_container(int fd) "close container->fd=%d"
vfio_put_group(int fd) "close group->fd=%d"
vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
@@ -117,12 +117,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
-vfio_get_dev_irq(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
vfio_dma_unmap_overflow_workaround(void) ""
-vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d"
-vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d"
-vfio_register_msi_binding(const char *name, int vector, uint64_t giova, uint64_t gdb) "%s: register vector %d gIOVA=0x%"PRIx64 "-> gDB=0x%"PRIx64" stage 1 mapping"
-vfio_unregister_msi_binding(const char *name, int vector) "%s: unregister vector %d stage 1 mapping"
# platform.c
vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 83ed2b82e6a0a8edd5f2aff744c6e0bf6fbf7c79..1b23e8e18c780dd176ad3a54e4873b45889d9cc5 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -129,7 +129,7 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end, hwaddr paddr,
uint32_t flags)
{
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
flags & VIRTIO_IOMMU_MAP_F_WRITE);
@@ -154,7 +154,7 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end)
{
- IOMMUTLBEvent event = {};
+ IOMMUTLBEvent event;
uint64_t delta = virt_end - virt_start;
if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 7c3fe69d52445d2ddd51c68ce4538046c820a77e..4b5b431e45f6fe22a8f63ddc818efc7393f99c17 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -106,8 +106,6 @@ struct MemoryRegionSection {
bool nonvolatile;
};
-struct iommu_fault;
-
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
/* See address_space_translate: bit 0 is read, bit 1 is write. */
@@ -118,48 +116,14 @@ typedef enum {
IOMMU_RW = 3,
} IOMMUAccessFlags;
-/* Granularity of the cache invalidation */
-typedef enum {
- IOMMU_INV_GRAN_ADDR = 0,
- IOMMU_INV_GRAN_PASID,
- IOMMU_INV_GRAN_DOMAIN,
-} IOMMUInvGranularity;
-
#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))
-/**
- * struct IOMMUTLBEntry - IOMMU TLB entry
- *
- * Structure used when performing a translation or when notifying MAP or
- * UNMAP (invalidation) events
- *
- * @target_as: target address space
- * @iova: IO virtual address (input)
- * @translated_addr: translated address (output)
- * @addr_mask: address mask (0xfff means 4K binding), must be multiple of 2
- * @perm: permission flag of the mapping (NONE encodes no mapping or
- * invalidation notification)
- * @granularity: granularity of the invalidation
- * @flags: informs whether the following fields are set
- * @arch_id: architecture specific ID tagging the TLB
- * @pasid: PASID tagging the TLB
- * @leaf: when @perm is NONE, indicates whether only caches for the last
- * level of translation need to be invalidated.
- */
struct IOMMUTLBEntry {
AddressSpace *target_as;
hwaddr iova;
hwaddr translated_addr;
- hwaddr addr_mask;
+ hwaddr addr_mask; /* 0xfff = 4k translation */
IOMMUAccessFlags perm;
- IOMMUInvGranularity granularity;
-#define IOMMU_INV_FLAGS_PASID (1 << 0)
-#define IOMMU_INV_FLAGS_ARCHID (1 << 1)
-#define IOMMU_INV_FLAGS_LEAF (1 << 2)
- uint32_t flags;
- uint32_t arch_id;
- uint32_t pasid;
- bool leaf;
};
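
With the invalidation-specific fields gone, IOMMUTLBEntry is back to the layout above, where addr_mask carries the span of the translation as a low-bit mask. A minimal sketch of a 4K MAP entry under that convention (all values hypothetical):

    IOMMUTLBEntry entry = {
        .target_as       = &address_space_memory,
        .iova            = 0x100000,     /* input IOVA, 4K aligned */
        .translated_addr = 0x80000000,   /* output address */
        .addr_mask       = 0xfff,        /* 4K translation */
        .perm            = IOMMU_RW,
    };
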
/*
@@ -326,9 +290,7 @@ typedef struct MemoryRegionClass {
enum IOMMUMemoryRegionAttr {
- IOMMU_ATTR_SPAPR_TCE_FD,
- IOMMU_ATTR_VFIO_NESTED,
- IOMMU_ATTR_MSI_TRANSLATE,
+ IOMMU_ATTR_SPAPR_TCE_FD
};
/*
@@ -528,19 +490,6 @@ struct IOMMUMemoryRegionClass {
int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
uint64_t page_size_mask,
Error **errp);
-
- /*
- * Inject @count faults into the IOMMU memory region
- *
- * Optional method: if this method is not provided, then
- * memory_region_injection_faults() will return -ENOENT
- *
- * @iommu: the IOMMU memory region to inject the faults in
- * @count: number of faults to inject
- * @buf: fault buffer
- */
- int (*inject_faults)(IOMMUMemoryRegion *iommu, int count,
- struct iommu_fault *buf);
};
typedef struct RamDiscardListener RamDiscardListener;
@@ -1837,15 +1786,6 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr);
int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
uint64_t page_size_mask,
Error **errp);
-/**
- * memory_region_inject_faults : inject @count faults stored in @buf
- *
- * @iommu_mr: the IOMMU memory region
- * @count: number of faults to be injected
- * @buf: buffer containing the faults
- */
-int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count,
- struct iommu_fault *buf);
/**
* memory_region_name: get a memory region's name
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index d578339935190290f13dec2a6f5c7b757bdb2b02..706be3c6d0a4f0b8fc4ecd6c16b2a2f642d7d89c 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -76,7 +76,6 @@ typedef struct SMMUTransCfg {
uint8_t tbi; /* Top Byte Ignore */
uint16_t asid;
SMMUTransTableInfo tt[2];
- dma_addr_t s1ctxptr;
uint32_t iotlb_hits; /* counts IOTLB hits for this asid */
uint32_t iotlb_misses; /* counts IOTLB misses for this asid */
} SMMUTransCfg;
diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h
deleted file mode 100644
index 5890f095b14b413a829079c4950f85dd2ea47cc3..0000000000000000000000000000000000000000
--- a/include/hw/iommu/iommu.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * common header for iommu devices
- *
- * Copyright Red Hat, Inc. 2019
- *
- * Authors:
- * Eric Auger <eric.auger@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#ifndef QEMU_HW_IOMMU_IOMMU_H
-#define QEMU_HW_IOMMU_IOMMU_H
-#ifdef __linux__
-#include <linux/iommu.h>
-#endif
-
-typedef struct IOMMUConfig {
- union {
-#ifdef __linux__
- struct iommu_pasid_table_config pasid_cfg;
-#endif
- };
-} IOMMUConfig;
-
-typedef struct IOMMUPageResponse {
- union {
-#ifdef __linux__
- struct iommu_page_response resp;
-#endif
- };
-} IOMMUPageResponse;
-
-
-#endif /* QEMU_HW_IOMMU_IOMMU_H */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index bfe3a6bca7637fb0fd3081ca2f83114cdf9427ee..5b36334a28a626248fddce59881daaaaf7012f9b 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -9,7 +9,6 @@
#include "hw/pci/pcie.h"
#include "qom/object.h"
-#include "hw/iommu/iommu.h"
extern bool pci_available;
@@ -266,13 +265,6 @@ struct PCIReqIDCache {
};
typedef struct PCIReqIDCache PCIReqIDCache;
-struct PCIPASIDOps {
- int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config);
- int (*return_page_response)(PCIBus *bus, int32_t devfn,
- IOMMUPageResponse *resp);
-};
-typedef struct PCIPASIDOps PCIPASIDOps;
-
struct PCIDevice {
DeviceState qdev;
bool partially_hotplugged;
@@ -369,7 +361,6 @@ struct PCIDevice {
/* ID of standby device in net_failover pair */
char *failover_pair_id;
uint32_t acpi_index;
- PCIPASIDOps *pasid_ops;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,
@@ -507,12 +498,6 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
-void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops);
-bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn);
-int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config);
-int pci_device_return_page_response(PCIBus *bus, int32_t devfn,
- IOMMUPageResponse *resp);
-
static inline void
pci_set_byte(uint8_t *config, uint8_t val)
{
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 7fdca26fa046a1bfe7044ee0d9a57533398ebbba..0234f5e1b1627012021b976fddf2e4f8161b24e7 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -74,14 +74,6 @@ typedef struct VFIOAddressSpace {
QLIST_ENTRY(VFIOAddressSpace) list;
} VFIOAddressSpace;
-typedef struct VFIOMSIBinding {
- int index;
- hwaddr iova;
- hwaddr gpa;
- hwaddr size;
- QLIST_ENTRY(VFIOMSIBinding) next;
-} VFIOMSIBinding;
-
struct VFIOGroup;
typedef struct VFIODMARange {
@@ -111,7 +103,6 @@ typedef struct VFIOContainer {
QLIST_HEAD(, VFIOGroup) group_list;
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
QLIST_HEAD(, VFIODMARange) dma_list;
- QLIST_HEAD(, VFIOMSIBinding) msibinding_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
@@ -231,9 +222,6 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
void vfio_put_group(VFIOGroup *group);
int vfio_get_device(VFIOGroup *group, const char *name,
VFIODevice *vbasedev, Error **errp);
-int vfio_iommu_set_msi_binding(VFIOContainer *container, int n,
- IOMMUTLBEntry *entry);
-int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n);
extern const MemoryRegionOps vfio_region_ops;
typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
@@ -254,13 +242,6 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
unsigned int *avail);
struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
-int vfio_get_irq_info(VFIODevice *vbasedev, int index,
- struct vfio_irq_info **info);
-int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type,
- uint32_t subtype, struct vfio_irq_info **info);
-bool vfio_has_irq_cap(VFIODevice *vbasedev, int irq, uint16_t cap_type);
-struct vfio_info_cap_header *
-vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id);
#endif
extern const MemoryListener vfio_prereg_listener;
diff --git a/linux-headers/linux/iommu.h b/linux-headers/linux/iommu.h
deleted file mode 100644
index 773b7dc2d695b49f2423059907b4e93afb30a73d..0000000000000000000000000000000000000000
--- a/linux-headers/linux/iommu.h
+++ /dev/null
@@ -1,395 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * IOMMU user API definitions
- */
-
-#ifndef IOMMU_H
-#define IOMMU_H
-
-#include <linux/types.h>
-
-#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
-#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
-#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
-#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
-
-/* Generic fault types, can be expanded IRQ remapping fault */
-enum iommu_fault_type {
- IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
- IOMMU_FAULT_PAGE_REQ, /* page request fault */
-};
-
-enum iommu_fault_reason {
- IOMMU_FAULT_REASON_UNKNOWN = 0,
-
- /* Could not access the PASID table (fetch caused external abort) */
- IOMMU_FAULT_REASON_PASID_FETCH,
-
- /* PASID entry is invalid or has configuration errors */
- IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
-
- /*
- * PASID is out of range (e.g. exceeds the maximum PASID
- * supported by the IOMMU) or disabled.
- */
- IOMMU_FAULT_REASON_PASID_INVALID,
-
- /*
- * An external abort occurred fetching (or updating) a translation
- * table descriptor
- */
- IOMMU_FAULT_REASON_WALK_EABT,
-
- /*
- * Could not access the page table entry (Bad address),
- * actual translation fault
- */
- IOMMU_FAULT_REASON_PTE_FETCH,
-
- /* Protection flag check failed */
- IOMMU_FAULT_REASON_PERMISSION,
-
- /* access flag check failed */
- IOMMU_FAULT_REASON_ACCESS,
-
- /* Output address of a translation stage caused Address Size fault */
- IOMMU_FAULT_REASON_OOR_ADDRESS,
-};
-
-/**
- * struct iommu_fault_unrecoverable - Unrecoverable fault data
- * @reason: reason of the fault, from &enum iommu_fault_reason
- * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
- * @pasid: Process Address Space ID
- * @perm: requested permission access used by the incoming transaction
- * (IOMMU_FAULT_PERM_* values)
- * @addr: offending page address
- * @fetch_addr: address that caused a fetch abort, if any
- */
-struct iommu_fault_unrecoverable {
- __u32 reason;
-#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
-#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
-#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
- __u32 flags;
- __u32 pasid;
- __u32 perm;
- __u64 addr;
- __u64 fetch_addr;
-};
-
-/**
- * struct iommu_fault_page_request - Page Request data
- * @flags: encodes whether the corresponding fields are valid and whether this
- * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values).
- * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response
- * must have the same PASID value as the page request. When it is clear,
- * the page response should not have a PASID.
- * @pasid: Process Address Space ID
- * @grpid: Page Request Group Index
- * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
- * @addr: page address
- * @private_data: device-specific private information
- */
-struct iommu_fault_page_request {
-#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
-#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
-#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
-#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 perm;
- __u64 addr;
- __u64 private_data[2];
-};
-
-/**
- * struct iommu_fault - Generic fault data
- * @type: fault type from &enum iommu_fault_type
- * @padding: reserved for future use (should be zero)
- * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
- * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
- * @padding2: sets the fault size to allow for future extensions
- */
-struct iommu_fault {
- __u32 type;
- __u32 padding;
- union {
- struct iommu_fault_unrecoverable event;
- struct iommu_fault_page_request prm;
- __u8 padding2[56];
- };
-};
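
Consumers of this record are expected to demultiplex on @type before touching the union; a minimal sketch:

    static void handle_fault(const struct iommu_fault *f)
    {
        switch (f->type) {
        case IOMMU_FAULT_DMA_UNRECOV:
            /* f->event is valid: report the unrecoverable fault */
            break;
        case IOMMU_FAULT_PAGE_REQ:
            /* f->prm is valid: answer later with a
             * struct iommu_page_response keyed by pasid/grpid */
            break;
        }
    }
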
-
-/**
- * enum iommu_page_response_code - Return status of fault handlers
- * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
- * populated, retry the access. This is "Success" in PCI PRI.
- * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
- * this device if possible. This is "Response Failure" in PCI PRI.
- * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
- * access. This is "Invalid Request" in PCI PRI.
- */
-enum iommu_page_response_code {
- IOMMU_PAGE_RESP_SUCCESS = 0,
- IOMMU_PAGE_RESP_INVALID,
- IOMMU_PAGE_RESP_FAILURE,
-};
-
-/**
- * struct iommu_page_response - Generic page response information
- * @argsz: User filled size of this data
- * @version: API version of this structure
- * @flags: encodes whether the corresponding fields are valid
- * (IOMMU_FAULT_PAGE_RESPONSE_* values)
- * @pasid: Process Address Space ID
- * @grpid: Page Request Group Index
- * @code: response code from &enum iommu_page_response_code
- */
-struct iommu_page_response {
- __u32 argsz;
-#define IOMMU_PAGE_RESP_VERSION_1 1
- __u32 version;
-#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
- __u32 flags;
- __u32 pasid;
- __u32 grpid;
- __u32 code;
-};
-
-/* defines the granularity of the invalidation */
-enum iommu_inv_granularity {
- IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */
- IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */
- IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */
- IOMMU_INV_GRANU_NR, /* number of invalidation granularities */
-};
-
-/**
- * struct iommu_inv_addr_info - Address Selective Invalidation Structure
- *
- * @flags: indicates the granularity of the address-selective invalidation
- * - If the PASID bit is set, the @pasid field is populated and the invalidation
- * relates to cache entries tagged with this PASID and matching the address
- * range.
- * - If ARCHID bit is set, @archid is populated and the invalidation relates
- * to cache entries tagged with this architecture specific ID and matching
- * the address range.
- * - Both PASID and ARCHID can be set as they may tag different caches.
- * - If neither PASID nor ARCHID is set, global addr invalidation applies.
- * - The LEAF flag indicates whether only the leaf PTE caching needs to be
- * invalidated and other paging structure caches can be preserved.
- * @pasid: process address space ID
- * @archid: architecture-specific ID
- * @addr: first stage/level input address
- * @granule_size: page/block size of the mapping in bytes
- * @nb_granules: number of contiguous granules to be invalidated
- */
-struct iommu_inv_addr_info {
-#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0)
-#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1)
-#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2)
- __u32 flags;
- __u32 archid;
- __u64 pasid;
- __u64 addr;
- __u64 granule_size;
- __u64 nb_granules;
-};
-
-/**
- * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure
- *
- * @flags: indicates the granularity of the PASID-selective invalidation
- * - If the PASID bit is set, the @pasid field is populated and the invalidation
- * relates to cache entries tagged with this PASID and matching the address
- * range.
- * - If the ARCHID bit is set, the @archid is populated and the invalidation
- * relates to cache entries tagged with this architecture specific ID and
- * matching the address range.
- * - Both PASID and ARCHID can be set as they may tag different caches.
- * - At least one of PASID or ARCHID must be set.
- * @pasid: process address space ID
- * @archid: architecture-specific ID
- */
-struct iommu_inv_pasid_info {
-#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0)
-#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1)
- __u32 flags;
- __u32 archid;
- __u64 pasid;
-};
-
-/**
- * struct iommu_cache_invalidate_info - First level/stage invalidation
- * information
- * @argsz: User filled size of this data
- * @version: API version of this structure
- * @cache: bitfield that allows to select which caches to invalidate
- * @granularity: defines the lowest granularity used for the invalidation:
- * domain > PASID > addr
- * @padding: reserved for future use (should be zero)
- * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID
- * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR
- *
- * Not all the combinations of cache/granularity are valid:
- *
- * +--------------+---------------+---------------+---------------+
- * | type / | DEV_IOTLB | IOTLB | PASID |
- * | granularity | | | cache |
- * +==============+===============+===============+===============+
- * | DOMAIN | N/A | Y | Y |
- * +--------------+---------------+---------------+---------------+
- * | PASID | Y | Y | Y |
- * +--------------+---------------+---------------+---------------+
- * | ADDR | Y | Y | N/A |
- * +--------------+---------------+---------------+---------------+
- *
- * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than
- * @version and @cache.
- *
- * If multiple cache types are invalidated simultaneously, they all
- * must support the used granularity.
- */
-struct iommu_cache_invalidate_info {
- __u32 argsz;
-#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1
- __u32 version;
-/* IOMMU paging structure cache */
-#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */
-#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */
-#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */
-#define IOMMU_CACHE_INV_TYPE_NR (3)
- __u8 cache;
- __u8 granularity;
- __u8 padding[6];
- union {
- struct iommu_inv_pasid_info pasid_info;
- struct iommu_inv_addr_info addr_info;
- } granu;
-};
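
For illustration, an ASID-tagged, address-selective IOTLB invalidation consistent with the cache/granularity table above would be filled roughly as follows (values hypothetical):

    struct iommu_cache_invalidate_info info = {
        .argsz       = sizeof(info),
        .version     = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1,
        .cache       = IOMMU_CACHE_INV_TYPE_IOTLB,
        .granularity = IOMMU_INV_GRANU_ADDR,
        .granu.addr_info = {
            .flags        = IOMMU_INV_ADDR_FLAGS_ARCHID,
            .archid       = 1,          /* e.g. an SMMU ASID */
            .addr         = 0x100000,
            .granule_size = 0x1000,     /* 4K granule */
            .nb_granules  = 16,         /* 64K range */
        },
    };
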
-
-/**
- * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest
- * SVA binding.
- *
- * @flags: VT-d PASID table entry attributes
- * @pat: Page attribute table data to compute effective memory type
- * @emt: Extended memory type
- *
- * Only guest vIOMMU selectable and effective options are passed down to
- * the host IOMMU.
- */
-struct iommu_gpasid_bind_data_vtd {
-#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */
-#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */
-#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */
-#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */
-#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */
-#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */
-#define IOMMU_SVA_VTD_GPASID_LAST (1 << 6)
- __u64 flags;
- __u32 pat;
- __u32 emt;
-};
-
-#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \
- IOMMU_SVA_VTD_GPASID_EMTE | \
- IOMMU_SVA_VTD_GPASID_PCD | \
- IOMMU_SVA_VTD_GPASID_PWT)
-
-/**
- * struct iommu_gpasid_bind_data - Information about device and guest PASID binding
- * @argsz: User filled size of this data
- * @version: Version of this data structure
- * @format: PASID table entry format
- * @flags: Additional information on guest bind request
- * @gpgd: Guest page directory base of the guest mm to bind
- * @hpasid: Process address space ID used for the guest mm in host IOMMU
- * @gpasid: Process address space ID used for the guest mm in guest IOMMU
- * @addr_width: Guest virtual address width
- * @padding: Reserved for future use (should be zero)
- * @vtd: Intel VT-d specific data
- *
- * Guest to host PASID mapping can be an identity or non-identity, where guest
- * has its own PASID space. For non-identity mapping, guest to host PASID lookup
- * is needed when VM programs guest PASID into an assigned device. VMM may
- * trap such PASID programming then request host IOMMU driver to convert guest
- * PASID to host PASID based on this bind data.
- */
-struct iommu_gpasid_bind_data {
- __u32 argsz;
-#define IOMMU_GPASID_BIND_VERSION_1 1
- __u32 version;
-#define IOMMU_PASID_FORMAT_INTEL_VTD 1
-#define IOMMU_PASID_FORMAT_LAST 2
- __u32 format;
- __u32 addr_width;
-#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */
- __u64 flags;
- __u64 gpgd;
- __u64 hpasid;
- __u64 gpasid;
- __u8 padding[8];
- /* Vendor specific data */
- union {
- struct iommu_gpasid_bind_data_vtd vtd;
- } vendor;
-};
-
-/**
- * struct iommu_pasid_smmuv3 - ARM SMMUv3 Stream Table Entry stage 1 related
- * information
- * @version: API version of this structure
- * @s1fmt: STE s1fmt (format of the CD table: single CD, linear table
- * or 2-level table)
- * @s1dss: STE s1dss (specifies the behavior when @pasid_bits != 0
- * and no PASID is passed along with the incoming transaction)
- * @padding: reserved for future use (should be zero)
- *
- * The PASID table is referred to as the Context Descriptor (CD) table on ARM
- * SMMUv3. Please refer to the ARM SMMU 3.x spec (ARM IHI 0070A) for full
- * details.
- */
-struct iommu_pasid_smmuv3 {
-#define PASID_TABLE_SMMUV3_CFG_VERSION_1 1
- __u32 version;
- __u8 s1fmt;
- __u8 s1dss;
- __u8 padding[2];
-};
-
-/**
- * struct iommu_pasid_table_config - PASID table data used to bind guest PASID
- * table to the host IOMMU
- * @argsz: User filled size of this data
- * @version: API version to prepare for future extensions
- * @base_ptr: guest physical address of the PASID table
- * @format: format of the PASID table
- * @pasid_bits: number of PASID bits used in the PASID table
- * @config: indicates whether the guest translation stage must
- * be translated, bypassed or aborted.
- * @padding: reserved for future use (should be zero)
- * @vendor_data.smmuv3: table information when @format is
- * %IOMMU_PASID_FORMAT_SMMUV3
- */
-struct iommu_pasid_table_config {
- __u32 argsz;
-#define PASID_TABLE_CFG_VERSION_1 1
- __u32 version;
- __u64 base_ptr;
-#define IOMMU_PASID_FORMAT_SMMUV3 1
- __u32 format;
- __u8 pasid_bits;
-#define IOMMU_PASID_CONFIG_TRANSLATE 1
-#define IOMMU_PASID_CONFIG_BYPASS 2
-#define IOMMU_PASID_CONFIG_ABORT 3
- __u8 config;
- __u8 padding[2];
- union {
- struct iommu_pasid_smmuv3 smmuv3;
- } vendor_data;
-};
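
A guest CD (PASID) table would be described to the host roughly as sketched below; base_ptr and pasid_bits are hypothetical values:

    struct iommu_pasid_table_config cfg = {
        .argsz      = sizeof(cfg),
        .version    = PASID_TABLE_CFG_VERSION_1,
        .base_ptr   = 0x40000000,          /* guest PA of the CD table */
        .format     = IOMMU_PASID_FORMAT_SMMUV3,
        .pasid_bits = 8,
        .config     = IOMMU_PASID_CONFIG_TRANSLATE,
        .vendor_data.smmuv3 = {
            .version = PASID_TABLE_SMMUV3_CFG_VERSION_1,
            .s1fmt   = 0,                  /* linear CD table */
        },
    };
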
-
-#endif /* _UAPI_IOMMU_H */
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index cf8e208fac50a6436e1257cabb2cf1fc6b164620..f4ff038e8c2d9b7ab29a92f6b89a0332cb061b78 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -14,7 +14,6 @@
#include <linux/types.h>
#include <linux/ioctl.h>
-#include <linux/iommu.h>
#define VFIO_API_VERSION 0
@@ -335,7 +334,6 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_TYPE_GFX (1)
#define VFIO_REGION_TYPE_CCW (2)
#define VFIO_REGION_TYPE_MIGRATION (3)
-#define VFIO_REGION_TYPE_NESTED (4)
/* sub-types for VFIO_REGION_TYPE_PCI_* */
@@ -364,10 +362,6 @@ struct vfio_region_info_cap_type {
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
-/* sub-types for VFIO_REGION_TYPE_NESTED */
-#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT (1)
-#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE (2)
-
/**
* struct vfio_region_gfx_edid - EDID region layout.
*
@@ -727,30 +721,11 @@ struct vfio_irq_info {
#define VFIO_IRQ_INFO_MASKABLE (1 << 1)
#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2)
#define VFIO_IRQ_INFO_NORESIZE (1 << 3)
-#define VFIO_IRQ_INFO_FLAG_CAPS (1 << 4) /* Info supports caps */
__u32 index; /* IRQ index */
__u32 count; /* Number of IRQs within this index */
- __u32 cap_offset; /* Offset within info struct of first cap */
};
#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9)
-/*
- * The irq type capability allows IRQs unique to a specific device or
- * class of devices to be exposed.
- *
- * The structures below define version 1 of this capability.
- */
-#define VFIO_IRQ_INFO_CAP_TYPE 3
-
-struct vfio_irq_info_cap_type {
- struct vfio_info_cap_header header;
- __u32 type; /* global per bus driver */
- __u32 subtype; /* type specific */
-};
-
-#define VFIO_IRQ_TYPE_NESTED (1)
-#define VFIO_IRQ_SUBTYPE_DMA_FAULT (1)
-
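
When VFIO_IRQ_INFO_FLAG_CAPS is set, cap_offset points at a chain of vfio_info_cap_header entries. A minimal sketch of the walk, assuming `info` was fetched with an argsz large enough to hold the chain:

    struct vfio_info_cap_header *hdr;
    uint32_t off = info->cap_offset;

    while (off) {
        hdr = (struct vfio_info_cap_header *)((char *)info + off);
        if (hdr->id == VFIO_IRQ_INFO_CAP_TYPE) {
            struct vfio_irq_info_cap_type *cap = (void *)hdr;
            /* match cap->type/cap->subtype, e.g.
             * VFIO_IRQ_TYPE_NESTED / VFIO_IRQ_SUBTYPE_DMA_FAULT */
        }
        off = hdr->next;
    }
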
/**
* VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
*
@@ -852,8 +827,7 @@ enum {
VFIO_PCI_MSIX_IRQ_INDEX,
VFIO_PCI_ERR_IRQ_INDEX,
VFIO_PCI_REQ_IRQ_INDEX,
- VFIO_PCI_NUM_IRQS = 5 /* Fixed user ABI, IRQ indexes >=5 use */
- /* device specific cap to define content */
+ VFIO_PCI_NUM_IRQS
};
/*
@@ -1038,68 +1012,6 @@ struct vfio_device_feature {
*/
#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0)
-/*
- * Capability exposed by the DMA fault region
- * @version: ABI version
- */
-#define VFIO_REGION_INFO_CAP_DMA_FAULT 6
-
-struct vfio_region_info_cap_fault {
- struct vfio_info_cap_header header;
- __u32 version;
-};
-
-/*
- * Capability exposed by the DMA fault response region
- * @version: ABI version
- */
-#define VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE 7
-
-struct vfio_region_info_cap_fault_response {
- struct vfio_info_cap_header header;
- __u32 version;
-};
-
-/*
- * DMA Fault Region Layout
- * @tail: index relative to the start of the ring buffer at which the
- * consumer finds the next item in the buffer
- * @entry_size: fault ring buffer entry size in bytes
- * @nb_entries: max capacity of the fault ring buffer
- * @offset: ring buffer offset relative to the start of the region
- * @head: index relative to the start of the ring buffer at which the
- * producer (kernel) inserts items into the buffer
- */
-struct vfio_region_dma_fault {
- /* Write-Only */
- __u32 tail;
- /* Read-Only */
- __u32 entry_size;
- __u32 nb_entries;
- __u32 offset;
- __u32 head;
-};
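
Userspace would consume this region roughly as sketched below, pairing with the ring-drain loop shown earlier; device_fd and region_fd_offset are hypothetical:

    struct vfio_region_dma_fault hdr;

    /* read the producer index and ring geometry from the region head */
    if (pread(device_fd, &hdr, sizeof(hdr), region_fd_offset) != sizeof(hdr)) {
        /* handle error */
    }
    /* entries live at region_fd_offset + hdr.offset; consume from the
     * saved tail up to hdr.head, then pwrite() the new tail back */
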
-
-/*
- * DMA Fault Response Region Layout
- * @head: index relative to the start of the ring buffer at which the
- * producer (userspace) inserts responses into the buffer
- * @entry_size: fault ring buffer entry size in bytes
- * @nb_entries: max capacity of the fault ring buffer
- * @offset: ring buffer offset relative to the start of the region
- * @tail: index relative to the start of the ring buffer at which the
- * consumer (kernel) finds the next item in the buffer
- */
-struct vfio_region_dma_fault_response {
- /* Write-Only */
- __u32 head;
- /* Read-Only */
- __u32 entry_size;
- __u32 nb_entries;
- __u32 offset;
- __u32 tail;
-};
-
/* -------- API for Type1 VFIO IOMMU -------- */
/**
@@ -1212,7 +1124,7 @@ struct vfio_iommu_type1_dma_map {
struct vfio_bitmap {
__u64 pgsize; /* page size for bitmap in bytes */
__u64 size; /* in bytes */
- __u64 *data; /* one bit per page */
+ __u64 *data; /* one bit per page */
};
/**
@@ -1338,134 +1250,6 @@ struct vfio_iommu_type1_dirty_bitmap_get {
#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
-/*
- * VFIO_IOMMU_BIND_PROCESS
- *
- * Allocate a PASID for a process address space, and use it to attach this
- * process to all devices in the container. Devices can then tag their DMA
- * traffic with the returned @pasid to perform transactions on the associated
- * virtual address space. Mapping and unmapping buffers is performed by standard
- * functions such as mmap and malloc.
- *
- * If flag is VFIO_IOMMU_BIND_PID, @pid contains the pid of a foreign process to
- * bind. Otherwise the current task is bound. Given that the caller owns the
- * device, setting this flag grants the caller read and write permissions on the
- * entire address space of foreign process described by @pid. Therefore,
- * permission to perform the bind operation on a foreign process is governed by
- * the ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check. See man ptrace(2)
- * for more information.
- *
- * On success, VFIO writes a Process Address Space ID (PASID) into @pasid. This
- * ID is unique to a process and can be used on all devices in the container.
- *
- * On fork, the child inherits the device fd and can use the bonds setup by its
- * parent. Consequently, the child has R/W access on the address spaces bound by
- * its parent. After an execv, the device fd is closed and the child doesn't
- * have access to the address space anymore.
- *
- * To remove a bond between process and container, VFIO_IOMMU_UNBIND ioctl is
- * issued with the same parameters. If a pid was specified in VFIO_IOMMU_BIND,
- * it should also be present for VFIO_IOMMU_UNBIND. Otherwise unbind the current
- * task from the container.
- */
-struct vfio_iommu_type1_bind_process {
- __u32 flags;
-#define VFIO_IOMMU_BIND_PID (1 << 0)
- __u32 pasid;
- __s32 pid;
-};
-
-/*
- * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes
- * vfio_iommu_type1_bind_process in data.
- */
-struct vfio_iommu_type1_bind {
- __u32 argsz;
- __u32 flags;
-#define VFIO_IOMMU_BIND_PROCESS (1 << 0)
- __u8 data[];
-};
-
-/*
- * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct vfio_iommu_bind)
- *
- * Manage address spaces of devices in this container. Initially a TYPE1
- * container can only have one address space, managed with
- * VFIO_IOMMU_MAP/UNMAP_DMA.
- *
- * An IOMMU of type VFIO_TYPE1_NESTING_IOMMU can be managed by both MAP/UNMAP
- * and BIND ioctls at the same time. MAP/UNMAP acts on the stage-2 (host) page
- * tables, and BIND manages the stage-1 (guest) page tables. Other types of
- * IOMMU may allow MAP/UNMAP and BIND to coexist, where MAP/UNMAP controls
- * non-PASID traffic and BIND controls PASID traffic. But this depends on the
- * underlying IOMMU architecture and isn't guaranteed.
- *
- * Availability of this feature depends on the device, its bus, the underlying
- * IOMMU and the CPU architecture.
- *
- * returns: 0 on success, -errno on failure.
- */
-#define VFIO_IOMMU_BIND _IO(VFIO_TYPE, VFIO_BASE + 22)
-
-/*
- * VFIO_IOMMU_UNBIND - _IOWR(VFIO_TYPE, VFIO_BASE + 23, struct vfio_iommu_bind)
- *
- * Undo what was done by the corresponding VFIO_IOMMU_BIND ioctl.
- */
-#define VFIO_IOMMU_UNBIND _IO(VFIO_TYPE, VFIO_BASE + 23)
-
-/*
- * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 18,
- * struct vfio_iommu_type1_set_pasid_table)
- *
- * The SET operation passes a PASID table to the host while the
- * UNSET operation detaches the one currently programmed. It is
- * allowed to "SET" the table several times without unsetting as
- * long as the table config does not stay IOMMU_PASID_CONFIG_TRANSLATE.
- */
-struct vfio_iommu_type1_set_pasid_table {
- __u32 argsz;
- __u32 flags;
-#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
-#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
- struct iommu_pasid_table_config config; /* used on SET */
-};
-
-#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 18)
-
-/**
- * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19,
- * struct vfio_iommu_type1_cache_invalidate)
- *
- * Propagate guest IOMMU cache invalidation to the host.
- */
-struct vfio_iommu_type1_cache_invalidate {
- __u32 argsz;
- __u32 flags;
- struct iommu_cache_invalidate_info info;
-};
-#define VFIO_IOMMU_CACHE_INVALIDATE _IO(VFIO_TYPE, VFIO_BASE + 19)
-
-/**
- * VFIO_IOMMU_SET_MSI_BINDING - _IOWR(VFIO_TYPE, VFIO_BASE + 20,
- * struct vfio_iommu_type1_set_msi_binding)
- *
- * Pass a stage 1 MSI doorbell mapping to the host so that this
- * latter can build a nested stage2 mapping. Or conversely tear
- * down a previously bound stage 1 MSI binding.
- */
-struct vfio_iommu_type1_set_msi_binding {
- __u32 argsz;
- __u32 flags;
-#define VFIO_IOMMU_BIND_MSI (1 << 0)
-#define VFIO_IOMMU_UNBIND_MSI (1 << 1)
- __u64 iova; /* MSI guest IOVA */
- /* Fields below are used on BIND */
- __u64 gpa; /* MSI guest physical address */
- __u64 size; /* size of stage1 mapping (bytes) */
-};
-#define VFIO_IOMMU_SET_MSI_BINDING _IO(VFIO_TYPE, VFIO_BASE + 20)
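
A stage-1 MSI doorbell binding would be passed through the container fd roughly like this (addresses hypothetical):

    struct vfio_iommu_type1_set_msi_binding msi = {
        .argsz = sizeof(msi),
        .flags = VFIO_IOMMU_BIND_MSI,
        .iova  = 0x08000000,    /* MSI doorbell gIOVA */
        .gpa   = 0x09000000,    /* doorbell guest PA */
        .size  = 0x1000,        /* stage-1 mapping size */
    };

    if (ioctl(container_fd, VFIO_IOMMU_SET_MSI_BINDING, &msi)) {
        /* handle errno */
    }
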
-
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
/*
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index acde610733307bd75bd18a5b17d8795b4ab993d8..fea4d6eb655f31a0d5aac77cfbda04147d29b464 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -144,7 +144,7 @@ done
rm -rf "$output/linux-headers/linux"
mkdir -p "$output/linux-headers/linux"
-for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h iommu.h \
+for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
psci.h psp-sev.h userfaultfd.h mman.h; do
cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
done
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 9f98209ab2f9a27403be47ee721a427626371222..7340e19ff5e24b6776ddb5737443de18019a8414 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -2111,16 +2111,6 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
rdmc->unregister_listener(rdm, rdl);
}
-int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count,
- struct iommu_fault *buf)
-{
- IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
- if (!imrc->inject_faults) {
- return -ENOENT;
- }
- return imrc->inject_faults(iommu_mr, count, buf);
-}
-
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
{
uint8_t mask = 1 << client;