From 1b7bb759d615a687a50a699621be1312fae4b2cf Mon Sep 17 00:00:00 2001
From: Binfeng Wu
Date: Tue, 8 Feb 2022 17:00:39 +0800
Subject: [PATCH 1/5] vfio/pci: Ascend310 needs a 4-byte quirk in BAR4

---
 hw/vfio/pci-quirks.c | 75 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 0cf69a8c6d..d86bcaf309 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1209,6 +1209,80 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
     return 0;
 }
 
+#define PCI_VENDOR_ID_HUAWEI 0x19e5
+#define PCI_DEVICE_ID_ASCEND310 0xd100
+#define ASCEND310_XLOADER_SIZE 4
+#define ASCEND310_XLOADER_OFFSET 0x400
+
+typedef struct VFIOAscendBarQuirk {
+    struct VFIOPCIDevice *vdev;
+    pcibus_t offset;
+    uint8_t bar;
+    MemoryRegion *mem;
+} VFIOAscendBarQuirk;
+
+static uint64_t vfio_ascend_quirk_read(void *opaque,
+                                       hwaddr addr, unsigned size)
+{
+    VFIOAscendBarQuirk *quirk = opaque;
+    VFIOPCIDevice *vdev = quirk->vdev;
+
+    qemu_log("read RO region! addr=0x%" HWADDR_PRIx ", size=%d\n",
+             addr + quirk->offset, size);
+
+    return vfio_region_read(&vdev->bars[quirk->bar].region,
+                            addr + quirk->offset, size);
+}
+
+static void vfio_ascend_quirk_write(void *opaque, hwaddr addr,
+                                    uint64_t data, unsigned size)
+{
+    VFIOAscendBarQuirk *quirk = opaque;
+
+    qemu_log("modifying RO region is not allowed! addr=0x%"
+             HWADDR_PRIx ", data=0x%" PRIx64 ", size=%d\n",
+             addr + quirk->offset, data, size);
+}
+
+static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = {
+    .read = vfio_ascend_quirk_read,
+    .write = vfio_ascend_quirk_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    VFIOQuirk *quirk;
+    VFIOAscendBarQuirk *bar4_quirk;
+
+    if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 4 ||
+        vdev->device_id != PCI_DEVICE_ID_ASCEND310) {
+        return;
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->nr_mem = 1;
+    quirk->mem = g_new0(MemoryRegion, quirk->nr_mem);
+    bar4_quirk = quirk->data = g_new0(typeof(*bar4_quirk), quirk->nr_mem);
+    bar4_quirk[0].vdev = vdev;
+    bar4_quirk[0].offset = ASCEND310_XLOADER_OFFSET;
+    bar4_quirk[0].bar = nr;
+
+    /*
+     * Intercept reads and writes to the xloader-updating register
+     * so the VM cannot enable xloader updating.
+     */
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
+                          &vfio_ascend_intercept_regs_quirk,
+                          &bar4_quirk[0],
+                          "vfio-ascend310-bar4-intercept-regs-quirk",
+                          ASCEND310_XLOADER_SIZE);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        bar4_quirk[0].offset,
+                                        &quirk->mem[0], 1);
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
 /*
  * Common quirk probe entry points.
  */
@@ -1261,6 +1335,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
 #ifdef CONFIG_VFIO_IGD
     vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
+    vfio_probe_ascend310_bar4_quirk(vdev, nr);
 }
 
 void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
-- 
Gitee
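The quirk above works by layering a tiny 4-byte MemoryRegion over the BAR at
priority 1, so guest accesses inside [ASCEND310_XLOADER_OFFSET,
ASCEND310_XLOADER_OFFSET + 4) are steered to the quirk's read/write handlers
while every other access still reaches the device. The standalone C program
below models only that routing decision; the in-memory "bar" array and the
helper names are invented for illustration, and this is not QEMU code or part
of the series:

    /*
     * Standalone model of the window-interception pattern (illustration
     * only; "bar" and the helpers are invented, not a QEMU API).
     * Build: cc -o quirk-demo quirk-demo.c
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define XLOADER_OFFSET 0x400u  /* Ascend310 window start in BAR4 */
    #define XLOADER_SIZE   4u      /* window length in bytes */

    static uint8_t bar[0x1000];    /* stand-in for the device BAR */

    static int in_window(uint32_t off)
    {
        return off >= XLOADER_OFFSET && off < XLOADER_OFFSET + XLOADER_SIZE;
    }

    static uint32_t bar_read(uint32_t off)
    {
        uint32_t v;

        memcpy(&v, &bar[off], sizeof(v));   /* reads always pass through */
        return v;
    }

    static void bar_write(uint32_t off, uint32_t data)
    {
        if (in_window(off)) {
            /* like vfio_ascend_quirk_write(): log and drop the store */
            printf("blocked write: off=0x%x data=0x%x\n", off, data);
            return;
        }
        memcpy(&bar[off], &data, sizeof(data));
    }

    int main(void)
    {
        bar_write(XLOADER_OFFSET, 0x1);   /* trapped: window is read-only */
        bar_write(0x0, 0x1234);           /* normal BAR access, passes */
        printf("win=0x%x base=0x%x\n", bar_read(XLOADER_OFFSET), bar_read(0));
        return 0;
    }

Reads of the window are still forwarded to the device (as in
vfio_ascend_quirk_read()); only stores are suppressed, which is what keeps
the xloader-updating register effectively read-only for the guest.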
From 81f1d1cc50b047c824fca999a0d72c00592b5261 Mon Sep 17 00:00:00 2001
From: Binfeng Wu
Date: Tue, 8 Feb 2022 17:16:04 +0800
Subject: [PATCH 2/5] vfio/pci: Ascend710 needs a 4-byte quirk in BAR0

---
 hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index d86bcaf309..6a9fc0afc5 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1210,7 +1210,10 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
 }
 
 #define PCI_VENDOR_ID_HUAWEI 0x19e5
+#define PCI_DEVICE_ID_ASCEND710 0xd500
 #define PCI_DEVICE_ID_ASCEND310 0xd100
+#define ASCEND710_XLOADER_SIZE 4
+#define ASCEND710_XLOADER_OFFSET 0x20430
 #define ASCEND310_XLOADER_SIZE 4
 #define ASCEND310_XLOADER_OFFSET 0x400
 
@@ -1250,6 +1253,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    VFIOQuirk *quirk;
+    VFIOAscendBarQuirk *bar0_quirk;
+
+    if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 ||
+        vdev->device_id != PCI_DEVICE_ID_ASCEND710) {
+        return;
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->nr_mem = 1;
+    quirk->mem = g_new0(MemoryRegion, quirk->nr_mem);
+    bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem);
+    bar0_quirk[0].vdev = vdev;
+    bar0_quirk[0].offset = ASCEND710_XLOADER_OFFSET;
+    bar0_quirk[0].bar = nr;
+
+    /*
+     * Intercept reads and writes to the xloader-updating register
+     * so the VM cannot enable xloader updating.
+     */
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
+                          &vfio_ascend_intercept_regs_quirk,
+                          &bar0_quirk[0],
+                          "vfio-ascend710-bar0-intercept-regs-quirk",
+                          ASCEND710_XLOADER_SIZE);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        bar0_quirk[0].offset,
+                                        &quirk->mem[0], 1);
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
 static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr)
 {
     VFIOQuirk *quirk;
@@ -1335,6 +1371,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
 #ifdef CONFIG_VFIO_IGD
     vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
+    vfio_probe_ascend710_bar0_quirk(vdev, nr);
     vfio_probe_ascend310_bar4_quirk(vdev, nr);
 }
 
-- 
Gitee

From fa198f262724f34d94aef0b5a8946375dbbab164 Mon Sep 17 00:00:00 2001
From: Binfeng Wu
Date: Tue, 8 Feb 2022 19:20:36 +0800
Subject: [PATCH 3/5] vfio/pci: Ascend910 needs a 4-byte quirk in BAR0

---
 hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 6a9fc0afc5..2457a61196 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1210,8 +1210,11 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
 }
 
 #define PCI_VENDOR_ID_HUAWEI 0x19e5
+#define PCI_DEVICE_ID_ASCEND910 0xd801
 #define PCI_DEVICE_ID_ASCEND710 0xd500
 #define PCI_DEVICE_ID_ASCEND310 0xd100
+#define ASCEND910_XLOADER_SIZE 4
+#define ASCEND910_XLOADER_OFFSET 0x80400
 #define ASCEND710_XLOADER_SIZE 4
 #define ASCEND710_XLOADER_OFFSET 0x20430
 #define ASCEND310_XLOADER_SIZE 4
@@ -1253,6 +1256,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static void vfio_probe_ascend910_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    VFIOQuirk *quirk;
+    VFIOAscendBarQuirk *bar0_quirk;
+
+    if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 ||
+        vdev->device_id != PCI_DEVICE_ID_ASCEND910) {
+        return;
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->nr_mem = 1;
+    quirk->mem = g_new0(MemoryRegion, quirk->nr_mem);
+    bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem);
+    bar0_quirk[0].vdev = vdev;
+    bar0_quirk[0].offset = ASCEND910_XLOADER_OFFSET;
+    bar0_quirk[0].bar = nr;
+
+    /*
+     * Intercept reads and writes to the xloader-updating register
+     * so the VM cannot enable xloader updating.
+     */
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
+                          &vfio_ascend_intercept_regs_quirk,
+                          &bar0_quirk[0],
+                          "vfio-ascend910-bar0-intercept-regs-quirk",
+                          ASCEND910_XLOADER_SIZE);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        bar0_quirk[0].offset,
+                                        &quirk->mem[0], 1);
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
 static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr)
 {
     VFIOQuirk *quirk;
@@ -1371,6 +1407,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
 #ifdef CONFIG_VFIO_IGD
     vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
+    vfio_probe_ascend910_bar0_quirk(vdev, nr);
     vfio_probe_ascend710_bar0_quirk(vdev, nr);
     vfio_probe_ascend310_bar4_quirk(vdev, nr);
 }
-- 
Gitee
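Patches 1-3 add three probe functions that differ only in device ID, BAR
number, window offset and region name. One possible follow-up, sketched here
only and not part of this series (it reuses the constants, the
VFIOAscendBarQuirk type and the vfio_ascend_intercept_regs_quirk ops that the
patches above introduce), is a table-driven probe:

    typedef struct AscendQuirkEntry {
        uint16_t device_id;    /* PCI device ID to match */
        int bar;               /* BAR holding the xloader window */
        pcibus_t offset;       /* window offset within the BAR */
        const char *name;      /* MemoryRegion name */
    } AscendQuirkEntry;

    static const AscendQuirkEntry ascend_quirk_table[] = {
        { PCI_DEVICE_ID_ASCEND910, 0, ASCEND910_XLOADER_OFFSET,
          "vfio-ascend910-bar0-intercept-regs-quirk" },
        { PCI_DEVICE_ID_ASCEND710, 0, ASCEND710_XLOADER_OFFSET,
          "vfio-ascend710-bar0-intercept-regs-quirk" },
        { PCI_DEVICE_ID_ASCEND310, 4, ASCEND310_XLOADER_OFFSET,
          "vfio-ascend310-bar4-intercept-regs-quirk" },
    };

    static void vfio_probe_ascend_xloader_quirk(VFIOPCIDevice *vdev, int nr)
    {
        VFIOQuirk *quirk;
        VFIOAscendBarQuirk *bar_quirk;
        size_t i;

        if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI) {
            return;
        }

        for (i = 0; i < ARRAY_SIZE(ascend_quirk_table); i++) {
            const AscendQuirkEntry *e = &ascend_quirk_table[i];

            if (vdev->device_id != e->device_id || nr != e->bar) {
                continue;
            }

            quirk = g_malloc0(sizeof(*quirk));
            quirk->nr_mem = 1;
            quirk->mem = g_new0(MemoryRegion, quirk->nr_mem);
            bar_quirk = quirk->data = g_new0(VFIOAscendBarQuirk, 1);
            bar_quirk[0].vdev = vdev;
            bar_quirk[0].offset = e->offset;
            bar_quirk[0].bar = nr;

            /* all three xloader windows are 4 bytes wide */
            memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
                                  &vfio_ascend_intercept_regs_quirk,
                                  &bar_quirk[0], e->name,
                                  ASCEND310_XLOADER_SIZE);
            memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                                e->offset, &quirk->mem[0], 1);
            QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
        }
    }

A single call to vfio_probe_ascend_xloader_quirk() in vfio_bar_quirk_setup()
would then replace the three per-device calls, and supporting a new Ascend
variant would mean adding one table row.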
From fb632aef38ff1fc8dbdb8ae50f2f7b200cab680d Mon Sep 17 00:00:00 2001
From: Jian Wang
Date: Thu, 10 Feb 2022 19:43:55 +0800
Subject: [PATCH 4/5] i386: cache passthrough: Update Intel CPUID4.EAX[25:14] based on vCPU topo

On Intel targets, when host cache passthrough is disabled, we emulate
the guest caches with default values and initialize the caches' shared
CPU lists based on the vCPU topology. However, when host cache
passthrough is enabled, the shared CPU lists follow the host regardless
of the vCPU topology.

For example, with cache passthrough enabled, running a guest with
vThreads=1 on a host with pThreads=2 reports every *two* logical vCPUs
as sharing an L1/L2 cache, which contradicts the vCPU topology
(vThreads=1).

So reinitialize bits [25:14] of Intel CPUID leaf 4 based on the actual
vCPU topology instead of the host pCPU topology.

Signed-off-by: Jian Wang
Signed-off-by: Yanan Wang
---
 target/i386/cpu.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 868cf3e7e8..463e40f7a1 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5196,7 +5196,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
 {
     X86CPU *cpu = env_archcpu(env);
     CPUState *cs = env_cpu(env);
-    uint32_t die_offset;
+    uint32_t die_offset, smt_width;
     uint32_t limit;
     uint32_t signature[3];
     X86CPUTopoInfo topo_info;
@@ -5205,6 +5205,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     topo_info.cores_per_die = cs->nr_cores;
     topo_info.threads_per_core = cs->nr_threads;
 
+    die_offset = apicid_die_offset(&topo_info);
+    smt_width = apicid_smt_width(&topo_info);
+
     /* Calculate & apply limits for different index ranges */
     if (index >= 0xC0000000) {
         limit = env->cpuid_xlevel2;
@@ -5272,8 +5275,25 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         /* cache info: needed for Core compatibility */
         if (cpu->cache_info_passthrough) {
             host_cpuid(index, count, eax, ebx, ecx, edx);
-            /* QEMU gives out its own APIC IDs, never pass down bits 31..26. */
-            *eax &= ~0xFC000000;
+            /*
+             * QEMU gives out its own APIC IDs, never pass down bits 31..26.
+             * Update the cache topo bits 25..14 according to the guest
+             * vCPU topology instead of the host pCPU topology.
+             */
+            *eax &= ~0xFFFFC000;
+            switch (count) {
+            case 0: /* L1 dcache info */
+            case 1: /* L1 icache info */
+            case 2: /* L2 cache info */
+                *eax |= ((1 << smt_width) - 1) << 14;
+                break;
+            case 3: /* L3 cache info */
+                *eax |= ((1 << die_offset) - 1) << 14;
+                break;
+            default: /* end of info */
+                *eax = *ebx = *ecx = *edx = 0;
+                break;
+            }
             if ((*eax & 31) && cs->nr_cores > 1) {
                 *eax |= (cs->nr_cores - 1) << 26;
             }
@@ -5298,7 +5318,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                                     eax, ebx, ecx, edx);
             break;
         case 3: /* L3 cache info */
-            die_offset = apicid_die_offset(&topo_info);
             if (cpu->enable_l3_cache) {
                 encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
                                     (1 << die_offset), cs->nr_cores,
-- 
Gitee
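As a concrete check of the new encoding, take a guest with
threads_per_core=1 and cores_per_die=4. QEMU's APIC-ID field widths are
ceil(log2(n)) bits for n IDs, so smt_width is 0 and die_offset is 2:
CPUID.4 then reports 1 logical processor sharing each L1/L2 and 4 sharing
the L3. The standalone sketch below reproduces the arithmetic; the
apicid_width() helper is a simplified stand-in for QEMU's apicid_*_width
functions, not QEMU code:

    #include <stdio.h>

    /* simplified stand-in for QEMU's APIC-ID widths: bits for n IDs */
    static unsigned apicid_width(unsigned n)
    {
        unsigned w = 0;

        while ((1u << w) < n) {
            w++;
        }
        return w;
    }

    int main(void)
    {
        unsigned threads_per_core = 1, cores_per_die = 4; /* guest topology */
        unsigned smt_width = apicid_width(threads_per_core);
        unsigned die_offset = apicid_width(cores_per_die) + smt_width;

        /* EAX[25:14] holds (max addressable IDs sharing the cache) - 1 */
        printf("L1/L2 EAX[25:14] = %u\n", (1u << smt_width) - 1);  /* 0 */
        printf("L3    EAX[25:14] = %u\n", (1u << die_offset) - 1); /* 3 */
        return 0;
    }

With the host pCPU values (pThreads=2) the L1/L2 field would have read 1,
i.e. two sharers, which is exactly the mismatch the commit message describes.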
From 3aa2a7723447dc1c696b590e07f520c0162bcad2 Mon Sep 17 00:00:00 2001
From: Yanan Wang
Date: Thu, 10 Feb 2022 20:06:01 +0800
Subject: [PATCH 5/5] i386: cache passthrough: Update AMD 8000_001D.EAX[25:14] based on vCPU topo

On AMD targets, when host cache passthrough is disabled, we emulate the
guest caches with default values and initialize the caches' shared CPU
lists based on the vCPU topology. However, when host cache passthrough
is enabled, the shared CPU lists follow the host regardless of the vCPU
topology.

For example, with cache passthrough enabled, running a guest with
vThreads=1 on a host with pThreads=2 reports every *two* logical vCPUs
as sharing an L1/L2 cache, which contradicts the vCPU topology
(vThreads=1).

So reinitialize bits [25:14] of AMD CPUID leaf 8000_001D.EAX based on
the actual vCPU topology instead of the host pCPU topology.

Signed-off-by: Yanan Wang
---
 target/i386/cpu.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 463e40f7a1..c391ee3c09 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5724,9 +5724,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         }
         break;
     case 0x8000001D:
+        /* Populate AMD Processor Cache Information */
         *eax = 0;
         if (cpu->cache_info_passthrough) {
             host_cpuid(index, count, eax, ebx, ecx, edx);
+
+            /*
+             * Clear bits [25:14] and then update them based on the guest
+             * vCPU topology, as encode_cache_cpuid8000001d() does when
+             * cache_info_passthrough is not enabled.
+             */
+            *eax &= ~0x03FFC000;
+            switch (count) {
+            case 0: /* L1 dcache info */
+            case 1: /* L1 icache info */
+            case 2: /* L2 cache info */
+                *eax |= ((topo_info.threads_per_core - 1) << 14);
+                break;
+            case 3: /* L3 cache info */
+                *eax |= ((topo_info.cores_per_die *
+                          topo_info.threads_per_core - 1) << 14);
+                break;
+            default: /* end of info */
+                *eax = *ebx = *ecx = *edx = 0;
+                break;
+            }
             break;
         }
         switch (count) {
-- 
Gitee
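Unlike the Intel leaf, the encoding this patch uses involves no
power-of-two rounding: EAX[25:14] is simply (number of logical processors
sharing the cache) - 1, taken straight from the guest topology counts. A
standalone check of the values the patch would produce, for an example
guest with threads_per_core=2 and cores_per_die=8 (values chosen for
illustration only):

    #include <stdio.h>

    int main(void)
    {
        unsigned threads_per_core = 2, cores_per_die = 8; /* example guest */

        /* L1/L2 are per-core: shared only by the SMT siblings */
        unsigned l1l2 = threads_per_core - 1;                 /* 1 */
        /* L3 is per-die: shared by every thread on the die */
        unsigned l3 = cores_per_die * threads_per_core - 1;   /* 15 */

        printf("L1/L2 EAX[25:14] = %u\n", l1l2);
        printf("L3    EAX[25:14] = %u\n", l3);
        return 0;
    }

The ~0x03FFC000 mask clears exactly bits [25:14] before these values are
ORed in, so the rest of the host-provided EAX (cache type, level, self
initializing, etc.) passes through unchanged.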