diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index eecd8031cf6c326f1816da179cf104c9c6dfb983..cb3f1327307b32cebc0c1e5aabfa174681a3a06f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -71,86 +71,12 @@ do { } while (0) #endif -#define KVM_MSI_HASHTAB_SIZE 256 - struct KVMParkedVcpu { unsigned long vcpu_id; int kvm_fd; QLIST_ENTRY(KVMParkedVcpu) node; }; -enum KVMDirtyRingReaperState { - KVM_DIRTY_RING_REAPER_NONE = 0, - /* The reaper is sleeping */ - KVM_DIRTY_RING_REAPER_WAIT, - /* The reaper is reaping for dirty pages */ - KVM_DIRTY_RING_REAPER_REAPING, -}; - -/* - * KVM reaper instance, responsible for collecting the KVM dirty bits - * via the dirty ring. - */ -struct KVMDirtyRingReaper { - /* The reaper thread */ - QemuThread reaper_thr; - volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ - volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ -}; - -struct KVMState -{ - AccelState parent_obj; - - int nr_slots; - int fd; - int vmfd; - int coalesced_mmio; - int coalesced_pio; - struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; - bool coalesced_flush_in_progress; - int vcpu_events; - int robust_singlestep; - int debugregs; -#ifdef KVM_CAP_SET_GUEST_DEBUG - QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; -#endif - int max_nested_state_len; - int many_ioeventfds; - int intx_set_mask; - int kvm_shadow_mem; - bool kernel_irqchip_allowed; - bool kernel_irqchip_required; - OnOffAuto kernel_irqchip_split; - bool sync_mmu; - uint64_t manual_dirty_log_protect; - /* The man page (and posix) say ioctl numbers are signed int, but - * they're not. Linux, glibc and *BSD all treat ioctl numbers as - * unsigned, and treating them as signed here can break things */ - unsigned irq_set_ioctl; - unsigned int sigmask_len; - GHashTable *gsimap; -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing *irq_routes; - int nr_allocated_irq_routes; - unsigned long *used_gsi_bitmap; - unsigned int gsi_count; - QTAILQ_HEAD(, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; -#endif - KVMMemoryListener memory_listener; - QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; - - /* For "info mtree -f" to tell if an MR is registered in KVM */ - int nr_as; - struct KVMAs { - KVMMemoryListener *ml; - AddressSpace *as; - } *as; - uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ - uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ - struct KVMDirtyRingReaper reaper; -}; - KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_split_irqchip; @@ -3624,6 +3550,8 @@ static void kvm_accel_instance_init(Object *obj) s->kernel_irqchip_split = ON_OFF_AUTO_AUTO; /* KVM dirty ring is by default off */ s->kvm_dirty_ring_size = 0; + s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN; + s->notify_window = 0; } static void kvm_accel_class_init(ObjectClass *oc, void *data) @@ -3651,6 +3579,8 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "dirty-ring-size", "Size of KVM dirty page ring buffer (default: 0, i.e. 
use bitmap)"); + + kvm_arch_accel_class_init(oc); } static const TypeInfo kvm_accel_type = { diff --git a/configure b/configure index 48c21775f3a90c91631d90bd6e3ec6b060215465..63f0ef926cee2339b367baa8534c0f5999bafcdb 100755 --- a/configure +++ b/configure @@ -330,8 +330,6 @@ qom_cast_debug="yes" trace_backends="log" trace_file="trace" opengl="$default_feature" -cpuid_h="no" -avx2_opt="$default_feature" guest_agent="$default_feature" guest_agent_with_vss="no" guest_agent_ntddscsi="no" @@ -1047,14 +1045,6 @@ for opt do ;; --disable-tools) want_tools="no" ;; - --disable-avx2) avx2_opt="no" - ;; - --enable-avx2) avx2_opt="yes" - ;; - --disable-avx512f) avx512f_opt="no" - ;; - --enable-avx512f) avx512f_opt="yes" - ;; --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 ;; @@ -1450,8 +1440,6 @@ cat << EOF tpm TPM support libssh ssh block device support numa libnuma support - avx2 AVX2 optimization support - avx512f AVX512F optimization support replication replication support opengl opengl support xfsctl xfsctl support @@ -2890,85 +2878,6 @@ else # "$safe_stack" = "" fi fi -######################################## -# check if cpuid.h is usable. - -cat > $TMPC << EOF -#include -int main(void) { - unsigned a, b, c, d; - int max = __get_cpuid_max(0, 0); - - if (max >= 1) { - __cpuid(1, a, b, c, d); - } - - if (max >= 7) { - __cpuid_count(7, 0, a, b, c, d); - } - - return 0; -} -EOF -if compile_prog "" "" ; then - cpuid_h=yes -fi - -########################################## -# avx2 optimization requirement check -# -# There is no point enabling this if cpuid.h is not usable, -# since we won't be able to select the new routines. - -if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then - cat > $TMPC << EOF -#pragma GCC push_options -#pragma GCC target("avx2") -#include -#include -static int bar(void *a) { - __m256i x = *(__m256i *)a; - return _mm256_testz_si256(x, x); -} -int main(int argc, char *argv[]) { return bar(argv[0]); } -EOF - if compile_object "-Werror" ; then - avx2_opt="yes" - else - avx2_opt="no" - fi -fi - -########################################## -# avx512f optimization requirement check -# -# There is no point enabling this if cpuid.h is not usable, -# since we won't be able to select the new routines. -# by default, it is turned off. -# if user explicitly want to enable it, check environment - -if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then - cat > $TMPC << EOF -#pragma GCC push_options -#pragma GCC target("avx512f") -#include -#include -static int bar(void *a) { - __m512i x = *(__m512i *)a; - return _mm512_test_epi64_mask(x, x); -} -int main(int argc, char *argv[]) -{ - return bar(argv[0]); -} -EOF - if ! compile_object "-Werror" ; then - avx512f_opt="no" - fi -else - avx512f_opt="no" -fi - ######################################## # check if __[u]int128_t is usable. 
@@ -3580,14 +3489,6 @@ if test "$opengl" = "yes" ; then echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak fi -if test "$avx2_opt" = "yes" ; then - echo "CONFIG_AVX2_OPT=y" >> $config_host_mak -fi - -if test "$avx512f_opt" = "yes" ; then - echo "CONFIG_AVX512F_OPT=y" >> $config_host_mak -fi - # XXX: suppress that if [ "$bsd" = "yes" ] ; then echo "CONFIG_BSD=y" >> $config_host_mak @@ -3620,10 +3521,6 @@ if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then echo "CONFIG_TSAN=y" >> $config_host_mak fi -if test "$cpuid_h" = "yes" ; then - echo "CONFIG_CPUID_H=y" >> $config_host_mak -fi - if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index ff7488cb63b93830f75093030add22d6f7a086c4..33925edf450e739dfa45009e01f3d364af6efb18 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -270,6 +270,19 @@ accepted incorrect commands will return an error. Users should make sure that all arguments passed to ``device_add`` are consistent with the documented property types. +``query-sgx`` return value member ``section-size`` (since 7.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Member ``section-size`` in return value elements with meta-type ``uint64`` is +deprecated. Use ``sections`` instead. + + +``query-sgx-capabilities`` return value member ``section-size`` (since 7.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Member ``section-size`` in return value elements with meta-type ``uint64`` is +deprecated. Use ``sections`` instead. + System accelerators ------------------- diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst index f8fade5ac2d44b7378a87e8da14d33ae732bb20a..0f0a73f7587c44e69e028f061fed1c39bb97bdd8 100644 --- a/docs/system/i386/sgx.rst +++ b/docs/system/i386/sgx.rst @@ -141,8 +141,7 @@ To launch a SGX guest: |qemu_system_x86| \\ -cpu host,+sgx-provisionkey \\ -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ - -object memory-backend-epc,id=mem2,size=28M \\ - -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 + -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 Utilizing SGX in the guest requires a kernel/OS with SGX support. The support can be determined in guest by:: @@ -152,8 +151,32 @@ The support can be determined in guest by:: and SGX epc info by:: $ dmesg | grep sgx - [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff - [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff + [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff + [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. + +To launch an SGX NUMA guest: + +.. 
parsed-literal:: + + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ + -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ + -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ + -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ + -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ + -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ + -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 + +and SGX EPC NUMA info by:: + + $ dmesg | grep sgx + [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff + [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff + + $ dmesg | grep SRAT + [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] + [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] References ---------- diff --git a/hw/core/numa.c b/hw/core/numa.c index e6050b22739f44a0c9acbed5459970bee0b58404..1aa05dcf425f46eca6ac0468d56a96fe0d67bf13 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -784,9 +784,8 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) break; case MEMORY_DEVICE_INFO_KIND_SGX_EPC: se = value->u.sgx_epc.data; - /* TODO: once we support numa, assign to right node */ - node_mem[0].node_mem += se->size; - node_mem[0].node_plugged_mem += se->size; + node_mem[se->node].node_mem += se->size; + node_mem[se->node].node_plugged_mem += se->size; break; default: g_assert_not_reached(); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a99c6e4fe3fad88da568c9f738d7bd8f5900f126..8383b83ee36027631356f28aaeddbeb558c9e7ac 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2068,6 +2068,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) nvdimm_build_srat(table_data); } + sgx_epc_build_srat(table_data); + /* * TODO: this part is not in ACPI spec and current linux kernel boots fine * without these entries. 
But I recall there were issues the last time I diff --git a/hw/i386/pc.c b/hw/i386/pc.c index a2ef40ecbc2459145557e85f0873ba18d4139b74..9959f93216a235bad631be8177b90449ae5e54c3 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -736,14 +736,6 @@ void pc_machine_done(Notifier *notifier, void *data) /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); } - - - if (x86ms->apic_id_limit > 255 && !xen_enabled() && - !kvm_irqchip_in_kernel()) { - error_report("current -smp configuration requires kernel " - "irqchip support."); - exit(EXIT_FAILURE); - } } void pc_guest_info_init(PCMachineState *pcms) diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c index e508827e787b7b7a38cf9785a030571cf22c6e54..96b2940d75eba983124effc9963f4d320ef0d7fb 100644 --- a/hw/i386/sgx-epc.c +++ b/hw/i386/sgx-epc.c @@ -21,6 +21,7 @@ static Property sgx_epc_properties[] = { DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), + DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), DEFINE_PROP_END_OF_LIST(), @@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, se->memaddr = epc->addr; se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, NULL); + se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, + NULL); se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); info->u.sgx_epc.data = se; diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c index c9b379e66519210337f03952359317de43f46901..26833eb233c5b6448831c76c31266babaa070265 100644 --- a/hw/i386/sgx-stub.c +++ b/hw/i386/sgx-stub.c @@ -6,6 +6,10 @@ #include "qapi/error.h" #include "qapi/qapi-commands-misc-target.h" +void sgx_epc_build_srat(GArray *table_data) +{ +} + SGXInfo *qmp_query_sgx(Error **errp) { error_setg(errp, "SGX support is not compiled in"); diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index 8fef3dd8fad4c20a4c9a3169f6e328707504bce9..a2b318dd9387d1343736ae50da1b7bfb6159e40f 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -23,6 +23,7 @@ #include "sysemu/hw_accel.h" #include "sysemu/reset.h" #include <sys/ioctl.h> +#include "hw/acpi/aml-build.h" #define SGX_MAX_EPC_SECTIONS 8 #define SGX_CPUID_EPC_INVALID 0x0 @@ -36,17 +37,59 @@ #define RETRY_NUM 2 +static int sgx_epc_device_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { + *list = g_slist_append(*list, DEVICE(obj)); + } + + object_child_foreach(obj, sgx_epc_device_list, opaque); + return 0; +} + +static GSList *sgx_epc_get_device_list(void) +{ + GSList *list = NULL; + + object_child_foreach(qdev_get_machine(), sgx_epc_device_list, &list); + return list; +} + +void sgx_epc_build_srat(GArray *table_data) +{ + GSList *device_list = sgx_epc_get_device_list(); + + for (; device_list; device_list = device_list->next) { + DeviceState *dev = device_list->data; + Object *obj = OBJECT(dev); + uint64_t addr, size; + int node; + + node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, + &error_abort); + addr = object_property_get_uint(obj, SGX_EPC_ADDR_PROP, &error_abort); + size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, &error_abort); + + build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED); + } + g_slist_free(device_list); +} + static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) { return (low & MAKE_64BIT_MASK(12, 20)) + ((high & 
MAKE_64BIT_MASK(0, 20)) << 32); } -static uint64_t sgx_calc_host_epc_section_size(void) +static SGXEPCSectionList *sgx_calc_host_epc_sections(uint64_t *size) { + SGXEPCSectionList *head = NULL, **tail = &head; + SGXEPCSection *section; uint32_t i, type; uint32_t eax, ebx, ecx, edx; - uint64_t size = 0; + uint32_t j = 0; for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); @@ -60,10 +103,14 @@ static uint64_t sgx_calc_host_epc_section_size(void) break; } - size += sgx_calc_section_metric(ecx, edx); + section = g_new0(SGXEPCSection, 1); + section->node = j++; + section->size = sgx_calc_section_metric(ecx, edx); + *size += section->size; + QAPI_LIST_APPEND(tail, section); } - return size; + return head; } static void sgx_epc_reset(void *opaque) @@ -110,6 +157,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) { SGXInfo *info = NULL; uint32_t eax, ebx, ecx, edx; + uint64_t size = 0; int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); if (fd < 0) { @@ -127,13 +175,36 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) info->sgx1 = eax & (1U << 0) ? true : false; info->sgx2 = eax & (1U << 1) ? true : false; - info->section_size = sgx_calc_host_epc_section_size(); + info->sections = sgx_calc_host_epc_sections(&size); + info->section_size = size; close(fd); return info; } +static SGXEPCSectionList *sgx_get_epc_sections_list(void) +{ + GSList *device_list = sgx_epc_get_device_list(); + SGXEPCSectionList *head = NULL, **tail = &head; + SGXEPCSection *section; + + for (; device_list; device_list = device_list->next) { + DeviceState *dev = device_list->data; + Object *obj = OBJECT(dev); + + section = g_new0(SGXEPCSection, 1); + section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, + &error_abort); + section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, + &error_abort); + QAPI_LIST_APPEND(tail, section); + } + g_slist_free(device_list); + + return head; +} + SGXInfo *qmp_query_sgx(Error **errp) { SGXInfo *info = NULL; @@ -160,6 +231,7 @@ SGXInfo *qmp_query_sgx(Error **errp) info->sgx2 = true; info->flc = true; info->section_size = sgx_epc->size; + info->sections = sgx_get_epc_sections_list(); return info; } @@ -167,6 +239,7 @@ SGXInfo *qmp_query_sgx(Error **errp) void hmp_info_sgx(Monitor *mon, const QDict *qdict) { Error *err = NULL; + SGXEPCSectionList *section_list, *section; g_autoptr(SGXInfo) info = qmp_query_sgx(&err); if (err) { @@ -183,6 +256,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) info->flc ? 
"enabled" : "disabled"); monitor_printf(mon, "size: %" PRIu64 "\n", info->section_size); + + section_list = info->sections; + for (section = section_list; section; section = section->next) { + monitor_printf(mon, "NUMA node #%" PRId64 ": ", + section->value->node); + monitor_printf(mon, "size=%" PRIu64 "\n", + section->value->size); + } } bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) @@ -226,6 +307,9 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) /* set the memdev link with memory backend */ object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, &error_fatal); + /* set the numa node property for sgx epc object */ + object_property_set_uint(obj, SGX_EPC_NUMA_NODE_PROP, list->value->node, + &error_fatal); object_property_set_bool(obj, "realized", true, &error_fatal); object_unref(obj); } diff --git a/hw/i386/x86.c b/hw/i386/x86.c index b84840a1bb99a9072375f9784dc5068c23fb00ca..a3258d78facf41c63fdad6c8c6f21da82ad1938e 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -39,6 +39,7 @@ #include "sysemu/replay.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-timers.h" +#include "sysemu/xen.h" #include "trace.h" #include "hw/i386/x86.h" @@ -136,6 +137,25 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) */ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ms->smp.max_cpus - 1) + 1; + + /* + * Can we support APIC ID 255 or higher? + * + * Under Xen: yes. + * With userspace emulated lapic: no + * With KVM's in-kernel lapic: only if X2APIC API is enabled. + */ + if (x86ms->apic_id_limit > 255 && !xen_enabled() && + (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) { + error_report("current -smp configuration requires kernel " + "irqchip and X2APIC API support."); + exit(EXIT_FAILURE); + } + + if (kvm_enabled()) { + kvm_set_max_apic_id(x86ms->apic_id_limit); + } + possible_cpus = mc->possible_cpu_arch_ids(ms); for (i = 0; i < ms->smp.cpus; i++) { x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h index a6a65be854f88db0e0c7b5d565a0d07f722f3d11..581fac389a630d66bc173d389e51bdf713490cae 100644 --- a/include/hw/i386/sgx-epc.h +++ b/include/hw/i386/sgx-epc.h @@ -25,6 +25,7 @@ #define SGX_EPC_ADDR_PROP "addr" #define SGX_EPC_SIZE_PROP "size" #define SGX_EPC_MEMDEV_PROP "memdev" +#define SGX_EPC_NUMA_NODE_PROP "node" /** * SGXEPCDevice: @@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { /* public */ uint64_t addr; + uint32_t node; HostMemoryBackendEpc *hostmem; } SGXEPCDevice; @@ -56,6 +58,7 @@ typedef struct SGXEPCState { } SGXEPCState; bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); +void sgx_epc_build_srat(GArray *table_data); static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) { diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 7b22aeb6ae1a111c158370933260ead5059c35c9..d7ee0b6245c69639ae72874ce58aec665a6ca69d 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -333,6 +333,8 @@ bool kvm_device_supported(int vmfd, uint64_t type); extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; +void kvm_arch_accel_class_init(ObjectClass *oc); + void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 1f5487d9b74db353820c693134ec5e7fbdb61e96..3b4adcdc101d2a009b0b870617dd025c66478ed0 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h 
@@ -10,6 +10,7 @@ #define QEMU_KVM_INT_H #include "exec/memory.h" +#include "qapi/qapi-types-common.h" #include "qemu/accel.h" #include "sysemu/kvm.h" @@ -36,6 +37,81 @@ typedef struct KVMMemoryListener { int as_id; } KVMMemoryListener; +#define KVM_MSI_HASHTAB_SIZE 256 + +enum KVMDirtyRingReaperState { + KVM_DIRTY_RING_REAPER_NONE = 0, + /* The reaper is sleeping */ + KVM_DIRTY_RING_REAPER_WAIT, + /* The reaper is reaping for dirty pages */ + KVM_DIRTY_RING_REAPER_REAPING, +}; + +/* + * KVM reaper instance, responsible for collecting the KVM dirty bits + * via the dirty ring. + */ +struct KVMDirtyRingReaper { + /* The reaper thread */ + QemuThread reaper_thr; + volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ + volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ +}; +struct KVMState +{ + AccelState parent_obj; + + int nr_slots; + int fd; + int vmfd; + int coalesced_mmio; + int coalesced_pio; + struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; + bool coalesced_flush_in_progress; + int vcpu_events; + int robust_singlestep; + int debugregs; +#ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; +#endif + int max_nested_state_len; + int many_ioeventfds; + int intx_set_mask; + int kvm_shadow_mem; + bool kernel_irqchip_allowed; + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. Linux, glibc and *BSD all treat ioctl numbers as + * unsigned, and treating them as signed here can break things */ + unsigned irq_set_ioctl; + unsigned int sigmask_len; + GHashTable *gsimap; +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *irq_routes; + int nr_allocated_irq_routes; + unsigned long *used_gsi_bitmap; + unsigned int gsi_count; + QTAILQ_HEAD(, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; +#endif + KVMMemoryListener memory_listener; + QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; + + /* For "info mtree -f" to tell if an MR is registered in KVM */ + int nr_as; + struct KVMAs { + KVMMemoryListener *ml; + AddressSpace *as; + } *as; + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + struct KVMDirtyRingReaper reaper; + NotifyVmexitOption notify_vmexit; + uint32_t notify_window; +}; + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, AddressSpace *as, int as_id, const char *name); diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index a6c327f8ad9e5f079dd7039229e3df7e296fc345..46e730b62f506522d909766e982fbc31689b0dc1 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -324,6 +324,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 #define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 +#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -358,7 +359,10 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u8 reserved[27]; + struct { + __u8 pending; + } triple_fault; + __u8 reserved[26]; __u8 exception_has_payload; __u64 exception_payload; }; @@ -437,6 +441,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 
vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index bcaf66cc4d2a06a861322b0ee82c2c5b38454b98..f3d2c40598ce827fd6ec9ea40e5c6023bfc84ce4 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -269,6 +269,7 @@ struct kvm_xen_exit { #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 +#define KVM_EXIT_NOTIFY 37 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -469,6 +470,11 @@ struct kvm_run { } msr; /* KVM_EXIT_XEN */ struct kvm_xen_exit xen; + /* KVM_EXIT_NOTIFY */ + struct { +#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; /* Fix the size of the union. */ char padding[256]; }; @@ -1112,6 +1118,12 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 +#define KVM_CAP_VM_GPA_BITS 207 +#define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 +#define KVM_CAP_X86_NOTIFY_VMEXIT 219 #ifdef KVM_CAP_IRQ_ROUTING @@ -2004,4 +2016,11 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + +/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */ +#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0) +#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1) + #endif /* __LINUX_KVM_H */ diff --git a/meson.build b/meson.build index 96de1a6ef948542aa93bd03242005907643e6c47..a68315ff736d9191638b2f495e7868ba5e856df7 100644 --- a/meson.build +++ b/meson.build @@ -1738,6 +1738,52 @@ config_host_data.set('CONFIG_GETAUXVAL', cc.links(gnu_source_prefix + ''' return getauxval(AT_HWCAP) == 0; }''')) +have_cpuid_h = cc.links(''' + #include <cpuid.h> + int main(void) { + unsigned a, b, c, d; + unsigned max = __get_cpuid_max(0, 0); + + if (max >= 1) { + __cpuid(1, a, b, c, d); + } + + if (max >= 7) { + __cpuid_count(7, 0, a, b, c, d); + } + + return 0; + }''') +config_host_data.set('CONFIG_CPUID_H', have_cpuid_h) + +config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \ + .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \ + .require(cc.links(''' + #pragma GCC push_options + #pragma GCC target("avx2") + #include <cpuid.h> + #include <immintrin.h> + static int bar(void *a) { + __m256i x = *(__m256i *)a; + return _mm256_testz_si256(x, x); + } + int main(int argc, char *argv[]) { return bar(argv[0]); } + '''), error_message: 'AVX2 not available').allowed()) + +config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \ + .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \ + .require(cc.links(''' + #pragma GCC push_options + #pragma GCC target("avx512f") + #include <cpuid.h> + #include <immintrin.h> + static int bar(void *a) { + __m512i x = *(__m512i *)a; + return _mm512_test_epi64_mask(x, x); + } + int main(int argc, char *argv[]) { return bar(argv[0]); } + '''), error_message: 'AVX512F not available').allowed()) + config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + ''' #include #include @@ -1758,6 +1804,22 @@ config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + ''' #include #include return -1; }''')) +config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \ + .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \ + .require(cc.links(''' + #pragma GCC 
push_options + #pragma GCC target("avx512bw") + #include <cpuid.h> + #include <immintrin.h> + static int bar(void *a) { + + __m512i *x = a; + __m512i res= _mm512_abs_epi8(*x); + return res[1]; + } + int main(int argc, char *argv[]) { return bar(argv[0]); } + '''), error_message: 'AVX512BW not available').allowed()) + ignored = ['CONFIG_QEMU_INTERP_PREFIX', # actually per-target 'HAVE_GDB_BIN'] arrays = ['CONFIG_BDRV_RW_WHITELIST', 'CONFIG_BDRV_RO_WHITELIST'] @@ -3256,8 +3318,9 @@ summary_info += {'membarrier': config_host.has_key('CONFIG_MEMBARRIER')} summary_info += {'debug stack usage': config_host.has_key('CONFIG_DEBUG_STACK_USAGE')} summary_info += {'mutex debugging': config_host.has_key('CONFIG_DEBUG_MUTEX')} summary_info += {'memory allocator': get_option('malloc')} -summary_info += {'avx2 optimization': config_host.has_key('CONFIG_AVX2_OPT')} -summary_info += {'avx512f optimization': config_host.has_key('CONFIG_AVX512F_OPT')} +summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')} +summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')} +summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')} summary_info += {'gprof enabled': config_host.has_key('CONFIG_GPROF')} summary_info += {'gcov': get_option('b_coverage')} summary_info += {'thread sanitizer': config_host.has_key('CONFIG_TSAN')} diff --git a/meson_options.txt b/meson_options.txt index e3923237322a7bac13915c439d4944136afe61bb..ec9c3c0a05e2e6ab545320242e2e105bffa4f38f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -66,6 +66,12 @@ option('cfi_debug', type: 'boolean', value: 'false', description: 'Verbose errors in case of CFI violation') option('multiprocess', type: 'feature', value: 'auto', description: 'Out of process device emulation support') +option('avx2', type: 'feature', value: 'auto', + description: 'AVX2 optimizations') +option('avx512f', type: 'feature', value: 'disabled', + description: 'AVX512F optimizations') +option('avx512bw', type: 'feature', value: 'auto', + description: 'AVX512BW optimizations') option('attr', type : 'feature', value : 'auto', description: 'attr/xattr support') diff --git a/migration/ram.c b/migration/ram.c index 863035d23517c17265593a2d4465f27bcaddab07..15b4e40723208684bdbe2aa0277d5bbba7f6ac7a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -86,6 +86,34 @@ static inline bool is_zero_range(uint8_t *p, uint64_t size) return buffer_is_zero(p, size); } +int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, + uint8_t *, int) = xbzrle_encode_buffer; +#if defined(CONFIG_AVX512BW_OPT) +#include "qemu/cpuid.h" +static void __attribute__((constructor)) init_cpu_flag(void) +{ + unsigned max = __get_cpuid_max(0, NULL); + int a, b, c, d; + if (max >= 1) { + __cpuid(1, a, b, c, d); + /* We must check that AVX is not just available, but usable. 
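+ * (CPUID only advertises that the CPU implements the AVX/AVX512BW + * instructions; the OSXSAVE bit together with the XCR0 value read back + * via xgetbv below confirms that the OS actually saves and restores the + * corresponding register state.)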
*/ + if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) { + int bv; + __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0)); + __cpuid_count(7, 0, a, b, c, d); + /* 0xe6: + * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15 + * and ZMM16-ZMM31 state are enabled by OS) + * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS) + */ + if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) { + xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512; + } + } + } +} +#endif + XBZRLECacheStats xbzrle_counters; /* struct contains XBZRLE cache and a static page @@ -731,9 +759,9 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); /* XBZRLE encoding (if there is no overflow) */ - encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, - TARGET_PAGE_SIZE); + encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); /* * Update the cache contents, so that it corresponds to the data diff --git a/migration/xbzrle.c b/migration/xbzrle.c index 1ba482ded9c4ffcfb48fe774396e379bb7726284..c6f8b209175acc31ca77d5fae758a14b9ac2d5f3 100644 --- a/migration/xbzrle.c +++ b/migration/xbzrle.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/host-utils.h" #include "xbzrle.h" /* @@ -174,3 +175,126 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) return d; } + +#if defined(CONFIG_AVX512BW_OPT) +#pragma GCC push_options +#pragma GCC target("avx512bw") +#include <immintrin.h> +int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen) +{ + uint32_t zrun_len = 0, nzrun_len = 0; + int d = 0, i = 0, num = 0; + uint8_t *nzrun_start = NULL; + /* add 1 to include residual part in main loop */ + uint32_t count512s = (slen >> 6) + 1; + /* countResidual is tail of data, i.e., countResidual = slen % 64 */ + uint32_t count_residual = slen & 0b111111; + bool never_same = true; + uint64_t mask_residual = 1; + mask_residual <<= count_residual; + mask_residual -= 1; + __m512i r = _mm512_set1_epi32(0); + + while (count512s) { + int bytes_to_check = 64; + uint64_t mask = 0xffffffffffffffff; + if (count512s == 1) { + bytes_to_check = count_residual; + mask = mask_residual; + } + __m512i old_data = _mm512_mask_loadu_epi8(r, + mask, old_buf + i); + __m512i new_data = _mm512_mask_loadu_epi8(r, + mask, new_buf + i); + uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data); + count512s--; + + bool is_same = (comp & 0x1); + while (bytes_to_check) { + if (d + 2 > dlen) { + return -1; + } + if (is_same) { + if (nzrun_len) { + d += uleb128_encode_small(dst + d, nzrun_len); + if (d + nzrun_len > dlen) { + return -1; + } + nzrun_start = new_buf + i - nzrun_len; + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + nzrun_len = 0; + } + /* 64 data at a time for speed */ + if (count512s && (comp == 0xffffffffffffffff)) { + i += 64; + zrun_len += 64; + break; + } + never_same = false; + num = ctz64(~comp); + num = (num < bytes_to_check) ? 
num : bytes_to_check; + zrun_len += num; + bytes_to_check -= num; + comp >>= num; + i += num; + if (bytes_to_check) { + /* still has different data after same data */ + d += uleb128_encode_small(dst + d, zrun_len); + zrun_len = 0; + } else { + break; + } + } + if (never_same || zrun_len) { + /* + * never_same only acts if + * data begins with diff in first count512s + */ + d += uleb128_encode_small(dst + d, zrun_len); + zrun_len = 0; + never_same = false; + } + /* has diff, 64 data at a time for speed */ + if ((bytes_to_check == 64) && (comp == 0x0)) { + i += 64; + nzrun_len += 64; + break; + } + num = ctz64(comp); + num = (num < bytes_to_check) ? num : bytes_to_check; + nzrun_len += num; + bytes_to_check -= num; + comp >>= num; + i += num; + if (bytes_to_check) { + /* mask like 111000 */ + d += uleb128_encode_small(dst + d, nzrun_len); + /* overflow */ + if (d + nzrun_len > dlen) { + return -1; + } + nzrun_start = new_buf + i - nzrun_len; + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + nzrun_len = 0; + is_same = true; + } + } + } + + if (nzrun_len != 0) { + d += uleb128_encode_small(dst + d, nzrun_len); + /* overflow */ + if (d + nzrun_len > dlen) { + return -1; + } + nzrun_start = new_buf + i - nzrun_len; + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + } + return d; +} +#pragma GCC pop_options +#endif diff --git a/migration/xbzrle.h b/migration/xbzrle.h index a0db507b9cd9475277749de89a91d2cb98315c21..6feb49160adfff0fd1cdf52e16bf0d6952117954 100644 --- a/migration/xbzrle.h +++ b/migration/xbzrle.h @@ -18,4 +18,8 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst, int dlen); int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); +#if defined(CONFIG_AVX512BW_OPT) +int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen); +#endif #endif diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 9c91bf93e94cda6baccc53e9c0a3216e870e4020..2669156b284868188392a2da75b203d0ee27cc0d 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -1810,6 +1810,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) se->id ? 
se->id : ""); monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", se->memaddr); monitor_printf(mon, " size: %" PRIu64 "\n", se->size); + monitor_printf(mon, " node: %" PRId64 "\n", se->node); monitor_printf(mon, " memdev: %s\n", se->memdev); break; default: diff --git a/qapi/machine.json b/qapi/machine.json index 067e3f53787928d38566e1e26e1c94ec1481ebf0..a9f33d0f27da136e360cf039b684bf288b8b1010 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1207,12 +1207,15 @@ # # @memdev: memory backend linked with device # +# @node: the numa node (Since: 7.0) +# # Since: 6.2 ## { 'struct': 'SgxEPCDeviceInfo', 'data': { '*id': 'str', 'memaddr': 'size', 'size': 'size', + 'node': 'int', 'memdev': 'str' } } @@ -1285,10 +1288,15 @@ # # @memdev: memory backend linked with device # +# @node: the numa node (Since: 7.0) +# # Since: 6.2 ## { 'struct': 'SgxEPC', - 'data': { 'memdev': 'str' } } + 'data': { 'memdev': 'str', + 'node': 'int' + } +} ## # @SgxEPCProperties: diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 5aa2b95b7d4aa09a4b11a9bbabafa2b30ce4f2ae..4bc45d247416769e54518be006ee74f9fbed6f7a 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -337,6 +337,21 @@ 'if': 'TARGET_ARM' } +## +# @SGXEPCSection: +# +# Information about intel SGX EPC section info +# +# @node: the numa node +# +# @size: the size of EPC section +# +# Since: 7.0 +## +{ 'struct': 'SGXEPCSection', + 'data': { 'node': 'int', + 'size': 'uint64'}} + ## # @SGXInfo: # @@ -351,6 +366,12 @@ # @flc: true if FLC is supported # # @section-size: The EPC section size for guest +# Redundant with @sections. Just for backward compatibility. +# +# @sections: The EPC sections info for guest (Since: 7.0) +# +# Features: +# @deprecated: Member @section-size is deprecated. Use @sections instead. # # Since: 6.2 ## @@ -359,7 +380,9 @@ 'sgx1': 'bool', 'sgx2': 'bool', 'flc': 'bool', - 'section-size': 'uint64'}, + 'section-size': { 'type': 'uint64', + 'features': [ 'deprecated' ] }, + 'sections': ['SGXEPCSection']}, 'if': 'TARGET_I386' } ## @@ -375,7 +398,9 @@ # # -> { "execute": "query-sgx" } # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, -# "flc": true, "section-size" : 0 } } +# "flc": true, "section-size" : 96468992, +# "sections": [{"node": 0, "size": 67108864}, +# {"node": 1, "size": 29360128}]} } # ## { 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } @@ -393,7 +418,9 @@ # # -> { "execute": "query-sgx-capabilities" } # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, -# "flc": true, "section-size" : 0 } } +# "flc": true, "section-size" : 96468992, +# "section" : [{"node": 0, "size": 67108864}, +# {"node": 1, "size": 29360128}]} } # ## { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } diff --git a/qapi/run-state.json b/qapi/run-state.json index 43d66d700fcd202e50b33c9743eee8e9f9f7a98b..08c38b2c67bfa894313d068415cd415bd9c2a379 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -638,3 +638,20 @@ { 'struct': 'MemoryFailureFlags', 'data': { 'action-required': 'bool', 'recursive': 'bool'} } + +## +# @NotifyVmexitOption: +# +# An enumeration of the options specified when enabling notify VM exit +# +# @run: enable the feature, do nothing and continue if the notify VM exit happens. +# +# @internal-error: enable the feature, raise a internal error if the notify +# VM exit happens. +# +# @disable: disable the feature. 
+ # Since: 7.2 +## +{ 'enum': 'NotifyVmexitOption', + 'data': [ 'run', 'internal-error', 'disable' ] } \ No newline at end of file diff --git a/qemu-options.hx b/qemu-options.hx index ae2c6dbbfc005c526026604d988612157dd5a7fd..ac61c05162719ebf5bfea32672026cdda03c9971 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -127,11 +127,11 @@ SRST ERST DEF("M", HAS_ARG, QEMU_OPTION_M, - " sgx-epc.0.memdev=memid\n", + " sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n", QEMU_ARCH_ALL) SRST -``sgx-epc.0.memdev=@var{memid}`` +``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}`` Define an SGX EPC section. ERST @@ -152,6 +152,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel, " split-wx=on|off (enable TCG split w^x mapping)\n" " tb-size=n (TCG translation block cache size)\n" " dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n" + " notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n" " thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL) SRST ``-accel name[,prop=value[,...]]`` @@ -203,6 +204,16 @@ SRST is disabled (dirty-ring-size=0). When enabled, KVM will instead record dirty pages in a bitmap. + + ``notify-vmexit=run|internal-error|disable,notify-window=n`` + Enables or disables notify VM exit support on x86 hosts and specifies + the corresponding notify window that triggers the VM exit if enabled. + ``run`` enables the feature and the vCPU simply continues to run when + the exit happens. ``internal-error`` enables the feature and raises an + internal error when the exit happens. ``disable`` leaves the feature + disabled. This feature can mitigate a vCPU getting stuck because its + event window does not open up for a specified amount of time (the + notify-window). Default: notify-vmexit=run,notify-window=0. 
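+ For example, to raise an internal error instead of leaving a vCPU + silently stuck (the window value here is purely illustrative and should + be tuned for the workload): + + :: + + -accel kvm,notify-vmexit=internal-error,notify-window=100000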
+ ERST DEF("smp", HAS_ARG, QEMU_OPTION_smp, diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 7a17ff42182fc78b3b88c0551d4108caaa0d2d9c..8c00cce41183632396acb2163f6a1df2b14c9497 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -25,6 +25,9 @@ meson_options_help() { printf "%s\n" ' alsa ALSA sound support' printf "%s\n" ' attr attr/xattr support' printf "%s\n" ' auth-pam PAM access control' + printf "%s\n" ' avx2 AVX2 optimizations' + printf "%s\n" ' avx512bw AVX512BW optimizations' + printf "%s\n" ' avx512f AVX512F optimizations' printf "%s\n" ' bpf eBPF support' printf "%s\n" ' brlapi brlapi character device driver' printf "%s\n" ' bzip2 bzip2 support for DMG images' @@ -107,6 +110,12 @@ _meson_option_parse() { --disable-attr) printf "%s" -Dattr=disabled ;; --enable-auth-pam) printf "%s" -Dauth_pam=enabled ;; --disable-auth-pam) printf "%s" -Dauth_pam=disabled ;; + --enable-avx2) printf "%s" -Davx2=enabled ;; + --disable-avx2) printf "%s" -Davx2=disabled ;; + --enable-avx512bw) printf "%s" -Davx512bw=enabled ;; + --disable-avx512bw) printf "%s" -Davx512bw=disabled ;; + --enable-avx512f) printf "%s" -Davx512f=enabled ;; + --disable-avx512f) printf "%s" -Davx512f=disabled ;; --enable-bpf) printf "%s" -Dbpf=enabled ;; --disable-bpf) printf "%s" -Dbpf=disabled ;; --enable-brlapi) printf "%s" -Dbrlapi=enabled ;; diff --git a/target/arm/kvm.c b/target/arm/kvm.c index bbf1ce7ba3bc337908aa257b259b008a0ed4b5a8..0ca90a58a953a9807850492ae51f26d07b1b0843 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1057,3 +1057,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/i386/cpu.c b/target/i386/cpu.c index aa9e6368004c7490ea63cc28ee3f9fe498771899..8fa43b3ee75101f1b65f0a802d30190f53ab771b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -574,6 +574,18 @@ static CPUCacheInfo legacy_l3_cache = { #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ +/* CPUID Leaf 0x1D constants: */ +#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1 +#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000 +#define INTEL_AMX_BYTES_PER_TILE 0x400 +#define INTEL_AMX_BYTES_PER_ROW 0x40 +#define INTEL_AMX_TILE_MAX_NAMES 0x8 +#define INTEL_AMX_TILE_MAX_ROWS 0x10 + +/* CPUID Leaf 0x1E constants: */ +#define INTEL_AMX_TMUL_MAX_K 0x10 +#define INTEL_AMX_TMUL_MAX_N 0x40 + void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, uint32_t vendor2, uint32_t vendor3) { @@ -648,8 +660,9 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | \ /* CPUID_7_0_ECX_OSPKE is dynamic */ \ CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS) -#define TCG_7_0_EDX_FEATURES 0 -#define TCG_7_1_EAX_FEATURES 0 +#define TCG_7_0_EDX_FEATURES CPUID_7_0_EDX_FSRM +#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ + CPUID_7_1_EAX_FSRC) #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) @@ -843,8 +856,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx512-vp2intersect", NULL, "md-clear", NULL, NULL, NULL, "serialize", NULL, "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, - NULL, NULL, NULL, "avx512-fp16", - NULL, NULL, "spec-ctrl", "stibp", + NULL, NULL, "amx-bf16", "avx512-fp16", + "amx-tile", "amx-int8", "spec-ctrl", "stibp", NULL, "arch-capabilities", 
"core-capability", "ssbd", }, .cpuid = { @@ -859,8 +872,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .feat_names = { NULL, NULL, NULL, NULL, "avx-vnni", "avx512-bf16", NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, "fzrm", "fsrs", + "fsrc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -909,7 +922,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { "xsaveopt", "xsavec", "xgetbv1", "xsaves", - NULL, NULL, NULL, NULL, + "xfd", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1192,7 +1205,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .feat_names = { "sgx1", "sgx2", NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "sgx-edeccssa", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1232,7 +1245,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .feat_names = { NULL, "sgx-debug", "sgx-mode64", NULL, "sgx-provisionkey", "sgx-tokenkey", NULL, "sgx-kss", - NULL, NULL, NULL, NULL, + NULL, NULL, "sgx-aex-notify", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1401,6 +1414,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_PKRU_BIT] = { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, .size = sizeof(XSavePKRU) }, + [XSTATE_XTILE_CFG_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILECFG), + }, + [XSTATE_XTILE_DATA_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILEDATA) + }, }; static uint32_t xsave_area_size(uint64_t mask) @@ -3508,6 +3529,135 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .name = "SapphireRapids", + .level = 0x20, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 143, + .stepping = 4, + /* + * please keep the ascending order so that we can have a clear view of + * bit position of each feature. 
+ */ + .features[FEAT_1_EDX] = + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + .features[FEAT_1_ECX] = + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_WBNOINVD, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | + CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | + CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | + CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 | + CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE | + CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16 | + CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC, + .features[FEAT_VMX_BASIC] = + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = + MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 | + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | + MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + 
VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | + VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = + VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | + VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | + VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | + VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | + VMX_SECONDARY_EXEC_XSAVES, + .features[FEAT_VMX_VMFUNC] = + MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (SapphireRapids)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { /* end of list */ }, + }, + }, { .name = "Denverton", .level = 21, @@ -4952,8 +5102,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) return cpu_list; } -static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - bool migratable_only) +uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + bool migratable_only) { FeatureWordInfo *wi = &feature_word_info[w]; uint64_t r = 0; @@ -5487,6 +5637,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, const ExtSaveArea *esa = &x86_ext_save_areas[count]; *eax = esa->size; *ebx = esa->offset; + *ecx = esa->ecx & + (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); } } break; @@ -5575,6 +5727,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1D: { + /* AMX TILE, for now hardcoded for Sapphire Rapids*/ + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *eax = INTEL_AMX_TILE_MAX_SUBLEAF; + } else if (count == 1) { + *eax = INTEL_AMX_TOTAL_TILE_BYTES | + (INTEL_AMX_BYTES_PER_TILE << 16); + *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16); + *ecx = INTEL_AMX_TILE_MAX_ROWS; + } + break; + } + case 0x1E: { + /* AMX TMUL, for now hardcoded for Sapphire Rapids */ + *eax = 0; + *ebx = 
0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8); + } + break; + } case 0x40000000: /* * CPUID code in kvm_arch_init_vcpu() ignores stuff @@ -5917,6 +6106,7 @@ static void x86_cpu_reset(DeviceState *dev) env->exception_has_payload = false; env->exception_payload = 0; env->nmi_injected = false; + env->triple_fault_pending = false; #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. */ apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); @@ -5997,6 +6187,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) CPUX86State *env = &cpu->env; int i; uint64_t mask; + static bool request_perm; if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { env->features[FEAT_XSAVE_COMP_LO] = 0; @@ -6012,6 +6203,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) } } + /* Only request permission for first vcpu */ + if (kvm_enabled() && !request_perm) { + kvm_request_xsave_components(cpu, mask); + request_perm = true; + } + env->features[FEAT_XSAVE_COMP_LO] = mask; env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 04f2b790c9fafd9a672b895833dafce082b13ad0..238d7f8656e39c352a9272113a4d46114eb87c57 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -505,6 +505,9 @@ typedef enum X86Seg { #define MSR_VM_HSAVE_PA 0xc0010117 +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 + #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_UMWAIT_CONTROL 0xe1 @@ -537,6 +540,8 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 #define XSTATE_PKRU_BIT 9 +#define XSTATE_XTILE_CFG_BIT 17 +#define XSTATE_XTILE_DATA_BIT 18 #define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT) #define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT) @@ -547,6 +552,17 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) + +#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + +#define ESA_FEATURE_ALIGN64_BIT 1 +#define ESA_FEATURE_XFD_BIT 2 + +#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) +#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) + /* CPUID feature words */ typedef enum FeatureWord { @@ -588,6 +604,8 @@ typedef enum FeatureWord { } FeatureWord; typedef uint64_t FeatureWordArray[FEATURE_WORDS]; +uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + bool migratable_only); /* cpuid_features bits */ #define CPUID_FP87 (1U << 0) @@ -838,8 +856,14 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) +/* AMX_BF16 instruction */ +#define CPUID_7_0_EDX_AMX_BF16 (1U << 22) /* AVX512_FP16 instruction */ #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) +/* AMX tile (two-dimensional register) */ +#define CPUID_7_0_EDX_AMX_TILE (1U << 24) +/* AMX_INT8 instruction */ +#define CPUID_7_0_EDX_AMX_INT8 (1U << 25) /* Speculation Control */ #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Single Thread Indirect Branch Predictors */ @@ -855,6 +879,15 @@ typedef uint64_t 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 04f2b790c9fafd9a672b895833dafce082b13ad0..238d7f8656e39c352a9272113a4d46114eb87c57 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -505,6 +505,9 @@ typedef enum X86Seg {
 
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+#define MSR_IA32_XFD                    0x000001c4
+#define MSR_IA32_XFD_ERR                0x000001c5
+
 #define MSR_IA32_BNDCFGS                0x00000d90
 #define MSR_IA32_XSS                    0x00000da0
 #define MSR_IA32_UMWAIT_CONTROL         0xe1
@@ -537,6 +540,8 @@ typedef enum X86Seg {
 #define XSTATE_ZMM_Hi256_BIT            6
 #define XSTATE_Hi16_ZMM_BIT             7
 #define XSTATE_PKRU_BIT                 9
+#define XSTATE_XTILE_CFG_BIT            17
+#define XSTATE_XTILE_DATA_BIT           18
 
 #define XSTATE_FP_MASK                  (1ULL << XSTATE_FP_BIT)
 #define XSTATE_SSE_MASK                 (1ULL << XSTATE_SSE_BIT)
@@ -547,6 +552,17 @@ typedef enum X86Seg {
 #define XSTATE_ZMM_Hi256_MASK           (1ULL << XSTATE_ZMM_Hi256_BIT)
 #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
 #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
+#define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
+
+#define XSTATE_DYNAMIC_MASK             (XSTATE_XTILE_DATA_MASK)
+
+#define ESA_FEATURE_ALIGN64_BIT         1
+#define ESA_FEATURE_XFD_BIT             2
+
+#define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
+#define ESA_FEATURE_XFD_MASK            (1U << ESA_FEATURE_XFD_BIT)
+
 
 /* CPUID feature words */
 typedef enum FeatureWord {
@@ -588,6 +604,8 @@ typedef enum FeatureWord {
 } FeatureWord;
 
 typedef uint64_t FeatureWordArray[FEATURE_WORDS];
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+                                            bool migratable_only);
 
 /* cpuid_features bits */
 #define CPUID_FP87 (1U << 0)
@@ -838,8 +856,14 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_EDX_SERIALIZE         (1U << 14)
 /* TSX Suspend Load Address Tracking instruction */
 #define CPUID_7_0_EDX_TSX_LDTRK         (1U << 16)
+/* AMX_BF16 instruction */
+#define CPUID_7_0_EDX_AMX_BF16          (1U << 22)
 /* AVX512_FP16 instruction */
 #define CPUID_7_0_EDX_AVX512_FP16       (1U << 23)
+/* AMX tile (two-dimensional register) */
+#define CPUID_7_0_EDX_AMX_TILE          (1U << 24)
+/* AMX_INT8 instruction */
+#define CPUID_7_0_EDX_AMX_INT8          (1U << 25)
 /* Speculation Control */
 #define CPUID_7_0_EDX_SPEC_CTRL         (1U << 26)
 /* Single Thread Indirect Branch Predictors */
@@ -855,6 +879,15 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_1_EAX_AVX_VNNI          (1U << 4)
 /* AVX512 BFloat16 Instruction */
 #define CPUID_7_1_EAX_AVX512_BF16       (1U << 5)
+/* Fast Zero-length REP MOVSB */
+#define CPUID_7_1_EAX_FZRM              (1U << 10)
+/* Fast Short REP STOSB */
+#define CPUID_7_1_EAX_FSRS              (1U << 11)
+/* Fast Short REP CMPSB/SCASB */
+#define CPUID_7_1_EAX_FSRC              (1U << 12)
+
+/* Extended Feature Disable (XFD) */
+#define CPUID_D_1_EAX_XFD               (1U << 4)
 
 /* Packets which contain IP payload have LIP values */
 #define CPUID_14_0_ECX_LIP              (1U << 31)
@@ -1343,6 +1376,16 @@ typedef struct XSavePKRU {
     uint32_t padding;
 } XSavePKRU;
 
+/* Ext. save area 17: AMX XTILECFG state */
+typedef struct XSaveXTILECFG {
+    uint8_t xtilecfg[64];
+} XSaveXTILECFG;
+
+/* Ext. save area 18: AMX XTILEDATA state */
+typedef struct XSaveXTILEDATA {
+    uint8_t xtiledata[8][1024];
+} XSaveXTILEDATA;
+
 QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
 QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
 QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
@@ -1350,13 +1393,16 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
 QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
 QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
 QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
 
 typedef struct ExtSaveArea {
     uint32_t feature, bits;
     uint32_t offset, size;
+    uint32_t ecx;
 } ExtSaveArea;
 
-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
+#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
 
 extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
 
@@ -1494,6 +1540,10 @@ typedef struct CPUX86State {
     uint64_t opmask_regs[NB_OPMASK_REGS];
     YMMReg zmmh_regs[CPU_NB_REGS];
     ZMMReg hi16_zmm_regs[CPU_NB_REGS];
+#ifdef TARGET_X86_64
+    uint8_t xtilecfg[64];
+    uint8_t xtiledata[8192];
+#endif
 
     /* sysenter registers */
     uint32_t sysenter_cs;
@@ -1579,6 +1629,10 @@ typedef struct CPUX86State {
     uint64_t msr_rtit_cr3_match;
     uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
 
+    /* Per-VCPU XFD MSRs */
+    uint64_t msr_xfd;
+    uint64_t msr_xfd_err;
+
     /* exception/interrupt handling */
     int error_code;
     int exception_is_int;
@@ -1654,6 +1708,7 @@ typedef struct CPUX86State {
     uint8_t has_error_code;
     uint8_t exception_has_payload;
     uint64_t exception_payload;
+    uint8_t triple_fault_pending;
     uint32_t ins_len;
     uint32_t sipi_vector;
     bool tsc_valid;
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index d95028018e8921bb1d7127861aec06284a609749..7237378a7d4e4121f018bb1977bb88db1f5d8eed 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
 static void kvm_cpu_xsave_init(void)
 {
     static bool first = true;
-    KVMState *s = kvm_state;
+    uint32_t eax, ebx, ecx, edx;
     int i;
 
     if (!first) {
@@ -99,12 +99,18 @@ static void kvm_cpu_xsave_init(void)
 
     for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) {
         ExtSaveArea *esa = &x86_ext_save_areas[i];
 
-        if (esa->size) {
-            int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
-            if (sz != 0) {
-                assert(esa->size == sz);
-                esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
-            }
+        if (!esa->size) {
+            continue;
+        }
+        if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits)
+            != esa->bits) {
+            continue;
+        }
+        host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+        if (eax != 0) {
+            assert(esa->size == eax);
+            esa->offset = ebx;
+            esa->ecx = ecx;
         }
     }
 }
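kvm_cpu_xsave_init() now asks the host CPU directly because KVM's filtered CPUID hides components that have not been granted yet. Outside QEMU, the same leaf-0xD walk can be written with the compiler's <cpuid.h> helpers; a minimal sketch (ECX bit names follow the new ESA_FEATURE_* macros):

```c
/* Sketch: enumerate one XSAVE state component from host CPUID, the
 * same data kvm_cpu_xsave_init() records in x86_ext_save_areas[]. */
#include <cpuid.h>
#include <stdio.h>

static void dump_xsave_component(unsigned idx)
{
    unsigned eax, ebx, ecx, edx;

    if (!__get_cpuid_count(0xd, idx, &eax, &ebx, &ecx, &edx) || !eax) {
        return;                 /* component not reported by this CPU */
    }
    /* EAX = size, EBX = offset (non-compacted format),
     * ECX bit 1 = 64-byte aligned, ECX bit 2 = XFD-capable. */
    printf("component %u: size=%u offset=%u align64=%u xfd=%u\n",
           idx, eax, ebx, (ecx >> 1) & 1, (ecx >> 2) & 1);
}
```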
@@ -165,7 +171,7 @@ static void kvm_cpu_instance_init(CPUState *cs)
         /* only applies to builtin_x86_defs cpus */
         if (!kvm_irqchip_in_kernel()) {
             x86_cpu_change_kvm_default("x2apic", "off");
-        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
+        } else if (kvm_irqchip_is_split()) {
             x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
         }
 
diff --git a/target/i386/kvm/kvm-stub.c b/target/i386/kvm/kvm-stub.c
index f6e7e4466e1ab738e4a7dcca08a1ac9c1f60dac7..e052f1c7b0ef25f5db673e5717bfe82c94807496 100644
--- a/target/i386/kvm/kvm-stub.c
+++ b/target/i386/kvm/kvm-stub.c
@@ -44,3 +44,8 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
 {
     abort();
 }
+
+void kvm_set_max_apic_id(uint32_t max_apic_id)
+{
+    return;
+}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5a698bde19ace17be8d3eff255cfa82fa6b4740e..e0c38dec6055e70d191d28a4d5162a99e53ee279 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -15,8 +15,10 @@
 #include "qemu/osdep.h"
 #include "qapi/qapi-events-run-state.h"
 #include "qapi/error.h"
+#include "qapi/visitor.h"
 #include <sys/ioctl.h>
 #include <sys/utsname.h>
+#include <sys/syscall.h>
 
 #include <linux/kvm.h>
 #include "standard-headers/asm-x86/kvm_para.h"
@@ -122,9 +124,11 @@ static uint32_t num_architectural_pmu_gp_counters;
 static uint32_t num_architectural_pmu_fixed_counters;
 
 static int has_xsave;
+static int has_xsave2;
 static int has_xcrs;
 static int has_pit_state2;
 static int has_exception_payload;
+static int has_triple_fault_event;
 
 static bool has_msr_mcg_ext_ctl;
 
@@ -346,7 +350,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
 {
     struct kvm_cpuid2 *cpuid;
     uint32_t ret = 0;
-    uint32_t cpuid_1_edx;
+    uint32_t cpuid_1_edx, unused;
+    uint64_t bitmask;
 
     cpuid = get_supported_cpuid(s);
 
@@ -392,10 +397,20 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
     } else if (function == 6 && reg == R_EAX) {
         ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
     } else if (function == 7 && index == 0 && reg == R_EBX) {
+        /* Not new instructions, just an optimization. */
+        uint32_t ebx;
+        host_cpuid(7, 0, &unused, &ebx, &unused, &unused);
+        ret |= ebx & CPUID_7_0_EBX_ERMS;
+
         if (host_tsx_broken()) {
             ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE);
         }
     } else if (function == 7 && index == 0 && reg == R_EDX) {
+        /* Not new instructions, just an optimization. */
+        uint32_t edx;
+        host_cpuid(7, 0, &unused, &unused, &unused, &edx);
+        ret |= edx & CPUID_7_0_EDX_FSRM;
+
         /*
          * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts.
          * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is
@@ -404,6 +419,39 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
         if (!has_msr_arch_capabs) {
             ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
         }
+    } else if (function == 7 && index == 1 && reg == R_EAX) {
+        /* Not new instructions, just an optimization. */
+        uint32_t eax;
+        host_cpuid(7, 1, &eax, &unused, &unused, &unused);
+        ret |= eax & (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC);
+    } else if (function == 0xd && index == 0 &&
+               (reg == R_EAX || reg == R_EDX)) {
+        /*
+         * The value returned by KVM_GET_SUPPORTED_CPUID does not include
+         * features that still have to be enabled with the arch_prctl
+         * system call.  QEMU needs the full value, which is retrieved
+         * with KVM_GET_DEVICE_ATTR.
+         */
+        struct kvm_device_attr attr = {
+            .group = 0,
+            .attr = KVM_X86_XCOMP_GUEST_SUPP,
+            .addr = (unsigned long) &bitmask
+        };
+
+        bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
+        if (!sys_attr) {
+            return ret;
+        }
+
+        int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
+        if (rc < 0) {
+            if (rc != -ENXIO) {
+                warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+                            "error: %d", rc);
+            }
+            return ret;
+        }
+        ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
     } else if (function == 0x80000001 && reg == R_ECX) {
         /*
          * It's safe to enable TOPOEXT even if it's not returned by
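The block above is the QEMU-wrapped form of a system-scoped device attribute read. Against a raw /dev/kvm descriptor the same query looks roughly like the sketch below (hedged: error handling trimmed, and KVM_CAP_SYS_ATTRIBUTES/KVM_X86_XCOMP_GUEST_SUPP need Linux 5.17+ UAPI headers):

```c
/* Sketch: fetch the full guest XSTATE support mask, including
 * components that KVM_GET_SUPPORTED_CPUID hides behind arch_prctl(). */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_xcomp_guest_supp(uint64_t *mask)
{
    struct kvm_device_attr attr = {
        .group = 0,
        .attr = KVM_X86_XCOMP_GUEST_SUPP,
        .addr = (uint64_t)(uintptr_t)mask,
    };
    int fd = open("/dev/kvm", O_RDWR);
    int ret = -1;

    if (fd >= 0 &&
        ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_SYS_ATTRIBUTES) > 0) {
        ret = ioctl(fd, KVM_GET_DEVICE_ATTR, &attr);
    }
    if (fd >= 0) {
        close(fd);
    }
    return ret;
}
```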
@@ -1564,6 +1612,26 @@ static Error *invtsc_mig_blocker;
 
 #define KVM_MAX_CPUID_ENTRIES  100
 
+static void kvm_init_xsave(CPUX86State *env)
+{
+    if (has_xsave2) {
+        env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
+    } else if (has_xsave) {
+        env->xsave_buf_len = sizeof(struct kvm_xsave);
+    } else {
+        return;
+    }
+
+    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+    memset(env->xsave_buf, 0, env->xsave_buf_len);
+    /*
+     * The allocated storage must be large enough for all of the
+     * possible XSAVE state components.
+     */
+    assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
+           env->xsave_buf_len);
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     struct {
@@ -1593,6 +1661,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
     cpuid_i = 0;
 
+    has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
+
     r = kvm_arch_set_tsc_khz(cs);
     if (r < 0) {
         return r;
@@ -1758,7 +1828,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
             c = &cpuid_data.entries[cpuid_i++];
         }
         break;
-    case 0x14: {
+    case 0x14:
+    case 0x1d:
+    case 0x1e: {
         uint32_t times;
 
         c->function = i;
@@ -1980,19 +2052,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     if (r) {
         goto fail;
     }
-
-    if (has_xsave) {
-        env->xsave_buf_len = sizeof(struct kvm_xsave);
-        env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
-        memset(env->xsave_buf, 0, env->xsave_buf_len);
-
-        /*
-         * The allocated storage must be large enough for all of the
-         * possible XSAVE state components.
-         */
-        assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
-               <= env->xsave_buf_len);
-    }
+    kvm_init_xsave(env);
 
     max_nested_state_len = kvm_max_nested_state_length();
     if (max_nested_state_len > 0) {
@@ -2337,6 +2397,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
         }
     }
 
+    has_triple_fault_event = kvm_check_extension(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT);
+    if (has_triple_fault_event) {
+        ret = kvm_vm_enable_cap(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 0, true);
+        if (ret < 0) {
+            error_report("kvm: Failed to enable triple fault event cap: %s",
+                         strerror(-ret));
+            return ret;
+        }
+    }
+
     ret = kvm_get_supported_msrs(s);
     if (ret < 0) {
         return ret;
@@ -2442,6 +2512,21 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
         }
     }
 
+    if (s->notify_vmexit != NOTIFY_VMEXIT_OPTION_DISABLE &&
+        kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) {
+        uint64_t notify_window_flags =
+            ((uint64_t)s->notify_window << 32) |
+            KVM_X86_NOTIFY_VMEXIT_ENABLED |
+            KVM_X86_NOTIFY_VMEXIT_USER;
+        ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0,
+                                notify_window_flags);
+        if (ret < 0) {
+            error_report("kvm: Failed to enable notify vmexit cap: %s",
+                         strerror(-ret));
+            return ret;
+        }
+    }
+
     return 0;
 }
 
@@ -3185,6 +3270,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
                               env->msr_ia32_sgxlepubkeyhash[3]);
         }
 
+        if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+            kvm_msr_entry_add(cpu, MSR_IA32_XFD,
+                              env->msr_xfd);
+            kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
+                              env->msr_xfd_err);
+        }
+
         /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
          *       kvm_put_msr_feature_control. */
     }
@@ -3240,13 +3332,14 @@ static int kvm_get_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
     void *xsave = env->xsave_buf;
-    int ret;
+    int type, ret;
 
     if (!has_xsave) {
         return kvm_get_fpu(cpu);
     }
 
-    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
+    type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
+    ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
     if (ret < 0) {
         return ret;
     }
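kvm_init_xsave() leans on the fact that KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) returns the buffer size the kernel needs rather than a plain boolean, which is also why kvm_get_xsave() picks the ioctl based on the cached value. A hedged stand-alone sketch of the same sizing dance (raw fds, Linux 5.17+ headers assumed):

```c
/* Sketch: allocate and read a vcpu's XSAVE area, preferring
 * KVM_GET_XSAVE2 when KVM_CAP_XSAVE2 reports a (possibly larger) size. */
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void *fetch_xsave(int vm_fd, int vcpu_fd)
{
    int sz = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
    size_t len = sz > 0 ? ((size_t)sz + 4095) & ~(size_t)4095
                        : sizeof(struct kvm_xsave);
    void *buf = aligned_alloc(4096, len);

    if (!buf) {
        return NULL;
    }
    memset(buf, 0, len);
    if (ioctl(vcpu_fd, sz > 0 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE, buf) < 0) {
        free(buf);
        return NULL;
    }
    return buf;
}
```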
@@ -3535,6 +3628,11 @@ static int kvm_get_msrs(X86CPU *cpu)
         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
     }
 
+    if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+        kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
+        kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
+    }
+
     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
     if (ret < 0) {
         return ret;
@@ -3831,6 +3929,12 @@ static int kvm_get_msrs(X86CPU *cpu)
             env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
                            msrs[i].data;
             break;
+        case MSR_IA32_XFD:
+            env->msr_xfd = msrs[i].data;
+            break;
+        case MSR_IA32_XFD_ERR:
+            env->msr_xfd_err = msrs[i].data;
+            break;
         }
     }
 
@@ -3942,6 +4046,11 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
         }
     }
 
+    if (has_triple_fault_event) {
+        events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+        events.triple_fault.pending = env->triple_fault_pending;
+    }
+
     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
 }
 
@@ -4011,6 +4120,10 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
         }
     }
 
+    if (events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) {
+        env->triple_fault_pending = events.triple_fault.pending;
+    }
+
     env->sipi_vector = events.sipi_vector;
 
     return 0;
@@ -4757,6 +4870,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
     X86CPU *cpu = X86_CPU(cs);
     uint64_t code;
     int ret;
+    bool ctx_invalid;
+    char str[256];
+    KVMState *state;
 
     switch (run->exit_reason) {
     case KVM_EXIT_HLT:
@@ -4812,6 +4928,21 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         /* already handled in kvm_arch_post_run */
         ret = 0;
         break;
+    case KVM_EXIT_NOTIFY:
+        ctx_invalid = !!(run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID);
+        state = KVM_STATE(current_accel());
+        sprintf(str, "Encountered a notify exit with %svalid context in"
+                     " guest. The guest may be misbehaving;"
+                     " please inspect its state.", ctx_invalid ? "in" : "");
+        if (ctx_invalid ||
+            state->notify_vmexit == NOTIFY_VMEXIT_OPTION_INTERNAL_ERROR) {
+            warn_report("KVM internal error: %s", str);
+            ret = -1;
+        } else {
+            warn_report_once("KVM: %s", str);
+            ret = 0;
+        }
+        break;
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
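The KVM_EXIT_NOTIFY policy above is selected by the two accelerator properties this patch registers at the bottom of the file; on the command line that is, for example, `-accel kvm,notify-vmexit=run,notify-window=0`. With `run`, a notify exit with a valid context only warns once and resumes the guest; `internal-error` turns every notify exit into a fatal error; `disable` never enables the capability in the first place; and an invalid-context exit is treated as fatal regardless of the chosen option.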
@@ -5050,3 +5181,112 @@ bool kvm_arch_cpu_check_are_resettable(void)
 {
     return !sev_es_enabled();
 }
+
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
+
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
+{
+    KVMState *s = kvm_state;
+    uint64_t supported;
+
+    mask &= XSTATE_DYNAMIC_MASK;
+    if (!mask) {
+        return;
+    }
+    /*
+     * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
+     * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
+     * about them already because they are not supported features.
+     */
+    supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+    supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
+    mask &= supported;
+
+    while (mask) {
+        int bit = ctz64(mask);
+        int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
+        if (rc) {
+            /*
+             * Older kernels (< 5.17) do not support
+             * ARCH_REQ_XCOMP_GUEST_PERM, but they also do not return
+             * any dynamic feature from kvm_arch_get_supported_cpuid.
+             */
+            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+                        "for feature bit %d", bit);
+        }
+        mask &= ~BIT_ULL(bit);
+    }
+}
+
+static int kvm_arch_get_notify_vmexit(Object *obj, Error **errp)
+{
+    KVMState *s = KVM_STATE(obj);
+    return s->notify_vmexit;
+}
+
+static void kvm_arch_set_notify_vmexit(Object *obj, int value, Error **errp)
+{
+    KVMState *s = KVM_STATE(obj);
+
+    if (s->fd != -1) {
+        error_setg(errp, "Cannot set properties after the accelerator has been initialized");
+        return;
+    }
+
+    s->notify_vmexit = value;
+}
+
+static void kvm_arch_get_notify_window(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    KVMState *s = KVM_STATE(obj);
+    uint32_t value = s->notify_window;
+
+    visit_type_uint32(v, name, &value, errp);
+}
+
+static void kvm_arch_set_notify_window(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    KVMState *s = KVM_STATE(obj);
+    Error *error = NULL;
+    uint32_t value;
+
+    if (s->fd != -1) {
+        error_setg(errp, "Cannot set properties after the accelerator has been initialized");
+        return;
+    }
+
+    visit_type_uint32(v, name, &value, &error);
+    if (error) {
+        error_propagate(errp, error);
+        return;
+    }
+
+    s->notify_window = value;
+}
+
+void kvm_arch_accel_class_init(ObjectClass *oc)
+{
+    object_class_property_add_enum(oc, "notify-vmexit", "NotifyVMexitOption",
+                                   &NotifyVmexitOption_lookup,
+                                   kvm_arch_get_notify_vmexit,
+                                   kvm_arch_set_notify_vmexit);
+    object_class_property_set_description(oc, "notify-vmexit",
+                                          "Enable notify VM exit");
+
+    object_class_property_add(oc, "notify-window", "uint32",
+                              kvm_arch_get_notify_window,
+                              kvm_arch_set_notify_window,
+                              NULL, NULL);
+    object_class_property_set_description(oc, "notify-window",
+                                          "Clock cycles without an event window "
+                                          "after which a notification VM exit occurs");
+}
+
+void kvm_set_max_apic_id(uint32_t max_apic_id)
+{
+    kvm_vm_enable_cap(kvm_state, KVM_CAP_MAX_VCPU_ID, 0, max_apic_id);
+}
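kvm_set_max_apic_id() gives board code a way to tell KVM the largest APIC ID the VM will ever use before any vcpu exists, so per-VM, APIC-ID-indexed tables can be sized accordingly. The raw equivalent is a single enable-cap on the VM fd; a sketch (whether a given kernel accepts enabling KVM_CAP_MAX_VCPU_ID is version dependent, so treat this as an assumption):

```c
/* Sketch: cap the largest APIC ID of a VM before creating vcpus. */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_max_apic_id(int vm_fd, unsigned int max_apic_id)
{
    struct kvm_enable_cap cap = {
        .cap = KVM_CAP_MAX_VCPU_ID,
        .args = { max_apic_id },
    };

    return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
```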
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index a978509d507fa3a3333ca92580c7497ce61ea783..58590138e5ee9dbe06a76e201fc6be27d06b4fd5 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -52,5 +52,8 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
 uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
 bool kvm_enable_sgx_provisioning(KVMState *s);
 
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
+
+void kvm_set_max_apic_id(uint32_t max_apic_id);
 
 #endif
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 83c2b91529bf1ad17a1d24fe0a6cea4b1d08ac35..41cf5c00534c139c664c77dc2d3b1bbf31596a91 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1455,6 +1455,67 @@ static const VMStateDescription vmstate_msr_intel_sgx = {
     }
 };
 
+static bool xfd_msrs_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
+}
+
+static const VMStateDescription vmstate_msr_xfd = {
+    .name = "cpu/msr_xfd",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = xfd_msrs_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.msr_xfd, X86CPU),
+        VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+#ifdef TARGET_X86_64
+static bool amx_xtile_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
+}
+
+static const VMStateDescription vmstate_amx_xtile = {
+    .name = "cpu/intel_amx_xtile",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = amx_xtile_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
+        VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
+        VMSTATE_END_OF_LIST()
+    }
+};
+#endif
+
+static bool triple_fault_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return env->triple_fault_pending;
+}
+
+static const VMStateDescription vmstate_triple_fault = {
+    .name = "cpu/triple_fault",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = triple_fault_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(env.triple_fault_pending, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_x86_cpu = {
     .name = "cpu",
     .version_id = 12,
@@ -1593,6 +1654,11 @@ const VMStateDescription vmstate_x86_cpu = {
 #endif
         &vmstate_msr_tsx_ctrl,
         &vmstate_msr_intel_sgx,
+        &vmstate_msr_xfd,
+#ifdef TARGET_X86_64
+        &vmstate_amx_xtile,
+#endif
+        &vmstate_triple_fault,
         NULL
     }
 };
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
index ac61a963440627e6d61a647a772a734c971c0a21..996e9f3bfef53b14a55963db4ece3254bb33b470 100644
--- a/target/i386/xsave_helper.c
+++ b/target/i386/xsave_helper.c
@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
 
         memcpy(pkru, &env->pkru, sizeof(env->pkru));
     }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+    if (e->size && e->offset) {
+        XSaveXTILECFG *tilecfg = buf + e->offset;
+
+        memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
+    }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+    if (e->size && e->offset && buflen >= e->size + e->offset) {
+        XSaveXTILEDATA *tiledata = buf + e->offset;
+
+        memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
+    }
 #endif
 }
 
@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
         pkru = buf + e->offset;
         memcpy(&env->pkru, pkru, sizeof(env->pkru));
     }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+    if (e->size && e->offset) {
+        const XSaveXTILECFG *tilecfg = buf + e->offset;
+
+        memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
+    }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+    if (e->size && e->offset && buflen >= e->size + e->offset) {
+        const XSaveXTILEDATA *tiledata = buf + e->offset;
+
+        memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
+    }
 #endif
 }
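The bounds check on the XTILEDATA copies exists because a legacy 4 KiB struct kvm_xsave cannot hold component 18; only buffers sized via KVM_CAP_XSAVE2 can. The 8 KiB area itself mirrors AMX palette 1 — eight tile registers of 1 KiB, sixteen 64-byte rows each — which the XSaveXTILEDATA build assertion (0x2000) pins down. A hypothetical accessor makes that layout explicit:

```c
/* Hypothetical: address one row of one AMX tile register inside the
 * flat 8 KiB xtiledata area (palette 1: 8 tiles x 16 rows x 64 bytes). */
#include <stdint.h>

enum {
    AMX_TILE_BYTES = 1024,
    AMX_ROW_BYTES  = 64,
};

static inline uint8_t *amx_tile_row(uint8_t *xtiledata,
                                    unsigned tile, unsigned row)
{
    return xtiledata + tile * AMX_TILE_BYTES + row * AMX_ROW_BYTES;
}
```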
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index 086debd9f013737f9a0e889afc712adb4c3f8f74..f80ac72dd1855ea9fbcb518313f3eb925b83ccc7 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -1295,3 +1295,7 @@ bool kvm_arch_cpu_check_are_resettable(void)
 {
     return true;
 }
+
+void kvm_arch_accel_class_init(ObjectClass *oc)
+{
+}
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index dc93b99189ea242fa8c05c95f91dc08fed999c9b..da2dcea1e25b61b566b8077175130a3f1a7226ba 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2959,3 +2959,7 @@ bool kvm_arch_cpu_check_are_resettable(void)
 {
     return true;
 }
+
+void kvm_arch_accel_class_init(ObjectClass *oc)
+{
+}
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 5b1fdb55c47747f1145fe72af88391b0d92515b8..671d0f179c7524f68773b9501091de6698a352f5 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2562,3 +2562,7 @@ bool kvm_arch_cpu_check_are_resettable(void)
 {
     return true;
 }
+
+void kvm_arch_accel_class_init(ObjectClass *oc)
+{
+}
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
index 00b3c209dcbd16958a32283c86e8cb5f3ae2d5cb..54bc8938a8addaed294cb1311b409d9fee602a01 100644
--- a/tests/bench/meson.build
+++ b/tests/bench/meson.build
@@ -3,6 +3,12 @@ qht_bench = executable('qht-bench',
                        sources: 'qht-bench.c',
                        dependencies: [qemuutil])
 
+if have_system
+xbzrle_bench = executable('xbzrle-bench',
+                          sources: 'xbzrle-bench.c',
+                          dependencies: [qemuutil, migration])
+endif
+
 executable('atomic_add-bench',
            sources: files('atomic_add-bench.c'),
            dependencies: [qemuutil],
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
new file mode 100644
index 0000000000000000000000000000000000000000..8848a3a32d7ef591f8eae2cbbc898f92af07f549
--- /dev/null
+++ b/tests/bench/xbzrle-bench.c
@@ -0,0 +1,469 @@
+/*
+ * Xor Based Zero Run Length Encoding benchmarks.
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Orit Wasserman <owasserm@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "../migration/xbzrle.h"
+
+#if defined(CONFIG_AVX512BW_OPT)
+#define XBZRLE_PAGE_SIZE 4096
+static bool is_cpu_support_avx512bw;
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    is_cpu_support_avx512bw = false;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+        /* We must check that AVX is not just available, but usable. */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+            /* 0xe6:
+             *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+             *                    and ZMM16-ZMM31 state are enabled by OS)
+             *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+             */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                is_cpu_support_avx512bw = true;
+            }
+        }
+    }
+    return;
+}
+
+struct ResTime {
+    float t_raw;
+    float t_512;
+};
+
+
+/* Function prototypes
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen);
+*/
+static void encode_decode_zero(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        buffer512[1000 + i] = i;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    buffer[1000 + diff_len + 5] = 105;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    buffer512[1000 + diff_len + 5] = 105;
+
+    /* encode zero page */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(buffer512);
+    g_free(compressed512);
+
+}
+
+static void test_encode_decode_zero_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_zero(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Zero test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_unchanged(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        test[1000 + i] = i + 4;
+        test512[1000 + i] = i + 4;
+    }
+
+    test[1000 + diff_len + 3] = 107;
+    test[1000 + diff_len + 5] = 109;
+
+    test512[1000 + diff_len + 3] = 107;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test unchanged buffer */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(test);
+    g_free(compressed);
+    g_free(test512);
+    g_free(compressed512);
+
+}
+
+static void test_encode_decode_unchanged_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_unchanged(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Unchanged test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_1_byte(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
+    uint8_t buf[2];
+    uint8_t buf512[2];
+
+    test[XBZRLE_PAGE_SIZE - 1] = 1;
+    test512[XBZRLE_PAGE_SIZE - 1] = 1;
+
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
+
+    rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
+    g_assert(rc == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
+
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
+                                 XBZRLE_PAGE_SIZE);
+    g_assert(rc512 == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_1_byte_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_1_byte(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("1 byte test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_overflow(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+
+    for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
+        test[i * 2] = 1;
+        test512[i * 2] = 1;
+    }
+
+    /* encode overflow */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                              XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(rc == -1);
+
+    t_start512 = clock();
+    rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                        compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(rc512 == -1);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_overflow_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_overflow(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Overflow test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
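The overflow expectation here is guaranteed by construction rather than by chance: the loop dirties every other byte, so the encoder sees roughly two thousand separate one-byte changed runs, and each run costs at least three output bytes (a zero-run length, a non-zero-run length, and one data byte). Since 3 x 2047 is well over the 4096-byte output budget, both the generic and the AVX512 encoder must bail out with -1.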
+static void encode_decode_range_avx512(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        test[1000 + i] = i + 4;
+        buffer512[1000 + i] = i;
+        test512[1000 + i] = i + 4;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    test[1000 + diff_len + 3] = 107;
+
+    buffer[1000 + diff_len + 5] = 105;
+    test[1000 + diff_len + 5] = 109;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    test512[1000 + diff_len + 3] = 107;
+
+    buffer512[1000 + diff_len + 5] = 105;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test encode/decode */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_range_avx512(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Encode decode test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_random(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    /* store the index of diff */
+    int dirty_index[diff_len];
+    for (int j = 0; j < diff_len; j++) {
+        dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    }
+    for (i = diff_len - 1; i >= 0; i--) {
+        buffer[dirty_index[i]] = i;
+        test[dirty_index[i]] = i + 4;
+        buffer512[dirty_index[i]] = i;
+        test512[dirty_index[i]] = i + 4;
+    }
+
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_random_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_random(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Random test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+#endif
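Spelled out, the 0xe6 mask checked against XCR0 in init_cpu_flag() is 0b11100110: bits 1-2 (SSE and AVX state) plus bits 5-7 (the opmask and both upper-ZMM state components). Passing CPUID's AVX512BW bit alone is not enough; the OS must also have enabled those state components via XSETBV, which is exactly what the xgetbv read verifies before any AVX512 benchmark is registered.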
g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512); + } + #endif + return g_test_run(); +} diff --git a/tests/unit/test-xbzrle.c b/tests/unit/test-xbzrle.c index 795d6f1cbabb0761206aa812f9319155481e2e09..baa364b443b648aff73c32ebdb1bd8749e3e34b2 100644 --- a/tests/unit/test-xbzrle.c +++ b/tests/unit/test-xbzrle.c @@ -17,6 +17,35 @@ #define XBZRLE_PAGE_SIZE 4096 +int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, + uint8_t *, int) = xbzrle_encode_buffer; +#if defined(CONFIG_AVX512BW_OPT) +#include "qemu/cpuid.h" +static void __attribute__((constructor)) init_cpu_flag(void) +{ + unsigned max = __get_cpuid_max(0, NULL); + int a, b, c, d; + if (max >= 1) { + __cpuid(1, a, b, c, d); + /* We must check that AVX is not just available, but usable. */ + if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) { + int bv; + __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0)); + __cpuid_count(7, 0, a, b, c, d); + /* 0xe6: + * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15 + * and ZMM16-ZMM31 state are enabled by OS) + * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS) + */ + if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) { + xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512; + } + } + } + return ; +} +#endif + static void test_uleb(void) { uint32_t i, val; @@ -55,7 +84,7 @@ static void test_encode_decode_zero(void) buffer[1000 + diff_len + 5] = 105; /* encode zero page */ - dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed, + dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed, XBZRLE_PAGE_SIZE); g_assert(dlen == 0); @@ -79,7 +108,7 @@ static void test_encode_decode_unchanged(void) test[1000 + diff_len + 5] = 109; /* test unchanged buffer */ - dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed, + dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed, XBZRLE_PAGE_SIZE); g_assert(dlen == 0); @@ -97,7 +126,7 @@ static void test_encode_decode_1_byte(void) test[XBZRLE_PAGE_SIZE - 1] = 1; - dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed, + dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed, XBZRLE_PAGE_SIZE); g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2)); @@ -122,7 +151,7 @@ static void test_encode_decode_overflow(void) } /* encode overflow */ - rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed, + rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed, XBZRLE_PAGE_SIZE); g_assert(rc == -1); @@ -153,7 +182,7 @@ static void encode_decode_range(void) test[1000 + diff_len + 5] = 109; /* test encode/decode */ - dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed, + dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed, XBZRLE_PAGE_SIZE); rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);