diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 10af4170d9381f482edfd8f13dacc7c607a73545..328fccd7fac1ab9f1e97518282cc17d2a5ee297f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -73,86 +73,12 @@ do { } while (0) #endif -#define KVM_MSI_HASHTAB_SIZE 256 - struct KVMParkedVcpu { unsigned long vcpu_id; int kvm_fd; QLIST_ENTRY(KVMParkedVcpu) node; }; -enum KVMDirtyRingReaperState { - KVM_DIRTY_RING_REAPER_NONE = 0, - /* The reaper is sleeping */ - KVM_DIRTY_RING_REAPER_WAIT, - /* The reaper is reaping for dirty pages */ - KVM_DIRTY_RING_REAPER_REAPING, -}; - -/* - * KVM reaper instance, responsible for collecting the KVM dirty bits - * via the dirty ring. - */ -struct KVMDirtyRingReaper { - /* The reaper thread */ - QemuThread reaper_thr; - volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ - volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ -}; - -struct KVMState -{ - AccelState parent_obj; - - int nr_slots; - int fd; - int vmfd; - int coalesced_mmio; - int coalesced_pio; - struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; - bool coalesced_flush_in_progress; - int vcpu_events; - int robust_singlestep; - int debugregs; -#ifdef KVM_CAP_SET_GUEST_DEBUG - QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; -#endif - int max_nested_state_len; - int many_ioeventfds; - int intx_set_mask; - int kvm_shadow_mem; - bool kernel_irqchip_allowed; - bool kernel_irqchip_required; - OnOffAuto kernel_irqchip_split; - bool sync_mmu; - uint64_t manual_dirty_log_protect; - /* The man page (and posix) say ioctl numbers are signed int, but - * they're not. Linux, glibc and *BSD all treat ioctl numbers as - * unsigned, and treating them as signed here can break things */ - unsigned irq_set_ioctl; - unsigned int sigmask_len; - GHashTable *gsimap; -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing *irq_routes; - int nr_allocated_irq_routes; - unsigned long *used_gsi_bitmap; - unsigned int gsi_count; - QTAILQ_HEAD(, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; -#endif - KVMMemoryListener memory_listener; - QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; - - /* For "info mtree -f" to tell if an MR is registered in KVM */ - int nr_as; - struct KVMAs { - KVMMemoryListener *ml; - AddressSpace *as; - } *as; - uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ - uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ - struct KVMDirtyRingReaper reaper; -}; - KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_split_irqchip; @@ -3732,6 +3658,8 @@ static void kvm_accel_instance_init(Object *obj) s->kernel_irqchip_split = ON_OFF_AUTO_AUTO; /* KVM dirty ring is by default off */ s->kvm_dirty_ring_size = 0; + s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN; + s->notify_window = 0; } static void kvm_accel_class_init(ObjectClass *oc, void *data) @@ -3759,6 +3687,8 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "dirty-ring-size", "Size of KVM dirty page ring buffer (default: 0, i.e. 
use bitmap)"); + + kvm_arch_accel_class_init(oc); } static const TypeInfo kvm_accel_type = { diff --git a/configure b/configure index 6f4057f888667c5efb0a527b0f0747fc8f194af6..571092a8221ce43bd872408782db4a452d7b74fd 100755 --- a/configure +++ b/configure @@ -330,8 +330,6 @@ qom_cast_debug="yes" trace_backends="log" trace_file="trace" opengl="$default_feature" -cpuid_h="no" -avx2_opt="$default_feature" guest_agent="$default_feature" guest_agent_with_vss="no" guest_agent_ntddscsi="no" @@ -1063,14 +1061,6 @@ for opt do ;; --disable-tools) want_tools="no" ;; - --disable-avx2) avx2_opt="no" - ;; - --enable-avx2) avx2_opt="yes" - ;; - --disable-avx512f) avx512f_opt="no" - ;; - --enable-avx512f) avx512f_opt="yes" - ;; --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 ;; @@ -1468,8 +1458,6 @@ cat << EOF tpm TPM support libssh ssh block device support numa libnuma support - avx2 AVX2 optimization support - avx512f AVX512F optimization support replication replication support opengl opengl support xfsctl xfsctl support @@ -2908,85 +2896,6 @@ else # "$safe_stack" = "" fi fi -######################################## -# check if cpuid.h is usable. - -cat > $TMPC << EOF -#include -int main(void) { - unsigned a, b, c, d; - int max = __get_cpuid_max(0, 0); - - if (max >= 1) { - __cpuid(1, a, b, c, d); - } - - if (max >= 7) { - __cpuid_count(7, 0, a, b, c, d); - } - - return 0; -} -EOF -if compile_prog "" "" ; then - cpuid_h=yes -fi - -########################################## -# avx2 optimization requirement check -# -# There is no point enabling this if cpuid.h is not usable, -# since we won't be able to select the new routines. - -if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then - cat > $TMPC << EOF -#pragma GCC push_options -#pragma GCC target("avx2") -#include -#include -static int bar(void *a) { - __m256i x = *(__m256i *)a; - return _mm256_testz_si256(x, x); -} -int main(int argc, char *argv[]) { return bar(argv[0]); } -EOF - if compile_object "-Werror" ; then - avx2_opt="yes" - else - avx2_opt="no" - fi -fi - -########################################## -# avx512f optimization requirement check -# -# There is no point enabling this if cpuid.h is not usable, -# since we won't be able to select the new routines. -# by default, it is turned off. -# if user explicitly want to enable it, check environment - -if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then - cat > $TMPC << EOF -#pragma GCC push_options -#pragma GCC target("avx512f") -#include -#include -static int bar(void *a) { - __m512i x = *(__m512i *)a; - return _mm512_test_epi64_mask(x, x); -} -int main(int argc, char *argv[]) -{ - return bar(argv[0]); -} -EOF - if ! compile_object "-Werror" ; then - avx512f_opt="no" - fi -else - avx512f_opt="no" -fi - ######################################## # check if __[u]int128_t is usable. 
@@ -3608,14 +3517,6 @@ if test "$opengl" = "yes" ; then echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak fi -if test "$avx2_opt" = "yes" ; then - echo "CONFIG_AVX2_OPT=y" >> $config_host_mak -fi - -if test "$avx512f_opt" = "yes" ; then - echo "CONFIG_AVX512F_OPT=y" >> $config_host_mak -fi - # XXX: suppress that if [ "$bsd" = "yes" ] ; then echo "CONFIG_BSD=y" >> $config_host_mak @@ -3648,10 +3549,6 @@ if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then echo "CONFIG_TSAN=y" >> $config_host_mak fi -if test "$cpuid_h" = "yes" ; then - echo "CONFIG_CPUID_H=y" >> $config_host_mak -fi - if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 357257349be2ec386e96a9bcefc2dd5a9b7c6439..5b926bed11f419ed4eea6bfd2faf353b23a33ee4 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1035,14 +1035,6 @@ void pc_machine_done(Notifier *notifier, void *data) /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); } - - - if (x86ms->apic_id_limit > 255 && !xen_enabled() && - !kvm_irqchip_in_kernel()) { - error_report("current -smp configuration requires kernel " - "irqchip support."); - exit(EXIT_FAILURE); - } } void pc_guest_info_init(PCMachineState *pcms) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index b84840a1bb99a9072375f9784dc5068c23fb00ca..a3258d78facf41c63fdad6c8c6f21da82ad1938e 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -39,6 +39,7 @@ #include "sysemu/replay.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-timers.h" +#include "sysemu/xen.h" #include "trace.h" #include "hw/i386/x86.h" @@ -136,6 +137,25 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) */ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ms->smp.max_cpus - 1) + 1; + + /* + * Can we support APIC ID 255 or higher? + * + * Under Xen: yes. + * With userspace emulated lapic: no + * With KVM's in-kernel lapic: only if X2APIC API is enabled. 
+ */ + if (x86ms->apic_id_limit > 255 && !xen_enabled() && + (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) { + error_report("current -smp configuration requires kernel " + "irqchip and X2APIC API support."); + exit(EXIT_FAILURE); + } + + if (kvm_enabled()) { + kvm_set_max_apic_id(x86ms->apic_id_limit); + } + possible_cpus = mc->possible_cpu_arch_ids(ms); for (i = 0; i < ms->smp.cpus; i++) { x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index f050dfa1f75ded658005fb88f83b9c4b48ef30a7..b66970e07b4243e3e2f7333be650cb9fc0a48078 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -343,6 +343,8 @@ bool kvm_device_supported(int vmfd, uint64_t type); extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; +void kvm_arch_accel_class_init(ObjectClass *oc); + void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 7e18c0a3c0a727fb1b191d61c1cbe7e5b96207cd..60b520a13e84e0e710e5575c850ddad5e5ab26fa 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -10,6 +10,7 @@ #define QEMU_KVM_INT_H #include "exec/memory.h" +#include "qapi/qapi-types-common.h" #include "qemu/accel.h" #include "qemu/queue.h" #include "sysemu/kvm.h" @@ -44,6 +45,81 @@ typedef struct KVMMemoryListener { QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; } KVMMemoryListener; +#define KVM_MSI_HASHTAB_SIZE 256 + +enum KVMDirtyRingReaperState { + KVM_DIRTY_RING_REAPER_NONE = 0, + /* The reaper is sleeping */ + KVM_DIRTY_RING_REAPER_WAIT, + /* The reaper is reaping for dirty pages */ + KVM_DIRTY_RING_REAPER_REAPING, +}; + +/* + * KVM reaper instance, responsible for collecting the KVM dirty bits + * via the dirty ring. + */ +struct KVMDirtyRingReaper { + /* The reaper thread */ + QemuThread reaper_thr; + volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ + volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ +}; +struct KVMState +{ + AccelState parent_obj; + + int nr_slots; + int fd; + int vmfd; + int coalesced_mmio; + int coalesced_pio; + struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; + bool coalesced_flush_in_progress; + int vcpu_events; + int robust_singlestep; + int debugregs; +#ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; +#endif + int max_nested_state_len; + int many_ioeventfds; + int intx_set_mask; + int kvm_shadow_mem; + bool kernel_irqchip_allowed; + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. 
Linux, glibc and *BSD all treat ioctl numbers as + * unsigned, and treating them as signed here can break things */ + unsigned irq_set_ioctl; + unsigned int sigmask_len; + GHashTable *gsimap; +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *irq_routes; + int nr_allocated_irq_routes; + unsigned long *used_gsi_bitmap; + unsigned int gsi_count; + QTAILQ_HEAD(, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; +#endif + KVMMemoryListener memory_listener; + QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; + + /* For "info mtree -f" to tell if an MR is registered in KVM */ + int nr_as; + struct KVMAs { + KVMMemoryListener *ml; + AddressSpace *as; + } *as; + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + struct KVMDirtyRingReaper reaper; + NotifyVmexitOption notify_vmexit; + uint32_t notify_window; +}; + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, AddressSpace *as, int as_id, const char *name); diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index bf6e96011dfed4e2e8582e511c08b5b6cbd723b6..8791f7c228e1b449091849694d992c2d0197b090 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -325,6 +325,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 #define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 +#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -359,7 +360,10 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u8 reserved[27]; + struct { + __u8 pending; + } triple_fault; + __u8 reserved[26]; __u8 exception_has_payload; __u64 exception_payload; }; diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 02bba131e6c9cb0634e1d428f4c06da1e27db692..eb069b0f31d7fc1ff755426118ae0f903253293c 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -270,6 +270,7 @@ struct kvm_xen_exit { #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 #define KVM_EXIT_RISCV_SBI 35 +#define KVM_EXIT_NOTIFY 37 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -492,6 +493,11 @@ struct kvm_run { unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + /* KVM_EXIT_NOTIFY */ + struct { +#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -1152,6 +1158,8 @@ struct kvm_ppc_resize_hpt { /* #define KVM_CAP_VM_TSC_CONTROL 214 */ #define KVM_CAP_SYSTEM_EVENT_DATA 215 #define KVM_CAP_S390_PROTECTED_DUMP 217 +#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 +#define KVM_CAP_X86_NOTIFY_VMEXIT 219 #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 #define KVM_CAP_SEV_ES_GHCB 500 @@ -2285,4 +2293,8 @@ struct kvm_s390_zpci_op { /* flags for kvm_s390_zpci_op->u.reg_aen.flags */ #define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */ +#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0) +#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1) + #endif /* __LINUX_KVM_H */ diff --git a/meson.build b/meson.build index 4d4fe04bd05880e4a286f9e99109f8add1ccf82c..869a61a5fb864ecef1558f2c4919edfc6e8d60d9 100644 --- a/meson.build +++ b/meson.build @@ -1750,6 +1750,52 @@ config_host_data.set('CONFIG_GETAUXVAL', cc.links(gnu_source_prefix + ''' return getauxval(AT_HWCAP) == 0; }''')) +have_cpuid_h = cc.links(''' + #include <cpuid.h> + int main(void) { + unsigned a, b, c, d; + unsigned max = __get_cpuid_max(0, 0); + + if (max >= 1) { + __cpuid(1, a, b, c, d); + } + + if (max >= 7) { + __cpuid_count(7, 0, a, b, c, d); + } + + return 0; + }''') +config_host_data.set('CONFIG_CPUID_H', have_cpuid_h) + +config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \ + .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \ + .require(cc.links(''' + #pragma GCC push_options + #pragma GCC target("avx2") + #include <cpuid.h> + #include <immintrin.h> + static int bar(void *a) { + __m256i x = *(__m256i *)a; + return _mm256_testz_si256(x, x); + } + int main(int argc, char *argv[]) { return bar(argv[0]); } + '''), error_message: 'AVX2 not available').allowed()) + +config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \ + .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \ + .require(cc.links(''' + #pragma GCC push_options + #pragma GCC target("avx512f") + #include <cpuid.h> + #include <immintrin.h> + static int bar(void *a) { + __m512i x = *(__m512i *)a; + return _mm512_test_epi64_mask(x, x); + } + int main(int argc, char *argv[]) { return bar(argv[0]); } + '''), error_message: 'AVX512F not available').allowed()) + config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + ''' #include #include @@ -1770,6 +1816,22 @@ config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + ''' #include #include return -1; }''')) +config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \ + .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \ + .require(cc.links(''' + #pragma GCC push_options + #pragma GCC target("avx512bw") + #include <cpuid.h> + #include <immintrin.h> + static int bar(void *a) { + + __m512i *x = a; + __m512i res = _mm512_abs_epi8(*x); + return res[1]; + } + int main(int argc, char *argv[]) { return bar(argv[0]); } + '''), error_message: 'AVX512BW not available').allowed()) + ignored = ['CONFIG_QEMU_INTERP_PREFIX', # actually per-target 'HAVE_GDB_BIN'] arrays = ['CONFIG_BDRV_RW_WHITELIST', 'CONFIG_BDRV_RO_WHITELIST'] @@ -3274,8 +3336,9 @@ summary_info += {'membarrier': config_host.has_key('CONFIG_MEMBARRIER')} summary_info += {'debug stack usage': config_host.has_key('CONFIG_DEBUG_STACK_USAGE')} summary_info += {'mutex debugging': config_host.has_key('CONFIG_DEBUG_MUTEX')} summary_info += {'memory allocator': get_option('malloc')} -summary_info += {'avx2 optimization': config_host.has_key('CONFIG_AVX2_OPT')} -summary_info += {'avx512f optimization': 
config_host.has_key('CONFIG_AVX512F_OPT')} +summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')} +summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')} +summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')} summary_info += {'gprof enabled': config_host.has_key('CONFIG_GPROF')} summary_info += {'gcov': get_option('b_coverage')} summary_info += {'thread sanitizer': config_host.has_key('CONFIG_TSAN')} diff --git a/meson_options.txt b/meson_options.txt index e3923237322a7bac13915c439d4944136afe61bb..ec9c3c0a05e2e6ab545320242e2e105bffa4f38f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -66,6 +66,12 @@ option('cfi_debug', type: 'boolean', value: 'false', description: 'Verbose errors in case of CFI violation') option('multiprocess', type: 'feature', value: 'auto', description: 'Out of process device emulation support') +option('avx2', type: 'feature', value: 'auto', + description: 'AVX2 optimizations') +option('avx512f', type: 'feature', value: 'disabled', + description: 'AVX512F optimizations') +option('avx512bw', type: 'feature', value: 'auto', + description: 'AVX512BW optimizations') option('attr', type : 'feature', value : 'auto', description: 'attr/xattr support') diff --git a/migration/ram.c b/migration/ram.c index 727fe801dbe995e4615deb1ecaad6f159b7d099e..efb6b1560489172dc6e0ab4ba84a4d9415820eab 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -102,6 +102,34 @@ static inline bool is_zero_range(uint8_t *p, uint64_t size) return buffer_is_zero(p, size); } +int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, + uint8_t *, int) = xbzrle_encode_buffer; +#if defined(CONFIG_AVX512BW_OPT) +#include "qemu/cpuid.h" +static void __attribute__((constructor)) init_cpu_flag(void) +{ + unsigned max = __get_cpuid_max(0, NULL); + int a, b, c, d; + if (max >= 1) { + __cpuid(1, a, b, c, d); + /* We must check that AVX is not just available, but usable. 
*/ + if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) { + int bv; + __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0)); + __cpuid_count(7, 0, a, b, c, d); + /* 0xe6: + * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15 + * and ZMM16-ZMM31 state are enabled by OS) + * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS) + */ + if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) { + xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512; + } + } +} +#endif + XBZRLECacheStats xbzrle_counters; /* struct contains XBZRLE cache and a static page @@ -766,9 +794,9 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); /* XBZRLE encoding (if there is no overflow) */ - encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, - TARGET_PAGE_SIZE); + encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, - TARGET_PAGE_SIZE); + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); /* * Update the cache contents, so that it corresponds to the data diff --git a/migration/xbzrle.c b/migration/xbzrle.c index 1ba482ded9c4ffcfb48fe774396e379bb7726284..c6f8b209175acc31ca77d5fae758a14b9ac2d5f3 100644 --- a/migration/xbzrle.c +++ b/migration/xbzrle.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/host-utils.h" #include "xbzrle.h" /* @@ -174,3 +175,126 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) return d; } + +#if defined(CONFIG_AVX512BW_OPT) +#pragma GCC push_options +#pragma GCC target("avx512bw") +#include <immintrin.h> +int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen) +{ + uint32_t zrun_len = 0, nzrun_len = 0; + int d = 0, i = 0, num = 0; + uint8_t *nzrun_start = NULL; + /* add 1 to include residual part in main loop */ + uint32_t count512s = (slen >> 6) + 1; + /* count_residual is the tail of the data, i.e., count_residual = slen % 64 */ + uint32_t count_residual = slen & 0b111111; + bool never_same = true; + uint64_t mask_residual = 1; + mask_residual <<= count_residual; + mask_residual -= 1; + __m512i r = _mm512_set1_epi32(0); + + while (count512s) { + int bytes_to_check = 64; + uint64_t mask = 0xffffffffffffffff; + if (count512s == 1) { + bytes_to_check = count_residual; + mask = mask_residual; + } + __m512i old_data = _mm512_mask_loadu_epi8(r, + mask, old_buf + i); + __m512i new_data = _mm512_mask_loadu_epi8(r, + mask, new_buf + i); + uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data); + count512s--; + + bool is_same = (comp & 0x1); + while (bytes_to_check) { + if (d + 2 > dlen) { + return -1; + } + if (is_same) { + if (nzrun_len) { + d += uleb128_encode_small(dst + d, nzrun_len); + if (d + nzrun_len > dlen) { + return -1; + } + nzrun_start = new_buf + i - nzrun_len; + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + nzrun_len = 0; + } + /* 64 data at a time for speed */ + if (count512s && (comp == 0xffffffffffffffff)) { + i += 64; + zrun_len += 64; + break; + } + never_same = false; + num = ctz64(~comp); + num = (num < bytes_to_check) ? 
num : bytes_to_check; + zrun_len += num; + bytes_to_check -= num; + comp >>= num; + i += num; + if (bytes_to_check) { + /* still has different data after same data */ + d += uleb128_encode_small(dst + d, zrun_len); + zrun_len = 0; + } else { + break; + } + } + if (never_same || zrun_len) { + /* + * never_same only acts if + * data begins with diff in first count512s + */ + d += uleb128_encode_small(dst + d, zrun_len); + zrun_len = 0; + never_same = false; + } + /* has diff, 64 data at a time for speed */ + if ((bytes_to_check == 64) && (comp == 0x0)) { + i += 64; + nzrun_len += 64; + break; + } + num = ctz64(comp); + num = (num < bytes_to_check) ? num : bytes_to_check; + nzrun_len += num; + bytes_to_check -= num; + comp >>= num; + i += num; + if (bytes_to_check) { + /* mask like 111000 */ + d += uleb128_encode_small(dst + d, nzrun_len); + /* overflow */ + if (d + nzrun_len > dlen) { + return -1; + } + nzrun_start = new_buf + i - nzrun_len; + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + nzrun_len = 0; + is_same = true; + } + } + } + + if (nzrun_len != 0) { + d += uleb128_encode_small(dst + d, nzrun_len); + /* overflow */ + if (d + nzrun_len > dlen) { + return -1; + } + nzrun_start = new_buf + i - nzrun_len; + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + } + return d; +} +#pragma GCC pop_options +#endif diff --git a/migration/xbzrle.h b/migration/xbzrle.h index a0db507b9cd9475277749de89a91d2cb98315c21..6feb49160adfff0fd1cdf52e16bf0d6952117954 100644 --- a/migration/xbzrle.h +++ b/migration/xbzrle.h @@ -18,4 +18,8 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst, int dlen); int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); +#if defined(CONFIG_AVX512BW_OPT) +int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen); +#endif #endif diff --git a/qapi/run-state.json b/qapi/run-state.json index 43d66d700fcd202e50b33c9743eee8e9f9f7a98b..08c38b2c67bfa894313d068415cd415bd9c2a379 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -638,3 +638,20 @@ { 'struct': 'MemoryFailureFlags', 'data': { 'action-required': 'bool', 'recursive': 'bool'} } + +## +# @NotifyVmexitOption: +# +# An enumeration of the options specified when enabling notify VM exit. +# +# @run: enable the feature, do nothing and continue if the notify VM exit happens. +# +# @internal-error: enable the feature, raise an internal error if the notify +# VM exit happens. +# +# @disable: disable the feature. +# +# Since: 7.2 +## +{ 'enum': 'NotifyVmexitOption', + 'data': [ 'run', 'internal-error', 'disable' ] } \ No newline at end of file diff --git a/qemu-options.hx b/qemu-options.hx index 453d220226bdbbe3a967619335ff3cf11ab4b9b8..8ec647371c0b18816cf925b1730a5acdd9f70d75 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -152,6 +152,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel, " split-wx=on|off (enable TCG split w^x mapping)\n" " tb-size=n (TCG translation block cache size)\n" " dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n" + " notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n" " thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL) SRST ``-accel name[,prop=value[,...]]`` @@ -203,6 +204,16 @@ SRST is disabled (dirty-ring-size=0). When enabled, KVM will instead record dirty pages in a bitmap. 
+ ``notify-vmexit=run|internal-error|disable,notify-window=n`` + Enables or disables notify VM exit support on x86 hosts and specifies + the notify window used to trigger the VM exit if enabled. + The ``run`` option enables the feature and lets the guest continue + when the exit happens. The ``internal-error`` option enables the feature + and raises an internal error instead. The ``disable`` option disables the + feature. This feature can mitigate CPU-stuck issues that occur when event + windows do not open up for a specified amount of time (the notify-window). + Default: notify-vmexit=run,notify-window=0. + ERST DEF("smp", HAS_ARG, QEMU_OPTION_smp, diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 7a17ff42182fc78b3b88c0551d4108caaa0d2d9c..8c00cce41183632396acb2163f6a1df2b14c9497 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -25,6 +25,9 @@ meson_options_help() { printf "%s\n" ' alsa ALSA sound support' printf "%s\n" ' attr attr/xattr support' printf "%s\n" ' auth-pam PAM access control' + printf "%s\n" ' avx2 AVX2 optimizations' + printf "%s\n" ' avx512bw AVX512BW optimizations' + printf "%s\n" ' avx512f AVX512F optimizations' printf "%s\n" ' bpf eBPF support' printf "%s\n" ' brlapi brlapi character device driver' printf "%s\n" ' bzip2 bzip2 support for DMG images' @@ -107,6 +110,12 @@ _meson_option_parse() { --disable-attr) printf "%s" -Dattr=disabled ;; --enable-auth-pam) printf "%s" -Dauth_pam=enabled ;; --disable-auth-pam) printf "%s" -Dauth_pam=disabled ;; + --enable-avx2) printf "%s" -Davx2=enabled ;; + --disable-avx2) printf "%s" -Davx2=disabled ;; + --enable-avx512bw) printf "%s" -Davx512bw=enabled ;; + --disable-avx512bw) printf "%s" -Davx512bw=disabled ;; + --enable-avx512f) printf "%s" -Davx512f=enabled ;; + --disable-avx512f) printf "%s" -Davx512f=disabled ;; --enable-bpf) printf "%s" -Dbpf=enabled ;; --disable-bpf) printf "%s" -Dbpf=disabled ;; --enable-brlapi) printf "%s" -Dbrlapi=enabled ;; diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 1ae4e510558f1a60346947ef2500118a6cb4710f..7bec3e5f4ff82419efb5ddf1755afe68aa0f4933 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1059,3 +1059,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 62149367b10690035f99eecfe025dfebefa198bf..923fa99b6e061de49e2c41c6262c23bac553a21b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -978,6 +978,34 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_XSAVE_FEATURES, }, + [FEAT_XSAVE_XSS_LO] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, + .ecx = 1, + .reg = R_ECX, + }, + }, + [FEAT_XSAVE_XSS_HI] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, + .ecx = 1, + .reg = R_EDX + }, + }, [FEAT_6_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -993,7 +1021,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid = { .eax = 6, .reg = R_EAX, }, .tcg_features = TCG_6_EAX_FEATURES, }, - [FEAT_XSAVE_COMP_LO] = { + [FEAT_XSAVE_XCR0_LO] = { .type = CPUID_FEATURE_WORD, .cpuid = { .eax = 0xD, @@ -1006,7 +1034,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { XSTATE_OPMASK_MASK | 
XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK, }, - [FEAT_XSAVE_COMP_HI] = { + [FEAT_XSAVE_XCR0_HI] = { .type = CPUID_FEATURE_WORD, .cpuid = { .eax = 0xD, @@ -1025,7 +1053,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", NULL, "fb-clear", NULL, NULL, NULL, NULL, NULL, NULL, - "pbrsb-no", NULL, NULL, "rfds-no", + "pbrsb-no", NULL, "gds-no", "rfds-no", "rfds-clear", NULL, NULL, NULL, }, .msr = { @@ -1246,7 +1274,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .feat_names = { "sgx1", "sgx2", NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "sgx-edeccssa", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1286,7 +1314,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .feat_names = { NULL, "sgx-debug", "sgx-mode64", NULL, "sgx-provisionkey", "sgx-tokenkey", NULL, "sgx-kss", - NULL, NULL, NULL, NULL, + NULL, NULL, "sgx-aex-notify", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1431,6 +1459,9 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (0) + ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ @@ -1473,15 +1504,18 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { }, }; -static uint32_t xsave_area_size(uint64_t mask) +static uint32_t xsave_area_size(uint64_t mask, bool compacted) { + uint64_t ret = x86_ext_save_areas[0].size; + const ExtSaveArea *esa; + uint32_t offset = 0; int i; - uint64_t ret = 0; - for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { - const ExtSaveArea *esa = &x86_ext_save_areas[i]; + for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { + esa = &x86_ext_save_areas[i]; if ((mask >> i) & 1) { - ret = MAX(ret, esa->offset + esa->size); + offset = compacted ? ret : esa->offset; + ret = MAX(ret, offset + esa->size); } } return ret; @@ -1492,10 +1526,10 @@ static inline bool accel_uses_host_cpuid(void) return kvm_enabled() || hvf_enabled(); } -static inline uint64_t x86_cpu_xsave_components(X86CPU *cpu) +static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) { - return ((uint64_t)cpu->env.features[FEAT_XSAVE_COMP_HI]) << 32 | - cpu->env.features[FEAT_XSAVE_COMP_LO]; + return ((uint64_t)cpu->env.features[FEAT_XSAVE_XCR0_HI]) << 32 | + cpu->env.features[FEAT_XSAVE_XCR0_LO]; } /* Return name of 32-bit register, from a R_* constant */ @@ -1507,6 +1541,12 @@ static const char *get_register_name_32(unsigned int reg) return x86_reg_info_32[reg].name; } +static inline uint64_t x86_cpu_xsave_xss_components(X86CPU *cpu) +{ + return ((uint64_t)cpu->env.features[FEAT_XSAVE_XSS_HI]) << 32 | + cpu->env.features[FEAT_XSAVE_XSS_LO]; +} + /* * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. 
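The two-argument xsave_area_size() introduced above distinguishes the standard XSAVE layout reported in CPUID leaf 0xD sub-leaf 0, where every enabled component sits at a fixed architectural offset, from the compacted layout reported in sub-leaf 1, where enabled components are packed back to back behind the 576-byte legacy region and XSAVE header. A standalone sketch of that arithmetic, using illustrative offsets and sizes for the YMM and PKRU components and ignoring details such as 64-byte component alignment:

    #include <stdint.h>
    #include <stdio.h>

    struct comp { uint32_t offset, size; };

    /* Component 2 is YMM, component 9 is PKRU (illustrative values). */
    static const struct comp comps[10] = {
        [2] = { .offset = 576,  .size = 256 },
        [9] = { .offset = 2688, .size = 8 },
    };

    static uint32_t area_size(uint64_t mask, int compacted)
    {
        uint32_t ret = 512 + 64; /* legacy area plus XSAVE header */

        for (unsigned i = 2; i < 10; i++) {
            if (!((mask >> i) & 1) || !comps[i].size) {
                continue;
            }
            /* A compacted component starts where the previous one ended;
             * a standard component always sits at its fixed offset. */
            uint32_t offset = compacted ? ret : comps[i].offset;
            if (ret < offset + comps[i].size) {
                ret = offset + comps[i].size;
            }
        }
        return ret;
    }

    int main(void)
    {
        uint64_t mask = (1u << 2) | (1u << 9);
        /* Prints "standard 2696, compacted 840" for this mask. */
        printf("standard %u, compacted %u\n",
               area_size(mask, 0), area_size(mask, 1));
        return 0;
    }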
@@ -3633,6 +3673,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 7, + .note = "TSX, taa-no", + .props = (PropValue[]) { + /* Restore TSX features removed by -v2 above */ + { "hle", "on" }, + { "rtm", "on" }, + { /* end of list */ } + }, + }, { /* end of list */ } } }, @@ -5206,8 +5256,8 @@ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) /* XSAVE components are automatically enabled by other features, * so return the original feature name instead */ - if (w == FEAT_XSAVE_COMP_LO || w == FEAT_XSAVE_COMP_HI) { - int comp = (w == FEAT_XSAVE_COMP_HI) ? bitnr + 32 : bitnr; + if (w == FEAT_XSAVE_XCR0_LO || w == FEAT_XSAVE_XCR0_HI) { + int comp = (w == FEAT_XSAVE_XCR0_HI) ? bitnr + 32 : bitnr; if (comp < ARRAY_SIZE(x86_ext_save_areas) && x86_ext_save_areas[comp].bits) { @@ -6077,25 +6127,36 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } if (count == 0) { - *ecx = xsave_area_size(x86_cpu_xsave_components(cpu)); - *eax = env->features[FEAT_XSAVE_COMP_LO]; - *edx = env->features[FEAT_XSAVE_COMP_HI]; + *ecx = xsave_area_size(x86_cpu_xsave_xcr0_components(cpu), false); + *eax = env->features[FEAT_XSAVE_XCR0_LO]; + *edx = env->features[FEAT_XSAVE_XCR0_HI]; /* * The initial value of xcr0 and ebx == 0, On host without kvm * commit 412a3c41(e.g., CentOS 6), the ebx's value always == 0 * even through guest update xcr0, this will crash some legacy guest * (e.g., CentOS 6), So set ebx == ecx to workaroud it. */ - *ebx = kvm_enabled() ? *ecx : xsave_area_size(env->xcr0); + *ebx = kvm_enabled() ? *ecx : xsave_area_size(env->xcr0, false); } else if (count == 1) { + uint64_t xstate = x86_cpu_xsave_xcr0_components(cpu) | + x86_cpu_xsave_xss_components(cpu); + *eax = env->features[FEAT_XSAVE]; + *ebx = xsave_area_size(xstate, true); + *ecx = env->features[FEAT_XSAVE_XSS_LO]; + *edx = env->features[FEAT_XSAVE_XSS_HI]; } else if (count < ARRAY_SIZE(x86_ext_save_areas)) { - if ((x86_cpu_xsave_components(cpu) >> count) & 1) { - const ExtSaveArea *esa = &x86_ext_save_areas[count]; + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + + if (x86_cpu_xsave_xcr0_components(cpu) & (1ULL << count)) { *eax = esa->size; *ebx = esa->offset; *ecx = esa->ecx & (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); + } else if (x86_cpu_xsave_xss_components(cpu) & (1ULL << count)) { + *eax = esa->size; + *ebx = 0; + *ecx = 1; } } break; @@ -6146,8 +6207,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } else { *eax &= env->features[FEAT_SGX_12_1_EAX]; *ebx &= 0; /* ebx reserve */ - *ecx &= env->features[FEAT_XSAVE_COMP_LO]; - *edx &= env->features[FEAT_XSAVE_COMP_HI]; + *ecx &= env->features[FEAT_XSAVE_XCR0_LO]; + *edx &= env->features[FEAT_XSAVE_XCR0_HI]; /* FP and SSE are always allowed regardless of XSAVE/XCR0. */ *ecx |= XSTATE_FP_MASK | XSTATE_SSE_MASK; @@ -6544,6 +6605,9 @@ static void x86_cpu_reset(DeviceState *dev) } for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { const ExtSaveArea *esa = &x86_ext_save_areas[i]; + if (!((1 << i) & CPUID_XSTATE_XCR0_MASK)) { + continue; + } if (env->features[esa->feature] & esa->bits) { xcr0 |= 1ull << i; } @@ -6577,6 +6641,7 @@ static void x86_cpu_reset(DeviceState *dev) env->exception_has_payload = false; env->exception_payload = 0; env->nmi_injected = false; + env->triple_fault_pending = false; #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. 
*/ apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); @@ -6660,8 +6725,10 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) static bool request_perm; if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { - env->features[FEAT_XSAVE_COMP_LO] = 0; - env->features[FEAT_XSAVE_COMP_HI] = 0; + env->features[FEAT_XSAVE_XCR0_LO] = 0; + env->features[FEAT_XSAVE_XCR0_HI] = 0; + env->features[FEAT_XSAVE_XSS_LO] = 0; + env->features[FEAT_XSAVE_XSS_HI] = 0; return; } @@ -6679,8 +6746,10 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) request_perm = true; } - env->features[FEAT_XSAVE_COMP_LO] = mask; - env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; + env->features[FEAT_XSAVE_XCR0_LO] = mask & CPUID_XSTATE_XCR0_MASK; + env->features[FEAT_XSAVE_XCR0_HI] = (mask & CPUID_XSTATE_XCR0_MASK) >> 32; + env->features[FEAT_XSAVE_XSS_LO] = mask & CPUID_XSTATE_XSS_MASK; + env->features[FEAT_XSAVE_XSS_HI] = (mask & CPUID_XSTATE_XSS_MASK) >> 32; } /***** Steps involved on loading and filtering CPUID data diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e84cb8265c74141cee5b7dcdfc5cb0ad53a42880..b0d79a1519b5e22b6ef292c7366f46921ac20bb7 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -566,6 +566,14 @@ typedef enum X86Seg { #define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) +/* CPUID feature bits available in XCR0 */ +#define CPUID_XSTATE_XCR0_MASK (XSTATE_FP_MASK | XSTATE_SSE_MASK | \ + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | \ + XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | \ + XSTATE_ZMM_Hi256_MASK | \ + XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ + XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -584,8 +592,8 @@ typedef enum FeatureWord { FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ - FEAT_XSAVE_COMP_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ - FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ + FEAT_XSAVE_XCR0_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ + FEAT_XSAVE_XCR0_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ FEAT_ARCH_CAPABILITIES, FEAT_CORE_CAPABILITY, FEAT_PERF_CAPABILITIES, @@ -602,6 +610,8 @@ typedef enum FeatureWord { FEAT_SGX_12_0_EAX, /* CPUID[EAX=0x12,ECX=0].EAX (SGX) */ FEAT_SGX_12_0_EBX, /* CPUID[EAX=0x12,ECX=0].EBX (SGX MISCSELECT[31:0]) */ FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ + FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ + FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ FEATURE_WORDS, @@ -1736,6 +1746,7 @@ typedef struct CPUX86State { uint8_t has_error_code; uint8_t exception_has_payload; uint64_t exception_payload; + uint8_t triple_fault_pending; uint32_t ins_len; uint32_t sipi_vector; bool tsc_valid; diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 74c1396a93ff3eeb2bf22a7d7f0bf6be1d171c9d..7b8a3d5af03be9a42348953167ed64a7d0d680e5 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -172,7 +172,7 @@ static void kvm_cpu_instance_init(CPUState *cs) /* only applies to builtin_x86_defs cpus */ if (!kvm_irqchip_in_kernel()) { x86_cpu_change_kvm_default("x2apic", "off"); - } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) { + } else if (kvm_irqchip_is_split()) { x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on"); } diff --git a/target/i386/kvm/kvm-stub.c b/target/i386/kvm/kvm-stub.c index 
f6e7e4466e1ab738e4a7dcca08a1ac9c1f60dac7..e052f1c7b0ef25f5db673e5717bfe82c94807496 100644 --- a/target/i386/kvm/kvm-stub.c +++ b/target/i386/kvm/kvm-stub.c @@ -44,3 +44,8 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp) { abort(); } + +void kvm_set_max_apic_id(uint32_t max_apic_id) +{ + return; +} diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a14debcfc274259d6fb16f81c0f9eb736d4fb1fb..fc50731bf9a895738358f7d3943fa30b2c531702 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -15,6 +15,7 @@ #include "qemu/osdep.h" #include "qapi/qapi-events-run-state.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include #include #include @@ -129,6 +130,7 @@ static int has_xcrs; static int has_pit_state2; static int has_exception_payload; static int has_map_gpa_range; +static int has_triple_fault_event; static bool has_msr_mcg_ext_ctl; @@ -140,6 +142,7 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; #define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */ static RateLimit bus_lock_ratelimit_ctrl; +static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); int kvm_has_pit_state2(void) { @@ -210,28 +213,21 @@ static int kvm_get_tsc(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data = {}; + uint64_t value; int ret; if (env->tsc_valid) { return 0; } - memset(&msr_data, 0, sizeof(msr_data)); - msr_data.info.nmsrs = 1; - msr_data.entries[0].index = MSR_IA32_TSC; env->tsc_valid = !runstate_is_running(); - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + ret = kvm_get_one_msr(cpu, MSR_IA32_TSC, &value); if (ret < 0) { return ret; } - assert(ret == 1); - env->tsc = msr_data.entries[0].data; + env->tsc = value; return 0; } @@ -352,7 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, { struct kvm_cpuid2 *cpuid; uint32_t ret = 0; - uint32_t cpuid_1_edx; + uint32_t cpuid_1_edx, unused; uint64_t bitmask; cpuid = get_supported_cpuid(s); @@ -399,10 +395,20 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, } else if (function == 6 && reg == R_EAX) { ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */ } else if (function == 7 && index == 0 && reg == R_EBX) { + /* Not new instructions, just an optimization. */ + uint32_t ebx; + host_cpuid(7, 0, &unused, &ebx, &unused, &unused); + ret |= ebx & CPUID_7_0_EBX_ERMS; + if (host_tsx_broken()) { ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE); } } else if (function == 7 && index == 0 && reg == R_EDX) { + /* Not new instructions, just an optimization. */ + uint32_t edx; + host_cpuid(7, 0, &unused, &unused, &unused, &edx); + ret |= edx & CPUID_7_0_EDX_FSRM; + /* * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is @@ -411,6 +417,11 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (!has_msr_arch_capabs) { ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; } + } else if (function == 7 && index == 1 && reg == R_EAX) { + /* Not new instructions, just an optimization. 
*/ + uint32_t eax; + host_cpuid(7, 1, &eax, &unused, &unused, &unused); + ret |= eax & (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC); } else if (function == 0xd && index == 0 && (reg == R_EAX || reg == R_EDX)) { /* @@ -1512,21 +1523,14 @@ static int hyperv_init_vcpu(X86CPU *cpu) * the kernel doesn't support setting vp_index; assert that its value * is in sync */ - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data = { - .info.nmsrs = 1, - .entries[0].index = HV_X64_MSR_VP_INDEX, - }; + uint64_t value; - ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data); + ret = kvm_get_one_msr(cpu, HV_X64_MSR_VP_INDEX, &value); if (ret < 0) { return ret; } - assert(ret == 1); - if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) { + if (value != hyperv_vp_index(CPU(cpu))) { error_report("kernel's vp_index != QEMU's vp_index"); return -ENXIO; } @@ -1779,6 +1783,7 @@ int kvm_arch_init_vcpu(CPUState *cs) } case 0x1f: if (env->nr_dies < 2) { + cpuid_i--; break; } /* fallthrough */ @@ -1819,7 +1824,6 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; } break; - case 0x7: case 0x12: for (j = 0; ; j++) { c->function = i; @@ -1839,6 +1843,7 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; } break; + case 0x7: case 0x14: case 0x1d: case 0x1e: { @@ -2472,6 +2477,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + has_triple_fault_event = kvm_check_extension(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT); + if (has_triple_fault_event) { + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 0, true); + if (ret < 0) { + error_report("kvm: Failed to enable triple fault event cap: %s", + strerror(-ret)); + return ret; + } + } + ret = kvm_get_supported_msrs(s); if (ret < 0) { return ret; @@ -2597,6 +2612,21 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + if (s->notify_vmexit != NOTIFY_VMEXIT_OPTION_DISABLE && + kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) { + uint64_t notify_window_flags = + ((uint64_t)s->notify_window << 32) | + KVM_X86_NOTIFY_VMEXIT_ENABLED | + KVM_X86_NOTIFY_VMEXIT_USER; + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0, + notify_window_flags); + if (ret < 0) { + error_report("kvm: Failed to enable notify vmexit cap: %s", + strerror(-ret)); + return ret; + } + } + return 0; } @@ -2832,6 +2862,25 @@ static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); } +static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value) +{ + int ret; + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[1]; + } msr_data = { + .info.nmsrs = 1, + .entries[0].index = index, + }; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + assert(ret == 1); + *value = msr_data.entries[0].data; + return ret; +} void kvm_put_apicbase(X86CPU *cpu, uint64_t value) { int ret; @@ -4131,6 +4180,11 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) } } + if (has_triple_fault_event) { + events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + events.triple_fault.pending = env->triple_fault_pending; + } + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); } @@ -4200,6 +4254,10 @@ static int kvm_get_vcpu_events(X86CPU *cpu) } } + if (events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) { + env->triple_fault_pending = events.triple_fault.pending; + } + env->sipi_vector = events.sipi_vector; return 0; @@ -5079,6 +5137,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct 
kvm_run *run) X86CPU *cpu = X86_CPU(cs); uint64_t code; int ret; + bool ctx_invalid; + char str[256]; + KVMState *state; switch (run->exit_reason) { case KVM_EXIT_HLT: @@ -5147,6 +5208,21 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_HYPERCALL: ret = kvm_handle_exit_hypercall(cpu, run); break; + case KVM_EXIT_NOTIFY: + ctx_invalid = !!(run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID); + state = KVM_STATE(current_accel()); + sprintf(str, "Encountered a notify exit with %svalid context in" + " guest. The guest may be misbehaving." + " Please have a look.", ctx_invalid ? "in" : ""); + if (ctx_invalid || + state->notify_vmexit == NOTIFY_VMEXIT_OPTION_INTERNAL_ERROR) { + warn_report("KVM internal error: %s", str); + ret = -1; + } else { + warn_report_once("KVM: %s", str); + ret = 0; + } + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; @@ -5423,3 +5499,76 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) mask &= ~BIT_ULL(bit); } } + +static int kvm_arch_get_notify_vmexit(Object *obj, Error **errp) +{ + KVMState *s = KVM_STATE(obj); + return s->notify_vmexit; +} + +static void kvm_arch_set_notify_vmexit(Object *obj, int value, Error **errp) +{ + KVMState *s = KVM_STATE(obj); + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + s->notify_vmexit = value; +} + +static void kvm_arch_get_notify_window(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint32_t value = s->notify_window; + + visit_type_uint32(v, name, &value, errp); +} + +static void kvm_arch_set_notify_window(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + Error *error = NULL; + uint32_t value; + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + visit_type_uint32(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + + s->notify_window = value; +} + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ + object_class_property_add_enum(oc, "notify-vmexit", "NotifyVmexitOption", + &NotifyVmexitOption_lookup, + kvm_arch_get_notify_vmexit, + kvm_arch_set_notify_vmexit); + object_class_property_set_description(oc, "notify-vmexit", + "Enable notify VM exit"); + + object_class_property_add(oc, "notify-window", "uint32", + kvm_arch_get_notify_window, + kvm_arch_set_notify_window, + NULL, NULL); + object_class_property_set_description(oc, "notify-window", + "Clock cycles without an event window " + "after which a notification VM exit occurs"); +} + +void kvm_set_max_apic_id(uint32_t max_apic_id) +{ + kvm_vm_enable_cap(kvm_state, KVM_CAP_MAX_VCPU_ID, 0, max_apic_id); +} diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 2ed586c11b7e74573cf69113d9c14dd1041c0a4b..23f6cab600176a935d2e0b2db6f373bec6dd52f9 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -65,4 +65,6 @@ typedef struct kvm_msr_handlers { bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, QEMUWRMSRHandler *wrmsr); +void kvm_set_max_apic_id(uint32_t max_apic_id); + #endif diff --git a/target/i386/machine.c b/target/i386/machine.c index 8aa54432d7d49800656e1ced9611aa1fcf1a090b..7fe80e1aa01f03d377a487f84bfd4e38b4560655 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1518,6 +1518,25 @@ static 
const VMStateDescription vmstate_msr_ghcb_gpa = { }; #endif +static bool triple_fault_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->triple_fault_pending; +} + +static const VMStateDescription vmstate_triple_fault = { + .name = "cpu/triple_fault", + .version_id = 1, + .minimum_version_id = 1, + .needed = triple_fault_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.triple_fault_pending, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1663,6 +1682,7 @@ const VMStateDescription vmstate_x86_cpu = { #if defined(CONFIG_KVM) && defined(TARGET_X86_64) &vmstate_msr_ghcb_gpa, #endif + &vmstate_triple_fault, NULL } }; diff --git a/target/loongarch64/kvm.c b/target/loongarch64/kvm.c index 0eaabe39436c57a43d38fef1ee58a8623ab69c27..8d2f84995e2addb7913af99cc4a357ecf02cad2f 100644 --- a/target/loongarch64/kvm.c +++ b/target/loongarch64/kvm.c @@ -1417,3 +1417,7 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) { abort(); } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 086debd9f013737f9a0e889afc712adb4c3f8f74..f80ac72dd1855ea9fbcb518313f3eb925b83ccc7 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -1295,3 +1295,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index dc93b99189ea242fa8c05c95f91dc08fed999c9b..da2dcea1e25b61b566b8077175130a3f1a7226ba 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2959,3 +2959,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 6d1a6324b9e26f8320caf4cc0df6141038d5ae85..dbb911c8325121446fd8f911c5f03022d7eecced 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2677,3 +2677,7 @@ int kvm_s390_get_zpci_op(void) { return cap_zpci_op; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/tests/bench/meson.build b/tests/bench/meson.build index 00b3c209dcbd16958a32283c86e8cb5f3ae2d5cb..54bc8938a8addaed294cb1311b409d9fee602a01 100644 --- a/tests/bench/meson.build +++ b/tests/bench/meson.build @@ -3,6 +3,12 @@ qht_bench = executable('qht-bench', sources: 'qht-bench.c', dependencies: [qemuutil]) +if have_system +xbzrle_bench = executable('xbzrle-bench', + sources: 'xbzrle-bench.c', + dependencies: [qemuutil,migration]) +endif + executable('atomic_add-bench', sources: files('atomic_add-bench.c'), dependencies: [qemuutil], diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c new file mode 100644 index 0000000000000000000000000000000000000000..8848a3a32d7ef591f8eae2cbbc898f92af07f549 --- /dev/null +++ b/tests/bench/xbzrle-bench.c @@ -0,0 +1,469 @@ +/* + * Xor Based Zero Run Length Encoding unit tests. + * + * Copyright 2013 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Orit Wasserman + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "../migration/xbzrle.h" + +#if defined(CONFIG_AVX512BW_OPT) +#define XBZRLE_PAGE_SIZE 4096 +static bool is_cpu_support_avx512bw; +#include "qemu/cpuid.h" +static void __attribute__((constructor)) init_cpu_flag(void) +{ + unsigned max = __get_cpuid_max(0, NULL); + int a, b, c, d; + is_cpu_support_avx512bw = false; + if (max >= 1) { + __cpuid(1, a, b, c, d); + /* We must check that AVX is not just available, but usable. */ + if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) { + int bv; + __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0)); + __cpuid_count(7, 0, a, b, c, d); + /* 0xe6: + * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15 + * and ZMM16-ZMM31 state are enabled by OS) + * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS) + */ + if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) { + is_cpu_support_avx512bw = true; + } + } + } + return ; +} + +struct ResTime { + float t_raw; + float t_512; +}; + + +/* Function prototypes +int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen); +*/ +static void encode_decode_zero(struct ResTime *res) +{ + uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE); + int i = 0; + int dlen = 0, dlen512 = 0; + int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006); + + for (i = diff_len; i > 0; i--) { + buffer[1000 + i] = i; + buffer512[1000 + i] = i; + } + + buffer[1000 + diff_len + 3] = 103; + buffer[1000 + diff_len + 5] = 105; + + buffer512[1000 + diff_len + 3] = 103; + buffer512[1000 + diff_len + 5] = 105; + + /* encode zero page */ + time_t t_start, t_end, t_start512, t_end512; + t_start = clock(); + dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed, + XBZRLE_PAGE_SIZE); + t_end = clock(); + float time_val = difftime(t_end, t_start); + g_assert(dlen == 0); + + t_start512 = clock(); + dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE, + compressed512, XBZRLE_PAGE_SIZE); + t_end512 = clock(); + float time_val512 = difftime(t_end512, t_start512); + g_assert(dlen512 == 0); + + res->t_raw = time_val; + res->t_512 = time_val512; + + g_free(buffer); + g_free(compressed); + g_free(buffer512); + g_free(compressed512); + +} + +static void test_encode_decode_zero_avx512(void) +{ + int i; + float time_raw = 0.0, time_512 = 0.0; + struct ResTime res; + for (i = 0; i < 10000; i++) { + encode_decode_zero(&res); + time_raw += res.t_raw; + time_512 += res.t_512; + } + printf("Zero test:\n"); + printf("Raw xbzrle_encode time is %f ms\n", time_raw); + printf("512 xbzrle_encode time is %f ms\n", time_512); +} + +static void encode_decode_unchanged(struct ResTime *res) +{ + uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE); + int i = 0; + int dlen = 0, dlen512 = 0; + int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006); + + for (i = diff_len; i > 0; i--) { + test[1000 + i] = i + 4; + test512[1000 + i] = i + 4; + } + + test[1000 + diff_len + 3] = 107; + test[1000 + diff_len + 5] = 109; + + test512[1000 + diff_len + 3] = 107; + test512[1000 + diff_len + 5] = 109; + + /* test unchanged buffer */ + time_t t_start, t_end, t_start512, t_end512; + t_start = clock(); + dlen = 
xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed, + XBZRLE_PAGE_SIZE); + t_end = clock(); + float time_val = difftime(t_end, t_start); + g_assert(dlen == 0); + + t_start512 = clock(); + dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE, + compressed512, XBZRLE_PAGE_SIZE); + t_end512 = clock(); + float time_val512 = difftime(t_end512, t_start512); + g_assert(dlen512 == 0); + + res->t_raw = time_val; + res->t_512 = time_val512; + + g_free(test); + g_free(compressed); + g_free(test512); + g_free(compressed512); + +} + +static void test_encode_decode_unchanged_avx512(void) +{ + int i; + float time_raw = 0.0, time_512 = 0.0; + struct ResTime res; + for (i = 0; i < 10000; i++) { + encode_decode_unchanged(&res); + time_raw += res.t_raw; + time_512 += res.t_512; + } + printf("Unchanged test:\n"); + printf("Raw xbzrle_encode time is %f ms\n", time_raw); + printf("512 xbzrle_encode time is %f ms\n", time_512); +} + +static void encode_decode_1_byte(struct ResTime *res) +{ + uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE); + uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE); + int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0; + uint8_t buf[2]; + uint8_t buf512[2]; + + test[XBZRLE_PAGE_SIZE - 1] = 1; + test512[XBZRLE_PAGE_SIZE - 1] = 1; + + time_t t_start, t_end, t_start512, t_end512; + t_start = clock(); + dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed, + XBZRLE_PAGE_SIZE); + t_end = clock(); + float time_val = difftime(t_end, t_start); + g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2)); + + rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE); + g_assert(rc == XBZRLE_PAGE_SIZE); + g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0); + + t_start512 = clock(); + dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE, + compressed512, XBZRLE_PAGE_SIZE); + t_end512 = clock(); + float time_val512 = difftime(t_end512, t_start512); + g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2)); + + rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512, + XBZRLE_PAGE_SIZE); + g_assert(rc512 == XBZRLE_PAGE_SIZE); + g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0); + + res->t_raw = time_val; + res->t_512 = time_val512; + + g_free(buffer); + g_free(compressed); + g_free(test); + g_free(buffer512); + g_free(compressed512); + g_free(test512); + +} + +static void test_encode_decode_1_byte_avx512(void) +{ + int i; + float time_raw = 0.0, time_512 = 0.0; + struct ResTime res; + for (i = 0; i < 10000; i++) { + encode_decode_1_byte(&res); + time_raw += res.t_raw; + time_512 += res.t_512; + } + printf("1 byte test:\n"); + printf("Raw xbzrle_encode time is %f ms\n", time_raw); + printf("512 xbzrle_encode time is %f ms\n", time_512); +} + +static void encode_decode_overflow(struct ResTime *res) +{ + uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE); + uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE); + int i = 0, rc = 0, rc512 = 0; + + for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) { + test[i * 2] = 1; + test512[i * 2] = 1; + } + + /* encode overflow */ + time_t t_start, t_end, t_start512, 
+static void encode_decode_1_byte(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
+    uint8_t buf[2];
+    uint8_t buf512[2];
+
+    test[XBZRLE_PAGE_SIZE - 1] = 1;
+    test512[XBZRLE_PAGE_SIZE - 1] = 1;
+
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (float)(t_end - t_start) * 1000 / CLOCKS_PER_SEC;
+    g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
+
+    rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
+    g_assert(rc == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (float)(t_end512 - t_start512) * 1000 / CLOCKS_PER_SEC;
+    g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
+
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
+                                 XBZRLE_PAGE_SIZE);
+    g_assert(rc512 == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+}
+
+static void test_encode_decode_1_byte_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_1_byte(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("1 byte test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
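+/*
+ * With every second byte modified, the encoded form (one zero-run plus
+ * one non-zero-run pair per changed byte) is larger than the page
+ * itself, so both encoders are expected to give up and return -1 rather
+ * than overrun the destination buffer.
+ */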
+static void encode_decode_overflow(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+
+    for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
+        test[i * 2] = 1;
+        test512[i * 2] = 1;
+    }
+
+    /* encode overflow */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                              XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (float)(t_end - t_start) * 1000 / CLOCKS_PER_SEC;
+    g_assert(rc == -1);
+
+    t_start512 = clock();
+    rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                        compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (float)(t_end512 - t_start512) * 1000 / CLOCKS_PER_SEC;
+    g_assert(rc512 == -1);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+}
+
+static void test_encode_decode_overflow_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_overflow(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Overflow test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_range_avx512(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        test[1000 + i] = i + 4;
+        buffer512[1000 + i] = i;
+        test512[1000 + i] = i + 4;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    test[1000 + diff_len + 3] = 107;
+
+    buffer[1000 + diff_len + 5] = 105;
+    test[1000 + diff_len + 5] = 109;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    test512[1000 + diff_len + 3] = 107;
+
+    buffer512[1000 + diff_len + 5] = 105;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test encode/decode */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (float)(t_end - t_start) * 1000 / CLOCKS_PER_SEC;
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (float)(t_end512 - t_start512) * 1000 / CLOCKS_PER_SEC;
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512,
+                                 XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+}
+
+static void test_encode_decode_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_range_avx512(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Encode decode test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_random(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    /* store the index of diff */
+    int dirty_index[diff_len];
+    for (int j = 0; j < diff_len; j++) {
+        dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    }
+    for (i = diff_len - 1; i >= 0; i--) {
+        buffer[dirty_index[i]] = i;
+        test[dirty_index[i]] = i + 4;
+        buffer512[dirty_index[i]] = i;
+        test512[dirty_index[i]] = i + 4;
+    }
+
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (float)(t_end - t_start) * 1000 / CLOCKS_PER_SEC;
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (float)(t_end512 - t_start512) * 1000 / CLOCKS_PER_SEC;
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512,
+                                 XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+}
+
+static void test_encode_decode_random_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_random(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Random test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+#endif
+
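+/*
+ * The AVX512 paths are compiled only under CONFIG_AVX512BW_OPT and
+ * registered only once init_cpu_flag() has confirmed OS and CPU
+ * support, so on other hosts this binary simply runs no tests.
+ */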
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_rand_int();
+#if defined(CONFIG_AVX512BW_OPT)
+    if (likely(is_cpu_support_avx512bw)) {
+        g_test_add_func("/xbzrle/encode_decode_zero",
+                        test_encode_decode_zero_avx512);
+        g_test_add_func("/xbzrle/encode_decode_unchanged",
+                        test_encode_decode_unchanged_avx512);
+        g_test_add_func("/xbzrle/encode_decode_1_byte",
+                        test_encode_decode_1_byte_avx512);
+        g_test_add_func("/xbzrle/encode_decode_overflow",
+                        test_encode_decode_overflow_avx512);
+        g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
+        g_test_add_func("/xbzrle/encode_decode_random",
+                        test_encode_decode_random_avx512);
+    }
+#endif
+    return g_test_run();
+}
diff --git a/tests/unit/test-xbzrle.c b/tests/unit/test-xbzrle.c
index 795d6f1cbabb0761206aa812f9319155481e2e09..baa364b443b648aff73c32ebdb1bd8749e3e34b2 100644
--- a/tests/unit/test-xbzrle.c
+++ b/tests/unit/test-xbzrle.c
@@ -17,6 +17,35 @@
 
 #define XBZRLE_PAGE_SIZE 4096
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+                                 uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+        /* We must check that AVX is not just available, but usable. */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+            /* 0xe6:
+             * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+             * and ZMM16-ZMM31 state are enabled by OS)
+             * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+             */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+    return;
+}
+#endif
+
 static void test_uleb(void)
 {
     uint32_t i, val;
@@ -55,7 +84,7 @@
     buffer[1000 + diff_len + 5] = 105;
 
     /* encode zero page */
-    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -79,7 +108,7 @@
     test[1000 + diff_len + 5] = 109;
 
     /* test unchanged buffer */
-    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -97,7 +126,7 @@
 
     test[XBZRLE_PAGE_SIZE - 1] = 1;
 
-    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
 
@@ -122,7 +151,7 @@
     }
 
     /* encode overflow */
-    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                               XBZRLE_PAGE_SIZE);
     g_assert(rc == -1);
 
@@ -153,7 +182,7 @@
     test[1000 + diff_len + 5] = 109;
 
     /* test encode/decode */
-    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
 
     rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);