diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c index 1e7f423462df10ff7d7b09638d6006889e1e68c6..e083f24aa80747498192f19e1071949bc7b015ef 100644 --- a/accel/accel-blocker.c +++ b/accel/accel-blocker.c @@ -41,7 +41,7 @@ void accel_blocker_init(void) void accel_ioctl_begin(void) { - if (likely(qemu_mutex_iothread_locked())) { + if (likely(bql_locked())) { return; } @@ -51,7 +51,7 @@ void accel_ioctl_begin(void) void accel_ioctl_end(void) { - if (likely(qemu_mutex_iothread_locked())) { + if (likely(bql_locked())) { return; } @@ -62,7 +62,7 @@ void accel_ioctl_end(void) void accel_cpu_ioctl_begin(CPUState *cpu) { - if (unlikely(qemu_mutex_iothread_locked())) { + if (unlikely(bql_locked())) { return; } @@ -72,7 +72,7 @@ void accel_cpu_ioctl_begin(CPUState *cpu) void accel_cpu_ioctl_end(CPUState *cpu) { - if (unlikely(qemu_mutex_iothread_locked())) { + if (unlikely(bql_locked())) { return; } @@ -105,7 +105,7 @@ void accel_ioctl_inhibit_begin(void) * We allow to inhibit only when holding the BQL, so we can identify * when an inhibitor wants to issue an ioctl easily. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); /* Block further invocations of the ioctls outside the BQL. */ CPU_FOREACH(cpu) { diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c index b75c919ac358677919598506ce55bacad35601f4..f4b0ec58900cf19264d2b548c1a0db8a6a55d945 100644 --- a/accel/dummy-cpus.c +++ b/accel/dummy-cpus.c @@ -24,7 +24,7 @@ static void *dummy_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->neg.can_do_io = true; @@ -43,7 +43,7 @@ static void *dummy_cpu_thread_fn(void *arg) qemu_guest_random_seed_thread_part2(cpu->random_seed); do { - qemu_mutex_unlock_iothread(); + bql_unlock(); #ifndef _WIN32 do { int sig; @@ -56,11 +56,11 @@ static void *dummy_cpu_thread_fn(void *arg) #else qemu_sem_wait(&cpu->sem); #endif - qemu_mutex_lock_iothread(); + bql_lock(); qemu_wait_io_event(cpu); } while (!cpu->unplug); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index abe7adf7ee87c89532fdbb78d08748e1140340ae..8eabb696facb8993bc0f146c85ac377bf40f14ec 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -424,7 +424,7 @@ static void *hvf_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); @@ -449,7 +449,7 @@ static void *hvf_cpu_thread_fn(void *arg) hvf_vcpu_destroy(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index 54f19028b828d8e329160ec26359c618f1f0b317..58a6601642184e50890fe776519f81130f67a199 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -33,7 +33,7 @@ static void *kvm_vcpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->neg.can_do_io = true; @@ -58,7 +58,7 @@ static void *kvm_vcpu_thread_fn(void *arg) kvm_destroy_vcpu(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } @@ -83,7 +83,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu) static bool 
kvm_cpus_are_resettable(void) { - return !kvm_enabled() || kvm_cpu_check_are_resettable(); + return !kvm_enabled() || !kvm_state->guest_state_protected; } #ifdef KVM_CAP_SET_GUEST_DEBUG diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f472fc4f697fa803dd5b196b42601e932ec1baf5..bb99cf4b420711435f270a2bf2778bd8a0e6405f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -107,6 +107,8 @@ bool kvm_csv3_allowed; bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; +static uint64_t kvm_supported_memory_attributes; +static bool kvm_guest_memfd_supported; static hwaddr kvm_max_slot_size = ~0; static const KVMCapabilityInfo kvm_required_capabilites[] = { @@ -298,34 +300,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) { KVMState *s = kvm_state; - struct kvm_userspace_memory_region mem; + struct kvm_userspace_memory_region2 mem; int ret; mem.slot = slot->slot | (kml->as_id << 16); mem.guest_phys_addr = slot->start_addr; mem.userspace_addr = (unsigned long)slot->ram; mem.flags = slot->flags; + mem.guest_memfd = slot->guest_memfd; + mem.guest_memfd_offset = slot->guest_memfd_offset; if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) { /* Set the slot size to 0 before setting the slot to the desired * value. This is needed based on KVM commit 75d61fbc. */ mem.memory_size = 0; - ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + + if (kvm_guest_memfd_supported) { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); + } else { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + } if (ret < 0) { goto err; } } mem.memory_size = slot->memory_size; - ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + if (kvm_guest_memfd_supported) { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); + } else { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + } slot->old_flags = mem.flags; err: - trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, - mem.memory_size, mem.userspace_addr, ret); + trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, + mem.guest_phys_addr, mem.memory_size, + mem.userspace_addr, mem.guest_memfd, + mem.guest_memfd_offset, ret); if (ret < 0) { - error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," - " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", - __func__, mem.slot, slot->start_addr, - (uint64_t)mem.memory_size, strerror(errno)); + if (kvm_guest_memfd_supported) { + error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 "," + " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," + " guest_memfd_offset=0x%" PRIx64 ": %s", + __func__, mem.slot, slot->start_addr, + (uint64_t)mem.memory_size, mem.flags, + mem.guest_memfd, (uint64_t)mem.guest_memfd_offset, + strerror(errno)); + } else { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", + __func__, mem.slot, slot->start_addr, + (uint64_t)mem.memory_size, strerror(errno)); + } } return ret; } @@ -513,6 +539,10 @@ static int kvm_mem_flags(MemoryRegion *mr) if (readonly && kvm_readonly_mem_allowed) { flags |= KVM_MEM_READONLY; } + if (memory_region_has_guest_memfd(mr)) { + assert(kvm_guest_memfd_supported); + flags |= KVM_MEM_GUEST_MEMFD; + } return flags; } @@ -855,7 +885,7 @@ static void kvm_dirty_ring_flush(void) * should always be with BQL held, 
serialization is guaranteed. * However, let's be sure of it. */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); /* * First make sure to flush the hardware buffers by kicking all * vcpus out in a synchronous way. @@ -1168,6 +1198,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension) return ret; } +/* + * We track the poisoned pages to be able to: + * - replace them on VM reset + * - block a migration for a VM with a poisoned page + */ typedef struct HWPoisonPage { ram_addr_t ram_addr; QLIST_ENTRY(HWPoisonPage) list; @@ -1201,6 +1236,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr) QLIST_INSERT_HEAD(&hwpoison_page_list, page, list); } +bool kvm_hwpoisoned_mem(void) +{ + return !QLIST_EMPTY(&hwpoison_page_list); +} + static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size) { #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN @@ -1304,6 +1344,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) kvm_max_slot_size = max_slot_size; } +static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr) +{ + struct kvm_memory_attributes attrs; + int r; + + assert((attr & kvm_supported_memory_attributes) == attr); + attrs.attributes = attr; + attrs.address = start; + attrs.size = size; + attrs.flags = 0; + + r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs); + if (r) { + error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") " + "with attr 0x%" PRIx64 " error '%s'", + start, size, attr, strerror(errno)); + } + return r; +} + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size) +{ + return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size) +{ + return kvm_set_memory_attributes(start, size, 0); +} + /* Called with KVMMemoryListener.slots_lock held */ static void kvm_set_phys_mem(KVMMemoryListener *kml, MemoryRegionSection *section, bool add) @@ -1400,6 +1470,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, mem->ram_start_offset = ram_start_offset; mem->ram = ram; mem->flags = kvm_mem_flags(mr); + mem->guest_memfd = mr->ram_block->guest_memfd; + mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host; + kvm_slot_init_dirty_bitmap(mem); err = kvm_set_user_memory_region(kml, mem, true); if (err) { @@ -1407,6 +1480,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, strerror(-err)); abort(); } + + if (memory_region_has_guest_memfd(mr)) { + err = kvm_set_memory_attributes_private(start_addr, slot_size); + if (err) { + error_report("%s: failed to set memory attribute private: %s", + __func__, strerror(-err)); + exit(1); + } + } + start_addr += slot_size; ram_start_offset += slot_size; ram += slot_size; @@ -1440,9 +1523,9 @@ static void *kvm_dirty_ring_reaper_thread(void *data) trace_kvm_dirty_ring_reaper("wakeup"); r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; - qemu_mutex_lock_iothread(); + bql_lock(); kvm_dirty_ring_reap(s, NULL); - qemu_mutex_unlock_iothread(); + bql_unlock(); r->reaper_iteration++; } @@ -2461,6 +2544,12 @@ static int kvm_init(MachineState *ms) goto err; } + kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_guest_memfd_supported = + kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && + kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && + (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); @@ 
-2771,11 +2860,6 @@ void kvm_flush_coalesced_mmio_buffer(void) s->coalesced_flush_in_progress = false; } -bool kvm_cpu_check_are_resettable(void) -{ - return kvm_arch_cpu_check_are_resettable(); -} - static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { @@ -2911,6 +2995,69 @@ static void kvm_eat_signals(CPUState *cpu) } while (sigismember(&chkset, SIG_IPI)); } +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) +{ + MemoryRegionSection section; + ram_addr_t offset; + MemoryRegion *mr; + RAMBlock *rb; + void *addr; + int ret = -1; + + trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); + + if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || + !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { + return -1; + } + + if (!size) { + return -1; + } + + section = memory_region_find(get_system_memory(), start, size); + mr = section.mr; + if (!mr) { + return -1; + } + + if (!memory_region_has_guest_memfd(mr)) { + error_report("Converting non guest_memfd backed memory region " + "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", + start, size, to_private ? "private" : "shared"); + goto out_unref; + } + + if (to_private) { + ret = kvm_set_memory_attributes_private(start, size); + } else { + ret = kvm_set_memory_attributes_shared(start, size); + } + if (ret) { + goto out_unref; + } + + addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; + rb = qemu_ram_block_from_host(addr, false, &offset); + + if (to_private) { + if (rb->page_size != qemu_real_host_page_size()) { + /* + * shared memory is backed by hugetlb, which is supposed to be + * pre-allocated and doesn't need to be discarded + */ + goto out_unref; + } + ret = ram_block_discard_range(rb, offset, size); + } else { + ret = ram_block_discard_guest_memfd_range(rb, offset, size); + } + +out_unref: + memory_region_unref(mr); + return ret; +} + int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -2923,7 +3070,7 @@ int kvm_cpu_exec(CPUState *cpu) return EXCP_HLT; } - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_start(cpu); do { @@ -2963,11 +3110,11 @@ int kvm_cpu_exec(CPUState *cpu) #ifdef KVM_HAVE_MCE_INJECTION if (unlikely(have_sigbus_pending)) { - qemu_mutex_lock_iothread(); + bql_lock(); kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code, pending_sigbus_addr); have_sigbus_pending = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif @@ -2978,18 +3125,20 @@ int kvm_cpu_exec(CPUState *cpu) ret = EXCP_INTERRUPT; break; } - fprintf(stderr, "error: kvm run failed %s\n", - strerror(-run_ret)); + if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) { + fprintf(stderr, "error: kvm run failed %s\n", + strerror(-run_ret)); #ifdef TARGET_PPC - if (run_ret == -EBUSY) { - fprintf(stderr, - "This is probably because your SMT is enabled.\n" - "VCPU can only run on primary threads with all " - "secondary threads offline.\n"); - } + if (run_ret == -EBUSY) { + fprintf(stderr, + "This is probably because your SMT is enabled.\n" + "VCPU can only run on primary threads with all " + "secondary threads offline.\n"); + } #endif - ret = -1; - break; + ret = -1; + break; + } } trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); @@ -3037,7 +3186,7 @@ int kvm_cpu_exec(CPUState *cpu) * still full. Got kicked by KVM_RESET_DIRTY_RINGS. 
*/ trace_kvm_dirty_ring_full(cpu->cpu_index); - qemu_mutex_lock_iothread(); + bql_lock(); /* * We throttle vCPU by making it sleep once it exit from kernel * due to dirty ring full. In the dirtylimit scenario, reaping @@ -3049,7 +3198,7 @@ int kvm_cpu_exec(CPUState *cpu) } else { kvm_dirty_ring_reap(kvm_state, NULL); } - qemu_mutex_unlock_iothread(); + bql_unlock(); dirtylimit_vcpu_execute(cpu); ret = 0; break; @@ -3065,9 +3214,9 @@ int kvm_cpu_exec(CPUState *cpu) break; case KVM_SYSTEM_EVENT_CRASH: kvm_cpu_synchronize_state(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); + bql_unlock(); ret = 0; break; default: @@ -3076,6 +3225,19 @@ int kvm_cpu_exec(CPUState *cpu) break; } break; + case KVM_EXIT_MEMORY_FAULT: + trace_kvm_memory_fault(run->memory_fault.gpa, + run->memory_fault.size, + run->memory_fault.flags); + if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) { + error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64, + (uint64_t)run->memory_fault.flags); + ret = -1; + break; + } + ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size, + run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE); + break; default: DPRINTF("kvm_arch_handle_exit\n"); ret = kvm_arch_handle_exit(cpu, run); @@ -3084,7 +3246,7 @@ int kvm_cpu_exec(CPUState *cpu) } while (ret == 0); cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); if (ret < 0) { cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); @@ -4230,3 +4392,25 @@ void kvm_mark_guest_state_protected(void) { kvm_state->guest_state_protected = true; } + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) +{ + int fd; + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + if (!kvm_guest_memfd_supported) { + error_setg(errp, "KVM does not support guest_memfd"); + return -1; + } + + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); + if (fd < 0) { + error_setg_errno(errp, errno, "Error creating KVM guest_memfd"); + return -1; + } + + return fd; +} diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 9c880fdcf4f67b985dfb94cea39428a291c9d7e3..c2445a646de40814344d4b7e86532db9e5081737 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -19,7 +19,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" -kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" +kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 kvm_resample_fd_notify(int gsi) "gsi %d" kvm_dirty_ring_full(int id) "vcpu %d" diff --git a/accel/stubs/kvm-stub.c 
b/accel/stubs/kvm-stub.c index e68f3433ad2c816b449eb015be3134f0124d59a5..106007acae77e9ec5911c555207f575af402079f 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -134,6 +134,16 @@ uint32_t kvm_dirty_ring_size(void) return 0; } + +bool kvm_hwpoisoned_mem(void) +{ + return false; +} + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) +{ + return -ENOSYS; +} int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, struct kvm_numa_info *numa_info) { diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index c938eb96f8fd654c7325595806a5084f0b9e0c79..67eda9865ee2a89f7d5a4f3a4142efb1a869d4fd 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -558,8 +558,8 @@ static void cpu_exec_longjmp_cleanup(CPUState *cpu) tcg_ctx->gen_tb = NULL; } #endif - if (qemu_mutex_iothread_locked()) { - qemu_mutex_unlock_iothread(); + if (bql_locked()) { + bql_unlock(); } assert_no_pages_locked(); } @@ -680,10 +680,10 @@ static inline bool cpu_handle_halt(CPUState *cpu) #if defined(TARGET_I386) if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { X86CPU *x86_cpu = X86_CPU(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); apic_poll_irq(x86_cpu->apic_state); cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif /* TARGET_I386 */ if (!cpu_has_work(cpu)) { @@ -749,9 +749,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) #else if (replay_exception()) { CPUClass *cc = CPU_GET_CLASS(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); cc->tcg_ops->do_interrupt(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu->exception_index = -1; if (unlikely(cpu->singlestep_enabled)) { @@ -812,7 +812,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, if (unlikely(qatomic_read(&cpu->interrupt_request))) { int interrupt_request; - qemu_mutex_lock_iothread(); + bql_lock(); interrupt_request = cpu->interrupt_request; if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { /* Mask out external interrupts for this step. 
*/ @@ -821,7 +821,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, if (interrupt_request & CPU_INTERRUPT_DEBUG) { cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG; cpu->exception_index = EXCP_DEBUG; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #if !defined(CONFIG_USER_ONLY) @@ -832,7 +832,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, cpu->interrupt_request &= ~CPU_INTERRUPT_HALT; cpu->halted = 1; cpu->exception_index = EXCP_HLT; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #if defined(TARGET_I386) @@ -843,14 +843,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); do_cpu_init(x86_cpu); cpu->exception_index = EXCP_HALTED; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #else else if (interrupt_request & CPU_INTERRUPT_RESET) { replay_interrupt(); cpu_reset(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #endif /* !TARGET_I386 */ @@ -873,7 +873,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, */ if (unlikely(cpu->singlestep_enabled)) { cpu->exception_index = EXCP_DEBUG; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } cpu->exception_index = -1; @@ -892,7 +892,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */ - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* Finally, check if we need to exit to the main loop. */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index db3f93fda990c52984f438dad15f5b198b1a4fde..5698a9fd8e2c74363c978367f840dc3ea09eff7e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2030,10 +2030,10 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); ret = int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx, type, ra, mr, mr_offset); - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -2054,12 +2054,12 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset); b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset + size - 8); - qemu_mutex_unlock_iothread(); + bql_unlock(); return int128_make128(b, a); } @@ -2577,10 +2577,10 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); ret = int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx, ra, mr, mr_offset); - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -2601,12 +2601,12 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8, mmu_idx, ra, mr, mr_offset); ret = int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8, size - 8, mmu_idx, ra, mr, mr_offset + 8); - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index 
b25685fb712ecdbd7379d026c7bf83c0818be50e..5824d92580fa79073139cdb824855c8c5a5f81ee 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -126,9 +126,9 @@ void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget) * We're called without the iothread lock, so must take it while * we're calling timer handlers. */ - qemu_mutex_lock_iothread(); + bql_lock(); icount_notify_aio_contexts(); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 73866990ced13943d8f7d71aff71ae71ec855f04..d1cb6a362065fb5febba57c4442643b64ec6bcc5 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -76,7 +76,7 @@ static void *mttcg_cpu_thread_fn(void *arg) rcu_add_force_rcu_notifier(&force_rcu.notifier); tcg_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); @@ -91,9 +91,9 @@ static void *mttcg_cpu_thread_fn(void *arg) do { if (cpu_can_run(cpu)) { int r; - qemu_mutex_unlock_iothread(); + bql_unlock(); r = tcg_cpus_exec(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); switch (r) { case EXCP_DEBUG: cpu_handle_guest_debug(cpu); @@ -105,9 +105,9 @@ static void *mttcg_cpu_thread_fn(void *arg) */ break; case EXCP_ATOMIC: - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); default: /* Ignore everything else? */ break; @@ -119,7 +119,7 @@ static void *mttcg_cpu_thread_fn(void *arg) } while (!cpu->unplug || cpu_can_run(cpu)); tcg_cpus_destroy(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_remove_force_rcu_notifier(&force_rcu.notifier); rcu_unregister_thread(); tcg_unregister_thread(); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 611932f3c3a8c953a3ff68612139696fed0e92e1..c4ea372a3f6422db58bc0e0aede24bcdbcf3757c 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -188,7 +188,7 @@ static void *rr_cpu_thread_fn(void *arg) rcu_add_force_rcu_notifier(&force_rcu); tcg_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); @@ -218,9 +218,9 @@ static void *rr_cpu_thread_fn(void *arg) /* Only used for icount_enabled() */ int64_t cpu_budget = 0; - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); if (icount_enabled()) { int cpu_count = rr_cpu_count(); @@ -254,7 +254,7 @@ static void *rr_cpu_thread_fn(void *arg) if (cpu_can_run(cpu)) { int r; - qemu_mutex_unlock_iothread(); + bql_unlock(); if (icount_enabled()) { icount_prepare_for_run(cpu, cpu_budget); } @@ -262,15 +262,15 @@ static void *rr_cpu_thread_fn(void *arg) if (icount_enabled()) { icount_process_data(cpu); } - qemu_mutex_lock_iothread(); + bql_lock(); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); break; } else if (r == EXCP_ATOMIC) { - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); break; } } else if (cpu->stop) { diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 1b5729068202e461dfd430828a267ecf77bed600..813065c0ecb006aea50d4bc38d1bd48f7a355d4e 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -88,7 +88,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu) /* mask must never be zero, except for A20 change call */ void tcg_handle_interrupt(CPUState 
*cpu, int mask) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); cpu->interrupt_request |= mask; diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 79a88f5fb7577a2b2f04b668911660b40f85e38e..1737bb3da5831a9ed9e6b05cef4021d4dd64ff8a 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -649,7 +649,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) void cpu_interrupt(CPUState *cpu, int mask) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); cpu->interrupt_request |= mask; qatomic_set(&cpu->neg.icount_decr.u16.high, -1); } diff --git a/audio/coreaudio.m b/audio/coreaudio.m index 8cd129a27d02fa645b2d5231ee97425062e0f1bd..9d2db9883cb3953f672eacf6ad1391dfe6d65de4 100644 --- a/audio/coreaudio.m +++ b/audio/coreaudio.m @@ -547,7 +547,7 @@ static OSStatus handle_voice_change( { coreaudioVoiceOut *core = in_client_data; - qemu_mutex_lock_iothread(); + bql_lock(); if (core->outputDeviceID) { fini_out_device(core); @@ -557,7 +557,7 @@ static OSStatus handle_voice_change( update_device_playback_state(core); } - qemu_mutex_unlock_iothread(); + bql_unlock(); return 0; } diff --git a/backends/hostmem-epc.c b/backends/hostmem-epc.c index 4e162d6789e86dc0de4ff3086b6e383fea059be4..3ceb079f9e0ab202511ca4b1d115657b80413891 100644 --- a/backends/hostmem-epc.c +++ b/backends/hostmem-epc.c @@ -20,8 +20,8 @@ static void sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { + g_autofree char *name = NULL; uint32_t ram_flags; - char *name; int fd; if (!backend->size) { @@ -41,7 +41,6 @@ sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, fd, 0, errp); - g_free(name); } static void sgx_epc_backend_instance_init(Object *obj) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index ce63a372a3fce4733c1adb02f7940766ba1ba2d5..aa2898da4ac5222171d1d991923c6ab0e275b7e5 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -134,8 +134,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) object_get_typename(OBJECT(backend))); #else HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend); + g_autofree gchar *name = NULL; uint32_t ram_flags; - gchar *name; if (!backend->size) { error_setg(errp, "can't create backend with size 0"); @@ -174,12 +174,12 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; ram_flags |= fb->is_pmem ? 
RAM_PMEM : 0; ram_flags |= RAM_NAMED_FILE; memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, backend->size, fb->align, ram_flags, fb->mem_path, fb->offset, errp); - g_free(name); if (virtcca_cvm_enabled() && backend->share && (strcmp(fb->mem_path, "/dev/shm") != 0) && diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 3fc85c3db81bb71176cdfe65dbf0643e1504355b..e0d3ba197f16448a04e590ffc88783ebdecb644f 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -35,8 +35,8 @@ static void memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend); + g_autofree char *name = NULL; uint32_t ram_flags; - char *name; int fd; if (!backend->size) { @@ -55,9 +55,9 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, fd, 0, errp); - g_free(name); } static bool diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c index b8e55cdbd0f89078b01bb1575416173ce909526b..8c41dc8959617e624abeeb0ef0a3edce224cb25c 100644 --- a/backends/hostmem-ram.c +++ b/backends/hostmem-ram.c @@ -19,8 +19,8 @@ static void ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { + g_autofree char *name = NULL; uint32_t ram_flags; - char *name; if (!backend->size) { error_setg(errp, "can't create backend with size 0"); @@ -30,9 +30,9 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? 
RAM_GUEST_MEMFD : 0; memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, errp); - g_free(name); } static void diff --git a/backends/hostmem.c b/backends/hostmem.c index 747e7838c031c42aaab91c159cbaa9b96f7e5013..6097e0249f252fb14db42b9326d56ab7a7866a8f 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -279,6 +279,7 @@ static void host_memory_backend_init(Object *obj) /* TODO: convert access to globals to compat properties */ backend->merge = machine_mem_merge(machine); backend->dump = machine_dump_guest_core(machine); + backend->guest_memfd = machine_require_guest_memfd(machine); backend->reserve = true; backend->prealloc_threads = machine->smp.cpus; } @@ -328,83 +329,84 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) void *ptr; uint64_t sz; - if (bc->alloc) { - bc->alloc(backend, &local_err); - if (local_err) { - goto out; - } + if (!bc->alloc) { + return; + } + bc->alloc(backend, &local_err); + if (local_err) { + goto out; + } - ptr = memory_region_get_ram_ptr(&backend->mr); - sz = memory_region_size(&backend->mr); + ptr = memory_region_get_ram_ptr(&backend->mr); + sz = memory_region_size(&backend->mr); - if (backend->merge) { - qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); - } - if (!backend->dump) { - qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); - } + if (backend->merge) { + qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); + } + if (!backend->dump) { + qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); + } #ifdef CONFIG_NUMA - unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); - /* lastbit == MAX_NODES means maxnode = 0 */ - unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); - /* ensure policy won't be ignored in case memory is preallocated - * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so - * this doesn't catch hugepage case. */ - unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; - int mode = backend->policy; - - /* check for invalid host-nodes and policies and give more verbose - * error messages than mbind(). */ - if (maxnode && backend->policy == MPOL_DEFAULT) { - error_setg(errp, "host-nodes must be empty for policy default," - " or you should explicitly specify a policy other" - " than default"); - return; - } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { - error_setg(errp, "host-nodes must be set for policy %s", - HostMemPolicy_str(backend->policy)); - return; - } + unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); + /* lastbit == MAX_NODES means maxnode = 0 */ + unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); + /* ensure policy won't be ignored in case memory is preallocated + * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so + * this doesn't catch hugepage case. */ + unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; + int mode = backend->policy; + + /* check for invalid host-nodes and policies and give more verbose + * error messages than mbind(). */ + if (maxnode && backend->policy == MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be empty for policy default," + " or you should explicitly specify a policy other" + " than default"); + return; + } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be set for policy %s", + HostMemPolicy_str(backend->policy)); + return; + } - /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 - * as argument to mbind() due to an old Linux bug (feature?) which - * cuts off the last specified node. 
This means backend->host_nodes - * must have MAX_NODES+1 bits available. - */ - assert(sizeof(backend->host_nodes) >= - BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); - assert(maxnode <= MAX_NODES); + /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 + * as argument to mbind() due to an old Linux bug (feature?) which + * cuts off the last specified node. This means backend->host_nodes + * must have MAX_NODES+1 bits available. + */ + assert(sizeof(backend->host_nodes) >= + BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); + assert(maxnode <= MAX_NODES); #ifdef HAVE_NUMA_HAS_PREFERRED_MANY - if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) { - /* - * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below - * silently picks the first node. - */ - mode = MPOL_PREFERRED_MANY; - } + if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) { + /* + * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below + * silently picks the first node. + */ + mode = MPOL_PREFERRED_MANY; + } #endif - if (maxnode && - mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) { - if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { - error_setg_errno(errp, errno, - "cannot bind memory to host NUMA nodes"); - return; - } + if (maxnode && + mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) { + if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { + error_setg_errno(errp, errno, + "cannot bind memory to host NUMA nodes"); + return; } + } #endif - /* Preallocate memory after the NUMA policy has been instantiated. - * This is necessary to guarantee memory is allocated with - * specified NUMA policy in place. - */ - if (backend->prealloc) { - qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, - backend->prealloc_threads, - backend->prealloc_context, &local_err); - if (local_err) { - goto out; - } + /* Preallocate memory after the NUMA policy has been instantiated. + * This is necessary to guarantee memory is allocated with + * specified NUMA policy in place. + */ + if (backend->prealloc) { + qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, + backend->prealloc_threads, + backend->prealloc_context, &local_err); + if (local_err) { + goto out; } } out: diff --git a/cpu-common.c b/cpu-common.c index a949ad7ca3cf5a526efed41c089d0646a5b10faa..b83ed4164e70d11fa4b419c12fe65d3a96041295 100644 --- a/cpu-common.c +++ b/cpu-common.c @@ -395,11 +395,11 @@ void process_queued_cpu_work(CPUState *cpu) * BQL, so it goes to sleep; start_exclusive() is sleeping too, so * neither CPU can proceed. */ - qemu_mutex_unlock_iothread(); + bql_unlock(); start_exclusive(); wi->func(cpu, wi->data); end_exclusive(); - qemu_mutex_lock_iothread(); + bql_lock(); } else { wi->func(cpu, wi->data); } diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst index b7e3f46ff689f1588de12756441afb0974811a2c..ea53ebc502a1b899327c92cbf0e658d389905df3 100644 --- a/docs/system/i386/amd-memory-encryption.rst +++ b/docs/system/i386/amd-memory-encryption.rst @@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs are about to occur. This allows the guest to selectively share information with the hypervisor to satisfy the requested function. -Launching ---------- +Launching (SEV and SEV-ES) +-------------------------- Boot images (such as bios) must be encrypted before a guest can be booted. 
 The ``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``,
@@ -161,6 +161,72 @@ The value of GCTX.LD is
 If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for
 ``kernel_hashes_blob`` and ``vmsas_blob`` as needed.
 
+Launching (SEV-SNP)
+-------------------
+Boot images (such as bios) must be encrypted before a guest can be booted. The
+``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images:
+``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These
+three commands communicate with SEV-SNP firmware to generate a fresh memory
+encryption key for the VM and encrypt the boot images for a successful launch.
+For more details on the SEV-SNP firmware interfaces used by these commands,
+please see the SEV-SNP Firmware ABI.
+
+``SNP_LAUNCH_START`` is called first to create a cryptographic launch context
+within the firmware. To create this context, the guest owner must provide a
+guest policy and other parameters as described in the SEV-SNP firmware
+specification. The launch parameters should be specified as described in the
+QAPI schema for the sev-snp-guest object.
+
+The ``SNP_LAUNCH_START`` command uses the following parameters, which can be
+configured by the corresponding parameters documented in the QAPI schema for
+the 'sev-snp-guest' object.
+
++---------------------------+--------+---------+-------------------------------+
+| key                       | type   | default | meaning                       |
++---------------------------+--------+---------+-------------------------------+
+| policy                    | hex    | 0x30000 | a 64-bit guest policy         |
++---------------------------+--------+---------+-------------------------------+
+| guest-visible-workarounds | string | 0       | 16-byte base64 encoded string |
+|                           |        |         | for guest OS visible          |
+|                           |        |         | workarounds.                  |
++---------------------------+--------+---------+-------------------------------+
+
+``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic
+context created via the ``SNP_LAUNCH_START`` command. If required, this
+command can be called multiple times to encrypt different memory regions. The
+command also calculates the measurement of the memory contents as it encrypts.
+
+``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while
+finalizing the launch the firmware can perform checks on the launch digest
+computed through the ``SNP_LAUNCH_UPDATE`` calls. To perform the check, the
+user must supply the ID block, authentication blob and host data that should
+be included in the attestation report. See the SEV-SNP spec for further
+details.
+
+The ``SNP_LAUNCH_FINISH`` command uses the following parameters, which can be
+configured by the corresponding parameters documented in the QAPI schema for
+the 'sev-snp-guest' object.
+
++--------------------+--------+---------+------------------------------------+
+| key                | type   | default | meaning                            |
++--------------------+--------+---------+------------------------------------+
+| id-block           | string | none    | base64 encoded ID block            |
++--------------------+--------+---------+------------------------------------+
+| id-auth            | string | none    | base64 encoded authentication      |
+|                    |        |         | information                        |
++--------------------+--------+---------+------------------------------------+
+| author-key-enabled | bool   | 0       | auth block contains author key     |
++--------------------+--------+---------+------------------------------------+
+| host-data          | string | none    | host provided data                 |
++--------------------+--------+---------+------------------------------------+
+
+To launch a SEV-SNP guest (additional parameters are documented in the QAPI
+schema for the 'sev-snp-guest' object)::
+
+    # ${QEMU} \
+      -machine ...,confidential-guest-support=sev0 \
+      -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1
+
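+When ID-block attestation is wanted, the same command line can also carry the
+optional ``SNP_LAUNCH_FINISH`` inputs from the table above. A sketch only: the
+``<...>`` values are placeholders for base64 data produced by the guest
+owner's signing tooling, not literal working values::
+
+    # ${QEMU} \
+      -machine ...,confidential-guest-support=sev0 \
+      -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1,id-block=<base64-id-block>,id-auth=<base64-id-auth>,author-key-enabled=on
+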
 
 Debugging
 ---------
diff --git a/dump/dump.c b/dump/dump.c
index 787059ac2c8118311124ccbce3f1839219827df8..0f9b37718eaeeab8607aa1ecf3f9780a746ae5b6 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -109,11 +109,11 @@ static int dump_cleanup(DumpState *s)
     s->guest_note = NULL;
     if (s->resume) {
         if (s->detached) {
-            qemu_mutex_lock_iothread();
+            bql_lock();
         }
         vm_start();
         if (s->detached) {
-            qemu_mutex_unlock_iothread();
+            bql_unlock();
         }
     }
     migrate_del_blocker(&dump_migration_blocker);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f12bc645d2ab3fd73d20abe3515fb6edb286dcdb..9bfaa839dcbe26b01ad7c6da150cbdcb45183064 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2842,7 +2842,7 @@ static void machvirt_init(MachineState *machine)
              * after GIC and machine has been fully initialized during
              * machine_init_done() phase.
              */
-            cpu_slot->cpu = OBJECT(cs);
+            cpu_slot->cpu = cs;
         }
     }
     fdt_add_timer_nodes(vms);
@@ -3552,7 +3552,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     /* insert the cold/hot-plugged vcpu in the slot */
     cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index);
-    cpu_slot->cpu = OBJECT(dev);
+    cpu_slot->cpu = CPU(dev);
 
     /*
      * Update the ACPI Hotplug state both for vCPUs being {hot,cold}-plugged.
@@ -4190,10 +4190,24 @@ static void machvirt_machine_init(void)
 }
 type_init(machvirt_machine_init);
 
+static void virt_machine_9_1_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(9, 1)
+
+static void virt_machine_9_0_options(MachineClass *mc)
+{
+    virt_machine_9_1_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len);
+}
+DEFINE_VIRT_MACHINE(9, 0)
+
 static void virt_machine_8_2_options(MachineClass *mc)
 {
+    virt_machine_9_0_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len);
 }
-DEFINE_VIRT_MACHINE_AS_LATEST(8, 2)
+DEFINE_VIRT_MACHINE(8, 2)
 
 static void virt_machine_8_1_options(MachineClass *mc)
 {
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index e36ca2c207093a41ff4554dc41941fe424edad25..6292eb774b72c5ca5a4199ca1c1db7e0b8232177 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -70,14 +70,14 @@ CPUState *cpu_create(const char *typename)
  * BQL here if we need to.
cpu_interrupt assumes it is held.*/ void cpu_reset_interrupt(CPUState *cpu, int mask) { - bool need_lock = !qemu_mutex_iothread_locked(); + bool need_lock = !bql_locked(); if (need_lock) { - qemu_mutex_lock_iothread(); + bql_lock(); } cpu->interrupt_request &= ~mask; if (need_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } } diff --git a/hw/core/machine.c b/hw/core/machine.c index 965682619bdf23d58c33cab7c965ee3225c23ca8..dbf5b812048590b0520d9c42846c691ea07c623b 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -32,6 +32,12 @@ #include "hw/virtio/virtio-net.h" #include "audio/audio.h" +GlobalProperty hw_compat_8_2[] = {}; +GlobalProperty hw_compat_9_0[] = {}; +const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); + +const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2); + GlobalProperty hw_compat_8_1[] = { { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, { "ramfb", "x-migrate", "off" }, @@ -715,7 +721,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) mc->possible_cpu_arch_ids(machine); for (i = 0; i < machine->possible_cpus->len; i++) { - Object *cpu; + CPUState *cpu; HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); cpu_item->type = g_strdup(machine->possible_cpus->cpus[i].type); @@ -725,7 +731,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) cpu = machine->possible_cpus->cpus[i].cpu; if (cpu) { - cpu_item->qom_path = object_get_canonical_path(cpu); + cpu_item->qom_path = object_get_canonical_path(OBJECT(cpu)); } QAPI_LIST_PREPEND(head, cpu_item); } @@ -1191,6 +1197,11 @@ bool machine_mem_merge(MachineState *machine) return machine->mem_merge; } +bool machine_require_guest_memfd(MachineState *machine) +{ + return machine->cgs && machine->cgs->require_guest_memfd; +} + static char *cpu_slot_to_string(const CPUArchId *cpu) { GString *s = g_string_new(NULL); diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 43dc23f7e06f63e8684197fb00ab0197b163e7fb..57e81c732200923919d6ef23808e61b1dc1564d3 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, acpi_table_begin(&table, table_data); /* Local APIC Address */ build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); - build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ + /* Flags. bit 0: PCAT_COMPAT */ + build_append_int_noprefix(table_data, + x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4); for (i = 0; i < apic_ids->len; i++) { pc_madt_cpu_entry(i, apic_ids, table_data, false); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 60d86e0cb692c56f43f82feee9c3c2916fa83580..2def6d8a10aa6d70dc040f8031fd2e39b8a34fbc 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1690,7 +1690,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) { bool use_iommu, pt; /* Whether we need to take the BQL on our own */ - bool take_bql = !qemu_mutex_iothread_locked(); + bool take_bql = !bql_locked(); assert(as); @@ -1708,7 +1708,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) * it. We'd better make sure we have had it already, or, take it. 
*/ if (take_bql) { - qemu_mutex_lock_iothread(); + bql_lock(); } /* Turn off first then on the other */ @@ -1763,7 +1763,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) } if (take_bql) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } return use_iommu; diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index 02b8cbf8dfffeb925668d476c3782316abbb1848..d7d15cfaf7877e9c730fcabda9efa2e0abba18e0 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -425,7 +425,7 @@ void xen_evtchn_set_callback_level(int level) * effect immediately. That just leaves interdomain loopback as the case * which uses the BH. */ - if (!qemu_mutex_iothread_locked()) { + if (!bql_locked()) { qemu_bh_schedule(s->gsi_bh); return; } @@ -459,7 +459,7 @@ int xen_evtchn_set_callback_param(uint64_t param) * We need the BQL because set_callback_pci_intx() may call into PCI code, * and because we may need to manipulate the old and new GSI levels. */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); qemu_mutex_lock(&s->port_lock); switch (type) { @@ -1037,7 +1037,7 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port, XenEvtchnPort *p = &s->port_table[port]; /* Because it *might* be a PIRQ port */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); switch (p->type) { case EVTCHNSTAT_closed: @@ -1104,7 +1104,7 @@ int xen_evtchn_soft_reset(void) return -ENOTSUP; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); qemu_mutex_lock(&s->port_lock); @@ -1601,7 +1601,7 @@ bool xen_evtchn_set_gsi(int gsi, int level) XenEvtchnState *s = xen_evtchn_singleton; int pirq; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) { return false; @@ -1712,7 +1712,7 @@ void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector, return; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); pirq = msi_pirq_target(addr, data); @@ -1749,7 +1749,7 @@ int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route, return 1; /* Not a PIRQ */ } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); pirq = msi_pirq_target(address, data); if (!pirq || pirq >= s->nr_pirqs) { @@ -1796,7 +1796,7 @@ bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data) return false; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); pirq = msi_pirq_target(address, data); if (!pirq || pirq >= s->nr_pirqs) { diff --git a/hw/i386/kvm/xen_overlay.c b/hw/i386/kvm/xen_overlay.c index 39fda1b72c3f8ed8c807a65787795c494feb20e0..17222946389f93be9d4dac8a54d6049234ef632a 100644 --- a/hw/i386/kvm/xen_overlay.c +++ b/hw/i386/kvm/xen_overlay.c @@ -194,7 +194,7 @@ int xen_overlay_map_shinfo_page(uint64_t gpa) return -ENOENT; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (s->shinfo_gpa) { /* If removing shinfo page, turn the kernel magic off first */ diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 6e651960b3ab5eb0cd1fc59b91496f048d352f5f..ae27889a3f82b60b0e81a664c1c91b09bd83a764 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -1341,7 +1341,7 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token) { XenXenstoreState *s = opaque; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); /* * If there's a response pending, we obviously can't scribble over diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 
369c6bf823bbb200b5f84050601ff9adc1bffa62..1341361edaa634e73540bf9f3ad8a3fec3a9aa8b 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -5,13 +5,14 @@ i386_ss.add(files( 'e820_memory_layout.c', 'multiboot.c', 'x86.c', + 'x86-cpu.c', )) i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), if_false: files('x86-iommu-stub.c')) i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c')) i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) -i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) +i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) @@ -21,6 +22,7 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) i386_ss.add(when: 'CONFIG_PC', if_true: files( + 'x86-common.c', 'pc.c', 'pc_sysfw.c', 'acpi-build.c', diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index ca55aecc3b4f2472ab6b6010f225121fabc56862..fec63cacfa881af6ea672f52098258fb7bff544d 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -175,7 +175,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) &error_abort); isa_bus_register_input_irqs(isa_bus, x86ms->gsi); - ioapic_init_gsi(gsi_state, "machine"); + ioapic_init_gsi(gsi_state, OBJECT(mms)); if (ioapics > 1) { x86ms->ioapic2 = ioapic_init_secondary(gsi_state); } @@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) default_firmware = x86_machine_is_acpi_enabled(x86ms) ? MICROVM_BIOS_FILENAME : MICROVM_QBOOT_FILENAME; - x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true); + x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true); } static void microvm_memory_init(MicrovmMachineState *mms) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 204e34db86c53361f217b3b9174d015e3c5aeb08..4e90e227e935cf18ac50181b1b0f0079d1151088 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -62,6 +62,7 @@ #include "hw/mem/memory-device.h" #include "e820_memory_layout.h" #include "trace.h" +#include "sev.h" #include CONFIG_DEVICES #ifdef CONFIG_XEN_EMU @@ -78,6 +79,15 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, +GlobalProperty pc_compat_9_0[] = { + { TYPE_X86_CPU, "guest-phys-bits", "0" }, + { "sev-guest", "legacy-vm-type", "on" }, +}; +const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); + +GlobalProperty pc_compat_8_2[] = {}; +const size_t pc_compat_8_2_len = G_N_ELEMENTS(pc_compat_8_2); + GlobalProperty pc_compat_8_1[] = {}; const size_t pc_compat_8_1_len = G_N_ELEMENTS(pc_compat_8_1); @@ -1158,10 +1168,15 @@ void pc_memory_init(PCMachineState *pcms, pc_system_firmware_init(pcms, rom_memory); option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); + if (machine_require_guest_memfd(machine)) { + memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", + PC_ROM_SIZE, &error_fatal); + } else { + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); + if (pcmc->pci_enabled) { + 
memory_region_set_readonly(option_rom_mr, true); + } } memory_region_add_subregion_overlap(rom_memory, PC_ROM_MIN_VGA, @@ -1876,11 +1891,6 @@ static void pc_machine_initfn(Object *obj) cxl_machine_init(obj, &pcms->cxl_devices_state); } -int pc_machine_kvm_type(MachineState *machine, const char *kvm_type) -{ - return 0; -} - static void pc_machine_reset(MachineState *machine, ShutdownCause reason) { CPUState *cs; @@ -1940,6 +1950,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->kvmclock_enabled = true; pcmc->enforce_aligned_dimm = true; pcmc->enforce_amd_1tb_hole = true; + pcmc->isa_bios_alias = true; /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported * to be used at the moment, 32K should be enough for a while. */ pcmc->acpi_data_size = 0x20000 + 0x8000; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index eace8543358a85a0d1f1f75353bb0e601db3b0e0..00c2ad61af5874c16acbfb4bb2bdbeff00c799aa 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -112,6 +112,7 @@ static void pc_init1(MachineState *machine, X86MachineState *x86ms = X86_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); + Object *phb = NULL; PCIBus *pci_bus = NULL; ISABus *isa_bus; Object *piix4_pm = NULL; @@ -195,8 +196,6 @@ static void pc_init1(MachineState *machine, } if (pcmc->pci_enabled) { - Object *phb; - pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; @@ -323,8 +322,8 @@ static void pc_init1(MachineState *machine, pc_i8259_create(isa_bus, gsi_state->i8259_irq); } - if (pcmc->pci_enabled) { - ioapic_init_gsi(gsi_state, "i440fx"); + if (phb) { + ioapic_init_gsi(gsi_state, phb); } if (tcg_enabled()) { @@ -545,13 +544,40 @@ static void pc_i440fx_machine_options(MachineClass *m) "Use a different south bridge than PIIX3"); } -static void pc_i440fx_8_2_machine_options(MachineClass *m) +static void pc_i440fx_9_1_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = true; } +DEFINE_I440FX_MACHINE(v9_1, "pc-i440fx-9.1", NULL, + pc_i440fx_9_1_machine_options); + +static void pc_i440fx_9_0_machine_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + + pc_i440fx_9_1_machine_options(m); + m->alias = NULL; + m->is_default = false; + + compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); + compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); + pcmc->isa_bios_alias = false; +} + +DEFINE_I440FX_MACHINE(v9_0, "pc-i440fx-9.0", NULL, + pc_i440fx_9_0_machine_options); + +static void pc_i440fx_8_2_machine_options(MachineClass *m) +{ + pc_i440fx_9_0_machine_options(m); + + compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); + compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); +} + DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL, pc_i440fx_8_2_machine_options); @@ -560,8 +586,6 @@ static void pc_i440fx_8_1_machine_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_8_2_machine_options(m); - m->alias = NULL; - m->is_default = false; pcmc->broken_32bit_mem_addr_check = true; compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 4f3e5412f6b8b42dffb55b4e6527a019fed35628..ab1366ae4422478a878506f8492ba121825acf3b 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -130,8 +130,7 @@ static void pc_q35_init(MachineState *machine) 
ISADevice *rtc_state; MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); - MemoryRegion *pci_memory; - MemoryRegion *rom_memory; + MemoryRegion *pci_memory = g_new(MemoryRegion, 1); GSIState *gsi_state; ISABus *isa_bus; int i; @@ -143,6 +142,8 @@ static void pc_q35_init(MachineState *machine) bool keep_pci_slot_hpc; uint64_t pci_hole64_size = 0; + assert(pcmc->pci_enabled); + /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping * also known as MMCFG). @@ -189,16 +190,6 @@ static void pc_q35_init(MachineState *machine) kvmclock_create(pcmc->kvmclock_create_always); } - /* pci enabled */ - if (pcmc->pci_enabled) { - pci_memory = g_new(MemoryRegion, 1); - memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); - rom_memory = pci_memory; - } else { - pci_memory = NULL; - rom_memory = system_memory; - } - pc_guest_info_init(pcms); if (pcmc->smbios_defaults) { @@ -212,14 +203,13 @@ static void pc_q35_init(MachineState *machine) /* create pci host bus */ phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE)); - if (pcmc->pci_enabled) { - pci_hole64_size = object_property_get_uint(phb, - PCI_HOST_PROP_PCI_HOLE64_SIZE, - &error_abort); - } + pci_hole64_size = object_property_get_uint(phb, + PCI_HOST_PROP_PCI_HOLE64_SIZE, + &error_abort); /* allocate ram and load rom/bios */ - pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size); + memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); + pc_memory_init(pcms, system_memory, pci_memory, pci_hole64_size); object_property_add_child(OBJECT(machine), "q35", phb); object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, @@ -236,6 +226,8 @@ static void pc_q35_init(MachineState *machine) x86ms->above_4g_mem_size, NULL); object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, pcms->default_bus_bypass_iommu, NULL); + object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES, + x86_machine_is_smm_enabled(x86ms), NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); /* pci */ @@ -243,7 +235,7 @@ static void pc_q35_init(MachineState *machine) pcms->bus = host_bus; /* irq lines */ - gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); + gsi_state = pc_gsi_create(&x86ms->gsi, true); /* create ISA bus */ lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), @@ -286,9 +278,7 @@ static void pc_q35_init(MachineState *machine) pc_i8259_create(isa_bus, gsi_state->i8259_irq); } - if (pcmc->pci_enabled) { - ioapic_init_gsi(gsi_state, "q35"); - } + ioapic_init_gsi(gsi_state, OBJECT(phb)); if (tcg_enabled()) { x86_register_ferr_irq(x86ms->gsi[13]); @@ -383,12 +373,35 @@ static void pc_q35_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); } -static void pc_q35_8_2_machine_options(MachineClass *m) +static void pc_q35_9_1_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v9_1, "pc-q35-9.1", NULL, + pc_q35_9_1_machine_options); + +static void pc_q35_9_0_machine_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_9_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); + compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); + pcmc->isa_bios_alias = false; +} + +DEFINE_Q35_MACHINE(v9_0, "pc-q35-9.0", NULL, + pc_q35_9_0_machine_options); + +static void pc_q35_8_2_machine_options(MachineClass *m) +{ + pc_q35_9_0_machine_options(m); 
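The block above is QEMU's usual versioned-machine idiom: only the newest machine type carries the user-visible alias and defaults, and every older version first calls its successor's options function, then appends the compat properties that pin that release's behaviour. A condensed sketch of the idiom, with hypothetical version names and compat arrays standing in for the 9.1/9.0/8.2 chain added in this patch (the DEFINE_*_MACHINE plumbing is elided):

    /* Hypothetical two-release chain; names here are illustrative. */
    static void machine_v2_options(MachineClass *m)   /* newest release */
    {
        base_machine_options(m);      /* shared, unversioned defaults */
        m->alias = "pc";              /* only the tip owns the alias */
    }

    static void machine_v1_options(MachineClass *m)   /* one release older */
    {
        machine_v2_options(m);        /* inherit, then specialise */
        m->alias = NULL;              /* the alias must track the tip */
        /*
         * Compat properties accumulate down the chain: a v1 machine
         * receives the v1 entries added here plus whatever still older
         * versions append in their own options functions.
         */
        compat_props_add(m->compat_props, hw_compat_v1, hw_compat_v1_len);
    }
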
+ compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); + compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); +} + DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL, pc_q35_8_2_machine_options); diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 7c6a9102501b9146372ac0674cf24e85511ef4b4..e488004d1d62f4afa88afbbd3045a284f133cf3e 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -41,12 +41,10 @@ #define FLASH_SECTOR_SIZE 4096 -static void pc_isa_bios_init(MemoryRegion *rom_memory, - MemoryRegion *flash_mem, - int ram_size) +static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios, + MemoryRegion *rom_memory, MemoryRegion *flash_mem) { int isa_bios_size; - MemoryRegion *isa_bios; uint64_t flash_size; void *flash_ptr, *isa_bios_ptr; @@ -54,9 +52,13 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = MIN(flash_size, 128 * KiB); - isa_bios = g_malloc(sizeof(*isa_bios)); - memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, - &error_fatal); + if (machine_require_guest_memfd(MACHINE(pcms))) { + memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios", + isa_bios_size, &error_fatal); + } else { + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_fatal); + } memory_region_add_subregion_overlap(rom_memory, 0x100000 - isa_bios_size, isa_bios, @@ -69,7 +71,9 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), isa_bios_size); - memory_region_set_readonly(isa_bios, true); + if (!machine_require_guest_memfd(current_machine)) { + memory_region_set_readonly(isa_bios, true); + } } static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms, @@ -140,6 +144,8 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) static void pc_system_flash_map(PCMachineState *pcms, MemoryRegion *rom_memory) { + X86MachineState *x86ms = X86_MACHINE(pcms); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); hwaddr total_size = 0; int i; BlockBackend *blk; @@ -152,6 +158,8 @@ static void pc_system_flash_map(PCMachineState *pcms, assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { + hwaddr gpa; + system_flash = pcms->flash[i]; blk = pflash_cfi01_get_blk(system_flash); if (!blk) { @@ -181,21 +189,26 @@ static void pc_system_flash_map(PCMachineState *pcms, } total_size += size; + gpa = 0x100000000ULL - total_size; /* where the flash is mapped */ qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks", size / FLASH_SECTOR_SIZE); sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, - 0x100000000ULL - total_size); + sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa); if (i == 0) { flash_mem = pflash_cfi01_get_memory(system_flash); - pc_isa_bios_init(rom_memory, flash_mem, size); + if (pcmc->isa_bios_alias) { + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, + true); + } else { + pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem); + } /* Encrypt the pflash boot ROM */ if (sev_enabled()) { flash_ptr = memory_region_get_ram_ptr(flash_mem); flash_size = memory_region_size(flash_mem); - x86_firmware_configure(flash_ptr, flash_size); + x86_firmware_configure(gpa, flash_ptr, flash_size); } } } @@ -209,7 +222,7 @@ void pc_system_firmware_init(PCMachineState *pcms, BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; if (!pcmc->pci_enabled) { - 
x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true); + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); return; } @@ -230,7 +243,7 @@ void pc_system_firmware_init(PCMachineState *pcms, if (!pflash_blk[0]) { /* Machine property pflash0 not set, use ROM mode */ - x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false); + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); } else { if (kvm_enabled() && !kvm_readonly_mem_enabled()) { /* @@ -248,7 +261,7 @@ void pc_system_firmware_init(PCMachineState *pcms, pc_system_flash_cleanup_unused(pcms); } -void x86_firmware_configure(void *ptr, int size) +void x86_firmware_configure(hwaddr gpa, void *ptr, int size) { int ret; @@ -259,6 +272,10 @@ void x86_firmware_configure(void *ptr, int size) pc_system_parse_ovmf_flash(ptr, size); if (sev_enabled()) { + + /* Copy the SEV metadata table (if it exists) */ + pc_system_parse_sev_metadata(ptr, size); + ret = sev_es_save_reset_vector(ptr, size); if (ret) { error_report("failed to locate and/or save reset vector"); @@ -278,7 +295,7 @@ void x86_firmware_configure(void *ptr, int size) } csv3_load_data(mr->addr + offset, ptr, size, &error_fatal); } else { - sev_encrypt_flash(ptr, size, &error_fatal); + sev_encrypt_flash(gpa, ptr, size, &error_fatal); } } } diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c new file mode 100644 index 0000000000000000000000000000000000000000..6cbb76c25c4eaeebd97cbe034a6cf8c3fad43e9d --- /dev/null +++ b/hw/i386/x86-common.c @@ -0,0 +1,1014 @@ +/* + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2019, 2024 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
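A pattern worth noting across the pc.rom, isa-bios and pc.bios hunks above: when the machine requires guest_memfd backing (confidential guests), the region is created with memory_region_init_ram_guest_memfd() and memory_region_set_readonly() is skipped, since private memory cannot be remapped read-only. A hedged sketch of that shared shape, assuming QEMU's memory API headers are in scope; the wrapper itself is illustrative and does not exist in the patch:

    static void rom_like_region_init(MemoryRegion *mr, const char *name,
                                     uint64_t size, bool use_guest_memfd,
                                     bool read_only)
    {
        if (use_guest_memfd) {
            /* Private (confidential) memory: loaded once and measured,
             * never flipped read-only and never reset like a ROM.
             */
            memory_region_init_ram_guest_memfd(mr, NULL, name, size,
                                               &error_fatal);
        } else {
            memory_region_init_ram(mr, NULL, name, size, &error_fatal);
            if (read_only) {
                memory_region_set_readonly(mr, true);
            }
        }
    }
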
+ */ +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "qemu/units.h" +#include "qemu/datadir.h" +#include "qapi/error.h" +#include "sysemu/numa.h" +#include "sysemu/sysemu.h" +#include "sysemu/xen.h" +#include "trace.h" + +#include "hw/i386/x86.h" +#include "target/i386/cpu.h" +#include "hw/rtc/mc146818rtc.h" +#include "target/i386/sev.h" + +#include "hw/acpi/cpu_hotplug.h" +#include "hw/irq.h" +#include "hw/loader.h" +#include "multiboot.h" +#include "elf.h" +#include "standard-headers/asm-x86/bootparam.h" +#include CONFIG_DEVICES +#include "kvm/kvm_i386.h" + +#ifdef CONFIG_XEN_EMU +#include "hw/xen/xen.h" +#include "hw/i386/kvm/xen_evtchn.h" +#endif + +/* Physical Address of PVH entry point read from kernel ELF NOTE */ +static size_t pvh_start_addr; + +static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) +{ + Object *cpu = object_new(MACHINE(x86ms)->cpu_type); + + if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { + goto out; + } + qdev_realize(DEVICE(cpu), NULL, errp); + +out: + object_unref(cpu); +} + +void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) +{ + int i; + const CPUArchIdList *possible_cpus; + MachineState *ms = MACHINE(x86ms); + MachineClass *mc = MACHINE_GET_CLASS(x86ms); + + x86_cpu_set_default_version(default_cpu_version); + + /* + * Calculates the limit to CPU APIC ID values + * + * Limit for the APIC ID value, so that all + * CPU APIC IDs are < x86ms->apic_id_limit. + * + * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). + */ + x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, + ms->smp.max_cpus - 1) + 1; + + /* + * Can we support APIC ID 255 or higher? With KVM, that requires + * both in-kernel lapic and X2APIC userspace API. + * + * kvm_enabled() must go first to ensure that kvm_* references are + * not emitted for the linker to consume (kvm_enabled() is + * a literal `0` in configurations where kvm_* aren't defined) + */ + if (kvm_enabled() && x86ms->apic_id_limit > 255 && + kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { + error_report("current -smp configuration requires kernel " + "irqchip and X2APIC API support."); + exit(EXIT_FAILURE); + } + + if (kvm_enabled()) { + kvm_set_max_apic_id(x86ms->apic_id_limit); + } + + if (!kvm_irqchip_in_kernel()) { + apic_set_max_apic_id(x86ms->apic_id_limit); + } + + possible_cpus = mc->possible_cpu_arch_ids(ms); + for (i = 0; i < ms->smp.cpus; i++) { + x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); + } +} + +void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) +{ + MC146818RtcState *rtc = MC146818_RTC(s); + + if (cpus_count > 0xff) { + /* + * If the number of CPUs can't be represented in 8 bits, the + * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just + * to make old BIOSes fail more predictably. + */ + mc146818rtc_set_cmos_data(rtc, 0x5f, 0); + } else { + mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); + } +} + +static int x86_apic_cmp(const void *a, const void *b) +{ + CPUArchId *apic_a = (CPUArchId *)a; + CPUArchId *apic_b = (CPUArchId *)b; + + return apic_a->arch_id - apic_b->arch_id; +} + +/* + * returns pointer to CPUArchId descriptor that matches CPU's apic_id + * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no + * entry corresponding to CPU's apic_id returns NULL. 
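The function this comment documents depends on ms->possible_cpus->cpus being sorted by arch_id, which is what lets a plain bsearch(3) recover both the slot and its index. A minimal standalone sketch of the same lookup; the struct is reduced to the fields the search touches, and the comparator uses a three-way compare rather than the subtraction the QEMU comparator gets away with for bounded APIC IDs:

    #include <stdint.h>
    #include <stdlib.h>
    #include <stddef.h>

    typedef struct {
        uint64_t arch_id;   /* APIC ID: the sort key and search key */
        void *cpu;          /* filled in once the CPU is plugged */
    } SlotEntry;

    static int slot_cmp(const void *a, const void *b)
    {
        const SlotEntry *sa = a, *sb = b;

        /* Three-way compare; avoids overflow plain subtraction risks. */
        return (sa->arch_id > sb->arch_id) - (sa->arch_id < sb->arch_id);
    }

    static SlotEntry *find_slot(SlotEntry *slots, size_t len, uint64_t id,
                                ptrdiff_t *idx)
    {
        SlotEntry key = { .arch_id = id };
        SlotEntry *found = bsearch(&key, slots, len, sizeof(*slots),
                                   slot_cmp);

        if (found && idx) {
            *idx = found - slots;  /* index via pointer arithmetic */
        }
        return found;              /* NULL when the APIC ID has no slot */
    }
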
+ */ +static CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) +{ + CPUArchId apic_id, *found_cpu; + + apic_id.arch_id = id; + found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, + ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), + x86_apic_cmp); + if (found_cpu && idx) { + *idx = found_cpu - ms->possible_cpus->cpus; + } + return found_cpu; +} + +void x86_cpu_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + CPUArchId *found_cpu; + Error *local_err = NULL; + X86CPU *cpu = X86_CPU(dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + + if (x86ms->acpi_dev) { + hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); + if (local_err) { + goto out; + } + } + + /* increment the number of CPUs */ + x86ms->boot_cpus++; + if (x86ms->rtc) { + x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); + } + if (x86ms->fw_cfg) { + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); + } + + found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); + found_cpu->cpu = CPU(dev); +out: + error_propagate(errp, local_err); +} + +void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + int idx = -1; + X86CPU *cpu = X86_CPU(dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + + if (!x86ms->acpi_dev) { + error_setg(errp, "CPU hot unplug not supported without ACPI"); + return; + } + + x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); + assert(idx != -1); + if (idx == 0) { + error_setg(errp, "Boot CPU is unpluggable"); + return; + } + + hotplug_handler_unplug_request(x86ms->acpi_dev, dev, + errp); +} + +void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + CPUArchId *found_cpu; + Error *local_err = NULL; + X86CPU *cpu = X86_CPU(dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + + hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); + if (local_err) { + goto out; + } + + found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); + found_cpu->cpu = NULL; + qdev_unrealize(dev); + + /* decrement the number of CPUs */ + x86ms->boot_cpus--; + /* Update the number of CPUs in CMOS */ + x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); + out: + error_propagate(errp, local_err); +} + +void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + int idx; + CPUState *cs; + CPUArchId *cpu_slot; + X86CPUTopoIDs topo_ids; + X86CPU *cpu = X86_CPU(dev); + CPUX86State *env = &cpu->env; + MachineState *ms = MACHINE(hotplug_dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + unsigned int smp_cores = ms->smp.cores; + unsigned int smp_threads = ms->smp.threads; + X86CPUTopoInfo topo_info; + + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", + ms->cpu_type); + return; + } + + if (x86ms->acpi_dev) { + Error *local_err = NULL; + + hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + init_topo_info(&topo_info, x86ms); + + env->nr_dies = ms->smp.dies; + + /* + * If APIC ID is not set, + * set it based on socket/die/core/thread properties. 
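For the UNASSIGNED_APIC_ID branch below, x86_apicid_from_topo_ids() packs the topology IDs into bit fields whose width is ceil(log2(count)) per level. A hedged sketch of that packing; the real helper lives in hw/i386/topology.h and also covers modules, so treat this as the idea rather than the exact layout:

    #include <stdint.h>

    /* Smallest width that can index 'count' items: ceil(log2(count)). */
    static unsigned width_for(unsigned count)
    {
        unsigned w = 0;

        while ((1u << w) < count) {
            w++;
        }
        return w;
    }

    /* Pack thread/core/die/socket IDs into an APIC ID, lowest level
     * in the least significant bits.
     */
    static uint32_t apicid_from_topo(unsigned threads, unsigned cores,
                                     unsigned dies, unsigned smt_id,
                                     unsigned core_id, unsigned die_id,
                                     unsigned pkg_id)
    {
        unsigned smt_w = width_for(threads);
        unsigned core_w = width_for(cores);
        unsigned die_w = width_for(dies);

        return smt_id
             | (core_id << smt_w)
             | (die_id << (smt_w + core_w))
             | (pkg_id << (smt_w + core_w + die_w));
    }

    /* Example: 2 threads/core, 4 cores/die, 1 die/socket gives widths
     * 1/2/0, so socket 1, core 3, thread 1 packs to
     * 1 | (3 << 1) | (1 << 3) = 0b1111 = 15.
     */
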
+ */ + if (cpu->apic_id == UNASSIGNED_APIC_ID) { + int max_socket = (ms->smp.max_cpus - 1) / + smp_threads / smp_cores / ms->smp.dies; + + /* + * die-id was optional in QEMU 4.0 and older, so keep it optional + * if there's only one die per socket. + */ + if (cpu->die_id < 0 && ms->smp.dies == 1) { + cpu->die_id = 0; + } + + if (cpu->socket_id < 0) { + error_setg(errp, "CPU socket-id is not set"); + return; + } else if (cpu->socket_id > max_socket) { + error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", + cpu->socket_id, max_socket); + return; + } + if (cpu->die_id < 0) { + error_setg(errp, "CPU die-id is not set"); + return; + } else if (cpu->die_id > ms->smp.dies - 1) { + error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", + cpu->die_id, ms->smp.dies - 1); + return; + } + if (cpu->core_id < 0) { + error_setg(errp, "CPU core-id is not set"); + return; + } else if (cpu->core_id > (smp_cores - 1)) { + error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", + cpu->core_id, smp_cores - 1); + return; + } + if (cpu->thread_id < 0) { + error_setg(errp, "CPU thread-id is not set"); + return; + } else if (cpu->thread_id > (smp_threads - 1)) { + error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", + cpu->thread_id, smp_threads - 1); + return; + } + + topo_ids.pkg_id = cpu->socket_id; + topo_ids.die_id = cpu->die_id; + topo_ids.core_id = cpu->core_id; + topo_ids.smt_id = cpu->thread_id; + cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); + } + + cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); + if (!cpu_slot) { + x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + error_setg(errp, + "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" + " APIC ID %" PRIu32 ", valid index range 0:%d", + topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, + cpu->apic_id, ms->possible_cpus->len - 1); + return; + } + + if (cpu_slot->cpu) { + error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", + idx, cpu->apic_id); + return; + } + + /* if 'address' properties socket-id/core-id/thread-id are not set, set them + * so that machine_query_hotpluggable_cpus would show correct values + */ + /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() + * once -smp refactoring is complete and there will be CPU private + * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ + x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { + error_setg(errp, "property socket-id: %u doesn't match set apic-id:" + " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, + topo_ids.pkg_id); + return; + } + cpu->socket_id = topo_ids.pkg_id; + + if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { + error_setg(errp, "property die-id: %u doesn't match set apic-id:" + " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); + return; + } + cpu->die_id = topo_ids.die_id; + + if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { + error_setg(errp, "property core-id: %u doesn't match set apic-id:" + " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, + topo_ids.core_id); + return; + } + cpu->core_id = topo_ids.core_id; + + if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { + error_setg(errp, "property thread-id: %u doesn't match set apic-id:" + " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, + topo_ids.smt_id); + return; + } + cpu->thread_id = topo_ids.smt_id; + + /* + * 
kvm_enabled() must go first to ensure that kvm_* references are + * not emitted for the linker to consume (kvm_enabled() is + * a literal `0` in configurations where kvm_* aren't defined) + */ + if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && + !kvm_hv_vpindex_settable()) { + error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); + return; + } + + cs = CPU(cpu); + cs->cpu_index = idx; + + numa_cpu_pre_plug(cpu_slot, dev, errp); +} + +static long get_file_size(FILE *f) +{ + long where, size; + + /* XXX: on Unix systems, using fstat() probably makes more sense */ + + where = ftell(f); + fseek(f, 0, SEEK_END); + size = ftell(f); + fseek(f, where, SEEK_SET); + + return size; +} + +void gsi_handler(void *opaque, int n, int level) +{ + GSIState *s = opaque; + + trace_x86_gsi_interrupt(n, level); + switch (n) { + case 0 ... ISA_NUM_IRQS - 1: + if (s->i8259_irq[n]) { + /* Under KVM, Kernel will forward to both PIC and IOAPIC */ + qemu_set_irq(s->i8259_irq[n], level); + } + /* fall through */ + case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: +#ifdef CONFIG_XEN_EMU + /* + * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC + * routing actually works properly under Xen). And then to + * *either* the PIRQ handling or the I/OAPIC depending on + * whether the former wants it. + */ + if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { + break; + } +#endif + qemu_set_irq(s->ioapic_irq[n], level); + break; + case IO_APIC_SECONDARY_IRQBASE + ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: + qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); + break; + } +} + +void ioapic_init_gsi(GSIState *gsi_state, Object *parent) +{ + DeviceState *dev; + SysBusDevice *d; + unsigned int i; + + assert(parent); + if (kvm_ioapic_in_kernel()) { + dev = qdev_new(TYPE_KVM_IOAPIC); + } else { + dev = qdev_new(TYPE_IOAPIC); + } + object_property_add_child(parent, "ioapic", OBJECT(dev)); + d = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(d, &error_fatal); + sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); + } +} + +DeviceState *ioapic_init_secondary(GSIState *gsi_state) +{ + DeviceState *dev; + SysBusDevice *d; + unsigned int i; + + dev = qdev_new(TYPE_IOAPIC); + d = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(d, &error_fatal); + sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); + } + return dev; +} + +/* + * The entry point into the kernel for PVH boot is different from + * the native entry point. The PVH entry is defined by the x86/HVM + * direct boot ABI and is available in an ELFNOTE in the kernel binary. + * + * This function is passed to load_elf() when it is called from + * load_elfboot() which then additionally checks for an ELF Note of + * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to + * parse the PVH entry address from the ELF Note. + * + * Due to trickery in elf_opts.h, load_elf() is actually available as + * load_elf32() or load_elf64() and this routine needs to be able + * to deal with being called as 32 or 64 bit. + * + * The address of the PVH entry point is saved to the 'pvh_start_addr' + * global variable. (although the entry point is 32-bit, the kernel + * binary can be either 32-bit or 64-bit). 
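Concretely, the note walk amounts to skipping the fixed note header plus the alignment-padded name to reach the descriptor. A simplified 64-bit-only sketch, assuming the standard ELF note layout and a nonzero power-of-two alignment taken from the program header; the real code below also handles the 32-bit variant:

    #include <stdint.h>
    #include <string.h>

    struct note64 {
        uint32_t n_namesz;  /* length of the name after the header */
        uint32_t n_descsz;  /* length of the descriptor after the name */
        uint32_t n_type;    /* e.g. XEN_ELFNOTE_PHYS32_ENTRY */
    };

    /* 'a' must be a nonzero power of two. */
    static uint64_t align_up(uint64_t x, uint64_t a)
    {
        return (x + a - 1) & ~(a - 1);
    }

    /* Return the 32-bit PVH entry from a 64-bit ELF note, 0 if absent. */
    static uint32_t pvh_entry_from_note(const struct note64 *n,
                                        uint64_t align)
    {
        const uint8_t *p = (const uint8_t *)n;
        uint32_t entry;

        if (!n || n->n_descsz < sizeof(entry)) {
            return 0;
        }
        /* Descriptor sits after the header and the padded name. */
        p += sizeof(*n) + align_up(n->n_namesz, align);
        memcpy(&entry, p, sizeof(entry));
        return entry;
    }
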
+ */ +static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +{ + size_t *elf_note_data_addr; + + /* Check if ELF Note header passed in is valid */ + if (arg1 == NULL) { + return 0; + } + + if (is64) { + struct elf64_note *nhdr64 = (struct elf64_note *)arg1; + uint64_t nhdr_size64 = sizeof(struct elf64_note); + uint64_t phdr_align = *(uint64_t *)arg2; + uint64_t nhdr_namesz = nhdr64->n_namesz; + + elf_note_data_addr = + ((void *)nhdr64) + nhdr_size64 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + + pvh_start_addr = *elf_note_data_addr; + } else { + struct elf32_note *nhdr32 = (struct elf32_note *)arg1; + uint32_t nhdr_size32 = sizeof(struct elf32_note); + uint32_t phdr_align = *(uint32_t *)arg2; + uint32_t nhdr_namesz = nhdr32->n_namesz; + + elf_note_data_addr = + ((void *)nhdr32) + nhdr_size32 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + + pvh_start_addr = *(uint32_t *)elf_note_data_addr; + } + + return pvh_start_addr; +} + +static bool load_elfboot(const char *kernel_filename, + int kernel_file_size, + uint8_t *header, + size_t pvh_xen_start_addr, + FWCfgState *fw_cfg) +{ + uint32_t flags = 0; + uint32_t mh_load_addr = 0; + uint32_t elf_kernel_size = 0; + uint64_t elf_entry; + uint64_t elf_low, elf_high; + int kernel_size; + + if (ldl_p(header) != 0x464c457f) { + return false; /* no elfboot */ + } + + bool elf_is64 = header[EI_CLASS] == ELFCLASS64; + flags = elf_is64 ? + ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; + + if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ + error_report("elfboot unsupported flags = %x", flags); + exit(1); + } + + uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; + kernel_size = load_elf(kernel_filename, read_pvh_start_addr, + NULL, &elf_note_type, &elf_entry, + &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, + 0, 0); + + if (kernel_size < 0) { + error_report("Error while loading elf kernel"); + exit(1); + } + mh_load_addr = elf_low; + elf_kernel_size = elf_high - elf_low; + + if (pvh_start_addr == 0) { + error_report("Error loading uncompressed kernel without PVH ELF Note"); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); + + return true; +} + +void x86_load_linux(X86MachineState *x86ms, + FWCfgState *fw_cfg, + int acpi_data_size, + bool pvh_enabled) +{ + bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; + uint16_t protocol; + int setup_size, kernel_size, cmdline_size; + int dtb_size, setup_data_offset; + uint32_t initrd_max; + uint8_t header[8192], *setup, *kernel; + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; + FILE *f; + char *vmode; + MachineState *machine = MACHINE(x86ms); + struct setup_data *setup_data; + const char *kernel_filename = machine->kernel_filename; + const char *initrd_filename = machine->initrd_filename; + const char *dtb_filename = machine->dtb; + const char *kernel_cmdline = machine->kernel_cmdline; + SevKernelLoaderContext sev_load_ctx = {}; + + /* Align to 16 bytes as a paranoia measure */ + cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; + + /* load the kernel header */ + f = fopen(kernel_filename, "rb"); + if (!f) { + fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", + kernel_filename, strerror(errno)); + exit(1); + } + + kernel_size = get_file_size(f); + if (!kernel_size || + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != + MIN(ARRAY_SIZE(header), kernel_size)) { 
+ fprintf(stderr, "qemu: could not load kernel '%s': %s\n", + kernel_filename, strerror(errno)); + exit(1); + } + + /* kernel protocol version */ + if (ldl_p(header + 0x202) == 0x53726448) { + protocol = lduw_p(header + 0x206); + } else { + /* + * This could be a multiboot kernel. If it is, let's stop treating it + * like a Linux kernel. + * Note: some multiboot images could be in the ELF format (the same of + * PVH), so we try multiboot first since we check the multiboot magic + * header before to load it. + */ + if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, + kernel_cmdline, kernel_size, header)) { + return; + } + /* + * Check if the file is an uncompressed kernel file (ELF) and load it, + * saving the PVH entry point used by the x86/HVM direct boot ABI. + * If load_elfboot() is successful, populate the fw_cfg info. + */ + if (pvh_enabled && + load_elfboot(kernel_filename, kernel_size, + header, pvh_start_addr, fw_cfg)) { + fclose(f); + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, + strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, + header, sizeof(header)); + + /* load initrd */ + if (initrd_filename) { + GMappedFile *mapped_file; + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); + if (!mapped_file) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + x86ms->initrd_mapped_file = mapped_file; + + initrd_data = g_mapped_file_get_contents(mapped_file); + initrd_size = g_mapped_file_get_length(mapped_file); + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." + "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max - initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, + initrd_size); + } + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "pvh.bin"; + nb_option_roms++; + + return; + } + protocol = 0; + } + + if (protocol < 0x200 || !(header[0x211] & 0x01)) { + /* Low kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x10000; + } else if (protocol < 0x202) { + /* High but ancient kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x100000; + } else { + /* High and recent kernel */ + real_addr = 0x10000; + cmdline_addr = 0x20000; + prot_addr = 0x100000; + } + + /* highest address for loading the initrd */ + if (protocol >= 0x20c && + lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { + /* + * Linux has supported initrd up to 4 GB for a very long time (2007, + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), + * though it only sets initrd_max to 2 GB to "work around bootloader + * bugs". Luckily, QEMU firmware(which does something like bootloader) + * has supported this. + * + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can + * be loaded into any address. + * + * In addition, initrd_max is uint32_t simply because QEMU doesn't + * support the 64-bit boot protocol (specifically the ext_ramdisk_image + * field). 
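The placement used for the PVH path above, and repeated for the bzImage path further down, is a single expression: slide the image down from initrd_max and round to a 4 KiB boundary. A small worked sketch with invented example values:

    #include <stdint.h>
    #include <assert.h>

    /* Highest page-aligned load address that keeps the whole image at
     * or below 'max'; mirrors (initrd_max - initrd_size) & ~4095.
     */
    static uint32_t initrd_load_addr(uint32_t max, uint32_t size)
    {
        assert(size < max);           /* QEMU exits with an error here */
        return (max - size) & ~4095u;
    }

    /* Example: max = 0x7fffffff, size = 0x2000000 (32 MiB)
     * -> 0x7dffffff & ~4095 = 0x7dfff000, a 4 KiB-aligned address.
     */
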
+ * + * Therefore here just limit initrd_max to UINT32_MAX simply as well. + */ + initrd_max = UINT32_MAX; + } else if (protocol >= 0x203) { + initrd_max = ldl_p(header + 0x22c); + } else { + initrd_max = 0x37ffffff; + } + + if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; + } + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + sev_load_ctx.cmdline_data = (char *)kernel_cmdline; + sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; + + if (protocol >= 0x202) { + stl_p(header + 0x228, cmdline_addr); + } else { + stw_p(header + 0x20, 0xA33F); + stw_p(header + 0x22, cmdline_addr - real_addr); + } + + /* handle vga= parameter */ + vmode = strstr(kernel_cmdline, "vga="); + if (vmode) { + unsigned int video_mode; + const char *end; + int ret; + /* skip "vga=" */ + vmode += 4; + if (!strncmp(vmode, "normal", 6)) { + video_mode = 0xffff; + } else if (!strncmp(vmode, "ext", 3)) { + video_mode = 0xfffe; + } else if (!strncmp(vmode, "ask", 3)) { + video_mode = 0xfffd; + } else { + ret = qemu_strtoui(vmode, &end, 0, &video_mode); + if (ret != 0 || (*end && *end != ' ')) { + fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); + exit(1); + } + } + stw_p(header + 0x1fa, video_mode); + } + + /* loader type */ + /* + * High nybble = B reserved for QEMU; low nybble is revision number. + * If this code is substantially changed, you may want to consider + * incrementing the revision. + */ + if (protocol >= 0x200) { + header[0x210] = 0xB0; + } + /* heap */ + if (protocol >= 0x201) { + header[0x211] |= 0x80; /* CAN_USE_HEAP */ + stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); + } + + /* load initrd */ + if (initrd_filename) { + GMappedFile *mapped_file; + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + if (protocol < 0x200) { + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); + exit(1); + } + + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); + if (!mapped_file) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + x86ms->initrd_mapped_file = mapped_file; + + initrd_data = g_mapped_file_get_contents(mapped_file); + initrd_size = g_mapped_file_get_length(mapped_file); + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." 
+ "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max - initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); + sev_load_ctx.initrd_data = initrd_data; + sev_load_ctx.initrd_size = initrd_size; + + stl_p(header + 0x218, initrd_addr); + stl_p(header + 0x21c, initrd_size); + } + + /* load kernel and setup */ + setup_size = header[0x1f1]; + if (setup_size == 0) { + setup_size = 4; + } + setup_size = (setup_size + 1) * 512; + if (setup_size > kernel_size) { + fprintf(stderr, "qemu: invalid kernel header\n"); + exit(1); + } + kernel_size -= setup_size; + + setup = g_malloc(setup_size); + kernel = g_malloc(kernel_size); + fseek(f, 0, SEEK_SET); + if (fread(setup, 1, setup_size, f) != setup_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + if (fread(kernel, 1, kernel_size, f) != kernel_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + fclose(f); + + /* append dtb to kernel */ + if (dtb_filename) { + if (protocol < 0x209) { + fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); + exit(1); + } + + dtb_size = get_image_size(dtb_filename); + if (dtb_size <= 0) { + fprintf(stderr, "qemu: error reading dtb %s: %s\n", + dtb_filename, strerror(errno)); + exit(1); + } + + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; + kernel = g_realloc(kernel, kernel_size); + + stq_p(header + 0x250, prot_addr + setup_data_offset); + + setup_data = (struct setup_data *)(kernel + setup_data_offset); + setup_data->next = 0; + setup_data->type = cpu_to_le32(SETUP_DTB); + setup_data->len = cpu_to_le32(dtb_size); + + load_image_size(dtb_filename, setup_data->data, dtb_size); + } + + /* + * If we're starting an encrypted VM, it will be OVMF based, which uses the + * efi stub for booting and doesn't require any values to be placed in the + * kernel header. We therefore don't update the header so the hash of the + * kernel on the other side of the fw_cfg interface matches the hash of the + * file the user passed in. 
+ */ + if (!sev_enabled()) { + memcpy(setup, header, MIN(sizeof(header), setup_size)); + } + + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + sev_load_ctx.kernel_data = (char *)kernel; + sev_load_ctx.kernel_size = kernel_size; + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); + sev_load_ctx.setup_data = (char *)setup; + sev_load_ctx.setup_size = setup_size; + + if (sev_enabled()) { + sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); + } + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "linuxboot.bin"; + if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { + option_rom[nb_option_roms].name = "linuxboot_dma.bin"; + } + nb_option_roms++; +} + +void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, + MemoryRegion *bios, bool read_only) +{ + uint64_t bios_size = memory_region_size(bios); + uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); + + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, + isa_bios, 1); + memory_region_set_readonly(isa_bios, read_only); +} + +void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) +{ + const char *bios_name; + char *filename; + int bios_size; + ssize_t ret; + + /* BIOS load */ + bios_name = MACHINE(x86ms)->firmware ?: default_firmware; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); + } else { + bios_size = -1; + } + if (bios_size <= 0 || + (bios_size % 65536) != 0) { + goto bios_error; + } + if (machine_require_guest_memfd(MACHINE(x86ms))) { + memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", + bios_size, &error_fatal); + } else { + memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", + bios_size, &error_fatal); + } + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for + * confidential computing guests, we have to destroy and + * re-launch them instead. So there is no need to register + * the firmware as rom to properly re-initialize on reset. + * Just go for a straight file load instead. 
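To make the sev_add_kernel_loader_hashes() call above concrete: under SEV the firmware verifies the kernel, initrd and command line against digests injected by QEMU, which is why the code skips the usual header patching and exposes the file's exact bytes over fw_cfg. A sketch of the check as the verifying side might perform it, using GLib's GChecksum; the actual digest table format is OVMF's business and is not modelled here:

    #include <glib.h>
    #include <string.h>

    /* Digest a blob exactly as it appears over fw_cfg and compare it
     * with an injected reference; any byte QEMU patched would break it.
     */
    static gboolean blob_matches(const void *blob, gsize len,
                                 const char *expected_hex)
    {
        GChecksum *sum = g_checksum_new(G_CHECKSUM_SHA256);
        gboolean ok;

        g_checksum_update(sum, blob, len);
        ok = strcmp(g_checksum_get_string(sum), expected_hex) == 0;
        g_checksum_free(sum);
        return ok;
    }
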
+ */ + void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size); + } else { + memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; + } + } + g_free(filename); + + if (!machine_require_guest_memfd(MACHINE(x86ms))) { + /* map the last 128KB of the BIOS in ISA space */ + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, + !isapc_ram_fw); + } + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), + &x86ms->bios); + return; + +bios_error: + fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); + exit(1); +} diff --git a/hw/i386/x86-cpu.c b/hw/i386/x86-cpu.c new file mode 100644 index 0000000000000000000000000000000000000000..ab2920522d18cfbaa06477e6aed3b35db9937576 --- /dev/null +++ b/hw/i386/x86-cpu.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2019, 2024 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
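The two mappings set up in x86_bios_rom_init()/x86_isa_bios_init() above are fixed arithmetic: the BIOS image is placed so it ends exactly at 4 GiB (the (uint32_t)(-bios_size) trick), and its last 128 KiB are aliased so they also end exactly at 1 MiB for real-mode reset code. A standalone sketch of both computations:

    #include <stdint.h>

    #define KiB 1024ull
    #define MiB (1024ull * KiB)

    typedef struct {
        uint64_t bios_gpa;    /* image ends exactly at 4 GiB */
        uint64_t alias_size;  /* tail aliased into ISA space, <= 128 KiB */
        uint64_t alias_gpa;   /* alias ends exactly at 1 MiB */
    } BiosMap;

    static BiosMap bios_map(uint64_t bios_size)
    {
        BiosMap m;

        /* Same value as (uint32_t)(-bios_size) in the code above. */
        m.bios_gpa = 0x100000000ull - bios_size;
        m.alias_size = bios_size < 128 * KiB ? bios_size : 128 * KiB;
        m.alias_gpa = 1 * MiB - m.alias_size;  /* 0xE0000 for 128 KiB */
        return m;
    }

    /* Example: a 256 KiB bios.bin lands at 0xFFFC0000, with its last
     * 128 KiB also visible at 0xE0000..0xFFFFF.
     */
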
+ */ +#include "qemu/osdep.h" +#include "sysemu/whpx.h" +#include "sysemu/cpu-timers.h" +#include "trace.h" + +#include "hw/i386/x86.h" +#include "target/i386/cpu.h" +#include "hw/intc/i8259.h" +#include "hw/irq.h" +#include "sysemu/kvm.h" + +/* TSC handling */ +uint64_t cpu_get_tsc(CPUX86State *env) +{ + return cpus_get_elapsed_ticks(); +} + +/* IRQ handling */ +static void pic_irq_request(void *opaque, int irq, int level) +{ + CPUState *cs = first_cpu; + X86CPU *cpu = X86_CPU(cs); + + trace_x86_pic_interrupt(irq, level); + if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && + !whpx_apic_in_platform()) { + CPU_FOREACH(cs) { + cpu = X86_CPU(cs); + if (apic_accept_pic_intr(cpu->apic_state)) { + apic_deliver_pic_intr(cpu->apic_state, level); + } + } + } else { + if (level) { + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + } +} + +qemu_irq x86_allocate_cpu_irq(void) +{ + return qemu_allocate_irq(pic_irq_request, NULL, 0); +} + +int cpu_get_pic_interrupt(CPUX86State *env) +{ + X86CPU *cpu = env_archcpu(env); + int intno; + + if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { + intno = apic_get_interrupt(cpu->apic_state); + if (intno >= 0) { + return intno; + } + /* read the irq from the PIC */ + if (!apic_accept_pic_intr(cpu->apic_state)) { + return -1; + } + } + + intno = pic_read_irq(isa_pic); + return intno; +} + +DeviceState *cpu_get_current_apic(void) +{ + if (current_cpu) { + X86CPU *cpu = X86_CPU(current_cpu); + return cpu->apic_state; + } else { + return NULL; + } +} diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 2b6291ad8d5f3e23806d825c4360a3dca4234b76..935eea641b73768b147ec503b920545faeae2010 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -22,52 +22,25 @@ */ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "qemu/option.h" -#include "qemu/cutils.h" #include "qemu/units.h" -#include "qemu/datadir.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" -#include "qapi/clone-visitor.h" #include "qapi/qapi-visit-machine.h" #include "qapi/visitor.h" #include "sysemu/qtest.h" -#include "sysemu/whpx.h" #include "sysemu/numa.h" -#include "sysemu/replay.h" -#include "sysemu/sysemu.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/xen.h" #include "trace.h" +#include "hw/acpi/aml-build.h" #include "hw/i386/x86.h" -#include "target/i386/cpu.h" #include "hw/i386/topology.h" -#include "hw/i386/fw_cfg.h" -#include "hw/intc/i8259.h" -#include "hw/rtc/mc146818rtc.h" -#include "target/i386/sev.h" -#include "hw/acpi/cpu_hotplug.h" -#include "hw/irq.h" #include "hw/nmi.h" -#include "hw/loader.h" -#include "multiboot.h" -#include "elf.h" -#include "standard-headers/asm-x86/bootparam.h" -#include CONFIG_DEVICES #include "kvm/kvm_i386.h" -#ifdef CONFIG_XEN_EMU -#include "hw/xen/xen.h" -#include "hw/i386/kvm/xen_evtchn.h" -#endif -/* Physical Address of PVH entry point read from kernel ELF NOTE */ -static size_t pvh_start_addr; - -static void init_topo_info(X86CPUTopoInfo *topo_info, - const X86MachineState *x86ms) +void init_topo_info(X86CPUTopoInfo *topo_info, + const X86MachineState *x86ms) { MachineState *ms = MACHINE(x86ms); @@ -94,351 +67,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, return x86_apicid_from_cpu_idx(&topo_info, cpu_index); } - -void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) -{ - Object *cpu = object_new(MACHINE(x86ms)->cpu_type); - - if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { - goto out; - } - 
qdev_realize(DEVICE(cpu), NULL, errp); - -out: - object_unref(cpu); -} - -void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) -{ - int i; - const CPUArchIdList *possible_cpus; - MachineState *ms = MACHINE(x86ms); - MachineClass *mc = MACHINE_GET_CLASS(x86ms); - - x86_cpu_set_default_version(default_cpu_version); - - /* - * Calculates the limit to CPU APIC ID values - * - * Limit for the APIC ID value, so that all - * CPU APIC IDs are < x86ms->apic_id_limit. - * - * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). - */ - x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, - ms->smp.max_cpus - 1) + 1; - - /* - * Can we support APIC ID 255 or higher? With KVM, that requires - * both in-kernel lapic and X2APIC userspace API. - * - * kvm_enabled() must go first to ensure that kvm_* references are - * not emitted for the linker to consume (kvm_enabled() is - * a literal `0` in configurations where kvm_* aren't defined) - */ - if (kvm_enabled() && x86ms->apic_id_limit > 255 && - (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) { - error_report("current -smp configuration requires kernel " - "irqchip and X2APIC API support."); - exit(EXIT_FAILURE); - } - - if (kvm_enabled()) { - kvm_set_max_apic_id(x86ms->apic_id_limit); - } - - possible_cpus = mc->possible_cpu_arch_ids(ms); - for (i = 0; i < ms->smp.cpus; i++) { - x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); - } -} - -void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) -{ - MC146818RtcState *rtc = MC146818_RTC(s); - - if (cpus_count > 0xff) { - /* - * If the number of CPUs can't be represented in 8 bits, the - * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just - * to make old BIOSes fail more predictably. - */ - mc146818rtc_set_cmos_data(rtc, 0x5f, 0); - } else { - mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); - } -} - -static int x86_apic_cmp(const void *a, const void *b) -{ - CPUArchId *apic_a = (CPUArchId *)a; - CPUArchId *apic_b = (CPUArchId *)b; - - return apic_a->arch_id - apic_b->arch_id; -} - -/* - * returns pointer to CPUArchId descriptor that matches CPU's apic_id - * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no - * entry corresponding to CPU's apic_id returns NULL. 
- */ -CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) -{ - CPUArchId apic_id, *found_cpu; - - apic_id.arch_id = id; - found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, - ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), - x86_apic_cmp); - if (found_cpu && idx) { - *idx = found_cpu - ms->possible_cpus->cpus; - } - return found_cpu; -} - -void x86_cpu_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - CPUArchId *found_cpu; - Error *local_err = NULL; - X86CPU *cpu = X86_CPU(dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - - if (x86ms->acpi_dev) { - hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); - if (local_err) { - goto out; - } - } - - /* increment the number of CPUs */ - x86ms->boot_cpus++; - if (x86ms->rtc) { - x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); - } - if (x86ms->fw_cfg) { - fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); - } - - found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); - found_cpu->cpu = OBJECT(dev); -out: - error_propagate(errp, local_err); -} - -void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - int idx = -1; - X86CPU *cpu = X86_CPU(dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - - if (!x86ms->acpi_dev) { - error_setg(errp, "CPU hot unplug not supported without ACPI"); - return; - } - - x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); - assert(idx != -1); - if (idx == 0) { - error_setg(errp, "Boot CPU is unpluggable"); - return; - } - - hotplug_handler_unplug_request(x86ms->acpi_dev, dev, - errp); -} - -void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - CPUArchId *found_cpu; - Error *local_err = NULL; - X86CPU *cpu = X86_CPU(dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - - hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); - if (local_err) { - goto out; - } - - found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); - found_cpu->cpu = NULL; - qdev_unrealize(dev); - - /* decrement the number of CPUs */ - x86ms->boot_cpus--; - /* Update the number of CPUs in CMOS */ - x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); - fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); - out: - error_propagate(errp, local_err); -} - -void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - int idx; - CPUState *cs; - CPUArchId *cpu_slot; - X86CPUTopoIDs topo_ids; - X86CPU *cpu = X86_CPU(dev); - CPUX86State *env = &cpu->env; - MachineState *ms = MACHINE(hotplug_dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - unsigned int smp_cores = ms->smp.cores; - unsigned int smp_threads = ms->smp.threads; - X86CPUTopoInfo topo_info; - - if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { - error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", - ms->cpu_type); - return; - } - - if (x86ms->acpi_dev) { - Error *local_err = NULL; - - hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } - - init_topo_info(&topo_info, x86ms); - - env->nr_dies = ms->smp.dies; - - /* - * If APIC ID is not set, - * set it based on socket/die/core/thread properties. 
- */ - if (cpu->apic_id == UNASSIGNED_APIC_ID) { - int max_socket = (ms->smp.max_cpus - 1) / - smp_threads / smp_cores / ms->smp.dies; - - /* - * die-id was optional in QEMU 4.0 and older, so keep it optional - * if there's only one die per socket. - */ - if (cpu->die_id < 0 && ms->smp.dies == 1) { - cpu->die_id = 0; - } - - if (cpu->socket_id < 0) { - error_setg(errp, "CPU socket-id is not set"); - return; - } else if (cpu->socket_id > max_socket) { - error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", - cpu->socket_id, max_socket); - return; - } - if (cpu->die_id < 0) { - error_setg(errp, "CPU die-id is not set"); - return; - } else if (cpu->die_id > ms->smp.dies - 1) { - error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", - cpu->die_id, ms->smp.dies - 1); - return; - } - if (cpu->core_id < 0) { - error_setg(errp, "CPU core-id is not set"); - return; - } else if (cpu->core_id > (smp_cores - 1)) { - error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", - cpu->core_id, smp_cores - 1); - return; - } - if (cpu->thread_id < 0) { - error_setg(errp, "CPU thread-id is not set"); - return; - } else if (cpu->thread_id > (smp_threads - 1)) { - error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", - cpu->thread_id, smp_threads - 1); - return; - } - - topo_ids.pkg_id = cpu->socket_id; - topo_ids.die_id = cpu->die_id; - topo_ids.core_id = cpu->core_id; - topo_ids.smt_id = cpu->thread_id; - cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); - } - - cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); - if (!cpu_slot) { - x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); - error_setg(errp, - "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" - " APIC ID %" PRIu32 ", valid index range 0:%d", - topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, - cpu->apic_id, ms->possible_cpus->len - 1); - return; - } - - if (cpu_slot->cpu) { - error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", - idx, cpu->apic_id); - return; - } - - /* if 'address' properties socket-id/core-id/thread-id are not set, set them - * so that machine_query_hotpluggable_cpus would show correct values - */ - /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() - * once -smp refactoring is complete and there will be CPU private - * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ - x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); - if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { - error_setg(errp, "property socket-id: %u doesn't match set apic-id:" - " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, - topo_ids.pkg_id); - return; - } - cpu->socket_id = topo_ids.pkg_id; - - if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { - error_setg(errp, "property die-id: %u doesn't match set apic-id:" - " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); - return; - } - cpu->die_id = topo_ids.die_id; - - if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { - error_setg(errp, "property core-id: %u doesn't match set apic-id:" - " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, - topo_ids.core_id); - return; - } - cpu->core_id = topo_ids.core_id; - - if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { - error_setg(errp, "property thread-id: %u doesn't match set apic-id:" - " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, - topo_ids.smt_id); - return; - } - cpu->thread_id = topo_ids.smt_id; - - /* - * 
kvm_enabled() must go first to ensure that kvm_* references are - * not emitted for the linker to consume (kvm_enabled() is - * a literal `0` in configurations where kvm_* aren't defined) - */ - if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && - !kvm_hv_vpindex_settable()) { - error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); - return; - } - - cs = CPU(cpu); - cs->cpu_index = idx; - - numa_cpu_pre_plug(cpu_slot, dev, errp); -} - CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) { @@ -516,690 +144,12 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) CPU_FOREACH(cs) { X86CPU *cpu = X86_CPU(cs); - if (!cpu->apic_state) { - cpu_interrupt(cs, CPU_INTERRUPT_NMI); - } else { + if (cpu_is_apic_enabled(cpu->apic_state)) { apic_deliver_nmi(cpu->apic_state); - } - } -} - -static long get_file_size(FILE *f) -{ - long where, size; - - /* XXX: on Unix systems, using fstat() probably makes more sense */ - - where = ftell(f); - fseek(f, 0, SEEK_END); - size = ftell(f); - fseek(f, where, SEEK_SET); - - return size; -} - -/* TSC handling */ -uint64_t cpu_get_tsc(CPUX86State *env) -{ - return cpus_get_elapsed_ticks(); -} - -/* IRQ handling */ -static void pic_irq_request(void *opaque, int irq, int level) -{ - CPUState *cs = first_cpu; - X86CPU *cpu = X86_CPU(cs); - - trace_x86_pic_interrupt(irq, level); - if (cpu->apic_state && !kvm_irqchip_in_kernel() && - !whpx_apic_in_platform()) { - CPU_FOREACH(cs) { - cpu = X86_CPU(cs); - if (apic_accept_pic_intr(cpu->apic_state)) { - apic_deliver_pic_intr(cpu->apic_state, level); - } - } - } else { - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } - } -} - -qemu_irq x86_allocate_cpu_irq(void) -{ - return qemu_allocate_irq(pic_irq_request, NULL, 0); -} - -int cpu_get_pic_interrupt(CPUX86State *env) -{ - X86CPU *cpu = env_archcpu(env); - int intno; - - if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { - intno = apic_get_interrupt(cpu->apic_state); - if (intno >= 0) { - return intno; - } - /* read the irq from the PIC */ - if (!apic_accept_pic_intr(cpu->apic_state)) { - return -1; - } - } - - intno = pic_read_irq(isa_pic); - return intno; -} - -DeviceState *cpu_get_current_apic(void) -{ - if (current_cpu) { - X86CPU *cpu = X86_CPU(current_cpu); - return cpu->apic_state; - } else { - return NULL; - } -} - -void gsi_handler(void *opaque, int n, int level) -{ - GSIState *s = opaque; - - trace_x86_gsi_interrupt(n, level); - switch (n) { - case 0 ... ISA_NUM_IRQS - 1: - if (s->i8259_irq[n]) { - /* Under KVM, Kernel will forward to both PIC and IOAPIC */ - qemu_set_irq(s->i8259_irq[n], level); - } - /* fall through */ - case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: -#ifdef CONFIG_XEN_EMU - /* - * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC - * routing actually works properly under Xen). And then to - * *either* the PIRQ handling or the I/OAPIC depending on - * whether the former wants it. - */ - if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { - break; - } -#endif - qemu_set_irq(s->ioapic_irq[n], level); - break; - case IO_APIC_SECONDARY_IRQBASE - ... 
IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: - qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); - break; - } -} - -void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) -{ - DeviceState *dev; - SysBusDevice *d; - unsigned int i; - - assert(parent_name); - if (kvm_ioapic_in_kernel()) { - dev = qdev_new(TYPE_KVM_IOAPIC); - } else { - dev = qdev_new(TYPE_IOAPIC); - } - object_property_add_child(object_resolve_path(parent_name, NULL), - "ioapic", OBJECT(dev)); - d = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(d, &error_fatal); - sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); - - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); - } -} - -DeviceState *ioapic_init_secondary(GSIState *gsi_state) -{ - DeviceState *dev; - SysBusDevice *d; - unsigned int i; - - dev = qdev_new(TYPE_IOAPIC); - d = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(d, &error_fatal); - sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); - - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); - } - return dev; -} - -struct setup_data { - uint64_t next; - uint32_t type; - uint32_t len; - uint8_t data[]; -} __attribute__((packed)); - - -/* - * The entry point into the kernel for PVH boot is different from - * the native entry point. The PVH entry is defined by the x86/HVM - * direct boot ABI and is available in an ELFNOTE in the kernel binary. - * - * This function is passed to load_elf() when it is called from - * load_elfboot() which then additionally checks for an ELF Note of - * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to - * parse the PVH entry address from the ELF Note. - * - * Due to trickery in elf_opts.h, load_elf() is actually available as - * load_elf32() or load_elf64() and this routine needs to be able - * to deal with being called as 32 or 64 bit. - * - * The address of the PVH entry point is saved to the 'pvh_start_addr' - * global variable. (although the entry point is 32-bit, the kernel - * binary can be either 32-bit or 64-bit). - */ -static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) -{ - size_t *elf_note_data_addr; - - /* Check if ELF Note header passed in is valid */ - if (arg1 == NULL) { - return 0; - } - - if (is64) { - struct elf64_note *nhdr64 = (struct elf64_note *)arg1; - uint64_t nhdr_size64 = sizeof(struct elf64_note); - uint64_t phdr_align = *(uint64_t *)arg2; - uint64_t nhdr_namesz = nhdr64->n_namesz; - - elf_note_data_addr = - ((void *)nhdr64) + nhdr_size64 + - QEMU_ALIGN_UP(nhdr_namesz, phdr_align); - - pvh_start_addr = *elf_note_data_addr; - } else { - struct elf32_note *nhdr32 = (struct elf32_note *)arg1; - uint32_t nhdr_size32 = sizeof(struct elf32_note); - uint32_t phdr_align = *(uint32_t *)arg2; - uint32_t nhdr_namesz = nhdr32->n_namesz; - - elf_note_data_addr = - ((void *)nhdr32) + nhdr_size32 + - QEMU_ALIGN_UP(nhdr_namesz, phdr_align); - - pvh_start_addr = *(uint32_t *)elf_note_data_addr; - } - - return pvh_start_addr; -} - -static bool load_elfboot(const char *kernel_filename, - int kernel_file_size, - uint8_t *header, - size_t pvh_xen_start_addr, - FWCfgState *fw_cfg) -{ - uint32_t flags = 0; - uint32_t mh_load_addr = 0; - uint32_t elf_kernel_size = 0; - uint64_t elf_entry; - uint64_t elf_low, elf_high; - int kernel_size; - - if (ldl_p(header) != 0x464c457f) { - return false; /* no elfboot */ - } - - bool elf_is64 = header[EI_CLASS] == ELFCLASS64; - flags = elf_is64 ? 
- ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; - - if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ - error_report("elfboot unsupported flags = %x", flags); - exit(1); - } - - uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; - kernel_size = load_elf(kernel_filename, read_pvh_start_addr, - NULL, &elf_note_type, &elf_entry, - &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, - 0, 0); - - if (kernel_size < 0) { - error_report("Error while loading elf kernel"); - exit(1); - } - mh_load_addr = elf_low; - elf_kernel_size = elf_high - elf_low; - - if (pvh_start_addr == 0) { - error_report("Error loading uncompressed kernel without PVH ELF Note"); - exit(1); - } - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); - - return true; -} - -void x86_load_linux(X86MachineState *x86ms, - FWCfgState *fw_cfg, - int acpi_data_size, - bool pvh_enabled) -{ - bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; - uint16_t protocol; - int setup_size, kernel_size, cmdline_size; - int dtb_size, setup_data_offset; - uint32_t initrd_max; - uint8_t header[8192], *setup, *kernel; - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; - FILE *f; - char *vmode; - MachineState *machine = MACHINE(x86ms); - struct setup_data *setup_data; - const char *kernel_filename = machine->kernel_filename; - const char *initrd_filename = machine->initrd_filename; - const char *dtb_filename = machine->dtb; - const char *kernel_cmdline = machine->kernel_cmdline; - SevKernelLoaderContext sev_load_ctx = {}; - - /* Align to 16 bytes as a paranoia measure */ - cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; - - /* load the kernel header */ - f = fopen(kernel_filename, "rb"); - if (!f) { - fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", - kernel_filename, strerror(errno)); - exit(1); - } - - kernel_size = get_file_size(f); - if (!kernel_size || - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != - MIN(ARRAY_SIZE(header), kernel_size)) { - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", - kernel_filename, strerror(errno)); - exit(1); - } - - /* kernel protocol version */ - if (ldl_p(header + 0x202) == 0x53726448) { - protocol = lduw_p(header + 0x206); - } else { - /* - * This could be a multiboot kernel. If it is, let's stop treating it - * like a Linux kernel. - * Note: some multiboot images could be in the ELF format (the same of - * PVH), so we try multiboot first since we check the multiboot magic - * header before to load it. - */ - if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, - kernel_cmdline, kernel_size, header)) { - return; - } - /* - * Check if the file is an uncompressed kernel file (ELF) and load it, - * saving the PVH entry point used by the x86/HVM direct boot ABI. - * If load_elfboot() is successful, populate the fw_cfg info. 
- */ - if (pvh_enabled && - load_elfboot(kernel_filename, kernel_size, - header, pvh_start_addr, fw_cfg)) { - fclose(f); - - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, - strlen(kernel_cmdline) + 1); - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); - - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, - header, sizeof(header)); - - /* load initrd */ - if (initrd_filename) { - GMappedFile *mapped_file; - gsize initrd_size; - gchar *initrd_data; - GError *gerr = NULL; - - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); - if (!mapped_file) { - fprintf(stderr, "qemu: error reading initrd %s: %s\n", - initrd_filename, gerr->message); - exit(1); - } - x86ms->initrd_mapped_file = mapped_file; - - initrd_data = g_mapped_file_get_contents(mapped_file); - initrd_size = g_mapped_file_get_length(mapped_file); - initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; - if (initrd_size >= initrd_max) { - fprintf(stderr, "qemu: initrd is too large, cannot support." - "(max: %"PRIu32", need %"PRId64")\n", - initrd_max, (uint64_t)initrd_size); - exit(1); - } - - initrd_addr = (initrd_max - initrd_size) & ~4095; - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, - initrd_size); - } - - option_rom[nb_option_roms].bootindex = 0; - option_rom[nb_option_roms].name = "pvh.bin"; - nb_option_roms++; - - return; - } - protocol = 0; - } - - if (protocol < 0x200 || !(header[0x211] & 0x01)) { - /* Low kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x10000; - } else if (protocol < 0x202) { - /* High but ancient kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x100000; - } else { - /* High and recent kernel */ - real_addr = 0x10000; - cmdline_addr = 0x20000; - prot_addr = 0x100000; - } - - /* highest address for loading the initrd */ - if (protocol >= 0x20c && - lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { - /* - * Linux has supported initrd up to 4 GB for a very long time (2007, - * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), - * though it only sets initrd_max to 2 GB to "work around bootloader - * bugs". Luckily, QEMU firmware(which does something like bootloader) - * has supported this. - * - * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can - * be loaded into any address. - * - * In addition, initrd_max is uint32_t simply because QEMU doesn't - * support the 64-bit boot protocol (specifically the ext_ramdisk_image - * field). - * - * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
- */ - initrd_max = UINT32_MAX; - } else if (protocol >= 0x203) { - initrd_max = ldl_p(header + 0x22c); - } else { - initrd_max = 0x37ffffff; - } - - if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { - initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; - } - - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); - sev_load_ctx.cmdline_data = (char *)kernel_cmdline; - sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; - - if (protocol >= 0x202) { - stl_p(header + 0x228, cmdline_addr); - } else { - stw_p(header + 0x20, 0xA33F); - stw_p(header + 0x22, cmdline_addr - real_addr); - } - - /* handle vga= parameter */ - vmode = strstr(kernel_cmdline, "vga="); - if (vmode) { - unsigned int video_mode; - const char *end; - int ret; - /* skip "vga=" */ - vmode += 4; - if (!strncmp(vmode, "normal", 6)) { - video_mode = 0xffff; - } else if (!strncmp(vmode, "ext", 3)) { - video_mode = 0xfffe; - } else if (!strncmp(vmode, "ask", 3)) { - video_mode = 0xfffd; } else { - ret = qemu_strtoui(vmode, &end, 0, &video_mode); - if (ret != 0 || (*end && *end != ' ')) { - fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); - exit(1); - } - } - stw_p(header + 0x1fa, video_mode); - } - - /* loader type */ - /* - * High nybble = B reserved for QEMU; low nybble is revision number. - * If this code is substantially changed, you may want to consider - * incrementing the revision. - */ - if (protocol >= 0x200) { - header[0x210] = 0xB0; - } - /* heap */ - if (protocol >= 0x201) { - header[0x211] |= 0x80; /* CAN_USE_HEAP */ - stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); - } - - /* load initrd */ - if (initrd_filename) { - GMappedFile *mapped_file; - gsize initrd_size; - gchar *initrd_data; - GError *gerr = NULL; - - if (protocol < 0x200) { - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); - exit(1); - } - - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); - if (!mapped_file) { - fprintf(stderr, "qemu: error reading initrd %s: %s\n", - initrd_filename, gerr->message); - exit(1); - } - x86ms->initrd_mapped_file = mapped_file; - - initrd_data = g_mapped_file_get_contents(mapped_file); - initrd_size = g_mapped_file_get_length(mapped_file); - if (initrd_size >= initrd_max) { - fprintf(stderr, "qemu: initrd is too large, cannot support." 
- "(max: %"PRIu32", need %"PRId64")\n", - initrd_max, (uint64_t)initrd_size); - exit(1); - } - - initrd_addr = (initrd_max - initrd_size) & ~4095; - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); - sev_load_ctx.initrd_data = initrd_data; - sev_load_ctx.initrd_size = initrd_size; - - stl_p(header + 0x218, initrd_addr); - stl_p(header + 0x21c, initrd_size); - } - - /* load kernel and setup */ - setup_size = header[0x1f1]; - if (setup_size == 0) { - setup_size = 4; - } - setup_size = (setup_size + 1) * 512; - if (setup_size > kernel_size) { - fprintf(stderr, "qemu: invalid kernel header\n"); - exit(1); - } - kernel_size -= setup_size; - - setup = g_malloc(setup_size); - kernel = g_malloc(kernel_size); - fseek(f, 0, SEEK_SET); - if (fread(setup, 1, setup_size, f) != setup_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - if (fread(kernel, 1, kernel_size, f) != kernel_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - fclose(f); - - /* append dtb to kernel */ - if (dtb_filename) { - if (protocol < 0x209) { - fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); - exit(1); - } - - dtb_size = get_image_size(dtb_filename); - if (dtb_size <= 0) { - fprintf(stderr, "qemu: error reading dtb %s: %s\n", - dtb_filename, strerror(errno)); - exit(1); - } - - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); - kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; - kernel = g_realloc(kernel, kernel_size); - - stq_p(header + 0x250, prot_addr + setup_data_offset); - - setup_data = (struct setup_data *)(kernel + setup_data_offset); - setup_data->next = 0; - setup_data->type = cpu_to_le32(SETUP_DTB); - setup_data->len = cpu_to_le32(dtb_size); - - load_image_size(dtb_filename, setup_data->data, dtb_size); - } - - /* - * If we're starting an encrypted VM, it will be OVMF based, which uses the - * efi stub for booting and doesn't require any values to be placed in the - * kernel header. We therefore don't update the header so the hash of the - * kernel on the other side of the fw_cfg interface matches the hash of the - * file the user passed in. 
- */ - if (!sev_enabled()) { - memcpy(setup, header, MIN(sizeof(header), setup_size)); - } - - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); - sev_load_ctx.kernel_data = (char *)kernel; - sev_load_ctx.kernel_size = kernel_size; - - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); - sev_load_ctx.setup_data = (char *)setup; - sev_load_ctx.setup_size = setup_size; - - if (sev_enabled()) { - sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); - } - - option_rom[nb_option_roms].bootindex = 0; - option_rom[nb_option_roms].name = "linuxboot.bin"; - if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { - option_rom[nb_option_roms].name = "linuxboot_dma.bin"; - } - nb_option_roms++; -} - -void x86_bios_rom_init(MachineState *ms, const char *default_firmware, - MemoryRegion *rom_memory, bool isapc_ram_fw) -{ - const char *bios_name; - char *filename; - MemoryRegion *bios, *isa_bios; - int bios_size, isa_bios_size; - ssize_t ret; - - /* BIOS load */ - bios_name = ms->firmware ?: default_firmware; - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); - if (filename) { - bios_size = get_image_size(filename); - } else { - bios_size = -1; - } - if (bios_size <= 0 || - (bios_size % 65536) != 0) { - goto bios_error; - } - bios = g_malloc(sizeof(*bios)); - memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); - if (sev_enabled()) { - /* - * The concept of a "reset" simply doesn't exist for - * confidential computing guests, we have to destroy and - * re-launch them instead. So there is no need to register - * the firmware as rom to properly re-initialize on reset. - * Just go for a straight file load instead. - */ - void *ptr = memory_region_get_ram_ptr(bios); - load_image_size(filename, ptr, bios_size); - x86_firmware_configure(ptr, bios_size); - } else { - if (!isapc_ram_fw) { - memory_region_set_readonly(bios, true); - } - ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); - if (ret != 0) { - goto bios_error; + cpu_interrupt(cs, CPU_INTERRUPT_NMI); } } - g_free(filename); - - /* map the last 128KB of the BIOS in ISA space */ - isa_bios_size = MIN(bios_size, 128 * KiB); - isa_bios = g_malloc(sizeof(*isa_bios)); - memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, - bios_size - isa_bios_size, isa_bios_size); - memory_region_add_subregion_overlap(rom_memory, - 0x100000 - isa_bios_size, - isa_bios, - 1); - if (!isapc_ram_fw) { - memory_region_set_readonly(isa_bios, true); - } - - /* map all the bios at the top of memory */ - memory_region_add_subregion(rom_memory, - (uint32_t)(-bios_size), - bios); - return; - -bios_error: - fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); - exit(1); } bool x86_machine_is_smm_enabled(const X86MachineState *x86ms) @@ -1386,6 +336,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, qapi_free_SgxEPCList(list); } +static int x86_kvm_type(MachineState *ms, const char *vm_type) +{ + /* + * No x86 machine has a kvm-type property. If one is added that has + * it, it should call kvm_get_vm_type() directly or not use it at all. + */ + assert(vm_type == NULL); + return kvm_enabled() ? 
kvm_get_vm_type(ms) : 0; +} + static void x86_machine_initfn(Object *obj) { X86MachineState *x86ms = X86_MACHINE(obj); @@ -1410,6 +370,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = x86_cpu_index_to_props; mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + mc->kvm_type = x86_kvm_type; x86mc->save_tsc_khz = true; x86mc->fwcfg_dma_enabled = true; nc->nmi_monitor_handler = x86_nmi; diff --git a/hw/intc/apic.c b/hw/intc/apic.c index ac3d47d2318f3db374e214d2a3085f3e7d89650c..178fb26b4773b7df2e6c0519488ed7b119cad0c5 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -32,14 +32,13 @@ #include "qapi/error.h" #include "qom/object.h" -#define MAX_APICS 255 -#define MAX_APIC_WORDS 8 - #define SYNC_FROM_VAPIC 0x1 #define SYNC_TO_VAPIC 0x2 #define SYNC_ISR_IRR_TO_VAPIC 0x4 -static APICCommonState *local_apics[MAX_APICS + 1]; +static APICCommonState **local_apics; +static uint32_t max_apics; +static uint32_t max_apic_words; #define TYPE_APIC "apic" /*This is reusing the APICCommonState typedef from APIC_COMMON */ @@ -49,7 +48,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC, static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode); static void apic_update_irq(APICCommonState *s); static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, - uint8_t dest, uint8_t dest_mode); + uint32_t dest, uint8_t dest_mode); + +void apic_set_max_apic_id(uint32_t max_apic_id) +{ + int word_size = 32; + + /* round the max APIC ID up to the next multiple of the 32-bit word size */ + max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1); + + local_apics = g_malloc0(sizeof(*local_apics) * max_apics); + max_apic_words = max_apics >> 5; +} + /* Find first bit starting from msb */ static int apic_fls_bit(uint32_t value) @@ -199,10 +210,10 @@ static void apic_external_nmi(APICCommonState *s) #define foreach_apic(apic, deliver_bitmask, code) \ {\ int __i, __j;\ - for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\ + for (__i = 0; __i < max_apic_words; __i++) {\ uint32_t __mask = deliver_bitmask[__i];\ if (__mask) {\ - for(__j = 0; __j < 32; __j++) {\ + for (__j = 0; __j < 32; __j++) {\ if (__mask & (1U << __j)) {\ apic = local_apics[__i * 32 + __j];\ if (apic) {\ @@ -226,7 +237,7 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, { int i, d; d = -1; - for(i = 0; i < MAX_APIC_WORDS; i++) { + for (i = 0; i < max_apic_words; i++) { if (deliver_bitmask[i]) { d = i * 32 + apic_ffs_bit(deliver_bitmask[i]); break; @@ -276,16 +287,25 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, apic_set_irq(apic_iter, vector_num, trigger_mode) ); } -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, - uint8_t vector_num, uint8_t trigger_mode) +static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode, + uint8_t delivery_mode, uint8_t vector_num, + uint8_t trigger_mode) { - uint32_t deliver_bitmask[MAX_APIC_WORDS]; + uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t)); trace_apic_deliver_irq(dest, dest_mode, delivery_mode, vector_num, trigger_mode); apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode); apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); + g_free(deliver_bitmask); +} + +bool is_x2apic_mode(DeviceState *dev) +{ + APICCommonState *s = APIC(dev); + + return s->apicbase & MSR_IA32_APICBASE_EXTD; } static void apic_set_base(APICCommonState *s, uint64_t val)
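
/*
 * Editor's aside, not part of the patch: a standalone sketch of the
 * bitmask sizing that apic_set_max_apic_id() performs above. Rounding
 * the APIC ID limit up to a multiple of 32 lets every ID map to word
 * (id >> 5) and bit (id & 31), which is exactly how foreach_apic()
 * indexes local_apics[]. All demo_* names are illustrative.
 */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static uint32_t demo_max_apics;
static uint32_t demo_max_apic_words;

static uint32_t *demo_alloc_bitmask(uint32_t max_apic_id)
{
    demo_max_apics = (max_apic_id + 31) & ~31u;   /* round up to a word */
    demo_max_apic_words = demo_max_apics >> 5;    /* 32 IDs per word */
    return calloc(demo_max_apic_words, sizeof(uint32_t));
}

static void demo_set_bit(uint32_t *mask, uint32_t id)
{
    mask[id >> 5] |= 1u << (id & 31);
}

int main(void)
{
    uint32_t *mask = demo_alloc_bitmask(33);   /* a limit of 33 needs two words */

    assert(demo_max_apics == 64 && demo_max_apic_words == 2);
    demo_set_bit(mask, 33);
    assert(mask[1] == 1u << 1);                /* ID 33 = word 1, bit 1 */
    free(mask);
    return 0;
}
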
@@ -435,57 +455,123 @@ static void apic_eoi(APICCommonState *s) apic_update_irq(s); } -static int apic_find_dest(uint8_t dest) +static bool apic_match_dest(APICCommonState *apic, uint32_t dest) { - APICCommonState *apic = local_apics[dest]; - int i; + if (is_x2apic_mode(&apic->parent_obj)) { + return apic->initial_apic_id == dest; + } else { + return apic->id == (uint8_t)dest; + } +} - if (apic && apic->id == dest) - return dest; /* shortcut in case apic->id == local_apics[dest]->id */ +static void apic_find_dest(uint32_t *deliver_bitmask, uint32_t dest) +{ + APICCommonState *apic = NULL; + int i; - for (i = 0; i < MAX_APICS; i++) { + for (i = 0; i < max_apics; i++) { apic = local_apics[i]; - if (apic && apic->id == dest) - return i; - if (!apic) - break; + if (apic && apic_match_dest(apic, dest)) { + apic_set_bit(deliver_bitmask, i); + } } +} - return -1; +/* + * Deliver the interrupt to x2APIC CPUs if it is an x2APIC broadcast. + * Otherwise, deliver it to xAPIC CPUs if it is an xAPIC + * broadcast. + */ +static void apic_get_broadcast_bitmask(uint32_t *deliver_bitmask, + bool is_x2apic_broadcast) +{ + int i; + APICCommonState *apic_iter; + + for (i = 0; i < max_apics; i++) { + apic_iter = local_apics[i]; + if (apic_iter) { + bool apic_in_x2apic = is_x2apic_mode(&apic_iter->parent_obj); + + if (is_x2apic_broadcast && apic_in_x2apic) { + apic_set_bit(deliver_bitmask, i); + } else if (!is_x2apic_broadcast && !apic_in_x2apic) { + apic_set_bit(deliver_bitmask, i); + } + } + } }
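
/*
 * Editor's aside, not part of the patch: destination matching in the two
 * APIC modes, condensing apic_match_dest() above and the LDR layout that
 * apic_common_realize() derives later in this series (cluster ID in bits
 * 31:16, one-hot member bit in bits 15:0). demo_* names are illustrative.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool demo_match_physical(bool x2apic, uint32_t initial_apic_id,
                                uint8_t xapic_id, uint32_t dest)
{
    /* x2APIC compares the full 32-bit ID; xAPIC only the low byte */
    return x2apic ? initial_apic_id == dest : xapic_id == (uint8_t)dest;
}

static uint32_t demo_x2apic_ldr(uint32_t id)
{
    return ((id >> 4) << 16) | (1u << (id & 0xf));
}

static bool demo_match_x2apic_logical(uint32_t ldr, uint32_t dest)
{
    /* same cluster, and at least one shared bit in the member bitmask */
    return (dest >> 16) == (ldr >> 16) && (dest & ldr & 0xffff) != 0;
}

int main(void)
{
    uint32_t ldr = demo_x2apic_ldr(0x25);       /* cluster 2, member 5 */

    assert(demo_match_physical(true, 0x125, 0x25, 0x125));
    assert(ldr == 0x00020020);
    assert(demo_match_x2apic_logical(ldr, 0x00020020));
    assert(!demo_match_x2apic_logical(ldr, 0x00030020)); /* wrong cluster */
    return 0;
}
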
static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, - uint8_t dest, uint8_t dest_mode) + uint32_t dest, uint8_t dest_mode) { - APICCommonState *apic_iter; + APICCommonState *apic; int i; - if (dest_mode == 0) { - if (dest == 0xff) { - memset(deliver_bitmask, 0xff, MAX_APIC_WORDS * sizeof(uint32_t)); + memset(deliver_bitmask, 0x00, max_apic_words * sizeof(uint32_t)); + + /* + * An x2APIC broadcast is delivered to all x2APIC CPUs regardless of + * the destination mode. If the destination mode is physical, it is + * broadcast to all xAPIC CPUs too. Otherwise, if the destination + * mode is logical, we need to keep checking whether the xAPIC CPUs + * accept the interrupt. + */ + if (dest == 0xffffffff) { + if (dest_mode == APIC_DESTMODE_PHYSICAL) { + memset(deliver_bitmask, 0xff, max_apic_words * sizeof(uint32_t)); + return; } else { - int idx = apic_find_dest(dest); - memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t)); - if (idx >= 0) - apic_set_bit(deliver_bitmask, idx); + apic_get_broadcast_bitmask(deliver_bitmask, true); + } + } + + if (dest_mode == APIC_DESTMODE_PHYSICAL) { + apic_find_dest(deliver_bitmask, dest); + /* Any APIC in xAPIC mode will interpret 0xFF as broadcast */ + if (dest == 0xff) { + apic_get_broadcast_bitmask(deliver_bitmask, false); + } } else { - /* XXX: cluster mode */ - memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t)); - for(i = 0; i < MAX_APICS; i++) { - apic_iter = local_apics[i]; - if (apic_iter) { - if (apic_iter->dest_mode == 0xf) { - if (dest & apic_iter->log_dest) - apic_set_bit(deliver_bitmask, i); - } else if (apic_iter->dest_mode == 0x0) { - if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) && - (dest & apic_iter->log_dest & 0x0f)) { + /* XXX: logical mode */ + for (i = 0; i < max_apics; i++) { + apic = local_apics[i]; + if (apic) { + /* x2APIC logical mode */ + if (apic->apicbase & MSR_IA32_APICBASE_EXTD) { + if ((dest >> 16) == (apic->extended_log_dest >> 16) && + (dest & apic->extended_log_dest & 0xffff)) { apic_set_bit(deliver_bitmask, i); } + continue; } - } else { - break; + + /* xAPIC logical mode */ + dest = (uint8_t)dest; + if (apic->dest_mode == APIC_DESTMODE_LOGICAL_FLAT) { + if (dest & apic->log_dest) { + apic_set_bit(deliver_bitmask, i); + } + } else if (apic->dest_mode == APIC_DESTMODE_LOGICAL_CLUSTER) { + /* + * In the cluster model of xAPIC logical-mode IPIs, the 4 + * higher bits are used as the cluster address and the 4 + * lower bits are the bitmask for local APICs in the + * cluster. The IPI is delivered to an APIC if the cluster + * address matches and the APIC's address bit in the cluster + * is set in the destination ID bitmask of the IPI. + * + * The cluster address ranges from 0 to 14; cluster + * address 15 (0xf) is the broadcast address to all + * clusters.
+ */ + if ((dest & 0xf0) == 0xf0 || + (dest & 0xf0) == (apic->log_dest & 0xf0)) { + if (dest & apic->log_dest & 0x0f) { + apic_set_bit(deliver_bitmask, i); + } + } + } } } } @@ -509,29 +595,36 @@ void apic_sipi(DeviceState *dev) s->wait_for_sipi = 0; } -static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, +static void apic_deliver(DeviceState *dev, uint32_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, - uint8_t trigger_mode) + uint8_t trigger_mode, uint8_t dest_shorthand) { APICCommonState *s = APIC(dev); - uint32_t deliver_bitmask[MAX_APIC_WORDS]; - int dest_shorthand = (s->icr[0] >> 18) & 3; APICCommonState *apic_iter; + uint32_t deliver_bitmask_size = max_apic_words * sizeof(uint32_t); + uint32_t *deliver_bitmask = g_malloc(deliver_bitmask_size); + uint32_t current_apic_id; + + if (is_x2apic_mode(dev)) { + current_apic_id = s->initial_apic_id; + } else { + current_apic_id = s->id; + } switch (dest_shorthand) { case 0: apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode); break; case 1: - memset(deliver_bitmask, 0x00, sizeof(deliver_bitmask)); - apic_set_bit(deliver_bitmask, s->id); + memset(deliver_bitmask, 0x00, deliver_bitmask_size); + apic_set_bit(deliver_bitmask, current_apic_id); break; case 2: - memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); + memset(deliver_bitmask, 0xff, deliver_bitmask_size); break; case 3: - memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); - apic_reset_bit(deliver_bitmask, s->id); + memset(deliver_bitmask, 0xff, deliver_bitmask_size); + apic_reset_bit(deliver_bitmask, current_apic_id); break; } @@ -555,6 +648,7 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, } apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); + g_free(deliver_bitmask); } static bool apic_check_pic(APICCommonState *s) @@ -636,27 +730,26 @@ static void apic_timer(void *opaque) apic_timer_update(s, s->next_time); } -static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) +static int apic_register_read(int index, uint64_t *value) { DeviceState *dev; APICCommonState *s; uint32_t val; - int index; - - if (size < 4) { - return 0; - } + int ret = 0; dev = cpu_get_current_apic(); if (!dev) { - return 0; + return -1; } s = APIC(dev); - index = (addr >> 4) & 0xff; switch(index) { case 0x02: /* id */ - val = s->id << 24; + if (is_x2apic_mode(dev)) { + val = s->initial_apic_id; + } else { + val = s->id << 24; + } break; case 0x03: /* version */ val = s->version | ((APIC_LVT_NB - 1) << 16); @@ -679,10 +772,19 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) val = 0; break; case 0x0d: - val = s->log_dest << 24; + if (is_x2apic_mode(dev)) { + val = s->extended_log_dest; + } else { + val = s->log_dest << 24; + } break; case 0x0e: - val = (s->dest_mode << 28) | 0xfffffff; + if (is_x2apic_mode(dev)) { + val = 0; + ret = -1; + } else { + val = (s->dest_mode << 28) | 0xfffffff; + } break; case 0x0f: val = s->spurious_vec; @@ -718,17 +820,56 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; val = 0; + ret = -1; break; } - trace_apic_mem_readl(addr, val); + + trace_apic_register_read(index, val); + *value = val; + return ret; +} + +static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val; + int index; + + if (size < 4) { + return 0; + } + + index = (addr >> 4) & 0xff; + apic_register_read(index, &val); + return val; } +int apic_msr_read(int index, 
uint64_t *val) +{ + DeviceState *dev; + + dev = cpu_get_current_apic(); + if (!dev) { + return -1; + } + + if (!is_x2apic_mode(dev)) { + return -1; + } + + return apic_register_read(index, val); +} + static void apic_send_msi(MSIMessage *msi) { uint64_t addr = msi->address; uint32_t data = msi->data; - uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + uint32_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + /* + * The upper 3 bytes of the destination ID are stored in the upper + * word of the MSI address. See x86_iommu_irq_to_msi_message(). + */ + dest = dest | (addr >> 32); uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; @@ -737,38 +878,25 @@ static void apic_send_msi(MSIMessage *msi) apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode); }
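
/*
 * Editor's aside, not part of the patch: decoding the MSI routing fields
 * the way apic_send_msi() above does, including the fixed handling of the
 * upper destination bits carried in the high word of the address. The
 * DEMO_* constants mirror the layout the masks above imply; the exact
 * macro names live in QEMU's headers.
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_ADDR_DEST_ID_MASK   0x000ff000u
#define DEMO_ADDR_DEST_ID_SHIFT  12
#define DEMO_ADDR_DEST_MODE_BIT  2
#define DEMO_DATA_VECTOR_MASK    0x000000ffu
#define DEMO_DATA_DELIVERY_SHIFT 8
#define DEMO_DATA_TRIGGER_SHIFT  15

int main(void)
{
    /* physical destination 0x12, fixed delivery, edge, vector 0x31 */
    uint64_t addr = 0xfee12000u;
    uint32_t data = 0x00000031u;

    uint32_t dest = (addr & DEMO_ADDR_DEST_ID_MASK) >> DEMO_ADDR_DEST_ID_SHIFT;
    dest |= (uint32_t)(addr >> 32);          /* extended destination bits */
    uint8_t vector = data & DEMO_DATA_VECTOR_MASK;
    uint8_t dest_mode = (addr >> DEMO_ADDR_DEST_MODE_BIT) & 0x1;
    uint8_t delivery = (data >> DEMO_DATA_DELIVERY_SHIFT) & 0x7;
    uint8_t trigger = (data >> DEMO_DATA_TRIGGER_SHIFT) & 0x1;

    assert(dest == 0x12 && vector == 0x31);
    assert(dest_mode == 0 && delivery == 0 && trigger == 0);
    return 0;
}
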
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, - unsigned size) +static int apic_register_write(int index, uint64_t val) { DeviceState *dev; APICCommonState *s; - int index = (addr >> 4) & 0xff; - - if (size < 4) { - return; - } - - if (addr > 0xfff || !index) { - /* MSI and MMIO APIC are at the same memory location, - * but actually not on the global bus: MSI is on PCI bus - * APIC is connected directly to the CPU. - * Mapping them on the global bus happens to work because - * MSI registers are reserved in APIC MMIO and vice versa. */ - MSIMessage msi = { .address = addr, .data = val }; - apic_send_msi(&msi); - return; - } dev = cpu_get_current_apic(); if (!dev) { - return; + return -1; } s = APIC(dev); - trace_apic_mem_writel(addr, val); + trace_apic_register_write(index, val); switch(index) { case 0x02: + if (is_x2apic_mode(dev)) { + return -1; + } + s->id = (val >> 24); break; case 0x03: @@ -788,9 +916,17 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, apic_eoi(s); break; case 0x0d: + if (is_x2apic_mode(dev)) { + return -1; + } + s->log_dest = val >> 24; break; case 0x0e: + if (is_x2apic_mode(dev)) { + return -1; + } + s->dest_mode = val >> 28; break; case 0x0f: @@ -802,13 +938,27 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, case 0x20 ... 0x27: case 0x28: break; - case 0x30: + case 0x30: { + uint32_t dest; + s->icr[0] = val; - apic_deliver(dev, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1, + if (is_x2apic_mode(dev)) { + s->icr[1] = val >> 32; + dest = s->icr[1]; + } else { + dest = (s->icr[1] >> 24) & 0xff; + } + + apic_deliver(dev, dest, (s->icr[0] >> 11) & 1, (s->icr[0] >> 8) & 7, (s->icr[0] & 0xff), - (s->icr[0] >> 15) & 1); + (s->icr[0] >> 15) & 1, (s->icr[0] >> 18) & 3); break; + } case 0x31: + if (is_x2apic_mode(dev)) { + return -1; + } + s->icr[1] = val; break; case 0x32 ... 0x37: @@ -837,10 +987,70 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, s->count_shift = (v + 1) & 7; } break; + case 0x3f: { + int vector = val & 0xff; + + if (!is_x2apic_mode(dev)) { + return -1; + } + + /* + * Self IPI is identical to IPI with + * - Destination shorthand: 1 (Self) + * - Trigger mode: 0 (Edge) + * - Delivery mode: 0 (Fixed) + */ + apic_deliver(dev, 0, 0, APIC_DM_FIXED, vector, 0, 1); + + break; + } default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; - break; + return -1; } + + return 0; +} + +static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + int index = (addr >> 4) & 0xff; + + if (size < 4) { + return; + } + + if (addr > 0xfff || !index) { + /* + * MSI and MMIO APIC are at the same memory location, + * but actually not on the global bus: MSI is on PCI bus + * APIC is connected directly to the CPU. + * Mapping them on the global bus happens to work because + * MSI registers are reserved in APIC MMIO and vice versa. + */ + MSIMessage msi = { .address = addr, .data = val }; + apic_send_msi(&msi); + return; + } + + apic_register_write(index, val); +} + +int apic_msr_write(int index, uint64_t val) +{ + DeviceState *dev; + + dev = cpu_get_current_apic(); + if (!dev) { + return -1; + } + + if (!is_x2apic_mode(dev)) { + return -1; + } + + return apic_register_write(index, val); } static void apic_pre_save(APICCommonState *s) @@ -871,12 +1081,6 @@ static void apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC(dev); - if (s->id >= MAX_APICS) { - error_setg(errp, "%s initialization failed. APIC ID %d is invalid", - object_get_typename(OBJECT(dev)), s->id); - return; - } - if (kvm_enabled()) { warn_report("Userspace local APIC is deprecated for KVM."); warn_report("Do not use kernel-irqchip except for the -M isapc machine type."); @@ -893,7 +1097,16 @@ static void apic_realize(DeviceState *dev, Error **errp) s->io_memory.disable_reentrancy_guard = true; s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); - local_apics[s->id] = s; + + /* + * With --machine none, apic_set_max_apic_id() is not called before the + * APIC is created, so call it here with 1, the maximum number of CPUs + * that machine supports.
+ */ + if (!local_apics) { + apic_set_max_apic_id(1); + } + local_apics[s->initial_apic_id] = s; msi_nonbroken = true; } @@ -903,7 +1116,7 @@ static void apic_unrealize(DeviceState *dev) APICCommonState *s = APIC(dev); timer_free(s->timer); - local_apics[s->id] = NULL; + local_apics[s->initial_apic_id] = NULL; } static void apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index bccb4241c2d49ecf7f771a8ce20f007a24c72b28..62e757dba2594e9d9b5cc515ba48d4397da47d29 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -63,6 +63,19 @@ uint64_t cpu_get_apic_base(DeviceState *dev) } } +bool cpu_is_apic_enabled(DeviceState *dev) +{ + APICCommonState *s; + + if (!dev) { + return false; + } + + s = APIC_COMMON(dev); + + return s->apicbase & MSR_IA32_APICBASE_ENABLE; +} + void cpu_set_apic_tpr(DeviceState *dev, uint8_t val) { APICCommonState *s; @@ -287,6 +300,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) } vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, s, -1, 0, NULL); + + /* APIC LDR in x2APIC mode */ + s->extended_log_dest = ((s->initial_apic_id >> 4) << 16) | + (1 << (s->initial_apic_id & 0xf)); } static void apic_common_unrealize(DeviceState *dev) @@ -427,6 +444,11 @@ static void apic_common_set_id(Object *obj, Visitor *v, const char *name, return; } + if (value >= 255 && !cpu_has_x2apic_feature(&s->cpu->env)) { + error_setg(errp, "APIC ID %d requires x2APIC feature in CPU", value); + return; + } + s->initial_apic_id = value; s->id = (uint8_t)value; } diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index a0135100744115fe3326dc965c662708f43799ad..edd727ad37920b7c4e0de528eac568e07e84b401 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -938,7 +938,7 @@ void gicv3_cpuif_update(GICv3CPUState *cs) return; } - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq, cs->hppi.grp, cs->hppi.prio); diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index 74e02858d43e3596d669e0d1a26b2bc8e4c90f3c..93b8531ad0305f0b35474a7fa37cab6d06bdfd2b 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -106,7 +106,7 @@ static int qemu_s390_clear_io_flic(S390FLICState *fs, uint16_t subchannel_id, QEMUS390FlicIO *cur, *next; uint8_t isc; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (!(flic->pending & FLIC_PENDING_IO)) { return 0; } @@ -223,7 +223,7 @@ uint32_t qemu_s390_flic_dequeue_service(QEMUS390FLICState *flic) { uint32_t tmp; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); g_assert(flic->pending & FLIC_PENDING_SERVICE); tmp = flic->service_param; flic->service_param = 0; @@ -238,7 +238,7 @@ QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic, uint64_t cr6) QEMUS390FlicIO *io; uint8_t isc; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (!(flic->pending & CR6_TO_PENDING_IO(cr6))) { return NULL; } @@ -262,7 +262,7 @@ QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic, uint64_t cr6) void qemu_s390_flic_dequeue_crw_mchk(QEMUS390FLICState *flic) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); g_assert(flic->pending & FLIC_PENDING_MCHK_CR); flic->pending &= ~FLIC_PENDING_MCHK_CR; } @@ -271,7 +271,7 @@ static void qemu_s390_inject_service(S390FLICState *fs, uint32_t parm) { QEMUS390FLICState *flic = s390_get_qemu_flic(fs); - g_assert(qemu_mutex_iothread_locked()); 
+ g_assert(bql_locked()); /* multiplexing is good enough for sclp - kvm does it internally as well */ flic->service_param |= parm; flic->pending |= FLIC_PENDING_SERVICE; @@ -287,7 +287,7 @@ static void qemu_s390_inject_io(S390FLICState *fs, uint16_t subchannel_id, QEMUS390FLICState *flic = s390_get_qemu_flic(fs); QEMUS390FlicIO *io; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); io = g_new0(QEMUS390FlicIO, 1); io->id = subchannel_id; io->nr = subchannel_nr; @@ -304,7 +304,7 @@ static void qemu_s390_inject_crw_mchk(S390FLICState *fs) { QEMUS390FLICState *flic = s390_get_qemu_flic(fs); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); flic->pending |= FLIC_PENDING_MCHK_CR; qemu_s390_flic_notify(FLIC_PENDING_MCHK_CR); @@ -330,7 +330,7 @@ bool qemu_s390_flic_has_crw_mchk(QEMUS390FLICState *flic) bool qemu_s390_flic_has_any(QEMUS390FLICState *flic) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); return !!flic->pending; } @@ -340,7 +340,7 @@ static void qemu_s390_flic_reset(DeviceState *dev) QEMUS390FlicIO *cur, *next; int isc; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); flic->simm = 0; flic->nimm = 0; flic->pending = 0; diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 36ff71f947579bc893339758301aad4eae91c75c..1ef29d0256ad54d2351945d7c90f573c4dcd498b 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64 # apic.c apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d" apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode %d vector %d trigger_mode %d" -apic_mem_readl(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x" -apic_mem_writel(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x" +apic_register_read(uint8_t reg, uint64_t val) "register 0x%02x = 0x%"PRIx64 +apic_register_write(uint8_t reg, uint64_t val) "register 0x%02x = 0x%"PRIx64 # ioapic.c ioapic_set_remote_irr(int n) "set remote irr for pin %d" diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index 2e49e262ee0e90e1d2c40a623be087153accae71..33c00083d47c976abd6ccf3f3af882689e497b21 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -346,10 +346,24 @@ type_init(virt_machine_register_types) } \ type_init(machvirt_machine_##major##_##minor##_init); +static void virt_machine_9_1_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE(9, 1, true) + +static void virt_machine_9_0_options(MachineClass *mc) +{ + virt_machine_9_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); +} +DEFINE_VIRT_MACHINE(9, 0, false) + static void virt_machine_8_2_options(MachineClass *mc) { + virt_machine_9_0_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); } -DEFINE_VIRT_MACHINE(8, 2, true) +DEFINE_VIRT_MACHINE(8, 2, false) static void virt_machine_8_1_options(MachineClass *mc) { diff --git a/hw/misc/edu.c b/hw/misc/edu.c index e64a246d3febf9c096c27cb299b83d6fa1a0477d..2a976ca2b151839248126c5c6eb3833832c5bf22 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -355,9 +355,9 @@ static void *edu_fact_thread(void *opaque) smp_mb__after_rmw(); if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { - qemu_mutex_lock_iothread(); + bql_lock(); edu_raise_irq(edu, FACT_IRQ); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c index 
a9c64d06ebcfe10d5b8dfb3a1bdd85a493f391f2..2b9bb075400710aa5c999c0bb5b1392680917f65 100644 --- a/hw/misc/imx6_src.c +++ b/hw/misc/imx6_src.c @@ -131,7 +131,7 @@ static void imx6_clear_reset_bit(CPUState *cpu, run_on_cpu_data data) struct SRCSCRResetInfo *ri = data.host_ptr; IMX6SRCState *s = ri->s; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); s->regs[SRC_SCR] = deposit32(s->regs[SRC_SCR], ri->reset_bit, 1, 0); DPRINTF("reg[%s] <= 0x%" PRIx32 "\n", diff --git a/hw/misc/imx7_src.c b/hw/misc/imx7_src.c index 983251e86f7268683388dd0eb104a5321771e92c..77ad7a7eeffdc5b12e5430429af1d0f43260e56b 100644 --- a/hw/misc/imx7_src.c +++ b/hw/misc/imx7_src.c @@ -136,7 +136,7 @@ static void imx7_clear_reset_bit(CPUState *cpu, run_on_cpu_data data) struct SRCSCRResetInfo *ri = data.host_ptr; IMX7SRCState *s = ri->s; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); s->regs[SRC_A7RCR0] = deposit32(s->regs[SRC_A7RCR0], ri->reset_bit, 1, 0); diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 1e2b3baeb1a5143b21885d35d0fbf33a83a2cdf2..453fdb98198302cccd52f11f2a590666b6e9f48c 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -133,7 +133,7 @@ static bool net_tx_packets(struct XenNetDev *netdev) void *page; void *tmpbuf = NULL; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); for (;;) { rc = netdev->tx_ring.req_cons; @@ -260,7 +260,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size RING_IDX rc, rp; void *page; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (xen_device_backend_get_state(&netdev->xendev) != XenbusStateConnected) { return -1; @@ -354,7 +354,7 @@ static bool xen_netdev_connect(XenDevice *xendev, Error **errp) XenNetDev *netdev = XEN_NET_DEVICE(xendev); unsigned int port, rx_copy; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (xen_device_frontend_scanf(xendev, "tx-ring-ref", "%u", &netdev->tx_ring_ref) != 1) { @@ -425,7 +425,7 @@ static void xen_netdev_disconnect(XenDevice *xendev, Error **errp) trace_xen_netdev_disconnect(netdev->dev); - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); netdev->tx_ring.sring = NULL; netdev->rx_ring.sring = NULL; diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 08534bc7cc09c631db485d8e2da24eb1c99454bf..8facd8b63f766ae9fb12e33de9987501cb70a7ed 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -179,6 +179,8 @@ static Property q35_host_props[] = { mch.below_4g_mem_size, 0), DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, mch.above_4g_mem_size, 0), + DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost, + mch.has_smm_ranges, true), DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; @@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj) /* mch's object_initialize resets the default value, set it again */ qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); + object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", q35_host_get_pci_hole_start, NULL, NULL, NULL); @@ -476,6 +479,10 @@ static void mch_write_config(PCIDevice *d, mch_update_pciexbar(mch); } + if (!mch->has_smm_ranges) { + return; + } + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM, MCH_HOST_BRIDGE_SMRAM_SIZE)) { mch_update_smram(mch); @@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d, static void mch_update(MCHPCIState *mch) { mch_update_pciexbar(mch); + mch_update_pam(mch); - mch_update_smram(mch); - 
mch_update_ext_tseg_mbytes(mch); - mch_update_smbase_smram(mch); + if (mch->has_smm_ranges) { + mch_update_smram(mch); + mch_update_ext_tseg_mbytes(mch); + mch_update_smbase_smram(mch); + } /* * pci hole goes from end-of-low-ram to io-apic. @@ -538,18 +548,20 @@ static void mch_reset(DeviceState *qdev) pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR, MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); - d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; - d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; - d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; - d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + if (mch->has_smm_ranges) { + d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; + d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; + d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; + d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; - if (mch->ext_tseg_mbytes > 0) { - pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, - MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); - } + if (mch->ext_tseg_mbytes > 0) { + pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); + } - d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; - d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; + d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; + d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; + } mch_update(mch); } @@ -568,6 +580,20 @@ static void mch_realize(PCIDevice *d, Error **errp) /* setup pci memory mapping */ pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space); + /* PAM */ + init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, + PAM_BIOS_BASE, PAM_BIOS_SIZE); + for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { + init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, + PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); + } + + if (!mch->has_smm_ranges) { + return; + } + /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, @@ -634,15 +660,6 @@ static void mch_realize(PCIDevice *d, Error **errp) object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&mch->smram)); - - init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, - mch->system_memory, mch->pci_address_space, - PAM_BIOS_BASE, PAM_BIOS_SIZE); - for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { - init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, - mch->system_memory, mch->pci_address_space, - PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); - } } uint64_t mch_mcfg_base(void) diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c index d28ed3ba7333af8d4628112338ed0544f389903c..47553348b1e759b9c657d359323d898ebc669a14 100644 --- a/hw/ppc/pef.c +++ b/hw/ppc/pef.c @@ -15,7 +15,6 @@ #include "sysemu/kvm.h" #include "migration/blocker.h" #include "exec/confidential-guest-support.h" -#include "hw/ppc/pef.h" #define TYPE_PEF_GUEST "pef-guest" OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) @@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp) #endif } -int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { return 0; @@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return 
kvmppc_svm_init(cgs, errp); } -int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) +static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { return 0; @@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, static void pef_guest_class_init(ObjectClass *oc, void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = pef_kvm_init; + klass->kvm_reset = pef_kvm_reset; } static void pef_guest_init(Object *obj) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 3203a4a72898c9eec0f801c5e7e1cd3fff4922c5..d84f3f977d99336fb8d952f863bd0fb45d481fb8 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -515,7 +515,7 @@ static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; /* The TCG path should also be holding the BQL at this point */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (FIELD_EX64(env->msr, MSR, PR)) { qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n"); diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index be167710a3561fba9742676b330fe98471032000..b6581c16fc9f8a17b8f70290e44c4242a9b1008a 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -314,7 +314,7 @@ void store_40x_dbcr0(CPUPPCState *env, uint32_t val) { PowerPCCPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); switch ((val >> 28) & 0x3) { case 0x0: @@ -334,7 +334,7 @@ void store_40x_dbcr0(CPUPPCState *env, uint32_t val) break; } - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* PowerPC 40x internal IRQ controller */ diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index df09aa9d6a007cd2adf8e3c0ccf678919f2a2e96..1bb2d363ed9ead5b1b5fde4fe00bb715dd6beef9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -74,6 +74,7 @@ #include "hw/virtio/vhost-scsi-common.h" #include "exec/ram_addr.h" +#include "exec/confidential-guest-support.h" #include "hw/usb.h" #include "qemu/config-file.h" #include "qemu/error-report.h" @@ -86,7 +87,6 @@ #include "hw/ppc/spapr_tpm_proxy.h" #include "hw/ppc/spapr_nvdimm.h" #include "hw/ppc/spapr_numa.h" -#include "hw/ppc/pef.h" #include "monitor/monitor.h" @@ -1304,7 +1304,7 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, CPUPPCState *env = &cpu->env; /* The TCG path should also be holding the BQL at this point */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); g_assert(!vhyp_cpu_in_nested(cpu)); @@ -1687,7 +1687,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); } - pef_kvm_reset(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_reset(machine->cgs, &error_fatal); + } spapr_caps_apply(spapr); first_ppc_cpu = POWERPC_CPU(first_cpu); @@ -2810,7 +2812,9 @@ static void spapr_machine_init(MachineState *machine) /* * if Secure VM (PEF) support is configured, then initialize it */ - pef_kvm_init(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_init(machine->cgs, &error_fatal); + } msi_nonbroken = true; @@ -3985,7 +3989,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); SpaprCpuCore *core = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); - CPUState *cs; SpaprDrc *drc; CPUArchId *core_slot; int index; @@ -4019,7 +4022,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) } } - 
core_slot->cpu = OBJECT(dev); + core_slot->cpu = CPU(dev); /* * Set compatibility mode to match the boot CPU, which was either set @@ -4035,7 +4038,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) if (smc->pre_2_10_has_unused_icps) { for (i = 0; i < cc->nr_threads; i++) { - cs = CPU(core->threads[i]); + CPUState *cs = CPU(core->threads[i]); pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); } } @@ -4785,15 +4788,37 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +/* + * pseries-9.1 + */ +static void spapr_machine_9_1_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(9_1, "9.1", true); + +/* + * pseries-9.0 + */ +static void spapr_machine_9_0_class_options(MachineClass *mc) +{ + spapr_machine_9_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); +} + +DEFINE_SPAPR_MACHINE(9_0, "9.0", false); + /* * pseries-8.2 */ static void spapr_machine_8_2_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_9_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); } -DEFINE_SPAPR_MACHINE(8_2, "8.2", true); +DEFINE_SPAPR_MACHINE(8_2, "8.2", false); /* * pseries-8.1 diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c index df5c4b9687350e11ef98d31229befebb623ed599..c2fda7ad2094afe9ff7c93da661104a13e918289 100644 --- a/hw/ppc/spapr_rng.c +++ b/hw/ppc/spapr_rng.c @@ -82,9 +82,9 @@ static target_ulong h_random(PowerPCCPU *cpu, SpaprMachineState *spapr, while (hrdata.received < 8) { rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received, random_recv, &hrdata); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_sem_wait(&hrdata.sem); - qemu_mutex_lock_iothread(); + bql_lock(); } qemu_sem_destroy(&hrdata.sem); diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c index 278666317ef22cb8b8ed8d6832fbef81009cb3b3..fc1bbc0b61c8e92dc51702d036788c54e801b5fb 100644 --- a/hw/ppc/spapr_softmmu.c +++ b/hw/ppc/spapr_softmmu.c @@ -334,7 +334,7 @@ static void *hpt_prepare_thread(void *opaque) pending->ret = H_NO_MEM; } - qemu_mutex_lock_iothread(); + bql_lock(); if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { /* Ready to go */ @@ -344,7 +344,7 @@ static void *hpt_prepare_thread(void *opaque) free_pending_hpt(pending); } - qemu_mutex_unlock_iothread(); + bql_unlock(); return NULL; } diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c index 9bd98e82197e4dbdb0ad37bb519dec6f31e28d92..d04ac936212f2c0211af9bba9a88e166d4506c88 100644 --- a/hw/remote/mpqemu-link.c +++ b/hw/remote/mpqemu-link.c @@ -33,7 +33,7 @@ */ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) { - bool iolock = qemu_mutex_iothread_locked(); + bool drop_bql = bql_locked(); bool iothread = qemu_in_iothread(); struct iovec send[2] = {}; int *fds = NULL; @@ -63,8 +63,8 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) * for IOThread case. * Also skip lock handling while in a co-routine in the main context. 
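
/*
 * Editor's aside, not part of the patch: the canonical shape shared by
 * h_random() and mpqemu_msg_send() above after the rename. Remember
 * whether the BQL is held, drop it around a blocking wait, and retake
 * it before returning. The semaphore argument is illustrative.
 */
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/thread.h"

static void demo_wait_without_bql(QemuSemaphore *done)
{
    bool had_bql = bql_locked();

    if (had_bql) {
        bql_unlock();            /* never block while holding the BQL */
    }
    qemu_sem_wait(done);         /* the blocking wait runs unlocked */
    if (had_bql) {
        bql_lock();              /* restore the caller's locking state */
    }
}
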
*/ - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_unlock_iothread(); + if (drop_bql && !iothread && !qemu_in_coroutine()) { + bql_unlock(); } if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), @@ -74,9 +74,9 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); } - if (iolock && !iothread && !qemu_in_coroutine()) { + if (drop_bql && !iothread && !qemu_in_coroutine()) { /* See above comment why skip locking here. */ - qemu_mutex_lock_iothread(); + bql_lock(); } return ret; @@ -96,7 +96,7 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, size_t *nfds, Error **errp) { struct iovec iov = { .iov_base = buf, .iov_len = len }; - bool iolock = qemu_mutex_iothread_locked(); + bool drop_bql = bql_locked(); bool iothread = qemu_in_iothread(); int ret = -1; @@ -106,14 +106,14 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, */ assert(qemu_in_coroutine() || !iothread); - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_unlock_iothread(); + if (drop_bql && !iothread && !qemu_in_coroutine()) { + bql_unlock(); } ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, errp); - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_lock_iothread(); + if (drop_bql && !iothread && !qemu_in_coroutine()) { + bql_lock(); } return (ret <= 0) ? ret : iov.iov_len; diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c index 8b708422fe5291511b94d6c62f2bfa47562396df..8dbafafb9e742b16afd68034b5036e12e52031fa 100644 --- a/hw/remote/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj.c @@ -400,7 +400,7 @@ static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset, } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); release_lock = false; } diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index 8f5159d85dc6a491fe30a654f8dc54f4f09ab2ca..5c535d483e9f7bb0e665330c6b694cc19cc8904c 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -153,7 +153,7 @@ void qmp_dump_skeys(const char *filename, Error **errp) goto out; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); guest_phys_blocks_init(&guest_phys_blocks); guest_phys_blocks_append(&guest_phys_blocks); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 7262725d2e8a1cc1fa33fdbe2a074c87dfe69628..3021685659114b0deff2fb966f8a33aa4f989d81 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "exec/ram_addr.h" +#include "exec/confidential-guest-support.h" #include "hw/s390x/s390-virtio-hcall.h" #include "hw/s390x/sclp.h" #include "hw/s390x/s390_flic.h" @@ -259,7 +260,9 @@ static void ccw_init(MachineState *machine) s390_init_cpus(machine); /* Need CPU model to be determined before we can set up PV */ - s390_pv_init(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_init(machine->cgs, &error_fatal); + } s390_flic_init(); @@ -316,7 +319,7 @@ static void s390_cpu_plug(HotplugHandler *hotplug_dev, ERRP_GUARD(); g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu); - ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev); + ms->possible_cpus->cpus[cpu->env.core_id].cpu = CPU(dev); if (s390_has_topology()) { s390_topology_setup_cpu(ms, cpu, errp); @@ -855,14 +858,38 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void 
ccw_machine_9_1_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_9_1_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(9_1, "9.1", true); + +static void ccw_machine_9_0_instance_options(MachineState *machine) +{ + ccw_machine_9_1_instance_options(machine); +} + +static void ccw_machine_9_0_class_options(MachineClass *mc) +{ + ccw_machine_9_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); +} +DEFINE_CCW_MACHINE(9_0, "9.0", false); + static void ccw_machine_8_2_instance_options(MachineState *machine) { + ccw_machine_9_0_instance_options(machine); } static void ccw_machine_8_2_class_options(MachineClass *mc) { + ccw_machine_9_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); } -DEFINE_CCW_MACHINE(8_2, "8.2", true); +DEFINE_CCW_MACHINE(8_2, "8.2", false); static void ccw_machine_8_1_instance_options(MachineState *machine) { diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index 5449b6d7428df3e9604c880b630c6f0565bb1dc2..d22ca243293ef05ea4e6ebcabb18cd0c1948baeb 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -151,7 +151,7 @@ static inline bool in_aio_context_home_thread(AioContext *ctx) } if (ctx == qemu_get_aio_context()) { - return qemu_mutex_iothread_locked(); + return bql_locked(); } else { return false; } diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h index 2cba27642f765a7ccd6ee33c1d366e2b12e0de81..cd7c7b74591c658b632093ce61f0cd7b18c1b0d6 100644 --- a/include/exec/confidential-guest-support.h +++ b/include/exec/confidential-guest-support.h @@ -23,11 +23,19 @@ #include "qom/object.h" #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" -OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) +OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + ConfidentialGuestSupportClass, + CONFIDENTIAL_GUEST_SUPPORT) + struct ConfidentialGuestSupport { Object parent; + /* + * True if the machine should use guest_memfd for RAM. 
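
/*
 * Editor's aside, not part of the patch: the versioned-machine pattern
 * that the m68k virt, pseries and s390-ccw hunks above all follow. Each
 * older class_options function first calls the next newer one and then
 * appends its own compat properties, so an old machine type accumulates
 * every compat entry newer than itself. The "9.2" version and the
 * hw_compat_9_1 array are hypothetical here.
 */
#include "qemu/osdep.h"
#include "hw/boards.h"

static void demo_machine_9_2_class_options(MachineClass *mc)
{
    /* newest version: current defaults, no compat properties */
}

static void demo_machine_9_1_class_options(MachineClass *mc)
{
    demo_machine_9_2_class_options(mc);
    compat_props_add(mc->compat_props, hw_compat_9_1, hw_compat_9_1_len);
}
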
+ */ + bool require_guest_memfd; + /* * ready: flag set by CGS initialization code once it's ready to * start executing instructions in a potentially-secure @@ -101,8 +109,37 @@ struct ConfidentialGuestMemoryEncryptionOps { typedef struct ConfidentialGuestSupportClass { ObjectClass parent; struct ConfidentialGuestMemoryEncryptionOps *memory_encryption_ops; + + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); } ConfidentialGuestSupportClass; +static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_init) { + return klass->kvm_init(cgs, errp); + } + + return 0; +} + +static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_reset) { + return klass->kvm_reset(cgs, errp); + } + + return 0; +} + #endif /* !CONFIG_USER_ONLY */ #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index d21d9990ad0e548b59bcf846c0dc919d45050f25..f267aa67e7b4788777d3a85e534c1ec8967b5b3d 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -182,6 +182,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); +int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length); #endif diff --git a/include/exec/memory.h b/include/exec/memory.h index 51fe10d4a00f7c48168d8479c2d28c47b3cd7d1a..fc93bbba3cc494b7288809ab1b5a387c100c86b4 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -255,6 +255,9 @@ typedef struct IOMMUTLBEvent { /* RAM FD is opened read-only */ #define RAM_READONLY_FD (1 << 11) +/* RAM can be private; it is backed by a KVM guest_memfd */ +#define RAM_GUEST_MEMFD (1 << 12) + /* The address limit of the VirtCCA bounce buffer is 4GB. */ #define VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT 0x100000000ULL @@ -1455,8 +1458,10 @@ void memory_region_init_io(MemoryRegion *mr, * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1472,13 +1477,16 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr, * @name: Region name, becomes part of RAMBlock name used in migration stream * must be unique within any device * @size: size of the region. - * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, + * RAM_GUEST_MEMFD. * @errp: pointer to Error*, to store an error if it happens. * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error.
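
/*
 * Editor's aside, not part of the patch: how a confidential-guest
 * backend plugs into the new class hooks, and how a machine consults
 * them, mirroring pef_guest_class_init() and the spapr/s390 call sites
 * earlier in this series. All demo_* names are hypothetical.
 */
#include "qemu/osdep.h"
#include "qom/object.h"
#include "qapi/error.h"
#include "exec/confidential-guest-support.h"
#include "hw/boards.h"

static int demo_cgs_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
    /* negotiate the protected-VM setup with KVM here */
    return 0;
}

static void demo_cgs_class_init(ObjectClass *oc, void *data)
{
    ConfidentialGuestSupportClass *klass =
        CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);

    klass->kvm_init = demo_cgs_kvm_init;
}

/* A machine then only needs the generic dispatcher: */
static void demo_machine_init(MachineState *machine)
{
    if (machine->cgs) {
        confidential_guest_kvm_init(machine->cgs, &error_fatal);
    }
}
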
*/ -void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1505,8 +1513,10 @@ void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_resizeable_ram(MemoryRegion *mr, +bool memory_region_init_resizeable_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1530,15 +1540,17 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * (getpagesize()) will be used. * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @path: the path in which to allocate the RAM. * @offset: offset within the file referenced by path * @errp: pointer to Error*, to store an error if it happens. * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram_from_file(MemoryRegion *mr, +bool memory_region_init_ram_from_file(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1558,15 +1570,17 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, * @size: size of the region. * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @fd: the fd to mmap. * @offset: offset within the file referenced by fd * @errp: pointer to Error*, to store an error if it happens. * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram_from_fd(MemoryRegion *mr, +bool memory_region_init_ram_from_fd(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1661,8 +1675,10 @@ void memory_region_init_alias(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom_nomigrate(MemoryRegion *mr, +bool memory_region_init_rom_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1684,8 +1700,10 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, +bool memory_region_init_rom_device_nomigrate(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, @@ -1743,13 +1761,21 @@ void memory_region_init_iommu(void *_iommu_mr, * give the RAM block a unique name for migration purposes. * We should lift this restriction and allow arbitrary Objects. * If you pass a non-NULL non-device @owner then we will assert. + * + * Return: true on success, else false setting @errp with error. 
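+ *
+ * Board code that cannot usefully recover typically passes &error_fatal
+ * and ignores the return value, e.g. (illustrative only):
+ *
+ *   memory_region_init_ram(mr, NULL, "machine.ram", ram_size, &error_fatal);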
*/ -void memory_region_init_ram(MemoryRegion *mr, +bool memory_region_init_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, Error **errp); +bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp); + /** * memory_region_init_rom: Initialize a ROM memory region. * @@ -1770,8 +1796,10 @@ void memory_region_init_ram(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom(MemoryRegion *mr, +bool memory_region_init_rom(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1801,8 +1829,10 @@ void memory_region_init_rom(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom_device(MemoryRegion *mr, +bool memory_region_init_rom_device(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, @@ -1869,6 +1899,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) */ bool memory_region_is_protected(MemoryRegion *mr); +/** + * memory_region_has_guest_memfd: check whether a memory region has guest_memfd + * associated + * + * Returns %true if a memory region's ram_block has valid guest_memfd assigned. + * + * @mr: the memory region being queried + */ +bool memory_region_has_guest_memfd(MemoryRegion *mr); + /** * memory_region_get_iommu: check whether a memory region is an iommu * diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index ef6988b445e1734767551f59ff8a36d8cca8b1b3..435ade6bd9b8567b9cd3e1e62df782c04aa5e5ec 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -109,7 +109,7 @@ long qemu_maxrampagesize(void); * @mr: the memory region where the ram block is * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @mem_path or @fd: specify the backing file or device * @offset: Offset into target file * @errp: pointer to Error*, to store an error if it happens diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 8f9579ed705b5f6327e0dca9ffd3fda9bdefc725..f62d3019db7a2b44b835fb84fda54d1fcf207d92 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -41,6 +41,7 @@ struct RAMBlock { QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; int fd; uint64_t fd_offset; + int guest_memfd; size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; diff --git a/include/hw/boards.h b/include/hw/boards.h index 8ac8cad2a2225e97df1475c2f20118ccf2af394e..1f410b8d664618fe0425a7f4603ca47b1e967bfe 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -30,6 +30,7 @@ bool machine_usb(MachineState *machine); int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); +bool machine_require_guest_memfd(MachineState *machine); HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); void machine_set_cpu_numa_node(MachineState *machine, const CpuInstanceProperties *props, @@ -114,7 +115,7 @@ typedef struct CPUArchId { uint64_t arch_id; int64_t vcpus_count; CpuInstanceProperties props; - Object *cpu; + CPUState *cpu; const char *type; } CPUArchId; @@ -421,6 +422,12 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_9_0[]; +extern const size_t hw_compat_9_0_len; + +extern GlobalProperty hw_compat_8_2[]; +extern const size_t hw_compat_8_2_len; + extern GlobalProperty hw_compat_8_1[]; extern const size_t hw_compat_8_1_len; diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index bdc15a7a73174f71c26f63510c6396bc4d3961a8..98a87b2ded428a4065dad7a9904f19350ea8338f 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -3,14 +3,14 @@ /* apic.c */ -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, - uint8_t vector_num, uint8_t trigger_mode); +void apic_set_max_apic_id(uint32_t max_apic_id); int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); void cpu_set_apic_base(DeviceState *s, uint64_t val); uint64_t cpu_get_apic_base(DeviceState *s); +bool cpu_is_apic_enabled(DeviceState *s); void cpu_set_apic_tpr(DeviceState *s, uint8_t val); uint8_t cpu_get_apic_tpr(DeviceState *s); void apic_init_reset(DeviceState *s); @@ -18,6 +18,9 @@ void apic_sipi(DeviceState *s); void apic_poll_irq(DeviceState *d); void apic_designate_bsp(DeviceState *d, bool bsp); int apic_get_highest_priority_irr(DeviceState *dev); +int apic_msr_read(int index, uint64_t *val); +int apic_msr_write(int index, uint64_t val); +bool is_x2apic_mode(DeviceState *d); /* pc.c */ DeviceState *cpu_get_current_apic(void); diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 5f2ba24bfcd2ad8c3f83fa5ca7b1acd91e08b272..e796e6cae3b487fbbc6883f28781b8ec07b1e8ab 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -46,8 +46,10 @@ #define APIC_DM_EXTINT 7 /* APIC destination mode */ -#define APIC_DESTMODE_FLAT 0xf -#define APIC_DESTMODE_CLUSTER 1 +#define APIC_DESTMODE_PHYSICAL 0 
+#define APIC_DESTMODE_LOGICAL 1 +#define APIC_DESTMODE_LOGICAL_FLAT 0xf +#define APIC_DESTMODE_LOGICAL_CLUSTER 0 #define APIC_TRIGGER_EDGE 0 #define APIC_TRIGGER_LEVEL 1 @@ -187,6 +189,7 @@ struct APICCommonState { DeviceState *vapic; hwaddr vapic_paddr; /* note: persistence via kvmvapic */ bool legacy_instance_id; + uint32_t extended_log_dest; }; typedef struct VAPICState { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index a10ceeabbfac8fbfa64b2f37aefb34d1065f9a64..ebcb14cfb19af3853a049abdc3f9e20f67b01fad 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -120,6 +120,7 @@ struct PCMachineClass { bool enforce_aligned_dimm; bool broken_reserved_end; bool enforce_amd_1tb_hole; + bool isa_bios_alias; /* generate legacy CPU hotplug AML */ bool legacy_cpu_hotplug; @@ -165,7 +166,36 @@ void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" - +#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + +typedef enum { + SEV_DESC_TYPE_UNDEF, + /* The section contains the region that must be validated by the VMM. */ + SEV_DESC_TYPE_SNP_SEC_MEM, + /* The section contains the SNP secrets page */ + SEV_DESC_TYPE_SNP_SECRETS, + /* The section contains address that can be used as a CPUID page */ + SEV_DESC_TYPE_CPUID, + + /* The section contains the region for kernel hashes for measured direct boot */ + SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10, +} ovmf_sev_metadata_desc_type; + +typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { + uint32_t base; + uint32_t len; + ovmf_sev_metadata_desc_type type; +} OvmfSevMetadataDesc; + +typedef struct __attribute__((__packed__)) OvmfSevMetadata { + uint8_t signature[4]; + uint32_t len; + uint32_t version; + uint32_t num_desc; + OvmfSevMetadataDesc descs[]; +} OvmfSevMetadata; + +OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void); void pc_pci_as_mapping_init(MemoryRegion *system_memory, MemoryRegion *pci_address_space); @@ -210,6 +240,12 @@ void pc_madt_cpu_entry(int uid, const CPUArchIdList *apic_ids, /* sgx.c */ void pc_machine_init_sgx_epc(PCMachineState *pcms); +extern GlobalProperty pc_compat_9_0[]; +extern const size_t pc_compat_9_0_len; + +extern GlobalProperty pc_compat_8_2[]; +extern const size_t pc_compat_8_2_len; + extern GlobalProperty pc_compat_8_1[]; extern const size_t pc_compat_8_1_len; @@ -309,15 +345,12 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; -int pc_machine_kvm_type(MachineState *machine, const char *vm_type); - #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ MachineClass *mc = MACHINE_CLASS(oc); \ optsfn(mc); \ mc->init = initfn; \ - mc->kvm_type = pc_machine_kvm_type; \ } \ static const TypeInfo pc_machine_type_##suffix = { \ .name = namestr TYPE_MACHINE_SUFFIX, \ diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index da19ae15463ab3733a3940735648567945e35136..a68ffe34ebc7ef42c2f9a83846554bfb6864a1a2 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -18,8 +18,10 @@ #define HW_I386_X86_H #include "exec/hwaddr.h" +#include "exec/memory.h" #include "hw/boards.h" +#include "hw/i386/topology.h" #include "hw/intc/ioapic.h" #include "hw/isa/isa.h" #include "qom/object.h" @@ -50,6 +52,18 @@ struct X86MachineState { GMappedFile *initrd_mapped_file; HotplugHandler *acpi_dev; 
+ /* + * Map the whole BIOS just underneath the 4 GiB address boundary. Only used + * in the ROM (-bios) case. + */ + MemoryRegion bios; + + /* + * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address + * boundary. + */ + MemoryRegion isa_bios; + /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -95,16 +109,15 @@ struct X86MachineState { #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) -uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, +void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms); +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, unsigned int cpu_index); -void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index); int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); -CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); @@ -115,7 +128,9 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); -void x86_bios_rom_init(MachineState *ms, const char *default_firmware, +void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, + MemoryRegion *bios, bool read_only); +void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, MemoryRegion *rom_memory, bool isapc_ram_fw); void x86_load_linux(X86MachineState *x86ms, @@ -138,10 +153,10 @@ typedef struct GSIState { qemu_irq x86_allocate_cpu_irq(void); void gsi_handler(void *opaque, int n, int level); -void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); +void ioapic_init_gsi(GSIState *gsi_state, Object *parent); DeviceState *ioapic_init_secondary(GSIState *gsi_state); /* pc_sysfw.c */ -void x86_firmware_configure(void *ptr, int size); +void x86_firmware_configure(hwaddr gpa, void *ptr, int size); #endif diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index bafcbe675214b330985c1d604cd983268b581e66..22fadfa3ed76ce7fee79373b0a4db3a949e0cd1f 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -50,6 +50,7 @@ struct MCHPCIState { MemoryRegion tseg_blackhole, tseg_window; MemoryRegion smbase_blackhole, smbase_window; bool has_smram_at_smbase; + bool has_smm_ranges; Range pci_hole; uint64_t below_4g_mem_size; uint64_t above_4g_mem_size; diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h deleted file mode 100644 index 707dbe524c42553d851e1be10b2a21ece056df65..0000000000000000000000000000000000000000 --- a/include/hw/ppc/pef.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * PEF (Protected Execution Facility) for POWER support - * - * Copyright Red Hat. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#ifndef HW_PPC_PEF_H -#define HW_PPC_PEF_H - -int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); - -#endif /* HW_PPC_PEF_H */ diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 68e70e61aa59c81256a46b7fae793a45ab99e8a2..72ebc0cb3a88fd10edb01036de11a877ee2f1709 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -248,19 +248,19 @@ GSource *iohandler_get_g_source(void); AioContext *iohandler_get_aio_context(void); /** - * qemu_mutex_iothread_locked: Return lock status of the main loop mutex. + * bql_locked: Return lock status of the Big QEMU Lock (BQL) * - * The main loop mutex is the coarsest lock in QEMU, and as such it + * The Big QEMU Lock (BQL) is the coarsest lock in QEMU, and as such it * must always be taken outside other locks. This function helps * functions take different paths depending on whether the current - * thread is running within the main loop mutex. + * thread is running within the BQL. * * This function should never be used in the block layer, because * unit tests, block layer tools and qemu-storage-daemon do not * have a BQL. * Please instead refer to qemu_in_main_thread(). */ -bool qemu_mutex_iothread_locked(void); +bool bql_locked(void); /** * qemu_in_main_thread: return whether it's possible to safely access @@ -312,58 +312,57 @@ bool qemu_in_main_thread(void); } while (0) /** - * qemu_mutex_lock_iothread: Lock the main loop mutex. + * bql_lock: Lock the Big QEMU Lock (BQL). * - * This function locks the main loop mutex. The mutex is taken by + * This function locks the Big QEMU Lock (BQL). The lock is taken by * main() in vl.c and always taken except while waiting on - * external events (such as with select). The mutex should be taken + * external events (such as with select). The lock should be taken * by threads other than the main loop thread when calling * qemu_bh_new(), qemu_set_fd_handler() and basically all other * functions documented in this file. * - * NOTE: tools currently are single-threaded and qemu_mutex_lock_iothread + * NOTE: tools currently are single-threaded and bql_lock * is a no-op there. */ -#define qemu_mutex_lock_iothread() \ - qemu_mutex_lock_iothread_impl(__FILE__, __LINE__) -void qemu_mutex_lock_iothread_impl(const char *file, int line); +#define bql_lock() bql_lock_impl(__FILE__, __LINE__) +void bql_lock_impl(const char *file, int line); /** - * qemu_mutex_unlock_iothread: Unlock the main loop mutex. + * bql_unlock: Unlock the Big QEMU Lock (BQL). * - * This function unlocks the main loop mutex. The mutex is taken by + * This function unlocks the Big QEMU Lock. The lock is taken by * main() in vl.c and always taken except while waiting on - * external events (such as with select). The mutex should be unlocked + * external events (such as with select). The lock should be unlocked * as soon as possible by threads other than the main loop thread, * because it prevents the main loop from processing callbacks, * including timers and bottom halves. * - * NOTE: tools currently are single-threaded and qemu_mutex_unlock_iothread + * NOTE: tools currently are single-threaded and bql_unlock * is a no-op there. */ -void qemu_mutex_unlock_iothread(void); +void bql_unlock(void); /** * QEMU_IOTHREAD_LOCK_GUARD * - * Wrap a block of code in a conditional qemu_mutex_{lock,unlock}_iothread. + * Wrap a block of code in a conditional bql_{lock,unlock}. 
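+ *
+ * Sketch of the intended use, assuming a caller that may or may not
+ * already hold the BQL (function name is hypothetical):
+ *
+ *   static void some_bh_handler(void *opaque)
+ *   {
+ *       QEMU_IOTHREAD_LOCK_GUARD();
+ *       // BQL is held from here to the end of the scope; it is
+ *       // released at scope exit only if the guard acquired it.
+ *   }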
*/ typedef struct IOThreadLockAuto IOThreadLockAuto; static inline IOThreadLockAuto *qemu_iothread_auto_lock(const char *file, int line) { - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { return NULL; } - qemu_mutex_lock_iothread_impl(file, line); + bql_lock_impl(file, line); /* Anything non-NULL causes the cleanup function to be called */ return (IOThreadLockAuto *)(uintptr_t)1; } static inline void qemu_iothread_auto_unlock(IOThreadLockAuto *l) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } G_DEFINE_AUTOPTR_CLEANUP_FUNC(IOThreadLockAuto, qemu_iothread_auto_unlock) diff --git a/include/qemu/thread.h b/include/qemu/thread.h index dd3822d7cee9010fb4e07158ee86c429c30315f2..fb74e21c08a7662d81b36cd0f543b4f6a8d934e7 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -47,7 +47,7 @@ typedef void (*QemuCondWaitFunc)(QemuCond *c, QemuMutex *m, const char *f, typedef bool (*QemuCondTimedWaitFunc)(QemuCond *c, QemuMutex *m, int ms, const char *f, int l); -extern QemuMutexLockFunc qemu_bql_mutex_lock_func; +extern QemuMutexLockFunc bql_mutex_lock_func; extern QemuMutexLockFunc qemu_mutex_lock_func; extern QemuMutexTrylockFunc qemu_mutex_trylock_func; extern QemuRecMutexLockFunc qemu_rec_mutex_lock_func; diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h index 0b06d2bff1b98f325fe27cc192854b8c554e9cc7..b582a105c08758093fb9133a4094b8ad69a84a3e 100644 --- a/include/standard-headers/asm-x86/bootparam.h +++ b/include/standard-headers/asm-x86/bootparam.h @@ -2,21 +2,7 @@ #ifndef _ASM_X86_BOOTPARAM_H #define _ASM_X86_BOOTPARAM_H -/* setup_data/setup_indirect types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 -#define SETUP_APPLE_PROPERTIES 5 -#define SETUP_JAILHOUSE 6 -#define SETUP_CC_BLOB 7 -#define SETUP_IMA 8 -#define SETUP_RNG_SEED 9 -#define SETUP_ENUM_MAX SETUP_RNG_SEED - -#define SETUP_INDIRECT (1<<31) -#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) +#include "standard-headers/asm-x86/setup_data.h" /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -38,6 +24,7 @@ #define XLF_EFI_KEXEC (1<<4) #define XLF_5LEVEL (1<<5) #define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7) #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index f0235e58a1d334524dc57bcd5afefeb30ca02aba..9a011d20f0176d3ef61f049197a63f7371ddf02f 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -92,7 +92,7 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) /* MSR_KVM_ASYNC_PF_INT */ -#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) /* MSR_KVM_MIGRATION_CONTROL */ #define KVM_MIGRATION_READY (1 << 0) @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { uint32_t token; uint8_t pad[56]; - uint32_t enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h new file mode 100644 index 0000000000000000000000000000000000000000..09355f54c55ff41a27d8669aa7768d0198a7c035 --- /dev/null +++ b/include/standard-headers/asm-x86/setup_data.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_SETUP_DATA_H +#define _ASM_X86_SETUP_DATA_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define 
SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 +#define SETUP_IMA 8 +#define SETUP_RNG_SEED 9 +#define SETUP_ENUM_MAX SETUP_RNG_SEED + +#define SETUP_INDIRECT (1<<31) +#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) + +#ifndef __ASSEMBLY__ + +#include "standard-headers/linux/types.h" + +/* extensible setup data list node */ +struct setup_data { + uint64_t next; + uint32_t type; + uint32_t len; + uint8_t data[]; +}; + +/* extensible setup indirect data node */ +struct setup_indirect { + uint32_t type; + uint32_t reserved; /* Reserved, must be set to zero. */ + uint64_t len; + uint64_t addr; +}; + +/* + * The E820 memory region entry of the boot protocol ABI: + */ +struct boot_e820_entry { + uint64_t addr; + uint64_t size; + uint32_t type; +} QEMU_PACKED; + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. + */ +struct jailhouse_setup_data { + struct { + uint16_t version; + uint16_t compatible_version; + } QEMU_PACKED hdr; + struct { + uint16_t pm_timer_address; + uint16_t num_cpus; + uint64_t pci_mmconfig_base; + uint32_t tsc_khz; + uint32_t apic_khz; + uint8_t standard_ioapic; + uint8_t cpu_ids[255]; + } QEMU_PACKED v1; + struct { + uint32_t flags; + } QEMU_PACKED v2; +} QEMU_PACKED; + +/* + * IMA buffer setup data information from the previous kernel during kexec + */ +struct ima_setup_data { + uint64_t addr; + uint64_t size; +} QEMU_PACKED; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index 3afb70160f0bf5feaf7ae284ea19e6e51e822cf5..b72917073d8d2ba292dce45953b59ea427480403 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -53,7 +53,7 @@ extern "C" { * Format modifiers may change any property of the buffer, including the number * of planes and/or the required allocation size. Format modifiers are * vendor-namespaced, and as such the relationship between a fourcc code and a - * modifier is specific to the modifer being used. For example, some modifiers + * modifier is specific to the modifier being used. For example, some modifiers * may preserve meaning - such as number of planes - from the fourcc code, * whereas others may not. * @@ -78,7 +78,7 @@ extern "C" { * format. * - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users * see modifiers as opaque tokens they can check for equality and intersect. - * These users musn't need to know to reason about the modifier value + * These users mustn't need to know to reason about the modifier value * (i.e. they are not expected to extract information out of the modifier). * * Vendors should document their modifier usage in as much detail as @@ -539,7 +539,7 @@ extern "C" { * This is a tiled layout using 4Kb tiles in row-major layout. * Within the tile pixels are laid out in 16 256 byte units / sub-tiles which * are arranged in four groups (two wide, two high) with column-major layout. - * Each group therefore consits out of four 256 byte units, which are also laid + * Each group therefore consists out of four 256 byte units, which are also laid * out as 2x2 column-major. * 256 byte units are made out of four 64 byte blocks of pixels, producing * either a square block or a 2:1 unit. 
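The setup_data nodes declared in the new asm-x86/setup_data.h above form a singly linked list threaded through guest memory: next holds the physical address of the successor node, and a value of 0 terminates the chain. A minimal traversal sketch, under the simplifying (hypothetical) assumption that the nodes live in an identity-mapped buffer so next can be dereferenced directly:

    #include <stdint.h>
    #include <stdio.h>

    struct setup_data {
        uint64_t next;    /* physical address of the next node, 0 = end */
        uint32_t type;    /* one of the SETUP_* values */
        uint32_t len;     /* number of bytes in data[] */
        uint8_t data[];
    };

    static void walk_setup_data(uint64_t head)
    {
        for (struct setup_data *sd = (void *)(uintptr_t)head; sd != NULL;
             sd = (void *)(uintptr_t)sd->next) {
            printf("setup_data node: type=%u len=%u\n", sd->type, sd->len);
        }
    }
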
@@ -1102,7 +1102,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) */ /* - * The top 4 bits (out of the 56 bits alloted for specifying vendor specific + * The top 4 bits (out of the 56 bits allotted for specifying vendor specific * modifiers) denote the category for modifiers. Currently we have three * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of * sixteen different categories. @@ -1418,7 +1418,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) * Amlogic FBC Memory Saving mode * * Indicates the storage is packed when pixel size is multiple of word - * boudaries, i.e. 8bit should be stored in this mode to save allocation + * boundaries, i.e. 8bit should be stored in this mode to save allocation * memory. * * This mode reduces body layout to 3072 bytes per 64x32 superblock with diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 99fcddf04f88f8b6dbd66a57fa58195e394c28bf..01503784d26f29d40164efd51d49caebf32a92be 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -1266,6 +1266,8 @@ struct ethtool_rxfh_indir { * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. * @rsvd8: Reserved for future use; see the note on reserved space. * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table @@ -1285,7 +1287,8 @@ struct ethtool_rxfh { uint32_t indir_size; uint32_t key_size; uint8_t hfunc; - uint8_t rsvd8[3]; + uint8_t input_xfrm; + uint8_t rsvd8[2]; uint32_t rsvd32; uint32_t rss_config[]; }; @@ -1992,6 +1995,15 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define WOL_MODE_COUNT 8 +/* RSS hash function data + * XOR the corresponding source and destination fields of each specified + * protocol. Both copies of the XOR'ed fields are fed into the RSS and RXHASH + * calculation. Note that this XORing reduces the input set entropy and could + * be exploited to reduce the RSS queue spread. + */ +#define RXH_XFRM_SYM_XOR (1 << 0) +#define RXH_XFRM_NO_CHANGE 0xff + /* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ @@ -2011,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ + +/* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ +#define GTPU_V4_FLOW 0x13 /* hash only */ +#define GTPU_V6_FLOW 0x14 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ +#define GTPC_V4_FLOW 0x15 /* hash only */ +#define GTPC_V6_FLOW 0x16 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. 
+ */ +#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ +#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ + +/* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ +#define GTPU_EH_V4_FLOW 0x19 /* hash only */ +#define GTPU_EH_V6_FLOW 0x1a /* hash only */ + +/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. + */ +#define GTPU_UL_V4_FLOW 0x1b /* hash only */ +#define GTPU_UL_V6_FLOW 0x1c /* hash only */ +#define GTPU_DL_V4_FLOW 0x1d /* hash only */ +#define GTPU_DL_V6_FLOW 0x1e /* hash only */ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 #define FLOW_MAC_EXT 0x40000000 @@ -2025,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define RXH_IP_DST (1 << 5) #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ +#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL @@ -2128,18 +2188,6 @@ enum ethtool_reset_flags { * refused. For drivers: ignore this field (use kernel's * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will * be overwritten by kernel. - * @supported: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features for which the interface - * supports autonegotiation or auto-detection. Read-only. - * @advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features that are advertised through - * autonegotiation or enabled for auto-detection. - * @lp_advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, and other - * link features that the link partner advertised through - * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. * @master_slave_cfg: Master/slave port mode. @@ -2181,6 +2229,21 @@ enum ethtool_reset_flags { * %set_link_ksettings() should validate all fields other than @cmd * and @link_mode_masks_nwords that are not described as read-only or * deprecated, and must ignore all fields described as read-only. + * + * @link_mode_masks is divided into three bitfields, each of length + * @link_mode_masks_nwords: + * - supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * - advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. 
+ * - lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { uint32_t cmd; diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index fc0dcd10aededdb8ea0f2ddb7144dcf44854727d..bac9dbc49f809f9a4237afd81b21e20a4d361865 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -211,6 +211,12 @@ * 7.39 * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures + * + * 7.40 + * - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag + * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag + * - add FUSE_NO_EXPORT_SUPPORT init flag + * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag */ #ifndef _LINUX_FUSE_H @@ -242,7 +248,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 39 +#define FUSE_KERNEL_MINOR_VERSION 40 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -349,6 +355,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_PASSTHROUGH: passthrough read/write io for this open file */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -357,6 +364,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_PASSTHROUGH (1 << 7) /** * INIT request/reply flags @@ -406,6 +414,9 @@ struct fuse_file_lock { * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. + * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support + * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit + * of the request ID indicates resend requests */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -445,6 +456,9 @@ struct fuse_file_lock { #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) +#define FUSE_PASSTHROUGH (1ULL << 37) +#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) +#define FUSE_HAS_RESEND (1ULL << 39) /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP @@ -631,6 +645,7 @@ enum fuse_notify_code { FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_RESEND = 7, FUSE_NOTIFY_CODE_MAX, }; @@ -757,7 +772,7 @@ struct fuse_create_in { struct fuse_open_out { uint64_t fh; uint32_t open_flags; - uint32_t padding; + int32_t backing_id; }; struct fuse_release_in { @@ -873,7 +888,8 @@ struct fuse_init_out { uint16_t max_pages; uint16_t map_alignment; uint32_t flags2; - uint32_t unused[7]; + uint32_t max_stack_depth; + uint32_t unused[6]; }; #define CUSE_INIT_INFO_MAX 4096 @@ -956,6 +972,14 @@ struct fuse_fallocate_in { uint32_t padding; }; +/** + * FUSE request unique ID flag + * + * Indicates whether this is a resend request. The receiver should handle this + * request accordingly. 
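+ *
+ * A userspace server would test the flag roughly like this (sketch,
+ * hypothetical variable names):
+ *
+ *   bool is_resend = in_header->unique & FUSE_UNIQUE_RESEND;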
+ */ +#define FUSE_UNIQUE_RESEND (1ULL << 63) + struct fuse_in_header { uint32_t len; uint32_t opcode; @@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +struct fuse_backing_map { + int32_t fd; + uint32_t flags; + uint64_t padding; +}; + /* Device ioctls: */ #define FUSE_DEV_IOC_MAGIC 229 #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) +#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ + struct fuse_backing_map) +#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) struct fuse_lseek_in { uint64_t fh; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index f6bab08540d81247024938067e84306dc1c00cde..2221b0c38348ae2c5f006d85190041fabe416d91 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -602,6 +602,7 @@ #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ +#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h new file mode 100644 index 0000000000000000000000000000000000000000..015c16630216e853130ae2aea86e3185b909e06f --- /dev/null +++ b/include/standard-headers/linux/kvm_para.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_KVM_PARA_H +#define __LINUX_KVM_PARA_H + +/* + * This header file provides a method for making a hypercall to the host + * Architectures should define: + * - kvm_hypercall0, kvm_hypercall1... + * - kvm_arch_para_features + * - kvm_para_available + */ + +/* Return values for hypercalls */ +#define KVM_ENOSYS 1000 +#define KVM_EFAULT EFAULT +#define KVM_EINVAL EINVAL +#define KVM_E2BIG E2BIG +#define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 + +#define KVM_HC_VAPIC_POLL_IRQ 1 +#define KVM_HC_MMU_OP 2 +#define KVM_HC_FEATURES 3 +#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 +#define KVM_HC_KICK_CPU 5 +#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 +#define KVM_HC_MIPS_EXIT_VM 7 +#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 +#define KVM_HC_SEND_IPI 10 +#define KVM_HC_SCHED_YIELD 11 +#define KVM_HC_MAP_GPA_RANGE 12 + +/* + * hypercalls use architecture specific + */ + +#endif /* __LINUX_KVM_PARA_H */ diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index bfd1ca643e7f4a62127c295bdac8ccc4da068cc0..45be0fa1bcdb57b00ad9e2d2842ca9d5134bb090 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 41 +#define VIRTIO_TRANSPORT_F_END 42 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -112,4 +112,10 @@ * This feature indicates that the driver can reset a queue individually. */ #define VIRTIO_F_RING_RESET 40 + +/* + * This feature indicates that the device support administration virtqueues. 
+ */ +#define VIRTIO_F_ADMIN_VQ 41 + #endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 2da48d3d4c2cc3d631a913224ca1c447aa64277e..2db643ed8fbfed23b1d66fd3de30b7e37534e72f 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 +/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_VENUS 4 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index b7fdfd066878cb89a091edcfe79c11196f3361d7..4010216103e549dc40e1e6316f05498374277b1b 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -175,6 +175,9 @@ struct virtio_pci_modern_common_cfg { uint16_t queue_notify_data; /* read-write */ uint16_t queue_reset; /* read-write */ + + uint16_t admin_queue_index; /* read-only */ + uint16_t admin_queue_num; /* read-only */ }; /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ @@ -215,7 +218,72 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_NDATA 56 #define VIRTIO_PCI_COMMON_Q_RESET 58 +#define VIRTIO_PCI_COMMON_ADM_Q_IDX 60 +#define VIRTIO_PCI_COMMON_ADM_Q_NUM 62 #endif /* VIRTIO_PCI_NO_MODERN */ +/* Admin command status. */ +#define VIRTIO_ADMIN_STATUS_OK 0 + +/* Admin command opcode. */ +#define VIRTIO_ADMIN_CMD_LIST_QUERY 0x0 +#define VIRTIO_ADMIN_CMD_LIST_USE 0x1 + +/* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 + +/* Transitional device admin command. */ +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE 0x2 +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ 0x3 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE 0x4 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 +#define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + +struct virtio_admin_cmd_hdr { + uint16_t opcode; + /* + * 1 - SR-IOV + * 2-65535 - reserved + */ + uint16_t group_type; + /* Unused, reserved for future extensions. */ + uint8_t reserved1[12]; + uint64_t group_member_id; +}; + +struct virtio_admin_cmd_status { + uint16_t status; + uint16_t status_qualifier; + /* Unused, reserved for future extensions. */ + uint8_t reserved2[4]; +}; + +struct virtio_admin_cmd_legacy_wr_data { + uint8_t offset; /* Starting offset of the register(s) to write. */ + uint8_t reserved[7]; + uint8_t registers[]; +}; + +struct virtio_admin_cmd_legacy_rd_data { + uint8_t offset; /* Starting offset of the register(s) to read. */ +}; + +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_END 0 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_DEV 0x1 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM 0x2 + +#define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +struct virtio_admin_cmd_notify_info_data { + uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + uint8_t bar; /* BAR of the member or the owner device */ + uint8_t padding[6]; + uint64_t offset; /* Offset within bar. 
*/ +}; + +struct virtio_admin_cmd_notify_info_result { + struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO]; +}; + #endif diff --git a/include/standard-headers/linux/virtio_pmem.h b/include/standard-headers/linux/virtio_pmem.h index fc029de7988ec52f85484eea59bc7f7a2421ef59..1a2576d0178e59beb61fd40743938d0118559793 100644 --- a/include/standard-headers/linux/virtio_pmem.h +++ b/include/standard-headers/linux/virtio_pmem.h @@ -14,6 +14,13 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_config.h" +/* Feature bits */ +/* guest physical address range will be indicated as shared memory region 0 */ +#define VIRTIO_PMEM_F_SHMEM_REGION 0 + +/* shmid of the shared memory region corresponding to the pmem */ +#define VIRTIO_PMEM_SHMEM_REGION_ID 0 + struct virtio_pmem_config { uint64_t start; uint64_t size; diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h index 1af96b9fc61a6f34417599ac82c9fb7b3f7469cc..860f12e0a4e196b6469fc7f4ee44545c4130b380 100644 --- a/include/standard-headers/linux/virtio_snd.h +++ b/include/standard-headers/linux/virtio_snd.h @@ -7,6 +7,14 @@ #include "standard-headers/linux/virtio_types.h" +/******************************************************************************* + * FEATURE BITS + */ +enum { + /* device supports control elements */ + VIRTIO_SND_F_CTLS = 0 +}; + /******************************************************************************* * CONFIGURATION SPACE */ @@ -17,6 +25,8 @@ struct virtio_snd_config { uint32_t streams; /* # of available channel maps */ uint32_t chmaps; + /* # of available control elements */ + uint32_t controls; }; enum { @@ -55,6 +65,15 @@ enum { /* channel map control request types */ VIRTIO_SND_R_CHMAP_INFO = 0x0200, + /* control element request types */ + VIRTIO_SND_R_CTL_INFO = 0x0300, + VIRTIO_SND_R_CTL_ENUM_ITEMS, + VIRTIO_SND_R_CTL_READ, + VIRTIO_SND_R_CTL_WRITE, + VIRTIO_SND_R_CTL_TLV_READ, + VIRTIO_SND_R_CTL_TLV_WRITE, + VIRTIO_SND_R_CTL_TLV_COMMAND, + /* jack event types */ VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, VIRTIO_SND_EVT_JACK_DISCONNECTED, @@ -63,6 +82,9 @@ enum { VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, VIRTIO_SND_EVT_PCM_XRUN, + /* control element event types */ + VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, + /* common status codes */ VIRTIO_SND_S_OK = 0x8000, VIRTIO_SND_S_BAD_MSG, @@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; }; +/******************************************************************************* + * CONTROL ELEMENTS MESSAGES + */ +struct virtio_snd_ctl_hdr { + /* VIRTIO_SND_R_CTL_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... 
virtio_snd_config::controls - 1 */ + uint32_t control_id; +}; + +/* supported roles for control elements */ +enum { + VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, + VIRTIO_SND_CTL_ROLE_VOLUME, + VIRTIO_SND_CTL_ROLE_MUTE, + VIRTIO_SND_CTL_ROLE_GAIN +}; + +/* supported value types for control elements */ +enum { + VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, + VIRTIO_SND_CTL_TYPE_INTEGER, + VIRTIO_SND_CTL_TYPE_INTEGER64, + VIRTIO_SND_CTL_TYPE_ENUMERATED, + VIRTIO_SND_CTL_TYPE_BYTES, + VIRTIO_SND_CTL_TYPE_IEC958 +}; + +/* supported access rights for control elements */ +enum { + VIRTIO_SND_CTL_ACCESS_READ = 0, + VIRTIO_SND_CTL_ACCESS_WRITE, + VIRTIO_SND_CTL_ACCESS_VOLATILE, + VIRTIO_SND_CTL_ACCESS_INACTIVE, + VIRTIO_SND_CTL_ACCESS_TLV_READ, + VIRTIO_SND_CTL_ACCESS_TLV_WRITE, + VIRTIO_SND_CTL_ACCESS_TLV_COMMAND +}; + +struct virtio_snd_ctl_info { + /* common header */ + struct virtio_snd_info hdr; + /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ + uint32_t role; + /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ + uint32_t type; + /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ + uint32_t access; + /* # of members in the element value */ + uint32_t count; + /* index for an element with a non-unique name */ + uint32_t index; + /* name identifier string for the element */ + uint8_t name[44]; + /* additional information about the element's value */ + union { + /* VIRTIO_SND_CTL_TYPE_INTEGER */ + struct { + /* minimum supported value */ + uint32_t min; + /* maximum supported value */ + uint32_t max; + /* fixed step size for value (0 = variable size) */ + uint32_t step; + } integer; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ + struct { + /* minimum supported value */ + uint64_t min; + /* maximum supported value */ + uint64_t max; + /* fixed step size for value (0 = variable size) */ + uint64_t step; + } integer64; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ + struct { + /* # of options supported for value */ + uint32_t items; + } enumerated; + } value; +}; + +struct virtio_snd_ctl_enum_item { + /* option name */ + uint8_t item[64]; +}; + +struct virtio_snd_ctl_iec958 { + /* AES/IEC958 channel status bits */ + uint8_t status[24]; + /* AES/IEC958 subcode bits */ + uint8_t subcode[147]; + /* nothing */ + uint8_t pad; + /* AES/IEC958 subframe bits */ + uint8_t dig_subframe[4]; +}; + +struct virtio_snd_ctl_value { + union { + /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ + uint32_t integer[128]; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ + uint64_t integer64[64]; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ + uint32_t enumerated[128]; + /* VIRTIO_SND_CTL_TYPE_BYTES value */ + uint8_t bytes[512]; + /* VIRTIO_SND_CTL_TYPE_IEC958 value */ + struct virtio_snd_ctl_iec958 iec958; + } value; +}; + +/* supported event reason types */ +enum { + /* element's value has changed */ + VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, + /* element's information has changed */ + VIRTIO_SND_CTL_EVT_MASK_INFO, + /* element's metadata has changed */ + VIRTIO_SND_CTL_EVT_MASK_TLV +}; + +struct virtio_snd_ctl_event { + /* VIRTIO_SND_EVT_CTL_NOTIFY */ + struct virtio_snd_hdr hdr; + /* 0 ... 
virtio_snd_config::controls - 1 */ + uint16_t control_id; + /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ + uint16_t mask; +}; + #endif /* VIRTIO_SND_IF_H */ diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 39326f1d4f9c0713fdcf0613623a941157cbeed8..ffeb32c0e04159a131034792bcfb3b94ff4b561b 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -66,6 +66,7 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump, use_canonical_path; bool prealloc, is_mapped, share, reserve; + bool guest_memfd; uint32_t prealloc_threads; ThreadContext *prealloc_context; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 5f3f779de452a3674089996238d58384a3706a94..8067da717bf316af017cc8efdfb142af15da4c33 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -583,20 +583,16 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); /* Notify resamplefd for EOI of specific interrupts. */ void kvm_resample_fd_notify(int gsi); -/** - * kvm_cpu_check_are_resettable - return whether CPUs can be reset - * - * Returns: true: CPUs are resettable - * false: CPUs are not resettable - */ -bool kvm_cpu_check_are_resettable(void); - -bool kvm_arch_cpu_check_are_resettable(void); - bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); + +/** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. + */ +bool kvm_hwpoisoned_mem(void); int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, struct kvm_numa_info *numa_info); @@ -607,4 +603,12 @@ int kvm_delete_shadow_device(PCIDevice *dev); void kvm_mark_guest_state_protected(void); + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); + #endif diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 9a7bc1a4b8b4d51baa86889c218267e369b0f799..d4568a66d2d660ba6a2a168985dcbf12a5bb3e51 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -30,6 +30,8 @@ typedef struct KVMSlot int as_id; /* Cache of the offset in ram address space */ ram_addr_t ram_start_offset; + int guest_memfd; + hwaddr guest_memfd_offset; } KVMSlot; typedef struct KVMMemoryUpdate { diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 777b6688511051d150571265439486a0e5c95316..3e783f3936998bcc9ae830d56a4fbf717dd19d05 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -37,9 +37,7 @@ #include #include -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -76,11 +74,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ - KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ - KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define 
KVM_ARM_DEVICE_VGIC_V2 0 @@ -164,6 +162,11 @@ struct kvm_sync_regs { __u64 device_irq_level; }; +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. diff --git a/linux-headers/asm-arm64/sve_context.h b/linux-headers/asm-arm64/sve_context.h index 1d0e3e1d0950acfa436b081f6b7d09fdd5b265ae..d1b1ec8cb1f130079cccdbd507f5a495ea72a971 100644 --- a/linux-headers/asm-arm64/sve_context.h +++ b/linux-headers/asm-arm64/sve_context.h @@ -13,6 +13,17 @@ #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ +/* + * Yes, __SVE_VQ_MAX is 512 QUADWORDS. + * + * To help ensure forward portability, this is much larger than the + * current maximum value defined by the SVE architecture. While arrays + * or static allocations can be sized based on this value, watch out! + * It will waste a surprisingly large amount of memory. + * + * Dynamic sizing based on the actual runtime vector length is likely to + * be preferable for most purposes. + */ #define __SVE_VQ_MIN 1 #define __SVE_VQ_MAX 512 diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h index 75f320fa91e54e3f999db22a0f5877a20fb6b942..1fb4f0c9f2789d43dfe52c533964bb73d42aefd7 100644 --- a/linux-headers/asm-generic/bitsperlong.h +++ b/linux-headers/asm-generic/bitsperlong.h @@ -24,4 +24,8 @@ #endif #endif +#ifndef __BITS_PER_LONG_LONG +#define __BITS_PER_LONG_LONG 64 +#endif + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 756b013fb8324bd7a320e60cebec2ca692faa149..75f00965ab1586cd64d00928217596de5034bd25 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_statmount 457 +__SYSCALL(__NR_statmount, sys_statmount) + +#define __NR_listmount 458 +__SYSCALL(__NR_listmount, sys_listmount) + +#define __NR_lsm_get_self_attr 459 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 460 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 461 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) + #undef __NR_syscalls -#define __NR_syscalls 457 +#define __NR_syscalls 462 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h index d22b19e134fb993c19e2484a2172cd5843cb4e5c..5bb4cd6f2989c283faad887c8e354513b6070998 100644 --- a/linux-headers/asm-loongarch/kvm.h +++ b/linux-headers/asm-loongarch/kvm.h @@ -14,7 +14,6 @@ * Some parts derived from the x86 version of this file. */ -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_GUEST_DEBUG #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/linux-headers/asm-mips/kvm.h b/linux-headers/asm-mips/kvm.h index edcf717c432717fce72096b1e5a2905c3f8c63ba..9673dc9cb31575a3553a4cf0e73d3e73ec24494f 100644 --- a/linux-headers/asm-mips/kvm.h +++ b/linux-headers/asm-mips/kvm.h @@ -20,8 +20,6 @@ * Some parts derived from the x86 version of this file. 
*/ -#define __KVM_HAVE_READONLY_MEM - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h index c6e1fc77c9968874feefc79b7c94a165d0ad89d2..9c48d9a21aa01ff16019d25249f4bcb97120ce08 100644 --- a/linux-headers/asm-mips/mman.h +++ b/linux-headers/asm-mips/mman.h @@ -88,7 +88,7 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ -#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, +#define MADV_DONTDUMP 16 /* Explicitly exclude from core dump, overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index 994b6f008f5414d5f1cb6f6d08c5e5298bb70261..ce2e050a9ba4675e82278a4408d522d31f7ca093 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -385,5 +385,10 @@ #define __NR_futex_wake (__NR_Linux + 454) #define __NR_futex_wait (__NR_Linux + 455) #define __NR_futex_requeue (__NR_Linux + 456) +#define __NR_statmount (__NR_Linux + 457) +#define __NR_listmount (__NR_Linux + 458) +#define __NR_lsm_get_self_attr (__NR_Linux + 459) +#define __NR_lsm_set_self_attr (__NR_Linux + 460) +#define __NR_lsm_list_modules (__NR_Linux + 461) #endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 41dcf5877a1b80bbb01d20e8f3b77e16d9803d6d..5bfb3733ffdfce0cc7637713df162d153adf6fe5 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -361,5 +361,10 @@ #define __NR_futex_wake (__NR_Linux + 454) #define __NR_futex_wait (__NR_Linux + 455) #define __NR_futex_requeue (__NR_Linux + 456) +#define __NR_statmount (__NR_Linux + 457) +#define __NR_listmount (__NR_Linux + 458) +#define __NR_lsm_get_self_attr (__NR_Linux + 459) +#define __NR_lsm_set_self_attr (__NR_Linux + 460) +#define __NR_lsm_list_modules (__NR_Linux + 461) #endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index ae9d334d96e3444c4eb39f924f5fec183100a094..02eaecd020ec2cb0ae83f48f46aa47300e6f95d5 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -431,5 +431,10 @@ #define __NR_futex_wake (__NR_Linux + 454) #define __NR_futex_wait (__NR_Linux + 455) #define __NR_futex_requeue (__NR_Linux + 456) +#define __NR_statmount (__NR_Linux + 457) +#define __NR_listmount (__NR_Linux + 458) +#define __NR_lsm_get_self_attr (__NR_Linux + 459) +#define __NR_lsm_set_self_attr (__NR_Linux + 460) +#define __NR_lsm_list_modules (__NR_Linux + 461) #endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 9f18fa090f1f1d08179cba6f39f7b832bbd7b95b..1691297a766a9c1a4df9384c4ff02ecd8ce21b92 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -28,7 +28,6 @@ #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_GUEST_DEBUG /* Not always available, but if it is, this is the correct offset. 
*/ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { #define KVM_XIVE_TIMA_PAGE_OFFSET 0 #define KVM_XIVE_ESB_PAGE_OFFSET 4 +/* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + +struct kvm_ppc_pvinfo { + /* out */ + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index b9b23d66d7d9ae5f052f70577c38d573f3225ee9..bbab08d6ec267e4d20eec63d0385323900334955 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -438,6 +438,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index cbb4b3e8f7c2f1fa9c572b4c37bdd99637c8925b..af34cde70f20fa1320d00f93b700dd7ab8f83b5a 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -410,6 +410,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index 60d3b21dead7d8846050d20a96ef1a0b3ad1ba20..e878e7cc39784a537b65ff7e6a97e44d74d14b01 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -16,7 +16,6 @@ #include #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -139,6 +138,36 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZIHPM, KVM_RISCV_ISA_EXT_SMSTATEEN, KVM_RISCV_ISA_EXT_ZICOND, + KVM_RISCV_ISA_EXT_ZBC, + KVM_RISCV_ISA_EXT_ZBKB, + KVM_RISCV_ISA_EXT_ZBKC, + KVM_RISCV_ISA_EXT_ZBKX, + KVM_RISCV_ISA_EXT_ZKND, + KVM_RISCV_ISA_EXT_ZKNE, + KVM_RISCV_ISA_EXT_ZKNH, + KVM_RISCV_ISA_EXT_ZKR, + KVM_RISCV_ISA_EXT_ZKSED, + KVM_RISCV_ISA_EXT_ZKSH, + KVM_RISCV_ISA_EXT_ZKT, + KVM_RISCV_ISA_EXT_ZVBB, + KVM_RISCV_ISA_EXT_ZVBC, + KVM_RISCV_ISA_EXT_ZVKB, + KVM_RISCV_ISA_EXT_ZVKG, + KVM_RISCV_ISA_EXT_ZVKNED, + KVM_RISCV_ISA_EXT_ZVKNHA, + KVM_RISCV_ISA_EXT_ZVKNHB, + KVM_RISCV_ISA_EXT_ZVKSED, + KVM_RISCV_ISA_EXT_ZVKSH, + KVM_RISCV_ISA_EXT_ZVKT, + KVM_RISCV_ISA_EXT_ZFH, + KVM_RISCV_ISA_EXT_ZFHMIN, + 
KVM_RISCV_ISA_EXT_ZIHINTNTL,
+	KVM_RISCV_ISA_EXT_ZVFH,
+	KVM_RISCV_ISA_EXT_ZVFHMIN,
+	KVM_RISCV_ISA_EXT_ZFA,
+	KVM_RISCV_ISA_EXT_ZTSO,
+	KVM_RISCV_ISA_EXT_ZACAS,
+	KVM_RISCV_ISA_EXT_SSCOFPMF,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -157,9 +186,16 @@ enum KVM_RISCV_SBI_EXT_ID {
 	KVM_RISCV_SBI_EXT_EXPERIMENTAL,
 	KVM_RISCV_SBI_EXT_VENDOR,
 	KVM_RISCV_SBI_EXT_DBCN,
+	KVM_RISCV_SBI_EXT_STA,
 	KVM_RISCV_SBI_EXT_MAX,
 };
 
+/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_sbi_sta {
+	unsigned long shmem_lo;
+	unsigned long shmem_hi;
+};
+
 /* Possible states for kvm_riscv_timer */
 #define KVM_RISCV_TIMER_STATE_OFF	0
 #define KVM_RISCV_TIMER_STATE_ON	1
@@ -241,6 +277,12 @@ enum KVM_RISCV_SBI_EXT_ID {
 #define KVM_REG_RISCV_VECTOR_REG(n)	\
 	((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long))
 
+/* Registers for specific SBI extensions are mapped as type 10 */
+#define KVM_REG_RISCV_SBI_STATE	(0x0a << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_SBI_STA	(0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_SBI_STA_REG(name)	\
+	(offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long))
+
 /* Device Control API: RISC-V AIA */
 #define KVM_DEV_RISCV_APLIC_ALIGN	0x1000
 #define KVM_DEV_RISCV_APLIC_SIZE	0x4000
diff --git a/linux-headers/asm-riscv/ptrace.h b/linux-headers/asm-riscv/ptrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..1e3166caca8c6e616c4674fe2f4a07a5d7b35f0f
--- /dev/null
+++ b/linux-headers/asm-riscv/ptrace.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ */
+
+#ifndef _ASM_RISCV_PTRACE_H
+#define _ASM_RISCV_PTRACE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+#define PTRACE_GETFDPIC 33
+
+#define PTRACE_GETFDPIC_EXEC 0
+#define PTRACE_GETFDPIC_INTERP 1
+
+/*
+ * User-mode register state for core dumps, ptrace, sigcontext
+ *
+ * This decouples struct pt_regs from the userspace ABI.
+ * struct user_regs_struct must form a prefix of struct pt_regs.
+ */
+struct user_regs_struct {
+	unsigned long pc;
+	unsigned long ra;
+	unsigned long sp;
+	unsigned long gp;
+	unsigned long tp;
+	unsigned long t0;
+	unsigned long t1;
+	unsigned long t2;
+	unsigned long s0;
+	unsigned long s1;
+	unsigned long a0;
+	unsigned long a1;
+	unsigned long a2;
+	unsigned long a3;
+	unsigned long a4;
+	unsigned long a5;
+	unsigned long a6;
+	unsigned long a7;
+	unsigned long s2;
+	unsigned long s3;
+	unsigned long s4;
+	unsigned long s5;
+	unsigned long s6;
+	unsigned long s7;
+	unsigned long s8;
+	unsigned long s9;
+	unsigned long s10;
+	unsigned long s11;
+	unsigned long t3;
+	unsigned long t4;
+	unsigned long t5;
+	unsigned long t6;
+};
+
+struct __riscv_f_ext_state {
+	__u32 f[32];
+	__u32 fcsr;
+};
+
+struct __riscv_d_ext_state {
+	__u64 f[32];
+	__u32 fcsr;
+};
+
+struct __riscv_q_ext_state {
+	__u64 f[64] __attribute__((aligned(16)));
+	__u32 fcsr;
+	/*
+	 * Reserved for expansion of sigcontext structure. Currently zeroed
+	 * upon signal, and must be zero upon sigreturn.
+	 */
+	__u32 reserved[3];
+};
+
+struct __riscv_ctx_hdr {
+	__u32 magic;
+	__u32 size;
+};
+
+struct __riscv_extra_ext_header {
+	__u32 __padding[129] __attribute__((aligned(16)));
+	/*
+	 * Reserved for expansion of sigcontext structure. Currently zeroed
+	 * upon signal, and must be zero upon sigreturn.
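The new KVM_REG_RISCV_SBI_STA_REG() helper above maps struct fields to ONE_REG indices via offsetof. A minimal sketch of building and setting the shmem_lo register id, assuming an rv64 host (so KVM_REG_SIZE_U64), headers from this update, and an already-open vCPU fd:

```c
#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <asm/kvm.h>

/* Compose the ONE_REG id: arch | size | type | subtype | field index. */
static int sbi_sta_set_shmem_lo(int vcpu_fd, uint64_t val)
{
    uint64_t id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
                  KVM_REG_RISCV_SBI_STATE |
                  KVM_REG_RISCV_SBI_STA |
                  KVM_REG_RISCV_SBI_STA_REG(shmem_lo);
    struct kvm_one_reg reg = {
        .id   = id,
        .addr = (uint64_t)(uintptr_t)&val,
    };

    return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
```

The offsetof trick means new fields appended to struct kvm_riscv_sbi_sta get register indices automatically, without a parallel enum to keep in sync.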
+ */ + __u32 reserved; + struct __riscv_ctx_hdr hdr; +}; + +union __riscv_fp_state { + struct __riscv_f_ext_state f; + struct __riscv_d_ext_state d; + struct __riscv_q_ext_state q; +}; + +struct __riscv_v_ext_state { + unsigned long vstart; + unsigned long vl; + unsigned long vtype; + unsigned long vcsr; + unsigned long vlenb; + void *datap; + /* + * In signal handler, datap will be set a correct user stack offset + * and vector registers will be copied to the address of datap + * pointer. + */ +}; + +struct __riscv_v_regset_state { + unsigned long vstart; + unsigned long vl; + unsigned long vtype; + unsigned long vcsr; + unsigned long vlenb; + char vreg[]; +}; + +/* + * According to spec: The number of bits in a single vector register, + * VLEN >= ELEN, which must be a power of 2, and must be no greater than + * 2^16 = 65536bits = 8192bytes + */ +#define RISCV_MAX_VLENB (8192) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_RISCV_PTRACE_H */ diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index 023a2763a97cee85f58b006acb0e6f6f4b784619..684c4e1205d6ea7ad183754a33062cc7ab5b7108 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -12,7 +12,320 @@ #include #define __KVM_S390 -#define __KVM_HAVE_GUEST_DEBUG + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +#define KVM_S390_CMMA_PEEK (1 << 0) + +/** + * kvm_s390_cmma_log - Used for CMMA migration. + * + * Used both for input and output. + * + * @start_gfn: Guest page number to start from. + * @count: Size of the result buffer. + * @flags: Control operation mode via KVM_S390_CMMA_* flags + * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty + * pages are still remaining. + * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set + * in the PGSTE. + * @values: Pointer to the values buffer. + * + * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
+ */ +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ + }; + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* ignored */ + }; +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +#define KVM_S390_MEMOP_SIDA_READ 2 +#define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) + +struct kvm_s390_psw { + __u64 mask; + __u64 addr; +}; + +/* valid values for type in kvm_s390_interrupt */ +#define KVM_S390_SIGP_STOP 0xfffe0000u +#define KVM_S390_PROGRAM_INT 0xfffe0001u +#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +#define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +#define KVM_S390_MCHK 0xfffe1000u +#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +#define KVM_S390_INT_CPU_TIMER 0xffff1005u +#define KVM_S390_INT_VIRTIO 0xffff2603u +#define KVM_S390_INT_SERVICE 0xffff2401u +#define KVM_S390_INT_EMERGENCY 0xffff1201u +#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_IO_AI_MASK 0x04000000u + + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +struct kvm_s390_io_info { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; +}; + +struct kvm_s390_ext_info { + __u32 ext_params; + __u32 pad; + __u64 ext_params2; +}; + +struct kvm_s390_pgm_info { + __u64 trans_exc_code; + __u64 mon_code; + __u64 per_address; + __u32 data_exc_code; + __u16 code; + __u16 mon_class_nr; + __u8 per_code; + __u8 per_atmid; + __u8 exc_access_id; + __u8 per_access_id; + __u8 op_access_id; +#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 + __u8 flags; + __u8 pad[2]; +}; + +struct kvm_s390_prefix_info { + __u32 address; +}; + +struct kvm_s390_extcall_info { + __u16 code; +}; + +struct kvm_s390_emerg_info { + __u16 code; +}; + +#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +struct kvm_s390_stop_info { + __u32 flags; 
+}; + +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 pad; + __u8 fixed_logout[16]; +}; + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; /* will stay unused for compatibility reasons */ + __u32 len; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ +}; + +struct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; +}; + +struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +}; + +struct kvm_s390_pv_unp { + __u64 addr; + __u64 size; + __u64 tweak; +}; + +enum pv_cmd_dmp_id { + KVM_PV_DUMP_INIT, + KVM_PV_DUMP_CONFIG_STOR_STATE, + KVM_PV_DUMP_COMPLETE, + KVM_PV_DUMP_CPU, +}; + +struct kvm_s390_pv_dmp { + __u64 subcmd; + __u64 buff_addr; + __u64 buff_len; + __u64 gaddr; /* For dump storage state */ + __u64 reserved[4]; +}; + +enum pv_cmd_info_id { + KVM_PV_INFO_VM, + KVM_PV_INFO_DUMP, +}; + +struct kvm_s390_pv_info_dump { + __u64 dump_cpu_buffer_len; + __u64 dump_config_mem_buffer_per_1m; + __u64 dump_config_finalize_len; +}; + +struct kvm_s390_pv_info_vm { + __u64 inst_calls_list[4]; + __u64 max_cpus; + __u64 max_guests; + __u64 max_guest_addr; + __u64 feature_indication; +}; + +struct kvm_s390_pv_info_header { + __u32 id; + __u32 len_max; + __u32 len_written; + __u32 reserved; +}; + +struct kvm_s390_pv_info { + struct kvm_s390_pv_info_header header; + union { + struct kvm_s390_pv_info_dump dump; + struct kvm_s390_pv_info_vm vm; + }; +}; + +enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, + KVM_PV_SET_SEC_PARMS, + KVM_PV_UNPACK, + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, + KVM_PV_INFO, + KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, +}; + +struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. 
Must be 0 for now */
+	__u32 reserved[3];
+};
+
+struct kvm_s390_zpci_op {
+	/* in */
+	__u32 fh;		/* target device */
+	__u8  op;		/* operation to perform */
+	__u8  pad[3];
+	union {
+		/* for KVM_S390_ZPCIOP_REG_AEN */
+		struct {
+			__u64 ibv;	/* Guest addr of interrupt bit vector */
+			__u64 sb;	/* Guest addr of summary bit */
+			__u32 flags;
+			__u32 noi;	/* Number of interrupts */
+			__u8 isc;	/* Guest interrupt subclass */
+			__u8 sbo;	/* Offset of guest summary bit vector */
+			__u16 pad;
+		} reg_aen;
+		__u64 reserved[8];
+	} u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN		0
+#define KVM_S390_ZPCIOP_DEREG_AEN	1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST	(1 << 0)
 
 /* Device control API: s390-specific devices */
 #define KVM_DEV_FLIC_GET_ALL_IRQS	1
diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h
index c093e6d5f9111ae5be58b0702251e19ecffc98cc..a3ece69d8241fc71cbb3e4820ce2cbd03129e9a6 100644
--- a/linux-headers/asm-s390/unistd_32.h
+++ b/linux-headers/asm-s390/unistd_32.h
@@ -429,5 +429,10 @@
 #define __NR_futex_wake 454
 #define __NR_futex_wait 455
 #define __NR_futex_requeue 456
+#define __NR_statmount 457
+#define __NR_listmount 458
+#define __NR_lsm_get_self_attr 459
+#define __NR_lsm_set_self_attr 460
+#define __NR_lsm_list_modules 461
 
 #endif /* _ASM_S390_UNISTD_32_H */
diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h
index 114c0569a49aa5e8a50f9834e2487f0d9abc12b7..8c5fd93495ce86a670d024dcc152949430ac5dbe 100644
--- a/linux-headers/asm-s390/unistd_64.h
+++ b/linux-headers/asm-s390/unistd_64.h
@@ -377,5 +377,10 @@
 #define __NR_futex_wake 454
 #define __NR_futex_wait 455
 #define __NR_futex_requeue 456
+#define __NR_statmount 457
+#define __NR_listmount 458
+#define __NR_lsm_get_self_attr 459
+#define __NR_lsm_set_self_attr 460
+#define __NR_lsm_list_modules 461
 
 #endif /* _ASM_S390_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 2b3a8f7bd2c0493a6fd387ae4cbe68b659a9d214..805ab535f399eff08acc6cc297f7ef3f96265e5e 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -7,6 +7,8 @@
  *
  */
+#include <linux/const.h>
+#include <linux/bits.h>
 #include <linux/types.h>
 #include <linux/ioctl.h>
 #include <linux/stddef.h>
@@ -40,7 +42,6 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
@@ -49,7 +50,6 @@
 #define __KVM_HAVE_DEBUGREGS
 #define __KVM_HAVE_XSAVE
 #define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
 
 /* Architectural interrupt line count.
*/ #define KVM_NR_INTERRUPTS 256 @@ -455,8 +455,13 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 -/* attributes for system fd (group 0) */ -#define KVM_X86_XCOMP_GUEST_SUPP 0 +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -524,9 +529,361 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 -#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) +/* for KVM_CAP_MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +/* for KVM_CAP_XEN_HVM */ +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + __u8 runstate_update_flag; + union { + __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) + __u64 hva; + } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. 
+ */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u32 xen_version; + __u64 pad[8]; + } u; +}; + + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) + __u64 hva; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; + __u8 vector; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + + /* Second time is the charm; improved versions of the above ioctls. 
*/ + KVM_SEV_INIT2, + + /* Hygon CSV batch command */ + KVM_CSV_COMMAND_BATCH = 0x18, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u32 pad0; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u16 ghcb_version; + __u16 pad1; + __u32 pad2[8]; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u32 pad0; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u32 pad1; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u32 pad2; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u32 pad3; + __u64 session_uaddr; + __u32 session_len; + __u32 pad4; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + /* * Masked event layout. 
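KVM_SEV_INIT2 is the "improved version" flagged in the comment above: it takes its parameters explicitly through the new struct kvm_sev_init instead of implying them from the command id. A hedged sketch of issuing it through KVM_MEMORY_ENCRYPT_OP; the assumption is a VM created with KVM_CREATE_VM type KVM_X86_SEV_VM, an open /dev/sev fd, and headers from this update:

```c
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int sev_init2(int vm_fd, int sev_fd)
{
    struct kvm_sev_init init;
    struct kvm_sev_cmd cmd;

    /* All-zero parameters: no extra VMSA features, default GHCB version. */
    memset(&init, 0, sizeof(init));

    memset(&cmd, 0, sizeof(cmd));
    cmd.id     = KVM_SEV_INIT2;
    cmd.data   = (uint64_t)(uintptr_t)&init;
    cmd.sev_fd = sev_fd;

    /* SEV commands are multiplexed through the existing encrypt-op ioctl. */
    return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}
```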
* Bits Description @@ -547,10 +904,10 @@ struct kvm_pmu_event_filter { ((__u64)(!!(exclude)) << 55)) #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ - (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) -#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) + (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ @@ -558,6 +915,12 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ /* x86-specific KVM_EXIT_HYPERCALL flags. */ -#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) +#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) + +#define KVM_X86_DEFAULT_VM 0 +#define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h new file mode 100644 index 0000000000000000000000000000000000000000..1d3e0e0b07a48c24db218fa9e3adb6b6e6fa2266 --- /dev/null +++ b/linux-headers/asm-x86/kvm_para.h @@ -0,0 +1 @@ +#include "standard-headers/asm-x86/kvm_para.h" diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 329649c377be129711ce0f9c76433ace56df4706..5c9c329e9390eedcb76b0baa7353acbf46f8228c 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -447,6 +447,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 4583606ce68420d75bccf5c411da675f860744b3..d9aab7ae87d8648313e3e19c23b419d949de3824 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -369,6 +369,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 146d74d8e4b0146d80ead022189149e27441b8a1..63cdd1ee43dfeb67e72dbaaf098b4187b3b83525 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -321,6 +321,11 @@ #define __NR_futex_wake (__X32_SYSCALL_BIT + 454) #define __NR_futex_wait (__X32_SYSCALL_BIT + 455) #define __NR_futex_requeue (__X32_SYSCALL_BIT + 456) +#define __NR_statmount (__X32_SYSCALL_BIT + 457) +#define __NR_listmount (__X32_SYSCALL_BIT + 458) +#define __NR_lsm_get_self_attr (__X32_SYSCALL_BIT + 459) +#define __NR_lsm_set_self_attr (__X32_SYSCALL_BIT + 460) +#define __NR_lsm_list_modules (__X32_SYSCALL_BIT + 461) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h new file mode 100644 index 
0000000000000000000000000000000000000000..d9897771be8c2e94d603d8b7a52a7167b38baa41 --- /dev/null +++ b/linux-headers/linux/bits.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* bits.h: Macros for dealing with bitmasks. */ + +#ifndef _LINUX_BITS_H +#define _LINUX_BITS_H + +#define __GENMASK(h, l) \ + (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ + (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) + +#define __GENMASK_ULL(h, l) \ + (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ + (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) + +#endif /* _LINUX_BITS_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 96bc60475e84fa21accd0c2ce517af237980af6b..b114944549109b264d27173c46b23e67eb265452 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -18,75 +18,10 @@ #define KVM_API_VERSION 12 -/* *** Deprecated interfaces *** */ - -#define KVM_TRC_SHIFT 16 - -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) - -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - -struct kvm_user_trace_setup { - __u32 buf_size; - __u32 buf_nr; -}; - -#define __KVM_DEPRECATED_MAIN_W_0x06 \ - _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) -#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) - -#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) - -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) - -/* *** End of deprecated interfaces *** */ - +/* + * Backwards-compatible definitions. 
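The new linux/bits.h underpins the BIT()/GENMASK() to _BITUL()/__GENMASK_ULL() conversions later in this patch. A self-contained restatement showing what one converted x86 constant expands to; the local macro copies (and the 64-bit long long assumption) stand in for including the uapi headers:

```c
#include <stdio.h>
#include <stdint.h>

/* Local restatement of __GENMASK_ULL(), assuming the usual
 * __BITS_PER_LONG_LONG == 64 from asm-generic/bitsperlong.h. */
#define BITS_PER_LONG_LONG 64
#define GENMASK_ULL(h, l) \
    ((~0ULL - (1ULL << (l)) + 1) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))

int main(void)
{
    /* Reproduces KVM_PMU_MASKED_ENTRY_EVENT_SELECT from asm-x86/kvm.h:
     * bits 7:0 plus bits 35:32. */
    uint64_t mask = GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32);

    printf("0x%llx\n", (unsigned long long)mask);  /* prints 0xf000000ff */
    return 0;
}
```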
+ */ +#define __KVM_HAVE_GUEST_DEBUG /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { @@ -97,6 +32,19 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; +/* for KVM_SET_USER_MEMORY_REGION2 */ +struct kvm_userspace_memory_region2 { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 userspace_addr; + __u64 guest_memfd_offset; + __u32 guest_memfd; + __u32 pad1; + __u64 pad2[14]; +}; + /* * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for * userspace, other bits are reserved for kvm internal use which are defined @@ -104,6 +52,7 @@ struct kvm_userspace_memory_region { */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) +#define KVM_MEM_GUEST_MEMFD (1UL << 2) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -143,43 +92,6 @@ struct kvm_pit_config { #define KVM_PIT_SPEAKER_DUMMY 1 -struct kvm_s390_skeys { - __u64 start_gfn; - __u64 count; - __u64 skeydata_addr; - __u32 flags; - __u32 reserved[9]; -}; - -#define KVM_S390_CMMA_PEEK (1 << 0) - -/** - * kvm_s390_cmma_log - Used for CMMA migration. - * - * Used both for input and output. - * - * @start_gfn: Guest page number to start from. - * @count: Size of the result buffer. - * @flags: Control operation mode via KVM_S390_CMMA_* flags - * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty - * pages are still remaining. - * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set - * in the PGSTE. - * @values: Pointer to the values buffer. - * - * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. - */ -struct kvm_s390_cmma_log { - __u64 start_gfn; - __u32 count; - __u32 flags; - union { - __u64 remaining; - __u64 mask; - }; - __u64 values; -}; - struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 @@ -267,6 +179,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 +#define KVM_EXIT_MEMORY_FAULT 39 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -371,11 +284,6 @@ struct kvm_run { __u32 ipb; } s390_sieic; /* KVM_EXIT_S390_RESET */ -#define KVM_S390_RESET_POR 1 -#define KVM_S390_RESET_CLEAR 2 -#define KVM_S390_RESET_SUBSYSTEM 4 -#define KVM_S390_RESET_CPU_INIT 8 -#define KVM_S390_RESET_IPL 16 __u64 s390_reset_flags; /* KVM_EXIT_S390_UCONTROL */ struct { @@ -517,6 +425,13 @@ struct kvm_run { #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) __u32 flags; } notify; + /* KVM_EXIT_MEMORY_FAULT */ + struct { +#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault; /* Fix the size of the union. 
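struct kvm_userspace_memory_region2 extends the classic slot layout with a guest_memfd backing, and a private/shared access mismatch now surfaces as KVM_EXIT_MEMORY_FAULT, with the gpa, size and KVM_MEMORY_EXIT_FLAG_PRIVATE reported in run->memory_fault. A hedged registration sketch; vm_fd, gmem_fd, gpa, size and hva are caller-supplied assumptions, and the headers are assumed to come from this update:

```c
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_region2(int vm_fd, int gmem_fd,
                       uint64_t gpa, uint64_t size, void *hva)
{
    struct kvm_userspace_memory_region2 mem;

    memset(&mem, 0, sizeof(mem));
    mem.slot              = 0;
    mem.flags             = KVM_MEM_GUEST_MEMFD;
    mem.guest_phys_addr   = gpa;
    mem.memory_size       = size;
    mem.userspace_addr    = (uint64_t)(uintptr_t)hva; /* shared backing */
    mem.guest_memfd       = gmem_fd;                  /* private backing */
    mem.guest_memfd_offset = 0;

    return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &mem);
}
```

This is the same dual-backing shape the kvm_set_user_memory_region() changes earlier in this patch produce: the hva serves shared accesses, the guest_memfd serves private ones, and KVM picks per page based on the memory attributes.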
*/ char padding[256]; }; @@ -583,43 +498,6 @@ struct kvm_translation { __u8 pad[5]; }; -/* for KVM_S390_MEM_OP */ -struct kvm_s390_mem_op { - /* in */ - __u64 gaddr; /* the guest address */ - __u64 flags; /* flags */ - __u32 size; /* amount of bytes */ - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ - union { - struct { - __u8 ar; /* the access register number */ - __u8 key; /* access key, ignored if flag unset */ - __u8 pad1[6]; /* ignored */ - __u64 old_addr; /* ignored if cmpxchg flag unset */ - }; - __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* ignored */ - }; -}; -/* types for kvm_s390_mem_op->op */ -#define KVM_S390_MEMOP_LOGICAL_READ 0 -#define KVM_S390_MEMOP_LOGICAL_WRITE 1 -#define KVM_S390_MEMOP_SIDA_READ 2 -#define KVM_S390_MEMOP_SIDA_WRITE 3 -#define KVM_S390_MEMOP_ABSOLUTE_READ 4 -#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 -#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 - -/* flags for kvm_s390_mem_op->flags */ -#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) -#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) - -/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ -#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) -#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) - /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -684,124 +562,6 @@ struct kvm_mp_state { __u32 mp_state; }; -struct kvm_s390_psw { - __u64 mask; - __u64 addr; -}; - -/* valid values for type in kvm_s390_interrupt */ -#define KVM_S390_SIGP_STOP 0xfffe0000u -#define KVM_S390_PROGRAM_INT 0xfffe0001u -#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u -#define KVM_S390_RESTART 0xfffe0003u -#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u -#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u -#define KVM_S390_MCHK 0xfffe1000u -#define KVM_S390_INT_CLOCK_COMP 0xffff1004u -#define KVM_S390_INT_CPU_TIMER 0xffff1005u -#define KVM_S390_INT_VIRTIO 0xffff2603u -#define KVM_S390_INT_SERVICE 0xffff2401u -#define KVM_S390_INT_EMERGENCY 0xffff1201u -#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u -/* Anything below 0xfffe0000u is taken by INT_IO */ -#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ - (((schid)) | \ - ((ssid) << 16) | \ - ((cssid) << 18) | \ - ((ai) << 26)) -#define KVM_S390_INT_IO_MIN 0x00000000u -#define KVM_S390_INT_IO_MAX 0xfffdffffu -#define KVM_S390_INT_IO_AI_MASK 0x04000000u - - -struct kvm_s390_interrupt { - __u32 type; - __u32 parm; - __u64 parm64; -}; - -struct kvm_s390_io_info { - __u16 subchannel_id; - __u16 subchannel_nr; - __u32 io_int_parm; - __u32 io_int_word; -}; - -struct kvm_s390_ext_info { - __u32 ext_params; - __u32 pad; - __u64 ext_params2; -}; - -struct kvm_s390_pgm_info { - __u64 trans_exc_code; - __u64 mon_code; - __u64 per_address; - __u32 data_exc_code; - __u16 code; - __u16 mon_class_nr; - __u8 per_code; - __u8 per_atmid; - __u8 exc_access_id; - __u8 per_access_id; - __u8 op_access_id; -#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 -#define KVM_S390_PGM_FLAGS_ILC_0 0x02 -#define KVM_S390_PGM_FLAGS_ILC_1 0x04 -#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 -#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 - __u8 flags; - __u8 pad[2]; -}; - -struct kvm_s390_prefix_info { - __u32 address; -}; - -struct kvm_s390_extcall_info { - __u16 code; -}; - -struct kvm_s390_emerg_info { - __u16 code; -}; - -#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 -struct kvm_s390_stop_info { - __u32 flags; -}; - -struct kvm_s390_mchk_info { - __u64 cr14; - __u64 mcic; - __u64 failing_storage_address; - __u32 
ext_damage_code; - __u32 pad; - __u8 fixed_logout[16]; -}; - -struct kvm_s390_irq { - __u64 type; - union { - struct kvm_s390_io_info io; - struct kvm_s390_ext_info ext; - struct kvm_s390_pgm_info pgm; - struct kvm_s390_emerg_info emerg; - struct kvm_s390_extcall_info extcall; - struct kvm_s390_prefix_info prefix; - struct kvm_s390_stop_info stop; - struct kvm_s390_mchk_info mchk; - char reserved[64]; - } u; -}; - -struct kvm_s390_irq_state { - __u64 buf; - __u32 flags; /* will stay unused for compatibility reasons */ - __u32 len; - __u32 reserved[4]; /* will stay unused for compatibility reasons */ -}; - /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -863,50 +623,6 @@ struct kvm_enable_cap { __u8 pad[64]; }; -/* for KVM_PPC_GET_PVINFO */ - -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) - -struct kvm_ppc_pvinfo { - /* out */ - __u32 flags; - __u32 hcall[4]; - __u8 pad[108]; -}; - -/* for KVM_PPC_GET_SMMU_INFO */ -#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 - -struct kvm_ppc_one_page_size { - __u32 page_shift; /* Page shift (or 0) */ - __u32 pte_enc; /* Encoding in the HPTE (>>12) */ -}; - -struct kvm_ppc_one_seg_page_size { - __u32 page_shift; /* Base page shift of segment (or 0) */ - __u32 slb_enc; /* SLB encoding for BookS */ - struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 -#define KVM_PPC_1T_SEGMENTS 0x00000002 -#define KVM_PPC_NO_HASH 0x00000004 - -struct kvm_ppc_smmu_info { - __u64 flags; - __u32 slb_size; - __u16 data_keys; /* # storage keys supported for data */ - __u16 instr_keys; /* # storage keys supported for instructions */ - struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ -struct kvm_ppc_resize_hpt { - __u64 flags; - __u32 shift; - __u32 pad; -}; - #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -961,9 +677,6 @@ struct kvm_ppc_resize_hpt { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 -#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 -#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) @@ -990,9 +703,7 @@ struct kvm_ppc_resize_hpt { /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 #define KVM_CAP_USER_NMI 22 -#ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 -#endif #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif @@ -1219,6 +930,12 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 #define KVM_CAP_ARM_RME 300 +#define KVM_CAP_USER_MEMORY2 231 +#define KVM_CAP_MEMORY_FAULT_INFO 232 +#define KVM_CAP_MEMORY_ATTRIBUTES 233 +#define KVM_CAP_GUEST_MEMFD 234 +#define KVM_CAP_VM_TYPES 235 + #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 @@ -1236,8 +953,6 @@ struct kvm_ppc_resize_hpt { #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing_irqchip { __u32 irqchip; __u32 pin; @@ -1302,41 +1017,6 @@ struct kvm_irq_routing { struct kvm_irq_routing_entry entries[]; }; -#endif - -#ifdef KVM_CAP_MCE -/* x86 MCE */ -struct kvm_x86_mce { - __u64 status; - __u64 addr; - __u64 misc; - 
__u64 mcg_status; - __u8 bank; - __u8 pad1[7]; - __u64 pad2[3]; -}; -#endif - -#ifdef KVM_CAP_XEN_HVM -#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) -#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) -#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) -#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) -#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) -#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) -#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) - -struct kvm_xen_hvm_config { - __u32 flags; - __u32 msr; - __u64 blob_addr_32; - __u64 blob_addr_64; - __u8 blob_size_32; - __u8 blob_size_64; - __u8 pad2[30]; -}; -#endif - #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) /* * Available with KVM_CAP_IRQFD_RESAMPLE @@ -1556,13 +1236,11 @@ struct kvm_user_data { struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) -#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) +#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x43, struct kvm_user_data) +#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ + struct kvm_userspace_memory_region2) + /* enable ucontrol for s390 */ -struct kvm_s390_ucas_mapping { - __u64 user_addr; - __u64 vcpu_addr; - __u64 length; -}; #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) @@ -1580,20 +1258,8 @@ struct kvm_s390_ucas_mapping { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) -/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ - struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ - struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) @@ -1621,9 +1287,6 @@ struct kvm_master_dev_info * KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) -/* Available with KVM_CAP_PCI_2_3 */ -#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ - struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ @@ -1680,8 +1343,6 @@ struct kvm_master_dev_info #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -1789,89 +1450,6 @@ struct 
kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -struct kvm_s390_pv_sec_parm { - __u64 origin; - __u64 length; -}; - -struct kvm_s390_pv_unp { - __u64 addr; - __u64 size; - __u64 tweak; -}; - -enum pv_cmd_dmp_id { - KVM_PV_DUMP_INIT, - KVM_PV_DUMP_CONFIG_STOR_STATE, - KVM_PV_DUMP_COMPLETE, - KVM_PV_DUMP_CPU, -}; - -struct kvm_s390_pv_dmp { - __u64 subcmd; - __u64 buff_addr; - __u64 buff_len; - __u64 gaddr; /* For dump storage state */ - __u64 reserved[4]; -}; - -enum pv_cmd_info_id { - KVM_PV_INFO_VM, - KVM_PV_INFO_DUMP, -}; - -struct kvm_s390_pv_info_dump { - __u64 dump_cpu_buffer_len; - __u64 dump_config_mem_buffer_per_1m; - __u64 dump_config_finalize_len; -}; - -struct kvm_s390_pv_info_vm { - __u64 inst_calls_list[4]; - __u64 max_cpus; - __u64 max_guests; - __u64 max_guest_addr; - __u64 feature_indication; -}; - -struct kvm_s390_pv_info_header { - __u32 id; - __u32 len_max; - __u32 len_written; - __u32 reserved; -}; - -struct kvm_s390_pv_info { - struct kvm_s390_pv_info_header header; - union { - struct kvm_s390_pv_info_dump dump; - struct kvm_s390_pv_info_vm vm; - }; -}; - -enum pv_cmd_id { - KVM_PV_ENABLE, - KVM_PV_DISABLE, - KVM_PV_SET_SEC_PARMS, - KVM_PV_UNPACK, - KVM_PV_VERIFY, - KVM_PV_PREP_RESET, - KVM_PV_UNSHARE_ALL, - KVM_PV_INFO, - KVM_PV_DUMP, - KVM_PV_ASYNC_CLEANUP_PREPARE, - KVM_PV_ASYNC_CLEANUP_PERFORM, -}; - -struct kvm_pv_cmd { - __u32 cmd; /* Command to be executed */ - __u16 rc; /* Ultravisor return code */ - __u16 rrc; /* Ultravisor return reason code */ - __u64 data; /* Data or address */ - __u32 flags; /* flags for future extensions. Must be 0 for now */ - __u32 reserved[3]; -}; - /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) @@ -1885,58 +1463,6 @@ struct kvm_pv_cmd { #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) -struct kvm_xen_hvm_attr { - __u16 type; - __u16 pad[3]; - union { - __u8 long_mode; - __u8 vector; - __u8 runstate_update_flag; - struct { - __u64 gfn; -#define KVM_XEN_INVALID_GFN ((__u64)-1) - } shared_info; - struct { - __u32 send_port; - __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ - __u32 flags; -#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) -#define KVM_XEN_EVTCHN_UPDATE (1 << 1) -#define KVM_XEN_EVTCHN_RESET (1 << 2) - /* - * Events sent by the guest are either looped back to - * the guest itself (potentially on a different port#) - * or signalled via an eventfd. 
- */ - union { - struct { - __u32 port; - __u32 vcpu; - __u32 priority; - } port; - struct { - __u32 port; /* Zero for eventfd */ - __s32 fd; - } eventfd; - __u32 padding[4]; - } deliver; - } evtchn; - __u32 xen_version; - __u64 pad[8]; - } u; -}; - - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 -#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 -#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 -#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ -#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 - /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) @@ -1947,157 +1473,6 @@ struct kvm_xen_hvm_attr { #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) -struct kvm_xen_vcpu_attr { - __u16 type; - __u16 pad[3]; - union { - __u64 gpa; -#define KVM_XEN_INVALID_GPA ((__u64)-1) - __u64 pad[8]; - struct { - __u64 state; - __u64 state_entry_time; - __u64 time_running; - __u64 time_runnable; - __u64 time_blocked; - __u64 time_offline; - } runstate; - __u32 vcpu_id; - struct { - __u32 port; - __u32 priority; - __u64 expires_ns; - } timer; - __u8 vector; - } u; -}; - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 -#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 -#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 - -/* Secure Encrypted Virtualization command */ -enum sev_cmd_id { - /* Guest initialization commands */ - KVM_SEV_INIT = 0, - KVM_SEV_ES_INIT, - /* Guest launch commands */ - KVM_SEV_LAUNCH_START, - KVM_SEV_LAUNCH_UPDATE_DATA, - KVM_SEV_LAUNCH_UPDATE_VMSA, - KVM_SEV_LAUNCH_SECRET, - KVM_SEV_LAUNCH_MEASURE, - KVM_SEV_LAUNCH_FINISH, - /* Guest migration commands (outgoing) */ - KVM_SEV_SEND_START, - KVM_SEV_SEND_UPDATE_DATA, - KVM_SEV_SEND_UPDATE_VMSA, - KVM_SEV_SEND_FINISH, - /* Guest migration commands (incoming) */ - KVM_SEV_RECEIVE_START, - KVM_SEV_RECEIVE_UPDATE_DATA, - KVM_SEV_RECEIVE_UPDATE_VMSA, - KVM_SEV_RECEIVE_FINISH, - /* Guest status and debug commands */ - KVM_SEV_GUEST_STATUS, - KVM_SEV_DBG_DECRYPT, - KVM_SEV_DBG_ENCRYPT, - /* Guest certificates commands */ - KVM_SEV_CERT_EXPORT, - /* Attestation report */ - KVM_SEV_GET_ATTESTATION_REPORT, - /* Guest Migration Extension */ - KVM_SEV_SEND_CANCEL, - - /* Hygon CSV batch command */ - KVM_CSV_COMMAND_BATCH = 0x18, - - KVM_SEV_NR_MAX, -}; - -struct kvm_sev_cmd { - __u32 id; - __u64 data; - __u32 error; - __u32 sev_fd; -}; - -struct kvm_sev_launch_start { - __u32 handle; - __u32 policy; - __u64 dh_uaddr; - __u32 dh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_launch_update_data { - __u64 uaddr; - __u32 len; -}; - - -struct kvm_sev_launch_secret { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - 
-struct kvm_sev_launch_measure { - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_guest_status { - __u32 handle; - __u32 policy; - __u32 state; -}; - -struct kvm_sev_dbg { - __u64 src_uaddr; - __u64 dst_uaddr; - __u32 len; -}; - -struct kvm_sev_attestation_report { - __u8 mnonce[16]; - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_send_start { - __u32 policy; - __u64 pdh_cert_uaddr; - __u32 pdh_cert_len; - __u64 plat_certs_uaddr; - __u32 plat_certs_len; - __u64 amd_certs_uaddr; - __u32 amd_certs_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_send_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - struct kvm_sev_send_update_vmsa { __u32 vcpu_id; __u64 hdr_uaddr; @@ -2106,24 +1481,6 @@ struct kvm_sev_send_update_vmsa { __u32 trans_len; }; -struct kvm_sev_receive_start { - __u32 handle; - __u32 policy; - __u64 pdh_uaddr; - __u32 pdh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_receive_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - struct kvm_sev_receive_update_vmsa { __u32 vcpu_id; __u64 hdr_uaddr; @@ -2216,76 +1573,6 @@ struct kvm_csv3_handle_memory { __u32 opcode; }; -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) - -struct kvm_assigned_pci_dev { - __u32 assigned_dev_id; - __u32 busnr; - __u32 devfn; - __u32 flags; - __u32 segnr; - union { - __u32 reserved[11]; - }; -}; - -#define KVM_DEV_IRQ_HOST_INTX (1 << 0) -#define KVM_DEV_IRQ_HOST_MSI (1 << 1) -#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) - -#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) -#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) -#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) - -#define KVM_DEV_IRQ_HOST_MASK 0x00ff -#define KVM_DEV_IRQ_GUEST_MASK 0xff00 - -struct kvm_assigned_irq { - __u32 assigned_dev_id; - __u32 host_irq; /* ignored (legacy field) */ - __u32 guest_irq; - __u32 flags; - union { - __u32 reserved[12]; - }; -}; - -struct kvm_assigned_msix_nr { - __u32 assigned_dev_id; - __u16 entry_nr; - __u16 padding; -}; - -#define KVM_MAX_MSIX_PER_DEV 256 -struct kvm_assigned_msix_entry { - __u32 assigned_dev_id; - __u32 gsi; - __u16 entry; /* The index of entry in the MSI-X table */ - __u16 padding[3]; -}; - -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) - -/* Available with KVM_CAP_ARM_USER_IRQ */ - -/* Bits for run->s.regs.device_irq_level */ -#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) -#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) -#define KVM_ARM_DEV_PMU (1 << 2) - -struct kvm_hyperv_eventfd { - __u32 conn_id; - __s32 fd; - __u32 flags; - __u32 padding[3]; -}; - -#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff -#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) - #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) @@ -2431,33 +1718,6 @@ struct kvm_stats_desc { /* Available with KVM_CAP_S390_ZPCI_OP */ #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) -struct kvm_s390_zpci_op { - /* in */ - __u32 fh; /* target device */ - __u8 op; /* operation to perform */ - __u8 pad[3]; - union { - /* for KVM_S390_ZPCIOP_REG_AEN */ - struct { - __u64 ibv; /* Guest addr of interrupt bit vector */ - __u64 sb; /* Guest addr of summary bit */ - __u32 flags; - __u32 noi; /* Number of interrupts */ - __u8 isc; /* Guest interrupt subclass */ - __u8 sbo; /* Offset 
of guest summary bit vector */
-			__u16 pad;
-		} reg_aen;
-		__u64 reserved[8];
-	} u;
-};
-
-/* types for kvm_s390_zpci_op->op */
-#define KVM_S390_ZPCIOP_REG_AEN		0
-#define KVM_S390_ZPCIOP_DEREG_AEN	1
-
-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
-#define KVM_S390_ZPCIOP_REGAEN_HOST	(1 << 0)
-
 /* get tmi version */
 #define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, uint64_t)
 #define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001
@@ -2469,4 +1729,24 @@
 struct kvm_arm_rmm_psci_complete {
 	__u32 padding[3];
 };
 
+/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
+#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
+
+struct kvm_memory_attributes {
+	__u64 address;
+	__u64 size;
+	__u64 attributes;
+	__u64 flags;
+};
+
+#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
+
+#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
+
+struct kvm_create_guest_memfd {
+	__u64 size;
+	__u64 flags;
+	__u64 reserved[6];
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a1e672259c332c89b41e5e14e81cd3403b784a4
--- /dev/null
+++ b/linux-headers/linux/kvm_para.h
@@ -0,0 +1,2 @@
+#include "standard-headers/linux/kvm_para.h"
+#include <asm/kvm_para.h>
diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h
index bcb21339ee39fae6739e70c27e98b2d336dcc253..c3046c6bfff5176094074e3ea0e48a27c1065890 100644
--- a/linux-headers/linux/psp-sev.h
+++ b/linux-headers/linux/psp-sev.h
@@ -28,6 +28,9 @@ enum {
 	SEV_PEK_CERT_IMPORT,
 	SEV_GET_ID,	/* This command is deprecated, use SEV_GET_ID2 */
 	SEV_GET_ID2,
+	SNP_PLATFORM_STATUS,
+	SNP_COMMIT,
+	SNP_SET_CONFIG,
 	SEV_MAX,
 };
@@ -69,6 +72,12 @@ typedef enum {
 	SEV_RET_RESOURCE_LIMIT,
 	SEV_RET_SECURE_DATA_INVALID,
 	SEV_RET_INVALID_KEY = 0x27,
+	SEV_RET_INVALID_PAGE_SIZE,
+	SEV_RET_INVALID_PAGE_STATE,
+	SEV_RET_INVALID_MDATA_ENTRY,
+	SEV_RET_INVALID_PAGE_OWNER,
+	SEV_RET_INVALID_PAGE_AEAD_OFLOW,
+	SEV_RET_RMP_INIT_REQUIRED,
 	SEV_RET_MAX,
 } sev_ret_code;
@@ -155,6 +164,56 @@ struct sev_user_data_get_id2 {
 	__u32 length;			/* In/Out */
 } __attribute__((packed));
 
+/**
+ * struct sev_user_data_snp_status - SNP status
+ *
+ * @api_major: API major version
+ * @api_minor: API minor version
+ * @state: current platform state
+ * @is_rmp_initialized: whether RMP is initialized or not
+ * @rsvd: reserved
+ * @build_id: firmware build id for the API version
+ * @mask_chip_id: whether chip id is present in attestation reports or not
+ * @mask_chip_key: whether attestation reports are signed or not
+ * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded
+ * @rsvd1: reserved
+ * @guest_count: the number of guest currently managed by the firmware
+ * @current_tcb_version: current TCB version
+ * @reported_tcb_version: reported TCB version
+ */
+struct sev_user_data_snp_status {
+	__u8 api_major;			/* Out */
+	__u8 api_minor;			/* Out */
+	__u8 state;			/* Out */
+	__u8 is_rmp_initialized:1;	/* Out */
+	__u8 rsvd:7;
+	__u32 build_id;			/* Out */
+	__u32 mask_chip_id:1;		/* Out */
+	__u32 mask_chip_key:1;		/* Out */
+	__u32 vlek_en:1;		/* Out */
+	__u32 rsvd1:29;
+	__u32 guest_count;		/* Out */
+	__u64 current_tcb_version;	/* Out */
+	__u64 reported_tcb_version;	/* Out */
+} __attribute__((packed));
+
+/**
+ * struct sev_user_data_snp_config - system wide configuration value for SNP.
+ *
+ * @reported_tcb: the TCB version to report in the guest attestation report.
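The two vm ioctls added above work as a pair: KVM_CREATE_GUEST_MEMFD returns a new file descriptor for guest-private memory, and KVM_SET_MEMORY_ATTRIBUTES flips a GPA range between shared and private. A minimal sketch under the same assumptions as before (vm_fd, gpa and size supplied by the caller, headers from this update):

```c
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int make_range_private(int vm_fd, uint64_t gpa, uint64_t size,
                              int *gmem_fd_out)
{
    struct kvm_create_guest_memfd gmem;
    struct kvm_memory_attributes attrs;
    int fd;

    /* Allocate the private backing; the fd can then be passed as the
     * guest_memfd field of kvm_userspace_memory_region2. */
    memset(&gmem, 0, sizeof(gmem));
    gmem.size = size;
    fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
    if (fd < 0) {
        return fd;
    }
    *gmem_fd_out = fd;

    /* Mark the range private so accesses are served from the memfd. */
    memset(&attrs, 0, sizeof(attrs));
    attrs.address    = gpa;
    attrs.size       = size;
    attrs.attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE;
    return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}
```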
+ * @mask_chip_id: whether chip id is present in attestation reports or not + * @mask_chip_key: whether attestation reports are signed or not + * @rsvd: reserved + * @rsvd1: reserved + */ +struct sev_user_data_snp_config { + __u64 reported_tcb ; /* In */ + __u32 mask_chip_id:1; /* In */ + __u32 mask_chip_key:1; /* In */ + __u32 rsvd:30; /* In */ + __u8 rsvd1[52]; +} __attribute__((packed)); + /** * struct sev_issue_cmd - SEV ioctl parameters * diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 953c75fedae9eac851fed94cac653c29aa493148..4283de22d5b6594f6942b0e48a1195bfd897ed97 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -41,7 +41,8 @@ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ UFFD_FEATURE_WP_UNPOPULATED | \ UFFD_FEATURE_POISON | \ - UFFD_FEATURE_WP_ASYNC) + UFFD_FEATURE_WP_ASYNC | \ + UFFD_FEATURE_MOVE) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -50,6 +51,7 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_MOVE | \ (__u64)1 << _UFFDIO_WRITEPROTECT | \ (__u64)1 << _UFFDIO_CONTINUE | \ (__u64)1 << _UFFDIO_POISON) @@ -73,6 +75,7 @@ #define _UFFDIO_WAKE (0x02) #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_MOVE (0x05) #define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_CONTINUE (0x07) #define _UFFDIO_POISON (0x08) @@ -92,6 +95,8 @@ struct uffdio_copy) #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ struct uffdio_zeropage) +#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \ + struct uffdio_move) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ @@ -222,6 +227,9 @@ struct uffdio_api { * asynchronous mode is supported in which the write fault is * automatically resolved and write-protection is un-set. * It implies UFFD_FEATURE_WP_UNPOPULATED. + * + * UFFD_FEATURE_MOVE indicates that the kernel supports moving an + * existing page contents from userspace. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -239,6 +247,7 @@ struct uffdio_api { #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) #define UFFD_FEATURE_POISON (1<<14) #define UFFD_FEATURE_WP_ASYNC (1<<15) +#define UFFD_FEATURE_MOVE (1<<16) __u64 features; __u64 ioctls; @@ -347,6 +356,24 @@ struct uffdio_poison { __s64 updated; }; +struct uffdio_move { + __u64 dst; + __u64 src; + __u64 len; + /* + * Especially if used to atomically remove memory from the + * address space the wake on the dst range is not needed. + */ +#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1) + __u64 mode; + /* + * "move" is written by the ioctl and must be at the end: the + * copy_from_user will not read the last 8 bytes. + */ + __s64 move; +}; + /* * Flags for the userfaultfd(2) system call itself. 
*/ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 5b1e2871affae86f15b71e3fb84f73ba0015346e..68f7d446b3d4c539f82ba67015fff95d1bc05385 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -1231,6 +1231,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING_P2P = 5, VFIO_DEVICE_STATE_PRE_COPY = 6, VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, + VFIO_DEVICE_STATE_NR, }; /** diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index f5c05abe8b0a5bba7857318437c375a12014bda3..21072f6f660858d6f8cd0360b067e1365a6713c0 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -183,12 +183,6 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) -/* Get the count of all virtqueues */ -#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) - -/* Get the number of virtqueue groups. */ -#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) - /* Get the number of address spaces. */ #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) @@ -268,4 +262,16 @@ /* set device migtration state */ #define VHOST_VDPA_SET_MIG_STATE _IOW(VHOST_VIRTIO, 0xb2, __u8) +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + +/* Get the queue size of a specific virtqueue. + * userspace set the vring index in vhost_vring_state.index + * kernel set the queue size in vhost_vring_state.num + */ +#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ + struct vhost_vring_state) #endif diff --git a/memory_ldst.c.inc b/memory_ldst.c.inc index 84b868f29464853b60cd070534d1d3a346692a6d..0e6f3940a9a155114f5967500a276059f0f67ca6 100644 --- a/memory_ldst.c.inc +++ b/memory_ldst.c.inc @@ -61,7 +61,7 @@ static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -130,7 +130,7 @@ static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -186,7 +186,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -234,7 +234,7 @@ static inline uint16_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -295,7 +295,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -339,7 +339,7 @@ static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -391,7 +391,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -435,7 +435,7 @@ static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -499,7 +499,7 @@ static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL, 
*result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } diff --git a/meson.build b/meson.build index d221f5cad53ec5864b35a8b4132d46df4d3c8b57..2f8602e1a3d9f01e59cc7884e1d7a7fddab320c5 100644 --- a/meson.build +++ b/meson.build @@ -435,6 +435,22 @@ if get_option('fuzzing') endif endif +# Check further flags that make QEMU more robust against malicious parties + +hardening_flags = [ + # Zero out registers used during a function call + # upon its return. This makes it harder to assemble + # ROP gadgets into something usable + '-fzero-call-used-regs=used-gpr', + + # Initialize all stack variables to zero. This makes + # it harder to take advantage of uninitialized stack + # data to drive exploits + '-ftrivial-auto-var-init=zero', +] + +qemu_common_flags += cc.get_supported_arguments(hardening_flags) + add_global_arguments(qemu_common_flags, native: false, language: all_languages) add_global_link_arguments(qemu_ldflags, native: false, language: all_languages) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 24347ab0f756a61509954e1d9da103e15fc99242..92e031b6fa74d15623b5c1fffe8e25f4b44296bd 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -774,7 +774,7 @@ static void dirty_bitmap_state_pending(void *opaque, SaveBitmapState *dbms; uint64_t pending = 0; - qemu_mutex_lock_iothread(); + bql_lock(); QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) { uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap); @@ -784,7 +784,7 @@ static void dirty_bitmap_state_pending(void *opaque, pending += DIV_ROUND_UP(sectors * BDRV_SECTOR_SIZE, gran); } - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_dirty_bitmap_state_pending(pending); diff --git a/migration/block.c b/migration/block.c index 710ef6f4909b905a82cd0926c39659f0a6ee14ef..f7e34e5f8d5414174f8157cf24e405b1c13aee9f 100644 --- a/migration/block.c +++ b/migration/block.c @@ -269,7 +269,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) int64_t count; if (bmds->shared_base) { - qemu_mutex_lock_iothread(); + bql_lock(); aio_context_acquire(blk_get_aio_context(bb)); /* Skip unallocated sectors; intentionally treats failure or * partial sector as an allocated sector */ @@ -282,7 +282,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) cur_sector += count >> BDRV_SECTOR_BITS; } aio_context_release(blk_get_aio_context(bb)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } if (cur_sector >= total_sectors) { @@ -321,14 +321,14 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) * This is ugly and will disappear when we make bdrv_* thread-safe, * without the need to acquire the AioContext. */ - qemu_mutex_lock_iothread(); + bql_lock(); aio_context_acquire(blk_get_aio_context(bmds->blk)); bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE, nr_sectors * BDRV_SECTOR_SIZE); blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov, 0, blk_mig_read_cb, blk); aio_context_release(blk_get_aio_context(bmds->blk)); - qemu_mutex_unlock_iothread(); + bql_unlock(); bmds->cur_sector = cur_sector + nr_sectors; return (bmds->cur_sector >= total_sectors); @@ -789,9 +789,9 @@ static int block_save_iterate(QEMUFile *f, void *opaque) /* Always called with iothread lock taken for * simplicity, block_save_complete also calls it. 
*/ - qemu_mutex_lock_iothread(); + bql_lock(); ret = blk_mig_save_dirty_block(f, 1); - qemu_mutex_unlock_iothread(); + bql_unlock(); } if (ret < 0) { return ret; @@ -863,9 +863,9 @@ static void block_state_pending(void *opaque, uint64_t *must_precopy, /* Estimate pending number of bytes to send */ uint64_t pending; - qemu_mutex_lock_iothread(); + bql_lock(); pending = get_remaining_dirty(); - qemu_mutex_unlock_iothread(); + bql_unlock(); blk_mig_lock(); pending += block_mig_state.submitted * BLK_MIG_BLOCK_SIZE + diff --git a/migration/colo.c b/migration/colo.c index 8f301b7e57164d484b0ffb28778ba82269611e60..27cb3a9beacafacdaa31e6136ae2c58203186f39 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -420,13 +420,13 @@ static int colo_do_checkpoint_transaction(MigrationState *s, qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); bioc->usage = 0; - qemu_mutex_lock_iothread(); + bql_lock(); if (failover_get_state() != FAILOVER_STATUS_NONE) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } vm_stop_force_state(RUN_STATE_COLO); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("run", "stop"); /* * Failover request bh could be called after vm_stop_force_state(), @@ -435,23 +435,23 @@ static int colo_do_checkpoint_transaction(MigrationState *s, if (failover_get_state() != FAILOVER_STATUS_NONE) { goto out; } - qemu_mutex_lock_iothread(); + bql_lock(); replication_do_checkpoint_all(&local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } /* Note: device state is saved into buffer */ ret = qemu_save_device_state(fb); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (ret < 0) { goto out; } @@ -504,9 +504,9 @@ static int colo_do_checkpoint_transaction(MigrationState *s, ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); out: @@ -557,15 +557,15 @@ static void colo_process_checkpoint(MigrationState *s) fb = qemu_file_new_output(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); - qemu_mutex_lock_iothread(); + bql_lock(); replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + @@ -639,14 +639,14 @@ out: void migrate_start_colo_process(MigrationState *s) { - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_event_init(&s->colo_checkpoint_event, false); s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, colo_checkpoint_notify, s); qemu_sem_init(&s->colo_exit_sem, 0); colo_process_checkpoint(s); - qemu_mutex_lock_iothread(); + bql_lock(); } static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, @@ -657,9 +657,9 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, Error *local_err = NULL; int ret; - qemu_mutex_lock_iothread(); + bql_lock(); vm_stop_force_state(RUN_STATE_COLO); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("run", "stop"); /* FIXME: This is unnecessary for periodic checkpoint mode */ @@ -677,10 +677,10 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, return; } - qemu_mutex_lock_iothread(); + bql_lock(); 
cpu_synchronize_all_states(); ret = qemu_loadvm_state_main(mis->from_src_file, mis); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (ret < 0) { error_setg(errp, "Load VM's live state (ram) error"); @@ -719,14 +719,14 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, return; } - qemu_mutex_lock_iothread(); + bql_lock(); vmstate_loading = true; colo_flush_ram_cache(); ret = qemu_load_device_state(fb); if (ret < 0) { error_setg(errp, "COLO: load device state failed"); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } @@ -734,7 +734,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } @@ -743,7 +743,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } /* Notify all filters of all NIC to do checkpoint */ @@ -752,13 +752,13 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } vmstate_loading = false; vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { @@ -831,14 +831,14 @@ static void *colo_process_incoming_thread(void *opaque) } /* Make sure all file formats throw away their mutable metadata */ - qemu_mutex_lock_iothread(); + bql_lock(); bdrv_activate_all(&local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); error_report_err(local_err); return NULL; } - qemu_mutex_unlock_iothread(); + bql_unlock(); failover_init_state(); @@ -861,14 +861,14 @@ static void *colo_process_incoming_thread(void *opaque) fb = qemu_file_new_input(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); - qemu_mutex_lock_iothread(); + bql_lock(); replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, @@ -929,7 +929,7 @@ int coroutine_fn colo_incoming_co(void) MigrationIncomingState *mis = migration_incoming_get_current(); QemuThread th; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (!migration_incoming_colo_enabled()) { return 0; @@ -942,10 +942,10 @@ int coroutine_fn colo_incoming_co(void) qemu_coroutine_yield(); mis->colo_incoming_co = NULL; - qemu_mutex_unlock_iothread(); + bql_unlock(); /* Wait checkpoint incoming thread exit before free resource */ qemu_thread_join(&th); - qemu_mutex_lock_iothread(); + bql_lock(); /* We hold the global iothread lock, so it is safe here */ colo_release_ram_cache(); diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 036ac017fc91f111dcfca1b2dc34bf8be8cb0648..429d10c4d9ce91042f70fab745d624b2173f91c6 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -90,13 +90,13 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages, void global_dirty_log_change(unsigned int flag, bool start) { - qemu_mutex_lock_iothread(); + bql_lock(); if (start) { memory_global_dirty_log_start(flag); } else { memory_global_dirty_log_stop(flag); } - 
qemu_mutex_unlock_iothread(); + bql_unlock(); } /* @@ -106,12 +106,12 @@ void global_dirty_log_change(unsigned int flag, bool start) */ static void global_dirty_log_sync(unsigned int flag, bool one_shot) { - qemu_mutex_lock_iothread(); + bql_lock(); memory_global_dirty_log_sync(false); if (one_shot) { memory_global_dirty_log_stop(flag); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat) @@ -610,7 +610,7 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) int64_t start_time; DirtyPageRecord dirty_pages; - qemu_mutex_lock_iothread(); + bql_lock(); memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); /* @@ -627,7 +627,7 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled. */ dirtyrate_manual_reset_protect(); - qemu_mutex_unlock_iothread(); + bql_unlock(); record_dirtypages_bitmap(&dirty_pages, true); diff --git a/migration/migration.c b/migration/migration.c index 526f926b797279958db61c1b163e60f76577e384..7e3202bd432245f44cd4de6f61100f02deee0464 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -67,6 +67,7 @@ #include "options.h" #include "sysemu/dirtylimit.h" #include "qemu/sockets.h" +#include "sysemu/kvm.h" #define DEFAULT_FD_MAX 4096 @@ -1311,12 +1312,12 @@ static void migrate_fd_cleanup(MigrationState *s) QEMUFile *tmp; trace_migrate_fd_cleanup(); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (s->migration_thread_running) { qemu_thread_join(&s->thread); s->migration_thread_running = false; } - qemu_mutex_lock_iothread(); + bql_lock(); multifd_send_shutdown(); qemu_mutex_lock(&s->qemu_file_lock); @@ -1937,6 +1938,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (kvm_hwpoisoned_mem()) { + error_setg(errp, "Can't migrate this vm with hardware poisoned memory, " + "please reboot the vm and try again"); + return false; + } + if (migration_is_blocked(errp)) { return false; } @@ -2455,7 +2462,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) } trace_postcopy_start(); - qemu_mutex_lock_iothread(); + bql_lock(); trace_postcopy_start_set_run(); migration_downtime_start(ms); @@ -2564,7 +2571,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) migration_downtime_end(ms); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (migrate_postcopy_ram()) { /* @@ -2605,7 +2612,7 @@ fail: error_report_err(local_err); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); return -1; } @@ -2639,14 +2646,14 @@ static int migration_maybe_pause(MigrationState *s, * wait for the 'pause_sem' semaphore. */ if (s->state != MIGRATION_STATUS_CANCELLING) { - qemu_mutex_unlock_iothread(); + bql_unlock(); migrate_set_state(&s->state, *current_active_state, MIGRATION_STATUS_PRE_SWITCHOVER); qemu_sem_wait(&s->pause_sem); migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, new_state); *current_active_state = new_state; - qemu_mutex_lock_iothread(); + bql_lock(); } return s->state == new_state ? 
0 : -EINVAL; @@ -2657,7 +2664,7 @@ static int migration_completion_precopy(MigrationState *s, { int ret; - qemu_mutex_lock_iothread(); + bql_lock(); migration_downtime_start(s); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); @@ -2685,7 +2692,7 @@ static int migration_completion_precopy(MigrationState *s, ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, s->block_inactive); out_unlock: - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -2693,9 +2700,9 @@ static void migration_completion_postcopy(MigrationState *s) { trace_migration_completion_postcopy_end(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_complete_postcopy(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* * Shutdown the postcopy fast path thread. This is only needed when dest @@ -2719,14 +2726,14 @@ static void migration_completion_failed(MigrationState *s, */ Error *local_err = NULL; - qemu_mutex_lock_iothread(); + bql_lock(); bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } else { s->block_inactive = false; } - qemu_mutex_unlock_iothread(); + bql_unlock(); } migrate_set_state(&s->state, current_active_state, @@ -3166,7 +3173,7 @@ static void migration_iteration_finish(MigrationState *s) /* If we enabled cpu throttling for auto-converge, turn it off. */ cpu_throttle_stop(); - qemu_mutex_lock_iothread(); + bql_lock(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: migration_calculate_complete(s); @@ -3197,7 +3204,7 @@ static void migration_iteration_finish(MigrationState *s) break; } migrate_fd_cleanup_schedule(s); - qemu_mutex_unlock_iothread(); + bql_unlock(); } static void bg_migration_iteration_finish(MigrationState *s) @@ -3209,7 +3216,7 @@ static void bg_migration_iteration_finish(MigrationState *s) */ ram_write_tracking_stop(); - qemu_mutex_lock_iothread(); + bql_lock(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: migration_calculate_complete(s); @@ -3228,7 +3235,7 @@ static void bg_migration_iteration_finish(MigrationState *s) } migrate_fd_cleanup_schedule(s); - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* @@ -3357,9 +3364,9 @@ static void *migration_thread(void *opaque) goto out; } - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_header(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* * If we opened the return path, we need to make sure dst has it @@ -3387,9 +3394,9 @@ static void *migration_thread(void *opaque) qemu_savevm_send_colo_enable(s->to_dst_file); } - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_setup(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -3501,10 +3508,10 @@ static void *bg_migration_thread(void *opaque) ram_write_tracking_prepare(); #endif - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_header(s->to_dst_file); qemu_savevm_state_setup(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -3514,7 +3521,7 @@ static void *bg_migration_thread(void *opaque) trace_migration_thread_setup_complete(); migration_downtime_start(s); - qemu_mutex_lock_iothread(); + bql_lock(); /* * If VM is currently in suspended state, then, to make a valid runstate @@ -3557,7 +3564,7 @@ static void *bg_migration_thread(void *opaque) s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); qemu_bh_schedule(s->vm_start_bh); - qemu_mutex_unlock_iothread(); 
+ bql_unlock(); while (migration_is_active(s)) { MigIterateState iter_state = bg_migration_iteration_run(s); @@ -3586,7 +3593,7 @@ fail: if (early_fail) { migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); - qemu_mutex_unlock_iothread(); + bql_unlock(); } bg_migration_iteration_finish(s); diff --git a/migration/ram.c b/migration/ram.c index e6baecf14306c74b3d1a84e4372aeadeb4689e1e..26708cf3f2fe4028098d33adb8bf8da75f0c8539 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3443,9 +3443,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) migration_ops->ram_save_target_page = ram_save_target_page_legacy; } - qemu_mutex_unlock_iothread(); + bql_unlock(); ret = multifd_send_sync_main(); - qemu_mutex_lock_iothread(); + bql_lock(); if (ret < 0) { return ret; } @@ -3702,11 +3702,11 @@ static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy, uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; if (!migration_in_postcopy() && remaining_size < s->threshold_size) { - qemu_mutex_lock_iothread(); + bql_lock(); WITH_RCU_READ_LOCK_GUARD() { migration_bitmap_sync_precopy(rs, false); } - qemu_mutex_unlock_iothread(); + bql_unlock(); remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } @@ -3934,7 +3934,7 @@ void colo_incoming_start_dirty_log(void) { RAMBlock *block = NULL; /* For memory_global_dirty_log_start below. */ - qemu_mutex_lock_iothread(); + bql_lock(); qemu_mutex_lock_ramlist(); memory_global_dirty_log_sync(false); @@ -3948,7 +3948,7 @@ void colo_incoming_start_dirty_log(void) } ram_state->migration_dirty_pages = 0; qemu_mutex_unlock_ramlist(); - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* It is need to hold the global lock to call this helper */ diff --git a/migration/savevm.c b/migration/savevm.c index bde05eebe4754feb537e7b23de0bee0e48f89cd7..563a4e55991cb897079599b932cb3a2838f7b23f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2857,7 +2857,7 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) uint8_t section_type; int ret = 0; - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { memory_region_transaction_begin(); } @@ -2904,7 +2904,7 @@ retry: } out: - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { memory_region_transaction_commit(); } if (ret < 0) { diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 3df0062f3dc872bd50c0e014b243f6e3fb088322..e9952b218cc6f719bf9dcd4aa21491fb5a9999cb 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -47,6 +47,50 @@ 'send-update', 'receive-update' ], 'if': 'TARGET_I386' } +## +# @SevGuestType: +# +# An enumeration indicating the type of SEV guest being run. +# +# @sev: The guest is a legacy SEV or SEV-ES guest. +# +# @sev-snp: The guest is an SEV-SNP guest. +# +# Since: 6.2 +## +{ 'enum': 'SevGuestType', + 'data': [ 'sev', 'sev-snp' ], + 'if': 'TARGET_I386' } + +## +# @SevGuestInfo: +# +# Information specific to legacy SEV/SEV-ES guests. +# +# @policy: SEV policy value +# +# @handle: SEV firmware handle +# +# Since: 2.12 +## +{ 'struct': 'SevGuestInfo', + 'data': { 'policy': 'uint32', + 'handle': 'uint32' }, + 'if': 'TARGET_I386' } + +## +# @SevSnpGuestInfo: +# +# Information specific to SEV-SNP guests. 
+# +# @snp-policy: SEV-SNP policy value +# +# Since: 9.1 +## +{ 'struct': 'SevSnpGuestInfo', + 'data': { 'snp-policy': 'uint64' }, + 'if': 'TARGET_I386' } + ## # @SevInfo: # @@ -60,25 +104,25 @@ # # @build-id: SEV FW build id # -# @policy: SEV policy value -# # @state: SEV guest state # -# @handle: SEV firmware handle +# @sev-type: Type of SEV guest being run # # Since: 2.12 ## -{ 'struct': 'SevInfo', - 'data': { 'enabled': 'bool', - 'api-major': 'uint8', - 'api-minor' : 'uint8', - 'build-id' : 'uint8', - 'policy' : 'uint32', - 'state' : 'SevState', - 'handle' : 'uint32' - }, - 'if': 'TARGET_I386' -} +{ 'union': 'SevInfo', + 'base': { 'enabled': 'bool', + 'api-major': 'uint8', + 'api-minor' : 'uint8', + 'build-id' : 'uint8', + 'state' : 'SevState', + 'sev-type' : 'SevGuestType' }, + 'discriminator': 'sev-type', + 'data': { + 'sev': 'SevGuestInfo', + 'sev-snp': 'SevSnpGuestInfo' }, + 'if': 'TARGET_I386' } + ## # @query-sev: diff --git a/qapi/qom.json b/qapi/qom.json index e0590a6019dfbca03db02463d0170da3d953d70e..12e526aeb7142da0932d35224958107c2e390d16 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -860,20 +860,12 @@ 'data': { '*filename': 'str' } } ## -# @SevGuestProperties: +# @SevCommonProperties: # -# Properties for sev-guest objects. +# Properties common to objects that are derivatives of sev-common. # # @sev-device: SEV device to use (default: "/dev/sev") # -# @dh-cert-file: guest owners DH certificate (encoded with base64) -# -# @session-file: guest owners session parameters (encoded with base64) -# -# @policy: SEV policy value (default: 0x1) -# -# @handle: SEV firmware handle (default: 0) -# # @cbitpos: C-bit location in page table entry (default: 0) # # @reduced-phys-bits: number of bits in physical addresses that become @@ -883,6 +875,27 @@ # designated guest firmware page for measured boot with -kernel # (default: false) (since 6.2) # +# Since: 9.1 +## +{ 'struct': 'SevCommonProperties', + 'data': { '*sev-device': 'str', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', + '*kernel-hashes': 'bool' } } + +## +# @SevGuestProperties: +# +# Properties for sev-guest objects. +# +# @dh-cert-file: guest owners DH certificate (encoded with base64) +# +# @session-file: guest owners session parameters (encoded with base64) +# +# @policy: SEV policy value (default: 0x1) +# +# @handle: SEV firmware handle (default: 0) +# # @user-id: the user id of the guest owner, only support on Hygon CPUs # (since 8.2) # @@ -890,21 +903,86 @@ # support on Hygon CPUs (since 8.2) # @secret-file: the file guest owner's secret, only support on Hygon # CPUs (since 8.2) +# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. +# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs +# additional vCPU state when initializing the VMSA structures, +# which will result in a different guest measurement. Set +# this to 'on' to force compatibility with older QEMU or kernel +# versions that rely on legacy KVM_SEV_INIT behavior. 'auto' +# will behave identically to 'on', but will automatically +# switch to using KVM_SEV_INIT2 if the user specifies any +# additional options that require it. If set to 'off', QEMU +# will require KVM_SEV_INIT2 unconditionally. 
+# (default: off) (since 9.1) # # Since: 2.12 ## { 'struct': 'SevGuestProperties', - 'data': { '*sev-device': 'str', - '*dh-cert-file': 'str', + 'base': 'SevCommonProperties', + 'data': { '*dh-cert-file': 'str', '*session-file': 'str', '*policy': 'uint32', '*handle': 'uint32', - '*cbitpos': 'uint32', - 'reduced-phys-bits': 'uint32', - '*kernel-hashes': 'bool', '*user-id': 'str', '*secret-header-file': 'str', - '*secret-file': 'str' } } + '*secret-file': 'str', + '*legacy-vm-type': 'OnOffAuto' } } + +## +# @SevSnpGuestProperties: +# +# Properties for sev-snp-guest objects. Most of these are direct +# arguments for the KVM_SNP_* interfaces documented in the Linux +# kernel source under +# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn +# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands +# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9). +# +# More usage information is also available in the QEMU source tree +# under docs/amd-memory-encryption. +# +# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as +# defined in the SEV-SNP firmware ABI (default: 0x30000) +# +# @guest-visible-workarounds: 16-byte, base64-encoded blob to report +# hypervisor-defined workarounds, corresponding to the 'GOSVW' +# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP +# firmware ABI (default: all-zero) +# +# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block' +# structure for the SNP_LAUNCH_FINISH command defined in the +# SEV-SNP firmware ABI (default: all-zero) +# +# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID +# Authentication Information Structure' for the SNP_LAUNCH_FINISH +# command defined in the SEV-SNP firmware ABI (default: all-zero) +# +# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY' +# field defined in the SEV-SNP firmware ABI (default: false) +# +# @host-data: 32-byte, base64-encoded, user-defined blob to provide to +# the guest, as documented for the 'HOST_DATA' parameter of the +# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default: +# all-zero) +# +# @vcek-disabled: Guests are by default allowed to choose between VLEK +# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip +# Endorsement Key) when requesting attestation reports from +# firmware. Set this to true to disable the use of VCEK.
+# (default: false) (since: 9.1) +# +# Since: 9.1 +## +{ 'struct': 'SevSnpGuestProperties', + 'base': 'SevCommonProperties', + 'data': { + '*policy': 'uint64', + '*guest-visible-workarounds': 'str', + '*id-block': 'str', + '*id-auth': 'str', + '*author-key-enabled': 'bool', + '*host-data': 'str', + '*vcek-disabled': 'bool' } } ## # @ThreadContextProperties: @@ -1047,6 +1125,7 @@ { 'name': 'secret_keyring', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest', + 'sev-snp-guest', 'thread-context', 's390-pv-guest', 'throttle-group', @@ -1118,6 +1197,7 @@ 'secret_keyring': { 'type': 'SecretKeyringProperties', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest': 'SevGuestProperties', + 'sev-snp-guest': 'SevSnpGuestProperties', 'thread-context': 'ThreadContextProperties', 'throttle-group': 'ThrottleGroupProperties', 'tls-creds-anon': 'TlsCredsAnonProperties', diff --git a/replay/replay-internal.c b/replay/replay-internal.c index 77d0c82327ed5aeeee63f446bddc147d592af7eb..3e08e381cbbf23b667e6f2a0d696e438c74c75c6 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -216,7 +216,7 @@ void replay_mutex_lock(void) { if (replay_mode != REPLAY_MODE_NONE) { unsigned long id; - g_assert(!qemu_mutex_iothread_locked()); + g_assert(!bql_locked()); g_assert(!replay_mutex_locked()); qemu_mutex_lock(&lock); id = mutex_tail++; diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 88c76b8f69b07e9b7b587e27e87578686f99a7a9..6586201aab7125af7a7ea804bf339843b723b7c9 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -27,6 +27,8 @@ # types like "__u64". This work is done in the cp_portable function. tmpdir=$(mktemp -d) +hdrdir="$tmpdir/headers" +blddir="$tmpdir/build" linux="$1" output="$2" @@ -61,7 +63,8 @@ cp_portable() { -e 'linux/const' \ -e 'linux/kernel' \ -e 'linux/sysinfo' \ - -e 'asm-generic/kvm_para' \ + -e 'asm/setup_data.h' \ + -e 'asm/kvm_para.h' \ > /dev/null then echo "Unexpected #include in input file $f". 
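The replay-internal.c hunk above keeps an important invariant intact across the rename: the replay mutex is the outer lock, so it may only be taken while the BQL is not held, which is exactly what its two assertions check. A minimal C sketch of the ordering this enforces (illustrative only, not part of the patch; example_checkpoint is a made-up name):

/*
 * The replay mutex nests outside the BQL, so a caller that already
 * holds the BQL has to drop it first, the same dance that
 * pause_all_vcpus() in system/cpus.c performs later in this series.
 */
static void example_checkpoint(void)
{
    g_assert(bql_locked());

    bql_unlock();          /* BQL must not be held here... */
    replay_mutex_lock();   /* ...or its assertions would abort */
    bql_lock();            /* re-take the BQL as the inner lock */

    /* ... record or replay an event under both locks ... */

    replay_mutex_unlock(); /* the BQL stays held for the caller */
}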
@@ -69,6 +72,15 @@ fi header=$(basename "$f"); + + if test -z "$arch"; then + # Let users of include/standard-headers/linux/ headers pick the + # asm-* header that they care about + arch_cmd='/<asm\/\([^>]*\)>/d' + else + arch_cmd='s/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/' + fi + sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \ -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \ @@ -77,6 +89,7 @@ cp_portable() { -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ -e 's/<linux\/\([^>]*\)>/"standard-headers\/linux\/\1"/' \ + -e "$arch_cmd" \ -e 's/__bitwise//' \ -e 's/__attribute__((packed))/QEMU_PACKED/' \ -e 's/__inline__/inline/' \ @@ -110,70 +123,81 @@ for arch in $ARCHLIST; do arch_var=ARCH fi - make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install + make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" for header in kvm.h unistd.h bitsperlong.h mman.h; do - cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" + cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" done if [ $arch = mips ]; then - cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" - cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" - cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" - cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" fi if [ $arch = powerpc ]; then - cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" - cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" fi rm -rf "$output/include/standard-headers/asm-$arch" mkdir -p "$output/include/standard-headers/asm-$arch" if [ $arch = s390 ]; then - cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" - cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" - cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" + cp_portable "$hdrdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" fi if [ $arch = arm ]; then - cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" - cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" - cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" + cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" + cp "$hdrdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" fi if [ $arch = arm64 ]; then - cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" + cp "$hdrdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" fi if [ $arch = x86 ]; then - cp "$tmpdir/include/asm/unistd_32.h"
"$output/linux-headers/asm-x86/" - cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" - cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" - cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" + + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + cat <$output/linux-headers/asm-$arch/kvm_para.h +#include "standard-headers/asm-$arch/kvm_para.h" +EOF + # Remove everything except the macros from bootparam.h avoiding the # unnecessary import of several video/ist/etc headers sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ - "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" - cp_portable "$tmpdir/bootparam.h" \ + "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h" + cp_portable "$hdrdir/bootparam.h" \ "$output/include/standard-headers/asm-$arch" + cp_portable "$hdrdir/include/asm/setup_data.h" \ + "$output/standard-headers/asm-x86" fi if [ $arch = loongarch ]; then cp "$hdrdir/include/asm/kvm_para.h" "$output/linux-headers/asm-loongarch/" cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-loongarch/" fi + if [ $arch = riscv ]; then + cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi done +arch= rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ - vduse.h iommufd.h; do - cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" + vduse.h iommufd.h bits.h; do + cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" done rm -rf "$output/linux-headers/asm-generic" mkdir -p "$output/linux-headers/asm-generic" for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do - cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" + cp "$hdrdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" done if [ -L "$linux/source" ]; then @@ -196,6 +220,10 @@ if [ -d "$linux/LICENSES" ]; then done fi +cat <$output/linux-headers/linux/kvm_para.h +#include "standard-headers/linux/kvm_para.h" +#include +EOF cat <$output/linux-headers/linux/virtio_config.h #include "standard-headers/linux/virtio_config.h" EOF @@ -208,23 +236,24 @@ EOF rm -rf "$output/include/standard-headers/linux" mkdir -p "$output/include/standard-headers/linux" -for i in "$tmpdir"/include/linux/*virtio*.h \ - "$tmpdir/include/linux/qemu_fw_cfg.h" \ - "$tmpdir/include/linux/fuse.h" \ - "$tmpdir/include/linux/input.h" \ - "$tmpdir/include/linux/input-event-codes.h" \ - "$tmpdir/include/linux/udmabuf.h" \ - "$tmpdir/include/linux/pci_regs.h" \ - "$tmpdir/include/linux/ethtool.h" \ - "$tmpdir/include/linux/const.h" \ - "$tmpdir/include/linux/kernel.h" \ - "$tmpdir/include/linux/vhost_types.h" \ - "$tmpdir/include/linux/sysinfo.h" \ - "$tmpdir/include/misc/pvpanic.h"; do +for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/qemu_fw_cfg.h" \ + "$hdrdir/include/linux/fuse.h" \ + "$hdrdir/include/linux/input.h" \ + "$hdrdir/include/linux/input-event-codes.h" \ + "$hdrdir/include/linux/udmabuf.h" \ + "$hdrdir/include/linux/pci_regs.h" \ + "$hdrdir/include/linux/ethtool.h" \ + "$hdrdir/include/linux/const.h" \ + 
"$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/kvm_para.h" \ + "$hdrdir/include/linux/vhost_types.h" \ + "$hdrdir/include/linux/sysinfo.h" \ + "$hdrdir/include/misc/pvpanic.h"; do cp_portable "$i" "$output/include/standard-headers/linux" done mkdir -p "$output/include/standard-headers/drm" -cp_portable "$tmpdir/include/drm/drm_fourcc.h" \ +cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ "$output/include/standard-headers/drm" rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma" diff --git a/semihosting/console.c b/semihosting/console.c index 5d61e8207e26a7703dac45968bfbc0fb548b4b1c..60102bbab6657035d41334d27e0f4742e7af5332 100644 --- a/semihosting/console.c +++ b/semihosting/console.c @@ -43,7 +43,7 @@ static SemihostingConsole console; static int console_can_read(void *opaque) { SemihostingConsole *c = opaque; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); return (int)fifo8_num_free(&c->fifo); } @@ -58,7 +58,7 @@ static void console_wake_up(gpointer data, gpointer user_data) static void console_read(void *opaque, const uint8_t *buf, int size) { SemihostingConsole *c = opaque; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); while (size-- && !fifo8_is_full(&c->fifo)) { fifo8_push(&c->fifo, *buf++); } @@ -70,7 +70,7 @@ bool qemu_semihosting_console_ready(void) { SemihostingConsole *c = &console; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); return !fifo8_is_empty(&c->fifo); } @@ -78,7 +78,7 @@ void qemu_semihosting_console_block_until_ready(CPUState *cs) { SemihostingConsole *c = &console; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); /* Block if the fifo is completely empty. */ if (fifo8_is_empty(&c->fifo)) { diff --git a/stubs/iothread-lock.c b/stubs/iothread-lock.c index 5b45b7fc8b905701f7b30fd769032dcc36f2b7e0..d7890e5581c5c29b69d18e9e27f509e453c739e5 100644 --- a/stubs/iothread-lock.c +++ b/stubs/iothread-lock.c @@ -1,15 +1,15 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" -bool qemu_mutex_iothread_locked(void) +bool bql_locked(void) { return false; } -void qemu_mutex_lock_iothread_impl(const char *file, int line) +void bql_lock_impl(const char *file, int line) { } -void qemu_mutex_unlock_iothread(void) +void bql_unlock(void) { } diff --git a/system/cpu-throttle.c b/system/cpu-throttle.c index d9bb30a223d84e4b460dbdfaeec5da010659cd4e..786a9a5639c2fa502f693bec239bf0d0f16c1030 100644 --- a/system/cpu-throttle.c +++ b/system/cpu-throttle.c @@ -57,9 +57,9 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) qemu_cond_timedwait_iothread(cpu->halt_cond, sleeptime_ns / SCALE_MS); } else { - qemu_mutex_unlock_iothread(); + bql_unlock(); g_usleep(sleeptime_ns / SCALE_US); - qemu_mutex_lock_iothread(); + bql_lock(); } sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } diff --git a/system/cpus.c b/system/cpus.c index d9de09b9e87b5f5d4eab095e38f7ca2ed942b2ba..27622356c9f87f89dc6770608bf9fddd0fe3ce6c 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -65,7 +65,8 @@ #endif /* CONFIG_LINUX */ -static QemuMutex qemu_global_mutex; +/* The Big QEMU Lock (BQL) */ +static QemuMutex bql; /* * The chosen accelerator is supposed to register this. 
@@ -403,14 +404,14 @@ void qemu_init_cpu_loop(void) qemu_init_sigbus(); qemu_cond_init(&qemu_cpu_cond); qemu_cond_init(&qemu_pause_cond); - qemu_mutex_init(&qemu_global_mutex); + qemu_mutex_init(&bql); qemu_thread_get_self(&io_thread); } void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) { - do_run_on_cpu(cpu, func, data, &qemu_global_mutex); + do_run_on_cpu(cpu, func, data, &bql); } static void qemu_cpu_stop(CPUState *cpu, bool exit) @@ -442,7 +443,7 @@ void qemu_wait_io_event(CPUState *cpu) slept = true; qemu_plugin_vcpu_idle_cb(cpu); } - qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); + qemu_cond_wait(cpu->halt_cond, &bql); } if (slept) { qemu_plugin_vcpu_resume_cb(cpu); @@ -495,46 +496,46 @@ bool qemu_in_vcpu_thread(void) return current_cpu && qemu_cpu_is_self(current_cpu); } -QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) +QEMU_DEFINE_STATIC_CO_TLS(bool, bql_locked) -bool qemu_mutex_iothread_locked(void) +bool bql_locked(void) { - return get_iothread_locked(); + return get_bql_locked(); } bool qemu_in_main_thread(void) { - return qemu_mutex_iothread_locked(); + return bql_locked(); } /* * The BQL is taken from so many places that it is worth profiling the * callers directly, instead of funneling them all through a single function. */ -void qemu_mutex_lock_iothread_impl(const char *file, int line) +void bql_lock_impl(const char *file, int line) { - QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func); + QemuMutexLockFunc bql_lock_fn = qatomic_read(&bql_mutex_lock_func); - g_assert(!qemu_mutex_iothread_locked()); - bql_lock(&qemu_global_mutex, file, line); - set_iothread_locked(true); + g_assert(!bql_locked()); + bql_lock_fn(&bql, file, line); + set_bql_locked(true); } -void qemu_mutex_unlock_iothread(void) +void bql_unlock(void) { - g_assert(qemu_mutex_iothread_locked()); - set_iothread_locked(false); - qemu_mutex_unlock(&qemu_global_mutex); + g_assert(bql_locked()); + set_bql_locked(false); + qemu_mutex_unlock(&bql); } void qemu_cond_wait_iothread(QemuCond *cond) { - qemu_cond_wait(cond, &qemu_global_mutex); + qemu_cond_wait(cond, &bql); } void qemu_cond_timedwait_iothread(QemuCond *cond, int ms) { - qemu_cond_timedwait(cond, &qemu_global_mutex, ms); + qemu_cond_timedwait(cond, &bql, ms); } /* signal CPU creation */ @@ -595,7 +596,7 @@ retry: replay_mutex_unlock(); while (!all_vcpus_paused()) { - qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); + qemu_cond_wait(&qemu_pause_cond, &bql); /* During we waited on qemu_pause_cond the bql was unlocked, * the vcpu's state may has been changed by other thread, so * we must request the pause state on all vcpus again. @@ -603,9 +604,9 @@ retry: request_pause_all_vcpus(); } - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); /* During the bql was unlocked, the vcpu's state may has been * changed by other thread, so we must retry. 
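Note how bql_lock_impl() keeps its file/line arguments for exactly the reason the comment gives: the BQL is taken from so many places that contention is best attributed to individual call sites, and the qatomic_read() of bql_mutex_lock_func lets a profiler interpose its own lock function at run time. The caller-facing name is presumably still a one-line macro in the headers, which this excerpt does not touch; something along these lines:

/* Assumed caller-side wrapper; the real definition lives in
 * include/qemu/main-loop.h, outside this diff. */
#define bql_lock() bql_lock_impl(__FILE__, __LINE__)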
@@ -644,9 +645,9 @@ void cpu_remove_sync(CPUState *cpu) cpu->stop = true; cpu->unplug = true; qemu_cpu_kick(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_thread_join(cpu->thread); - qemu_mutex_lock_iothread(); + bql_lock(); } void cpus_register_accel(const AccelOpsClass *ops) @@ -685,7 +686,7 @@ void qemu_init_vcpu(CPUState *cpu) cpus_accel->create_vcpu_thread(cpu); while (!cpu->created) { - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); + qemu_cond_wait(&qemu_cpu_cond, &bql); } } diff --git a/system/dirtylimit.c b/system/dirtylimit.c index 495c7a7082ff190ec12a0d13611bac260a1d0d83..b5607eb8c272a4b5456f3f903c6396dd307381f6 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -148,9 +148,9 @@ void vcpu_dirty_rate_stat_stop(void) { qatomic_set(&vcpu_dirty_rate_stat->running, 0); dirtylimit_state_unlock(); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_thread_join(&vcpu_dirty_rate_stat->thread); - qemu_mutex_lock_iothread(); + bql_lock(); dirtylimit_state_lock(); } diff --git a/system/memory.c b/system/memory.c index fa990097013936c6e937a11923dc9e16165abbcf..c394957dbbfa02705e811879f79c965009b7c6fa 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1169,7 +1169,7 @@ void memory_region_commit(void) void memory_region_transaction_commit(void) { assert(memory_region_transaction_depth); - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); --memory_region_transaction_depth; if (!memory_region_transaction_depth) { @@ -1579,16 +1579,17 @@ void memory_region_init_io(MemoryRegion *mr, mr->terminates = true; } -void memory_region_init_ram_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, Error **errp) { - memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); + return memory_region_init_ram_flags_nomigrate(mr, owner, name, + size, 0, errp); } -void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1605,10 +1606,12 @@ void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } -void memory_region_init_resizeable_ram(MemoryRegion *mr, +bool memory_region_init_resizeable_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1629,11 +1632,13 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } #ifdef CONFIG_POSIX -void memory_region_init_ram_from_file(MemoryRegion *mr, +bool memory_region_init_ram_from_file(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1656,10 +1661,12 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } -void memory_region_init_ram_from_fd(MemoryRegion *mr, +bool memory_region_init_ram_from_fd(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1680,7 +1687,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } #endif @@ -1731,17 +1740,22 @@ void memory_region_init_alias(MemoryRegion *mr, mr->alias_offset = offset; } -void memory_region_init_rom_nomigrate(MemoryRegion *mr, +bool 
memory_region_init_rom_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, Error **errp) { - memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); + if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, + size, 0, errp)) { + return false; + } mr->readonly = true; + + return true; } -void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, +bool memory_region_init_rom_device_nomigrate(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, @@ -1762,7 +1776,9 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } void memory_region_init_iommu(void *_iommu_mr, @@ -1867,6 +1883,11 @@ bool memory_region_is_protected(MemoryRegion *mr) return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); } +bool memory_region_has_guest_memfd(MemoryRegion *mr) +{ + return mr->ram_block && mr->ram_block->guest_memfd >= 0; +} + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) { uint8_t mask = mr->dirty_log_mask; @@ -3652,19 +3673,16 @@ void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled) } } -void memory_region_init_ram(MemoryRegion *mr, +bool memory_region_init_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, Error **errp) { DeviceState *owner_dev; - Error *err = NULL; - memory_region_init_ram_nomigrate(mr, owner, name, size, &err); - if (err) { - error_propagate(errp, err); - return; + if (!memory_region_init_ram_nomigrate(mr, owner, name, size, errp)) { + return false; } /* This will assert if owner is neither NULL nor a DeviceState. * We only want the owner here for the purposes of defining a @@ -3674,21 +3692,44 @@ void memory_region_init_ram(MemoryRegion *mr, */ owner_dev = DEVICE(owner); vmstate_register_ram(mr, owner_dev); + + return true; } -void memory_region_init_rom(MemoryRegion *mr, +bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp) +{ + DeviceState *owner_dev; + + if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size, + RAM_GUEST_MEMFD, errp)) { + return false; + } + /* This will assert if owner is neither NULL nor a DeviceState. + * We only want the owner here for the purposes of defining a + * unique name for migration. TODO: Ideally we should implement + * a naming scheme for Objects which are not DeviceStates, in + * which case we can relax this restriction. + */ + owner_dev = DEVICE(owner); + vmstate_register_ram(mr, owner_dev); + + return true; +} + +bool memory_region_init_rom(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, Error **errp) { DeviceState *owner_dev; - Error *err = NULL; - memory_region_init_rom_nomigrate(mr, owner, name, size, &err); - if (err) { - error_propagate(errp, err); - return; + if (!memory_region_init_rom_nomigrate(mr, owner, name, size, errp)) { + return false; } /* This will assert if owner is neither NULL nor a DeviceState. 
* We only want the owner here for the purposes of defining a @@ -3698,9 +3739,11 @@ void memory_region_init_rom(MemoryRegion *mr, */ owner_dev = DEVICE(owner); vmstate_register_ram(mr, owner_dev); + + return true; } -void memory_region_init_rom_device(MemoryRegion *mr, +bool memory_region_init_rom_device(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, @@ -3709,13 +3752,10 @@ void memory_region_init_rom_device(MemoryRegion *mr, Error **errp) { DeviceState *owner_dev; - Error *err = NULL; - memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque, - name, size, &err); - if (err) { - error_propagate(errp, err); - return; + if (!memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque, + name, size, errp)) { + return false; } /* This will assert if owner is neither NULL nor a DeviceState. * We only want the owner here for the purposes of defining a @@ -3725,6 +3765,8 @@ void memory_region_init_rom_device(MemoryRegion *mr, */ owner_dev = DEVICE(owner); vmstate_register_ram(mr, owner_dev); + + return true; } /* diff --git a/system/physmem.c b/system/physmem.c index 8f4be2d1310c6cb007ae7b4db6ff135d475cf6de..f4bc6eeec40387a8cbef6c0f96f2df10cecdc8af 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1842,6 +1842,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) RAMBlock *block; RAMBlock *last_block = NULL; ram_addr_t ram_size; + bool free_on_error = false; Error *err = NULL; qemu_mutex_lock_ramlist(); @@ -1868,6 +1869,26 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) return; } memory_try_enable_merging(new_block->host, new_block->max_length); + free_on_error = true; + } + } + + if (new_block->flags & RAM_GUEST_MEMFD) { + assert(kvm_enabled()); + assert(new_block->guest_memfd < 0); + + if (ram_block_discard_require(true) < 0) { + error_setg_errno(errp, errno, + "cannot set up private guest memory: discard currently blocked"); + error_append_hint(errp, "Are you using assigned devices?\n"); + goto out_free; + } + + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, + 0, errp); + if (new_block->guest_memfd < 0) { + qemu_mutex_unlock_ramlist(); + goto out_free; } } @@ -1916,6 +1937,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) ram_block_notify_add(new_block->host, new_block->used_length, new_block->max_length); } + return; + +out_free: + if (free_on_error) { + qemu_anon_ram_free(new_block->host, new_block->max_length); + new_block->host = NULL; + } } #ifdef CONFIG_POSIX @@ -1930,7 +1958,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, /* Just support these ram flags by now. 
*/ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | - RAM_READONLY_FD)) == 0); + RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -1965,6 +1993,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = ram_flags; + new_block->guest_memfd = -1; new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, errp); if (!new_block->host) { @@ -2043,7 +2072,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, Error *local_err = NULL; assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | - RAM_NORESERVE)) == 0); + RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); assert(!host ^ (ram_flags & RAM_PREALLOC)); size = HOST_PAGE_ALIGN(size); @@ -2055,6 +2084,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, new_block->max_length = max_size; assert(max_size >= size); new_block->fd = -1; + new_block->guest_memfd = -1; new_block->page_size = qemu_real_host_page_size(); new_block->host = host; new_block->flags = ram_flags; @@ -2077,7 +2107,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, Error **errp) { - assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); + assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); } @@ -2105,6 +2135,12 @@ static void reclaim_ramblock(RAMBlock *block) } else { qemu_anon_ram_free(block->host, block->max_length); } + + if (block->guest_memfd >= 0) { + close(block->guest_memfd); + ram_block_discard_require(false); + } + g_free(block); } @@ -2690,8 +2726,8 @@ bool prepare_mmio_access(MemoryRegion *mr) { bool release_lock = false; - if (!qemu_mutex_iothread_locked()) { - qemu_mutex_lock_iothread(); + if (!bql_locked()) { + bql_lock(); release_lock = true; } if (mr->flush_coalesced_mmio) { @@ -2772,7 +2808,7 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); release_lock = false; } @@ -2850,7 +2886,7 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); release_lock = false; } @@ -3676,6 +3712,29 @@ err: return ret; } +int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length) +{ + int ret = -1; + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); + + if (ret) { + ret = -errno; + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); + } +#else + ret = -ENOSYS; + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); +#endif + + return ret; +} + bool ramblock_is_pmem(RAMBlock *rb) { return rb->flags & RAM_PMEM; diff --git a/system/runstate.c b/system/runstate.c index 7e41626bb15b54b4a86fe4d50ac5388aa168544c..05dcd0cc8fc1031656e1e1b249511d82e737b436 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -504,11 +504,24 @@ void qemu_system_reset(ShutdownCause reason) default: qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } - cpu_synchronize_all_post_reset(); 
cpus_control_post_system_reset(); monitor_qapi_event_discard_io_error(); + + /* + * Some boards use the machine reset callback to point CPUs to the firmware + * entry point. Assume that this is not the case for boards that support + * non-resettable CPUs (currently used only for confidential guests), in + * which case cpu_synchronize_all_post_init() is enough because + * it does _more_ than cpu_synchronize_all_post_reset(). + */ + if (cpus_are_resettable()) { + cpu_synchronize_all_post_reset(); + } else { + assert(runstate_check(RUN_STATE_PRELAUNCH)); + } + } /* @@ -819,7 +832,7 @@ void qemu_init_subsystems(void) qemu_init_cpu_list(); qemu_init_cpu_loop(); - qemu_mutex_lock_iothread(); + bql_lock(); atexit(qemu_run_exit_notifiers); diff --git a/system/watchpoint.c b/system/watchpoint.c index ba5ad13352c355f5c8de5d25d36efb7fb09b95c2..b76007ebf6b62855f420b72c8ae36c972d74ec04 100644 --- a/system/watchpoint.c +++ b/system/watchpoint.c @@ -155,9 +155,9 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, * Now raise the debug interrupt so that it will * trigger after the current instruction. */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index fb19b04189293722795432903fe7da4dbff0cf2a..35b9386134a3a3bc48a1439bda43f60fd8f96d72 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -108,7 +108,7 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state, g_free(info); /* Finally set the power status */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); target_cpu->power_state = PSCI_ON; } @@ -120,7 +120,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, CPUArchId *arch_id; struct CpuOnInfo *info; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64 "\n", cpuid, target_el, target_aa64 ? 
"aarch64" : "aarch32", entry, @@ -229,7 +229,7 @@ static void arm_set_cpu_on_and_reset_async_work(CPUState *target_cpu_state, target_cpu_state->halted = 0; /* Finally set the power status */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); target_cpu->power_state = PSCI_ON; } @@ -238,7 +238,7 @@ int arm_set_cpu_on_and_reset(uint64_t cpuid) CPUState *target_cpu_state; ARMCPU *target_cpu; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); /* Retrieve the cpu we are powering up */ target_cpu_state = arm_get_cpu_by_id(cpuid); @@ -280,7 +280,7 @@ static void arm_set_cpu_off_async_work(CPUState *target_cpu_state, { ARMCPU *target_cpu = ARM_CPU(target_cpu_state); - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); target_cpu->power_state = PSCI_OFF; target_cpu_state->halted = 1; target_cpu_state->exception_index = EXCP_HLT; @@ -291,7 +291,7 @@ int arm_set_cpu_off(uint64_t cpuid) CPUState *target_cpu_state; ARMCPU *target_cpu; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); DPRINTF("cpu %" PRId64 "\n", cpuid); @@ -327,7 +327,7 @@ int arm_reset_cpu(uint64_t cpuid) CPUState *target_cpu_state; ARMCPU *target_cpu; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); DPRINTF("cpu %" PRId64 "\n", cpuid); diff --git a/target/arm/helper.c b/target/arm/helper.c index 0370a739e3bbfcf3b04c548190764dd5ac3c4637..89ae7c69db0fcf8f83efedb042c5a228f4a9a991 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5852,7 +5852,7 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) * VFIQ are masked unless running at EL0 or EL1, and HCR * can only be written at EL2. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); arm_cpu_update_virq(cpu); arm_cpu_update_vfiq(cpu); arm_cpu_update_vserr(cpu); @@ -11395,7 +11395,7 @@ void arm_cpu_do_interrupt(CPUState *cs) * BQL needs to be held for any modification of * cs->interrupt_request. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); arm_call_pre_el_change_hook(cpu); diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index d7cc00a084b413fc6df6c7322b37a542b3780be1..c34f1e725d0854bf96dec60fee0f523d035e7ae2 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -1719,9 +1719,9 @@ static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts) * sleeping. */ qatomic_set_mb(&cpu->thread_kicked, false); - qemu_mutex_unlock_iothread(); + bql_unlock(); pselect(0, 0, 0, 0, ts, &cpu->accel->unblock_ipi_mask); - qemu_mutex_lock_iothread(); + bql_lock(); } static void hvf_wfi(CPUState *cpu) @@ -1822,7 +1822,7 @@ int hvf_vcpu_exec(CPUState *cpu) flush_cpu_state(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); assert_hvf_ok(hv_vcpu_run(cpu->accel->fd)); /* handle VMEXIT */ @@ -1831,7 +1831,7 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t ec = syn_get_ec(syndrome); ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); switch (exit_reason) { case HV_EXIT_REASON_EXCEPTION: /* This is the main one, handle below. 
*/ diff --git a/target/arm/kvm.c b/target/arm/kvm.c index f45783a9dafe76c81e3411fcf35d7b455db3d1e4..5e71b74892b7d9e6a52d0df50d2a3819ea345ee4 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1009,7 +1009,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) if (run->s.regs.device_irq_level != cpu->device_irq_level) { switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level; - qemu_mutex_lock_iothread(); + bql_lock(); if (switched_level & KVM_ARM_DEV_EL1_VTIMER) { qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT], @@ -1038,7 +1038,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) /* We also mark unknown levels as processed to not waste cycles */ cpu->device_irq_level = run->s.regs.device_irq_level; - qemu_mutex_unlock_iothread(); + bql_unlock(); } return MEMTXATTRS_UNSPECIFIED; @@ -1122,9 +1122,9 @@ static int kvm_arm_handle_hypercall(CPUState *cs, struct kvm_run *run) env->exception.syndrome = syn_aa64_hvc(0); } env->exception.target_el = 1; - qemu_mutex_lock_iothread(); + bql_lock(); arm_cpu_do_interrupt(cs); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* * For PSCI, exit the kvm_run loop and process the work. Especially @@ -1325,11 +1325,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) return (data - 32) & 0xffff; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return !virtcca_cvm_enabled(); -} - static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 6a8aad0f066ae17a8b6b5e37ff15c69c3759ebcf..1af807cab5429c390170d161f12e1da4776ea1b7 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -1368,9 +1368,9 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) env->exception.syndrome = debug_exit->hsr; env->exception.vaddress = debug_exit->far; env->exception.target_el = 1; - qemu_mutex_lock_iothread(); + bql_lock(); arm_cpu_do_interrupt(cs); - qemu_mutex_unlock_iothread(); + bql_unlock(); return false; } diff --git a/target/arm/psci.c b/target/arm/psci.c index a8690a16afc53c314411959fe9ebd81bd4ba4652..8a084cf50116675282cb062e88ea173eec2873d9 100644 --- a/target/arm/psci.c +++ b/target/arm/psci.c @@ -109,7 +109,7 @@ void arm_handle_psci_call(ARMCPU *cpu) } target_cpu = ARM_CPU(target_cpu_state); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); ret = target_cpu->power_state; break; default: diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 1762b058aecfc0e61e58a50160d0e2bef92787f4..0ecd3a36dad47d12a90fb9bcfd287347c3812182 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -772,9 +772,9 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, #if !TCG_OVERSIZED_GUEST # error "Unexpected configuration" #endif - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); if (!locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } if (ptw->out_be) { cur_val = ldq_be_p(host); @@ -788,7 +788,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, } } if (!locked) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 8ad84623d37d93a3d56c3f42e770be342b705ff0..198b975f207c008d427a9c4ee7c82ea46aa8b72f 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -809,9 +809,9 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } - qemu_mutex_lock_iothread(); + bql_lock(); 
arm_call_pre_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (!return_to_aa64) { env->aarch64 = false; @@ -876,9 +876,9 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) */ aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c index a26adb75aa247359ccf8aae40c5aff1aa7cdecba..d1f1e02acc11a8f1964c47e218173c649c51e75a 100644 --- a/target/arm/tcg/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -373,8 +373,8 @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) bool ts = is_secure && (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK); bool take_exception; - /* Take the iothread lock as we are going to touch the NVIC */ - qemu_mutex_lock_iothread(); + /* Take the BQL as we are going to touch the NVIC */ + bql_lock(); /* Check the background context had access to the FPU */ if (!v7m_cpacr_pass(env, is_secure, is_priv)) { @@ -428,7 +428,7 @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) take_exception = !stacked_ok && armv7m_nvic_can_take_pending_exception(env->nvic); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (take_exception) { raise_exception_ra(env, EXCP_LAZYFP, 0, 1, GETPC()); diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index ea08936a852b17d363ff58f4262edb57cbe3b376..34e706e0fd36ad20d098d06d6ec9cd7d84eeadc8 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -427,9 +427,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) { uint32_t mask; - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_pre_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar); cpsr_write(env, val, mask, CPSRWriteExceptionReturn); @@ -442,9 +442,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) env->regs[15] &= (env->thumb ? ~1 : ~3); arm_rebuild_hflags(env); - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* Access to user mode registers from privileged modes. 
*/ @@ -803,9 +803,9 @@ void HELPER(set_cp_reg)(CPUARMState *env, const void *rip, uint32_t value) const ARMCPRegInfo *ri = rip; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); ri->writefn(env, ri, value); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { ri->writefn(env, ri, value); } @@ -817,9 +817,9 @@ uint32_t HELPER(get_cp_reg)(CPUARMState *env, const void *rip) uint32_t res; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); res = ri->readfn(env, ri); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { res = ri->readfn(env, ri); } @@ -832,9 +832,9 @@ void HELPER(set_cp_reg64)(CPUARMState *env, const void *rip, uint64_t value) const ARMCPRegInfo *ri = rip; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); ri->writefn(env, ri, value); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { ri->writefn(env, ri, value); } @@ -846,9 +846,9 @@ uint64_t HELPER(get_cp_reg64)(CPUARMState *env, const void *rip) uint64_t res; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); res = ri->readfn(env, ri); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { res = ri->readfn(env, ri); } diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c index 98e9d688f64070e7b93974c649a1a62df5ed8d23..efe638b36ed9022a386acca15f215e4c516e445b 100644 --- a/target/hppa/int_helper.c +++ b/target/hppa/int_helper.c @@ -84,17 +84,17 @@ void hppa_cpu_alarm_timer(void *opaque) void HELPER(write_eirr)(CPUHPPAState *env, target_ulong val) { env->cr[CR_EIRR] &= ~val; - qemu_mutex_lock_iothread(); + bql_lock(); eval_interrupt(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(write_eiem)(CPUHPPAState *env, target_ulong val) { env->cr[CR_EIEM] = val; - qemu_mutex_lock_iothread(); + bql_lock(); eval_interrupt(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void hppa_cpu_do_interrupt(CPUState *cs) diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c new file mode 100644 index 0000000000000000000000000000000000000000..b3727845adc3719e19b5a704a8ea33a53aebda7b --- /dev/null +++ b/target/i386/confidential-guest.c @@ -0,0 +1,33 @@ +/* + * QEMU Confidential Guest support + * + * Copyright (C) 2024 Red Hat, Inc. + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "confidential-guest.h" + +OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, + x86_confidential_guest, + X86_CONFIDENTIAL_GUEST, + CONFIDENTIAL_GUEST_SUPPORT) + +static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) +{ +} + +static void x86_confidential_guest_init(Object *obj) +{ +} + +static void x86_confidential_guest_finalize(Object *obj) +{ +} diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h new file mode 100644 index 0000000000000000000000000000000000000000..7342d2843aa51c1255b4726a3f3ccd3be4bedf49 --- /dev/null +++ b/target/i386/confidential-guest.h @@ -0,0 +1,83 @@ +/* + * x86-specific confidential guest methods. + * + * Copyright (c) 2024 Red Hat Inc. + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef TARGET_I386_CG_H +#define TARGET_I386_CG_H + +#include "qom/object.h" + +#include "exec/confidential-guest-support.h" + +#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" + +OBJECT_DECLARE_TYPE(X86ConfidentialGuest, + X86ConfidentialGuestClass, + X86_CONFIDENTIAL_GUEST) + +struct X86ConfidentialGuest { + /* <private> */ + ConfidentialGuestSupport parent_obj; +}; + +/** + * X86ConfidentialGuestClass: + * + * Class to be implemented by confidential-guest-support concrete objects + * for the x86 target. + */ +struct X86ConfidentialGuestClass { + /* <private> */ + ConfidentialGuestSupportClass parent; + + /* <public> */ + int (*kvm_type)(X86ConfidentialGuest *cg); + uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, + int reg, uint32_t value); +}; + +/** + * x86_confidential_guest_kvm_type: + * + * Calls the #X86ConfidentialGuestClass.kvm_type callback of @cg. + */ +static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->kvm_type) { + return klass->kvm_type(cg); + } else { + return 0; + } +} + +/** + * x86_confidential_guest_mask_cpuid_features: + * + * Removes unsupported features from a confidential guest's CPUID values, returns + * the value with the bits removed. The bits removed should be those that KVM + * provides independent of host-supported CPUID features, but are not supported by + * the confidential computing firmware. + */ +static inline uint32_t x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, + uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->mask_cpuid_features) { + return klass->mask_cpuid_features(cg, feature, index, reg, value); + } else { + return value; + } +} + +#endif diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c index 2375e48178f99d7af22f95868622ac28c02baf21..74220967370326eadf2e3de7fc95e1fe4f9a2a81 100644 --- a/target/i386/cpu-sysemu.c +++ b/target/i386/cpu-sysemu.c @@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env) env->features[FEAT_1_EDX] &= ~CPUID_APIC; } +void cpu_set_apic_feature(CPUX86State *env) +{ + env->features[FEAT_1_EDX] |= CPUID_APIC; +} + +bool cpu_has_x2apic_feature(CPUX86State *env) +{ + return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC; +} + bool cpu_is_bsp(X86CPU *cpu) { return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP; } @@ -281,11 +291,17 @@ void x86_cpu_apic_create(X86CPU *cpu, Error **errp) OBJECT(cpu->apic_state)); object_unref(OBJECT(cpu->apic_state)); - qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); /* TODO: convert to link<> */ apic = APIC_COMMON(cpu->apic_state); apic->cpu = cpu; apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; + + /* + * apic_common_set_id needs to check whether the CPU has the x2APIC + * feature in case APIC ID >= 255, so we need to set apic->cpu + * before setting the APIC ID + */ + qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); } void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 8360ea3d610426818586ca876daa7295df2fffa9..a70cc4b3d85572fe6d842ae5d87ca22997e08151 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7180,6 +7180,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = 0x2; *eax |= sev_es_enabled() ? 0x8 : 0; *eax |= csv3_enabled() ? 
0x40000000 : 0; /* bit 30 for CSV3 */ + *eax |= sev_snp_enabled() ? 0x10 : 0; *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 4424e58d1ba24fd9578623c92ca169e48e73e22b..f003558e405cedbeff23632e8b17e7b2636e6488 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -567,6 +567,9 @@ typedef enum X86Seg { #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 #define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_APIC_START 0x00000800 +#define MSR_APIC_END 0x000008ff + #define XSTATE_FP_BIT 0 #define XSTATE_SSE_BIT 1 #define XSTATE_YMM_BIT 2 @@ -1013,6 +1016,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) /* Speculative Store Bypass Disable */ #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) +/* Paravirtualized Speculative Store Bypass Disable MSR */ +#define CPUID_8000_0008_EBX_VIRT_SSBD (1U << 25) /* Predictive Store Forwarding Disable */ #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) @@ -2320,8 +2325,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); void cpu_clear_apic_feature(CPUX86State *env); +void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); +bool cpu_has_x2apic_feature(CPUX86State *env); /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); diff --git a/target/i386/hvf/README.md b/target/i386/hvf/README.md index 2d33477aca505e08eb899698c8d036273035d2c9..64a8935237c8c209eab6a2f2905f342bc30c67a2 100644 --- a/target/i386/hvf/README.md +++ b/target/i386/hvf/README.md @@ -4,4 +4,4 @@ These sources (and ../hvf-all.c) are adapted from Veertu Inc's vdhh (Veertu Desk 1. Adapt to our current QEMU's `CPUState` structure and `address_space_rw` API; many struct members have been moved around (emulated x86 state, xsave_buf) due to historical differences + QEMU needing to handle more emulation targets. 2. Removal of `apic_page` and hyperv-related functionality. -3. More relaxed use of `qemu_mutex_lock_iothread`. +3. More relaxed use of `bql_lock`. 
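The qemu_mutex_iothread_locked()/bql_locked() rename applied throughout these hunks leaves the underlying locking idiom unchanged: code that may run with or without the BQL held records the current state and releases only what it acquired (compare arm_casq_ptw() and prepare_mmio_access() above). A minimal sketch of that idiom follows; the wrapper function and its body are illustrative only, not part of this patch:

    /* Illustration only: perform BQL-protected work from a context that
     * may or may not already hold the lock. The helper is hypothetical;
     * bql_locked()/bql_lock()/bql_unlock() are the renamed QEMU APIs. */
    static void do_work_under_bql(CPUState *cpu)
    {
        bool locked = bql_locked();   /* were we called with the BQL held? */

        if (!locked) {
            bql_lock();               /* acquire only if we did not own it */
        }

        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);   /* BQL-protected call */

        if (!locked) {
            bql_unlock();             /* release only what we acquired */
        }
    }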
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 20b9ca3ef5135afd3d5b0e959568a503d1c72603..11ffdd4c69fdb61ab072e9364d6007c19ce88c39 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -429,9 +429,9 @@ int hvf_vcpu_exec(CPUState *cpu) } vmx_update_tpr(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) { - qemu_mutex_lock_iothread(); + bql_lock(); return EXCP_HLT; } @@ -450,7 +450,7 @@ int hvf_vcpu_exec(CPUState *cpu) rip = rreg(cpu->accel->fd, HV_X86_RIP); env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS); - qemu_mutex_lock_iothread(); + bql_lock(); update_apic_tpr(cpu); current_cpu = cpu; diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index e3ac978648b81cc31fa9f2bd1ba22c3d1ad9a764..6825c89af374ab7319a1bc10724068fffe16e306 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -45,9 +45,9 @@ void hyperv_x86_synic_update(X86CPU *cpu) static void async_synic_update(CPUState *cs, run_on_cpu_data data) { - qemu_mutex_lock_iothread(); + bql_lock(); hyperv_x86_synic_update(X86_CPU(cs)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 3a88e65635698ec3043a90b05cd984a2e93e7a58..8e2512a19a1a973fd05681f8a52ca30e036d9863 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -21,6 +21,7 @@ #include #include +#include #include "standard-headers/asm-x86/kvm_para.h" #include "hw/xen/interface/arch-x86/cpuid.h" @@ -31,6 +32,7 @@ #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "kvm_i386.h" +#include "../confidential-guest.h" #include "sev.h" #include "csv.h" #include "xen-emu.h" @@ -163,6 +165,59 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; static RateLimit bus_lock_ratelimit_ctrl; static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); +static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", + [KVM_X86_SNP_VM] = "SEV-SNP", +}; + +bool kvm_is_vm_type_supported(int type) +{ + uint32_t machine_types; + + /* + * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM + * is always supported + */ + if (type == KVM_X86_DEFAULT_VM) { + return true; + } + + machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), + KVM_CAP_VM_TYPES); + return !!(machine_types & BIT(type)); +} + +int kvm_get_vm_type(MachineState *ms) +{ + int kvm_type = KVM_X86_DEFAULT_VM; + + if (ms->cgs) { + if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { + error_report("configuration type %s not supported for x86 guests", + object_get_typename(OBJECT(ms->cgs))); + exit(1); + } + kvm_type = x86_confidential_guest_kvm_type( + X86_CONFIDENTIAL_GUEST(ms->cgs)); + } + + if (!kvm_is_vm_type_supported(kvm_type)) { + error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); + exit(1); + } + + return kvm_type; +} + +bool kvm_enable_hypercall(uint64_t enable_mask) +{ + KVMState *s = KVM_STATE(current_accel()); + + return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask); +} + bool kvm_has_smm(void) { return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); @@ -492,6 +547,11 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, ret |= 1U << KVM_HINTS_REALTIME; } + if (current_machine->cgs) { + ret = x86_confidential_guest_mask_cpuid_features( + 
X86_CONFIDENTIAL_GUEST(current_machine->cgs), + function, index, reg, ret); + } return ret; } @@ -2192,7 +2252,7 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10); } - if (sev_enabled()) { + if (!sev_snp_enabled() && sev_enabled()) { c = cpuid_find_entry(&cpuid_data.cpuid, KVM_CPUID_FEATURES | kvm_base, 0); if (c) { @@ -2270,7 +2330,7 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) env->mp_state = KVM_MP_STATE_RUNNABLE; } - if (cpu_is_bsp(cpu) && + if (!sev_snp_enabled() && cpu_is_bsp(cpu) && sev_enabled() && has_map_gpa_range) { sev_remove_shared_regions_list(0, -1); } @@ -2578,10 +2638,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) * mechanisms are supported in future (e.g. TDX), they'll need * their own initialization either here or elsewhere. */ - ret = sev_kvm_init(ms->cgs, &local_err); - if (ret < 0) { - error_report_err(local_err); - return ret; + if (ms->cgs) { + ret = confidential_guest_kvm_init(ms->cgs, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } } has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); @@ -2627,14 +2689,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s) #endif } - has_map_gpa_range = kvm_check_extension(s, KVM_CAP_EXIT_HYPERCALL); - if (has_map_gpa_range) { - ret = kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, - KVM_EXIT_HYPERCALL_VALID_MASK); - if (ret < 0) { - error_report("kvm: Failed to enable MAP_GPA_RANGE cap: %s", - strerror(-ret)); - return ret; + if (!sev_snp_enabled()) { + has_map_gpa_range = kvm_check_extension(s, KVM_CAP_EXIT_HYPERCALL); + if (has_map_gpa_range) { + ret = kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, + KVM_EXIT_HYPERCALL_VALID_MASK); + if (ret < 0) { + error_report("kvm: Failed to enable MAP_GPA_RANGE cap: %s", + strerror(-ret)); + return ret; + } } } @@ -4861,9 +4925,9 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) /* Inject NMI */ if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; - qemu_mutex_unlock_iothread(); + bql_unlock(); DPRINTF("injected NMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_NMI); if (ret < 0) { @@ -4872,9 +4936,9 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; - qemu_mutex_unlock_iothread(); + bql_unlock(); DPRINTF("injected SMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_SMI); if (ret < 0) { @@ -4885,7 +4949,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } if (!kvm_pic_in_kernel()) { - qemu_mutex_lock_iothread(); + bql_lock(); } /* Force the VCPU out of its inner loop to process any INIT requests @@ -4938,7 +5002,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -4986,12 +5050,12 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) /* We need to protect the apic state against concurrent accesses from * different threads in case the userspace irqchip is used. 
*/ if (!kvm_irqchip_in_kernel()) { - qemu_mutex_lock_iothread(); + bql_lock(); } cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8); cpu_set_apic_base(x86_cpu->apic_state, run->apic_base); if (!kvm_irqchip_in_kernel()) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } return cpu_get_mem_attrs(env); } @@ -5436,6 +5500,50 @@ static bool host_supports_vmx(void) return ecx & CPUID_EXT_VMX; } +/* + * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE + * to service guest-initiated memory attribute update requests so that + * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be + * backed by the private memory pool provided by guest_memfd, and as such + * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX). + * + * Other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live + * migration, are not implemented here currently. + * + * For the guest_memfd use-case, these exits will generally be synthesized + * by KVM based on platform-specific hypercalls, like GHCB requests in the + * case of SEV-SNP, and not issued directly within the guest through the + * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is + * not actually advertised to guests via the KVM CPUID feature bit, as + * opposed to SEV live migration where it would be. Since it is unlikely the + * SEV live migration use-case would be useful for guest-memfd backed guests, + * because private/shared page tracking is already provided through other + * means, these two use-cases should be treated as mutually exclusive. + */ +static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) +{ + uint64_t gpa, size, attributes; + + if (!machine_require_guest_memfd(current_machine)) + return -EINVAL; + + gpa = run->hypercall.args[0]; + size = run->hypercall.args[1] * TARGET_PAGE_SIZE; + attributes = run->hypercall.args[2]; + + trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); + + return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); +} + +static int kvm_handle_hypercall(struct kvm_run *run) +{ + if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) + return kvm_handle_hc_map_gpa_range(run); + + return -EINVAL; +} + #define VMX_INVALID_GUEST_STATE 0x80000021 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) @@ -5450,17 +5558,17 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) switch (run->exit_reason) { case KVM_EXIT_HLT: DPRINTF("handle_hlt\n"); - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_handle_halt(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case KVM_EXIT_SET_TPR: ret = 0; break; case KVM_EXIT_TPR_ACCESS: - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_handle_tpr_access(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case KVM_EXIT_FAIL_ENTRY: code = run->fail_entry.hardware_entry_failure_reason; @@ -5486,9 +5594,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; case KVM_EXIT_DEBUG: DPRINTF("kvm_exit_debug\n"); - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_handle_debug(cpu, &run->debug.arch); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case KVM_EXIT_HYPERV: ret = kvm_hv_handle_exit(cpu, &run->hyperv); @@ -5532,7 +5640,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; #endif case KVM_EXIT_HYPERCALL: - ret = kvm_handle_exit_hypercall(cpu, run); + if (!sev_snp_enabled()) { + ret = kvm_handle_exit_hypercall(cpu, run); + } else { + ret = kvm_handle_hypercall(run); + } break; default: fprintf(stderr, "KVM: 
unknown exit reason %d\n", run->exit_reason); @@ -5786,14 +5898,6 @@ bool kvm_has_waitpkg(void) return has_msr_umwait; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - if (is_hygon_cpu()) - return !csv_kvm_cpu_reset_inhibit; - - return !sev_es_enabled(); -} - #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 30fedcffea3e59161eeb9ff1750738845c1a4637..34fc60774b865a7ce9838345b096a2ebb4c2cdab 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -33,10 +33,12 @@ bool kvm_has_smm(void); bool kvm_enable_x2apic(void); bool kvm_hv_vpindex_settable(void); +bool kvm_enable_hypercall(uint64_t enable_mask); bool kvm_enable_sgx_provisioning(KVMState *s); bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +int kvm_get_vm_type(MachineState *ms); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_after_reset_vcpu(X86CPU *cpu); void kvm_arch_do_init_vcpu(X86CPU *cs); @@ -49,6 +51,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); void kvm_synchronize_all_tsc(void); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3c3f8cf93c1bfe17638beae51942163fc7f4003d..7c5eadac66e9d24013b56c8660c2d69ce6eba959 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -7,7 +7,6 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) -i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) i386_kvm_ss.add(when: 'CONFIG_CSV', if_false: files('csv-stub.c')) i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c index a0aac1117fbaca51154fc85553ea242cbc8cd647..8db00310976387930a7ed0678d89eebb5823e04d 100644 --- a/target/i386/kvm/sev-stub.c +++ b/target/i386/kvm/sev-stub.c @@ -1,27 +1,8 @@ -/* - * QEMU SEV stub - * - * Copyright Advanced Micro Devices 2018 - * - * Authors: - * Brijesh Singh - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - #include "qemu/osdep.h" #include "sev.h" bool sev_kvm_has_msr_ghcb; -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - /* If we get here, cgs must be some non-SEV thing */ - return 0; -} - int sev_remove_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end) { diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events index b365a8e8e282a126f067466d624907d06135ee99..74a6234ff7f5841dded2744ef638bd40f82e1757 100644 --- a/target/i386/kvm/trace-events +++ b/target/i386/kvm/trace-events @@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" +kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64 # xen-emu.c kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index c0631f9cf439fd580756f716501a31305b778ce0..b0ed2e6aeb2a2464d28725c23a8a4de357b2cc48 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -403,7 +403,7 @@ void kvm_xen_maybe_deassert_callback(CPUState *cs) /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */ if (!vi->evtchn_upcall_pending) { - qemu_mutex_lock_iothread(); + bql_lock(); /* * Check again now we have the lock, because it may have been * asserted in the interim. And we don't want to take the lock @@ -413,7 +413,7 @@ void kvm_xen_maybe_deassert_callback(CPUState *cs) X86_CPU(cs)->env.xen_callback_asserted = false; xen_evtchn_set_callback_level(0); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -773,9 +773,9 @@ static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu, switch (hp.index) { case HVM_PARAM_CALLBACK_IRQ: - qemu_mutex_lock_iothread(); + bql_lock(); err = xen_evtchn_set_callback_param(hp.value); - qemu_mutex_unlock_iothread(); + bql_unlock(); xen_set_long_mode(exit->u.hcall.longmode); break; default: @@ -1408,7 +1408,7 @@ int kvm_xen_soft_reset(void) CPUState *cpu; int err; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); trace_kvm_xen_soft_reset(); @@ -1481,9 +1481,9 @@ static int schedop_shutdown(CPUState *cs, uint64_t arg) break; case SHUTDOWN_soft_reset: - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_xen_soft_reset(); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; default: diff --git a/target/i386/meson.build b/target/i386/meson.build index 594a0a6abf7bdc028fa7e94390890c2547ead7db..9a1d070d130ca097f50294cf86ca168ea746c5fb 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -6,7 +6,7 @@ i386_ss.add(files( 'xsave_helper.c', 'cpu-dump.c', )) -i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c index 6c46101ac1aecafb6e92c64f3c909e6392b64b10..f9d5e9a37ae4077ffa94d5b96273136737d6c345 100644 --- a/target/i386/nvmm/nvmm-accel-ops.c +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -25,7 +25,7 
@@ static void *qemu_nvmm_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); current_cpu = cpu; @@ -55,7 +55,7 @@ static void *qemu_nvmm_cpu_thread_fn(void *arg) nvmm_destroy_vcpu(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index 7d752bc5e000d10465f0e5af3d1c05063275e510..cfdca91123c6615722b0ce5a529dc51aa2e71aee 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -399,7 +399,7 @@ nvmm_vcpu_pre_run(CPUState *cpu) uint8_t tpr; int ret; - qemu_mutex_lock_iothread(); + bql_lock(); tpr = cpu_get_apic_tpr(x86_cpu->apic_state); if (tpr != qcpu->tpr) { @@ -462,7 +462,7 @@ nvmm_vcpu_pre_run(CPUState *cpu) } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* @@ -485,9 +485,9 @@ nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit) tpr = exit->exitstate.cr8; if (qcpu->tpr != tpr) { qcpu->tpr = tpr; - qemu_mutex_lock_iothread(); + bql_lock(); cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -648,7 +648,7 @@ nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, CPUX86State *env = cpu_env(cpu); int ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) && @@ -658,7 +658,7 @@ nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, ret = 1; } - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -721,7 +721,7 @@ nvmm_vcpu_loop(CPUState *cpu) return 0; } - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_start(cpu); /* @@ -806,16 +806,16 @@ nvmm_vcpu_loop(CPUState *cpu) error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]", exit->reason, exit->u.inv.hwcode); nvmm_get_registers(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); + bql_unlock(); ret = -1; break; } } while (ret == 0); cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qatomic_set(&cpu->exit_request, false); diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c index 96e1c15cc3fa13b3d0dcf542632ab4074fee3501..d5bf886e799ce4cc11e34041d5a6fa32b358bb2d 100644 --- a/target/i386/sev-sysemu-stub.c +++ b/target/i386/sev-sysemu-stub.c @@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, error_setg(errp, "SEV is not available in this QEMU"); } -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) +int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { g_assert_not_reached(); } @@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "SEV is not available in this QEMU\n"); } + +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) +{ +} diff --git a/target/i386/sev.c b/target/i386/sev.c index 8c1f4d653ef4b8ec3bd792adabfd9cea98cb2f82..29c6815844cdc8e54f005d6656d57cdcac93c1da 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include +#include #include #include @@ -26,6 +27,7 @@ #include "qemu/error-report.h" #include "crypto/hash.h" #include "sysemu/kvm.h" +#include "kvm/kvm_i386.h" #include "sev.h" #include "csv.h" #include "sysemu/sysemu.h" @@ -38,18 +40,98 @@ #include "monitor/monitor.h" 
#include "monitor/hmp-target.h" #include "qapi/qapi-commands-misc-target.h" -#include "exec/confidential-guest-support.h" +#include "confidential-guest.h" #include "hw/i386/pc.h" #include "exec/address-spaces.h" +#include "qemu/queue.h" -#define TYPE_SEV_GUEST "sev-guest" -OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) +OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) +OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) struct shared_region { unsigned long gfn_start, gfn_end; QTAILQ_ENTRY(shared_region) list; }; +/* hard code sha256 digest size */ +#define HASH_SIZE 32 + +typedef struct QEMU_PACKED SevHashTableEntry { + QemuUUID guid; + uint16_t len; + uint8_t hash[HASH_SIZE]; +} SevHashTableEntry; + +typedef struct QEMU_PACKED SevHashTable { + QemuUUID guid; + uint16_t len; + SevHashTableEntry cmdline; + SevHashTableEntry initrd; + SevHashTableEntry kernel; +} SevHashTable; + +/* + * Data encrypted by sev_encrypt_flash() must be padded to a multiple of + * 16 bytes. + */ +typedef struct QEMU_PACKED PaddedSevHashTable { + SevHashTable ht; + uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +} PaddedSevHashTable; + +QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ + uint32_t reset_addr; +} SevInfoBlock; + +#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +typedef struct QEMU_PACKED SevHashTableDescriptor { + /* SEV hash table area guest address */ + uint32_t base; + /* SEV hash table area size (in bytes) */ + uint32_t size; +} SevHashTableDescriptor; + +struct SevCommonState { + X86ConfidentialGuest parent_obj; + + int kvm_type; + + /* configuration parameters */ + char *sev_device; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; + + /* runtime state */ + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; +}; + +struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + + /* public */ + bool (*build_kernel_loader_hashes)(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp); + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, size_t len); + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); +}; /** * SevGuestState: @@ -62,28 +144,19 @@ struct shared_region { * -machine ...,memory-encryption=sev0 */ struct SevGuestState { - ConfidentialGuestSupport parent_obj; + SevCommonState parent_obj; + gchar *measurement; /* configuration parameters */ - char *sev_device; + uint32_t handle; uint32_t policy; char *dh_cert_file; char *session_file; - uint32_t cbitpos; - uint32_t reduced_phys_bits; - bool kernel_hashes; + OnOffAuto legacy_vm_type; char *user_id; char *secret_header_file; char *secret_file; - /* runtime state */ - uint32_t handle; - uint8_t api_major; - uint8_t api_minor; - uint8_t build_id; - int sev_fd; - SevState state; - gchar *measurement; guchar *remote_pdh; size_t remote_pdh_len; guchar *remote_plat_cert; @@ -97,62 +170,44 @@ struct SevGuestState { gchar *send_vmsa_packet_hdr; size_t send_vmsa_packet_hdr_len; - 
uint32_t reset_cs; - uint32_t reset_ip; - bool reset_data_valid; - QTAILQ_HEAD(, shared_region) shared_regions_list; /* link list used for HYGON CSV */ CsvBatchCmdList *csv_batch_cmd_list; }; -#define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ -#define DEFAULT_SEV_DEVICE "/dev/sev" - -#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" -typedef struct __attribute__((__packed__)) SevInfoBlock { - /* SEV-ES Reset Vector Address */ - uint32_t reset_addr; -} SevInfoBlock; - -#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" -typedef struct QEMU_PACKED SevHashTableDescriptor { - /* SEV hash table area guest address */ - uint32_t base; - /* SEV hash table area size (in bytes) */ - uint32_t size; -} SevHashTableDescriptor; - -/* hard code sha256 digest size */ -#define HASH_SIZE 32 +struct SevSnpGuestState { + SevCommonState parent_obj; -typedef struct QEMU_PACKED SevHashTableEntry { - QemuUUID guid; - uint16_t len; - uint8_t hash[HASH_SIZE]; -} SevHashTableEntry; + /* configuration parameters */ + char *guest_visible_workarounds; + char *id_block_base64; + uint8_t *id_block; + char *id_auth_base64; + uint8_t *id_auth; + char *host_data; + + struct kvm_sev_snp_launch_start kvm_start_conf; + struct kvm_sev_snp_launch_finish kvm_finish_conf; + + uint32_t kernel_hashes_offset; + PaddedSevHashTable *kernel_hashes_data; +}; -typedef struct QEMU_PACKED SevHashTable { - QemuUUID guid; - uint16_t len; - SevHashTableEntry cmdline; - SevHashTableEntry initrd; - SevHashTableEntry kernel; -} SevHashTable; +#define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ +#define DEFAULT_SEV_DEVICE "/dev/sev" +#define DEFAULT_SEV_SNP_POLICY 0x30000 -/* - * Data encrypted by sev_encrypt_flash() must be padded to a multiple of - * 16 bytes. - */ -typedef struct QEMU_PACKED PaddedSevHashTable { - SevHashTable ht; - uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; -} PaddedSevHashTable; +typedef struct SevLaunchUpdateData { + QTAILQ_ENTRY(SevLaunchUpdateData) next; + hwaddr gpa; + void *hva; + size_t len; + int type; +} SevLaunchUpdateData; -QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); +static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; -static SevGuestState *sev_guest; static Error *sev_mig_blocker; bool sev_kvm_has_msr_ghcb; @@ -210,6 +265,36 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { .load_incoming_cpu_state = csv_load_incoming_cpu_state, }; +/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */ +#define KVM_MAX_CPUID_ENTRIES 100 + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + +#define SNP_CPUID_FUNCTION_MAXCOUNT 64 +#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF + +typedef struct { + uint32_t eax_in; + uint32_t ecx_in; + uint64_t xcr0_in; + uint64_t xss_in; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint64_t reserved; +} __attribute__((packed)) SnpCpuidFunc; + +typedef struct { + uint32_t count; + uint32_t reserved1; + uint64_t reserved2; + SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT]; +} __attribute__((packed)) SnpCpuidInfo; + static int sev_ioctl(int fd, int cmd, void *data, int *error) { @@ -258,21 +343,21 @@ fw_error_to_str(int code) } static bool -sev_check_state(const SevGuestState *sev, SevState state) +sev_check_state(const SevCommonState *sev_common, SevState state) { - assert(sev); - return sev->state == state ? 
true : false; + assert(sev_common); + return sev_common->state == state ? true : false; } static void -sev_set_guest_state(SevGuestState *sev, SevState new_state) +sev_set_guest_state(SevCommonState *sev_common, SevState new_state) { assert(new_state < SEV_STATE__MAX); - assert(sev); + assert(sev_common); - trace_kvm_sev_change_state(SevState_str(sev->state), + trace_kvm_sev_change_state(SevState_str(sev_common->state), SevState_str(new_state)); - sev->state = new_state; + sev_common->state = new_state; } static void @@ -339,43 +424,6 @@ static struct RAMBlockNotifier sev_ram_notifier = { .ram_block_removed = sev_ram_block_removed, }; -static void -sev_guest_finalize(Object *obj) -{ -} - -static char * -sev_guest_get_session_file(Object *obj, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - return s->session_file ? g_strdup(s->session_file) : NULL; -} - -static void -sev_guest_set_session_file(Object *obj, const char *value, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - s->session_file = g_strdup(value); -} - -static char * -sev_guest_get_dh_cert_file(Object *obj, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - return g_strdup(s->dh_cert_file); -} - -static void -sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - s->dh_cert_file = g_strdup(value); -} - static char * sev_guest_get_user_id(Object *obj, Error **errp) { @@ -424,146 +472,72 @@ sev_guest_set_secret_file(Object *obj, const char *value, Error **errp) s->secret_file = g_strdup(value); } -static char * -sev_guest_get_sev_device(Object *obj, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - return g_strdup(sev->sev_device); -} - -static void -sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->sev_device = g_strdup(value); -} - -static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - return sev->kernel_hashes; -} - -static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->kernel_hashes = value; -} - -static void -sev_guest_class_init(ObjectClass *oc, void *data) -{ - object_class_property_add_str(oc, "sev-device", - sev_guest_get_sev_device, - sev_guest_set_sev_device); - object_class_property_set_description(oc, "sev-device", - "SEV device to use"); - object_class_property_add_str(oc, "dh-cert-file", - sev_guest_get_dh_cert_file, - sev_guest_set_dh_cert_file); - object_class_property_set_description(oc, "dh-cert-file", - "guest owners DH certificate (encoded with base64)"); - object_class_property_add_str(oc, "session-file", - sev_guest_get_session_file, - sev_guest_set_session_file); - object_class_property_set_description(oc, "session-file", - "guest owners session parameters (encoded with base64)"); - object_class_property_add_bool(oc, "kernel-hashes", - sev_guest_get_kernel_hashes, - sev_guest_set_kernel_hashes); - object_class_property_set_description(oc, "kernel-hashes", - "add kernel hashes to guest firmware for measured Linux boot"); - object_class_property_add_str(oc, "user-id", - sev_guest_get_user_id, - sev_guest_set_user_id); - object_class_property_set_description(oc, "user-id", - "user id of the guest owner"); - object_class_property_add_str(oc, "secret-header-file", - sev_guest_get_secret_header_file, - sev_guest_set_secret_header_file); - object_class_property_set_description(oc, "secret-header-file", - "header file 
of the guest owner's secret"); - object_class_property_add_str(oc, "secret-file", - sev_guest_get_secret_file, - sev_guest_set_secret_file); - object_class_property_set_description(oc, "secret-file", - "file of the guest owner's secret"); -} - -static void -sev_guest_instance_init(Object *obj) +bool +sev_enabled(void) { - SevGuestState *sev = SEV_GUEST(obj); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; - sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); - sev->policy = DEFAULT_GUEST_POLICY; - object_property_add_uint32_ptr(obj, "policy", &sev->policy, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "handle", &sev->handle, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "reduced-phys-bits", - &sev->reduced_phys_bits, - OBJ_PROP_FLAG_READWRITE); + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); } -/* sev guest info */ -static const TypeInfo sev_guest_info = { - .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, - .name = TYPE_SEV_GUEST, - .instance_size = sizeof(SevGuestState), - .instance_finalize = sev_guest_finalize, - .class_init = sev_guest_class_init, - .instance_init = sev_guest_instance_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } - } -}; - bool -sev_enabled(void) +sev_snp_enabled(void) { - return !!sev_guest; + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST); } bool sev_es_enabled(void) { - return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return sev_snp_enabled() || + (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES); } uint32_t sev_get_cbit_position(void) { - return sev_guest ? sev_guest->cbitpos : 0; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + return sev_common ? sev_common->cbitpos : 0; } uint32_t sev_get_reduced_phys_bits(void) { - return sev_guest ? sev_guest->reduced_phys_bits : 0; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + return sev_common ? 
sev_common->reduced_phys_bits : 0; } static SevInfo *sev_get_info(void) { SevInfo *info; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); info = g_new0(SevInfo, 1); info->enabled = sev_enabled(); if (info->enabled) { - info->api_major = sev_guest->api_major; - info->api_minor = sev_guest->api_minor; - info->build_id = sev_guest->build_id; - info->policy = sev_guest->policy; - info->state = sev_guest->state; - info->handle = sev_guest->handle; + info->api_major = sev_common->api_major; + info->api_minor = sev_common->api_minor; + info->build_id = sev_common->build_id; + info->state = sev_common->state; + + if (sev_snp_enabled()) { + info->sev_type = SEV_GUEST_TYPE_SEV_SNP; + info->u.sev_snp.snp_policy = + object_property_get_uint(OBJECT(sev_common), "policy", NULL); + } else { + info->sev_type = SEV_GUEST_TYPE_SEV; + info->u.sev.handle = SEV_GUEST(sev_common)->handle; + info->u.sev.policy = + (uint32_t)object_property_get_uint(OBJECT(sev_common), + "policy", NULL); + } } return info; @@ -586,20 +560,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) { SevInfo *info = sev_get_info(); - if (info && info->enabled) { - monitor_printf(mon, "handle: %d\n", info->handle); - monitor_printf(mon, "state: %s\n", SevState_str(info->state)); - monitor_printf(mon, "build: %d\n", info->build_id); - monitor_printf(mon, "api version: %d.%d\n", - info->api_major, info->api_minor); + if (!info || !info->enabled) { + monitor_printf(mon, "SEV is not enabled\n"); + goto out; + } + + monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type)); + monitor_printf(mon, "state: %s\n", SevState_str(info->state)); + monitor_printf(mon, "build: %d\n", info->build_id); + monitor_printf(mon, "api version: %d.%d\n", info->api_major, + info->api_minor); + + if (sev_snp_enabled()) { monitor_printf(mon, "debug: %s\n", - info->policy & SEV_POLICY_NODBG ? "off" : "on"); - monitor_printf(mon, "key-sharing: %s\n", - info->policy & SEV_POLICY_NOKS ? "off" : "on"); + info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on" + : "off"); + monitor_printf(mon, "SMT allowed: %s\n", + info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on" + : "off"); } else { - monitor_printf(mon, "SEV is not enabled\n"); + monitor_printf(mon, "handle: %d\n", info->u.sev.handle); + monitor_printf(mon, "debug: %s\n", + info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on"); + monitor_printf(mon, "key-sharing: %s\n", + info->u.sev.policy & SEV_POLICY_NOKS ? 
"off" : "on"); } +out: qapi_free_SevInfo(info); } @@ -690,6 +677,8 @@ static SevCapability *sev_get_capabilities(Error **errp) size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; uint32_t ebx; int fd; + SevCommonState *sev_common; + char *sev_device; if (!kvm_enabled()) { error_setg(errp, "KVM not enabled"); @@ -700,12 +689,22 @@ static SevCapability *sev_get_capabilities(Error **errp) return NULL; } - fd = open(DEFAULT_SEV_DEVICE, O_RDWR); - if (fd < 0) { + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_common) { + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", + &error_abort); + } else { + sev_device = g_strdup(DEFAULT_SEV_DEVICE); + } + + fd = open(sev_device, O_RDWR); + if (fd < 0) { error_setg_errno(errp, errno, "SEV: Failed to open %s", - DEFAULT_SEV_DEVICE); + sev_device); + g_free(sev_device); return NULL; } + g_free(sev_device); if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, &cert_chain_data, &cert_chain_len, errp)) { @@ -743,12 +742,44 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) return sev_get_capabilities(errp); } +static OvmfSevMetadata *ovmf_sev_metadata_table; + +#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc" +typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset { + uint32_t offset; +} OvmfSevMetadataOffset; + +OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void) +{ + return ovmf_sev_metadata_table; +} + +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) +{ + OvmfSevMetadata *metadata; + OvmfSevMetadataOffset *data; + + if (!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data, + NULL)) { + return; + } + + metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset); + if (memcmp(metadata->signature, "ASEV", 4) != 0 || + metadata->len < sizeof(OvmfSevMetadata) || + metadata->len > flash_size - data->offset) { + return; + } + + ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len); +} + static SevAttestationReport *sev_get_attestation_report(const char *mnonce, Error **errp) { struct kvm_sev_attestation_report input = {}; SevAttestationReport *report = NULL; - SevGuestState *sev = sev_guest; + SevCommonState *sev_common; g_autofree guchar *data = NULL; g_autofree guchar *buf = NULL; gsize len; @@ -773,8 +804,10 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, return NULL; } + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + /* Query the report length */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, &input, &err); if (ret < 0) { if (err != SEV_RET_INVALID_LEN) { @@ -790,7 +823,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, memcpy(input.mnonce, buf, sizeof(input.mnonce)); /* Query the report */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, &input, &err); if (ret) { error_setg_errno(errp, errno, "SEV: Failed to get attestation report" @@ -830,26 +863,56 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) } static int -sev_launch_start(SevGuestState *sev) +sev_snp_launch_start(SevCommonState *sev_common) +{ + int fw_error, rc; + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; + + trace_kvm_sev_snp_launch_start(start->policy, + sev_snp_guest->guest_visible_workarounds); 
+ + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return 1; + } + + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, + start, &fw_error); + if (rc < 0) { + error_report("%s: SNP_LAUNCH_START ret=%d fw_error=%d '%s'", + __func__, rc, fw_error, fw_error_to_str(fw_error)); + return 1; + } + + QTAILQ_INIT(&launch_update); + + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); + + return 0; +} + +static int +sev_launch_start(SevCommonState *sev_common) { gsize sz; int ret = 1; int fw_error, rc; + SevGuestState *sev_guest = SEV_GUEST(sev_common); struct kvm_sev_launch_start start = { - .handle = sev->handle, .policy = sev->policy + .handle = sev_guest->handle, .policy = sev_guest->policy }; guchar *session = NULL, *dh_cert = NULL; - if (sev->session_file) { - if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { + if (sev_guest->session_file) { + if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { goto out; } start.session_uaddr = (unsigned long)session; start.session_len = sz; } - if (sev->dh_cert_file) { - if (sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { + if (sev_guest->dh_cert_file) { + if (sev_read_file_base64(sev_guest->dh_cert_file, &dh_cert, &sz) < 0) { goto out; } start.dh_uaddr = (unsigned long)dh_cert; @@ -857,15 +920,15 @@ sev_launch_start(SevGuestState *sev) } trace_kvm_sev_launch_start(start.policy, session, dh_cert); - rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); if (rc < 0) { error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", __func__, rc, fw_error, fw_error_to_str(fw_error)); goto out; } - sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); - sev->handle = start.handle; + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); + sev_guest->handle = start.handle; ret = 0; out: @@ -874,8 +937,152 @@ out: return ret; } +static void +sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + SnpCpuidInfo *new) +{ + size_t i; + + if (old->count != new->count) { + error_report("SEV-SNP: CPUID validation failed due to count mismatch, " + "provided: %d, expected: %d", old->count, new->count); + return; + } + + for (i = 0; i < old->count; i++) { + SnpCpuidFunc *old_func, *new_func; + + old_func = &old->entries[i]; + new_func = &new->entries[i]; + + if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { + error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x, " + "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x, " + "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", + old_func->eax_in, old_func->ecx_in, + old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, + new_func->eax, new_func->ebx, new_func->ecx, new_func->edx); + } + } +} + +static const char * +snp_page_type_to_str(int type) +{ + switch (type) { + case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal"; + case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero"; + case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured"; + case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets"; + case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid"; + default: return "unknown"; + } +} + +static int +sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + SevLaunchUpdateData *data) +{ + int ret, fw_error; + SnpCpuidInfo snp_cpuid_info; + struct kvm_sev_snp_launch_update update = {0}; + + if (!data->hva || !data->len) { + error_report("SNP_LAUNCH_UPDATE called with invalid address" + "/length: %p / %zx", + data->hva, 
data->len); + return 1; + } + + if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + /* Save a copy for comparison in case the LAUNCH_UPDATE fails */ + memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info)); + } + + update.uaddr = (__u64)(unsigned long)data->hva; + update.gfn_start = data->gpa >> TARGET_PAGE_BITS; + update.len = data->len; + update.type = data->type; + + /* + * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private + * memory attribute set in advance. + */ + ret = kvm_set_memory_attributes_private(data->gpa, data->len); + if (ret) { + error_report("SEV-SNP: failed to configure initial " + "private guest memory"); + goto out; + } + + while (update.len || ret == -EAGAIN) { + trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start << + TARGET_PAGE_BITS, update.len, + snp_page_type_to_str(update.type)); + + ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd, + KVM_SEV_SNP_LAUNCH_UPDATE, + &update, &fw_error); + if (ret && ret != -EAGAIN) { + error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); + + if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva); + error_report("SEV-SNP: failed to update CPUID page"); + } + break; + } + } + +out: + if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { + error_report("SEV-SNP: expected update of GPA range %" + HWADDR_PRIx "-%" HWADDR_PRIx ", " + "got GPA range %" HWADDR_PRIx "-%llx", + data->gpa, data->gpa + data->len, data->gpa, + update.gfn_start << TARGET_PAGE_BITS); + ret = -EIO; + } + + return ret; +} + +static uint32_t +sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + switch (feature) { + case 1: + if (reg == R_ECX) { + return value & ~CPUID_EXT_TSC_DEADLINE_TIMER; + } + break; + case 7: + if (index == 0 && reg == R_EBX) { + return value & ~CPUID_7_0_EBX_TSC_ADJUST; + } + if (index == 0 && reg == R_EDX) { + return value & ~(CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_STIBP | + CPUID_7_0_EDX_FLUSH_L1D | + CPUID_7_0_EDX_ARCH_CAPABILITIES | + CPUID_7_0_EDX_CORE_CAPABILITY | + CPUID_7_0_EDX_SPEC_CTRL_SSBD); + } + break; + case 0x80000008: + if (reg == R_EBX) { + return value & ~CPUID_8000_0008_EBX_VIRT_SSBD; + } + break; + } + return value; +} + static int -sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) +sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *addr, size_t len) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -887,7 +1094,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) update.uaddr = (__u64)(unsigned long)addr; update.len = len; trace_kvm_sev_launch_update_data(addr, len); - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", @@ -898,11 +1105,12 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) } static int -sev_launch_update_vmsa(SevGuestState *sev) +sev_launch_update_vmsa(SevGuestState *sev_guest) { int ret, fw_error; - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, + NULL, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -917,12 +1125,13 @@ 
csv_load_launch_secret(const char *secret_header_file, const char *secret_file); static void sev_launch_get_measure(Notifier *notifier, void *unused) { - SevGuestState *sev = sev_guest; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = SEV_GUEST(sev_common); int ret, error; g_autofree guchar *data = NULL; struct kvm_sev_launch_measure measurement = {}; - if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { + if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { return; } @@ -931,7 +1140,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) ret = csv3_launch_encrypt_vmcb(); } else { /* measure all the VM save areas before getting launch_measure */ - ret = sev_launch_update_vmsa(sev); + ret = sev_launch_update_vmsa(sev_guest); } if (ret) { exit(1); @@ -940,7 +1149,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) } /* query the measurement blob length */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, &measurement, &error); if (!measurement.len) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", @@ -952,7 +1161,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) measurement.uaddr = (unsigned long)data; /* get the measurement blob */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, &measurement, &error); if (ret) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", @@ -960,26 +1169,30 @@ sev_launch_get_measure(Notifier *notifier, void *unused) return; } - sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_SECRET); /* encode the measurement value and emit the event */ - sev->measurement = g_base64_encode(data, measurement.len); - trace_kvm_sev_launch_measurement(sev->measurement); + sev_guest->measurement = g_base64_encode(data, measurement.len); + trace_kvm_sev_launch_measurement(sev_guest->measurement); /* Hygon CSV will auto load guest owner's secret */ if (is_hygon_cpu()) { - if (sev->secret_header_file && - strlen(sev->secret_header_file) && - sev->secret_file && - strlen(sev->secret_file)) - csv_load_launch_secret(sev->secret_header_file, sev->secret_file); + if (sev_guest->secret_header_file && + strlen(sev_guest->secret_header_file) && + sev_guest->secret_file && + strlen(sev_guest->secret_file)) + csv_load_launch_secret(sev_guest->secret_header_file, sev_guest->secret_file); } } static char *sev_get_launch_measurement(void) { + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + SevGuestState *sev_guest = + (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + if (sev_guest && - sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { + SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { return g_strdup(sev_guest->measurement); } @@ -1008,19 +1221,20 @@ static Notifier sev_machine_done_notify = { }; static void -sev_launch_finish(SevGuestState *sev) +sev_launch_finish(SevCommonState *sev_common) { int ret, error; trace_kvm_sev_launch_finish(); - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + &error); if (ret) { error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); exit(1); } - sev_set_guest_state(sev, SEV_STATE_RUNNING); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); /* add migration blocker */ error_setg(&sev_mig_blocker, 
@@ -1035,37 +1249,282 @@ sev_del_migrate_blocker(void) } static int -sev_receive_finish(SevGuestState *s) +sev_receive_finish(SevCommonState *sev_common) { int error, ret = 1; trace_kvm_sev_receive_finish(); - ret = sev_ioctl(s->sev_fd, KVM_SEV_RECEIVE_FINISH, 0, &error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_FINISH, 0, &error); if (ret) { error_report("%s: RECEIVE_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); goto err; } - sev_set_guest_state(s, SEV_STATE_RUNNING); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); err: return ret; } +static int +snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) +{ + SevLaunchUpdateData *data; + + data = g_new0(SevLaunchUpdateData, 1); + data->gpa = gpa; + data->hva = hva; + data->len = len; + data->type = type; + + QTAILQ_INSERT_TAIL(&launch_update, data, next); + + return 0; +} + +static int +sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len) +{ + int ret = snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_NORMAL); + return ret; +} + +static int +sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, + const KvmCpuidInfo *kvm_cpuid_info) +{ + size_t i; + + if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { + error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", + kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); + return -1; + } + + memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info)); + + for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) { + const struct kvm_cpuid_entry2 *kvm_cpuid_entry; + SnpCpuidFunc *snp_cpuid_entry; + + kvm_cpuid_entry = &kvm_cpuid_info->entries[i]; + snp_cpuid_entry = &snp_cpuid_info->entries[i]; + + snp_cpuid_entry->eax_in = kvm_cpuid_entry->function; + if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) { + snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index; + } + snp_cpuid_entry->eax = kvm_cpuid_entry->eax; + snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx; + snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx; + snp_cpuid_entry->edx = kvm_cpuid_entry->edx; + + /* + * Guest kernels will calculate EBX themselves using the 0xD + * subfunctions corresponding to the individual XSAVE areas, so only + * encode the base XSAVE size in the initial leaves, corresponding + * to the initial XCR0=1 state. 
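+ * (For reference: 0x240 = 576 = 512 + 64 bytes, i.e. the legacy FXSAVE
+ * region plus the XSAVE header, which is the save-area size reported when
+ * XCR0 has only the x87 bit set.)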
+ */ + if (snp_cpuid_entry->eax_in == 0xD && + (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) { + snp_cpuid_entry->ebx = 0x240; + snp_cpuid_entry->xcr0_in = 1; + snp_cpuid_entry->xss_in = 0; + } + } + + snp_cpuid_info->count = i; + + return 0; +} + +static int +snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) +{ + KvmCpuidInfo kvm_cpuid_info = {0}; + SnpCpuidInfo snp_cpuid_info; + CPUState *cs = first_cpu; + int ret; + uint32_t i = 0; + + assert(sizeof(snp_cpuid_info) <= cpuid_len); + + /* get the cpuid list from KVM */ + do { + kvm_cpuid_info.cpuid.nent = ++i; + ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info); + } while (ret == -E2BIG); + + if (ret) { + error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", + strerror(-ret)); + return 1; + } + + ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); + if (ret) { + error_report("SEV-SNP: failed to generate CPUID table information"); + return 1; + } + + memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); + + return snp_launch_update_data(cpuid_addr, hva, cpuid_len, + KVM_SEV_SNP_PAGE_TYPE_CPUID); +} + +static int +snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, + void *hva, uint32_t len) +{ + int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; + if (sev_snp->parent_obj.kernel_hashes) { + assert(sev_snp->kernel_hashes_data); + assert((sev_snp->kernel_hashes_offset + + sizeof(*sev_snp->kernel_hashes_data)) <= len); + memset(hva, 0, len); + memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data, + sizeof(*sev_snp->kernel_hashes_data)); + type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; + } + return snp_launch_update_data(addr, hva, len, type); +} + +static int +snp_metadata_desc_to_page_type(int desc_type) +{ + switch (desc_type) { + /* Add the unmeasured prevalidated pages as a zero page */ + case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO; + case SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS; + case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID; + default: + return KVM_SEV_SNP_PAGE_TYPE_ZERO; + } +} + +static void +snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + OvmfSevMetadata *metadata) +{ + OvmfSevMetadataDesc *desc; + int type, ret, i; + void *hva; + MemoryRegion *mr = NULL; + + for (i = 0; i < metadata->num_desc; i++) { + desc = &metadata->descs[i]; + + type = snp_metadata_desc_to_page_type(desc->type); + + hva = gpa2hva(&mr, desc->base, desc->len, NULL); + if (!hva) { + error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x", + __func__, desc->base, desc->len); + exit(1); + } + + if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { + ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, + desc->len); + } else { + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + } + + if (ret) { + error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", + __func__, desc->base, desc->len, desc->type); + exit(1); + } + } +} + +static void +sev_snp_launch_finish(SevCommonState *sev_common) +{ + int ret, error; + Error *local_err = NULL; + OvmfSevMetadata *metadata; + SevLaunchUpdateData *data; + SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + + /* + * To boot the SNP guest, the hypervisor is required to populate the CPUID + * and Secrets page before finalizing the launch flow. 
The location of + * the secrets and CPUID page is available through the OVMF metadata GUID. + */ + metadata = pc_system_get_ovmf_sev_metadata_ptr(); + if (metadata == NULL) { + error_report("%s: Failed to locate SEV metadata header", __func__); + exit(1); + } + + /* Populate all the metadata pages */ + snp_populate_metadata_pages(sev_snp, metadata); + + QTAILQ_FOREACH(data, &launch_update, next) { + ret = sev_snp_launch_update(sev_snp, data); + if (ret) { + exit(1); + } + } + + trace_kvm_sev_snp_launch_finish(sev_snp->id_block_base64, sev_snp->id_auth_base64, + sev_snp->host_data); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, + finish, &error); + if (ret) { + error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'", + ret, error, fw_error_to_str(error)); + exit(1); + } + + kvm_mark_guest_state_protected(); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, + "SEV-SNP: Migration is not implemented"); + ret = migrate_add_blocker(&sev_mig_blocker, &local_err); + if (local_err) { + error_report_err(local_err); + error_free(sev_mig_blocker); + exit(1); + } +} + + static void sev_vm_state_change(void *opaque, bool running, RunState state) { - SevGuestState *sev = opaque; + SevCommonState *sev_common = opaque; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque); if (running) { - if (sev_check_state(sev, SEV_STATE_RECEIVE_UPDATE)) { - sev_receive_finish(sev); - } else if (!sev_check_state(sev, SEV_STATE_RUNNING)) { - sev_launch_finish(sev); + if (sev_check_state(sev_common, SEV_STATE_RECEIVE_UPDATE)) { + sev_receive_finish(sev_common); + } else if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { + klass->launch_finish(sev_common); } } } +/* + * This helper is to examine sev-guest properties and determine if any options + * have been set which rely on the newer KVM_SEV_INIT2 interface and associated + * KVM VM types. 
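+ *
+ * Illustrative only: if sev-guest ever grows an option that can only be
+ * applied through KVM_SEV_INIT2 (say, a hypothetical vmsa-features
+ * property), this helper would turn into something like
+ *
+ *     return sev_guest->vmsa_features != 0;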
+ */ +static bool sev_init2_required(SevGuestState *sev_guest) +{ + /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */ + return false; +} + static inline bool check_blob_length(size_t value) { if (value > SEV_FW_BLOB_MAX_SIZE) { @@ -1080,51 +1539,52 @@ static inline bool check_blob_length(size_t value) int sev_save_setup(const char *pdh, const char *plat_cert, const char *amd_cert) { - SevGuestState *s = sev_guest; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = SEV_GUEST(sev_common); if (is_hygon_cpu()) { - if (sev_read_file_base64(pdh, &s->remote_pdh, - &s->remote_pdh_len) < 0) { + if (sev_read_file_base64(pdh, &sev_guest->remote_pdh, + &sev_guest->remote_pdh_len) < 0) { goto error; } } else { - s->remote_pdh = g_base64_decode(pdh, &s->remote_pdh_len); + sev_guest->remote_pdh = g_base64_decode(pdh, &sev_guest->remote_pdh_len); } - if (!check_blob_length(s->remote_pdh_len)) { + if (!check_blob_length(sev_guest->remote_pdh_len)) { goto error; } if (is_hygon_cpu()) { - if (sev_read_file_base64(plat_cert, &s->remote_plat_cert, - &s->remote_plat_cert_len) < 0) { + if (sev_read_file_base64(plat_cert, &sev_guest->remote_plat_cert, + &sev_guest->remote_plat_cert_len) < 0) { goto error; } } else { - s->remote_plat_cert = g_base64_decode(plat_cert, - &s->remote_plat_cert_len); + sev_guest->remote_plat_cert = g_base64_decode(plat_cert, + &sev_guest->remote_plat_cert_len); } - if (!check_blob_length(s->remote_plat_cert_len)) { + if (!check_blob_length(sev_guest->remote_plat_cert_len)) { goto error; } if (is_hygon_cpu()) { - if (sev_read_file_base64(amd_cert, &s->amd_cert, - &s->amd_cert_len) < 0) { + if (sev_read_file_base64(amd_cert, &sev_guest->amd_cert, + &sev_guest->amd_cert_len) < 0) { goto error; } } else { - s->amd_cert = g_base64_decode(amd_cert, &s->amd_cert_len); + sev_guest->amd_cert = g_base64_decode(amd_cert, &sev_guest->amd_cert_len); } - if (!check_blob_length(s->amd_cert_len)) { + if (!check_blob_length(sev_guest->amd_cert_len)) { goto error; } return 0; error: - g_free(s->remote_pdh); - g_free(s->remote_plat_cert); - g_free(s->amd_cert); + g_free(sev_guest->remote_pdh); + g_free(sev_guest->remote_plat_cert); + g_free(sev_guest->amd_cert); return 1; } @@ -1133,9 +1593,11 @@ static void sev_send_finish(void) { int ret, error; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = SEV_GUEST(sev_common); trace_kvm_sev_send_finish(); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_FINISH, 0, &error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_FINISH, 0, &error); if (ret) { error_report("%s: SEND_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); @@ -1145,7 +1607,7 @@ sev_send_finish(void) if (sev_es_enabled() && is_hygon_cpu()) { g_free(sev_guest->send_vmsa_packet_hdr); } - sev_set_guest_state(sev_guest, SEV_STATE_RUNNING); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); } static void @@ -1156,7 +1618,8 @@ sev_migration_state_notifier(Notifier *notifier, void *data) if (migration_has_finished(s) || migration_in_postcopy_after_devices(s) || migration_has_failed(s)) { - if (sev_check_state(sev_guest, SEV_STATE_SEND_UPDATE)) { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_check_state(sev_common, SEV_STATE_SEND_UPDATE)) { sev_send_finish(); } } @@ -1164,31 +1627,74 @@ sev_migration_state_notifier(Notifier *notifier, void *data) static Notifier sev_migration_state; -int 
sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int sev_kvm_type(X86ConfidentialGuest *cg) +{ + SevCommonState *sev_common = SEV_COMMON(cg); + SevGuestState *sev_guest = SEV_GUEST(sev_common); + int kvm_type; + + if (sev_common->kvm_type != -1) { + goto out; + } + + /* These are the only cases where legacy VM types can be used. */ + if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON || + (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO && + !sev_init2_required(sev_guest))) { + sev_common->kvm_type = KVM_X86_DEFAULT_VM; + goto out; + } + + /* + * Newer VM types are required, either explicitly via legacy-vm-type=on, or + * implicitly via legacy-vm-type=auto along with additional sev-guest + * properties that require the newer VM types. + */ + kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? + KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + if (!kvm_is_vm_type_supported(kvm_type)) { + if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) { + error_report("SEV: host kernel does not support requested %s VM type, which is required " + "for the set of options specified. To allow use of the legacy " + "KVM_X86_DEFAULT_VM VM type, please disable any options that are not " + "compatible with the legacy VM type, or upgrade your kernel.", + kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); + } else { + error_report("SEV: host kernel does not support requested %s VM type. To allow use of " + "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument " + "must be set to 'on' or 'auto' for the sev-guest object.", + kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); + } + + return -1; + } + + sev_common->kvm_type = kvm_type; +out: + return sev_common->kvm_type; +} + +static int sev_snp_kvm_type(X86ConfidentialGuest *cg) +{ + return KVM_X86_SNP_VM; +} + +static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevGuestState *sev - = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); char *devname; int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; - - if (!sev) { - return 0; - } + SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + X86ConfidentialGuestClass *x86_klass = + X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); ConfidentialGuestSupportClass *cgs_class = (ConfidentialGuestSupportClass *) object_get_class(OBJECT(cgs)); - ret = ram_block_discard_disable(true); - if (ret) { - error_report("%s: cannot disable RAM discard", __func__); - return -1; - } - - sev_guest = sev; - sev->state = SEV_STATE_UNINIT; + sev_common->state = SEV_STATE_UNINIT; host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); host_cbitpos = ebx & 0x3f; @@ -1198,10 +1704,10 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * register of CPUID 0x8000001F. No need to verify the range as the * comparison against the host value accomplishes that. */ - if (host_cbitpos != sev->cbitpos) { + if (host_cbitpos != sev_common->cbitpos) { error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", - __func__, host_cbitpos, sev->cbitpos); - goto err; + __func__, host_cbitpos, sev_common->cbitpos); + return -1; } /* @@ -1209,47 +1715,50 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * the EBX register of CPUID 0x8000001F, so verify the supplied value * is in the range of 1 to 63. 
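 * For example, a host advertising 48 physical address bits together with
 * reduced-phys-bits=1 leaves the guest a 47-bit guest-physical address
 * space:
 *
 *     guest_phys_bits = host_phys_bits - reduced_phys_bits;  // 48 - 1 = 47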
*/ - if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { + if (sev_common->reduced_phys_bits < 1 || + sev_common->reduced_phys_bits > 63) { error_setg(errp, "%s: reduced_phys_bits check failed," " it should be in the range of 1 to 63, requested '%d'", - __func__, sev->reduced_phys_bits); - goto err; + __func__, sev_common->reduced_phys_bits); + return -1; } - devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); - sev->sev_fd = open(devname, O_RDWR); - if (sev->sev_fd < 0) { + devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); + sev_common->sev_fd = open(devname, O_RDWR); + if (sev_common->sev_fd < 0) { error_setg(errp, "%s: Failed to open %s '%s'", __func__, devname, strerror(errno)); g_free(devname); - goto err; + return -1; } g_free(devname); - ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, + ret = sev_platform_ioctl(sev_common->sev_fd, SEV_PLATFORM_STATUS, &status, &fw_error); if (ret) { error_setg(errp, "%s: failed to get platform status ret=%d " "fw_error='%d: %s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); - goto err; + return -1; } - sev->build_id = status.build; - sev->api_major = status.api_major; - sev->api_minor = status.api_minor; + sev_common->build_id = status.build; + sev_common->api_major = status.api_major; + sev_common->api_minor = status.api_minor; if (sev_es_enabled()) { if (!kvm_kernel_irqchip_allowed()) { - error_report("%s: SEV-ES guests require in-kernel irqchip support", - __func__); - goto err; + error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip " + "support", __func__); + return -1; } + } + if (sev_es_enabled() && !sev_snp_enabled()) { if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { - error_report("%s: guest policy requires SEV-ES, but " + error_setg(errp, "%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", __func__); - goto err; + return -1; } cmd = KVM_SEV_ES_INIT; } else { @@ -1257,110 +1766,200 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } trace_kvm_sev_init(); - /* Only support reuse asid for CSV/CSV2 guest */ - if (is_hygon_cpu() && - (sev_guest->policy & GUEST_POLICY_REUSE_ASID)) { - char *user_id = NULL; - struct kvm_csv_init *init_cmd_buf = NULL; - - user_id = object_property_get_str(OBJECT(sev), "user-id", NULL); - if (user_id && strlen(user_id)) { - init_cmd_buf = g_new0(struct kvm_csv_init, 1); - init_cmd_buf->len = strlen(user_id); - init_cmd_buf->userid_addr = (__u64)user_id; - } - ret = sev_ioctl(sev->sev_fd, cmd, init_cmd_buf, &fw_error); - - if (user_id) { - g_free(user_id); - g_free(init_cmd_buf); + if (is_hygon_cpu()) { + SevGuestState *sev_guest = SEV_GUEST(sev_common); + if (sev_guest->policy & GUEST_POLICY_REUSE_ASID) { + char *user_id = NULL; + struct kvm_csv_init *init_cmd_buf = NULL; + + user_id = object_property_get_str(OBJECT(sev_guest), "user-id", NULL); + if (user_id && strlen(user_id)) { + init_cmd_buf = g_new0(struct kvm_csv_init, 1); + init_cmd_buf->len = strlen(user_id); + init_cmd_buf->userid_addr = (__u64)user_id; + } + ret = sev_ioctl(sev_common->sev_fd, cmd, init_cmd_buf, &fw_error); + + if (user_id) { + g_free(user_id); + g_free(init_cmd_buf); + } + } else { + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); + } } else { - ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { + case KVM_X86_DEFAULT_VM: + cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); + break; + case KVM_X86_SEV_VM: + case KVM_X86_SEV_ES_VM: + case KVM_X86_SNP_VM: { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + break; + } + default: + error_setg(errp, "%s: host kernel does not support the requested SEV configuration.", + __func__); + return -1; + } } if (ret) { error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); - goto err; + return -1; } - /* Support CSV3 */ - if (!ret && cmd == KVM_SEV_ES_INIT) { - ret = csv3_init(sev_guest->policy, sev->sev_fd, (void *)&sev->state, &sev_ops); - if (ret) { - error_setg(errp, "%s: failed to init csv3 context", __func__); - goto err; + if (!sev_snp_enabled()) { + SevGuestState *sev_guest = SEV_GUEST(sev_common); + /* Support CSV3 */ + if (!ret && cmd == KVM_SEV_ES_INIT) { + ret = csv3_init(sev_guest->policy, sev_common->sev_fd, (void *)&sev_common->state, &sev_ops); + if (ret) { + error_setg(errp, "%s: failed to init csv3 context", __func__); + return -1; + } + /* The CSV3 guest is not resettable */ + if (csv3_enabled()) + csv_kvm_cpu_reset_inhibit = true; } - /* The CSV3 guest is not resettable */ - if (csv3_enabled()) - csv_kvm_cpu_reset_inhibit = true; - } - /* - * The LAUNCH context is used for new guest, if its an incoming guest - * then RECEIVE context will be created after the connection is established. - */ - if (!runstate_check(RUN_STATE_INMIGRATE)) { - ret = sev_launch_start(sev); + /* + * The LAUNCH context is used for a new guest; if it's an incoming guest, + * then the RECEIVE context will be created after the connection is established. + */ + if (!runstate_check(RUN_STATE_INMIGRATE)) { + ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + return -1; + } + if (klass->kvm_init && klass->kvm_init(cgs, errp)) { + return -1; + } + } else { + /* + * The CSV2 guest is not resettable after being migrated to the target + * machine, so set csv_kvm_cpu_reset_inhibit to true to indicate the + * CSV2 guest is not resettable. 
- */ - if (is_hygon_cpu() && sev_es_enabled()) { - csv_kvm_cpu_reset_inhibit = true; + */ + if (is_hygon_cpu() && sev_es_enabled()) { + csv_kvm_cpu_reset_inhibit = true; + } + } + } else { + ret = klass->launch_start(sev_common); + + if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); - goto err; + return -1; } + if (klass->kvm_init && klass->kvm_init(cgs, errp)) { + return -1; + } + } + + qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + if (!sev_snp_enabled()) { + SevGuestState *sev_guest = SEV_GUEST(sev_common); + migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); + + if (csv3_enabled()) { + cgs_class->memory_encryption_ops = &csv3_memory_encryption_ops; + } else { + cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; } + QTAILQ_INIT(&sev_guest->shared_regions_list); + + /* Determine whether MSR_AMD64_SEV_ES_GHCB is supported */ + if (sev_es_enabled()) { + sev_kvm_has_msr_ghcb = + kvm_vm_check_extension(kvm_state, KVM_CAP_SEV_ES_GHCB); + } else { + sev_kvm_has_msr_ghcb = false; + } + } + + cgs->ready = true; + + return 0; +} + +static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + int ret; + + /* + * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding + * isn't actually possible. With SNP, only guest_memfd pages are used + * for private guest memory, so discarding of shared memory is still + * possible. + */ + ret = ram_block_discard_disable(true); + if (ret) { + error_setg(errp, "%s: cannot disable RAM discard", __func__); + return -1; } + /* + * SEV uses these notifiers to register/pin pages prior to guest use, + * but SNP relies on guest_memfd for private pages, which has its + * own internal mechanisms for registering/pinning private memory. + */ /* CSV3 guests do not need a notifier to reg/unreg memory */ if (!csv3_enabled()) { ram_block_notifier_add(&sev_ram_notifier); } + + /* + * The machine done notify event is used for SEV guests to get the + * measurement of the encrypted images. When SEV-SNP is enabled, the + * measurement is part of the guest attestation process where it can + * be collected without any reliance on the VMM. So skip registering + * the notifier for SNP in favor of using guest attestation instead. 
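+ *
+ * (This function is only reached for SEV/SEV-ES guests: the per-type hook
+ * is installed in the respective class_init, e.g.
+ *
+ *     klass->kvm_init = sev_kvm_init;       // sev_guest_class_init()
+ *     klass->kvm_init = sev_snp_kvm_init;   // presumably the SNP class_init
+ *
+ * and sev_common_kvm_init() dispatches through klass->kvm_init(cgs, errp),
+ * so SNP guests never register this notifier in the first place.)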
+ */ qemu_add_machine_init_done_notifier(&sev_machine_done_notify); - qemu_add_vm_change_state_handler(sev_vm_state_change, sev); - migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); - if (csv3_enabled()) { - cgs_class->memory_encryption_ops = &csv3_memory_encryption_ops; - } else { - cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; - } - QTAILQ_INIT(&sev->shared_regions_list); + return 0; +} - /* Determine whether support MSR_AMD64_SEV_ES_GHCB */ - if (sev_es_enabled()) { - sev_kvm_has_msr_ghcb = - kvm_vm_check_extension(kvm_state, KVM_CAP_SEV_ES_GHCB); - } else { - sev_kvm_has_msr_ghcb = false; - } +static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); - cgs->ready = true; + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "SEV-SNP does not support SMM."); + return -1; + } return 0; -err: - sev_guest = NULL; - ram_block_discard_disable(false); - return -1; } int -sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) +sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { - if (!sev_guest) { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass; + + if (!sev_common) { return 0; } + klass = SEV_COMMON_GET_CLASS(sev_common); /* if SEV is in update state then encrypt the data else do nothing */ - if (sev_check_state(sev_guest, SEV_STATE_LAUNCH_UPDATE)) { - int ret = sev_launch_update_data(sev_guest, ptr, len); + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { + int ret; + + ret = klass->launch_update_data(sev_common, gpa, ptr, len); if (ret < 0) { error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; @@ -1380,16 +1979,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, void *hva; gsize hdr_sz = 0, data_sz = 0; MemoryRegion *mr = NULL; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - if (!sev_guest) { + if (!sev_common) { error_setg(errp, "SEV not enabled for guest"); return 1; } /* secret can be injected only in this state */ - if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { + if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_SECRET)) { error_setg(errp, "SEV: Not in correct state. 
(LSECRET) %x", - sev_guest->state); + sev_common->state); return 1; } @@ -1433,7 +2033,7 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, input.trans_uaddr, input.trans_len); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_SECRET, &input, &error); if (ret) { error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", @@ -1540,9 +2140,12 @@ void sev_es_set_reset_vector(CPUState *cpu) { X86CPU *x86; CPUX86State *env; + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + SevCommonState *sev_common = SEV_COMMON( + object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)); /* Only update if we have valid reset information */ - if (!sev_guest || !sev_guest->reset_data_valid) { + if (!sev_common || !sev_common->reset_data_valid) { return; } @@ -1554,11 +2157,11 @@ void sev_es_set_reset_vector(CPUState *cpu) x86 = X86_CPU(cpu); env = &x86->env; - cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, + cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); - env->eip = sev_guest->reset_ip; + env->eip = sev_common->reset_ip; } int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) @@ -1566,6 +2169,7 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) CPUState *cpu; uint32_t addr; int ret; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_es_enabled()) { return 0; @@ -1579,9 +2183,9 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) } if (addr) { - sev_guest->reset_cs = addr & 0xffff0000; - sev_guest->reset_ip = addr & 0x0000ffff; - sev_guest->reset_data_valid = true; + sev_common->reset_cs = addr & 0xffff0000; + sev_common->reset_ip = addr & 0x0000ffff; + sev_common->reset_data_valid = true; CPU_FOREACH(cpu) { sev_es_set_reset_vector(cpu); @@ -1596,8 +2200,9 @@ sev_get_send_session_length(void) { int ret, fw_err = 0; struct kvm_sev_send_start start = {}; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_START, &start, &fw_err); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_START, &start, &fw_err); if (fw_err != SEV_RET_INVALID_LEN) { ret = -1; error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", @@ -1618,6 +2223,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) struct kvm_sev_send_start start = { }; guchar *pdh = NULL, *plat_cert = NULL, *session = NULL; Error *local_err = NULL; + SevCommonState *sev_common = SEV_COMMON(s); if (!s->remote_pdh || !s->remote_plat_cert || !s->amd_cert_len) { error_report("%s: missing remote PDH or PLAT_CERT", __func__); @@ -1645,7 +2251,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) start.session_len = session_len; /* Get our PDH certificate */ - ret = sev_get_pdh_info(s->sev_fd, &pdh, &pdh_len, + ret = sev_get_pdh_info(sev_common->sev_fd, &pdh, &pdh_len, &plat_cert, &plat_cert_len, &local_err); if (ret) { error_report("Failed to get our PDH cert"); @@ -1656,7 +2262,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) start.plat_certs_uaddr, start.plat_certs_len, start.amd_certs_uaddr, start.amd_certs_len); - ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_START, &start, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_START, &start, &fw_error); 
if (ret < 0) { error_report("%s: SEND_START ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -1670,7 +2276,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) qemu_put_buffer(f, (uint8_t *)start.session_uaddr, start.session_len); *bytes_sent = 12 + pdh_len + start.session_len; - sev_set_guest_state(s, SEV_STATE_SEND_UPDATE); + sev_set_guest_state(sev_common, SEV_STATE_SEND_UPDATE); err: g_free(pdh); @@ -1683,8 +2289,9 @@ sev_send_get_packet_len(int *fw_err) { int ret; struct kvm_sev_send_update_data update = { 0, }; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_DATA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, fw_err); if (*fw_err != SEV_RET_INVALID_LEN) { ret = 0; @@ -1706,6 +2313,7 @@ sev_send_update_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr, uint32_t size, int ret, fw_error; guchar *trans; struct kvm_sev_send_update_data update = { }; + SevCommonState *sev_common = SEV_COMMON(s); /* * If this is first call then query the packet header bytes and allocate @@ -1734,7 +2342,7 @@ sev_send_update_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr, uint32_t size, trace_kvm_sev_send_update_data(ptr, trans, size); - ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: SEND_UPDATE_DATA ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -1757,13 +2365,13 @@ err: int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, uint32_t sz, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* * If this is a first buffer then create outgoing encryption context * and write our PDH, policy and session data. 
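 *
 * (The wire format is presumably three big-endian 32-bit fields, policy,
 * pdh_len and session_len, followed by the PDH and session blobs; that is
 * what sev_send_start() accounts for with
 *
 *     *bytes_sent = 12 + pdh_len + start.session_len;
 *
 * i.e. 12 bytes of headers plus the two blobs.)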
*/ - if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_SEND_UPDATE) && sev_send_start(s, f, bytes_sent)) { error_report("Failed to create outgoing context"); return 1; @@ -1779,6 +2387,7 @@ sev_receive_start(SevGuestState *sev, QEMUFile *f) int fw_error; struct kvm_sev_receive_start start = { }; gchar *session = NULL, *pdh_cert = NULL; + SevCommonState *sev_common = SEV_COMMON(sev); /* get SEV guest handle */ start.handle = object_property_get_int(OBJECT(sev), "handle", @@ -1808,7 +2417,7 @@ sev_receive_start(SevGuestState *sev, QEMUFile *f) trace_kvm_sev_receive_start(start.policy, session, pdh_cert); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_START, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_START, &start, &fw_error); if (ret < 0) { error_report("Error RECEIVE_START ret=%d fw_error=%d '%s'", @@ -1817,7 +2426,7 @@ sev_receive_start(SevGuestState *sev, QEMUFile *f) } object_property_set_int(OBJECT(sev), "handle", start.handle, &error_abort); - sev_set_guest_state(sev, SEV_STATE_RECEIVE_UPDATE); + sev_set_guest_state(sev_common, SEV_STATE_RECEIVE_UPDATE); err: g_free(session); g_free(pdh_cert); @@ -1830,6 +2439,7 @@ static int sev_receive_update_data(QEMUFile *f, uint8_t *ptr) { int ret = 1, fw_error = 0; gchar *hdr = NULL, *trans = NULL; struct kvm_sev_receive_update_data update = {}; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* get packet header */ update.hdr_len = qemu_get_be32(f); @@ -1857,7 +2467,7 @@ static int sev_receive_update_data(QEMUFile *f, uint8_t *ptr) trace_kvm_sev_receive_update_data(trans, ptr, update.guest_len, hdr, update.hdr_len); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_DATA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("Error RECEIVE_UPDATE_DATA ret=%d fw_error=%d '%s'", @@ -1872,13 +2482,13 @@ err: int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* * If this is the first buffer and SEV is not in the receiving state, then * use the RECEIVE_START command to create an encryption context. 
*/ - if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_RECEIVE_UPDATE) && sev_receive_start(s, f)) { return 1; } @@ -1888,7 +2498,7 @@ int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr) int sev_remove_shared_regions_list(unsigned long start, unsigned long end) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *pos, *next_pos; QTAILQ_FOREACH_SAFE(pos, &s->shared_regions_list, list, next_pos) { @@ -1936,7 +2546,7 @@ int sev_add_shared_regions_list(unsigned long start, unsigned long end) { struct shared_region *shrd_region; struct shared_region *pos; - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); if (QTAILQ_EMPTY(&s->shared_regions_list)) { shrd_region = g_malloc0(sizeof(*shrd_region)); @@ -1987,7 +2597,7 @@ int sev_add_shared_regions_list(unsigned long start, unsigned long end) int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *pos; QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { @@ -2004,7 +2614,7 @@ int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent) int sev_load_incoming_shared_regions_list(QEMUFile *f) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *shrd_region; int status; @@ -2027,7 +2637,7 @@ int sev_load_incoming_shared_regions_list(QEMUFile *f) bool sev_is_gfn_in_unshared_region(unsigned long gfn) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *pos; QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { @@ -2255,11 +2865,12 @@ csv_command_batch(uint32_t cmd_id, uint64_t head_uaddr, int *fw_err) { int ret; struct kvm_csv_command_batch command_batch = { }; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); command_batch.command_id = cmd_id; command_batch.csv_batch_list_uaddr = head_uaddr; - ret = sev_ioctl(sev_guest->sev_fd, KVM_CSV_COMMAND_BATCH, + ret = sev_ioctl(sev_common->sev_fd, KVM_CSV_COMMAND_BATCH, &command_batch, fw_err); if (ret) { error_report("%s: COMMAND_BATCH ret=%d fw_err=%d '%s'", @@ -2334,7 +2945,7 @@ csv_receive_update_data_batch(SevGuestState *s) int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2347,7 +2958,7 @@ csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2359,7 +2970,7 @@ int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr) * If this is the first buffer and SEV is not in the receiving state, then * use the RECEIVE_START command to create an encryption context. 
*/ - if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_RECEIVE_UPDATE) && sev_receive_start(s, f)) { return 1; } @@ -2370,7 +2981,7 @@ int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr) int csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2382,7 +2993,7 @@ csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) * If this is a first buffer then create outgoing encryption context * and write our PDH, policy and session data. */ - if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_SEND_UPDATE) && sev_send_start(s, f, bytes_sent)) { error_report("Failed to create outgoing context"); return 1; @@ -2393,7 +3004,7 @@ csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) int csv_load_queued_incoming_pages(QEMUFile *f) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2409,8 +3020,9 @@ sev_send_vmsa_get_packet_len(int *fw_err) { int ret; struct kvm_sev_send_update_vmsa update = { 0, }; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, fw_err); if (*fw_err != SEV_RET_INVALID_LEN) { ret = 0; @@ -2432,6 +3044,7 @@ sev_send_update_vmsa(SevGuestState *s, QEMUFile *f, uint32_t cpu_id, int ret, fw_error; guchar *trans = NULL; struct kvm_sev_send_update_vmsa update = {}; + SevCommonState *sev_common = SEV_COMMON(s); /* * If this is first call then query the packet header bytes and allocate @@ -2459,7 +3072,7 @@ sev_send_update_vmsa(SevGuestState *s, QEMUFile *f, uint32_t cpu_id, trace_kvm_sev_send_update_vmsa(cpu_id, cpu_index, trans, size); - ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, &fw_error); if (ret) { error_report("%s: SEND_UPDATE_VMSA ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -2489,7 +3102,7 @@ err: int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); CPUState *cpu; int ret = 0; @@ -2522,6 +3135,7 @@ static int sev_receive_update_vmsa(QEMUFile *f) uint32_t cpu_index, cpu_id = 0; gchar *hdr = NULL, *trans = NULL; struct kvm_sev_receive_update_vmsa update = {}; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* get cpu index buffer */ assert(qemu_get_be32(f) == sizeof(uint32_t)); @@ -2558,7 +3172,7 @@ static int sev_receive_update_vmsa(QEMUFile *f) trace_kvm_sev_receive_update_vmsa(cpu_id, cpu_index, trans, update.trans_len, hdr, update.hdr_len); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_VMSA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_UPDATE_VMSA, &update, &fw_error); if (ret) { error_report("Error RECEIVE_UPDATE_VMSA ret=%d fw_error=%d '%s'", @@ -2655,44 +3269,16 @@ static const QemuUUID sev_cmdline_entry_guid = { 0x4d, 0x36, 0xab, 0x2a) }; -/* - * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page - * which is included in SEV's initial memory measurement. 
- */ -bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, + SevKernelLoaderContext *ctx, + Error **errp) { - uint8_t *data; - SevHashTableDescriptor *area; SevHashTable *ht; - PaddedSevHashTable *padded_ht; uint8_t cmdline_hash[HASH_SIZE]; uint8_t initrd_hash[HASH_SIZE]; uint8_t kernel_hash[HASH_SIZE]; uint8_t *hashp; size_t hash_len = HASH_SIZE; - hwaddr mapped_len = sizeof(*padded_ht); - MemTxAttrs attrs = { 0 }; - bool ret = true; - - /* - * Only add the kernel hashes if the sev-guest configuration explicitly - * stated kernel-hashes=on. - */ - if (!sev_guest->kernel_hashes) { - return false; - } - - if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { - error_setg(errp, "SEV: kernel specified but guest firmware " - "has no hashes table GUID"); - return false; - } - area = (SevHashTableDescriptor *)data; - if (!area->base || area->size < sizeof(PaddedSevHashTable)) { - error_setg(errp, "SEV: guest firmware hashes table area is invalid " - "(base=0x%x size=0x%x)", area->base, area->size); - return false; - } /* * Calculate hash of kernel command-line with the terminating null byte. If @@ -2729,16 +3315,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) } assert(hash_len == HASH_SIZE); - /* - * Populate the hashes table in the guest's memory at the OVMF-designated - * area for the SEV hashes table - */ - padded_ht = address_space_map(&address_space_memory, area->base, - &mapped_len, true, attrs); - if (!padded_ht || mapped_len != sizeof(*padded_ht)) { - error_setg(errp, "SEV: cannot map hashes table guest memory area"); - return false; - } ht = &padded_ht->ht; ht->guid = sev_hash_table_header_guid; @@ -2759,17 +3335,59 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) /* zero the excess data so the measurement can be reliably calculated */ memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); - if (csv3_enabled()) { - if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA) { - if (csv3_load_data(area->base, (uint8_t *)padded_ht, - sizeof(*padded_ht), errp) < 0) { - ret = false; - } - } else { - error_report("%s: CSV3 load kernel hashes unsupported!", __func__); - ret = false; - } - } else if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { + return true; +} + +static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp) +{ + /* + * SNP: Populate the hashes table in an area that later in + * snp_launch_update_kernel_hashes() will be copied to the guest memory + * and encrypted. 
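+ *
+ * The in-page offset of the table is remembered so that the copy lands at
+ * the right spot inside the page-granular launch-update region, e.g. with
+ * 4KiB target pages and a (hypothetical) area->base of 0x80cf00:
+ *
+ *     kernel_hashes_offset = 0x80cf00 & ~TARGET_PAGE_MASK;  // = 0xf00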
+ */ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); + sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK; + sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1); + return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp); +} + +static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp) +{ + PaddedSevHashTable *padded_ht; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; + + /* + * Populate the hashes table in the guest's memory at the OVMF-designated + * area for the SEV hashes table + */ + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } + + if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { + if (csv3_enabled()) { + if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA) { + if (csv3_load_data(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { + ret = false; } + } else { + error_report("%s: CSV3 load kernel hashes unsupported!", __func__); + ret = false; } + } else if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { + ret = false; } + } else { ret = false; } @@ -2779,16 +3397,51 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) return ret; } +/* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. + */ +bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +{ + uint8_t *data; + SevHashTableDescriptor *area; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. 
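+ *
+ * The property is defined on the sev-common base type in this patch, e.g.
+ * (cbitpos and reduced-phys-bits values are host-dependent):
+ *
+ *     -object sev-guest,id=sev0,cbitpos=51,reduced-phys-bits=1,kernel-hashes=on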
+ */ + if (!sev_common->kernel_hashes) { + return false; + } + + if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); + return false; + } + + area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } + + return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp); +} + static int _sev_send_start(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); return sev_send_start(s, f, bytes_sent); } static int _sev_receive_start(QEMUFile *f) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); return sev_receive_start(s, f); } @@ -2800,10 +3453,456 @@ struct sev_ops sev_ops = { .sev_receive_start = _sev_receive_start, }; +static char * +sev_common_get_sev_device(Object *obj, Error **errp) +{ + return g_strdup(SEV_COMMON(obj)->sev_device); +} + +static void +sev_common_set_sev_device(Object *obj, const char *value, Error **errp) +{ + SEV_COMMON(obj)->sev_device = g_strdup(value); +} + +static bool sev_common_get_kernel_hashes(Object *obj, Error **errp) +{ + return SEV_COMMON(obj)->kernel_hashes; +} + +static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) +{ + SEV_COMMON(obj)->kernel_hashes = value; +} + +static void +sev_common_class_init(ObjectClass *oc, void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = sev_common_kvm_init; + + object_class_property_add_str(oc, "sev-device", + sev_common_get_sev_device, + sev_common_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_common_get_kernel_hashes, + sev_common_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); +} + +static void +sev_common_instance_init(Object *obj) +{ + SevCommonState *sev_common = SEV_COMMON(obj); + + sev_common->kvm_type = -1; + + sev_common->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + + object_property_add_uint32_ptr(obj, "cbitpos", &sev_common->cbitpos, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev_common->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); +} + +/* sev guest info common to sev/sev-es/sev-snp */ +static const TypeInfo sev_common_info = { + .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_COMMON, + .instance_size = sizeof(SevCommonState), + .instance_init = sev_common_instance_init, + .class_size = sizeof(SevCommonStateClass), + .class_init = sev_common_class_init, + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static char * +sev_guest_get_dh_cert_file(Object *obj, Error **errp) +{ + return g_strdup(SEV_GUEST(obj)->dh_cert_file); +} + +static void +sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +{ + SEV_GUEST(obj)->dh_cert_file = g_strdup(value); +} + +static char * +sev_guest_get_session_file(Object *obj, Error **errp) +{ + SevGuestState *sev_guest = SEV_GUEST(obj); + + return sev_guest->session_file ? 
g_strdup(sev_guest->session_file) : NULL; +} + +static void +sev_guest_set_session_file(Object *obj, const char *value, Error **errp) +{ + SEV_GUEST(obj)->session_file = g_strdup(value); +} + +static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + SevGuestState *sev_guest = SEV_GUEST(obj); + OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type; + + visit_type_OnOffAuto(v, name, &legacy_vm_type, errp); +} + +static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + SevGuestState *sev_guest = SEV_GUEST(obj); + + visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp); +} + +static void +sev_guest_class_init(ObjectClass *oc, void *data) +{ + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes; + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->launch_update_data = sev_launch_update_data; + klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); + object_class_property_set_description(oc, "dh-cert-file", + "guest owners DH certificate (encoded with base64)"); + object_class_property_add_str(oc, "session-file", + sev_guest_get_session_file, + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); + object_class_property_add_str(oc, "user-id", + sev_guest_get_user_id, + sev_guest_set_user_id); + object_class_property_set_description(oc, "user-id", + "user id of the guest owner"); + object_class_property_add_str(oc, "secret-header-file", + sev_guest_get_secret_header_file, + sev_guest_set_secret_header_file); + object_class_property_set_description(oc, "secret-header-file", + "header file of the guest owner's secret"); + object_class_property_add_str(oc, "secret-file", + sev_guest_get_secret_file, + sev_guest_set_secret_file); + object_class_property_set_description(oc, "secret-file", + "file of the guest owner's secret"); + object_class_property_add(oc, "legacy-vm-type", "OnOffAuto", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type, NULL, NULL); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); +} + +static void +sev_guest_instance_init(Object *obj) +{ + SevGuestState *sev_guest = SEV_GUEST(obj); + + sev_guest->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "handle", &sev_guest->handle, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); + + sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO; +} + +/* guest info specific sev/sev-es */ +static const TypeInfo sev_guest_info = { + .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_init = sev_guest_instance_init, + .class_init = sev_guest_class_init, +}; + +static void +sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, + (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, + errp); +} + +static void +sev_snp_guest_set_policy(Object 
*obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, + (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, + errp); +} + +static char * +sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp) +{ + return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds); +} + +static void +sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, + Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; + g_autofree guchar *blob = NULL; + gsize len; + + g_free(sev_snp_guest->guest_visible_workarounds); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->guest_visible_workarounds = g_strdup(value); + + blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds, + -1, &len, errp); + if (!blob) { + return; + } + + if (len != sizeof(start->gosvw)) { + error_setg(errp, "parameter length of %" G_GSIZE_FORMAT + " not equal to %zu", + len, sizeof(start->gosvw)); + return; + } + + memcpy(start->gosvw, blob, len); +} + +static char * +sev_snp_guest_get_id_block(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->id_block_base64); +} + +static void +sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + + finish->id_block_en = 0; + g_free(sev_snp_guest->id_block); + g_free(sev_snp_guest->id_block_base64); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->id_block_base64 = g_strdup(value); + sev_snp_guest->id_block = + qbase64_decode(sev_snp_guest->id_block_base64, -1, &len, errp); + + if (!sev_snp_guest->id_block) { + return; + } + + if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { + error_setg(errp, "parameter length of %" G_GSIZE_FORMAT + " not equal to %u", + len, KVM_SEV_SNP_ID_BLOCK_SIZE); + return; + } + + finish->id_block_en = 1; + finish->id_block_uaddr = (uintptr_t)sev_snp_guest->id_block; +} + +static char * +sev_snp_guest_get_id_auth(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->id_auth_base64); +} + +static void +sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + + finish->id_auth_uaddr = 0; + g_free(sev_snp_guest->id_auth); + g_free(sev_snp_guest->id_auth_base64); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->id_auth_base64 = g_strdup(value); + sev_snp_guest->id_auth = + qbase64_decode(sev_snp_guest->id_auth_base64, -1, &len, errp); + + if (!sev_snp_guest->id_auth) { + return; + } + + if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { + error_setg(errp, "ID_AUTH parameter length of %" G_GSIZE_FORMAT + " exceeds max of %u", + len, KVM_SEV_SNP_ID_AUTH_SIZE); + return; + } + + finish->id_auth_uaddr = (uintptr_t)sev_snp_guest->id_auth; +} + +static bool +sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return !!sev_snp_guest->kvm_finish_conf.auth_key_en; +} + +static void +sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp) +{ + SevSnpGuestState
*sev_snp_guest = SEV_SNP_GUEST(obj); + + sev_snp_guest->kvm_finish_conf.auth_key_en = value; +} + +static bool +sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return !!sev_snp_guest->kvm_finish_conf.vcek_disabled; +} + +static void +sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + sev_snp_guest->kvm_finish_conf.vcek_disabled = value; +} + +static char * +sev_snp_guest_get_host_data(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->host_data); +} + +static void +sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + g_autofree guchar *blob = NULL; + gsize len; + + g_free(sev_snp_guest->host_data); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->host_data = g_strdup(value); + + blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp); + + if (!blob) { + return; + } + + if (len != sizeof(finish->host_data)) { + error_setg(errp, "parameter length of %" G_GSIZE_FORMAT + " not equal to %zu", + len, sizeof(finish->host_data)); + return; + } + + memcpy(finish->host_data, blob, len); +} + +static void +sev_snp_guest_class_init(ObjectClass *oc, void *data) +{ + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes; + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; + klass->launch_update_data = sev_snp_launch_update_data; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; + x86_klass->kvm_type = sev_snp_kvm_type; + + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); + object_class_property_add_str(oc, "guest-visible-workarounds", + sev_snp_guest_get_guest_visible_workarounds, + sev_snp_guest_set_guest_visible_workarounds); + object_class_property_add_str(oc, "id-block", + sev_snp_guest_get_id_block, + sev_snp_guest_set_id_block); + object_class_property_add_str(oc, "id-auth", + sev_snp_guest_get_id_auth, + sev_snp_guest_set_id_auth); + object_class_property_add_bool(oc, "author-key-enabled", + sev_snp_guest_get_author_key_enabled, + sev_snp_guest_set_author_key_enabled); + object_class_property_add_bool(oc, "vcek-disabled", + sev_snp_guest_get_vcek_disabled, + sev_snp_guest_set_vcek_disabled); + object_class_property_add_str(oc, "host-data", + sev_snp_guest_get_host_data, + sev_snp_guest_set_host_data); +} + +static void +sev_snp_guest_instance_init(Object *obj) +{ + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + cgs->require_guest_memfd = true; + + /* default init/start/finish params for kvm */ + sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; +} + +/* guest info specific to sev-snp */ +static const TypeInfo sev_snp_guest_info = { + .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_SNP_GUEST, + .instance_size = sizeof(SevSnpGuestState), + .class_init = sev_snp_guest_class_init, + .instance_init = sev_snp_guest_instance_init, +}; + static void sev_register_types(void) { + 
type_register_static(&sev_common_info); type_register_static(&sev_guest_info); + type_register_static(&sev_snp_guest_info); } type_init(sev_register_types); diff --git a/target/i386/sev.h b/target/i386/sev.h index 647b426b16f4b4c4d003a8a59ac07f42f966f112..a43097e605f1520b849a75cd290a08d114cf7136 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -20,6 +20,10 @@ #include "exec/confidential-guest-support.h" +#define TYPE_SEV_COMMON "sev-common" +#define TYPE_SEV_GUEST "sev-guest" +#define TYPE_SEV_SNP_GUEST "sev-snp-guest" + #define SEV_POLICY_NODBG 0x1 #define SEV_POLICY_NOKS 0x2 #define SEV_POLICY_ES 0x4 @@ -27,6 +31,9 @@ #define SEV_POLICY_DOMAIN 0x10 #define SEV_POLICY_SEV 0x20 +#define SEV_SNP_POLICY_SMT 0x10000 +#define SEV_SNP_POLICY_DBG 0x80000 + typedef struct SevKernelLoaderContext { char *setup_data; size_t setup_size; @@ -48,16 +55,18 @@ typedef struct SevKernelLoaderContext { #ifdef CONFIG_SEV bool sev_enabled(void); bool sev_es_enabled(void); +bool sev_snp_enabled(void); #else #define sev_enabled() 0 #define sev_es_enabled() 0 +#define sev_snp_enabled() 0 #endif uint32_t sev_get_cbit_position(void); uint32_t sev_get_reduced_phys_bits(void); bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); +int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); int sev_save_setup(const char *pdh, const char *plat_cert, const char *amd_cert); int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, @@ -76,8 +85,6 @@ int sev_load_incoming_shared_regions_list(QEMUFile *f); bool sev_is_gfn_in_unshared_region(unsigned long gfn); void sev_del_migrate_blocker(void); -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); - extern bool sev_kvm_has_msr_ghcb; struct sev_ops { @@ -89,4 +96,6 @@ struct sev_ops { extern struct sev_ops sev_ops; +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size); + #endif diff --git a/target/i386/tcg/sysemu/fpu_helper.c b/target/i386/tcg/sysemu/fpu_helper.c index 93506cdd94e0bc17bd1a1bbc6beaf75acc337149..e0305ba23450e977ebea05bfd305aa655fb60e24 100644 --- a/target/i386/tcg/sysemu/fpu_helper.c +++ b/target/i386/tcg/sysemu/fpu_helper.c @@ -32,9 +32,9 @@ void x86_register_ferr_irq(qemu_irq irq) void fpu_check_raise_ferr_irq(CPUX86State *env) { if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) { - qemu_mutex_lock_iothread(); + bql_lock(); qemu_irq_raise(ferr_irq); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } } @@ -49,7 +49,7 @@ void cpu_set_ignne(void) { CPUX86State *env = &X86_CPU(first_cpu)->env; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); env->hflags2 |= HF2_IGNNE_MASK; /* diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index e1528b7f80bec6b81e36f34eff18705dfb4ffd2d..1c43a9f4f7ae2e33c113f8837a9d1aa74e8e29ee 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -25,6 +25,7 @@ #include "exec/address-spaces.h" #include "exec/exec-all.h" #include "tcg/helper-tcg.h" +#include "hw/i386/apic.h" void helper_outb(CPUX86State *env, uint32_t port, uint32_t data) { @@ -118,9 +119,9 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) break; case 8: if (!(env->hflags2 & HF2_VINTR_MASK)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_set_apic_tpr(env_archcpu(env)->apic_state, t0); - qemu_mutex_unlock_iothread(); + bql_unlock(); } env->int_ctl = (env->int_ctl & ~V_TPR_MASK) | (t0 & V_TPR_MASK); @@ -289,6 
+290,19 @@ void helper_wrmsr(CPUX86State *env) env->msr_bndcfgs = val; cpu_sync_bndcs_hflags(env); break; + case MSR_APIC_START ... MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_write(index, val); + bql_unlock(); + if (ret < 0) { + goto error; + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + @@ -455,6 +469,19 @@ void helper_rdmsr(CPUX86State *env) val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16); break; } + case MSR_APIC_START ... MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_read(index, &val); + bql_unlock(); + if (ret < 0) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + diff --git a/target/i386/trace-events b/target/i386/trace-events index 5d4a709a39dedc3f4b0a6ad27f4e51fe25fcdac9..b8eebf798dfc7dc3e123930c4cf7a948fae0533d 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -6,11 +6,14 @@ kvm_memcrypt_register_region(void *addr, size_t len) "addr %p len 0x%zx" kvm_memcrypt_unregister_region(void *addr, size_t len) "addr %p len 0x%zx" kvm_sev_change_state(const char *old, const char *new) "%s -> %s" kvm_sev_launch_start(int policy, void *session, void *pdh) "policy 0x%x session %p pdh %p" -kvm_sev_launch_update_data(void *addr, uint64_t len) "addr %p len 0x%" PRIx64 +kvm_sev_launch_update_data(void *addr, size_t len) "addr %p len 0x%zx" kvm_sev_launch_measurement(const char *value) "data %s" kvm_sev_launch_finish(void) "" kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" +kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" +kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)" +kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s" kvm_sev_send_start(uint64_t pdh, int l1, uint64_t plat, int l2, uint64_t amd, int l3) "pdh 0x%" PRIx64 " len %d plat 0x%" PRIx64 " len %d amd 0x%" PRIx64 " len %d" kvm_sev_send_update_data(void *src, void *dst, int len) "guest %p trans %p len %d" kvm_sev_send_finish(void) "" diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c index 67cad867207233dbd22ae38a6075f2895bae532a..e783a760a718824975802a1d2e7216228d176845 100644 --- a/target/i386/whpx/whpx-accel-ops.c +++ b/target/i386/whpx/whpx-accel-ops.c @@ -25,7 +25,7 @@ static void *whpx_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); current_cpu = cpu; @@ -55,7 +55,7 @@ static void *whpx_cpu_thread_fn(void *arg) whpx_destroy_vcpu(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index d29ba916a0cc8b8d649bf24ae91f83be399a1f5b..a7262654acdee1f656393465327caa61d0f0de8c 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -1324,7 +1324,7 @@ static int 
whpx_first_vcpu_starting(CPUState *cpu) struct whpx_state *whpx = &whpx_global; HRESULT hr; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (!QTAILQ_EMPTY(&cpu->breakpoints) || (whpx->breakpoints.breakpoints && @@ -1442,7 +1442,7 @@ static int whpx_handle_halt(CPUState *cpu) CPUX86State *env = cpu_env(cpu); int ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) && !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { @@ -1450,7 +1450,7 @@ static int whpx_handle_halt(CPUState *cpu) cpu->halted = true; ret = 1; } - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -1472,7 +1472,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu) memset(&new_int, 0, sizeof(new_int)); memset(reg_values, 0, sizeof(reg_values)); - qemu_mutex_lock_iothread(); + bql_lock(); /* Inject NMI */ if (!vcpu->interruption_pending && @@ -1563,7 +1563,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu) reg_count += 1; } - qemu_mutex_unlock_iothread(); + bql_unlock(); vcpu->ready_for_pic_interrupt = false; if (reg_count) { @@ -1590,9 +1590,9 @@ static void whpx_vcpu_post_run(CPUState *cpu) uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8; if (vcpu->tpr != tpr) { vcpu->tpr = tpr; - qemu_mutex_lock_iothread(); + bql_lock(); cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } vcpu->interruption_pending = @@ -1652,7 +1652,7 @@ static int whpx_vcpu_run(CPUState *cpu) WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE; int ret; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (whpx->running_cpus++ == 0) { /* Insert breakpoints into memory, update exception exit bitmap. */ @@ -1690,7 +1690,7 @@ static int whpx_vcpu_run(CPUState *cpu) } } - qemu_mutex_unlock_iothread(); + bql_unlock(); if (exclusive_step_mode != WHPX_STEP_NONE) { start_exclusive(); @@ -2028,9 +2028,9 @@ static int whpx_vcpu_run(CPUState *cpu) error_report("WHPX: Unexpected VP exit code %d", vcpu->exit_ctx.ExitReason); whpx_get_registers(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; } @@ -2055,7 +2055,7 @@ static int whpx_vcpu_run(CPUState *cpu) cpu_exec_end(cpu); } - qemu_mutex_lock_iothread(); + bql_lock(); current_cpu = cpu; if (--whpx->running_cpus == 0) { diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index f724e77a1bb1d1615453ab0a7b24f9096a4272c2..4b8f08cdb6a1884f48573e49693f17190cd78b1d 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -1000,11 +1000,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) { diff --git a/target/loongarch/tcg/csr_helper.c b/target/loongarch/tcg/csr_helper.c index 55341551a5c7751224f4789e50c3abb31b509c27..15f94caefabc7722263fa46e948e21de37b4203c 100644 --- a/target/loongarch/tcg/csr_helper.c +++ b/target/loongarch/tcg/csr_helper.c @@ -89,9 +89,9 @@ target_ulong helper_csrwr_ticlr(CPULoongArchState *env, target_ulong val) int64_t old_v = 0; if (val & 0x1) { - qemu_mutex_lock_iothread(); + bql_lock(); loongarch_cpu_set_irq(cpu, IRQ_TIMER, 0); - qemu_mutex_unlock_iothread(); + bql_unlock(); } return old_v; } diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 
e22e24ed974e12fea4858884c9a2ad12c2c10a20..2f6a44ee467635394414550ad9a44a23041714a5 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -138,7 +138,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) int r; struct kvm_mips_interrupt intr; - qemu_mutex_lock_iothread(); + bql_lock(); if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && cpu_mips_io_interrupts_pending(cpu)) { @@ -151,7 +151,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) @@ -1282,11 +1282,6 @@ int kvm_arch_get_default_type(MachineState *machine) return -1; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/mips/tcg/sysemu/cp0_helper.c b/target/mips/tcg/sysemu/cp0_helper.c index d3495487431c0d769941437f3437fdd1d510663a..cc545aed9ca903075e2a42bcaf4f9a2162e3b05f 100644 --- a/target/mips/tcg/sysemu/cp0_helper.c +++ b/target/mips/tcg/sysemu/cp0_helper.c @@ -59,9 +59,9 @@ static inline void mips_vpe_wake(MIPSCPU *c) * because there might be other conditions that state that c should * be sleeping. */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_interrupt(CPU(c), CPU_INTERRUPT_WAKE); - qemu_mutex_unlock_iothread(); + bql_unlock(); } static inline void mips_vpe_sleep(MIPSCPU *cpu) diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index 782a5751b750ce973a2b2cca0f5aa6b74754e3bb..77567afba47f773e4679a8d8fa452222fa116cb3 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -160,20 +160,20 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) break; case TO_SPR(9, 0): /* PICMR */ env->picmr = rb; - qemu_mutex_lock_iothread(); + bql_lock(); if (env->picsr & env->picmr) { cpu_interrupt(cs, CPU_INTERRUPT_HARD); } else { cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case TO_SPR(9, 2): /* PICSR */ env->picsr &= ~rb; break; case TO_SPR(10, 0): /* TTMR */ { - qemu_mutex_lock_iothread(); + bql_lock(); if ((env->ttmr & TTMR_M) ^ (rb & TTMR_M)) { switch (rb & TTMR_M) { case TIMER_NONE: @@ -198,15 +198,15 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) cs->interrupt_request &= ~CPU_INTERRUPT_TIMER; } cpu_openrisc_timer_update(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); } break; case TO_SPR(10, 1): /* TTCR */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_openrisc_count_set(cpu, rb); cpu_openrisc_timer_update(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; } #endif @@ -347,9 +347,9 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, return env->ttmr; case TO_SPR(10, 1): /* TTCR */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_openrisc_count_update(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); return cpu_openrisc_count_get(cpu); } #endif diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index a42743a3e077633dc6511b73abc436087046be21..8a2bfb5aa2f34ad33b9d1a6152f441d8ab944dd1 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -3056,7 +3056,7 @@ void helper_msgsnd(target_ulong rb) return; } - qemu_mutex_lock_iothread(); + bql_lock(); CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *cenv = &cpu->env; @@ -3065,7 +3065,7 @@ void helper_msgsnd(target_ulong rb) ppc_set_irq(cpu, irq, 1); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* Server Processor 
Control */ @@ -3093,7 +3093,7 @@ static void book3s_msgsnd_common(int pir, int irq) { CPUState *cs; - qemu_mutex_lock_iothread(); + bql_lock(); CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *cenv = &cpu->env; @@ -3103,7 +3103,7 @@ static void book3s_msgsnd_common(int pir, int irq) ppc_set_irq(cpu, irq, 1); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } void helper_book3s_msgsnd(target_ulong rb) @@ -3157,14 +3157,14 @@ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) } /* Does iothread need to be locked for walking CPU list? */ - qemu_mutex_lock_iothread(); + bql_lock(); THREAD_SIBLING_FOREACH(cs, ccs) { PowerPCCPU *ccpu = POWERPC_CPU(ccs); uint32_t thread_id = ppc_cpu_tir(ccpu); if (ttir == thread_id) { ppc_set_irq(ccpu, PPC_INTERRUPT_DOORBELL, 1); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 9b1abe2fc4116fa9470fea3c59a96c0e24692311..892e14a467acc1cedeb405a5648b47cccb55d6aa 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1656,7 +1656,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) CPUPPCState *env = &cpu->env; int ret; - qemu_mutex_lock_iothread(); + bql_lock(); switch (run->exit_reason) { case KVM_EXIT_DCR: @@ -1715,7 +1715,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; } - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -2971,11 +2971,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index a05bdf78c982e4e038f08ecfc2fe9c6c034cc124..a9d41d28020fd4aa6424b5c6ec0d95dcb7b2b04c 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -238,7 +238,7 @@ target_ulong helper_load_dpdes(CPUPPCState *env) return dpdes; } - qemu_mutex_lock_iothread(); + bql_lock(); THREAD_SIBLING_FOREACH(cs, ccs) { PowerPCCPU *ccpu = POWERPC_CPU(ccs); CPUPPCState *cenv = &ccpu->env; @@ -248,7 +248,7 @@ target_ulong helper_load_dpdes(CPUPPCState *env) dpdes |= (0x1 << thread_id); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); return dpdes; } @@ -278,14 +278,14 @@ void helper_store_dpdes(CPUPPCState *env, target_ulong val) } /* Does iothread need to be locked for walking CPU list? 
*/ - qemu_mutex_lock_iothread(); + bql_lock(); THREAD_SIBLING_FOREACH(cs, ccs) { PowerPCCPU *ccpu = POWERPC_CPU(ccs); uint32_t thread_id = ppc_cpu_tir(ccpu); ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & (0x1 << thread_id)); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c index 08a6b47ee0825699a3e6c8890657e5f232901f4f..f618ed292271ff382e0e8fb826f65c3e22837512 100644 --- a/target/ppc/timebase_helper.c +++ b/target/ppc/timebase_helper.c @@ -173,9 +173,9 @@ target_ulong helper_load_dcr(CPUPPCState *env, target_ulong dcrn) } else { int ret; - qemu_mutex_lock_iothread(); + bql_lock(); ret = ppc_dcr_read(env->dcr_env, (uint32_t)dcrn, &val); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (unlikely(ret != 0)) { qemu_log_mask(LOG_GUEST_ERROR, "DCR read error %d %03x\n", (uint32_t)dcrn, (uint32_t)dcrn); @@ -196,9 +196,9 @@ void helper_store_dcr(CPUPPCState *env, target_ulong dcrn, target_ulong val) POWERPC_EXCP_INVAL_INVAL, GETPC()); } else { int ret; - qemu_mutex_lock_iothread(); + bql_lock(); ret = ppc_dcr_write(env->dcr_env, (uint32_t)dcrn, (uint32_t)val); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (unlikely(ret != 0)) { qemu_log_mask(LOG_GUEST_ERROR, "DCR write error %d %03x\n", (uint32_t)dcrn, (uint32_t)dcrn); diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index b3dc2070f9e1c16808978ee8580e1aa7bb748477..c110b11704875f7ef44c92e37a14871c941807b5 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1274,11 +1274,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - static int aia_mode; static const char *kvm_aia_mode_str(uint64_t mode) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 33ab3551f47b035d9fc818d7346e6e68f96ef5de..b7e50e317871e547df91cc91d66dd81d91229967 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -1923,7 +1923,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) S390CPU *cpu = S390_CPU(cs); int ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); kvm_cpu_synchronize_state(cs); @@ -1947,7 +1947,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) fprintf(stderr, "Unknown KVM exit: %d\n", run->exit_reason); break; } - qemu_mutex_unlock_iothread(); + bql_unlock(); if (ret == 0) { ret = EXCP_INTERRUPT; @@ -2624,11 +2624,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) kvm_s390_vcpu_interrupt(cpu, &irq); } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - int kvm_s390_get_zpci_op(void) { return cap_zpci_op; diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c index 6a69be7e5c533cea94d600e47ad08cbd3c8feee2..902dbb6f34ab66a730e08d63144bc4f89e55d34a 100644 --- a/target/s390x/kvm/pv.c +++ b/target/s390x/kvm/pv.c @@ -319,12 +319,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) return s390_pv_check_cpus(errp); } -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { return 0; } + if (!kvm_enabled()) { + error_setg(errp, "Protected Virtualization requires KVM"); + return -1; + } + if (!s390_has_feat(S390_FEAT_UNPACK)) { error_setg(errp, "CPU model does not support Protected Virtualization"); @@ -349,6 +354,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, static void 
s390_pv_guest_class_init(ObjectClass *oc, void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = s390_pv_kvm_init; } static void s390_pv_guest_init(Object *obj) diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h index 7b935e2246c3f04f0c800ae44e6f49498b2b6d7c..5e1527a84970d9578f7469e3fe418be12cbfbc8b 100644 --- a/target/s390x/kvm/pv.h +++ b/target/s390x/kvm/pv.h @@ -79,18 +79,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } #endif /* CONFIG_KVM */ -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - if (!cgs) { - return 0; - } - if (kvm_enabled()) { - return s390_pv_kvm_init(cgs, errp); - } - - error_setg(errp, "Protected Virtualization requires KVM"); - return -1; -} - #endif /* HW_S390_PV_H */ diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c index 6aa7907438fd7d22058e83d36632f91fe67f569d..89b5268fd49d4467cc9d936953eeee458cbd84a8 100644 --- a/target/s390x/tcg/misc_helper.c +++ b/target/s390x/tcg/misc_helper.c @@ -101,9 +101,9 @@ uint64_t HELPER(stck)(CPUS390XState *env) /* SCLP service call */ uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2) { - qemu_mutex_lock_iothread(); + bql_lock(); int r = sclp_service_call(env_archcpu(env), r1, r2); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (r < 0) { tcg_s390_program_interrupt(env, -r, GETPC()); } @@ -117,9 +117,9 @@ void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num) switch (num) { case 0x500: /* KVM hypercall */ - qemu_mutex_lock_iothread(); + bql_lock(); r = s390_virtio_hypercall(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case 0x44: /* yield */ @@ -127,9 +127,9 @@ void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num) break; case 0x308: /* ipl */ - qemu_mutex_lock_iothread(); + bql_lock(); handle_diag_308(env, r1, r3, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); r = 0; break; case 0x288: @@ -185,7 +185,7 @@ static void update_ckc_timer(CPUS390XState *env) /* stop the timer and remove pending CKC IRQs */ timer_del(env->tod_timer); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); env->pending_int &= ~INTERRUPT_EXT_CLOCK_COMPARATOR; /* the tod has to exceed the ckc, this can never happen if ckc is all 1's */ @@ -207,9 +207,9 @@ void HELPER(sckc)(CPUS390XState *env, uint64_t ckc) { env->ckc = ckc; - qemu_mutex_lock_iothread(); + bql_lock(); update_ckc_timer(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque) @@ -229,9 +229,9 @@ uint32_t HELPER(sck)(CPUS390XState *env, uint64_t tod_low) .low = tod_low, }; - qemu_mutex_lock_iothread(); + bql_lock(); tdc->set(td, &tod, &error_abort); - qemu_mutex_unlock_iothread(); + bql_unlock(); return 0; } @@ -421,9 +421,9 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, int cc; /* TODO: needed to inject interrupts - push further down */ - qemu_mutex_lock_iothread(); + bql_lock(); cc = handle_sigp(env, order_code & SIGP_ORDER_MASK, r1, r3); - qemu_mutex_unlock_iothread(); + bql_unlock(); return cc; } @@ -433,92 +433,92 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, void HELPER(xsch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); 
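/*
 * A minimal sketch (not part of the patch) of the two locking shapes that
 * every conversion in this series reduces to. TCG helpers such as the
 * HELPER(xsch) conversion at this point know they run without the BQL and
 * take it unconditionally; code that may be entered either way (cf.
 * with_bql() in ui/cocoa.m further down) saves and restores the caller's
 * state. Assumes only the qemu/main-loop.h API introduced by this series;
 * 'emulate' is a hypothetical stand-in for the BQL-protected work
 * (ioinst_handle_*(), qmp_*(), ...).
 */
#include "qemu/osdep.h"
#include "qemu/main-loop.h"   /* bql_lock(), bql_unlock(), bql_locked() */

static void helper_shape(void (*emulate)(void))
{
    bql_lock();       /* helpers run outside the BQL: take it */
    emulate();        /* BQL-protected device/css emulation */
    bql_unlock();
}

static void maybe_locked_shape(void (*emulate)(void))
{
    bool locked = bql_locked();   /* preserve the caller's lock state */

    if (!locked) {
        bql_lock();
    }
    emulate();
    if (!locked) {
        bql_unlock();
    }
}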
+ bql_lock(); ioinst_handle_xsch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(csch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_csch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(hsch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_hsch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(msch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_msch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(rchp)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_rchp(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(rsch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_rsch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(sal)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_sal(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(schm)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_schm(cpu, r1, r2, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(ssch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_ssch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(stcrw)(CPUS390XState *env, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_stcrw(cpu, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(stsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_stsch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) @@ -533,10 +533,10 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); } - qemu_mutex_lock_iothread(); + bql_lock(); io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]); if (!io) { - qemu_mutex_unlock_iothread(); + bql_unlock(); return 0; } @@ -554,7 +554,7 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) if (s390_cpu_virt_mem_write(cpu, addr, 0, &intc, sizeof(intc))) { /* writing failed, reinject and properly clean up */ s390_io_interrupt(io->id, io->nr, io->parm, io->word); - qemu_mutex_unlock_iothread(); + bql_unlock(); g_free(io); s390_cpu_virt_mem_handle_exc(cpu, ra); return 0; @@ -570,24 +570,24 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) } g_free(io); - qemu_mutex_unlock_iothread(); + bql_unlock(); return 1; } void HELPER(tsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_tsch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void 
HELPER(chsc)(CPUS390XState *env, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_chsc(cpu, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif @@ -726,27 +726,27 @@ void HELPER(clp)(CPUS390XState *env, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); clp_service_call(cpu, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(pcilg)(CPUS390XState *env, uint32_t r1, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); pcilg_service_call(cpu, r1, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(pcistg)(CPUS390XState *env, uint32_t r1, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); pcistg_service_call(cpu, r1, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(stpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, @@ -754,9 +754,9 @@ void HELPER(stpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); stpcifc_service_call(cpu, r1, fiba, ar, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(sic)(CPUS390XState *env, uint64_t r1, uint64_t r3) @@ -764,9 +764,9 @@ void HELPER(sic)(CPUS390XState *env, uint64_t r1, uint64_t r3) S390CPU *cpu = env_archcpu(env); int r; - qemu_mutex_lock_iothread(); + bql_lock(); r = css_do_sic(cpu, (r3 >> 27) & 0x7, r1 & 0xffff); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* css_do_sic() may actually return a PGM_xxx value to inject */ if (r) { tcg_s390_program_interrupt(env, -r, GETPC()); @@ -777,9 +777,9 @@ void HELPER(rpcit)(CPUS390XState *env, uint32_t r1, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); rpcit_service_call(cpu, r1, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(pcistb)(CPUS390XState *env, uint32_t r1, uint32_t r3, @@ -787,9 +787,9 @@ void HELPER(pcistb)(CPUS390XState *env, uint32_t r1, uint32_t r3, { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); pcistb_service_call(cpu, r1, r3, gaddr, ar, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(mpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, @@ -797,8 +797,8 @@ void HELPER(mpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); mpcifc_service_call(cpu, r1, fiba, ar, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index 156361358220d35e0921da32731a10aa506705f6..49a91492639260cebae7e5de9c9144bbaaac8d0b 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -70,7 +70,7 @@ void cpu_check_irqs(CPUSPARCState *env) CPUState *cs; /* We should be holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (env->pil_in && (env->interrupt_index == 0 || (env->interrupt_index & ~15) == TT_EXTINT)) { diff --git a/target/sparc/int64_helper.c b/target/sparc/int64_helper.c index 1b4155f5f37d97e9f258a33c1159d1eee2b0e269..27df9dba89bb79006a8b35f9a94be0b9863b7428 100644 --- a/target/sparc/int64_helper.c +++ b/target/sparc/int64_helper.c @@ -69,7 +69,7 @@ void cpu_check_irqs(CPUSPARCState *env) (env->softint & ~(SOFTINT_TIMER | SOFTINT_STIMER)); /* We should be 
holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); /* TT_IVEC has a higher priority (16) than TT_EXTINT (31..17) */ if (env->ivec_status & 0x20) { @@ -267,9 +267,9 @@ static bool do_modify_softint(CPUSPARCState *env, uint32_t value) env->softint = value; #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif return true; diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c index 16d1c70fe713ddb6eb56c753a4e55b2bd1df955c..b53fc9ce94074d41f3caefcd40e4e64a86b0d72e 100644 --- a/target/sparc/win_helper.c +++ b/target/sparc/win_helper.c @@ -179,9 +179,9 @@ void helper_wrpsr(CPUSPARCState *env, target_ulong new_psr) cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC()); } else { /* cpu_put_psr may trigger interrupts, hence BQL */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_put_psr(env, new_psr); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -407,9 +407,9 @@ void helper_wrpstate(CPUSPARCState *env, target_ulong new_state) #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } @@ -422,9 +422,9 @@ void helper_wrpil(CPUSPARCState *env, target_ulong new_pil) env->psrpil = new_pil; if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } @@ -451,9 +451,9 @@ void helper_done(CPUSPARCState *env) #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } @@ -480,9 +480,9 @@ void helper_retry(CPUSPARCState *env) #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } diff --git a/target/xtensa/exc_helper.c b/target/xtensa/exc_helper.c index 91354884f7ec8456bac49bf5979fb501cfca48ed..168419a505f586d8d7be2ffd5a8314a762377a6c 100644 --- a/target/xtensa/exc_helper.c +++ b/target/xtensa/exc_helper.c @@ -105,9 +105,9 @@ void HELPER(waiti)(CPUXtensaState *env, uint32_t pc, uint32_t intlevel) env->sregs[PS] = (env->sregs[PS] & ~PS_INTLEVEL) | (intlevel << PS_INTLEVEL_SHIFT); - qemu_mutex_lock_iothread(); + bql_lock(); check_interrupts(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (env->pending_irq_level) { cpu_loop_exit(cpu); @@ -120,9 +120,9 @@ void HELPER(waiti)(CPUXtensaState *env, uint32_t pc, uint32_t intlevel) void HELPER(check_interrupts)(CPUXtensaState *env) { - qemu_mutex_lock_iothread(); + bql_lock(); check_interrupts(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(intset)(CPUXtensaState *env, uint32_t v) diff --git a/ui/cocoa.m b/ui/cocoa.m index cd069da6965b7aec43b0f3e542b7bcdc041c426f..5ebb535070530c2d375314264e01755a9967c900 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -117,29 +117,29 @@ static void cocoa_switch(DisplayChangeListener *dcl, typedef void (^CodeBlock)(void); typedef bool (^BoolCodeBlock)(void); -static void with_iothread_lock(CodeBlock block) +static void with_bql(CodeBlock block) { - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); if (!locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } block(); if (!locked) { - 
qemu_mutex_unlock_iothread(); + bql_unlock(); } } -static bool bool_with_iothread_lock(BoolCodeBlock block) +static bool bool_with_bql(BoolCodeBlock block) { - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); bool val; if (!locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } val = block(); if (!locked) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } return val; } @@ -605,7 +605,7 @@ - (void) updateUIInfo return; } - with_iothread_lock(^{ + with_bql(^{ [self updateUIInfoLocked]; }); } @@ -790,7 +790,7 @@ - (void) handleMonitorInput:(NSEvent *)event - (bool) handleEvent:(NSEvent *)event { - return bool_with_iothread_lock(^{ + return bool_with_bql(^{ return [self handleEventLocked:event]; }); } @@ -1182,7 +1182,7 @@ - (QEMUScreen) gscreen {return screen;} */ - (void) raiseAllKeys { - with_iothread_lock(^{ + with_bql(^{ qkbd_state_lift_all_keys(kbd); }); } @@ -1282,7 +1282,7 @@ - (void)applicationWillTerminate:(NSNotification *)aNotification { COCOA_DEBUG("QemuCocoaAppController: applicationWillTerminate\n"); - with_iothread_lock(^{ + with_bql(^{ shutdown_action = SHUTDOWN_ACTION_POWEROFF; qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); }); @@ -1420,7 +1420,7 @@ - (void)displayConsole:(id)sender /* Pause the guest */ - (void)pauseQEMU:(id)sender { - with_iothread_lock(^{ + with_bql(^{ qmp_stop(NULL); }); [sender setEnabled: NO]; @@ -1431,7 +1431,7 @@ - (void)pauseQEMU:(id)sender /* Resume running the guest operating system */ - (void)resumeQEMU:(id) sender { - with_iothread_lock(^{ + with_bql(^{ qmp_cont(NULL); }); [sender setEnabled: NO]; @@ -1461,7 +1461,7 @@ - (void)removePause /* Restarts QEMU */ - (void)restartQEMU:(id)sender { - with_iothread_lock(^{ + with_bql(^{ qmp_system_reset(NULL); }); } @@ -1469,7 +1469,7 @@ - (void)restartQEMU:(id)sender /* Powers down QEMU */ - (void)powerDownQEMU:(id)sender { - with_iothread_lock(^{ + with_bql(^{ qmp_system_powerdown(NULL); }); } @@ -1488,7 +1488,7 @@ - (void)ejectDeviceMedia:(id)sender } __block Error *err = NULL; - with_iothread_lock(^{ + with_bql(^{ qmp_eject([drive cStringUsingEncoding: NSASCIIStringEncoding], NULL, false, false, &err); }); @@ -1523,7 +1523,7 @@ - (void)changeDeviceMedia:(id)sender } __block Error *err = NULL; - with_iothread_lock(^{ + with_bql(^{ qmp_blockdev_change_medium([drive cStringUsingEncoding: NSASCIIStringEncoding], NULL, @@ -1605,7 +1605,7 @@ - (void)adjustSpeed:(id)sender // get the throttle percentage throttle_pct = [sender tag]; - with_iothread_lock(^{ + with_bql(^{ cpu_throttle_set(throttle_pct); }); COCOA_DEBUG("cpu throttling at %d%c\n", cpu_throttle_get_percentage(), '%'); @@ -1819,7 +1819,7 @@ - (void)pasteboard:(NSPasteboard *)sender provideDataForType:(NSPasteboardType)t return; } - with_iothread_lock(^{ + with_bql(^{ QemuClipboardInfo *info = qemu_clipboard_info_ref(cbinfo); qemu_event_reset(&cbevent); qemu_clipboard_request(info, QEMU_CLIPBOARD_TYPE_TEXT); @@ -1827,9 +1827,9 @@ - (void)pasteboard:(NSPasteboard *)sender provideDataForType:(NSPasteboardType)t while (info == cbinfo && info->types[QEMU_CLIPBOARD_TYPE_TEXT].available && info->types[QEMU_CLIPBOARD_TYPE_TEXT].data == NULL) { - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_event_wait(&cbevent); - qemu_mutex_lock_iothread(); + bql_lock(); } if (info == cbinfo) { @@ -1927,9 +1927,9 @@ static void cocoa_clipboard_request(QemuClipboardInfo *info, int status; COCOA_DEBUG("Second thread: calling qemu_default_main()\n"); - qemu_mutex_lock_iothread(); + bql_lock(); status = qemu_default_main(); - 
qemu_mutex_unlock_iothread(); + bql_unlock(); COCOA_DEBUG("Second thread: qemu_default_main() returned, exiting\n"); [cbowner release]; exit(status); @@ -1941,7 +1941,7 @@ static int cocoa_main(void) COCOA_DEBUG("Entered %s()\n", __func__); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_thread_create(&thread, "qemu_main", call_qemu_main, NULL, QEMU_THREAD_DETACHED); diff --git a/ui/spice-core.c b/ui/spice-core.c index db21db2c9428225defe66f8904e4fb7dd90bac33..b6ee495a8fef479b855ce79e6fdb02e2cf686fae 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -222,7 +222,7 @@ static void channel_event(int event, SpiceChannelEventInfo *info) */ bool need_lock = !qemu_thread_is_self(&me); if (need_lock) { - qemu_mutex_lock_iothread(); + bql_lock(); } if (info->flags & SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT) { @@ -260,7 +260,7 @@ static void channel_event(int event, SpiceChannelEventInfo *info) } if (need_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } qapi_free_SpiceServerInfo(server); diff --git a/util/async.c b/util/async.c index 8f90ddc3047a9f0567da47ab6f89b6190e1d0f9d..def720045b51dbddc096624089ca45aba574af3e 100644 --- a/util/async.c +++ b/util/async.c @@ -741,7 +741,7 @@ AioContext *qemu_get_current_aio_context(void) if (ctx) { return ctx; } - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { /* Possibly in a vCPU thread. */ return qemu_get_aio_context(); } diff --git a/util/main-loop.c b/util/main-loop.c index 797b640c4152d6ff22cb1b6a37a38d8a1c639bdf..bfbff4f246bc2137e8a73acbe052af47782a9457 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -302,13 +302,13 @@ static int os_host_main_loop_wait(int64_t timeout) glib_pollfds_fill(&timeout); - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_unlock(); ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout); replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); glib_pollfds_poll(); @@ -517,7 +517,7 @@ static int os_host_main_loop_wait(int64_t timeout) poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout); - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_unlock(); @@ -525,7 +525,7 @@ static int os_host_main_loop_wait(int64_t timeout) replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); if (g_poll_ret > 0) { for (i = 0; i < w->num; i++) { w->revents[i] = poll_fds[n_poll_fds + i].revents; diff --git a/util/qsp.c b/util/qsp.c index 2fe3764906c53c53ec08572fc165c6f441241e19..6b783e2e7f8e495f041383716ebb161f5d9af0ed 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -124,7 +124,7 @@ static const char * const qsp_typenames[] = { [QSP_CONDVAR] = "condvar", }; -QemuMutexLockFunc qemu_bql_mutex_lock_func = qemu_mutex_lock_impl; +QemuMutexLockFunc bql_mutex_lock_func = qemu_mutex_lock_impl; QemuMutexLockFunc qemu_mutex_lock_func = qemu_mutex_lock_impl; QemuMutexTrylockFunc qemu_mutex_trylock_func = qemu_mutex_trylock_impl; QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl; @@ -439,7 +439,7 @@ void qsp_enable(void) { qatomic_set(&qemu_mutex_lock_func, qsp_mutex_lock); qatomic_set(&qemu_mutex_trylock_func, qsp_mutex_trylock); - qatomic_set(&qemu_bql_mutex_lock_func, qsp_bql_mutex_lock); + qatomic_set(&bql_mutex_lock_func, qsp_bql_mutex_lock); qatomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock); qatomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock); qatomic_set(&qemu_cond_wait_func, qsp_cond_wait); @@ -450,7 +450,7 @@ void qsp_disable(void) { qatomic_set(&qemu_mutex_lock_func, qemu_mutex_lock_impl); 
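/*
 * A standalone sketch of the mechanism being renamed in this qsp.c hunk:
 * QSP toggles lock profiling by atomically swapping the function pointer
 * that every lock call site dereferences, so the callers themselves need
 * no synchronization. Plain C11 with illustrative names; the real code
 * uses qatomic_set() on bql_mutex_lock_func and friends.
 */
#include <stdatomic.h>
#include <stdbool.h>

typedef void lock_fn(void *mutex);

static void lock_plain(void *mutex)    { (void)mutex; /* acquire directly */ }
static void lock_profiled(void *mutex) { /* record timing, then */ lock_plain(mutex); }

/* every call site goes through the pointer, as with bql_mutex_lock_func */
static _Atomic(lock_fn *) lock_impl = lock_plain;

static void profiling_set(bool on)
{
    atomic_store(&lock_impl, on ? lock_profiled : lock_plain);
}

static void take_lock(void *mutex)
{
    atomic_load(&lock_impl)(mutex);
}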
qatomic_set(&qemu_mutex_trylock_func, qemu_mutex_trylock_impl); - qatomic_set(&qemu_bql_mutex_lock_func, qemu_mutex_lock_impl); + qatomic_set(&bql_mutex_lock_func, qemu_mutex_lock_impl); qatomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl); qatomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl); qatomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl); diff --git a/util/rcu.c b/util/rcu.c index e587bcc48314b81a5a79bc5c399e88a3cefec245..bb7f633b5c7147059b1fb5700745bace497146e0 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -283,24 +283,24 @@ static void *call_rcu_thread(void *opaque) qatomic_sub(&rcu_call_count, n); synchronize_rcu(); - qemu_mutex_lock_iothread(); + bql_lock(); while (n > 0) { node = try_dequeue(); while (!node) { - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_event_reset(&rcu_call_ready_event); node = try_dequeue(); if (!node) { qemu_event_wait(&rcu_call_ready_event); node = try_dequeue(); } - qemu_mutex_lock_iothread(); + bql_lock(); } n--; node->func(node); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } abort(); } @@ -337,13 +337,13 @@ static void drain_rcu_callback(struct rcu_head *node) void drain_call_rcu(void) { struct rcu_drain rcu_drain; - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); memset(&rcu_drain, 0, sizeof(struct rcu_drain)); qemu_event_init(&rcu_drain.drain_complete_event, false); if (locked) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } @@ -365,7 +365,7 @@ void drain_call_rcu(void) qatomic_dec(&in_drain_call_rcu); if (locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } }
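
One pattern worth pulling out of the rcu.c hunk above: drain_call_rcu() releases the BQL (if the caller holds it) around its blocking wait, because the call_rcu thread itself takes the BQL to run callbacks and would otherwise deadlock against the drainer. In isolation the idiom is the inverse of the helper shape sketched earlier (a sketch assuming the qemu/main-loop.h API from this series; 'blocking_op' is a hypothetical stand-in for the qemu_event_wait() in drain_call_rcu()):

#include "qemu/osdep.h"
#include "qemu/main-loop.h"

/* Release the BQL if we own it, block, then reacquire it, leaving the
 * caller's locking state unchanged on return. */
static void blocking_op_outside_bql(void (*blocking_op)(void))
{
    bool locked = bql_locked();

    if (locked) {
        bql_unlock();
    }
    blocking_op();
    if (locked) {
        bql_lock();
    }
}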
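
Finally, the shape of the sev.c refactoring above: per-flavour behaviour (launch_start, launch_update_data, launch_finish, build_kernel_loader_hashes, kvm_init) moves out of inline flag checks and into SevCommonStateClass methods that sev-guest and sev-snp-guest override, and s390x PV plugs into the same ConfidentialGuestSupportClass kvm_init hook. Stripped of QOM boilerplate, the dispatch reduces to something like the following plain-C sketch (illustrative names only; the real code resolves the class with SEV_COMMON_GET_CLASS()):

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-in for SevCommonStateClass and its overriders */
typedef struct GuestClassSketch {
    bool (*build_kernel_loader_hashes)(void *guest);
} GuestClassSketch;

static bool sev_build(void *guest)
{
    puts("SEV: hash table written straight into guest RAM");
    return true;
}

static bool sev_snp_build(void *guest)
{
    puts("SNP: hash table staged, copied in at launch-update time");
    return true;
}

static const GuestClassSketch sev_class     = { sev_build };
static const GuestClassSketch sev_snp_class = { sev_snp_build };

/* generic code resolves the class and calls the hook, as
 * sev_add_kernel_loader_hashes() now does */
static bool add_hashes(const GuestClassSketch *klass, void *guest)
{
    return klass->build_kernel_loader_hashes(guest);
}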