diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index 6195150a0b4d2f310614d4efa18d409dd0c28044..54f19028b828d8e329160ec26359c618f1f0b317 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -112,6 +112,9 @@ static void kvm_accel_ops_class_init(ObjectClass *oc, void *data) ops->remove_breakpoint = kvm_remove_breakpoint; ops->remove_all_breakpoints = kvm_remove_all_breakpoints; #endif + + ops->control_pre_system_reset = kvm_cpus_control_pre_system_reset; + ops->control_post_system_reset = kvm_cpus_control_post_system_reset; } static const TypeInfo kvm_accel_ops_type = { diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index e39a810a4e92333a251711a223eb57b05dc2033d..25d23bba219d3d874bead42a87d305dbf6c38b38 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -98,6 +98,7 @@ bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_vm_attributes_allowed; bool kvm_msi_use_devid; +bool kvm_csv3_allowed; bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; @@ -2761,6 +2762,16 @@ void kvm_cpu_synchronize_pre_loadvm(CPUState *cpu) run_on_cpu(cpu, do_kvm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); } +void kvm_cpus_control_pre_system_reset(void) +{ + kvm_vm_ioctl(kvm_state, KVM_CONTROL_VCPU_PRE_SYSTEM_RESET, NULL); +} + +void kvm_cpus_control_post_system_reset(void) +{ + kvm_vm_ioctl(kvm_state, KVM_CONTROL_VCPU_POST_SYSTEM_RESET, NULL); +} + #ifdef KVM_HAVE_MCE_INJECTION static __thread void *pending_sigbus_addr; static __thread int pending_sigbus_code; diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h index ca40add32c3dd90f74ed692c88796323e6b08584..27b9d0d9dbfc83bb5060e48795ae6b858030c061 100644 --- a/accel/kvm/kvm-cpus.h +++ b/accel/kvm/kvm-cpus.h @@ -23,4 +23,7 @@ int kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); void kvm_remove_all_breakpoints(CPUState *cpu); +void kvm_cpus_control_pre_system_reset(void); +void kvm_cpus_control_post_system_reset(void); + #endif /* KVM_CPUS_H */ diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 1b37d9a302cccce5f7636bff492cdd2401996d48..45b23f61ce849e684708ac26dcecba515a2ff1d0 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -24,6 +24,7 @@ bool kvm_gsi_direct_mapping; bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_msi_use_devid; +bool kvm_csv3_allowed; void kvm_flush_coalesced_mmio_buffer(void) { diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak index 598c6646dfc0f941385b4d07939739bf9ae53e5c..e948e54e4e902a26f257b9a89ccb66bc77703e21 100644 --- a/configs/devices/i386-softmmu/default.mak +++ b/configs/devices/i386-softmmu/default.mak @@ -23,6 +23,8 @@ #CONFIG_TPM_TIS_ISA=n #CONFIG_VTD=n #CONFIG_SGX=n +#CONFIG_CSV=n +#CONFIG_HYGON_CSV_MIG_ACCEL=n # Boards: # diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst index e9bc142bc1308dfcae7b6cf375e43bc5dd15f7e2..b7e3f46ff689f1588de12756441afb0974811a2c 100644 --- a/docs/system/i386/amd-memory-encryption.rst +++ b/docs/system/i386/amd-memory-encryption.rst @@ -177,7 +177,45 @@ TODO Live Migration --------------- -TODO +AMD SEV encrypts the memory of VMs and because a different key is used +in each VM, the hypervisor will be unable to simply copy the +ciphertext from one VM to another to migrate the VM. 
Instead, the AMD SEV Key
+Management API provides a set of functions which the hypervisor can use
+to package a guest page for migration, while maintaining the confidentiality
+provided by AMD SEV.
+
+SEV guest VMs have the concept of private and shared memory. The private
+memory is encrypted with the guest-specific key, while shared memory may
+be encrypted with the hypervisor key. The migration APIs provided by the
+SEV API spec should be used for migrating the private pages. The
+KVM_GET_PAGE_ENC_BITMAP ioctl can be used to get the guest page encryption
+bitmap. The bitmap can be used to check whether a given guest page is
+private or shared.
+
+Before initiating the migration, we need to know the target machine's public
+Diffie-Hellman key (PDH) and certificate chain. These can be retrieved
+with the 'query-sev-capabilities' QMP command or using the sev-tool. The
+migrate-set-parameters command can be used to pass the target machine's PDH and
+certificate chain.
+
+During the migration flow, SEND_START is called on the source hypervisor
+to create an outgoing encryption context. The SEV guest policy dictates whether
+the certificate passed through the migrate-sev-set-info command will be
+validated. SEND_UPDATE_DATA is called to encrypt the guest private pages.
+After migration is completed, SEND_FINISH is called to destroy the encryption
+context and make the VM non-runnable, to protect it against cloning.
+
+On the target machine, RECEIVE_START is called first to create an
+incoming encryption context. RECEIVE_UPDATE_DATA is called to copy
+the received encrypted pages into guest memory. After migration has
+completed, RECEIVE_FINISH is called to make the VM runnable.
+
+For more information about the migration flow, see the SEV API Appendix A
+Usage flow (Live migration section).
+
+NOTE:
+To protect against memory cloning, the SEV APIs are designed to make the VM
+unrunnable if the migration fails.

 References
 ----------
 diff --git a/hw/display/vga.c b/hw/display/vga.c index 37557c3442aa8b709e74930502ed3a573da3d222..d70226a894a10ecf4cf78dc6d04a0e23f7f63bee 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -39,6 +39,8 @@ #include "migration/vmstate.h" #include "trace.h" +#include "sysemu/kvm.h" + //#define DEBUG_VGA_MEM //#define DEBUG_VGA_REG @@ -1783,6 +1785,11 @@ static void vga_update_display(void *opaque) s->cursor_blink_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); full_update = 1; } + + /* Force a full update in a CSV guest.
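+     * (Assumption, based on this series: the guest framebuffer lives in
+     * encrypted guest memory, so partial-update dirty tracking cannot be
+     * trusted and every refresh redraws the whole screen.)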
*/
+    if (kvm_csv3_enabled()) {
+        full_update = 1;
+    }
+
 switch(graphic_mode) { case GMODE_TEXT: vga_draw_text(s, full_update); diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 55850791df4148f5535eb06b76e09dabf75d84f1..682e324f1cc9bddfce9a5e4a993a44341a04c97d 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -10,6 +10,15 @@ config SGX bool depends on KVM
+
+config CSV
+    bool
+    select HYGON_CSV_MIG_ACCEL
+    depends on SEV
+
+config HYGON_CSV_MIG_ACCEL
+    bool
+    depends on CSV
+
 config PC bool imply APPLESMC @@ -26,6 +35,7 @@ config PC imply QXL imply SEV imply SGX +    imply CSV imply TEST_DEVICES imply TPM_CRB imply TPM_TIS_ISA diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index c8d9e71b889b673bc64d89f3d01a240007610552..2bbcbb8d357aae88fa7adbeb892360c1d1317a11 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -37,6 +37,7 @@ #include "hw/block/flash.h" #include "sysemu/kvm.h" #include "sev.h" +#include "csv.h" #define FLASH_SECTOR_SIZE 4096 @@ -263,7 +264,18 @@ void x86_firmware_configure(void *ptr, int size) error_report("failed to locate and/or save reset vector"); exit(1); }
+        if (csv3_enabled()) {
+            ram_addr_t offset = 0;
+            MemoryRegion *mr;

-        sev_encrypt_flash(ptr, size, &error_fatal);
+            mr = memory_region_from_host(ptr, &offset);
+            if (!mr) {
+                error_report("failed to get memory region of flash");
+                exit(1);
+            }
+            csv3_load_data(mr->addr + offset, ptr, size, &error_fatal);
+        } else {
+            sev_encrypt_flash(ptr, size, &error_fatal);
+        }
     }
 }
 diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig index cc8a8c1418ff007bd6eaaa73f098415beef51735..2ea5c68eb5da574e4d77170bc1d54b88c65fdf86 100644 --- a/hw/misc/Kconfig +++ b/hw/misc/Kconfig @@ -200,4 +200,8 @@ config IOSB config XLNX_VERSAL_TRNG bool
+
+config PSP_DEV
+    bool
+    default y
+
 source macio/Kconfig diff --git a/hw/misc/meson.build b/hw/misc/meson.build index 36c20d5637f70cf0db17173a4cb388a2b65bb8f9..28cba0ac2859ce90fccd4f3f4b34adb8cacf8bdd 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -9,6 +9,7 @@ system_ss.add(when: 'CONFIG_UNIMP', if_true: files('unimp.c')) system_ss.add(when: 'CONFIG_EMPTY_SLOT', if_true: files('empty_slot.c')) system_ss.add(when: 'CONFIG_LED', if_true: files('led.c')) system_ss.add(when: 'CONFIG_PVPANIC_COMMON', if_true: files('pvpanic.c')) +system_ss.add(when: 'CONFIG_PSP_DEV', if_true: files('psp.c')) # ARM devices system_ss.add(when: 'CONFIG_PL310', if_true: files('arm_l2x0.c')) diff --git a/hw/misc/psp.c b/hw/misc/psp.c new file mode 100644 index 0000000000000000000000000000000000000000..da0a69efdb80d91bcbbf8f3a4a5e8a5b781381a4 --- /dev/null +++ b/hw/misc/psp.c @@ -0,0 +1,141 @@
+/*
+ * hygon psp device emulation
+ *
+ * Copyright 2024 HYGON Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/compiler.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/runstate.h"
+#include <sys/ioctl.h>
+
+#define TYPE_PSP_DEV "psp"
+OBJECT_DECLARE_SIMPLE_TYPE(PSPDevState, PSP_DEV)
+
+struct PSPDevState {
+    /* Private */
+    DeviceState pdev;
+
+    /* Public */
+    Notifier shutdown_notifier;
+    int dev_fd;
+    uint8_t enabled;
+
+    /**
+     * vid identifies a virtual machine in QEMU.
+     * When a virtual machine accesses a TKM key,
+     * the TKM module uses a different key space for each vid.
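+     * For example (hypothetical invocation): two guests started with
+     *   -device psp,vid=1   and   -device psp,vid=2
+     * are assigned disjoint TKM key spaces.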
+ */ + uint32_t vid; +}; + +#define PSP_DEV_PATH "/dev/hygon_psp_config" +#define HYGON_PSP_IOC_TYPE 'H' +#define PSP_IOC_MUTEX_ENABLE _IOWR(HYGON_PSP_IOC_TYPE, 1, NULL) +#define PSP_IOC_MUTEX_DISABLE _IOWR(HYGON_PSP_IOC_TYPE, 2, NULL) +#define PSP_IOC_VPSP_OPT _IOWR(HYGON_PSP_IOC_TYPE, 3, NULL) + +enum VPSP_DEV_CTRL_OPCODE { + VPSP_OP_VID_ADD, + VPSP_OP_VID_DEL, +}; + +struct psp_dev_ctrl { + unsigned char op; + union { + unsigned int vid; + unsigned char reserved[128]; + } data; +}; + +static void psp_dev_destroy(PSPDevState *state) +{ + struct psp_dev_ctrl ctrl = { 0 }; + if (state && state->dev_fd >= 0) { + if (state->enabled) { + ctrl.op = VPSP_OP_VID_DEL; + if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) { + error_report("VPSP_OP_VID_DEL: %d", -errno); + } else { + state->enabled = false; + } + } + qemu_close(state->dev_fd); + state->dev_fd = -1; + } +} + +/** + * Guest OS performs shut down operations through 'shutdown' and 'powerdown' event. + * The 'powerdown' event will also trigger 'shutdown' in the end, + * so only attention to the 'shutdown' event. + * + * When Guest OS trigger 'reboot' or 'reset' event, to do nothing. +*/ +static void psp_dev_shutdown_notify(Notifier *notifier, void *data) +{ + PSPDevState *state = container_of(notifier, PSPDevState, shutdown_notifier); + psp_dev_destroy(state); +} + +static void psp_dev_realize(DeviceState *dev, Error **errp) +{ + struct psp_dev_ctrl ctrl = { 0 }; + PSPDevState *state = PSP_DEV(dev); + + state->dev_fd = qemu_open_old(PSP_DEV_PATH, O_RDWR); + if (state->dev_fd < 0) { + error_setg(errp, "fail to open %s, errno %d.", PSP_DEV_PATH, errno); + goto end; + } + + ctrl.op = VPSP_OP_VID_ADD; + ctrl.data.vid = state->vid; + if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) { + error_setg(errp, "psp_dev_realize VPSP_OP_VID_ADD vid %d, return %d", ctrl.data.vid, -errno); + goto end; + } + + state->enabled = true; + state->shutdown_notifier.notify = psp_dev_shutdown_notify; + qemu_register_shutdown_notifier(&state->shutdown_notifier); +end: + return; +} + +static struct Property psp_dev_properties[] = { + DEFINE_PROP_UINT32("vid", PSPDevState, vid, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void psp_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "PSP Device"; + dc->realize = psp_dev_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, psp_dev_properties); +} + +static const TypeInfo psp_dev_info = { + .name = TYPE_PSP_DEV, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PSPDevState), + .class_init = psp_dev_class_init, +}; + +static void psp_dev_register_types(void) +{ + type_register_static(&psp_dev_info); +} + +type_init(psp_dev_register_types) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 73024babd451898904bedc671a45a9697c2de10d..b6574f941aea83a390b3c45d8e7277430eb1ae82 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1640,24 +1640,28 @@ static bool virtio_net_can_receive(NetClientState *nc) static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) { + int opaque; + unsigned int in_bytes; VirtIONet *n = q->n; - if (virtio_queue_empty(q->rx_vq) || - (n->mergeable_rx_bufs && - !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { - virtio_queue_set_notification(q->rx_vq, 1); - - /* To avoid a race condition where the guest has made some buffers - * available after the above check but before notification was - * enabled, check for available buffers again. 
-         */
-        if (virtio_queue_empty(q->rx_vq) ||
-            (n->mergeable_rx_bufs &&
-             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
+
+    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
+        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
+                                           bufsize, 0);
+        /* Buffers are sufficient, disable notification */
+        if (bufsize <= in_bytes) {
+            break;
+        }
+
+        if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
+            /* Guest has added some buffers, try again */
+            continue;
+        } else {
             return 0;
         }
     }

     virtio_queue_set_notification(q->rx_vq, 0);
+
     return 1;
 }
 diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig index 7cdba0560aa821c88d3420b36f86020575834202..5f0d3c2d2bfae4acbc76b91bca73ec02d0184e55 100644 --- a/hw/vfio/Kconfig +++ b/hw/vfio/Kconfig @@ -41,3 +41,9 @@ config VFIO_IGD bool default y if PC_PCI depends on VFIO_PCI
+
+config VFIO_HCT
+    bool
+    default y
+    select VFIO
+    depends on LINUX && PCI
 diff --git a/hw/vfio/container.c b/hw/vfio/container.c index adc3005beb7b07d8541fdd9656558ebebfad20b0..0d6e6b428c9c431d755937895e06501c4ec76ed6 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -30,6 +30,7 @@ #include "qemu/error-report.h" #include "qemu/range.h" #include "sysemu/reset.h" +#include "sysemu/kvm.h" #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" @@ -468,6 +469,32 @@ static void vfio_free_container(VFIOContainer *container) g_free(container); }
+
+static SharedRegionListener *g_shl;
+
+static void shared_memory_listener_register(MemoryListener *listener,
+                                            AddressSpace *as)
+{
+    SharedRegionListener *shl;
+
+    shl = g_new0(SharedRegionListener, 1);
+
+    shl->listener = listener;
+    shl->as = as;
+
+    shared_region_register_listener(shl);
+    g_shl = shl;
+}
+
+static void shared_memory_listener_unregister(void)
+{
+    SharedRegionListener *shl = g_shl;
+
+    shared_region_unregister_listener(shl);
+
+    g_free(shl);
+    g_shl = NULL;
+}
+
 static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, Error **errp) { @@ -613,7 +640,12 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->listener = vfio_memory_listener;
-    memory_listener_register(&container->listener, container->space->as);
+    if (kvm_csv3_enabled()) {
+        shared_memory_listener_register(&container->listener,
+                                        container->space->as);
+    } else {
+        memory_listener_register(&container->listener, container->space->as);
+    }
     if (container->error) { ret = -1; @@ -629,7 +661,11 @@ listener_release_exit: QLIST_REMOVE(group, container_next); QLIST_REMOVE(container, next); vfio_kvm_device_del_group(group);
-    memory_listener_unregister(&container->listener);
+    if (kvm_csv3_enabled()) {
+        shared_memory_listener_unregister();
+    } else {
+        memory_listener_unregister(&container->listener);
+    }
     if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { vfio_spapr_container_deinit(container); @@ -663,7 +699,11 @@ static void vfio_disconnect_container(VFIOGroup *group) * group.
*/ if (QLIST_EMPTY(&container->group_list)) { - memory_listener_unregister(&container->listener); + if (kvm_csv3_enabled()) { + shared_memory_listener_unregister(); + } else { + memory_listener_unregister(&container->listener); + } if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { vfio_spapr_container_deinit(container); diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c new file mode 100644 index 0000000000000000000000000000000000000000..fb429271bb42bd7a0e8acfafc3f7da58080903a3 --- /dev/null +++ b/hw/vfio/hct.c @@ -0,0 +1,540 @@ +/* + * vfio based mediated ccp(hct) assignment support + * + * Copyright 2023 HYGON Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include +#include +#include +#include +#include + +#include "qemu/osdep.h" +#include "qemu/queue.h" +#include "qemu/main-loop.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/pci/pci.h" +#include "hw/vfio/pci.h" +#include "qemu/range.h" +#include "sysemu/kvm.h" +#include "hw/pci/msi.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" + +#define MAX_CCP_CNT 48 +#define PAGE_SIZE 4096 +#define HCT_SHARED_MEMORY_SIZE (PAGE_SIZE * MAX_CCP_CNT) +#define CCP_INDEX_BYTES 4 +#define PATH_MAX 4096 +#define TYPE_HCT_DEV "hct" +#define PCI_HCT_DEV(obj) OBJECT_CHECK(HCTDevState, (obj), TYPE_HCT_DEV) +#define HCT_MMIO_SIZE (1 << 20) +#define HCT_MAX_PASID (1 << 8) + +#define PCI_VENDOR_ID_HYGON_CCP 0x1d94 +#define PCI_DEVICE_ID_HYGON_CCP 0x1468 + +#define HCT_SHARE_DEV "/dev/hct_share" + +#define HCT_VERSION_STRING "0.5" +#define DEF_VERSION_STRING "0.1" +#define VERSION_SIZE 16 + +#define HCT_SHARE_IOC_TYPE 'C' +#define HCT_SHARE_OP_TYPE 0x01 +#define HCT_SHARE_OP _IOWR(HCT_SHARE_IOC_TYPE, \ + HCT_SHARE_OP_TYPE, \ + struct hct_dev_ctrl) +#define HCT_SHARE_OP_DMA_MAP 0x01 +#define HCT_SHARE_OP_GET_ID 0x03 +#define HCT_SHARE_OP_GET_PASID 0x04 +#define HCT_SHARE_OP_DMA_UNMAP 0x05 +#define HCT_SHARE_OP_GET_VERSION 0x06 + +/* BARS */ +#define HCT_REG_BAR_IDX 2 +#define HCT_SHARED_BAR_IDX 3 +#define HCT_PASID_BAR_IDX 4 + +#define PASID_OFFSET 40 + +static volatile struct hct_data { + int init; + int hct_fd; + unsigned long pasid; + uint8_t *pasid_memory; + uint8_t *hct_shared_memory; + uint8_t ccp_index[MAX_CCP_CNT]; + uint8_t ccp_cnt; +} hct_data; + +typedef struct SharedDevice { + PCIDevice dev; + int shared_memory_offset; +} SharedDevice; + +typedef struct HctDevState { + SharedDevice sdev; + VFIODevice vdev; + MemoryRegion mmio; + MemoryRegion shared; + MemoryRegion pasid; + void *maps[PCI_NUM_REGIONS]; +} HCTDevState; + +struct hct_dev_ctrl { + unsigned char op; + unsigned char rsvd[3]; + union { + unsigned char version[VERSION_SIZE]; + struct { + unsigned long vaddr; + unsigned long iova; + unsigned long size; + }; + unsigned int id; + }; +}; + +static int pasid_get_and_init(HCTDevState *state) +{ + struct hct_dev_ctrl ctrl; + int ret; + + ctrl.op = HCT_SHARE_OP_GET_PASID; + ctrl.id = -1; + ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); + if (ret < 0) { + ret = -errno; + error_report("GET_PASID fail: %d", -errno); + goto out; + } + + *hct_data.pasid_memory = ctrl.id; + hct_data.pasid = ctrl.id; + +out: + return ret; +} + +static const MemoryRegionOps hct_mmio_ops = { + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = + { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void 
vfio_hct_detach_device(HCTDevState *state) +{ + vfio_detach_device(&state->vdev); + g_free(state->vdev.name); +} + +static void vfio_hct_exit(PCIDevice *dev) +{ + HCTDevState *state = PCI_HCT_DEV(dev); + + vfio_hct_detach_device(state); + + if (hct_data.hct_fd) { + qemu_close(hct_data.hct_fd); + hct_data.hct_fd = 0; + } +} + +static Property vfio_hct_properties[] = { + DEFINE_PROP_STRING("sysfsdev", HCTDevState, vdev.sysfsdev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vfio_ccp_compute_needs_reset(VFIODevice *vdev) +{ + vdev->needs_reset = false; +} + +struct VFIODeviceOps vfio_ccp_ops = { + .vfio_compute_needs_reset = vfio_ccp_compute_needs_reset, +}; + +/* create BAR2, BAR3 and BAR4 space for the virtual machine. */ +static int vfio_hct_region_mmap(HCTDevState *state) +{ + int ret; + int i; + struct vfio_region_info *info; + + for (i = 0; i < PCI_ROM_SLOT; i++) { + ret = vfio_get_region_info(&state->vdev, i, &info); + if (ret) + goto out; + + if (info->size) { + state->maps[i] = mmap(NULL, info->size, PROT_READ | PROT_WRITE, + MAP_SHARED, state->vdev.fd, info->offset); + if (state->maps[i] == MAP_FAILED) { + ret = -errno; + g_free(info); + error_report("vfio mmap fail\n"); + goto out; + } + } + g_free(info); + } + + memory_region_init_io(&state->mmio, OBJECT(state), &hct_mmio_ops, state, + "hct mmio", HCT_MMIO_SIZE); + memory_region_init_ram_device_ptr(&state->mmio, OBJECT(state), "hct mmio", + HCT_MMIO_SIZE, + state->maps[HCT_REG_BAR_IDX]); + + memory_region_init_io(&state->shared, OBJECT(state), &hct_mmio_ops, state, + "hct shared memory", PAGE_SIZE); + memory_region_init_ram_device_ptr( + &state->shared, OBJECT(state), "hct shared memory", PAGE_SIZE, + (void *)hct_data.hct_shared_memory + + state->sdev.shared_memory_offset * PAGE_SIZE); + + memory_region_init_io(&state->pasid, OBJECT(state), &hct_mmio_ops, state, + "hct pasid", PAGE_SIZE); + memory_region_init_ram_device_ptr(&state->pasid, OBJECT(state), "hct pasid", + PAGE_SIZE, hct_data.pasid_memory); + + pci_register_bar(&state->sdev.dev, HCT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &state->mmio); + pci_register_bar(&state->sdev.dev, HCT_SHARED_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &state->shared); + pci_register_bar(&state->sdev.dev, HCT_PASID_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &state->pasid); +out: + return ret; +} + +static int hct_check_duplicated_index(int index) +{ + int cnt; + for (cnt = 0; cnt < hct_data.ccp_cnt; cnt++) { + if (hct_data.ccp_index[cnt] == index) { + error_report("many mdev shouldn't be mapped to one ccp in a " + "virtual machine!\n"); + return -1; + } + } + + hct_data.ccp_index[hct_data.ccp_cnt++] = index; + return 0; +} + +static int hct_get_ccp_index(HCTDevState *state) +{ + char path[PATH_MAX]; + char buf[CCP_INDEX_BYTES]; + int fd; + int ret; + int ccp_index; + + snprintf(path, PATH_MAX, "%s/vendor/id", state->vdev.sysfsdev); + fd = qemu_open_old(path, O_RDONLY); + if (fd < 0) { + error_report("open %s fail\n", path); + return -errno; + } + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + ret = -errno; + error_report("read %s fail\n", path); + goto out; + } + + if (1 != sscanf(buf, "%d", &ccp_index)) { + ret = -errno; + error_report("format addr %s fail\n", buf); + goto out; + } + + if (!hct_check_duplicated_index(ccp_index)) { + state->sdev.shared_memory_offset = ccp_index; + } else { + ret = -1; + } + +out: + qemu_close(fd); + return ret; +} + +static int hct_api_version_check(void) +{ + struct hct_dev_ctrl ctrl; + int ret; + + ctrl.op = HCT_SHARE_OP_GET_VERSION; + 
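+    /* Seed ctrl.version with the default version string; the ioctl below
+     * fills in the running hct.ko version, which is then checked against
+     * HCT_VERSION_STRING. */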
memcpy(ctrl.version, DEF_VERSION_STRING, sizeof(DEF_VERSION_STRING)); + ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); + if (ret < 0) { + error_report("ret %d, errno %d: fail to get hct.ko version.\n", ret, + errno); + return -1; + } else if (memcmp(ctrl.version, HCT_VERSION_STRING, + sizeof(HCT_VERSION_STRING)) < 0) { + error_report("The hct.ko version is %s, please upgrade to version %s " + "or higher.\n", + ctrl.version, HCT_VERSION_STRING); + return -1; + } + + return 0; +} + +static int hct_shared_memory_init(void) +{ + int ret = 0; + + hct_data.hct_shared_memory = + mmap(NULL, HCT_SHARED_MEMORY_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + hct_data.hct_fd, 0); + if (hct_data.hct_shared_memory == MAP_FAILED) { + ret = -errno; + error_report("map hct shared memory fail\n"); + goto out; + } + +out: + return ret; +} + +static void hct_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + struct hct_dev_ctrl ctrl; + hwaddr iova; + Int128 llend, llsize; + void *vaddr; + int ret; + + iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); + llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); + llend = int128_add(llend, int128_exts64(qemu_real_host_page_mask())); + + if (int128_ge(int128_make64(iova), llend)) { + return; + } + + if (!section->mr->ram) { + return; + } + + vaddr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region + + (iova - section->offset_within_address_space); + llsize = int128_sub(llend, int128_make64(iova)); + + ctrl.op = HCT_SHARE_OP_DMA_MAP; + ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); + ctrl.vaddr = (uint64_t)vaddr; + ctrl.size = llsize; + ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); + if (ret < 0) + error_report("VFIO_MAP_DMA: %d, iova=%lx", -errno, iova); +} + +static void hct_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + struct hct_dev_ctrl ctrl; + hwaddr iova; + Int128 llend, llsize; + int ret; + + iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); + llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); + llend = int128_add(llend, int128_exts64(qemu_real_host_page_mask())); + + if (int128_ge(int128_make64(iova), llend)) { + return; + } + + if (!section->mr->ram) { + return; + } + + llsize = int128_sub(llend, int128_make64(iova)); + + ctrl.op = HCT_SHARE_OP_DMA_UNMAP; + ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); + ctrl.size = llsize; + ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); + if (ret < 0) + error_report("VFIO_UNMAP_DMA: %d", -errno); +} + +static MemoryListener hct_memory_listener = { + .region_add = hct_listener_region_add, + .region_del = hct_listener_region_del, +}; + +static void hct_data_uninit(HCTDevState *state) +{ + if (hct_data.hct_fd) { + qemu_close(hct_data.hct_fd); + hct_data.hct_fd = 0; + } + + if (hct_data.pasid) { + hct_data.pasid = 0; + } + + if (hct_data.pasid_memory) { + munmap(hct_data.pasid_memory, PAGE_SIZE); + hct_data.pasid_memory = NULL; + } + + if (hct_data.hct_shared_memory) { + munmap((void *)hct_data.hct_shared_memory, HCT_SHARED_MEMORY_SIZE); + hct_data.hct_shared_memory = NULL; + } + + memory_listener_unregister(&hct_memory_listener); +} + +static int hct_data_init(HCTDevState *state) +{ + int ret; + + if (hct_data.init == 0) { + + hct_data.hct_fd = qemu_open_old(HCT_SHARE_DEV, O_RDWR); + if (hct_data.hct_fd < 0) { + error_report("fail to open %s, errno %d.", HCT_SHARE_DEV, errno); 
+            ret = -errno;
+            goto out;
+        }
+
+        /* The hct.ko version number must not be less than 0.2. */
+        ret = hct_api_version_check();
+        if (ret)
+            goto out;
+
+        /* assign a page to the virtual BAR3 of each CCP. */
+        ret = hct_shared_memory_init();
+        if (ret)
+            goto out;
+
+        hct_data.pasid_memory = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
+                                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (hct_data.pasid_memory == MAP_FAILED) {
+            ret = -errno;
+            goto unmap_shared_memory_exit;
+        }
+
+        /* assign a unique pasid to each virtual machine. */
+        ret = pasid_get_and_init(state);
+        if (ret < 0)
+            goto unmap_pasid_memory_exit;
+
+        /* perform DMA_MAP and DMA_UNMAP operations on all memories of the
+         * virtual machine. */
+        memory_listener_register(&hct_memory_listener, &address_space_memory);
+
+        hct_data.init = 1;
+    }
+
+    return hct_get_ccp_index(state);
+
+unmap_pasid_memory_exit:
+    munmap(hct_data.pasid_memory, PAGE_SIZE);
+
+unmap_shared_memory_exit:
+    munmap((void *)hct_data.hct_shared_memory, HCT_SHARED_MEMORY_SIZE);
+
+out:
+    return ret;
+}
+
+/* Called when the device is realized. */
+static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp)
+{
+    int ret;
+    char *mdevid;
+    Error *err = NULL;
+    HCTDevState *state = PCI_HCT_DEV(pci_dev);
+
+    /* parse the mdev device name from startup scripts */
+    mdevid = g_path_get_basename(state->vdev.sysfsdev);
+    state->vdev.name = g_strdup_printf("%s", mdevid);
+
+    ret = hct_data_init(state);
+    if (ret < 0) {
+        g_free(state->vdev.name);
+        goto out;
+    }
+
+    ret = vfio_attach_device(state->vdev.name, &state->vdev,
+                             pci_device_iommu_address_space(pci_dev), &err);
+
+    if (ret) {
+        error_report("attach device failed, name = %s", state->vdev.name);
+        goto data_uninit_out;
+    }
+
+    state->vdev.ops = &vfio_ccp_ops;
+    state->vdev.dev = &state->sdev.dev.qdev;
+
+    ret = vfio_hct_region_mmap(state);
+    if (ret < 0)
+        goto detach_device_out;
+
+    return;
+
+detach_device_out:
+    vfio_hct_detach_device(state);
+
+data_uninit_out:
+    hct_data_uninit(state);
+
+out:
+    return;
+}
+
+static void hct_dev_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+    dc->desc = "HCT Device";
+    device_class_set_props(dc, vfio_hct_properties);
+
+    pdc->realize = vfio_hct_realize;
+    pdc->exit = vfio_hct_exit;
+    pdc->vendor_id = PCI_VENDOR_ID_HYGON_CCP;
+    pdc->device_id = PCI_DEVICE_ID_HYGON_CCP;
+    pdc->class_id = PCI_CLASS_CRYPT_OTHER;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+
+    return;
+}
+
+static const TypeInfo pci_hct_info = {
+    .name = TYPE_HCT_DEV,
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(HCTDevState),
+    .class_init = hct_dev_class_init,
+    .interfaces =
+        (InterfaceInfo[]){
+            {INTERFACE_CONVENTIONAL_PCI_DEVICE},
+            {},
+        },
+};
+
+static void hct_register_types(void) { type_register_static(&pci_hct_info); }
+
+type_init(hct_register_types);
 diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index 2a6912c94027d6213144f910d64625a469cc2b1f..b1db4c8605f0e4730cd7da29d6defc7391625509 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -17,5 +17,6 @@ vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) +vfio_ss.add(when: 'CONFIG_VFIO_HCT', if_true: files('hct.c')) specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 
356d690cc97a301396e7728eece25e120e50577a..09e2de60c105dc2c2bef8115970cc8c28ddc4ede 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -744,6 +744,60 @@ int virtio_queue_empty(VirtQueue *vq) } }
+
+static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+    if (unlikely(!vq->vring.avail)) {
+        return false;
+    }
+
+    return (uint16_t)shadow_idx != vring_avail_idx(vq);
+}
+
+static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+    VRingPackedDesc desc;
+    VRingMemoryRegionCaches *caches;
+
+    if (unlikely(!vq->vring.desc)) {
+        return false;
+    }
+
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        return false;
+    }
+
+    vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
+                           shadow_idx, true);
+
+    return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter);
+}
+
+static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx)
+{
+    if (virtio_device_disabled(vq->vdev)) {
+        return false;
+    }
+
+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        return virtio_queue_packed_poll(vq, shadow_idx);
+    } else {
+        return virtio_queue_split_poll(vq, shadow_idx);
+    }
+}
+
+bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
+                                                int opaque)
+{
+    virtio_queue_set_notification(vq, 1);
+
+    if (opaque >= 0) {
+        return virtio_queue_poll(vq, (unsigned)opaque);
+    } else {
+        return false;
+    }
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) @@ -1323,9 +1377,9 @@ err: goto done; }
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-                               unsigned int *out_bytes,
-                               unsigned max_in_bytes, unsigned max_out_bytes)
+int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                              unsigned int *out_bytes, unsigned max_in_bytes,
+                              unsigned max_out_bytes)
 { uint16_t desc_size; VRingMemoryRegionCaches *caches; @@ -1358,7 +1412,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, caches); }
-    return;
+    return (int)vq->shadow_avail_idx;
 err: if (in_bytes) { *in_bytes = 0; } @@ -1366,6 +1420,8 @@ err: if (out_bytes) { *out_bytes = 0; }
+
+    return -1;
 }
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h index ba2dd4b5dfc4a0c2d039b3e67718afc5844a6317..2cba27642f765a7ccd6ee33c1d366e2b12e0de81 100644 --- a/include/exec/confidential-guest-support.h +++ b/include/exec/confidential-guest-support.h @@ -53,8 +53,54 @@ struct ConfidentialGuestSupport { bool ready; };
+
+/**
+ * The functions registered in ConfidentialGuestMemoryEncryptionOps are
+ * used during encrypted guest migration.
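+ * They are provided by the platform-specific confidential-guest code
+ * (presumably target/i386/sev.c, which is not part of this excerpt) via
+ * ConfidentialGuestSupportClass and are invoked from migration/ram.c.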
+ */ +struct ConfidentialGuestMemoryEncryptionOps { + /* Initialize the platform specific state before starting the migration */ + int (*save_setup)(const char *pdh, const char *plat_cert, + const char *amd_cert); + + /* Write the encrypted page and metadata associated with it */ + int (*save_outgoing_page)(QEMUFile *f, uint8_t *ptr, uint32_t size, + uint64_t *bytes_sent); + + /* Load the incoming encrypted page into guest memory */ + int (*load_incoming_page)(QEMUFile *f, uint8_t *ptr); + + /* Check if gfn is in shared/unencrypted region */ + bool (*is_gfn_in_unshared_region)(unsigned long gfn); + + /* Write the shared regions list */ + int (*save_outgoing_shared_regions_list)(QEMUFile *f, uint64_t *bytes_sent); + + /* Load the shared regions list */ + int (*load_incoming_shared_regions_list)(QEMUFile *f); + + /* Queue the encrypted page and metadata associated with it into a list */ + int (*queue_outgoing_page)(uint8_t *ptr, uint32_t size, uint64_t addr); + + /* Write the list queued with encrypted pages and metadata associated + * with them */ + int (*save_queued_outgoing_pages)(QEMUFile *f, uint64_t *bytes_sent); + + /* Queue the incoming encrypted page into a list */ + int (*queue_incoming_page)(QEMUFile *f, uint8_t *ptr); + + /* Load the incoming encrypted pages queued in list into guest memory */ + int (*load_queued_incoming_pages)(QEMUFile *f); + + /* Write the encrypted cpu state */ + int (*save_outgoing_cpu_state)(QEMUFile *f, uint64_t *bytes_sent); + + /* Load the encrypted cpu state */ + int (*load_incoming_cpu_state)(QEMUFile *f); +}; + typedef struct ConfidentialGuestSupportClass { ObjectClass parent; + struct ConfidentialGuestMemoryEncryptionOps *memory_encryption_ops; } ConfidentialGuestSupportClass; #endif /* !CONFIG_USER_ONLY */ diff --git a/include/exec/memory.h b/include/exec/memory.h index 831f7c996d9da49cdf9884fdeffa32959865cb07..3e65d8d9f54662424d88ad7b31945196554e2cbd 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -775,6 +775,17 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, ram_addr_t *ram_addr, bool *read_only, bool *mr_has_discard_manager); +typedef struct SharedRegionListener SharedRegionListener; +struct SharedRegionListener { + MemoryListener *listener; + AddressSpace *as; + QTAILQ_ENTRY(SharedRegionListener) next; +}; + +void shared_region_register_listener(SharedRegionListener *shl); +void shared_region_unregister_listener(SharedRegionListener *shl); +void *shared_region_listeners_get(void); + typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index c8f72850bc05f4c6ea7b51aae4b572b04882725f..d2f4ed160de74b13cc9dbde433e3fd9e4e5fcf76 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -270,9 +270,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, VirtQueueElement *elem); int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, unsigned int out_bytes); -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes); +/** + * Return <0 on error or an opaque >=0 to pass to + * virtio_queue_enable_notification_and_check on success. 
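+ * The opaque value is a snapshot of the queue's shadow avail index:
+ * virtio_queue_enable_notification_and_check() re-reads the ring at that
+ * index to detect buffers added after this call (see virtio.c).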
+ */ +int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, unsigned max_in_bytes, + unsigned max_out_bytes); void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); @@ -306,6 +310,14 @@ int virtio_queue_ready(VirtQueue *vq); int virtio_queue_empty(VirtQueue *vq); +/** + * Enable notification and check whether guest has added some + * buffers since last call to virtqueue_get_avail_bytes. + * + * @opaque: value returned from virtqueue_get_avail_bytes + */ +bool virtio_queue_enable_notification_and_check(VirtQueue *vq, + int opaque); /* Host binding interface. */ uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr); diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h index ef91fc28bbdfa3dab181e11c5a4dc42fce1bad92..7a32e7f820325ed677db482893faa60c7d43b5d4 100644 --- a/include/sysemu/accel-ops.h +++ b/include/sysemu/accel-ops.h @@ -53,6 +53,9 @@ struct AccelOpsClass { int (*insert_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); int (*remove_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); void (*remove_all_breakpoints)(CPUState *cpu); + + void (*control_pre_system_reset)(void); + void (*control_post_system_reset)(void); }; #endif /* ACCEL_OPS_H */ diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index b4a566cfe75274f6d8f4bfe8abb9e409a4c2e440..f24d27daf5269d658e2249cb1482d88d23741bea 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -44,6 +44,8 @@ extern int icount_align_option; void qemu_cpu_kick_self(void); bool cpus_are_resettable(void); +void cpus_control_pre_system_reset(void); +void cpus_control_post_system_reset(void); void cpu_synchronize_all_states(void); void cpu_synchronize_all_post_reset(void); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index d6148781642107de0995af400331bbde126791b8..1e15cfe9dc49ae776cab68404aa0ed8d6f030689 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -42,6 +42,7 @@ extern bool kvm_gsi_routing_allowed; extern bool kvm_gsi_direct_mapping; extern bool kvm_readonly_mem_allowed; extern bool kvm_msi_use_devid; +extern bool kvm_csv3_allowed; #define kvm_enabled() (kvm_allowed) /** @@ -143,6 +144,12 @@ extern bool kvm_msi_use_devid; */ #define kvm_msi_devid_required() (kvm_msi_use_devid) +/** + * kvm_csv3_enabled: + * Returns: true if CSV3 feature is used for the VM. 
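+ * (Assumption based on this series: CSV3 is the Hygon secure-virtualization
+ * generation that isolates guest memory, hence the shared-region and
+ * full-redraw special cases elsewhere in this patch.)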
+ */ +#define kvm_csv3_enabled() (kvm_csv3_allowed) + #else #define kvm_enabled() (0) @@ -157,6 +164,7 @@ extern bool kvm_msi_use_devid; #define kvm_gsi_direct_mapping() (false) #define kvm_readonly_mem_enabled() (false) #define kvm_msi_devid_required() (false) +#define kvm_csv3_enabled() (false) #endif /* CONFIG_KVM_IS_POSSIBLE */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 549fea3a978af9e6b9f2cbc4dab7f71e10ec8158..36da75b925bf3ca6274f65cf084ff303022033ca 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -346,6 +346,7 @@ struct kvm_run { } iocsr_io; /* KVM_EXIT_HYPERCALL */ struct { +#define KVM_HC_MAP_GPA_RANGE 12 __u64 nr; __u64 args[6]; __u64 ret; @@ -1198,6 +1199,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 +#define KVM_CAP_SEV_ES_GHCB 500 + +#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) + #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing_irqchip { @@ -1578,6 +1583,10 @@ struct kvm_s390_ucas_mapping { #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) +/* ioctls for control vcpu setup during system reset */ +#define KVM_CONTROL_VCPU_PRE_SYSTEM_RESET _IO(KVMIO, 0xe8) +#define KVM_CONTROL_VCPU_POST_SYSTEM_RESET _IO(KVMIO, 0xe9) + /* * ioctls for vcpu fds */ @@ -1925,6 +1934,9 @@ enum sev_cmd_id { /* Guest Migration Extension */ KVM_SEV_SEND_CANCEL, + /* Hygon CSV batch command */ + KVM_CSV_COMMAND_BATCH = 0x18, + KVM_SEV_NR_MAX, }; @@ -2003,6 +2015,14 @@ struct kvm_sev_send_update_data { __u32 trans_len; }; +struct kvm_sev_send_update_vmsa { + __u32 vcpu_id; + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + struct kvm_sev_receive_start { __u32 handle; __u32 policy; @@ -2021,6 +2041,96 @@ struct kvm_sev_receive_update_data { __u32 trans_len; }; +struct kvm_sev_receive_update_vmsa { + __u32 vcpu_id; + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_csv_batch_list_node { + __u64 cmd_data_addr; + __u64 addr; + __u64 next_cmd_addr; +}; + +struct kvm_csv_command_batch { + __u32 command_id; + __u64 csv_batch_list_uaddr; +}; + +struct kvm_csv_init { + __u64 userid_addr; + __u32 len; +}; + +/* CSV3 command */ +enum csv3_cmd_id { + KVM_CSV3_NR_MIN = 0xc0, + + KVM_CSV3_INIT = KVM_CSV3_NR_MIN, + KVM_CSV3_LAUNCH_ENCRYPT_DATA, + KVM_CSV3_LAUNCH_ENCRYPT_VMCB, + KVM_CSV3_SEND_ENCRYPT_DATA, + KVM_CSV3_SEND_ENCRYPT_CONTEXT, + KVM_CSV3_RECEIVE_ENCRYPT_DATA, + KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT, + KVM_CSV3_HANDLE_MEMORY, + + KVM_CSV3_NR_MAX, +}; + +struct kvm_csv3_launch_encrypt_data { + __u64 gpa; + __u64 uaddr; + __u32 len; +}; + +struct kvm_csv3_init_data { + __u64 nodemask; +}; + +struct kvm_csv3_send_encrypt_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_addr_data; + __u32 guest_addr_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_csv3_send_encrypt_context { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_csv3_receive_encrypt_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_addr_data; + __u32 guest_addr_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_csv3_receive_encrypt_context { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +#define KVM_CSV3_RELEASE_SHARED_MEMORY (0x0001) + +struct kvm_csv3_handle_memory { + __u64 gpa; + __u32 num_pages; + __u32 opcode; +}; 
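As a usage sketch: the new KVM_CSV3_* commands are expected to travel through the same KVM_MEMORY_ENCRYPT_OP and struct kvm_sev_cmd path that SEV commands use. The QEMU-side helper is not part of this diff, so the wrapper below is an assumption (it only needs this header plus <sys/ioctl.h>):

    /* Hypothetical helper: release previously-shared guest pages. */
    static int csv3_release_shared_memory(int vm_fd, int sev_fd,
                                          __u64 gpa, __u32 num_pages)
    {
        struct kvm_csv3_handle_memory mem = {
            .gpa = gpa,
            .num_pages = num_pages,
            .opcode = KVM_CSV3_RELEASE_SHARED_MEMORY,
        };
        struct kvm_sev_cmd cmd = {
            .id = KVM_CSV3_HANDLE_MEMORY,
            .data = (__u64)(unsigned long)&mem,
            .sev_fd = sev_fd,
        };

        /* vm_fd is the KVM VM file descriptor */
        return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
    }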
+ #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index 2faa5cad46cfb9316f425b2cc7b0eab52701f760..58da696ff7afee186fa5ec464c15d1f8e468c666 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -392,6 +392,19 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %s\n", MigrationParameter_str(MIGRATION_PARAMETER_MODE), qapi_enum_lookup(&MigMode_lookup, params->mode)); + + assert(params->sev_pdh); + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_SEV_PDH), + params->sev_pdh); + assert(params->sev_plat_cert); + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_SEV_PLAT_CERT), + params->sev_plat_cert); + assert(params->sev_amd_cert); + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_SEV_AMD_CERT), + params->sev_amd_cert); } qapi_free_MigrationParameters(params); @@ -679,6 +692,21 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_mode = true; visit_type_MigMode(v, param, &p->mode, &err); break; + case MIGRATION_PARAMETER_SEV_PDH: + p->sev_pdh = g_new0(StrOrNull, 1); + p->sev_pdh->type = QTYPE_QSTRING; + visit_type_str(v, param, &p->sev_pdh->u.s, &err); + break; + case MIGRATION_PARAMETER_SEV_PLAT_CERT: + p->sev_plat_cert = g_new0(StrOrNull, 1); + p->sev_plat_cert->type = QTYPE_QSTRING; + visit_type_str(v, param, &p->sev_plat_cert->u.s, &err); + break; + case MIGRATION_PARAMETER_SEV_AMD_CERT: + p->sev_amd_cert = g_new0(StrOrNull, 1); + p->sev_amd_cert->type = QTYPE_QSTRING; + visit_type_str(v, param, &p->sev_amd_cert->u.s, &err); + break; default: assert(0); } diff --git a/migration/migration.h b/migration/migration.h index cf2c9c88e01d670b5b4c9d3de315a1b37a4f0d21..65f5599f4503ff55429cef72ceba79b759db9bb0 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -550,4 +550,6 @@ void migration_rp_kick(MigrationState *s); int migration_stop_vm(RunState state); +bool memcrypt_enabled(void); + #endif diff --git a/migration/options.c b/migration/options.c index 8d8ec73ad95bd1dc7db9cc9881099524085544ae..70f6beb83c0bb6642908ed3af34c1d1f7f50123e 100644 --- a/migration/options.c +++ b/migration/options.c @@ -179,6 +179,9 @@ Property migration_properties[] = { DEFINE_PROP_MIG_MODE("mode", MigrationState, parameters.mode, MIG_MODE_NORMAL), + DEFINE_PROP_STRING("sev-pdh", MigrationState, parameters.sev_pdh), + DEFINE_PROP_STRING("sev-plat-cert", MigrationState, parameters.sev_plat_cert), + DEFINE_PROP_STRING("sev-amd-cert", MigrationState, parameters.sev_amd_cert), /* Migration capabilities */ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), @@ -997,6 +1000,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->announce_rounds = s->parameters.announce_rounds; params->has_announce_step = true; params->announce_step = s->parameters.announce_step; + params->sev_pdh = g_strdup(s->parameters.sev_pdh); + params->sev_plat_cert = g_strdup(s->parameters.sev_plat_cert); + params->sev_amd_cert = g_strdup(s->parameters.sev_amd_cert); if (s->parameters.has_block_bitmap_mapping) { params->has_block_bitmap_mapping = true; @@ -1047,6 +1053,10 @@ void migrate_params_init(MigrationParameters *params) params->has_x_vcpu_dirty_limit_period = true; params->has_vcpu_dirty_limit = true; params->has_mode = true; + + params->sev_pdh = g_strdup(""); + 
params->sev_plat_cert = g_strdup(""); + params->sev_amd_cert = g_strdup(""); } /* @@ -1348,6 +1358,19 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_mode) { dest->mode = params->mode; } + + if (params->sev_pdh) { + assert(params->sev_pdh->type == QTYPE_QSTRING); + dest->sev_pdh = params->sev_pdh->u.s; + } + if (params->sev_plat_cert) { + assert(params->sev_plat_cert->type == QTYPE_QSTRING); + dest->sev_plat_cert = params->sev_plat_cert->u.s; + } + if (params->sev_amd_cert) { + assert(params->sev_amd_cert->type == QTYPE_QSTRING); + dest->sev_amd_cert = params->sev_amd_cert->u.s; + } } static void migrate_params_apply(MigrateSetParameters *params, Error **errp) @@ -1492,6 +1515,22 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_mode) { s->parameters.mode = params->mode; } + + if (params->sev_pdh) { + g_free(s->parameters.sev_pdh); + assert(params->sev_pdh->type == QTYPE_QSTRING); + s->parameters.sev_pdh = g_strdup(params->sev_pdh->u.s); + } + if (params->sev_plat_cert) { + g_free(s->parameters.sev_plat_cert); + assert(params->sev_plat_cert->type == QTYPE_QSTRING); + s->parameters.sev_plat_cert = g_strdup(params->sev_plat_cert->u.s); + } + if (params->sev_amd_cert) { + g_free(s->parameters.sev_amd_cert); + assert(params->sev_amd_cert->type == QTYPE_QSTRING); + s->parameters.sev_amd_cert = g_strdup(params->sev_amd_cert->u.s); + } } void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) @@ -1517,6 +1556,27 @@ void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) params->tls_authz->type = QTYPE_QSTRING; params->tls_authz->u.s = strdup(""); } + /* TODO Rewrite "" to null instead */ + if (params->sev_pdh + && params->sev_pdh->type == QTYPE_QNULL) { + qobject_unref(params->sev_pdh->u.n); + params->sev_pdh->type = QTYPE_QSTRING; + params->sev_pdh->u.s = strdup(""); + } + /* TODO Rewrite "" to null instead */ + if (params->sev_plat_cert + && params->sev_plat_cert->type == QTYPE_QNULL) { + qobject_unref(params->sev_plat_cert->u.n); + params->sev_plat_cert->type = QTYPE_QSTRING; + params->sev_plat_cert->u.s = strdup(""); + } + /* TODO Rewrite "" to null instead */ + if (params->sev_amd_cert + && params->sev_amd_cert->type == QTYPE_QNULL) { + qobject_unref(params->sev_amd_cert->u.n); + params->sev_amd_cert->type = QTYPE_QSTRING; + params->sev_amd_cert->u.s = strdup(""); + } migrate_params_test_apply(params, &tmp); diff --git a/migration/ram.c b/migration/ram.c index 8c7886ab797b8a91d9ecf060e8ca016bd436ae46..71353bc90bfe722ff24eaddb1a610a97717e9319 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -63,6 +63,12 @@ #include "options.h" #include "sysemu/dirtylimit.h" #include "sysemu/kvm.h" +#include "exec/confidential-guest-support.h" + +/* Defines RAM_SAVE_ENCRYPTED_PAGE and RAM_SAVE_SHARED_REGION_LIST */ +#include "target/i386/sev.h" +#include "target/i386/csv.h" +#include "sysemu/kvm.h" #include "hw/boards.h" /* for machine_dump_guest_core() */ @@ -92,7 +98,16 @@ /* 0x80 is reserved in rdma.h for RAM_SAVE_FLAG_HOOK */ #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 #define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 -/* We can't use any flag that is bigger than 0x200 */ +#define RAM_SAVE_FLAG_ENCRYPTED_DATA 0x400 + +bool memcrypt_enabled(void) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + if(ms->cgs) + return ms->cgs->ready; + else + return false; +} XBZRLECacheStats xbzrle_counters; @@ -1204,6 +1219,125 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, 
return 1; } +/** + * ram_save_encrypted_page - send the given encrypted page to the stream + */ +static int ram_save_encrypted_page(RAMState *rs, PageSearchStatus *pss) +{ + QEMUFile *file = pss->pss_channel; + int ret; + uint8_t *p; + RAMBlock *block = pss->block; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; + uint64_t bytes_xmit = 0; + MachineState *ms = MACHINE(qdev_get_machine()); + ConfidentialGuestSupportClass *cgs_class = + (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); + struct ConfidentialGuestMemoryEncryptionOps *ops = + cgs_class->memory_encryption_ops; + + p = block->host + offset; + trace_ram_save_page(block->idstr, (uint64_t)offset, p); + + ram_transferred_add(save_page_header(pss, file, block, + offset | RAM_SAVE_FLAG_ENCRYPTED_DATA)); + qemu_put_be32(file, RAM_SAVE_ENCRYPTED_PAGE); + ret = ops->save_outgoing_page(file, p, TARGET_PAGE_SIZE, &bytes_xmit); + if (ret) { + return -1; + } + ram_transferred_add(4 + bytes_xmit); + stat64_add(&mig_stats.normal_pages, 1); + + return 1; +} + +/** + * ram_save_shared_region_list: send the shared region list + */ +static int ram_save_shared_region_list(RAMState *rs, QEMUFile *f) +{ + int ret; + uint64_t bytes_xmit = 0; + PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; + MachineState *ms = MACHINE(qdev_get_machine()); + ConfidentialGuestSupportClass *cgs_class = + (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); + struct ConfidentialGuestMemoryEncryptionOps *ops = + cgs_class->memory_encryption_ops; + + ram_transferred_add(save_page_header(pss, f, + pss->last_sent_block, + RAM_SAVE_FLAG_ENCRYPTED_DATA)); + qemu_put_be32(f, RAM_SAVE_SHARED_REGIONS_LIST); + ret = ops->save_outgoing_shared_regions_list(f, &bytes_xmit); + if (ret < 0) { + return ret; + } + ram_transferred_add(4 + bytes_xmit); + + return 0; +} + +/** + * ram_save_encrypted_cpu_state: send the encrypted cpu state + */ +static int ram_save_encrypted_cpu_state(RAMState *rs, QEMUFile *f) +{ + int ret; + uint64_t bytes_xmit = 0; + PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; + MachineState *ms = MACHINE(qdev_get_machine()); + ConfidentialGuestSupportClass *cgs_class = + (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); + struct ConfidentialGuestMemoryEncryptionOps *ops = + cgs_class->memory_encryption_ops; + + ram_transferred_add(save_page_header(pss, f, + pss->last_sent_block, + RAM_SAVE_FLAG_ENCRYPTED_DATA)); + qemu_put_be32(f, RAM_SAVE_ENCRYPTED_CPU_STATE); + ret = ops->save_outgoing_cpu_state(f, &bytes_xmit); + if (ret < 0) { + return ret; + } + ram_transferred_add(4 + bytes_xmit); + + return 0; +} + +static int load_encrypted_data(QEMUFile *f, uint8_t *ptr) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + ConfidentialGuestSupportClass *cgs_class = + (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); + struct ConfidentialGuestMemoryEncryptionOps *ops = + cgs_class->memory_encryption_ops; + + int flag; + + flag = qemu_get_be32(f); + + if (flag == RAM_SAVE_ENCRYPTED_PAGE) { + return ops->load_incoming_page(f, ptr); + } else if (flag == RAM_SAVE_SHARED_REGIONS_LIST) { + return ops->load_incoming_shared_regions_list(f); + } else if (flag == RAM_SAVE_ENCRYPTED_PAGE_BATCH) { + return ops->queue_incoming_page(f, ptr); + } else if (flag == RAM_SAVE_ENCRYPTED_PAGE_BATCH_END) { + if (ops->queue_incoming_page(f, ptr)) { + error_report("Failed to queue incoming data"); + return -EINVAL; + } + return ops->load_queued_incoming_pages(f); + } else if (flag == 
RAM_SAVE_ENCRYPTED_CPU_STATE) {
+        return ops->load_incoming_cpu_state(f);
+    } else {
+        error_report("unknown encrypted flag %x", flag);
+        return 1;
+    }
+}
+
 /** * ram_save_page: send the given page to the stream * @@ -2034,6 +2168,56 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss, compress_send_queued_data); }
+
+/**
+ * encrypted_test_list: check whether the page is encrypted
+ *
+ * Returns a bool indicating whether the page is encrypted.
+ */
+static bool encrypted_test_list(RAMState *rs, RAMBlock *block,
+                                unsigned long page)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    ConfidentialGuestSupportClass *cgs_class =
+        (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs));
+    struct ConfidentialGuestMemoryEncryptionOps *ops =
+        cgs_class->memory_encryption_ops;
+    unsigned long gfn;
+    hwaddr paddr = 0;
+    int ret;
+
+    /* ROM devices contain unencrypted data */
+    if (memory_region_is_rom(block->mr)) {
+        return false;
+    }
+
+    if (!strcmp(memory_region_name(block->mr), "system.flash0")) {
+        return true;
+    }
+
+    if (!strcmp(memory_region_name(block->mr), "system.flash1")) {
+        return false;
+    }
+
+    if (!strcmp(memory_region_name(block->mr), "vga.vram")) {
+        return false;
+    }
+
+    /*
+     * Translate the page in the ram_addr_t address space to a GPA
+     * using the memory region.
+     */
+    if (kvm_enabled()) {
+        ret = kvm_physical_memory_addr_from_host(kvm_state,
+                block->host + (page << TARGET_PAGE_BITS), &paddr);
+        if (ret == 0) {
+            return false;
+        }
+    }
+    gfn = paddr >> TARGET_PAGE_BITS;
+
+    return ops->is_gfn_in_unshared_region(gfn);
+}
+
 /** * ram_save_target_page_legacy: save one target page * @@ -2052,6 +2236,17 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) return res; }
+
+    /*
+     * If memory encryption is enabled then use the memory encryption APIs
+     * to write the outgoing buffer to the wire. The encryption APIs take
+     * care of accessing the guest memory and re-encrypting it for
+     * transport purposes.
+     */
+    if (memcrypt_enabled() &&
+        encrypted_test_list(rs, pss->block, pss->page)) {
+        return ram_save_encrypted_page(rs, pss);
+    }
+
     if (save_compress_page(rs, pss, offset)) { return 1; } @@ -2177,6 +2372,196 @@ out: return ret; }
+
+#ifdef CONFIG_HYGON_CSV_MIG_ACCEL
+/**
+ * ram_save_encrypted_pages_in_batch: send the given encrypted pages to
+ * the stream.
+ *
+ * 4K pages are sent in batches; saving stops at the end of the block.
+ *
+ * The caller must hold ram_state.bitmap_mutex when calling this
+ * function.
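+ * Pages that encrypted_test_list() reports as unencrypted fall back to
+ * the regular ram_save_target_page path inside the loop below.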
+ *
+ * Returns the number of pages written or negative on error
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int
+ram_save_encrypted_pages_in_batch(RAMState *rs, PageSearchStatus *pss)
+{
+    bool page_dirty;
+    int ret;
+    int tmppages, pages = 0;
+    uint8_t *p;
+    uint32_t host_len = 0;
+    uint64_t bytes_xmit = 0;
+    ram_addr_t offset, start_offset = 0;
+    MachineState *ms = MACHINE(qdev_get_machine());
+    ConfidentialGuestSupportClass *cgs_class =
+        (ConfidentialGuestSupportClass *)object_get_class(OBJECT(ms->cgs));
+    struct ConfidentialGuestMemoryEncryptionOps *ops =
+        cgs_class->memory_encryption_ops;
+
+    do {
+        page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page);
+
+        /* Check whether the page is dirty and, if so, send it */
+        if (page_dirty) {
+            /* Process the unencrypted page */
+            if (!encrypted_test_list(rs, pss->block, pss->page)) {
+                tmppages = migration_ops->ram_save_target_page(rs, pss);
+            } else {
+                /* Calculate the offset and host virtual address of the page */
+                offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+                p = pss->block->host + offset;
+
+                /* Record the offset and host virtual address of the first
+                 * page in this loop; they are used below.
+                 */
+                if (host_len == 0) {
+                    start_offset = offset | RAM_SAVE_FLAG_ENCRYPTED_DATA;
+                } else {
+                    offset |= (RAM_SAVE_FLAG_ENCRYPTED_DATA | RAM_SAVE_FLAG_CONTINUE);
+                }
+
+                /* Queue the outgoing page if the page is not a zero page.
+                 * If the queued pages are up to the outgoing page window size,
+                 * process them below.
+                 */
+                if (ops->queue_outgoing_page(p, TARGET_PAGE_SIZE, offset))
+                    return -1;
+
+                tmppages = 1;
+                host_len += TARGET_PAGE_SIZE;
+
+                stat64_add(&mig_stats.normal_pages, 1);
+            }
+        } else {
+            tmppages = 0;
+        }
+
+        if (tmppages >= 0) {
+            pages += tmppages;
+        } else {
+            return tmppages;
+        }
+
+        pss_find_next_dirty(pss);
+    } while (offset_in_ramblock(pss->block,
+                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS) &&
+             host_len < CSV_OUTGOING_PAGE_WINDOW_SIZE);
+
+    /* Check if there are any queued pages */
+    if (host_len != 0) {
+        ram_transferred_add(save_page_header(pss, pss->pss_channel,
+                                             pss->block, start_offset));
+        /* if only one page is queued, the flag is BATCH_END, else BATCH */
+        if (host_len > TARGET_PAGE_SIZE)
+            qemu_put_be32(pss->pss_channel, RAM_SAVE_ENCRYPTED_PAGE_BATCH);
+        else
+            qemu_put_be32(pss->pss_channel, RAM_SAVE_ENCRYPTED_PAGE_BATCH_END);
+        ram_transferred_add(4);
+        /* Process the queued pages in batch */
+        ret = ops->save_queued_outgoing_pages(pss->pss_channel, &bytes_xmit);
+        if (ret) {
+            return -1;
+        }
+        ram_transferred_add(bytes_xmit);
+    }
+
+    /* The offset we leave with is the last one we looked at */
+    pss->page--;
+
+    return pages;
+}
+#endif
+
+/**
+ * ram_save_csv3_pages - send the given CSV3 VM pages to the stream
+ */
+static int ram_save_csv3_pages(RAMState *rs, PageSearchStatus *pss)
+{
+    bool page_dirty;
+    int ret;
+    int tmppages, pages = 0;
+    uint8_t *p;
+    uint32_t host_len = 0;
+    uint64_t bytes_xmit = 0;
+    RAMBlock *block = pss->block;
+    ram_addr_t offset = 0;
+    hwaddr paddr = RAM_ADDR_INVALID;
+    MachineState *ms = MACHINE(qdev_get_machine());
+    ConfidentialGuestSupportClass *cgs_class =
+        (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs));
+    struct ConfidentialGuestMemoryEncryptionOps *ops =
+        cgs_class->memory_encryption_ops;
+
+    if (!kvm_csv3_enabled())
+        return 0;
+
+    do {
+        page_dirty = migration_bitmap_clear_dirty(rs, block, pss->page);
+
+        /* Check whether the page is dirty and, if so, send it */
+        if (page_dirty) {
+            
ret = kvm_physical_memory_addr_from_host(kvm_state,
+                    block->host + (pss->page << TARGET_PAGE_BITS), &paddr);
+            /* Process ROM or MMIO */
+            if (paddr == RAM_ADDR_INVALID ||
+                memory_region_is_rom(block->mr)) {
+                tmppages = migration_ops->ram_save_target_page(rs, pss);
+            } else {
+                /* Calculate the offset and host virtual address of the page */
+                offset = pss->page << TARGET_PAGE_BITS;
+                p = block->host + offset;
+
+                if (ops->queue_outgoing_page(p, TARGET_PAGE_SIZE, offset))
+                    return -1;
+
+                tmppages = 1;
+                host_len += TARGET_PAGE_SIZE;
+
+                stat64_add(&mig_stats.normal_pages, 1);
+            }
+        } else {
+            tmppages = 0;
+        }
+
+        if (tmppages >= 0) {
+            pages += tmppages;
+        } else {
+            return tmppages;
+        }
+
+        pss_find_next_dirty(pss);
+    } while (offset_in_ramblock(block,
+                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS) &&
+             host_len < CSV3_OUTGOING_PAGE_WINDOW_SIZE);
+
+    /* Check if there are any queued pages */
+    if (host_len != 0) {
+        /* Always set offset as 0 for csv3. */
+        ram_transferred_add(save_page_header(pss, pss->pss_channel,
+                                             block, 0 | RAM_SAVE_FLAG_ENCRYPTED_DATA));
+
+        qemu_put_be32(pss->pss_channel, RAM_SAVE_ENCRYPTED_PAGE);
+        ram_transferred_add(4);
+        /* Process the queued pages in batch */
+        ret = ops->save_queued_outgoing_pages(pss->pss_channel, &bytes_xmit);
+        if (ret) {
+            return -1;
+        }
+        ram_transferred_add(bytes_xmit);
+    }
+
+    /* The offset we leave with is the last one we looked at */
+    pss->page--;
+
+    return pages;
+}
+
 /**
  * ram_save_host_page: save a whole host page
  *
@@ -2212,6 +2597,21 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
         return 0;
     }
 
+    if (kvm_csv3_enabled())
+        return ram_save_csv3_pages(rs, pss);
+
+#ifdef CONFIG_HYGON_CSV_MIG_ACCEL
+    /*
+     * If the command batch function is enabled and memory encryption is
+     * enabled, then use the command batch APIs to accelerate the sending
+     * process and write the outgoing buffer to the wire. The encryption
+     * APIs will re-encrypt the data with the transport key so that data
+     * is protected on the wire.
+     */
+    if (memcrypt_enabled() && is_hygon_cpu() && !migration_in_postcopy())
+        return ram_save_encrypted_pages_in_batch(rs, pss);
+#endif
+
     /* Update host page boundary information */
     pss_host_page_prepare(pss);
@@ -2917,6 +3317,18 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
     }
 }
 
+static int ram_encrypted_save_setup(void)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    ConfidentialGuestSupportClass *cgs_class =
+        (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs));
+    struct ConfidentialGuestMemoryEncryptionOps *ops =
+        cgs_class->memory_encryption_ops;
+    MigrationParameters *p = &migrate_get_current()->parameters;
+
+    return ops->save_setup(p->sev_pdh, p->sev_plat_cert, p->sev_amd_cert);
+}
+
 /*
  * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
@@ -2952,6 +3364,13 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f;
 
     WITH_RCU_READ_LOCK_GUARD() {
+
+        if (memcrypt_enabled()) {
+            if (ram_encrypted_save_setup()) {
+                return -1;
+            }
+        }
+
         qemu_put_be64(f, ram_bytes_total_with_ignored()
                          | RAM_SAVE_FLAG_MEM_SIZE);
 
@@ -3181,6 +3600,28 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
             qemu_file_set_error(f, ret);
             return ret;
         }
+
+        /* send the shared regions list */
+        if (memcrypt_enabled()) {
+            ret = ram_save_shared_region_list(rs, f);
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+                return ret;
+            }
+
+            /*
+             * send the encrypted cpu state, for example, the CSV2 guest's
+             * VMSA for each vcpu.
+             */
+            if (is_hygon_cpu()) {
+                ret = ram_save_encrypted_cpu_state(rs, f);
+                if (ret < 0) {
+                    error_report("Failed to save encrypted cpu state");
+                    qemu_file_set_error(f, ret);
+                    return ret;
+                }
+            }
+        }
     }
 
     ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel);
@@ -3918,7 +4359,8 @@ static int ram_load_precopy(QEMUFile *f)
         }
 
         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
-                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
+                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE |
+                     RAM_SAVE_FLAG_ENCRYPTED_DATA)) {
             RAMBlock *block = ram_block_from_stream(mis, f, flags,
                                                     RAM_CHANNEL_PRECOPY);
 
@@ -4011,6 +4453,12 @@ static int ram_load_precopy(QEMUFile *f)
                 qemu_file_set_error(f, ret);
             }
             break;
+        case RAM_SAVE_FLAG_ENCRYPTED_DATA:
+            if (load_encrypted_data(f, host)) {
+                error_report("Failed to load encrypted data");
+                ret = -EINVAL;
+            }
+            break;
         default:
             error_report("Unknown combination of migration flags: 0x%x", flags);
             ret = -EINVAL;
diff --git a/qapi/migration.json b/qapi/migration.json
index 197d3faa43fdac90a12450d53cd7d81249c050eb..3c4724db1b21b615956fe6c1840b1d884ce3d424 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -874,6 +874,17 @@
 # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
 #        (Since 8.2)
 #
+# @sev-pdh: The target host platform Diffie-Hellman key encoded in base64,
+#     or the PDH filename for Hygon
+#     (Since 4.2)
+#
+# @sev-plat-cert: The target host platform certificate chain encoded in
+#     base64, or the platform cert filename for Hygon
+#     (Since 4.2)
+#
+# @sev-amd-cert: AMD certificate chain which includes ASK and OCA, encoded
+#     in base64, or the vendor cert filename for Hygon (Since 4.2)
+#
 # Features:
 #
 # @deprecated: Member @block-incremental is deprecated.  Use
@@ -907,7 +918,8 @@
            'block-bitmap-mapping',
            { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
            'vcpu-dirty-limit',
-           'mode'] }
+           'mode',
+           'sev-pdh', 'sev-plat-cert', 'sev-amd-cert'] }
 
 ##
 # @MigrateSetParameters:
@@ -1062,6 +1074,17 @@
 # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
 #        (Since 8.2)
 #
+# @sev-pdh: The target host platform Diffie-Hellman key encoded in base64,
+#     or the PDH filename for Hygon
+#     (Since 4.2)
+#
+# @sev-plat-cert: The target host platform certificate chain encoded in
+#     base64, or the platform cert filename for Hygon
+#     (Since 4.2)
+#
+# @sev-amd-cert: AMD certificate chain which includes ASK and OCA, encoded
+#     in base64, or the vendor cert filename for Hygon (Since 4.2)
+#
 # Features:
 #
 # @deprecated: Member @block-incremental is deprecated.  Use
@@ -1115,7 +1138,11 @@
     '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
                                     'features': [ 'unstable' ] },
     '*vcpu-dirty-limit': 'uint64',
-    '*mode': 'MigMode'} }
+    '*mode': 'MigMode',
+    '*sev-pdh': 'StrOrNull',
+    '*sev-plat-cert': 'StrOrNull',
+    '*sev-amd-cert': 'StrOrNull' } }
+
 ##
 # @migrate-set-parameters:
@@ -1290,6 +1317,17 @@
 # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
 #        (Since 8.2)
 #
+# @sev-pdh: The target host platform Diffie-Hellman key encoded in base64,
+#     or the PDH filename for Hygon
+#     (Since 4.2)
+#
+# @sev-plat-cert: The target host platform certificate chain encoded in
+#     base64, or the platform cert filename for Hygon
+#     (Since 4.2)
+#
+# @sev-amd-cert: AMD certificate chain which includes ASK and OCA, encoded
+#     in base64, or the vendor cert filename for Hygon (Since 4.2)
+#
 # Features:
 #
 # @deprecated: Member @block-incremental is deprecated.  Use
@@ -1340,7 +1378,10 @@
     '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
                                     'features': [ 'unstable' ] },
     '*vcpu-dirty-limit': 'uint64',
-    '*mode': 'MigMode'} }
+    '*mode': 'MigMode',
+    '*sev-pdh': 'str',
+    '*sev-plat-cert': 'str',
+    '*sev-amd-cert': 'str'} }
 
 ##
 # @query-migrate-parameters:
diff --git a/qapi/qom.json b/qapi/qom.json
index c53ef978ff7e6a81f8c926159fe52c0d349696ac..89a2516b42e15f1643f1382ed365927ec1038a81 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -866,6 +866,8 @@
 #     designated guest firmware page for measured boot with -kernel
 #     (default: false) (since 6.2)
 #
+# @user-id: the user id of the guest owner, only supported on Hygon CPUs
+#
 # Since: 2.12
 ##
 { 'struct': 'SevGuestProperties',
@@ -876,7 +878,8 @@
     '*handle': 'uint32',
     '*cbitpos': 'uint32',
     'reduced-phys-bits': 'uint32',
-    '*kernel-hashes': 'bool' } }
+    '*kernel-hashes': 'bool',
+    '*user-id': 'str' } }
 
 ##
 # @ThreadContextProperties:
diff --git a/qemu-options.hx b/qemu-options.hx
index b6b4ad9e6763d666b0cc9e8cac7835c595bcaa13..c260117a96cf8f2c072768111945aa36bc21a6b4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -5645,7 +5645,7 @@ SRST
              -object secret,id=sec0,keyid=secmaster0,format=base64,\\
                  data=$SECRET,iv=$(<iv.b64)
 [...]
diff --git a/system/cpus.c b/system/cpus.c
--- a/system/cpus.c
+++ b/system/cpus.c
@@ ... @@
+void cpus_control_pre_system_reset(void)
+{
+    if (cpus_accel->control_pre_system_reset) {
+        cpus_accel->control_pre_system_reset();
+    }
+}
+
+void cpus_control_post_system_reset(void)
+{
+    if (cpus_accel->control_post_system_reset) {
+        cpus_accel->control_post_system_reset();
+    }
+}
+
 bool cpus_are_resettable(void)
 {
     if (cpus_accel->cpus_are_resettable) {
diff --git a/system/memory.c b/system/memory.c
index 798b6c0a171b71db8acb9aea503ea21ac26a07fe..2ffb878eb8cc8b5d464922003620d65f524b0dad 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -48,6 +48,9 @@ static QTAILQ_HEAD(, MemoryListener) memory_listeners
 static QTAILQ_HEAD(, AddressSpace) address_spaces
     = QTAILQ_HEAD_INITIALIZER(address_spaces);
 
+static QTAILQ_HEAD(, SharedRegionListener) shared_region_listeners
+    = QTAILQ_HEAD_INITIALIZER(shared_region_listeners);
+
 static GHashTable *flat_views;
 
 typedef struct AddrRange AddrRange;
@@ -2226,6 +2229,21 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
     return true;
 }
 
+void shared_region_register_listener(SharedRegionListener *shl)
+{
+    QTAILQ_INSERT_TAIL(&shared_region_listeners, shl, next);
+}
+
+void shared_region_unregister_listener(SharedRegionListener *shl)
+{
+    QTAILQ_REMOVE(&shared_region_listeners, shl, next);
+}
+
+void *shared_region_listeners_get(void)
+{
+    return &shared_region_listeners;
+}
+
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
     uint8_t mask = 1 << client;
diff --git a/system/runstate.c b/system/runstate.c
index 
ea9d6c2a32a45541a87cecaba569583f60624ea2..365f2f44b982947ab9ad39374bdf7c1ead36193e 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -486,6 +486,8 @@ void qemu_system_reset(ShutdownCause reason) mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL; + cpus_control_pre_system_reset(); + cpu_synchronize_all_states(); if (mc && mc->reset) { @@ -502,6 +504,8 @@ void qemu_system_reset(ShutdownCause reason) qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } cpu_synchronize_all_post_reset(); + + cpus_control_post_system_reset(); } /* diff --git a/target/i386/cpu.c b/target/i386/cpu.c index ffdaf16bd3fa014485ec771ff484f4941e050a3e..8649f9ebf5865d2b33aefb7ba210fba036784558 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -29,6 +29,7 @@ #include "hvf/hvf-i386.h" #include "kvm/kvm_i386.h" #include "sev.h" +#include "csv.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "qapi/qapi-visit-machine.h" @@ -2162,6 +2163,56 @@ static const CPUCaches epyc_genoa_cache_info = { }, }; +static const CPUCaches dharma_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -4804,6 +4855,20 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { .version = 3, + .props = (PropValue[]) { + { "xsaves", "off" }, + { "perfctr-core", "on" }, + { "clzero", "on" }, + { "xsaveerptr", "on" }, + { "aes", "on" }, + { "pclmulqdq", "on" }, + { "sha-ni", "on" }, + { "model-id", + "Hygon Dhyana-v3 processor" }, + { /* end of list */ } + }, + }, { /* end of list */ } } }, @@ -5035,6 +5100,55 @@ static const X86CPUDefinition builtin_x86_defs[] = { .model_id = "AMD EPYC-Genoa Processor", .cache_info = &epyc_genoa_cache_info, }, + { + .name = "Dharma", + .level = 0xd, + .vendor = CPUID_VENDOR_HYGON, + .family = 24, + .model = 4, + .stepping = 0, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | + CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR 
| CPUID_EXT2_MMXEXT | CPUID_EXT2_NX |
+            CPUID_EXT2_SYSCALL,
+        .features[FEAT_8000_0001_ECX] =
+            CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
+            CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
+            CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM |
+            CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE,
+        .features[FEAT_8000_0008_EBX] =
+            CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR |
+            CPUID_8000_0008_EBX_IBPB | CPUID_8000_0008_EBX_IBRS |
+            CPUID_8000_0008_EBX_STIBP | CPUID_8000_0008_EBX_AMD_SSBD,
+        .features[FEAT_7_0_EBX] =
+            CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 |
+            CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED |
+            CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
+            CPUID_7_0_EBX_SHA_NI,
+        .features[FEAT_7_0_ECX] = CPUID_7_0_ECX_UMIP,
+        .features[FEAT_XSAVE] =
+            CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+            CPUID_XSAVE_XGETBV1,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
+        .features[FEAT_SVM] =
+            CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE,
+        .xlevel = 0x8000001E,
+        .model_id = "Hygon Dharma Processor",
+        .cache_info = &dharma_cache_info,
+    },
 };
 
 /*
@@ -6808,6 +6922,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         if (sev_enabled()) {
             *eax = 0x2;
             *eax |= sev_es_enabled() ? 0x8 : 0;
+            *eax |= csv3_enabled() ? 0x40000000 : 0; /* bit 30 for CSV3 */
             *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
             *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
         }
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 877fc2b6b1f2d451e12940d0a89d5600c4498901..9fc24f7e4c41adfcbcb1876ccd539b9a0d5035e9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -521,6 +521,8 @@ typedef enum X86Seg {
 
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+#define MSR_AMD64_SEV_ES_GHCB           0xc0010130
+
 #define MSR_IA32_XFD                    0x000001c4
 #define MSR_IA32_XFD_ERR                0x000001c5
 
@@ -1888,6 +1890,9 @@ typedef struct CPUArchState {
 
     /* Number of dies within this CPU package. */
     unsigned nr_dies;
+
+    /* GHCB guest physical address info */
+    uint64_t ghcb_gpa;
 } CPUX86State;
 
 struct kvm_msrs;
diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3224a01544bf456c2320465bf90de04bbe0fd89
--- /dev/null
+++ b/target/i386/csv-sysemu-stub.c
@@ -0,0 +1,46 @@
+/*
+ * QEMU CSV system stub
+ *
+ * Copyright: Hygon Info Technologies Ltd. 2022
+ *
+ * Author:
+ *      Jiang Xin
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "sev.h"
+#include "csv.h"
+
+int csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops)
+{
+    return 0;
+}
+
+int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp)
+{
+    g_assert_not_reached();
+}
+
+int csv3_launch_encrypt_vmcb(void)
+{
+    g_assert_not_reached();
+}
+
+int csv3_shared_region_dma_map(uint64_t start, uint64_t end)
+{
+    return 0;
+}
+
+void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end)
+{
+
+}
+
+void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages)
+{
+
+}
diff --git a/target/i386/csv.c b/target/i386/csv.c
new file mode 100644
index 0000000000000000000000000000000000000000..a869cc2a7ed709f64e16f9f27ebf4bbedd402292
--- /dev/null
+++ b/target/i386/csv.c
@@ -0,0 +1,734 @@
+/*
+ * QEMU CSV support
+ *
+ * Copyright: Hygon Info Technologies Ltd. 2022
+ *
+ * Author:
+ *      Jiang Xin
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "sysemu/kvm.h"
+#include "exec/address-spaces.h"
+#include "migration/blocker.h"
+#include "migration/qemu-file.h"
+#include "migration/misc.h"
+#include "monitor/monitor.h"
+
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+
+#ifdef CONFIG_NUMA
+#include <numaif.h>
+#endif
+
+#include "trace.h"
+#include "cpu.h"
+#include "sev.h"
+#include "csv.h"
+
+bool csv_kvm_cpu_reset_inhibit;
+
+struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops = {
+    .save_setup = sev_save_setup,
+    .save_outgoing_page = NULL,
+    .load_incoming_page = csv3_load_incoming_page,
+    .is_gfn_in_unshared_region = NULL,
+    .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list,
+    .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list,
+    .queue_outgoing_page = csv3_queue_outgoing_page,
+    .save_queued_outgoing_pages = csv3_save_queued_outgoing_pages,
+    .queue_incoming_page = NULL,
+    .load_queued_incoming_pages = NULL,
+    .save_outgoing_cpu_state = csv3_save_outgoing_context,
+    .load_incoming_cpu_state = csv3_load_incoming_context,
+};
+
+#define CSV3_OUTGOING_PAGE_NUM \
+    (CSV3_OUTGOING_PAGE_WINDOW_SIZE / TARGET_PAGE_SIZE)
+
+Csv3GuestState csv3_guest = { 0 };
+
+int
+csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops)
+{
+    int fw_error;
+    int ret;
+    struct kvm_csv3_init_data data = { 0 };
+
+#ifdef CONFIG_NUMA
+    int mode;
+    unsigned long nodemask;
+
+    /* Set flags as 0 to retrieve the default NUMA policy. */
+    ret = get_mempolicy(&mode, &nodemask, sizeof(nodemask) * 8, NULL, 0);
+    if (ret == 0 && (mode == MPOL_BIND))
+        data.nodemask = nodemask;
+#endif
+
+    if (!ops || !ops->sev_ioctl || !ops->fw_error_to_str)
+        return -1;
+
+    csv3_guest.policy = policy;
+    if (csv3_enabled()) {
+        ret = ops->sev_ioctl(fd, KVM_CSV3_INIT, &data, &fw_error);
+        if (ret) {
+            csv3_guest.policy = 0;
+            error_report("%s: Failed to initialize ret=%d fw_error=%d '%s'",
+                         __func__, ret, fw_error, ops->fw_error_to_str(fw_error));
+            return -1;
+        }
+
+        kvm_csv3_allowed = true;
+
+        csv3_guest.sev_fd = fd;
+        csv3_guest.state = state;
+        csv3_guest.sev_ioctl = ops->sev_ioctl;
+        csv3_guest.fw_error_to_str = ops->fw_error_to_str;
+        QTAILQ_INIT(&csv3_guest.dma_map_regions_list);
+        qemu_mutex_init(&csv3_guest.dma_map_regions_list_mutex);
+        csv3_guest.sev_send_start = ops->sev_send_start;
+        csv3_guest.sev_receive_start = ops->sev_receive_start;
+    }
+    return 0;
+}
+
+bool
+csv3_enabled(void)
+{
+    if (!is_hygon_cpu())
+        return false;
+
+    return sev_es_enabled() && (csv3_guest.policy & GUEST_POLICY_CSV3_BIT);
+}
+
+static bool
+csv3_check_state(SevState state)
+{
+    return *((SevState *)csv3_guest.state) == state;
+}
+
+static int
+csv3_ioctl(int cmd, void *data, int *error)
+{
+    if (csv3_guest.sev_ioctl)
+        return csv3_guest.sev_ioctl(csv3_guest.sev_fd, cmd, data, error);
+    else
+        return -1;
+}
+
+static const char *
+fw_error_to_str(int code)
+{
+    if (csv3_guest.fw_error_to_str)
+        return csv3_guest.fw_error_to_str(code);
+    else
+        return NULL;
+}
+
+static int
+csv3_launch_encrypt_data(uint64_t gpa, uint8_t *addr, uint64_t len)
+{
+    int ret, fw_error;
+    struct kvm_csv3_launch_encrypt_data update;
+
+    if (!addr || !len) {
+        return 1;
+    }
+
+    update.gpa = (__u64)gpa;
+    update.uaddr = (__u64)(unsigned long)addr;
+    update.len = len;
+    trace_kvm_csv3_launch_encrypt_data(gpa, addr, len);
+    ret = csv3_ioctl(KVM_CSV3_LAUNCH_ENCRYPT_DATA, &update, &fw_error);
+    if (ret) {
+        error_report("%s: CSV3 LAUNCH_ENCRYPT_DATA ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+    }
+
+    return ret;
+}
+
+int
+csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp)
+{
+    int ret = 0;
+
+    if (!csv3_enabled()) {
+        error_setg(errp, "%s: CSV3 is not enabled", __func__);
+        return -1;
+    }
+
+    /* if CSV3 is in the update state then load the data into secure memory */
+    if (csv3_check_state(SEV_STATE_LAUNCH_UPDATE)) {
+        ret = csv3_launch_encrypt_data(gpa, ptr, len);
+        if (ret)
+            error_setg(errp, "%s: CSV3 failed to encrypt data", __func__);
+    }
+
+    return ret;
+}
+
+int
+csv3_launch_encrypt_vmcb(void)
+{
+    int ret, fw_error;
+
+    if (!csv3_enabled()) {
+        error_report("%s: CSV3 is not enabled", __func__);
+        return -1;
+    }
+
+    ret = csv3_ioctl(KVM_CSV3_LAUNCH_ENCRYPT_VMCB, NULL, &fw_error);
+    if (ret) {
+        error_report("%s: CSV3 LAUNCH_ENCRYPT_VMCB ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+
+err:
+    return ret;
+}
+
+int csv3_shared_region_dma_map(uint64_t start, uint64_t end)
+{
+    MemoryRegionSection section;
+    AddressSpace *as;
+    QTAILQ_HEAD(, SharedRegionListener) *shared_region_listeners;
+    SharedRegionListener *shl;
+    MemoryListener *listener;
+    uint64_t size;
+    Csv3GuestState *s = &csv3_guest;
+    struct dma_map_region *region, *pos;
+    int ret = 0;
+
+    if (!csv3_enabled())
+        return 0;
+
+    if (end <= start)
+        return 0;
+
+    shared_region_listeners = shared_region_listeners_get();
+    if (QTAILQ_EMPTY(shared_region_listeners))
+        return 0;
+
+    size = end - start;
+
+    qemu_mutex_lock(&s->dma_map_regions_list_mutex);
+    QTAILQ_FOREACH(pos, &s->dma_map_regions_list, list) {
+        if (start >= (pos->start + pos->size)) {
+            continue;
+        } else if ((start + size) <= pos->start) {
+            break;
+        } else {
+            goto end;
+        }
+    }
+    QTAILQ_FOREACH(shl, shared_region_listeners, next) {
+        listener = shl->listener;
+        as = shl->as;
+        section = memory_region_find(as->root, start, size);
+        if (!section.mr) {
+            goto end;
+        }
+
+        if (!memory_region_is_ram(section.mr)) {
+            memory_region_unref(section.mr);
+            goto end;
+        }
+
+        if (listener->region_add) {
+            listener->region_add(listener, &section);
+        }
+        memory_region_unref(section.mr);
+    }
+
+    region = g_malloc0(sizeof(*region));
+    if (!region) {
+        ret = -1;
+        goto end;
+    }
+    region->start = start;
+    region->size = size;
+
+    if (pos) {
+        QTAILQ_INSERT_BEFORE(pos, region, list);
+    } else {
+        QTAILQ_INSERT_TAIL(&s->dma_map_regions_list, region, list);
+    }
+
+end:
+    qemu_mutex_unlock(&s->dma_map_regions_list_mutex);
+    return ret;
+}
+
+void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages)
+{
+    struct kvm_csv3_handle_memory mem = { 0 };
+    MemoryRegion *mr = NULL;
+    void *hva;
+    int ret;
+
+    if (!csv3_enabled())
+        return;
+
+    if (!gpa || !num_pages)
+        return;
+
+    mem.gpa = (__u64)gpa;
+    mem.num_pages = (__u32)num_pages;
+    mem.opcode = (__u32)KVM_CSV3_RELEASE_SHARED_MEMORY;
+
+    /* unpin the pages */
+    ret = csv3_ioctl(KVM_CSV3_HANDLE_MEMORY, &mem, NULL);
+    if (ret <= 0) {
+        if (ret < 0)
+            error_report("%s: CSV3 unpin failed ret %d", __func__, ret);
+        return;
+    }
+
+    /* drop the pages */
+    hva = gpa2hva(&mr, gpa, num_pages << TARGET_PAGE_BITS, NULL);
+    if (hva) {
+        ret = madvise(hva, num_pages << TARGET_PAGE_BITS, MADV_DONTNEED);
+        if (ret)
+            error_report("%s: madvise failed %d", __func__, ret);
+    }
+}
+
+void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end)
+{
+    MemoryRegionSection section;
+    AddressSpace *as;
+    QTAILQ_HEAD(, SharedRegionListener) *shared_region_listeners;
+    SharedRegionListener *shl;
+    MemoryListener *listener;
+    uint64_t size;
+    Csv3GuestState *s = &csv3_guest;
+    struct dma_map_region *pos, *next_pos;
+
+    if (!csv3_enabled())
+        return;
+
+    if (end <= start)
+        return;
+
+    shared_region_listeners = shared_region_listeners_get();
+    if (QTAILQ_EMPTY(shared_region_listeners))
+        return;
+
+    size = end - start;
+
+    qemu_mutex_lock(&s->dma_map_regions_list_mutex);
+    QTAILQ_FOREACH_SAFE(pos, &s->dma_map_regions_list, list, next_pos) {
+        uint64_t l, r;
+        uint64_t curr_end = pos->start + pos->size;
+
+        l = MAX(start, pos->start);
+        r = MIN(start + size, pos->start + pos->size);
+        if (l < r) {
+            if ((start <= pos->start) && (start + size >= pos->start + pos->size)) {
+                QTAILQ_FOREACH(shl, shared_region_listeners, next) {
+                    listener = shl->listener;
+                    as = shl->as;
+                    section = memory_region_find(as->root, pos->start, pos->size);
+                    if (!section.mr) {
+                        goto end;
+                    }
+                    if (listener->region_del) {
+                        listener->region_del(listener, &section);
+                    }
+                    memory_region_unref(section.mr);
+                }
+
+                QTAILQ_REMOVE(&s->dma_map_regions_list, pos, list);
+                g_free(pos);
+            }
+            break;
+        }
+        if ((start + size) <= curr_end) {
+            break;
+        }
+    }
+end:
+    qemu_mutex_unlock(&s->dma_map_regions_list_mutex);
+    return;
+}
+
+static inline hwaddr csv3_hva_to_gfn(uint8_t *ptr)
+{
+    ram_addr_t offset = RAM_ADDR_INVALID;
+
+    kvm_physical_memory_addr_from_host(kvm_state, ptr, &offset);
+
+    return offset >> TARGET_PAGE_BITS;
+}
+
+static int
+csv3_send_start(QEMUFile *f, uint64_t *bytes_sent)
+{
+    if (csv3_guest.sev_send_start)
+        return csv3_guest.sev_send_start(f, bytes_sent);
+    else
+        return -1;
+}
+
+static int
+csv3_send_get_packet_len(int *fw_err)
+{
+    int ret;
+    struct kvm_csv3_send_encrypt_data update = {0};
+
+    update.hdr_len = 0;
+    update.trans_len = 0;
+    ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_DATA, &update, fw_err);
+    if (*fw_err != SEV_RET_INVALID_LEN) {
+        error_report("%s: failed to get packet header length ret=%d fw_error=%d '%s'",
+                     __func__, ret, *fw_err, fw_error_to_str(*fw_err));
+        ret = 0;
+        goto err;
+    }
+
+    if (update.hdr_len <= INT_MAX)
+        ret = update.hdr_len;
+    else
+        ret = 0;
+
+err:
+    return ret;
+}
+
+static int
+csv3_send_encrypt_data(Csv3GuestState *s, QEMUFile *f,
+                       uint8_t *ptr, uint32_t size, uint64_t *bytes_sent)
+{
+    int ret, fw_error = 0;
+    guchar *trans;
+    uint32_t guest_addr_entry_num;
+    uint32_t i;
+    struct kvm_csv3_send_encrypt_data update = { };
+
+    /*
+     * If this is the first call, then query the packet header length and
+     * allocate the packet buffer.
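+     * (The header length is obtained by issuing SEND_ENCRYPT_DATA with
+     * zero-sized buffers; the firmware reports the required size via
+     * SEV_RET_INVALID_LEN.)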
+     */
+    if (!s->send_packet_hdr) {
+        s->send_packet_hdr_len = csv3_send_get_packet_len(&fw_error);
+        if (s->send_packet_hdr_len < 1) {
+            error_report("%s: SEND_UPDATE fw_error=%d '%s'",
+                         __func__, fw_error, fw_error_to_str(fw_error));
+            return 1;
+        }
+
+        s->send_packet_hdr = g_new(gchar, s->send_packet_hdr_len);
+    }
+
+    if (!s->guest_addr_len || !s->guest_addr_data) {
+        error_report("%s: invalid host address or size", __func__);
+        return 1;
+    } else {
+        guest_addr_entry_num = s->guest_addr_len / sizeof(struct guest_addr_entry);
+    }
+
+    /* allocate transport buffer */
+    trans = g_new(guchar, guest_addr_entry_num * TARGET_PAGE_SIZE);
+
+    update.hdr_uaddr = (uintptr_t)s->send_packet_hdr;
+    update.hdr_len = s->send_packet_hdr_len;
+    update.guest_addr_data = (uintptr_t)s->guest_addr_data;
+    update.guest_addr_len = s->guest_addr_len;
+    update.trans_uaddr = (uintptr_t)trans;
+    update.trans_len = guest_addr_entry_num * TARGET_PAGE_SIZE;
+
+    trace_kvm_csv3_send_encrypt_data(trans, update.trans_len);
+
+    ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_DATA, &update, &fw_error);
+    if (ret) {
+        error_report("%s: SEND_ENCRYPT_DATA ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+
+    for (i = 0; i < guest_addr_entry_num; i++) {
+        if (s->guest_addr_data[i].share)
+            memcpy(trans + i * TARGET_PAGE_SIZE,
+                   (guchar *)s->guest_hva_data[i].hva, TARGET_PAGE_SIZE);
+    }
+
+    qemu_put_be32(f, update.hdr_len);
+    qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len);
+    *bytes_sent += 4 + update.hdr_len;
+
+    qemu_put_be32(f, update.guest_addr_len);
+    qemu_put_buffer(f, (uint8_t *)update.guest_addr_data, update.guest_addr_len);
+    *bytes_sent += 4 + update.guest_addr_len;
+
+    qemu_put_be32(f, update.trans_len);
+    qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len);
+    *bytes_sent += (4 + update.trans_len);
+
+err:
+    s->guest_addr_len = 0;
+    g_free(trans);
+    return ret;
+}
+
+int
+csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr)
+{
+    Csv3GuestState *s = &csv3_guest;
+    uint32_t i = 0;
+
+    (void) addr;
+
+    if (!s->guest_addr_data) {
+        s->guest_hva_data = g_new0(struct guest_hva_entry, CSV3_OUTGOING_PAGE_NUM);
+        s->guest_addr_data = g_new0(struct guest_addr_entry, CSV3_OUTGOING_PAGE_NUM);
+        s->guest_addr_len = 0;
+    }
+
+    if (s->guest_addr_len >= sizeof(struct guest_addr_entry) * CSV3_OUTGOING_PAGE_NUM) {
+        error_report("Failed to queue outgoing page");
+        return 1;
+    }
+
+    i = s->guest_addr_len / sizeof(struct guest_addr_entry);
+    s->guest_hva_data[i].hva = (uintptr_t)ptr;
+    s->guest_addr_data[i].share = 0;
+    s->guest_addr_data[i].reserved = 0;
+    s->guest_addr_data[i].gfn = csv3_hva_to_gfn(ptr);
+    s->guest_addr_len += sizeof(struct guest_addr_entry);
+
+    return 0;
+}
+
+int
+csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent)
+{
+    Csv3GuestState *s = &csv3_guest;
+
+    /*
+     * If this is the first buffer, then create the outgoing encryption
+     * context and write our PDH, policy and session data.
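+     * csv3_send_start() moves the guest into the SEND_UPDATE state, so
+     * the outgoing context is only created once per migration.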
+     */
+    if (!csv3_check_state(SEV_STATE_SEND_UPDATE) &&
+        csv3_send_start(f, bytes_sent)) {
+        error_report("Failed to create outgoing context");
+        return 1;
+    }
+
+    return csv3_send_encrypt_data(s, f, NULL, 0, bytes_sent);
+}
+
+static int
+csv3_receive_start(QEMUFile *f)
+{
+    if (csv3_guest.sev_receive_start)
+        return csv3_guest.sev_receive_start(f);
+    else
+        return -1;
+}
+
+static int csv3_receive_encrypt_data(QEMUFile *f, uint8_t *ptr)
+{
+    int ret = 1, fw_error = 0;
+    uint32_t i, guest_addr_entry_num;
+    gchar *hdr = NULL, *trans = NULL;
+    struct guest_addr_entry *guest_addr_data;
+    struct kvm_csv3_receive_encrypt_data update = {};
+    void *hva = NULL;
+    MemoryRegion *mr = NULL;
+
+    /* get packet header */
+    update.hdr_len = qemu_get_be32(f);
+
+    hdr = g_new(gchar, update.hdr_len);
+    qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len);
+    update.hdr_uaddr = (uintptr_t)hdr;
+
+    /* get guest addr data */
+    update.guest_addr_len = qemu_get_be32(f);
+
+    guest_addr_data = (struct guest_addr_entry *)g_new(gchar, update.guest_addr_len);
+    qemu_get_buffer(f, (uint8_t *)guest_addr_data, update.guest_addr_len);
+    update.guest_addr_data = (uintptr_t)guest_addr_data;
+
+    /* get transport buffer */
+    update.trans_len = qemu_get_be32(f);
+
+    trans = g_new(gchar, update.trans_len);
+    update.trans_uaddr = (uintptr_t)trans;
+    qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len);
+
+    /* update shared memory. */
+    guest_addr_entry_num = update.guest_addr_len / sizeof(struct guest_addr_entry);
+    for (i = 0; i < guest_addr_entry_num; i++) {
+        if (guest_addr_data[i].share) {
+            hva = gpa2hva(&mr,
+                          ((uint64_t)guest_addr_data[i].gfn << TARGET_PAGE_BITS),
+                          TARGET_PAGE_SIZE,
+                          NULL);
+            if (hva)
+                memcpy(hva, trans + i * TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
+        }
+    }
+
+    trace_kvm_csv3_receive_encrypt_data(trans, update.trans_len, hdr, update.hdr_len);
+
+    ret = csv3_ioctl(KVM_CSV3_RECEIVE_ENCRYPT_DATA, &update, &fw_error);
+    if (ret) {
+        error_report("Error RECEIVE_ENCRYPT_DATA ret=%d fw_error=%d '%s'",
+                     ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+
+err:
+    g_free(trans);
+    g_free(guest_addr_data);
+    g_free(hdr);
+    return ret;
+}
+
+int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr)
+{
+    /*
+     * If this is the first buffer and SEV is not in the receiving state,
+     * then use the RECEIVE_START command to create an encryption context.
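+     * RECEIVE_START moves the guest into the RECEIVE_UPDATE state, so
+     * this path is taken only for the first incoming buffer.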
+ */ + if (!csv3_check_state(SEV_STATE_RECEIVE_UPDATE) && + csv3_receive_start(f)) { + return 1; + } + + return csv3_receive_encrypt_data(f, ptr); +} + +static int +csv3_send_get_context_len(int *fw_err, int *context_len, int *hdr_len) +{ + int ret = 0; + struct kvm_csv3_send_encrypt_context update = { 0 }; + + ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_CONTEXT, &update, fw_err); + if (*fw_err != SEV_RET_INVALID_LEN) { + error_report("%s: failed to get context length ret=%d fw_error=%d '%s'", + __func__, ret, *fw_err, fw_error_to_str(*fw_err)); + ret = -1; + goto err; + } + + if (update.trans_len <= INT_MAX && update.hdr_len <= INT_MAX) { + *context_len = update.trans_len; + *hdr_len = update.hdr_len; + } + ret = 0; +err: + return ret; +} + +static int +csv3_send_encrypt_context(Csv3GuestState *s, QEMUFile *f, uint64_t *bytes_sent) +{ + int ret, fw_error = 0; + int context_len = 0; + int hdr_len = 0; + guchar *trans; + guchar *hdr; + struct kvm_csv3_send_encrypt_context update = { }; + + ret = csv3_send_get_context_len(&fw_error, &context_len, &hdr_len); + if (context_len < 1 || hdr_len < 1) { + error_report("%s: fail to get context length fw_error=%d '%s'", + __func__, fw_error, fw_error_to_str(fw_error)); + return 1; + } + + /* allocate transport buffer */ + trans = g_new(guchar, context_len); + hdr = g_new(guchar, hdr_len); + + update.hdr_uaddr = (uintptr_t)hdr; + update.hdr_len = hdr_len; + update.trans_uaddr = (uintptr_t)trans; + update.trans_len = context_len; + + trace_kvm_csv3_send_encrypt_context(trans, update.trans_len); + + ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_CONTEXT, &update, &fw_error); + if (ret) { + error_report("%s: SEND_ENCRYPT_CONTEXT ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); + goto err; + } + + qemu_put_be32(f, update.hdr_len); + qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len); + *bytes_sent += 4 + update.hdr_len; + + qemu_put_be32(f, update.trans_len); + qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); + *bytes_sent += 4 + update.trans_len; + +err: + g_free(trans); + g_free(hdr); + return ret; +} + +static int +csv3_receive_encrypt_context(Csv3GuestState *s, QEMUFile *f) +{ + int ret = 1, fw_error = 0; + gchar *hdr = NULL, *trans = NULL; + struct kvm_csv3_receive_encrypt_context update = {}; + + /* get packet header */ + update.hdr_len = qemu_get_be32(f); + + hdr = g_new(gchar, update.hdr_len); + qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len); + update.hdr_uaddr = (uintptr_t)hdr; + + /* get transport buffer */ + update.trans_len = qemu_get_be32(f); + + trans = g_new(gchar, update.trans_len); + update.trans_uaddr = (uintptr_t)trans; + qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); + + trace_kvm_csv3_receive_encrypt_context(trans, update.trans_len, hdr, update.hdr_len); + + ret = csv3_ioctl(KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT, &update, &fw_error); + if (ret) { + error_report("Error RECEIVE_ENCRYPT_CONTEXT ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); + goto err; + } + +err: + g_free(trans); + g_free(hdr); + return ret; +} + +int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent) +{ + Csv3GuestState *s = &csv3_guest; + + /* send csv3 context. */ + return csv3_send_encrypt_context(s, f, bytes_sent); +} + +int csv3_load_incoming_context(QEMUFile *f) +{ + Csv3GuestState *s = &csv3_guest; + + /* receive csv3 context. 
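+     * (the counterpart of csv3_save_outgoing_context on the source side)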
*/ + return csv3_receive_encrypt_context(s, f); +} diff --git a/target/i386/csv.h b/target/i386/csv.h new file mode 100644 index 0000000000000000000000000000000000000000..a32588ab9ac3e3db34dc4464de2e100befbe1cb7 --- /dev/null +++ b/target/i386/csv.h @@ -0,0 +1,132 @@ +/* + * QEMU CSV support + * + * Copyright: Hygon Info Technologies Ltd. 2022 + * + * Author: + * Jiang Xin + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef I386_CSV_H +#define I386_CSV_H + +#include "qapi/qapi-commands-misc-target.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "sev.h" + +#define GUEST_POLICY_CSV3_BIT (1 << 6) +#define GUEST_POLICY_REUSE_ASID (1 << 7) + +#ifdef CONFIG_CSV + +#include "cpu.h" + +#define CPUID_VENDOR_HYGON_EBX 0x6f677948 /* "Hygo" */ +#define CPUID_VENDOR_HYGON_ECX 0x656e6975 /* "uine" */ +#define CPUID_VENDOR_HYGON_EDX 0x6e65476e /* "nGen" */ + +static bool __attribute__((unused)) is_hygon_cpu(void) +{ + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + + host_cpuid(0, 0, NULL, &ebx, &ecx, &edx); + + if (ebx == CPUID_VENDOR_HYGON_EBX && + ecx == CPUID_VENDOR_HYGON_ECX && + edx == CPUID_VENDOR_HYGON_EDX) + return true; + else + return false; +} + +bool csv3_enabled(void); + +#else + +#define is_hygon_cpu() (false) +#define csv3_enabled() (false) + +#endif + +#define CSV_OUTGOING_PAGE_WINDOW_SIZE (4094 * TARGET_PAGE_SIZE) + +extern bool csv_kvm_cpu_reset_inhibit; + +typedef struct CsvBatchCmdList CsvBatchCmdList; +typedef void (*CsvDestroyCmdNodeFn) (void *data); + +struct CsvBatchCmdList { + struct kvm_csv_batch_list_node *head; + struct kvm_csv_batch_list_node *tail; + CsvDestroyCmdNodeFn destroy_fn; +}; + +int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); +int csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); +int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr); +int csv_load_queued_incoming_pages(QEMUFile *f); +int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent); +int csv_load_incoming_cpu_state(QEMUFile *f); + +/* CSV3 */ +struct dma_map_region { + uint64_t start, size; + QTAILQ_ENTRY(dma_map_region) list; +}; + +#define CSV3_OUTGOING_PAGE_WINDOW_SIZE (512 * TARGET_PAGE_SIZE) + +struct guest_addr_entry { + uint64_t share: 1; + uint64_t reserved: 11; + uint64_t gfn: 52; +}; + +struct guest_hva_entry { + uint64_t hva; +}; + +struct Csv3GuestState { + uint32_t policy; + int sev_fd; + void *state; + int (*sev_ioctl)(int fd, int cmd, void *data, int *error); + const char *(*fw_error_to_str)(int code); + QTAILQ_HEAD(, dma_map_region) dma_map_regions_list; + QemuMutex dma_map_regions_list_mutex; + gchar *send_packet_hdr; + size_t send_packet_hdr_len; + struct guest_hva_entry *guest_hva_data; + struct guest_addr_entry *guest_addr_data; + size_t guest_addr_len; + + int (*sev_send_start)(QEMUFile *f, uint64_t *bytes_sent); + int (*sev_receive_start)(QEMUFile *f); +}; + +typedef struct Csv3GuestState Csv3GuestState; + +extern struct Csv3GuestState csv3_guest; +extern struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops; +extern int csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops); +extern int csv3_launch_encrypt_vmcb(void); + +int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + +int csv3_shared_region_dma_map(uint64_t start, uint64_t end); +void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); +void csv3_shared_region_release(uint64_t gpa, 
uint32_t num_pages); +int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr); +int csv3_load_incoming_context(QEMUFile *f); +int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); +int csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); +int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent); + +#endif diff --git a/target/i386/kvm/csv-stub.c b/target/i386/kvm/csv-stub.c new file mode 100644 index 0000000000000000000000000000000000000000..4d1376f268469a29698462d0c968e31d9322ef1a --- /dev/null +++ b/target/i386/kvm/csv-stub.c @@ -0,0 +1,17 @@ +/* + * QEMU CSV stub + * + * Copyright Hygon Info Technologies Ltd. 2024 + * + * Authors: + * Han Liyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "csv.h" + +bool csv_kvm_cpu_reset_inhibit; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a0bc9ea7b1924f4c33c414b3f917bb4f8625a32f..fdceecc846b898a5d2be8a0fa218f948db2c35da 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -32,6 +32,7 @@ #include "sysemu/runstate.h" #include "kvm_i386.h" #include "sev.h" +#include "csv.h" #include "xen-emu.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -148,6 +149,7 @@ static int has_xcrs; static int has_sregs2; static int has_exception_payload; static int has_triple_fault_event; +static int has_map_gpa_range; static bool has_msr_mcg_ext_ctl; @@ -2191,6 +2193,17 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10); } + if (sev_enabled()) { + c = cpuid_find_entry(&cpuid_data.cpuid, + KVM_CPUID_FEATURES | kvm_base, 0); + if (c) { + c->eax |= (1 << KVM_FEATURE_MIGRATION_CONTROL); + if (has_map_gpa_range) { + c->eax |= (1 << KVM_FEATURE_HC_MAP_GPA_RANGE); + } + } + } + cpuid_data.cpuid.nent = cpuid_i; cpuid_data.cpuid.padding = 0; @@ -2258,6 +2271,11 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) env->mp_state = KVM_MP_STATE_RUNNABLE; } + if (cpu_is_bsp(cpu) && + sev_enabled() && has_map_gpa_range) { + sev_remove_shared_regions_list(0, -1); + } + /* enabled by default */ env->poll_control_msr = 1; @@ -2476,6 +2494,32 @@ static bool kvm_rdmsr_core_thread_count(X86CPU *cpu, uint32_t msr, return true; } +/* + * Currently this exit is only used by SEV guests for + * MSR_KVM_MIGRATION_CONTROL to indicate if the guest + * is ready for migration. 
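+ * The guest writes KVM_MIGRATION_READY to the MSR once it considers
+ * itself ready to be migrated; QEMU then drops its migration blocker.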
+ */ +static uint64_t msr_kvm_migration_control; + +static bool kvm_rdmsr_kvm_migration_control(X86CPU *cpu, uint32_t msr, + uint64_t *val) +{ + *val = msr_kvm_migration_control; + + return true; +} + +static bool kvm_wrmsr_kvm_migration_control(X86CPU *cpu, uint32_t msr, + uint64_t val) +{ + msr_kvm_migration_control = val; + + if (val == KVM_MIGRATION_READY) + sev_del_migrate_blocker(); + + return true; +} + static Notifier smram_machine_done; static KVMMemoryListener smram_listener; static AddressSpace smram_address_space; @@ -2584,6 +2628,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s) #endif } + has_map_gpa_range = kvm_check_extension(s, KVM_CAP_EXIT_HYPERCALL); + if (has_map_gpa_range) { + ret = kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, + KVM_EXIT_HYPERCALL_VALID_MASK); + if (ret < 0) { + error_report("kvm: Failed to enable MAP_GPA_RANGE cap: %s", + strerror(-ret)); + return ret; + } + } + ret = kvm_get_supported_msrs(s); if (ret < 0) { return ret; @@ -2712,6 +2767,15 @@ int kvm_arch_init(MachineState *ms, KVMState *s) strerror(-ret)); exit(1); } + + r = kvm_filter_msr(s, MSR_KVM_MIGRATION_CONTROL, + kvm_rdmsr_kvm_migration_control, + kvm_wrmsr_kvm_migration_control); + if (!r) { + error_report("Could not install MSR_KVM_MIGRATION_CONTROL handler: %s", + strerror(-ret)); + exit(1); + } } return 0; @@ -3562,6 +3626,10 @@ static int kvm_put_msrs(X86CPU *cpu, int level) } } + if (sev_kvm_has_msr_ghcb) { + kvm_msr_entry_add(cpu, MSR_AMD64_SEV_ES_GHCB, env->ghcb_gpa); + } + return kvm_buf_set_msrs(cpu); } @@ -3936,6 +4004,10 @@ static int kvm_get_msrs(X86CPU *cpu) } } + if (sev_kvm_has_msr_ghcb) { + kvm_msr_entry_add(cpu, MSR_AMD64_SEV_ES_GHCB, 0); + } + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); if (ret < 0) { return ret; @@ -4256,6 +4328,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31: env->lbr_records[index - MSR_ARCH_LBR_INFO_0].info = msrs[i].data; break; + case MSR_AMD64_SEV_ES_GHCB: + env->ghcb_gpa = msrs[i].data; + break; } } @@ -4936,6 +5011,31 @@ static int kvm_handle_tpr_access(X86CPU *cpu) return 1; } +static int kvm_handle_exit_hypercall(X86CPU *cpu, struct kvm_run *run) +{ + /* + * Currently this exit is only used by SEV guests for + * guest page encryption status tracking. 
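+     * KVM_HC_MAP_GPA_RANGE reports a GPA range the guest is converting
+     * between private and shared; QEMU mirrors that in the shared regions
+     * list and, for CSV3, in the DMA mapping of the range.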
+ */ + if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) { + unsigned long enc = run->hypercall.args[2]; + unsigned long gpa = run->hypercall.args[0]; + unsigned long npages = run->hypercall.args[1]; + unsigned long gfn_start = gpa >> TARGET_PAGE_BITS; + unsigned long gfn_end = gfn_start + npages; + + if (enc) { + sev_remove_shared_regions_list(gfn_start, gfn_end); + csv3_shared_region_dma_unmap(gpa, gfn_end << TARGET_PAGE_BITS); + csv3_shared_region_release(gpa, npages); + } else { + sev_add_shared_regions_list(gfn_start, gfn_end); + csv3_shared_region_dma_map(gpa, gfn_end << TARGET_PAGE_BITS); + } + } + return 0; +} + int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) { static const uint8_t int3 = 0xcc; @@ -5359,6 +5459,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ret = kvm_xen_handle_exit(cpu, &run->xen); break; #endif + case KVM_EXIT_HYPERCALL: + ret = kvm_handle_exit_hypercall(cpu, run); + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; @@ -5611,6 +5714,9 @@ bool kvm_has_waitpkg(void) bool kvm_arch_cpu_check_are_resettable(void) { + if (is_hygon_cpu()) + return !csv_kvm_cpu_reset_inhibit; + return !sev_es_enabled(); } diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 84d9143e60296d4388a57184b29693231e01205c..3c3f8cf93c1bfe17638beae51942163fc7f4003d 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,6 +8,7 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) +i386_kvm_ss.add(when: 'CONFIG_CSV', if_false: files('csv-stub.c')) i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c index 1be5341e8a6a0e82997d88679f9cecce025d9e71..a0aac1117fbaca51154fc85553ea242cbc8cd647 100644 --- a/target/i386/kvm/sev-stub.c +++ b/target/i386/kvm/sev-stub.c @@ -14,8 +14,25 @@ #include "qemu/osdep.h" #include "sev.h" +bool sev_kvm_has_msr_ghcb; + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { /* If we get here, cgs must be some non-SEV thing */ return 0; } + +int sev_remove_shared_regions_list(unsigned long gfn_start, + unsigned long gfn_end) +{ + return 0; +} + +int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end) +{ + return 0; +} + +void sev_del_migrate_blocker(void) +{ +} diff --git a/target/i386/machine.c b/target/i386/machine.c index a1041ef828cb2670e913738a44853b7eeadd1e7e..9a1cb8f3b81f0013ef66e1cb4dca1ee850b8e4eb 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1605,6 +1605,27 @@ static const VMStateDescription vmstate_triple_fault = { } }; +#if defined(CONFIG_KVM) && defined(TARGET_X86_64) +static bool msr_ghcb_gpa_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->ghcb_gpa != 0; +} + +static const VMStateDescription vmstate_msr_ghcb_gpa = { + .name = "cpu/svm_msr_ghcb_gpa", + .version_id = 1, + .minimum_version_id = 1, + .needed = msr_ghcb_gpa_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.ghcb_gpa, X86CPU), + VMSTATE_END_OF_LIST() + } +}; +#endif + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1751,6 +1772,9 @@ const VMStateDescription vmstate_x86_cpu = { #endif &vmstate_arch_lbr, &vmstate_triple_fault, +#if defined(CONFIG_KVM) && defined(TARGET_X86_64) + 
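+    /* only sent when the guest has set a GHCB GPA; see msr_ghcb_gpa_needed() */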
&vmstate_msr_ghcb_gpa, +#endif NULL } }; diff --git a/target/i386/meson.build b/target/i386/meson.build index 7c74bfa8591b279e506f64451de406f9d718691d..594a0a6abf7bdc028fa7e94390890c2547ead7db 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -21,6 +21,7 @@ i386_system_ss.add(files( 'cpu-sysemu.c', )) i386_system_ss.add(when: 'CONFIG_SEV', if_true: files('sev.c'), if_false: files('sev-sysemu-stub.c')) +i386_system_ss.add(when: 'CONFIG_CSV', if_true: files('csv.c'), if_false: files('csv-sysemu-stub.c')) i386_user_ss = ss.source_set() diff --git a/target/i386/sev.c b/target/i386/sev.c index 9a71246682584da3204ce2723c0d26b830cb0b16..68bf5da356bb8d65c456bc8cd886d76f92fec995 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -27,10 +27,13 @@ #include "crypto/hash.h" #include "sysemu/kvm.h" #include "sev.h" +#include "csv.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" #include "trace.h" #include "migration/blocker.h" +#include "migration/qemu-file.h" +#include "migration/misc.h" #include "qom/object.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" @@ -42,6 +45,11 @@ #define TYPE_SEV_GUEST "sev-guest" OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) +struct shared_region { + unsigned long gfn_start, gfn_end; + QTAILQ_ENTRY(shared_region) list; +}; + /** * SevGuestState: @@ -64,6 +72,7 @@ struct SevGuestState { uint32_t cbitpos; uint32_t reduced_phys_bits; bool kernel_hashes; + char *user_id; /* runtime state */ uint32_t handle; @@ -73,10 +82,27 @@ struct SevGuestState { int sev_fd; SevState state; gchar *measurement; + guchar *remote_pdh; + size_t remote_pdh_len; + guchar *remote_plat_cert; + size_t remote_plat_cert_len; + guchar *amd_cert; + size_t amd_cert_len; + gchar *send_packet_hdr; + size_t send_packet_hdr_len; + + /* needed by live migration of HYGON CSV2 guest */ + gchar *send_vmsa_packet_hdr; + size_t send_vmsa_packet_hdr_len; uint32_t reset_cs; uint32_t reset_ip; bool reset_data_valid; + + QTAILQ_HEAD(, shared_region) shared_regions_list; + + /* link list used for HYGON CSV */ + CsvBatchCmdList *csv_batch_cmd_list; }; #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ @@ -127,6 +153,8 @@ QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); static SevGuestState *sev_guest; static Error *sev_mig_blocker; +bool sev_kvm_has_msr_ghcb; + static const char *const sev_fw_errlist[] = { [SEV_RET_SUCCESS] = "", [SEV_RET_INVALID_PLATFORM_STATE] = "Platform state is invalid", @@ -157,6 +185,29 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) +#define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */ + +#define SHARED_REGION_LIST_CONT 0x1 +#define SHARED_REGION_LIST_END 0x2 + +#define ENCRYPTED_CPU_STATE_CONT 0x1 +#define ENCRYPTED_CPU_STATE_END 0x2 + +static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_setup = sev_save_setup, + .save_outgoing_page = sev_save_outgoing_page, + .load_incoming_page = sev_load_incoming_page, + .is_gfn_in_unshared_region = sev_is_gfn_in_unshared_region, + .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list, + .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, + .queue_outgoing_page = csv_queue_outgoing_page, + .save_queued_outgoing_pages = csv_save_queued_outgoing_pages, + .queue_incoming_page = csv_queue_incoming_page, + .load_queued_incoming_pages = csv_load_queued_incoming_pages, + .save_outgoing_cpu_state = csv_save_outgoing_cpu_state, + .load_incoming_cpu_state = 
csv_load_incoming_cpu_state, +}; + static int sev_ioctl(int fd, int cmd, void *data, int *error) { @@ -323,6 +374,22 @@ sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) s->dh_cert_file = g_strdup(value); } +static char * +sev_guest_get_user_id(Object *obj, Error **errp) +{ + SevGuestState *s = SEV_GUEST(obj); + + return g_strdup(s->user_id); +} + +static void +sev_guest_set_user_id(Object *obj, const char *value, Error **errp) +{ + SevGuestState *s = SEV_GUEST(obj); + + s->user_id = g_strdup(value); +} + static char * sev_guest_get_sev_device(Object *obj, Error **errp) { @@ -376,6 +443,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_kernel_hashes); object_class_property_set_description(oc, "kernel-hashes", "add kernel hashes to guest firmware for measured Linux boot"); + object_class_property_add_str(oc, "user-id", + sev_guest_get_user_id, + sev_guest_set_user_id); + object_class_property_set_description(oc, "user-id", + "user id of the guest owner"); } static void @@ -539,7 +611,8 @@ static int sev_get_cpu0_id(int fd, guchar **id, size_t *id_len, Error **errp) /* query the ID length */ r = sev_platform_ioctl(fd, SEV_GET_ID2, &get_id2, &err); - if (r < 0 && err != SEV_RET_INVALID_LEN) { + if (r < 0 && err != SEV_RET_INVALID_LEN && + !(is_hygon_cpu() && err == SEV_RET_INVALID_PARAM)) { error_setg(errp, "SEV: Failed to get ID ret=%d fw_err=%d (%s)", r, err, fw_error_to_str(err)); return 1; @@ -807,8 +880,12 @@ sev_launch_get_measure(Notifier *notifier, void *unused) } if (sev_es_enabled()) { - /* measure all the VM save areas before getting launch_measure */ - ret = sev_launch_update_vmsa(sev); + if (csv3_enabled()) { + ret = csv3_launch_encrypt_vmcb(); + } else { + /* measure all the VM save areas before getting launch_measure */ + ret = sev_launch_update_vmsa(sev); + } if (ret) { exit(1); } @@ -894,18 +971,142 @@ sev_launch_finish(SevGuestState *sev) migrate_add_blocker(&sev_mig_blocker, &error_fatal); } +void +sev_del_migrate_blocker(void) +{ + migrate_del_blocker(&sev_mig_blocker); +} + +static int +sev_receive_finish(SevGuestState *s) +{ + int error, ret = 1; + + trace_kvm_sev_receive_finish(); + ret = sev_ioctl(s->sev_fd, KVM_SEV_RECEIVE_FINISH, 0, &error); + if (ret) { + error_report("%s: RECEIVE_FINISH ret=%d fw_error=%d '%s'", + __func__, ret, error, fw_error_to_str(error)); + goto err; + } + + sev_set_guest_state(s, SEV_STATE_RUNNING); +err: + return ret; +} + static void sev_vm_state_change(void *opaque, bool running, RunState state) { SevGuestState *sev = opaque; if (running) { - if (!sev_check_state(sev, SEV_STATE_RUNNING)) { + if (sev_check_state(sev, SEV_STATE_RECEIVE_UPDATE)) { + sev_receive_finish(sev); + } else if (!sev_check_state(sev, SEV_STATE_RUNNING)) { sev_launch_finish(sev); } } } +static inline bool check_blob_length(size_t value) +{ + if (value > SEV_FW_BLOB_MAX_SIZE) { + error_report("invalid length max=%d got=%ld", + SEV_FW_BLOB_MAX_SIZE, value); + return false; + } + + return true; +} + +int sev_save_setup(const char *pdh, const char *plat_cert, + const char *amd_cert) +{ + SevGuestState *s = sev_guest; + + if (is_hygon_cpu()) { + if (sev_read_file_base64(pdh, &s->remote_pdh, + &s->remote_pdh_len) < 0) { + goto error; + } + } else { + s->remote_pdh = g_base64_decode(pdh, &s->remote_pdh_len); + } + if (!check_blob_length(s->remote_pdh_len)) { + goto error; + } + + if (is_hygon_cpu()) { + if (sev_read_file_base64(plat_cert, &s->remote_plat_cert, + &s->remote_plat_cert_len) < 0) { + goto error; + } + } else { + 
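+        /* Non-Hygon (plain AMD SEV) hosts pass the certificate material
+         * itself as a base64 string rather than a file name. */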
s->remote_plat_cert = g_base64_decode(plat_cert,
+                                              &s->remote_plat_cert_len);
+    }
+    if (!check_blob_length(s->remote_plat_cert_len)) {
+        goto error;
+    }
+
+    if (is_hygon_cpu()) {
+        if (sev_read_file_base64(amd_cert, &s->amd_cert,
+                                 &s->amd_cert_len) < 0) {
+            goto error;
+        }
+    } else {
+        s->amd_cert = g_base64_decode(amd_cert, &s->amd_cert_len);
+    }
+    if (!check_blob_length(s->amd_cert_len)) {
+        goto error;
+    }
+
+    return 0;
+
+error:
+    g_free(s->remote_pdh);
+    g_free(s->remote_plat_cert);
+    g_free(s->amd_cert);
+
+    return 1;
+}
+
+static void
+sev_send_finish(void)
+{
+    int ret, error;
+
+    trace_kvm_sev_send_finish();
+    ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_FINISH, 0, &error);
+    if (ret) {
+        error_report("%s: SEND_FINISH ret=%d fw_error=%d '%s'",
+                     __func__, ret, error, fw_error_to_str(error));
+    }
+
+    g_free(sev_guest->send_packet_hdr);
+    if (sev_es_enabled() && is_hygon_cpu()) {
+        g_free(sev_guest->send_vmsa_packet_hdr);
+    }
+    sev_set_guest_state(sev_guest, SEV_STATE_RUNNING);
+}
+
+static void
+sev_migration_state_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *s = data;
+
+    if (migration_has_finished(s) ||
+        migration_in_postcopy_after_devices(s) ||
+        migration_has_failed(s)) {
+        if (sev_check_state(sev_guest, SEV_STATE_SEND_UPDATE)) {
+            sev_send_finish();
+        }
+    }
+}
+
+static Notifier sev_migration_state;
+
 int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
 {
     SevGuestState *sev
@@ -920,6 +1121,9 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
         return 0;
     }
 
+    ConfidentialGuestSupportClass *cgs_class =
+        (ConfidentialGuestSupportClass *) object_get_class(OBJECT(cgs));
+
     ret = ram_block_discard_disable(true);
     if (ret) {
         error_report("%s: cannot disable RAM discard", __func__);
@@ -996,22 +1200,91 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
     }
 
     trace_kvm_sev_init();
-    ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error);
+
+    /* Only support reusing the ASID for CSV/CSV2 guests */
+    if (is_hygon_cpu() &&
+        (sev_guest->policy & GUEST_POLICY_REUSE_ASID) &&
+        !(sev_guest->policy & GUEST_POLICY_CSV3_BIT)) {
+        char *user_id = NULL;
+        struct kvm_csv_init *init_cmd_buf = NULL;
+
+        user_id = object_property_get_str(OBJECT(sev), "user-id", NULL);
+        if (user_id && strlen(user_id)) {
+            init_cmd_buf = g_new0(struct kvm_csv_init, 1);
+            init_cmd_buf->len = strlen(user_id);
+            init_cmd_buf->userid_addr = (__u64)user_id;
+        }
+        ret = sev_ioctl(sev->sev_fd, cmd, init_cmd_buf, &fw_error);
+
+        if (user_id) {
+            g_free(user_id);
+            g_free(init_cmd_buf);
+        }
+    } else {
+        ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error);
+    }
+
     if (ret) {
         error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
                    __func__, ret, fw_error, fw_error_to_str(fw_error));
         goto err;
     }
 
-    ret = sev_launch_start(sev);
-    if (ret) {
-        error_setg(errp, "%s: failed to create encryption context", __func__);
-        goto err;
+    /* Support CSV3 */
+    if (!ret && cmd == KVM_SEV_ES_INIT) {
+        ret = csv3_init(sev_guest->policy, sev->sev_fd, (void *)&sev->state, &sev_ops);
+        if (ret) {
+            error_setg(errp, "%s: failed to init csv3 context", __func__);
+            goto err;
+        }
+        /* The CSV3 guest is not resettable */
+        if (csv3_enabled())
+            csv_kvm_cpu_reset_inhibit = true;
+    }
+
+    /*
+     * The LAUNCH context is used for a new guest; if it's an incoming guest,
+     * the RECEIVE context will be created after the connection is established.
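+     * RUN_STATE_INMIGRATE is what distinguishes the two cases below.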
+     */
+    if (!runstate_check(RUN_STATE_INMIGRATE)) {
+        ret = sev_launch_start(sev);
+        if (ret) {
+            error_setg(errp, "%s: failed to create encryption context", __func__);
+            goto err;
+        }
+    } else {
+        /*
+         * The CSV2 guest is not resettable after being migrated to the
+         * target machine, so set csv_kvm_cpu_reset_inhibit to true to
+         * indicate that the CSV2 guest is not resettable.
+         */
+        if (is_hygon_cpu() && sev_es_enabled()) {
+            csv_kvm_cpu_reset_inhibit = true;
+        }
     }
 
-    ram_block_notifier_add(&sev_ram_notifier);
+    /* The CSV3 guest does not need a notifier to reg/unreg memory */
+    if (!csv3_enabled()) {
+        ram_block_notifier_add(&sev_ram_notifier);
+    }
     qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
     qemu_add_vm_change_state_handler(sev_vm_state_change, sev);
+    migration_add_notifier(&sev_migration_state, sev_migration_state_notifier);
+
+    if (csv3_enabled()) {
+        cgs_class->memory_encryption_ops = &csv3_memory_encryption_ops;
+    } else {
+        cgs_class->memory_encryption_ops = &sev_memory_encryption_ops;
+    }
+    QTAILQ_INIT(&sev->shared_regions_list);
+
+    /* Determine whether MSR_AMD64_SEV_ES_GHCB is supported */
+    if (sev_es_enabled()) {
+        sev_kvm_has_msr_ghcb =
+            kvm_vm_check_extension(kvm_state, KVM_CAP_SEV_ES_GHCB);
+    } else {
+        sev_kvm_has_msr_ghcb = false;
+    }
 
     cgs->ready = true;
 
@@ -1251,138 +1524,1161 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
     return 0;
 }
 
-static const QemuUUID sev_hash_table_header_guid = {
-    .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93,
-                    0xd4, 0x11, 0xfd, 0x21)
-};
+static int
+sev_get_send_session_length(void)
+{
+    int ret, fw_err = 0;
+    struct kvm_sev_send_start start = {};
+
+    ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_START, &start, &fw_err);
+    if (fw_err != SEV_RET_INVALID_LEN) {
+        ret = -1;
+        error_report("%s: failed to get session length ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_err, fw_error_to_str(fw_err));
+        goto err;
+    }
 
-static const QemuUUID sev_kernel_entry_guid = {
-    .data = UUID_LE(0x4de79437, 0xabd2, 0x427f, 0xb8, 0x35, 0xd5, 0xb1,
-                    0x72, 0xd2, 0x04, 0x5b)
-};
-static const QemuUUID sev_initrd_entry_guid = {
-    .data = UUID_LE(0x44baf731, 0x3a2f, 0x4bd7, 0x9a, 0xf1, 0x41, 0xe2,
-                    0x91, 0x69, 0x78, 0x1d)
-};
-static const QemuUUID sev_cmdline_entry_guid = {
-    .data = UUID_LE(0x97d02dd8, 0xbd20, 0x4c94, 0xaa, 0x78, 0xe7, 0x71,
-                    0x4d, 0x36, 0xab, 0x2a)
-};
+    ret = start.session_len;
+err:
+    return ret;
+}
 
-/*
- * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page
- * which is included in SEV's initial memory measurement.
- */
-bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
+static int
+sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent)
 {
-    uint8_t *data;
-    SevHashTableDescriptor *area;
-    SevHashTable *ht;
-    PaddedSevHashTable *padded_ht;
-    uint8_t cmdline_hash[HASH_SIZE];
-    uint8_t initrd_hash[HASH_SIZE];
-    uint8_t kernel_hash[HASH_SIZE];
-    uint8_t *hashp;
-    size_t hash_len = HASH_SIZE;
-    hwaddr mapped_len = sizeof(*padded_ht);
-    MemTxAttrs attrs = { 0 };
-    bool ret = true;
-
-    /*
-     * Only add the kernel hashes if the sev-guest configuration explicitly
-     * stated kernel-hashes=on.
- */ - if (!sev_guest->kernel_hashes) { - return false; + gsize pdh_len = 0, plat_cert_len; + int session_len, ret, fw_error; + struct kvm_sev_send_start start = { }; + guchar *pdh = NULL, *plat_cert = NULL, *session = NULL; + Error *local_err = NULL; + + if (!s->remote_pdh || !s->remote_plat_cert || !s->amd_cert_len) { + error_report("%s: missing remote PDH or PLAT_CERT", __func__); + return 1; } - if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { - error_setg(errp, "SEV: kernel specified but guest firmware " - "has no hashes table GUID"); - return false; - } - area = (SevHashTableDescriptor *)data; - if (!area->base || area->size < sizeof(PaddedSevHashTable)) { - error_setg(errp, "SEV: guest firmware hashes table area is invalid " - "(base=0x%x size=0x%x)", area->base, area->size); - return false; - } + start.pdh_cert_uaddr = (uintptr_t) s->remote_pdh; + start.pdh_cert_len = s->remote_pdh_len; - /* - * Calculate hash of kernel command-line with the terminating null byte. If - * the user doesn't supply a command-line via -append, the 1-byte "\0" will - * be used. - */ - hashp = cmdline_hash; - if (qcrypto_hash_bytes(QCRYPTO_HASH_ALG_SHA256, ctx->cmdline_data, - ctx->cmdline_size, &hashp, &hash_len, errp) < 0) { - return false; - } - assert(hash_len == HASH_SIZE); + start.plat_certs_uaddr = (uintptr_t)s->remote_plat_cert; + start.plat_certs_len = s->remote_plat_cert_len; - /* - * Calculate hash of initrd. If the user doesn't supply an initrd via - * -initrd, an empty buffer will be used (ctx->initrd_size == 0). - */ - hashp = initrd_hash; - if (qcrypto_hash_bytes(QCRYPTO_HASH_ALG_SHA256, ctx->initrd_data, - ctx->initrd_size, &hashp, &hash_len, errp) < 0) { - return false; - } - assert(hash_len == HASH_SIZE); + start.amd_certs_uaddr = (uintptr_t)s->amd_cert; + start.amd_certs_len = s->amd_cert_len; - /* Calculate hash of the kernel */ - hashp = kernel_hash; - struct iovec iov[2] = { - { .iov_base = ctx->setup_data, .iov_len = ctx->setup_size }, - { .iov_base = ctx->kernel_data, .iov_len = ctx->kernel_size } - }; - if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256, iov, ARRAY_SIZE(iov), - &hashp, &hash_len, errp) < 0) { - return false; - } - assert(hash_len == HASH_SIZE); + /* get the session length */ + session_len = sev_get_send_session_length(); + if (session_len < 0) { + ret = 1; + goto err; + } - /* - * Populate the hashes table in the guest's memory at the OVMF-designated - * area for the SEV hashes table - */ - padded_ht = address_space_map(&address_space_memory, area->base, - &mapped_len, true, attrs); - if (!padded_ht || mapped_len != sizeof(*padded_ht)) { - error_setg(errp, "SEV: cannot map hashes table guest memory area"); - return false; - } - ht = &padded_ht->ht; + session = g_new0(guchar, session_len); + start.session_uaddr = (unsigned long)session; + start.session_len = session_len; - ht->guid = sev_hash_table_header_guid; - ht->len = sizeof(*ht); + /* Get our PDH certificate */ + ret = sev_get_pdh_info(s->sev_fd, &pdh, &pdh_len, + &plat_cert, &plat_cert_len, &local_err); + if (ret) { + error_report("Failed to get our PDH cert"); + goto err; + } - ht->cmdline.guid = sev_cmdline_entry_guid; - ht->cmdline.len = sizeof(ht->cmdline); - memcpy(ht->cmdline.hash, cmdline_hash, sizeof(ht->cmdline.hash)); + trace_kvm_sev_send_start(start.pdh_cert_uaddr, start.pdh_cert_len, + start.plat_certs_uaddr, start.plat_certs_len, + start.amd_certs_uaddr, start.amd_certs_len); - ht->initrd.guid = sev_initrd_entry_guid; - ht->initrd.len = sizeof(ht->initrd); - 
memcpy(ht->initrd.hash, initrd_hash, sizeof(ht->initrd.hash));
+    ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_START, &start, &fw_error);
+    if (ret < 0) {
+        error_report("%s: SEND_START ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
 
-    ht->kernel.guid = sev_kernel_entry_guid;
-    ht->kernel.len = sizeof(ht->kernel);
-    memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash));
+    qemu_put_be32(f, start.policy);
+    qemu_put_be32(f, pdh_len);
+    qemu_put_buffer(f, (uint8_t *)pdh, pdh_len);
+    qemu_put_be32(f, start.session_len);
+    qemu_put_buffer(f, (uint8_t *)start.session_uaddr, start.session_len);
+    *bytes_sent = 12 + pdh_len + start.session_len;
 
-    /* zero the excess data so the measurement can be reliably calculated */
-    memset(padded_ht->padding, 0, sizeof(padded_ht->padding));
+    sev_set_guest_state(s, SEV_STATE_SEND_UPDATE);
 
-    if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) {
-        ret = false;
+err:
+    g_free(pdh);
+    g_free(plat_cert);
+    return ret;
+}
+
+static int
+sev_send_get_packet_len(int *fw_err)
+{
+    int ret;
+    struct kvm_sev_send_update_data update = { 0, };
+
+    ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_DATA,
+                    &update, fw_err);
+    if (*fw_err != SEV_RET_INVALID_LEN) {
+        ret = 0;
+        error_report("%s: failed to get packet header length ret=%d fw_error=%d '%s'",
+                     __func__, ret, *fw_err, fw_error_to_str(*fw_err));
+        goto err;
     }
 
-    address_space_unmap(&address_space_memory, padded_ht,
-                        mapped_len, true, mapped_len);
+    ret = update.hdr_len;
 
+err:
     return ret;
 }
 
+static int
+sev_send_update_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr, uint32_t size,
+                     uint64_t *bytes_sent)
+{
+    int ret, fw_error;
+    guchar *trans;
+    struct kvm_sev_send_update_data update = { };
+
+    /*
+     * If this is the first call, then query the packet header bytes and
+     * allocate the packet buffer.
+     */
+    if (!s->send_packet_hdr) {
+        s->send_packet_hdr_len = sev_send_get_packet_len(&fw_error);
+        if (s->send_packet_hdr_len < 1) {
+            error_report("%s: SEND_UPDATE fw_error=%d '%s'",
+                         __func__, fw_error, fw_error_to_str(fw_error));
+            return 1;
+        }
+
+        s->send_packet_hdr = g_new(gchar, s->send_packet_hdr_len);
+    }
+
+    /* allocate transport buffer */
+    trans = g_new(guchar, size);
+
+    update.hdr_uaddr = (uintptr_t)s->send_packet_hdr;
+    update.hdr_len = s->send_packet_hdr_len;
+    update.guest_uaddr = (uintptr_t)ptr;
+    update.guest_len = size;
+    update.trans_uaddr = (uintptr_t)trans;
+    update.trans_len = size;
+
+    trace_kvm_sev_send_update_data(ptr, trans, size);
+
+    ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, &fw_error);
+    if (ret) {
+        error_report("%s: SEND_UPDATE_DATA ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+
+    qemu_put_be32(f, update.hdr_len);
+    qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len);
+    *bytes_sent = 4 + update.hdr_len;
+
+    qemu_put_be32(f, update.trans_len);
+    qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len);
+    *bytes_sent += (4 + update.trans_len);
+
+err:
+    g_free(trans);
+    return ret;
+}
+
+int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr,
+                           uint32_t sz, uint64_t *bytes_sent)
+{
+    SevGuestState *s = sev_guest;
+
+    /*
+     * If this is the first buffer, then create the outgoing encryption
+     * context and write our PDH, policy and session data.
+     */
+    if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) &&
+        sev_send_start(s, f, bytes_sent)) {
+        error_report("Failed to create outgoing context");
+        return 1;
+    }
+
+    return sev_send_update_data(s, f, ptr, sz, bytes_sent);
+}
+
+static int
+sev_receive_start(SevGuestState *sev, QEMUFile *f)
+{
+    int ret = 1;
+    int fw_error;
+    struct kvm_sev_receive_start start = { };
+    gchar *session = NULL, *pdh_cert = NULL;
+
+    /* get SEV guest handle */
+    start.handle = object_property_get_int(OBJECT(sev), "handle",
+                                           &error_abort);
+
+    /* get the source policy */
+    start.policy = qemu_get_be32(f);
+
+    /* get source PDH key */
+    start.pdh_len = qemu_get_be32(f);
+    if (!check_blob_length(start.pdh_len)) {
+        return 1;
+    }
+
+    pdh_cert = g_new(gchar, start.pdh_len);
+    qemu_get_buffer(f, (uint8_t *)pdh_cert, start.pdh_len);
+    start.pdh_uaddr = (uintptr_t)pdh_cert;
+
+    /* get source session data */
+    start.session_len = qemu_get_be32(f);
+    if (!check_blob_length(start.session_len)) {
+        return 1;
+    }
+    session = g_new(gchar, start.session_len);
+    qemu_get_buffer(f, (uint8_t *)session, start.session_len);
+    start.session_uaddr = (uintptr_t)session;
+
+    trace_kvm_sev_receive_start(start.policy, session, pdh_cert);
+
+    ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_START,
+                    &start, &fw_error);
+    if (ret < 0) {
+        error_report("Error RECEIVE_START ret=%d fw_error=%d '%s'",
+                     ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+
+    object_property_set_int(OBJECT(sev), "handle", start.handle, &error_abort);
+    sev_set_guest_state(sev, SEV_STATE_RECEIVE_UPDATE);
+err:
+    g_free(session);
+    g_free(pdh_cert);
+
+    return ret;
+}
+
+static int sev_receive_update_data(QEMUFile *f, uint8_t *ptr)
+{
+    int ret = 1, fw_error = 0;
+    gchar *hdr = NULL, *trans = NULL;
+    struct kvm_sev_receive_update_data update = {};
+
+    /* get packet header */
+    update.hdr_len = qemu_get_be32(f);
+    if (!check_blob_length(update.hdr_len)) {
+        return 1;
+    }
+
+    hdr = g_new(gchar, update.hdr_len);
+    qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len);
+    update.hdr_uaddr = (uintptr_t)hdr;
+
+    /* get transport buffer */
+    update.trans_len = qemu_get_be32(f);
+    if (!check_blob_length(update.trans_len)) {
+        goto err;
+    }
+
+    trans = g_new(gchar, update.trans_len);
+    update.trans_uaddr = (uintptr_t)trans;
+    qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len);
+
+    update.guest_uaddr = (uintptr_t) ptr;
+    update.guest_len = update.trans_len;
+
+    trace_kvm_sev_receive_update_data(trans, ptr, update.guest_len,
+                                      hdr, update.hdr_len);
+
+    ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_DATA,
+                    &update, &fw_error);
+    if (ret) {
+        error_report("Error RECEIVE_UPDATE_DATA ret=%d fw_error=%d '%s'",
+                     ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+err:
+    g_free(trans);
+    g_free(hdr);
+    return ret;
+}
+
+int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr)
+{
+    SevGuestState *s = sev_guest;
+
+    /*
+     * If this is the first buffer and SEV is not in receiving state, then
+     * use the RECEIVE_START command to create an encryption context.
+     */
+    if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) &&
+        sev_receive_start(s, f)) {
+        return 1;
+    }
+
+    return sev_receive_update_data(f, ptr);
+}
+
+int sev_remove_shared_regions_list(unsigned long start, unsigned long end)
+{
+    SevGuestState *s = sev_guest;
+    struct shared_region *pos, *next_pos;
+
+    QTAILQ_FOREACH_SAFE(pos, &s->shared_regions_list, list, next_pos) {
+        unsigned long l, r;
+        unsigned long curr_gfn_end = pos->gfn_end;
+
+        /*
+         * Find if any intersection exists?
+ * left bound for intersecting segment + */ + l = MAX(start, pos->gfn_start); + /* right bound for intersecting segment */ + r = MIN(end, pos->gfn_end); + if (l <= r) { + if (pos->gfn_start == l && pos->gfn_end == r) { + QTAILQ_REMOVE(&s->shared_regions_list, pos, list); + g_free(pos); + } else if (l == pos->gfn_start) { + pos->gfn_start = r; + } else if (r == pos->gfn_end) { + pos->gfn_end = l; + } else { + /* Do a de-merge -- split linked list nodes */ + struct shared_region *shrd_region; + + pos->gfn_end = l; + shrd_region = g_malloc0(sizeof(*shrd_region)); + if (!shrd_region) { + return 0; + } + shrd_region->gfn_start = r; + shrd_region->gfn_end = curr_gfn_end; + QTAILQ_INSERT_AFTER(&s->shared_regions_list, pos, + shrd_region, list); + } + } + if (end <= curr_gfn_end) { + break; + } + } + return 0; +} + +int sev_add_shared_regions_list(unsigned long start, unsigned long end) +{ + struct shared_region *shrd_region; + struct shared_region *pos; + SevGuestState *s = sev_guest; + + if (QTAILQ_EMPTY(&s->shared_regions_list)) { + shrd_region = g_malloc0(sizeof(*shrd_region)); + if (!shrd_region) { + return -1; + } + shrd_region->gfn_start = start; + shrd_region->gfn_end = end; + QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list); + return 0; + } + + /* + * shared regions list is a sorted list in ascending order + * of guest PA's and also merges consecutive range of guest PA's + */ + QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { + /* handle duplicate overlapping regions */ + if (start >= pos->gfn_start && end <= pos->gfn_end) { + return 0; + } + if (pos->gfn_end < start) { + continue; + } + /* merge consecutive guest PA(s) -- forward merge */ + if (pos->gfn_start <= start && pos->gfn_end >= start) { + pos->gfn_end = end; + return 0; + } + break; + } + /* + * Add a new node + */ + shrd_region = g_malloc0(sizeof(*shrd_region)); + if (!shrd_region) { + return -1; + } + shrd_region->gfn_start = start; + shrd_region->gfn_end = end; + if (pos) { + QTAILQ_INSERT_BEFORE(pos, shrd_region, list); + } else { + QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list); + } + return 1; +} + +int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent) +{ + SevGuestState *s = sev_guest; + struct shared_region *pos; + + QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { + qemu_put_be32(f, SHARED_REGION_LIST_CONT); + qemu_put_be32(f, pos->gfn_start); + qemu_put_be32(f, pos->gfn_end); + *bytes_sent += 12; + } + + qemu_put_be32(f, SHARED_REGION_LIST_END); + *bytes_sent += 4; + return 0; +} + +int sev_load_incoming_shared_regions_list(QEMUFile *f) +{ + SevGuestState *s = sev_guest; + struct shared_region *shrd_region; + int status; + + status = qemu_get_be32(f); + while (status == SHARED_REGION_LIST_CONT) { + + shrd_region = g_malloc0(sizeof(*shrd_region)); + if (!shrd_region) { + return 0; + } + shrd_region->gfn_start = qemu_get_be32(f); + shrd_region->gfn_end = qemu_get_be32(f); + + QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list); + + status = qemu_get_be32(f); + } + return 0; +} + +bool sev_is_gfn_in_unshared_region(unsigned long gfn) +{ + SevGuestState *s = sev_guest; + struct shared_region *pos; + + QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { + if (gfn >= pos->gfn_start && gfn < pos->gfn_end) { + return false; + } + } + return true; +} + +static CsvBatchCmdList * +csv_batch_cmd_list_create(struct kvm_csv_batch_list_node *head, + CsvDestroyCmdNodeFn func) +{ + CsvBatchCmdList *csv_batch_cmd_list = + g_malloc0(sizeof(*csv_batch_cmd_list)); + 
+    if (!csv_batch_cmd_list) {
+        return NULL;
+    }
+
+    csv_batch_cmd_list->head = head;
+    csv_batch_cmd_list->tail = head;
+    csv_batch_cmd_list->destroy_fn = func;
+
+    return csv_batch_cmd_list;
+}
+
+static int
+csv_batch_cmd_list_add_after(CsvBatchCmdList *list,
+                             struct kvm_csv_batch_list_node *new_node)
+{
+    list->tail->next_cmd_addr = (__u64)new_node;
+    list->tail = new_node;
+
+    return 0;
+}
+
+static struct kvm_csv_batch_list_node *
+csv_batch_cmd_list_node_create(uint64_t cmd_data_addr, uint64_t addr)
+{
+    struct kvm_csv_batch_list_node *new_node =
+        g_malloc0(sizeof(struct kvm_csv_batch_list_node));
+
+    if (!new_node) {
+        return NULL;
+    }
+
+    new_node->cmd_data_addr = cmd_data_addr;
+    new_node->addr = addr;
+    new_node->next_cmd_addr = 0;
+
+    return new_node;
+}
+
+static int csv_batch_cmd_list_destroy(CsvBatchCmdList *list)
+{
+    struct kvm_csv_batch_list_node *node = list->head;
+
+    while (node != NULL) {
+        if (list->destroy_fn != NULL) {
+            list->destroy_fn((void *)node->cmd_data_addr);
+        }
+
+        list->head = (struct kvm_csv_batch_list_node *)node->next_cmd_addr;
+        g_free(node);
+        node = list->head;
+    }
+
+    g_free(list);
+    return 0;
+}
+
+static void send_update_data_free(void *data)
+{
+    struct kvm_sev_send_update_data *update =
+        (struct kvm_sev_send_update_data *)data;
+    g_free((guchar *)update->hdr_uaddr);
+    g_free((guchar *)update->trans_uaddr);
+    g_free(update);
+}
+
+static void receive_update_data_free(void *data)
+{
+    struct kvm_sev_receive_update_data *update =
+        (struct kvm_sev_receive_update_data *)data;
+    g_free((guchar *)update->hdr_uaddr);
+    g_free((guchar *)update->trans_uaddr);
+    g_free(update);
+}
+
+static int
+csv_send_queue_data(SevGuestState *s, uint8_t *ptr,
+                    uint32_t size, uint64_t addr)
+{
+    int ret = 0;
+    int fw_error;
+    guchar *trans;
+    guchar *packet_hdr;
+    struct kvm_sev_send_update_data *update;
+    struct kvm_csv_batch_list_node *new_node = NULL;
+
+    /* If this is the first call, then query the packet header bytes and
+     * allocate the packet buffer.
+     */
+    if (s->send_packet_hdr_len < 1) {
+        s->send_packet_hdr_len = sev_send_get_packet_len(&fw_error);
+        if (s->send_packet_hdr_len < 1) {
+            error_report("%s: SEND_UPDATE fw_error=%d '%s'",
+                         __func__, fw_error, fw_error_to_str(fw_error));
+            return 1;
+        }
+    }
+
+    packet_hdr = g_new(guchar, s->send_packet_hdr_len);
+    memset(packet_hdr, 0, s->send_packet_hdr_len);
+
+    update = g_new0(struct kvm_sev_send_update_data, 1);
+
+    /* allocate transport buffer */
+    trans = g_new(guchar, size);
+
+    update->hdr_uaddr = (unsigned long)packet_hdr;
+    update->hdr_len = s->send_packet_hdr_len;
+    update->guest_uaddr = (unsigned long)ptr;
+    update->guest_len = size;
+    update->trans_uaddr = (unsigned long)trans;
+    update->trans_len = size;
+
+    new_node = csv_batch_cmd_list_node_create((uint64_t)update, addr);
+    if (!new_node) {
+        ret = -ENOMEM;
+        goto err;
+    }
+
+    if (s->csv_batch_cmd_list == NULL) {
+        s->csv_batch_cmd_list = csv_batch_cmd_list_create(new_node,
+                                                          send_update_data_free);
+        if (s->csv_batch_cmd_list == NULL) {
+            ret = -ENOMEM;
+            goto err;
+        }
+    } else {
+        /* Add new_node's command address to the last_node */
+        csv_batch_cmd_list_add_after(s->csv_batch_cmd_list, new_node);
+    }
+
+    trace_kvm_sev_send_update_data(ptr, trans, size);
+
+    return ret;
+
+err:
+    g_free(trans);
+    g_free(update);
+    g_free(packet_hdr);
+    g_free(new_node);
+    if (s->csv_batch_cmd_list) {
+        csv_batch_cmd_list_destroy(s->csv_batch_cmd_list);
+        s->csv_batch_cmd_list = NULL;
+    }
+    return ret;
+}
+
+static int
+csv_receive_queue_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr)
+{
+    int ret = 0;
+    gchar *hdr = NULL, *trans = NULL;
+    struct kvm_sev_receive_update_data *update;
+    struct kvm_csv_batch_list_node *new_node = NULL;
+
+    update = g_new0(struct kvm_sev_receive_update_data, 1);
+    /* get packet header */
+    update->hdr_len = qemu_get_be32(f);
+    hdr = g_new(gchar, update->hdr_len);
+    qemu_get_buffer(f, (uint8_t *)hdr, update->hdr_len);
+    update->hdr_uaddr = (unsigned long)hdr;
+
+    /* get transport buffer */
+    update->trans_len = qemu_get_be32(f);
+    trans = g_new(gchar, update->trans_len);
+    update->trans_uaddr = (unsigned long)trans;
+    qemu_get_buffer(f, (uint8_t *)update->trans_uaddr, update->trans_len);
+
+    /* set guest address; guest len is page_size */
+    update->guest_uaddr = (uint64_t)ptr;
+    update->guest_len = TARGET_PAGE_SIZE;
+
+    new_node = csv_batch_cmd_list_node_create((uint64_t)update, 0);
+    if (!new_node) {
+        ret = -ENOMEM;
+        goto err;
+    }
+
+    if (s->csv_batch_cmd_list == NULL) {
+        s->csv_batch_cmd_list = csv_batch_cmd_list_create(new_node,
+                                                          receive_update_data_free);
+        if (s->csv_batch_cmd_list == NULL) {
+            ret = -ENOMEM;
+            goto err;
+        }
+    } else {
+        /* Add new_node's command address to the last_node */
+        csv_batch_cmd_list_add_after(s->csv_batch_cmd_list, new_node);
+    }
+
+    trace_kvm_sev_receive_update_data(trans, (void *)ptr, update->guest_len,
+                                      (void *)hdr, update->hdr_len);
+
+    return ret;
+
+err:
+    g_free(trans);
+    g_free(update);
+    g_free(hdr);
+    g_free(new_node);
+    if (s->csv_batch_cmd_list) {
+        csv_batch_cmd_list_destroy(s->csv_batch_cmd_list);
+        s->csv_batch_cmd_list = NULL;
+    }
+    return ret;
+}
+
+static int
+csv_command_batch(uint32_t cmd_id, uint64_t head_uaddr, int *fw_err)
+{
+    int ret;
+    struct kvm_csv_command_batch command_batch = { };
+
+    command_batch.command_id = cmd_id;
+    command_batch.csv_batch_list_uaddr = head_uaddr;
+
+    ret = sev_ioctl(sev_guest->sev_fd, KVM_CSV_COMMAND_BATCH,
+                    &command_batch, fw_err);
+    if (ret) {
+        error_report("%s: COMMAND_BATCH ret=%d fw_err=%d '%s'",
+                     __func__,
                     ret, *fw_err, fw_error_to_str(*fw_err));
+    }
+
+    return ret;
+}
+
+static int
+csv_send_update_data_batch(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent)
+{
+    int ret, fw_error = 0;
+    struct kvm_sev_send_update_data *update;
+    struct kvm_csv_batch_list_node *node;
+
+    ret = csv_command_batch(KVM_SEV_SEND_UPDATE_DATA,
+                            (uint64_t)s->csv_batch_cmd_list->head, &fw_error);
+    if (ret) {
+        error_report("%s: csv_command_batch ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+
+    for (node = s->csv_batch_cmd_list->head;
+         node != NULL;
+         node = (struct kvm_csv_batch_list_node *)node->next_cmd_addr) {
+        if (node != s->csv_batch_cmd_list->head) {
+            /* head's page header is saved before send_update_data */
+            qemu_put_be64(f, node->addr);
+            *bytes_sent += 8;
+            if (node->next_cmd_addr != 0) {
+                qemu_put_be32(f, RAM_SAVE_ENCRYPTED_PAGE_BATCH);
+            } else {
+                qemu_put_be32(f, RAM_SAVE_ENCRYPTED_PAGE_BATCH_END);
+            }
+            *bytes_sent += 4;
+        }
+        update = (struct kvm_sev_send_update_data *)node->cmd_data_addr;
+        qemu_put_be32(f, update->hdr_len);
+        qemu_put_buffer(f, (uint8_t *)update->hdr_uaddr, update->hdr_len);
+        *bytes_sent += (4 + update->hdr_len);
+
+        qemu_put_be32(f, update->trans_len);
+        qemu_put_buffer(f, (uint8_t *)update->trans_uaddr, update->trans_len);
+        *bytes_sent += (4 + update->trans_len);
+    }
+
+err:
+    csv_batch_cmd_list_destroy(s->csv_batch_cmd_list);
+    s->csv_batch_cmd_list = NULL;
+    return ret;
+}
+
+static int
+csv_receive_update_data_batch(SevGuestState *s)
+{
+    int ret;
+    int fw_error;
+
+    ret = csv_command_batch(KVM_SEV_RECEIVE_UPDATE_DATA,
+                            (uint64_t)s->csv_batch_cmd_list->head, &fw_error);
+    if (ret) {
+        error_report("%s: csv_command_batch ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+    }
+
+    csv_batch_cmd_list_destroy(s->csv_batch_cmd_list);
+    s->csv_batch_cmd_list = NULL;
+    return ret;
+}
+
+int
+csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr)
+{
+    SevGuestState *s = sev_guest;
+
+    /* Only supported for HYGON CSV */
+    if (!is_hygon_cpu()) {
+        error_report("Enqueueing pages is only supported for HYGON CSV");
+        return -EINVAL;
+    }
+
+    return csv_send_queue_data(s, ptr, sz, addr);
+}
+
+int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr)
+{
+    SevGuestState *s = sev_guest;
+
+    /* Only supported for HYGON CSV */
+    if (!is_hygon_cpu()) {
+        error_report("Enqueueing received pages is only supported for HYGON CSV");
+        return -EINVAL;
+    }
+
+    /*
+     * If this is the first buffer and SEV is not in receiving state, then
+     * use the RECEIVE_START command to create an encryption context.
+     */
+    if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) &&
+        sev_receive_start(s, f)) {
+        return 1;
+    }
+
+    return csv_receive_queue_data(s, f, ptr);
+}
+
+int
+csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent)
+{
+    SevGuestState *s = sev_guest;
+
+    /* Only supported for HYGON CSV */
+    if (!is_hygon_cpu()) {
+        error_report("Transferring queued pages is only supported for HYGON CSV");
+        return -EINVAL;
+    }
+
+    /*
+     * If this is the first buffer, then create the outgoing encryption
+     * context and write our PDH, policy and session data.
+     */
+    if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) &&
+        sev_send_start(s, f, bytes_sent)) {
+        error_report("Failed to create outgoing context");
+        return 1;
+    }
+
+    return csv_send_update_data_batch(s, f, bytes_sent);
+}
+
+int csv_load_queued_incoming_pages(QEMUFile *f)
+{
+    SevGuestState *s = sev_guest;
+
+    /* Only supported for HYGON CSV */
+    if (!is_hygon_cpu()) {
+        error_report("Loading queued pages is only supported for HYGON CSV");
+        return -EINVAL;
+    }
+
+    return csv_receive_update_data_batch(s);
+}
+
+static int
+sev_send_vmsa_get_packet_len(int *fw_err)
+{
+    int ret;
+    struct kvm_sev_send_update_vmsa update = { 0, };
+
+    ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_VMSA,
+                    &update, fw_err);
+    if (*fw_err != SEV_RET_INVALID_LEN) {
+        ret = 0;
+        error_report("%s: failed to get packet header length ret=%d fw_error=%d '%s'",
+                     __func__, ret, *fw_err, fw_error_to_str(*fw_err));
+        goto err;
+    }
+
+    ret = update.hdr_len;
+
+err:
+    return ret;
+}
+
+static int
+sev_send_update_vmsa(SevGuestState *s, QEMUFile *f, uint32_t cpu_id,
+                     uint32_t cpu_index, uint32_t size, uint64_t *bytes_sent)
+{
+    int ret, fw_error;
+    guchar *trans = NULL;
+    struct kvm_sev_send_update_vmsa update = {};
+
+    /*
+     * If this is the first call, then query the packet header bytes and
+     * allocate the packet buffer.
+     */
+    if (!s->send_vmsa_packet_hdr) {
+        s->send_vmsa_packet_hdr_len = sev_send_vmsa_get_packet_len(&fw_error);
+        if (s->send_vmsa_packet_hdr_len < 1) {
+            error_report("%s: SEND_UPDATE_VMSA fw_error=%d '%s'",
+                         __func__, fw_error, fw_error_to_str(fw_error));
+            return 1;
+        }
+
+        s->send_vmsa_packet_hdr = g_new(gchar, s->send_vmsa_packet_hdr_len);
+    }
+
+    /* allocate transport buffer */
+    trans = g_new(guchar, size);
+
+    update.vcpu_id = cpu_id;
+    update.hdr_uaddr = (uintptr_t)s->send_vmsa_packet_hdr;
+    update.hdr_len = s->send_vmsa_packet_hdr_len;
+    update.trans_uaddr = (uintptr_t)trans;
+    update.trans_len = size;
+
+    trace_kvm_sev_send_update_vmsa(cpu_id, cpu_index, trans, size);
+
+    ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, &fw_error);
+    if (ret) {
+        error_report("%s: SEND_UPDATE_VMSA ret=%d fw_error=%d '%s'",
+                     __func__, ret, fw_error, fw_error_to_str(fw_error));
+        goto err;
+    }
+
+    /*
+     * Migration of the vCPU's VMState according to the instance_id
+     * (i.e.
CPUState.cpu_index) + */ + qemu_put_be32(f, sizeof(uint32_t)); + qemu_put_buffer(f, (uint8_t *)&cpu_index, sizeof(uint32_t)); + *bytes_sent += 4 + sizeof(uint32_t); + + qemu_put_be32(f, update.hdr_len); + qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len); + *bytes_sent += 4 + update.hdr_len; + + qemu_put_be32(f, update.trans_len); + qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); + *bytes_sent += 4 + update.trans_len; + +err: + g_free(trans); + return ret; +} + +int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent) +{ + SevGuestState *s = sev_guest; + CPUState *cpu; + int ret = 0; + + /* Only support migrate VMSAs for HYGON CSV2 guest */ + if (!sev_es_enabled() || !is_hygon_cpu()) { + return 0; + } + + CPU_FOREACH(cpu) { + qemu_put_be32(f, ENCRYPTED_CPU_STATE_CONT); + *bytes_sent += 4; + ret = sev_send_update_vmsa(s, f, kvm_arch_vcpu_id(cpu), + cpu->cpu_index, TARGET_PAGE_SIZE, bytes_sent); + if (ret) { + goto err; + } + } + + qemu_put_be32(f, ENCRYPTED_CPU_STATE_END); + *bytes_sent += 4; + +err: + return ret; +} + +static int sev_receive_update_vmsa(QEMUFile *f) +{ + int ret = 1, fw_error = 0; + CPUState *cpu; + uint32_t cpu_index, cpu_id = 0; + gchar *hdr = NULL, *trans = NULL; + struct kvm_sev_receive_update_vmsa update = {}; + + /* get cpu index buffer */ + assert(qemu_get_be32(f) == sizeof(uint32_t)); + qemu_get_buffer(f, (uint8_t *)&cpu_index, sizeof(uint32_t)); + + CPU_FOREACH(cpu) { + if (cpu->cpu_index == cpu_index) { + cpu_id = kvm_arch_vcpu_id(cpu); + break; + } + } + update.vcpu_id = cpu_id; + + /* get packet header */ + update.hdr_len = qemu_get_be32(f); + if (!check_blob_length(update.hdr_len)) { + return 1; + } + + hdr = g_new(gchar, update.hdr_len); + qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len); + update.hdr_uaddr = (uintptr_t)hdr; + + /* get transport buffer */ + update.trans_len = qemu_get_be32(f); + if (!check_blob_length(update.trans_len)) { + goto err; + } + + trans = g_new(gchar, update.trans_len); + update.trans_uaddr = (uintptr_t)trans; + qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); + + trace_kvm_sev_receive_update_vmsa(cpu_id, cpu_index, + trans, update.trans_len, hdr, update.hdr_len); + + ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_VMSA, + &update, &fw_error); + if (ret) { + error_report("Error RECEIVE_UPDATE_VMSA ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); + } + +err: + g_free(trans); + g_free(hdr); + return ret; +} + +int csv_load_incoming_cpu_state(QEMUFile *f) +{ + int status, ret = 0; + + /* Only support migrate VMSAs for HYGON CSV2 guest */ + if (!sev_es_enabled() || !is_hygon_cpu()) { + return 0; + } + + status = qemu_get_be32(f); + while (status == ENCRYPTED_CPU_STATE_CONT) { + ret = sev_receive_update_vmsa(f); + if (ret) { + break; + } + + status = qemu_get_be32(f); + } + + return ret; +} + +static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +}; + +static const QemuUUID sev_kernel_entry_guid = { + .data = UUID_LE(0x4de79437, 0xabd2, 0x427f, 0xb8, 0x35, 0xd5, 0xb1, + 0x72, 0xd2, 0x04, 0x5b) +}; +static const QemuUUID sev_initrd_entry_guid = { + .data = UUID_LE(0x44baf731, 0x3a2f, 0x4bd7, 0x9a, 0xf1, 0x41, 0xe2, + 0x91, 0x69, 0x78, 0x1d) +}; +static const QemuUUID sev_cmdline_entry_guid = { + .data = UUID_LE(0x97d02dd8, 0xbd20, 0x4c94, 0xaa, 0x78, 0xe7, 0x71, + 0x4d, 0x36, 0xab, 0x2a) +}; + +/* + * Add the hashes of the linux 
kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. + */ +bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +{ + uint8_t *data; + SevHashTableDescriptor *area; + SevHashTable *ht; + PaddedSevHashTable *padded_ht; + uint8_t cmdline_hash[HASH_SIZE]; + uint8_t initrd_hash[HASH_SIZE]; + uint8_t kernel_hash[HASH_SIZE]; + uint8_t *hashp; + size_t hash_len = HASH_SIZE; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ + if (!sev_guest->kernel_hashes) { + return false; + } + + if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); + return false; + } + area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } + + /* + * Calculate hash of kernel command-line with the terminating null byte. If + * the user doesn't supply a command-line via -append, the 1-byte "\0" will + * be used. + */ + hashp = cmdline_hash; + if (qcrypto_hash_bytes(QCRYPTO_HASH_ALG_SHA256, ctx->cmdline_data, + ctx->cmdline_size, &hashp, &hash_len, errp) < 0) { + return false; + } + assert(hash_len == HASH_SIZE); + + /* + * Calculate hash of initrd. If the user doesn't supply an initrd via + * -initrd, an empty buffer will be used (ctx->initrd_size == 0). + */ + hashp = initrd_hash; + if (qcrypto_hash_bytes(QCRYPTO_HASH_ALG_SHA256, ctx->initrd_data, + ctx->initrd_size, &hashp, &hash_len, errp) < 0) { + return false; + } + assert(hash_len == HASH_SIZE); + + /* Calculate hash of the kernel */ + hashp = kernel_hash; + struct iovec iov[2] = { + { .iov_base = ctx->setup_data, .iov_len = ctx->setup_size }, + { .iov_base = ctx->kernel_data, .iov_len = ctx->kernel_size } + }; + if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256, iov, ARRAY_SIZE(iov), + &hashp, &hash_len, errp) < 0) { + return false; + } + assert(hash_len == HASH_SIZE); + + /* + * Populate the hashes table in the guest's memory at the OVMF-designated + * area for the SEV hashes table + */ + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } + ht = &padded_ht->ht; + + ht->guid = sev_hash_table_header_guid; + ht->len = sizeof(*ht); + + ht->cmdline.guid = sev_cmdline_entry_guid; + ht->cmdline.len = sizeof(ht->cmdline); + memcpy(ht->cmdline.hash, cmdline_hash, sizeof(ht->cmdline.hash)); + + ht->initrd.guid = sev_initrd_entry_guid; + ht->initrd.len = sizeof(ht->initrd); + memcpy(ht->initrd.hash, initrd_hash, sizeof(ht->initrd.hash)); + + ht->kernel.guid = sev_kernel_entry_guid; + ht->kernel.len = sizeof(ht->kernel); + memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash)); + + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + + if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { + ret = false; + } + + address_space_unmap(&address_space_memory, padded_ht, + mapped_len, true, mapped_len); + + return ret; +} + +static int _sev_send_start(QEMUFile *f, uint64_t 
*bytes_sent) +{ + SevGuestState *s = sev_guest; + + return sev_send_start(s, f, bytes_sent); +} + +static int _sev_receive_start(QEMUFile *f) +{ + SevGuestState *s = sev_guest; + + return sev_receive_start(s, f); +} + +struct sev_ops sev_ops = { + .sev_ioctl = sev_ioctl, + .fw_error_to_str = fw_error_to_str, + .sev_send_start = _sev_send_start, + .sev_receive_start = _sev_receive_start, +}; + static void sev_register_types(void) { diff --git a/target/i386/sev.h b/target/i386/sev.h index e7499c95b1e87cd6ecdff2c28009b65807b52946..647b426b16f4b4c4d003a8a59ac07f42f966f112 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -38,6 +38,13 @@ typedef struct SevKernelLoaderContext { size_t cmdline_size; } SevKernelLoaderContext; +#define RAM_SAVE_ENCRYPTED_PAGE 0x1 +#define RAM_SAVE_SHARED_REGIONS_LIST 0x2 + +#define RAM_SAVE_ENCRYPTED_PAGE_BATCH 0x4 +#define RAM_SAVE_ENCRYPTED_PAGE_BATCH_END 0x5 +#define RAM_SAVE_ENCRYPTED_CPU_STATE 0x6 + #ifdef CONFIG_SEV bool sev_enabled(void); bool sev_es_enabled(void); @@ -51,12 +58,35 @@ uint32_t sev_get_reduced_phys_bits(void); bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); +int sev_save_setup(const char *pdh, const char *plat_cert, + const char *amd_cert); +int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, + uint32_t size, uint64_t *bytes_sent); +int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr); int sev_inject_launch_secret(const char *hdr, const char *secret, uint64_t gpa, Error **errp); int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); void sev_es_set_reset_vector(CPUState *cpu); +int sev_remove_shared_regions_list(unsigned long gfn_start, + unsigned long gfn_end); +int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end); +int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent); +int sev_load_incoming_shared_regions_list(QEMUFile *f); +bool sev_is_gfn_in_unshared_region(unsigned long gfn); +void sev_del_migrate_blocker(void); int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +extern bool sev_kvm_has_msr_ghcb; + +struct sev_ops { + int (*sev_ioctl)(int fd, int cmd, void *data, int *error); + const char *(*fw_error_to_str)(int code); + int (*sev_send_start)(QEMUFile *f, uint64_t *bytes_sent); + int (*sev_receive_start)(QEMUFile *f); +}; + +extern struct sev_ops sev_ops; + #endif diff --git a/target/i386/trace-events b/target/i386/trace-events index 2cd8726eebb7d42b70a14ba3c128290b428847da..515441c4f36ee133ad29c3fc52884717d2e84104 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -11,3 +11,18 @@ kvm_sev_launch_measurement(const char *value) "data %s" kvm_sev_launch_finish(void) "" kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" +kvm_sev_send_start(uint64_t pdh, int l1, uint64_t plat, int l2, uint64_t amd, int l3) "pdh 0x%" PRIx64 " len %d plat 0x%" PRIx64 " len %d amd 0x%" PRIx64 " len %d" +kvm_sev_send_update_data(void *src, void *dst, int len) "guest %p trans %p len %d" +kvm_sev_send_finish(void) "" +kvm_sev_receive_start(int policy, void *session, void *pdh) "policy 0x%x session %p pdh %p" +kvm_sev_receive_update_data(void *src, void *dst, int len, void *hdr, int hdr_len) "guest %p trans %p len %d hdr %p hdr_len %d" +kvm_sev_receive_finish(void) "" 
+kvm_sev_send_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *dst, int len) "cpu_id %d cpu_index %d trans %p len %d"
+kvm_sev_receive_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *src, int len, void *hdr, int hdr_len) "cpu_id %d cpu_index %d trans %p len %d hdr %p hdr_len %d"
+
+# csv.c
+kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" PRIx64 " addr %p len 0x%" PRIx64
+kvm_csv3_send_encrypt_data(void *dst, int len) "trans %p len %d"
+kvm_csv3_send_encrypt_context(void *dst, int len) "trans %p len %d"
+kvm_csv3_receive_encrypt_data(void *dst, int len, void *hdr, int hdr_len) "trans %p len %d hdr %p hdr_len %d"
+kvm_csv3_receive_encrypt_context(void *dst, int len, void *hdr, int hdr_len) "trans %p len %d hdr %p hdr_len %d"
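Note for reviewers (not part of the patch): the stream that sev_save_outgoing_page() emits and sev_load_incoming_page() consumes is a simple length-prefixed framing: a big-endian u32 header length, the SEND_UPDATE_DATA packet header, a big-endian u32 transport length, then the ciphertext. The stand-alone C sketch below packs and unpacks that layout in a plain memory buffer; buf_put_be32()/buf_get_be32() are hypothetical stand-ins for qemu_put_be32()/qemu_get_be32(), and the header/ciphertext contents are dummy bytes.

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* hypothetical stand-in for qemu_put_be32(): write v big-endian at off */
static size_t buf_put_be32(uint8_t *buf, size_t off, uint32_t v)
{
    buf[off + 0] = v >> 24;
    buf[off + 1] = v >> 16;
    buf[off + 2] = v >> 8;
    buf[off + 3] = v;
    return off + 4;
}

/* hypothetical stand-in for qemu_get_be32(): read big-endian u32 at off */
static size_t buf_get_be32(const uint8_t *buf, size_t off, uint32_t *v)
{
    *v = ((uint32_t)buf[off] << 24) | ((uint32_t)buf[off + 1] << 16) |
         ((uint32_t)buf[off + 2] << 8) | buf[off + 3];
    return off + 4;
}

int main(void)
{
    const uint8_t hdr[16] = { 0xaa };   /* stands in for the packet header */
    const uint8_t trans[32] = { 0xbb }; /* stands in for the encrypted page */
    uint8_t stream[4 + sizeof(hdr) + 4 + sizeof(trans)];
    size_t off = 0;

    /* sender side, mirroring sev_send_update_data() */
    off = buf_put_be32(stream, off, sizeof(hdr));
    memcpy(stream + off, hdr, sizeof(hdr));
    off += sizeof(hdr);
    off = buf_put_be32(stream, off, sizeof(trans));
    memcpy(stream + off, trans, sizeof(trans));

    /* receiver side, mirroring sev_receive_update_data() */
    uint32_t hdr_len, trans_len;
    off = buf_get_be32(stream, 0, &hdr_len);
    off += hdr_len; /* skip past the header bytes */
    (void)buf_get_be32(stream, off, &trans_len);
    assert(hdr_len == sizeof(hdr) && trans_len == sizeof(trans));
    return 0;
}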
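Note for reviewers (not part of the patch): sev_add_shared_regions_list() keeps the shared-region list sorted by guest PA and forward-merges a new range into an existing node when they overlap; like the patch, it only merges with a single neighbour. Below is a minimal stand-alone model of that invariant, assuming a plain singly-linked list and half-open [start, end) intervals instead of QTAILQ and gfn_start/gfn_end, so all names and interval conventions here are illustrative only.

#include <assert.h>
#include <stdlib.h>

struct region {
    unsigned long start, end;
    struct region *next;
};

/* Insert [start, end), keeping the list sorted and widening an existing
 * node on overlap, analogous to sev_add_shared_regions_list() above. */
static void region_add(struct region **head, unsigned long start,
                       unsigned long end)
{
    struct region **pp = head;

    while (*pp && (*pp)->end < start) {
        pp = &(*pp)->next;              /* skip regions entirely below us */
    }
    if (*pp && (*pp)->start <= end) {
        /* overlap or adjacency: widen the existing node in place */
        if (start < (*pp)->start) {
            (*pp)->start = start;
        }
        if (end > (*pp)->end) {
            (*pp)->end = end;
        }
        return;
    }
    struct region *n = calloc(1, sizeof(*n));
    n->start = start;
    n->end = end;
    n->next = *pp;
    *pp = n;
}

int main(void)
{
    struct region *head = NULL;

    region_add(&head, 0x10, 0x20);
    region_add(&head, 0x18, 0x30);  /* overlaps -> single region [0x10,0x30) */
    region_add(&head, 0x40, 0x50);  /* disjoint -> second node */
    assert(head->start == 0x10 && head->end == 0x30);
    assert(head->next->start == 0x40 && head->next->next == NULL);
    return 0;
}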
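Note for reviewers (not part of the patch): the KVM_CSV_COMMAND_BATCH path works because kvm_csv_batch_list_node threads the batch through next_cmd_addr, a __u64 holding the next node's user-space address, so the kernel can walk the whole chain from csv_batch_list_uaddr in one ioctl. A minimal sketch of that chaining follows, with a local struct mirroring the layout assumed from the patch rather than the real uapi header.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* local mirror of the assumed kvm_csv_batch_list_node layout */
struct batch_node {
    uint64_t cmd_data_addr;  /* address of the per-page command struct */
    uint64_t addr;           /* guest page address carried by the node */
    uint64_t next_cmd_addr;  /* user-space address of the next node, or 0 */
};

struct batch_list {
    struct batch_node *head, *tail;
};

/* append like csv_batch_cmd_list_add_after(): store the new node's
 * address into the previous tail's next_cmd_addr */
static void batch_append(struct batch_list *l, struct batch_node *n)
{
    if (!l->head) {
        l->head = l->tail = n;
    } else {
        l->tail->next_cmd_addr = (uint64_t)(uintptr_t)n;
        l->tail = n;
    }
}

int main(void)
{
    struct batch_list list = { NULL, NULL };

    for (int i = 0; i < 3; i++) {
        struct batch_node *n = calloc(1, sizeof(*n));
        n->addr = 0x1000u * i;
        batch_append(&list, n);
    }

    /* walk the chain exactly as csv_send_update_data_batch() does */
    int count = 0;
    for (struct batch_node *n = list.head; n;
         n = (struct batch_node *)(uintptr_t)n->next_cmd_addr) {
        count++;
    }
    assert(count == 3);
    return 0;
}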