From f93f2af9ef14a3dd56789a0fa8fa5f0c5c7feea1 Mon Sep 17 00:00:00 2001 From: hanliyang Date: Fri, 3 Jan 2025 16:28:18 +0800 Subject: [PATCH 1/3] target/i386: sev: Return 0 if sev_send_get_packet_len() fails The send_packet_hdr_len of struct SEVState is of type size_t which is an unsigned integer type. If the send_packet_hdr_len is assigned as -1, then it will be a huge number and the QEMU process will crash when allocating packet buffer with the huge size. For example, the following code could cause the crash described above. ``` static int sev_send_update_data(SEVState *s, QEMUFile *f, uint8_t *ptr, uint32_t size, uint64_t *bytes_sent) { ...... if (!s->send_packet_hdr) { s->send_packet_hdr_len = sev_send_get_packet_len(&fw_error); if (s->send_packet_hdr_len < 1) { error_report("%s: SEND_UPDATE fw_error=%d '%s'", __func__, fw_error, fw_error_to_str(fw_error)); return 1; } s->send_packet_hdr = g_new(gchar, s->send_packet_hdr_len); } ...... } ``` [ hly: Also return 0 if sev_send_vmsa_get_packet_len() fails. 
] Signed-off-by: hanliyang --- target/i386/sev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 94d3c1853b..484e6e7509 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1643,7 +1643,7 @@ sev_send_get_packet_len(int *fw_err) ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, fw_err); if (*fw_err != SEV_RET_INVALID_LEN) { - ret = -1; + ret = 0; error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", __func__, ret, *fw_err, fw_error_to_str(*fw_err)); goto err; @@ -2370,7 +2370,7 @@ sev_send_vmsa_get_packet_len(int *fw_err) ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, fw_err); if (*fw_err != SEV_RET_INVALID_LEN) { - ret = -1; + ret = 0; error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", __func__, ret, *fw_err, fw_error_to_str(*fw_err)); goto err; -- Gitee From 5ccdbb5830cf509f658bfa18c6789f3f6b2ccb87 Mon Sep 17 00:00:00 2001 From: hanliyang Date: Fri, 26 Apr 2024 21:17:35 +0800 Subject: [PATCH 2/3] target/i386/csv: The CSV2 guest running on the recipient side is not resettable Currently, both CSV2 guest running on the recipient side and CSV3 guest are not resettable. Provide a new global variable kvm_csv_reset_inhibit to indicate the resettable state if the VM is using CSV technologies. 
Signed-off-by: hanliyang --- target/i386/csv.c | 2 ++ target/i386/csv.h | 2 ++ target/i386/kvm/kvm.c | 4 +++- target/i386/kvm/sev-stub.c | 2 ++ target/i386/sev.c | 3 +++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/target/i386/csv.c b/target/i386/csv.c index 770ea7d5bf..12e2cc16be 100644 --- a/target/i386/csv.c +++ b/target/i386/csv.c @@ -93,6 +93,8 @@ csv_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) qemu_mutex_init(&csv_guest.dma_map_regions_list_mutex); csv_guest.sev_send_start = ops->sev_send_start; csv_guest.sev_receive_start = ops->sev_receive_start; + + kvm_csv_reset_inhibit = true; } return 0; } diff --git a/target/i386/csv.h b/target/i386/csv.h index 485534d2d6..cc02ba83a4 100644 --- a/target/i386/csv.h +++ b/target/i386/csv.h @@ -18,6 +18,8 @@ #include "qemu/thread.h" #include "qemu/queue.h" +extern bool kvm_csv_reset_inhibit; + #ifdef CONFIG_SEV #include "cpu.h" diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 469349420c..a14debcfc2 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -5383,7 +5383,9 @@ bool kvm_has_waitpkg(void) bool kvm_arch_cpu_check_are_resettable(void) { - return !(sev_es_enabled() && !is_hygon_cpu()) && !csv_enabled(); + if (is_hygon_cpu()) + return !kvm_csv_reset_inhibit; + return !sev_es_enabled(); } #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c index 6080c007a2..0651502f1c 100644 --- a/target/i386/kvm/sev-stub.c +++ b/target/i386/kvm/sev-stub.c @@ -20,3 +20,5 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) /* If we get here, cgs must be some non-SEV thing */ return 0; } + +bool kvm_csv_reset_inhibit; diff --git a/target/i386/sev.c b/target/i386/sev.c index 484e6e7509..3f25d2b6d7 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -219,6 +219,7 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { }; bool kvm_has_msr_ghcb; +bool kvm_csv_reset_inhibit; 
static int sev_ioctl(int fd, int cmd, void *data, int *error) @@ -1275,6 +1276,8 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: failed to create encryption context", __func__); goto err; } + } else if (is_hygon_cpu() && sev_es_enabled()) { + kvm_csv_reset_inhibit = true; } /* CSV guest needs no notifier to reg/unreg memory */ -- Gitee From 24bbe058737913a992ea28103f7672d33b217717 Mon Sep 17 00:00:00 2001 From: thomas Date: Fri, 12 Jul 2024 11:10:53 +0800 Subject: [PATCH 3/3] virtio-net: Fix network stall at the host side waiting for kick commit f937309fbdbb48c354220a3e7110c202ae4aa7fa upstream. Patch 06b12970174 ("virtio-net: fix network stall under load") added double-check to test whether the available buffer size can satisfy the request or not, in case the guest has added some buffers to the avail ring simultaneously after the first check. It will be lucky if the available buffer size becomes okay after the double-check, then the host can send the packet to the guest. If the buffer size still can't satisfy the request, even if the guest has added some buffers, virtio-net would stall at the host side forever. The patch enables notification and checks whether the guest has added some buffers since last check of available buffers when the available buffers are insufficient. If no buffer is added, return false, else recheck the available buffers in the loop. If the available buffers are sufficient, disable notification and return true. Changes: 1. Change the return type of virtqueue_get_avail_bytes() from void to int, it returns an opaque that represents the shadow_avail_idx of the virtqueue on success, else -1 on error. 2. Add a new API: virtio_queue_enable_notification_and_check(), it takes an opaque as input arg which is returned from virtqueue_get_avail_bytes(). 
It enables notification firstly, then checks whether the guest has added some buffers since last check of available buffers or not by virtio_queue_poll(), return true if yes. The patch also reverts patch "06b12970174". The case below can reproduce the stall. Guest 0 +--------+ | iperf | ---------------> | server | Host | +--------+ +--------+ | ... | iperf |---- | client |---- Guest n +--------+ | +--------+ | | iperf | ---------------> | server | +--------+ Boot many guests from qemu with virtio network: qemu ... -netdev tap,id=net_x \ -device virtio-net-pci-non-transitional,\ iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x Each guest acts as iperf server with commands below: iperf3 -s -D -i 10 -p 8001 iperf3 -s -D -i 10 -p 8002 The host as iperf client: iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 40000 iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 40000 After some time, the host loses connection to the guest, the guest can send packet to the host, but can't receive packet from the host. It's more likely to happen if SWIOTLB is enabled in the guest, allocating and freeing bounce buffer takes some CPU ticks, copying from/to bounce buffer takes more CPU ticks, compared with that there is no bounce buffer in the guest. Once the rate of producing packets from the host approximates the rate of receiving packets in the guest, the guest would loop in NAPI. receive packets --- | | v | free buf virtnet_poll | | v | add buf to avail ring --- | | need kick the host? | NAPI continues v receive packets --- | | v | free buf virtnet_poll | | v | add buf to avail ring --- | v ... ... On the other hand, the host fetches free buf from avail ring, if the buf in the avail ring is not enough, the host notifies the guest the event by writing the avail idx read from avail ring to the event idx of used ring, then the host goes to sleep, waiting for the kick signal from the guest. 
Once the guest finds the host is waiting for kick signal (in virtqueue_kick_prepare_split()), it kicks the host. The host may stall forever at the sequences below: Host Guest ------------ ----------- fetch buf, send packet receive packet --- ... ... | fetch buf, send packet add buf | ... add buf virtnet_poll buf not enough avail idx-> add buf | read avail idx add buf | add buf --- receive packet --- write event idx ... | wait for kick add buf virtnet_poll ... | --- no more packet, exit NAPI In the first loop of NAPI above, indicated in the range of virtnet_poll above, the host is sending packets while the guest is receiving packets and adding buffers. step 1: The buf is not enough, for example, a big packet needs 5 buf, but the available buf count is 3. The host reads the current avail idx. step 2: The guest adds some buf, then checks whether the host is waiting for kick signal, not at this time. The used ring is not empty, the guest continues the second loop of NAPI. step 3: The host writes the avail idx read from avail ring to used ring as event idx via virtio_queue_set_notification(q->rx_vq, 1). step 4: At the end of the second loop of NAPI, recheck whether kick is needed, as the event idx in the used ring written by the host is beyond the range of kick condition, the guest will not send kick signal to the host. Fixes: 06b12970174 ("virtio-net: fix network stall under load") Cc: qemu-stable@nongnu.org Signed-off-by: Wencheng Yang Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 28 ++++++++++------- hw/virtio/virtio.c | 64 +++++++++++++++++++++++++++++++++++--- include/hw/virtio/virtio.h | 19 +++++++++-- 3 files changed, 92 insertions(+), 19 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 7d459726d4..09b76676dd 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1518,24 +1518,28 @@ static bool virtio_net_can_receive(NetClientState *nc) static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) { + int opaque; + unsigned int in_bytes; VirtIONet *n = q->n; - if (virtio_queue_empty(q->rx_vq) || - (n->mergeable_rx_bufs && - !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { - virtio_queue_set_notification(q->rx_vq, 1); - - /* To avoid a race condition where the guest has made some buffers - * available after the above check but before notification was - * enabled, check for available buffers again. - */ - if (virtio_queue_empty(q->rx_vq) || - (n->mergeable_rx_bufs && - !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { + + while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) { + opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL, + bufsize, 0); + /* Buffer is enough, disable notifiaction */ + if (bufsize <= in_bytes) { + break; + } + + if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) { + /* Guest has added some buffers, try again */ + continue; + } else { return 0; } } virtio_queue_set_notification(q->rx_vq, 0); + return 1; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index ea7c079fb0..2971d271ff 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -660,6 +660,60 @@ int virtio_queue_empty(VirtQueue *vq) } } +static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx) +{ + if (unlikely(!vq->vring.avail)) { + return false; + } + + return (uint16_t)shadow_idx != vring_avail_idx(vq); +} + +static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx) +{ + VRingPackedDesc 
desc; + VRingMemoryRegionCaches *caches; + + if (unlikely(!vq->vring.desc)) { + return false; + } + + caches = vring_get_region_caches(vq); + if (!caches) { + return false; + } + + vring_packed_desc_read(vq->vdev, &desc, &caches->desc, + shadow_idx, true); + + return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter); +} + +static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx) +{ + if (virtio_device_disabled(vq->vdev)) { + return false; + } + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + return virtio_queue_packed_poll(vq, shadow_idx); + } else { + return virtio_queue_split_poll(vq, shadow_idx); + } +} + +bool virtio_queue_enable_notification_and_check(VirtQueue *vq, + int opaque) +{ + virtio_queue_set_notification(vq, 1); + + if (opaque >= 0) { + return virtio_queue_poll(vq, (unsigned)opaque); + } else { + return false; + } +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { @@ -1226,9 +1280,9 @@ err: goto done; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, unsigned max_in_bytes, + unsigned max_out_bytes) { uint16_t desc_size; VRingMemoryRegionCaches *caches; @@ -1261,7 +1315,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, caches); } - return; + return (int)vq->shadow_avail_idx; err: if (in_bytes) { *in_bytes = 0; @@ -1269,6 +1323,8 @@ err: if (out_bytes) { *out_bytes = 0; } + + return -1; } int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 8bab9cfb75..0d4299b87b 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -203,9 +203,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, VirtQueueElement *elem); int 
virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, unsigned int out_bytes); -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes); +/** + * Return <0 on error or an opaque >=0 to pass to + * virtio_queue_enable_notification_and_check on success. + */ +int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, unsigned max_in_bytes, + unsigned max_out_bytes); void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); @@ -232,6 +236,15 @@ int virtio_queue_ready(VirtQueue *vq); int virtio_queue_empty(VirtQueue *vq); +/** + * Enable notification and check whether guest has added some + * buffers since last call to virtqueue_get_avail_bytes. + * + * @opaque: value returned from virtqueue_get_avail_bytes + */ +bool virtio_queue_enable_notification_and_check(VirtQueue *vq, + int opaque); + /* Host binding interface. */ uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr); -- Gitee