From 495164b84ef0e254a4fb5525ba22ad74c83abb25 Mon Sep 17 00:00:00 2001 From: Wencheng Yang Date: Sun, 18 Aug 2024 16:47:12 +0800 Subject: [PATCH] virtio-net: Fix network stall at the host side waiting for kick Signed-off-by: Wencheng Yang --- ...etwork-stall-at-the-host-side-waitin.patch | 339 ++++++++++++++++++ qemu.spec | 7 +- 2 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 1054-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch diff --git a/1054-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch b/1054-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch new file mode 100644 index 0000000..eb97e05 --- /dev/null +++ b/1054-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch @@ -0,0 +1,339 @@ +From f8854b36713ca37b4f8dd62d59ac9f83623c47fe Mon Sep 17 00:00:00 2001 +From: thomas +Date: Fri, 12 Jul 2024 11:10:53 +0800 +Subject: [PATCH] virtio-net: Fix network stall at the host side waiting for + kick + +Patch 06b12970174 ("virtio-net: fix network stall under load") +added double-check to test whether the available buffer size +can satisfy the request or not, in case the guest has added +some buffers to the avail ring simultaneously after the first +check. It will be lucky if the available buffer size becomes +okay after the double-check, then the host can send the packet +to the guest. If the buffer size still can't satisfy the request, +even if the guest has added some buffers, viritio-net would +stall at the host side forever. + +The patch enables notification and checks whether the guest has +added some buffers since last check of available buffers when +the available buffers are insufficient. If no buffer is added, +return false, else recheck the available buffers in the loop. +If the available buffers are sufficient, disable notification +and return true. + +Changes: +1. Change the return type of virtqueue_get_avail_bytes() from void + to int, it returns an opaque that represents the shadow_avail_idx + of the virtqueue on success, else -1 on error. +2. Add a new API: virtio_queue_enable_notification_and_check(), + it takes an opaque as input arg which is returned from + virtqueue_get_avail_bytes(). It enables notification firstly, + then checks whether the guest has added some buffers since + last check of available buffers or not by virtio_queue_poll(), + return ture if yes. + +The patch also reverts patch "06b12970174". +The patch also reverts patch +1052-hw-net-virtio-net-Update-event-idx-if-guest-has-made.patch. + +The case below can reproduce the stall. + + Guest 0 + +--------+ + | iperf | + ---------------> | server | + Host | +--------+ + +--------+ | ... + | iperf |---- + | client |---- Guest n + +--------+ | +--------+ + | | iperf | + ---------------> | server | + +--------+ + +Boot many guests from qemu with virtio network: + qemu ... -netdev tap,id=net_x \ + -device virtio-net-pci-non-transitional,\ + iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x + +Each guest acts as iperf server with commands below: + iperf3 -s -D -i 10 -p 8001 + iperf3 -s -D -i 10 -p 8002 + +The host as iperf client: + iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 40000 + iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 40000 + +After some time, the host loses connection to the guest, +the guest can send packet to the host, but can't receive +packet from the host. + +It's more likely to happen if SWIOTLB is enabled in the guest, +allocating and freeing bounce buffer takes some CPU ticks, +copying from/to bounce buffer takes more CPU ticks, compared +with that there is no bounce buffer in the guest. +Once the rate of producing packets from the host approximates +the rate of receiveing packets in the guest, the guest would +loop in NAPI. + + receive packets --- + | | + v | + free buf virtnet_poll + | | + v | + add buf to avail ring --- + | + | need kick the host? + | NAPI continues + v + receive packets --- + | | + v | + free buf virtnet_poll + | | + v | + add buf to avail ring --- + | + v + ... ... + +On the other hand, the host fetches free buf from avail +ring, if the buf in the avail ring is not enough, the +host notifies the guest the event by writing the avail +idx read from avail ring to the event idx of used ring, +then the host goes to sleep, waiting for the kick signal +from the guest. + +Once the guest finds the host is waiting for kick singal +(in virtqueue_kick_prepare_split()), it kicks the host. + +The host may stall forever at the sequences below: + + Host Guest + ------------ ----------- + fetch buf, send packet receive packet --- + ... ... | + fetch buf, send packet add buf | + ... add buf virtnet_poll + buf not enough avail idx-> add buf | + read avail idx add buf | + add buf --- + receive packet --- + write event idx ... | + wait for kick add buf virtnet_poll + ... | + --- + no more packet, exit NAPI + +In the first loop of NAPI above, indicated in the range of +virtnet_poll above, the host is sending packets while the +guest is receiving packets and adding buffers. + step 1: The buf is not enough, for example, a big packet + needs 5 buf, but the available buf count is 3. + The host read current avail idx. + step 2: The guest adds some buf, then checks whether the + host is waiting for kick signal, not at this time. + The used ring is not empty, the guest continues + the second loop of NAPI. + step 3: The host writes the avail idx read from avail + ring to used ring as event idx via + virtio_queue_set_notification(q->rx_vq, 1). + step 4: At the end of the second loop of NAPI, recheck + whether kick is needed, as the event idx in the + used ring written by the host is beyound the + range of kick condition, the guest will not + send kick signal to the host. + +Fixes: 06b12970174 ("virtio-net: fix network stall under load") +Cc: qemu-stable@nongnu.org +Signed-off-by: Wencheng Yang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 29 +++++++++-------- + hw/virtio/virtio.c | 64 +++++++++++++++++++++++++++++++++++--- + include/hw/virtio/virtio.h | 18 +++++++++-- + 3 files changed, 91 insertions(+), 20 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index dc0b65501e..3f589e618a 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1635,25 +1635,28 @@ static bool virtio_net_can_receive(NetClientState *nc) + + static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) + { ++ int opaque; ++ unsigned int in_bytes; + VirtIONet *n = q->n; +- if (virtio_queue_empty(q->rx_vq) || +- (n->mergeable_rx_bufs && +- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { +- virtio_queue_set_notification(q->rx_vq, 1); +- +- /* To avoid a race condition where the guest has made some buffers +- * available after the above check but before notification was +- * enabled, check for available buffers again. +- */ +- if (virtio_queue_empty(q->rx_vq) || +- (n->mergeable_rx_bufs && +- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { +- virtio_queue_set_notification(q->rx_vq, 1); ++ ++ while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) { ++ opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL, ++ bufsize, 0); ++ /* Buffer is enough, disable notifiaction */ ++ if (bufsize <= in_bytes) { ++ break; ++ } ++ ++ if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) { ++ /* Guest has added some buffers, try again */ ++ continue; ++ } else { + return 0; + } + } + + virtio_queue_set_notification(q->rx_vq, 0); ++ + return 1; + } + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3a160f86ed..4e8e131915 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -744,6 +744,60 @@ int virtio_queue_empty(VirtQueue *vq) + } + } + ++static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx) ++{ ++ if (unlikely(!vq->vring.avail)) { ++ return false; ++ } ++ ++ return (uint16_t)shadow_idx != vring_avail_idx(vq); ++} ++ ++static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx) ++{ ++ VRingPackedDesc desc; ++ VRingMemoryRegionCaches *caches; ++ ++ if (unlikely(!vq->vring.desc)) { ++ return false; ++ } ++ ++ caches = vring_get_region_caches(vq); ++ if (!caches) { ++ return false; ++ } ++ ++ vring_packed_desc_read(vq->vdev, &desc, &caches->desc, ++ shadow_idx, true); ++ ++ return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter); ++} ++ ++static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx) ++{ ++ if (virtio_device_disabled(vq->vdev)) { ++ return false; ++ } ++ ++ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { ++ return virtio_queue_packed_poll(vq, shadow_idx); ++ } else { ++ return virtio_queue_split_poll(vq, shadow_idx); ++ } ++} ++ ++bool virtio_queue_enable_notification_and_check(VirtQueue *vq, ++ int opaque) ++{ ++ virtio_queue_set_notification(vq, 1); ++ ++ if (opaque >= 0) { ++ return virtio_queue_poll(vq, (unsigned)opaque); ++ } else { ++ return false; ++ } ++} ++ + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) + { +@@ -1323,9 +1377,9 @@ err: + goto done; + } + +-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, +- unsigned int *out_bytes, +- unsigned max_in_bytes, unsigned max_out_bytes) ++int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, ++ unsigned int *out_bytes, unsigned max_in_bytes, ++ unsigned max_out_bytes) + { + uint16_t desc_size; + VRingMemoryRegionCaches *caches; +@@ -1358,7 +1412,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + caches); + } + +- return; ++ return (int)vq->shadow_avail_idx; + err: + if (in_bytes) { + *in_bytes = 0; +@@ -1366,6 +1420,8 @@ err: + if (out_bytes) { + *out_bytes = 0; + } ++ ++ return -1; + } + + int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c8f72850bc..d2f4ed160d 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -270,9 +270,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, + VirtQueueElement *elem); + int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, + unsigned int out_bytes); +-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, +- unsigned int *out_bytes, +- unsigned max_in_bytes, unsigned max_out_bytes); ++/** ++ * Return <0 on error or an opaque >=0 to pass to ++ * virtio_queue_enable_notification_and_check on success. ++ */ ++int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, ++ unsigned int *out_bytes, unsigned max_in_bytes, ++ unsigned max_out_bytes); + + void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq); + void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); +@@ -306,6 +310,14 @@ int virtio_queue_ready(VirtQueue *vq); + + int virtio_queue_empty(VirtQueue *vq); + ++/** ++ * Enable notification and check whether guest has added some ++ * buffers since last call to virtqueue_get_avail_bytes. ++ * ++ * @opaque: value returned from virtqueue_get_avail_bytes ++ */ ++bool virtio_queue_enable_notification_and_check(VirtQueue *vq, ++ int opaque); + /* Host binding interface. */ + + uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr); +-- +2.43.5 + diff --git a/qemu.spec b/qemu.spec index ce4a4b5..b2f7ded 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,4 +1,4 @@ -%define anolis_release 17 +%define anolis_release 18 %bcond_with check @@ -357,6 +357,7 @@ Patch1050: 1050-target-i386-Add-new-Hygon-Dharma-CPU-model.patch Patch1051: 1051-vfio-Add-vfio-based-mediated-hct-support.patch Patch1052: 1052-hw-net-virtio-net-Update-event-idx-if-guest-has-made.patch Patch1053: 1053-target-i386-csv-Release-CSV3-shared-pages-after-unma.patch +Patch1054: 1054-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch ExclusiveArch: x86_64 aarch64 loongarch64 @@ -1920,6 +1921,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Sun Aug 18 2024 Wencheng Yang - 2:8.2.0-18 +- Patch1054: 1054-virtio-net-Fix-network-stall-at-the-host-side-waitin.patch + (Fix network stall at the host side waiting for kick) + * Mon Jun 20 2024 Wencheng Yang - 2:8.2.0-17 - Patch1053: 1053-target-i386-csv-Release-CSV3-shared-pages-after-unma.patch (Release CSV3 shared pages after unmapping DMA) -- Gitee