From 6bd76caad823c3eac2f3d6ffd9f178faee6fc69f Mon Sep 17 00:00:00 2001 From: hkk Date: Fri, 6 Sep 2024 10:56:41 +0800 Subject: [PATCH] Synchronize DPDK upstream patches about af_xdp --- ..._xdp-fix-build-with-Wunused-function.patch | 163 ++++ 0435-net-af_xdp-use-libxdp-if-available.patch | 154 ++++ ...dp-make-UMEM-configure-more-readable.patch | 64 ++ ...-re-enable-secondary-process-support.patch | 399 +++++++++ ...-generic-dummy-packet-burst-function.patch | 73 ++ ...-probing-when-multiprocess-is-disabl.patch | 37 + ...add-missing-trailing-newline-in-logs.patch | 87 ++ ...dp-make-compatible-with-libbpf-0.7.0.patch | 121 +++ ...e-socket-is-deleted-on-Rx-queue-setu.patch | 93 ++ ...erve-fill-queue-before-socket-create.patch | 79 ++ ...ustom-program-loading-with-multiple-.patch | 62 ++ ...p-fix-shared-UMEM-fill-queue-reserve.patch | 86 ++ ...-af_xdp-allow-using-copy-mode-in-XSK.patch | 148 ++++ ...ve-XDP-library-presence-flag-setting.patch | 44 + ...clear-which-libxdp-version-is-requir.patch | 47 + ...-version-based-check-for-shared-UMEM.patch | 62 ++ ...-version-based-check-for-program-loa.patch | 48 ++ ...-log-on-XDP-program-removal-failures.patch | 64 ++ ...dp-make-compatible-with-libbpf-0.8.0.patch | 101 +++ ...PMDs-to-support-disabling-IOVA-as-PA.patch | 39 + ...af_xdp-parse-NUMA-node-ID-from-sysfs.patch | 60 ++ 0459-net-af_xdp-support-CNI-Integration.patch | 809 ++++++++++++++++++ ...af_xdp-fix-socket-handler-validation.patch | 36 + ...nfiguration-without-IOVA-field-in-mb.patch | 48 ++ ...k-atomic-intrinsics-fetch-operations.patch | 45 + ...-net-af_xdp-fix-missing-UMEM-feature.patch | 90 ++ ...net-af_xdp-fix-Rx-and-Tx-queue-state.patch | 59 ++ ...oid-error-log-for-virtual-interfaces.patch | 42 + ...p-fix-memzone-leak-on-config-failure.patch | 45 + ...ix-leak-on-XSK-configuration-failure.patch | 51 ++ ...-fix-multi-interface-support-for-k8s.patch | 388 +++++++++ ...ort-AF_XDP-device-plugin-pinned-maps.patch | 246 ++++++ 
0471-net-af_xdp-fix-port-ID-in-Rx-mbuf.patch | 62 ++ ...f_xdp-count-mbuf-allocation-failures.patch | 61 ++ 0473-net-af_xdp-fix-stats-reset.patch | 75 ++ ...af_xdp-remove-unused-local-statistic.patch | 43 + 0475-bpf-disable-on-32-bit-x86.patch | 56 ++ ...xdp-parse-UMEM-map-info-from-mempool.patch | 104 +++ ...er-interface-functions-to-its-own-fi.patch | 199 +++++ 0478-adapt-libbpf-0.8.0.patch | 26 + dpdk.spec | 99 ++- 41 files changed, 4608 insertions(+), 7 deletions(-) create mode 100644 0434-net-af_xdp-fix-build-with-Wunused-function.patch create mode 100644 0435-net-af_xdp-use-libxdp-if-available.patch create mode 100644 0436-net-af_xdp-make-UMEM-configure-more-readable.patch create mode 100644 0437-net-af_xdp-re-enable-secondary-process-support.patch create mode 100644 0438-ethdev-introduce-generic-dummy-packet-burst-function.patch create mode 100644 0439-net-af_xdp-allow-probing-when-multiprocess-is-disabl.patch create mode 100644 0440-net-af_xdp-add-missing-trailing-newline-in-logs.patch create mode 100644 0441-net-af_xdp-make-compatible-with-libbpf-0.7.0.patch create mode 100644 0442-net-af_xdp-ensure-socket-is-deleted-on-Rx-queue-setu.patch create mode 100644 0443-net-af_xdp-reserve-fill-queue-before-socket-create.patch create mode 100644 0444-net-af_xdp-fix-custom-program-loading-with-multiple-.patch create mode 100644 0445-net-af_xdp-fix-shared-UMEM-fill-queue-reserve.patch create mode 100644 0446-net-af_xdp-allow-using-copy-mode-in-XSK.patch create mode 100644 0450-net-af_xdp-move-XDP-library-presence-flag-setting.patch create mode 100644 0451-net-af_xdp-make-clear-which-libxdp-version-is-requir.patch create mode 100644 0452-net-af_xdp-avoid-version-based-check-for-shared-UMEM.patch create mode 100644 0453-net-af_xdp-avoid-version-based-check-for-program-loa.patch create mode 100644 0454-net-af_xdp-add-log-on-XDP-program-removal-failures.patch create mode 100644 0455-net-af_xdp-make-compatible-with-libbpf-0.8.0.patch create mode 100644 
0456-drivers-mark-SW-PMDs-to-support-disabling-IOVA-as-PA.patch create mode 100644 0458-net-af_xdp-parse-NUMA-node-ID-from-sysfs.patch create mode 100644 0459-net-af_xdp-support-CNI-Integration.patch create mode 100644 0460-net-af_xdp-fix-socket-handler-validation.patch create mode 100644 0461-build-clarify-configuration-without-IOVA-field-in-mb.patch create mode 100644 0462-rework-atomic-intrinsics-fetch-operations.patch create mode 100644 0463-net-af_xdp-fix-missing-UMEM-feature.patch create mode 100644 0464-net-af_xdp-fix-Rx-and-Tx-queue-state.patch create mode 100644 0465-net-af_xdp-avoid-error-log-for-virtual-interfaces.patch create mode 100644 0466-net-af_xdp-fix-memzone-leak-on-config-failure.patch create mode 100644 0467-net-af_xdp-fix-leak-on-XSK-configuration-failure.patch create mode 100644 0469-net-af_xdp-fix-multi-interface-support-for-k8s.patch create mode 100644 0470-net-af_xdp-support-AF_XDP-device-plugin-pinned-maps.patch create mode 100644 0471-net-af_xdp-fix-port-ID-in-Rx-mbuf.patch create mode 100644 0472-net-af_xdp-count-mbuf-allocation-failures.patch create mode 100644 0473-net-af_xdp-fix-stats-reset.patch create mode 100644 0474-net-af_xdp-remove-unused-local-statistic.patch create mode 100644 0475-bpf-disable-on-32-bit-x86.patch create mode 100644 0476-net-af_xdp-parse-UMEM-map-info-from-mempool.patch create mode 100644 0477-ethdev-move-driver-interface-functions-to-its-own-fi.patch create mode 100644 0478-adapt-libbpf-0.8.0.patch diff --git a/0434-net-af_xdp-fix-build-with-Wunused-function.patch b/0434-net-af_xdp-fix-build-with-Wunused-function.patch new file mode 100644 index 0000000..fc15bbd --- /dev/null +++ b/0434-net-af_xdp-fix-build-with-Wunused-function.patch @@ -0,0 +1,163 @@ +From af8d89a0c6d5cfd4d77ec1ec224e5a72996026df Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Thu, 9 Dec 2021 17:19:47 +0000 +Subject: [PATCH] net/af_xdp: fix build with -Wunused-function + +[ upstream commit af8d89a0c6d5cfd4d77ec1ec224e5a72996026df ] + 
+The get_shared_umem function is only called when the kernel +flag XDP_UMEM_UNALIGNED_CHUNK_FLAG is defined. Move the +function implementation and associated helper so that it only +gets compiled when that flag is set. + +Fixes: 74b46340e2d4 ("net/af_xdp: support shared UMEM") +Cc: stable@dpdk.org + +Signed-off-by: Ciara Loftus +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 121 ++++++++++++++-------------- + 1 file changed, 60 insertions(+), 61 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 96c2c9d939..b3ed704b36 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -697,67 +697,6 @@ find_internal_resource(struct pmd_internals *port_int) + return list; + } + +-/* Check if the netdev,qid context already exists */ +-static inline bool +-ctx_exists(struct pkt_rx_queue *rxq, const char *ifname, +- struct pkt_rx_queue *list_rxq, const char *list_ifname) +-{ +- bool exists = false; +- +- if (rxq->xsk_queue_idx == list_rxq->xsk_queue_idx && +- !strncmp(ifname, list_ifname, IFNAMSIZ)) { +- AF_XDP_LOG(ERR, "ctx %s,%i already exists, cannot share umem\n", +- ifname, rxq->xsk_queue_idx); +- exists = true; +- } +- +- return exists; +-} +- +-/* Get a pointer to an existing UMEM which overlays the rxq's mb_pool */ +-static inline int +-get_shared_umem(struct pkt_rx_queue *rxq, const char *ifname, +- struct xsk_umem_info **umem) +-{ +- struct internal_list *list; +- struct pmd_internals *internals; +- int i = 0, ret = 0; +- struct rte_mempool *mb_pool = rxq->mb_pool; +- +- if (mb_pool == NULL) +- return ret; +- +- pthread_mutex_lock(&internal_list_lock); +- +- TAILQ_FOREACH(list, &internal_list, next) { +- internals = list->eth_dev->data->dev_private; +- for (i = 0; i < internals->queue_cnt; i++) { +- struct pkt_rx_queue *list_rxq = +- &internals->rx_queues[i]; +- if (rxq == list_rxq) +- continue; +- if (mb_pool == internals->rx_queues[i].mb_pool) { +- 
if (ctx_exists(rxq, ifname, list_rxq, +- internals->if_name)) { +- ret = -1; +- goto out; +- } +- if (__atomic_load_n( +- &internals->rx_queues[i].umem->refcnt, +- __ATOMIC_ACQUIRE)) { +- *umem = internals->rx_queues[i].umem; +- goto out; +- } +- } +- } +- } +- +-out: +- pthread_mutex_unlock(&internal_list_lock); +- +- return ret; +-} +- + static int + eth_dev_configure(struct rte_eth_dev *dev) + { +@@ -1013,6 +952,66 @@ static inline uintptr_t get_base_addr(struct rte_mempool *mp, uint64_t *align) + return aligned_addr; + } + ++/* Check if the netdev,qid context already exists */ ++static inline bool ++ctx_exists(struct pkt_rx_queue *rxq, const char *ifname, ++ struct pkt_rx_queue *list_rxq, const char *list_ifname) ++{ ++ bool exists = false; ++ ++ if (rxq->xsk_queue_idx == list_rxq->xsk_queue_idx && ++ !strncmp(ifname, list_ifname, IFNAMSIZ)) { ++ AF_XDP_LOG(ERR, "ctx %s,%i already exists, cannot share umem\n", ++ ifname, rxq->xsk_queue_idx); ++ exists = true; ++ } ++ ++ return exists; ++} ++ ++/* Get a pointer to an existing UMEM which overlays the rxq's mb_pool */ ++static inline int ++get_shared_umem(struct pkt_rx_queue *rxq, const char *ifname, ++ struct xsk_umem_info **umem) ++{ ++ struct internal_list *list; ++ struct pmd_internals *internals; ++ int i = 0, ret = 0; ++ struct rte_mempool *mb_pool = rxq->mb_pool; ++ ++ if (mb_pool == NULL) ++ return ret; ++ ++ pthread_mutex_lock(&internal_list_lock); ++ ++ TAILQ_FOREACH(list, &internal_list, next) { ++ internals = list->eth_dev->data->dev_private; ++ for (i = 0; i < internals->queue_cnt; i++) { ++ struct pkt_rx_queue *list_rxq = ++ &internals->rx_queues[i]; ++ if (rxq == list_rxq) ++ continue; ++ if (mb_pool == internals->rx_queues[i].mb_pool) { ++ if (ctx_exists(rxq, ifname, list_rxq, ++ internals->if_name)) { ++ ret = -1; ++ goto out; ++ } ++ if (__atomic_load_n(&internals->rx_queues[i].umem->refcnt, ++ __ATOMIC_ACQUIRE)) { ++ *umem = internals->rx_queues[i].umem; ++ goto out; ++ } ++ } ++ } ++ } ++ 
++out: ++ pthread_mutex_unlock(&internal_list_lock); ++ ++ return ret; ++} ++ + static struct + xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + struct pkt_rx_queue *rxq) +-- +2.33.0 + diff --git a/0435-net-af_xdp-use-libxdp-if-available.patch b/0435-net-af_xdp-use-libxdp-if-available.patch new file mode 100644 index 0000000..3db0e6d --- /dev/null +++ b/0435-net-af_xdp-use-libxdp-if-available.patch @@ -0,0 +1,154 @@ +From fa4dfda5fe9c3483944955986b60f4d536c4a8bc Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Fri, 28 Jan 2022 09:50:29 +0000 +Subject: [PATCH] net/af_xdp: use libxdp if available + +[ upstream commit fa4dfda5fe9c3483944955986b60f4d536c4a8bc ] + +AF_XDP support is deprecated in libbpf since v0.7.0 [1]. The libxdp library +now provides the functionality which once was in libbpf and which the +AF_XDP PMD relies on. This commit updates the AF_XDP meson build to use the +libxdp library if a version >= v1.2.2 is available. If it is not available, +only versions of libbpf prior to v0.7.0 are allowed, as they still contain +the required AF_XDP functionality. + +libbpf still remains a dependency even if libxdp is present, as we use +libbpf APIs for program loading. + +The minimum required kernel version for libxdp for use with AF_XDP is v5.3. +For the library to be fully-featured, a kernel v5.10 or newer is +recommended. The full compatibility information can be found in the libxdp +README. + +v1.2.2 of libxdp includes an important fix required for linking with DPDK +which is why this version or greater is required. Meson uses pkg-config to +verify the version of libxdp on the system, so it is necessary that the +library is discoverable using pkg-config in order for the PMD to use it. 
To +verify this, you can run: pkg-config --modversion libxdp + +[1] https://github.com/libbpf/libbpf/commit/277846bc6c15 + +Signed-off-by: Ciara Loftus +--- + doc/guides/nics/af_xdp.rst | 6 ++-- + drivers/net/af_xdp/compat.h | 6 +++- + drivers/net/af_xdp/meson.build | 39 +++++++++++++++++++++----- + drivers/net/af_xdp/rte_eth_af_xdp.c | 1 - + 4 files changed, 43 insertions(+), 13 deletions(-) + +diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst +index c9d0e1ad6c..db02ea1984 100644 +--- a/doc/guides/nics/af_xdp.rst ++++ b/doc/guides/nics/af_xdp.rst +@@ -43,9 +43,7 @@ Prerequisites + This is a Linux-specific PMD, thus the following prerequisites apply: + + * A Linux Kernel (version > v4.18) with XDP sockets configuration enabled; +-* libbpf (within kernel version > v5.1-rc4) with latest af_xdp support installed, +- User can install libbpf via `make install_lib` && `make install_headers` in +- /tools/lib/bpf; ++* Both libxdp >=v1.2.2 and libbpf libraries installed, or, libbpf <=v0.6.0 + * A Kernel bound interface to attach to; + * For need_wakeup feature, it requires kernel version later than v5.3-rc1; + * For PMD zero copy, it requires kernel version later than v5.4-rc1; +@@ -143,4 +141,4 @@ Limitations + NAPI context from a watchdog timer instead of from softirqs. More information + on this feature can be found at [1]. + +- [1] https://lwn.net/Articles/837010/ +\ No newline at end of file ++ [1] https://lwn.net/Articles/837010/ + +diff --git a/drivers/net/af_xdp/compat.h b/drivers/net/af_xdp/compat.h +index 3880dc7dd7..bf40c6572e 100644 +--- a/drivers/net/af_xdp/compat.h ++++ b/drivers/net/af_xdp/compat.h +@@ -2,12 +2,16 @@ + * Copyright(c) 2020 Intel Corporation. 
+ */ + ++#ifdef RTE_NET_AF_XDP_LIBXDP ++#include ++#else + #include ++#endif + #include + #include + + #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE && \ +- defined(RTE_LIBRTE_AF_XDP_PMD_SHARED_UMEM) ++ defined(RTE_NET_AF_XDP_SHARED_UMEM) + #define ETH_AF_XDP_SHARED_UMEM 1 + #endif + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 3ed2b29784..93e895eab9 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -9,19 +9,44 @@ endif + + sources = files('rte_eth_af_xdp.c') + ++xdp_dep = dependency('libxdp', version : '>=1.2.2', required: false, method: 'pkg-config') + bpf_dep = dependency('libbpf', required: false, method: 'pkg-config') + if not bpf_dep.found() + bpf_dep = cc.find_library('bpf', required: false) + endif + +-if bpf_dep.found() and cc.has_header('bpf/xsk.h') and cc.has_header('linux/if_xdp.h') +- ext_deps += bpf_dep +- bpf_ver_dep = dependency('libbpf', version : '>=0.2.0', +- required: false, method: 'pkg-config') +- if bpf_ver_dep.found() +- dpdk_conf.set('RTE_LIBRTE_AF_XDP_PMD_SHARED_UMEM', 1) ++if cc.has_header('linux/if_xdp.h') ++ if xdp_dep.found() and cc.has_header('xdp/xsk.h') ++ if bpf_dep.found() and cc.has_header('bpf/bpf.h') ++ cflags += ['-DRTE_NET_AF_XDP_LIBXDP'] ++ cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] ++ ext_deps += xdp_dep ++ ext_deps += bpf_dep ++ else ++ build = false ++ reason = 'missing dependency, libbpf' ++ endif ++ elif bpf_dep.found() and cc.has_header('bpf/xsk.h') and cc.has_header('bpf/bpf.h') ++ # libxdp not found. Rely solely on libbpf for xsk functionality ++ # which is only available in versions <= v0.6.0. 
++ bpf_ver_dep = dependency('libbpf', version : '<=0.6.0', ++ required: false, method: 'pkg-config') ++ if bpf_ver_dep.found() ++ ext_deps += bpf_dep ++ bpf_shumem_ver_dep = dependency('libbpf', version : '>=0.2.0', ++ required: false, method: 'pkg-config') ++ if bpf_shumem_ver_dep.found() ++ cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] ++ endif ++ else ++ build = false ++ reason = 'missing dependency, "libxdp" or "libbpf <= v0.6.0"' ++ endif ++ else ++ build = false ++ reason = 'missing dependency, "libxdp" and "libbpf"' + endif + else + build = false +- reason = 'missing dependency, "libbpf"' ++ reason = 'missing header, "linux/if_xdp.h"' + endif +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index b3ed704b36..1b6192fa44 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -16,7 +16,6 @@ + #include + #include "af_xdp_deps.h" + #include +-#include + + #include + #include +-- +2.33.0 + diff --git a/0436-net-af_xdp-make-UMEM-configure-more-readable.patch b/0436-net-af_xdp-make-UMEM-configure-more-readable.patch new file mode 100644 index 0000000..97d7fd8 --- /dev/null +++ b/0436-net-af_xdp-make-UMEM-configure-more-readable.patch @@ -0,0 +1,64 @@ +From e1543baea37db002238a30d120a58472fb6471a7 Mon Sep 17 00:00:00 2001 +From: Haiyue Wang +Date: Wed, 9 Feb 2022 20:43:58 +0800 +Subject: [PATCH] net/af_xdp: make UMEM configure more readable + +[ upstream commit e1543baea37db002238a30d120a58472fb6471a7 ] + +The below compile time defined style make the code not so readable, the +first function end block is after "#endif" segment. 
+ + #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) + + xdp_umem_configure() + { + + #else + xdp_umem_configure() + { + + #endif + 'shared code block' + } + +Signed-off-by: Haiyue Wang +Acked-by: Ciara Loftus +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 1b6192fa44..802f912cb7 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1078,6 +1078,12 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + __atomic_store_n(&umem->refcnt, 1, __ATOMIC_RELEASE); + } + ++ return umem; ++ ++err: ++ xdp_umem_destroy(umem); ++ return NULL; ++} + #else + static struct + xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, +@@ -1138,13 +1144,13 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + } + umem->mz = mz; + +-#endif + return umem; + + err: + xdp_umem_destroy(umem); + return NULL; + } ++#endif + + static int + load_custom_xdp_prog(const char *prog_path, int if_index, struct bpf_map **map) +-- +2.33.0 + diff --git a/0437-net-af_xdp-re-enable-secondary-process-support.patch b/0437-net-af_xdp-re-enable-secondary-process-support.patch new file mode 100644 index 0000000..0c06899 --- /dev/null +++ b/0437-net-af_xdp-re-enable-secondary-process-support.patch @@ -0,0 +1,399 @@ +From 9876cf8316b3ef31dea2381024cf92a5af945616 Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Wed, 9 Feb 2022 09:48:08 +0000 +Subject: [PATCH] net/af_xdp: re-enable secondary process support + +[ upstream commit 9876cf8316b3ef31dea2381024cf92a5af945616 ] + +Secondary process support had been disabled for the AF_XDP PMD because +there was no logic in place to share the AF_XDP socket file descriptors +between the processes. This commit introduces this logic using the IPC +APIs. 
+ +Rx and Tx are disabled in the secondary process due to memory mapping of +the AF_XDP rings being assigned by the kernel in the primary process only. +However other operations including retrieval of stats are permitted. + +Signed-off-by: Ciara Loftus +Acked-by: Stephen Hemminger +--- + doc/guides/nics/af_xdp.rst | 9 ++ + doc/guides/nics/features/af_xdp.ini | 1 + + drivers/net/af_xdp/rte_eth_af_xdp.c | 215 +++++++++++++++++++++++-- + 3 files changed, 211 insertions(+), 15 deletions(-) + +diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst +index db02ea1984..3d8b70e3f8 100644 +--- a/doc/guides/nics/af_xdp.rst ++++ b/doc/guides/nics/af_xdp.rst +@@ -141,4 +141,13 @@ Limitations + NAPI context from a watchdog timer instead of from softirqs. More information + on this feature can be found at [1]. + ++- **Secondary Processes** ++ ++ Rx and Tx are not supported for secondary processes due to memory mapping of ++ the AF_XDP rings being assigned by the kernel in the primary process only. ++ However other operations including statistics retrieval are permitted. ++ The maximum number of queues permitted for PMDs operating in this model is 8 ++ as this is the maximum number of fds that can be sent through the IPC APIs as ++ defined by RTE_MP_MAX_FD_NUM. 
++ + [1] https://lwn.net/Articles/837010/ +diff --git a/doc/guides/nics/features/af_xdp.ini b/doc/guides/nics/features/af_xdp.ini +index 54b738e616..8e7e075aaf 100644 +--- a/doc/guides/nics/features/af_xdp.ini ++++ b/doc/guides/nics/features/af_xdp.ini +@@ -9,4 +9,5 @@ Power mgmt address monitor = Y + MTU update = Y + Promiscuous mode = Y + Stats per queue = Y ++Multiprocess aware = Y + x86-64 = Y + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 802f912cb7..4a37c11960 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -80,6 +80,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE); + + #define ETH_AF_XDP_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN) + ++#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds" ++ ++static int afxdp_dev_count; ++ ++/* Message header to synchronize fds via IPC */ ++struct ipc_hdr { ++ char port_name[RTE_DEV_NAME_MAX_LEN]; ++ /* The file descriptors are in the dedicated part ++ * of the Unix message to be translated by the kernel. 
++ */ ++}; ++ + struct xsk_umem_info { + struct xsk_umem *umem; + struct rte_ring *buf_ring; +@@ -147,6 +159,10 @@ struct pmd_internals { + struct pkt_tx_queue *tx_queues; + }; + ++struct pmd_process_private { ++ int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT]; ++}; ++ + #define ETH_AF_XDP_IFACE_ARG "iface" + #define ETH_AF_XDP_START_QUEUE_ARG "start_queue" + #define ETH_AF_XDP_QUEUE_COUNT_ARG "queue_count" +@@ -795,11 +811,12 @@ static int + eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + { + struct pmd_internals *internals = dev->data->dev_private; ++ struct pmd_process_private *process_private = dev->process_private; + struct xdp_statistics xdp_stats; + struct pkt_rx_queue *rxq; + struct pkt_tx_queue *txq; + socklen_t optlen; +- int i, ret; ++ int i, ret, fd; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + optlen = sizeof(struct xdp_statistics); +@@ -815,8 +832,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + stats->ibytes += stats->q_ibytes[i]; + stats->imissed += rxq->stats.rx_dropped; + stats->oerrors += txq->stats.tx_dropped; +- ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP, +- XDP_STATISTICS, &xdp_stats, &optlen); ++ fd = process_private->rxq_xsk_fds[i]; ++ ret = fd >= 0 ? 
getsockopt(fd, SOL_XDP, XDP_STATISTICS, ++ &xdp_stats, &optlen) : -1; + if (ret != 0) { + AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n"); + return -1; +@@ -884,7 +902,7 @@ eth_dev_close(struct rte_eth_dev *dev) + int i; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return 0; ++ goto out; + + AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n", + rte_socket_id()); +@@ -927,6 +945,9 @@ eth_dev_close(struct rte_eth_dev *dev) + } + } + ++out: ++ rte_free(dev->process_private); ++ + return 0; + } + +@@ -1355,6 +1376,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, + struct rte_mempool *mb_pool) + { + struct pmd_internals *internals = dev->data->dev_private; ++ struct pmd_process_private *process_private = dev->process_private; + struct pkt_rx_queue *rxq; + int ret; + +@@ -1393,6 +1415,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, + rxq->fds[0].fd = xsk_socket__fd(rxq->xsk); + rxq->fds[0].events = POLLIN; + ++ process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd; ++ + dev->data->rx_queues[rx_queue_id] = rxq; + return 0; + +@@ -1694,6 +1718,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + { + const char *name = rte_vdev_device_name(dev); + const unsigned int numa_node = dev->device.numa_node; ++ struct pmd_process_private *process_private; + struct pmd_internals *internals; + struct rte_eth_dev *eth_dev; + int ret; +@@ -1759,9 +1784,17 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + if (ret) + goto err_free_tx; + ++ process_private = (struct pmd_process_private *) ++ rte_zmalloc_socket(name, sizeof(struct pmd_process_private), ++ RTE_CACHE_LINE_SIZE, numa_node); ++ if (process_private == NULL) { ++ AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n"); ++ goto err_free_tx; ++ } ++ + eth_dev = rte_eth_vdev_allocate(dev, 0); + if (eth_dev == NULL) +- goto err_free_tx; ++ goto err_free_pp; + + eth_dev->data->dev_private = internals; + eth_dev->data->dev_link = pmd_link; +@@ 
-1770,6 +1803,10 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + eth_dev->dev_ops = &ops; + eth_dev->rx_pkt_burst = eth_af_xdp_rx; + eth_dev->tx_pkt_burst = eth_af_xdp_tx; ++ eth_dev->process_private = process_private; ++ ++ for (i = 0; i < queue_cnt; i++) ++ process_private->rxq_xsk_fds[i] = -1; + + #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) + AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); +@@ -1777,6 +1814,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + + return eth_dev; + ++err_free_pp: ++ rte_free(process_private); + err_free_tx: + rte_free(internals->tx_queues); + err_free_rx: +@@ -1786,6 +1825,119 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + return NULL; + } + ++/* Secondary process requests rxq fds from primary. */ ++static int ++afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev) ++{ ++ struct pmd_process_private *process_private = dev->process_private; ++ struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0}; ++ struct rte_mp_msg request, *reply; ++ struct rte_mp_reply replies; ++ struct ipc_hdr *request_param = (struct ipc_hdr *)request.param; ++ int i, ret; ++ ++ /* Prepare the request */ ++ memset(&request, 0, sizeof(request)); ++ strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name)); ++ strlcpy(request_param->port_name, name, ++ sizeof(request_param->port_name)); ++ request.len_param = sizeof(*request_param); ++ ++ /* Send the request and receive the reply */ ++ AF_XDP_LOG(DEBUG, "Sending multi-process IPC request for %s\n", name); ++ ret = rte_mp_request_sync(&request, &replies, &timeout); ++ if (ret < 0 || replies.nb_received != 1) { ++ AF_XDP_LOG(ERR, "Failed to request fds from primary: %d", ++ rte_errno); ++ return -1; ++ } ++ reply = replies.msgs; ++ AF_XDP_LOG(DEBUG, "Received multi-process IPC reply for %s\n", name); ++ if (dev->data->nb_rx_queues != reply->num_fds) { ++ AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n", ++ 
reply->num_fds, dev->data->nb_rx_queues); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < reply->num_fds; i++) ++ process_private->rxq_xsk_fds[i] = reply->fds[i]; ++ ++ free(reply); ++ return 0; ++} ++ ++/* Primary process sends rxq fds to secondary. */ ++static int ++afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer) ++{ ++ struct rte_eth_dev *dev; ++ struct pmd_process_private *process_private; ++ struct rte_mp_msg reply; ++ const struct ipc_hdr *request_param = ++ (const struct ipc_hdr *)request->param; ++ struct ipc_hdr *reply_param = ++ (struct ipc_hdr *)reply.param; ++ const char *request_name = request_param->port_name; ++ int i; ++ ++ AF_XDP_LOG(DEBUG, "Received multi-process IPC request for %s\n", ++ request_name); ++ ++ /* Find the requested port */ ++ dev = rte_eth_dev_get_by_name(request_name); ++ if (!dev) { ++ AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name); ++ return -1; ++ } ++ process_private = dev->process_private; ++ ++ /* Populate the reply with the xsk fd for each queue */ ++ reply.num_fds = 0; ++ if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) { ++ AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)\n", ++ dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < dev->data->nb_rx_queues; i++) ++ reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i]; ++ ++ /* Send the reply */ ++ strlcpy(reply.name, request->name, sizeof(reply.name)); ++ strlcpy(reply_param->port_name, request_name, ++ sizeof(reply_param->port_name)); ++ reply.len_param = sizeof(*reply_param); ++ AF_XDP_LOG(DEBUG, "Sending multi-process IPC reply for %s\n", ++ reply_param->port_name); ++ if (rte_mp_reply(&reply, peer) < 0) { ++ AF_XDP_LOG(ERR, "Failed to reply to multi-process IPC request\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++/* Secondary process rx function. RX is disabled because memory mapping of the ++ * rings being assigned by the kernel in the primary process only. 
++ */ ++static uint16_t ++eth_af_xdp_rx_noop(void *queue __rte_unused, ++ struct rte_mbuf **bufs __rte_unused, ++ uint16_t nb_pkts __rte_unused) ++{ ++ return 0; ++} ++ ++/* Secondary process tx function. TX is disabled because memory mapping of the ++ * rings being assigned by the kernel in the primary process only. ++ */ ++static uint16_t ++eth_af_xdp_tx_noop(void *queue __rte_unused, ++ struct rte_mbuf **bufs __rte_unused, ++ uint16_t nb_pkts __rte_unused) ++{ ++ return 0; ++} ++ + static int + rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + { +@@ -1795,19 +1947,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT; + int shared_umem = 0; + char prog_path[PATH_MAX] = {'\0'}; +- int busy_budget = -1; ++ int busy_budget = -1, ret; + struct rte_eth_dev *eth_dev = NULL; +- const char *name; ++ const char *name = rte_vdev_device_name(dev); + +- AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", +- rte_vdev_device_name(dev)); ++ AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name); + +- name = rte_vdev_device_name(dev); + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { +- AF_XDP_LOG(ERR, "Failed to probe %s. " +- "AF_XDP PMD does not support secondary processes.\n", +- name); +- return -ENOTSUP; ++ eth_dev = rte_eth_dev_attach_secondary(name); ++ if (eth_dev == NULL) { ++ AF_XDP_LOG(ERR, "Failed to probe %s\n", name); ++ return -EINVAL; ++ } ++ eth_dev->dev_ops = &ops; ++ eth_dev->device = &dev->device; ++ eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop; ++ eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop; ++ eth_dev->process_private = (struct pmd_process_private *) ++ rte_zmalloc_socket(name, ++ sizeof(struct pmd_process_private), ++ RTE_CACHE_LINE_SIZE, ++ eth_dev->device->numa_node); ++ if (eth_dev->process_private == NULL) { ++ AF_XDP_LOG(ERR, ++ "Failed to alloc memory for process private\n"); ++ return -ENOMEM; ++ } ++ ++ /* Obtain the xsk fds from the primary process. 
*/ ++ if (afxdp_mp_request_fds(name, eth_dev)) ++ return -1; ++ ++ rte_eth_dev_probing_finish(eth_dev); ++ return 0; + } + + kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); +@@ -1842,6 +2014,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + return -1; + } + ++ /* Register IPC callback which shares xsk fds from primary to secondary */ ++ if (!afxdp_dev_count) { ++ ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds); ++ if (ret < 0) { ++ AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s", ++ name, strerror(rte_errno)); ++ return -1; ++ } ++ } ++ afxdp_dev_count++; ++ + rte_eth_dev_probing_finish(eth_dev); + + return 0; +@@ -1864,9 +2047,11 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev) + return 0; + + eth_dev_close(eth_dev); ++ if (afxdp_dev_count == 1) ++ rte_mp_action_unregister(ETH_AF_XDP_MP_KEY); ++ afxdp_dev_count--; + rte_eth_dev_release_port(eth_dev); + +- + return 0; + } + +-- +2.33.0 + diff --git a/0438-ethdev-introduce-generic-dummy-packet-burst-function.patch b/0438-ethdev-introduce-generic-dummy-packet-burst-function.patch new file mode 100644 index 0000000..9ae9937 --- /dev/null +++ b/0438-ethdev-introduce-generic-dummy-packet-burst-function.patch @@ -0,0 +1,73 @@ +From a41f593f1bce27cd94eae0e85a8085c592b14b30 Mon Sep 17 00:00:00 2001 +From: Ferruh Yigit +Date: Fri, 11 Feb 2022 19:11:42 +0000 +Subject: [PATCH] ethdev: introduce generic dummy packet burst function + +[ upstream commit a41f593f1bce27cd94eae0e85a8085c592b14b30 ] + +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Multiple PMDs have dummy/noop Rx/Tx packet burst functions. + +These dummy functions are very simple, introduce a common function in +the ethdev and update drivers to use it instead of each driver having +its own functions. 
+ +Signed-off-by: Ferruh Yigit +Acked-by: Morten Brørup +Acked-by: Viacheslav Ovsiienko +Acked-by: Thomas Monjalon +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 26 ++------------- + 1 file changed, 2 insertions(+), 24 deletions(-) + create mode 100644 lib/ethdev/ethdev_driver.c + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 4a37c11960..6ac710c6bd 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1916,28 +1916,6 @@ afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer) + return 0; + } + +-/* Secondary process rx function. RX is disabled because memory mapping of the +- * rings being assigned by the kernel in the primary process only. 
+- */ +-static uint16_t +-eth_af_xdp_tx_noop(void *queue __rte_unused, +- struct rte_mbuf **bufs __rte_unused, +- uint16_t nb_pkts __rte_unused) +-{ +- return 0; +-} +- + static int + rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + { +@@ -1961,8 +1939,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + } + eth_dev->dev_ops = &ops; + eth_dev->device = &dev->device; +- eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop; +- eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop; ++ eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; ++ eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; + eth_dev->process_private = (struct pmd_process_private *) + rte_zmalloc_socket(name, + sizeof(struct pmd_process_private), +-- +2.33.0 + diff --git a/0439-net-af_xdp-allow-probing-when-multiprocess-is-disabl.patch b/0439-net-af_xdp-allow-probing-when-multiprocess-is-disabl.patch new file mode 100644 index 0000000..6ccf858 --- /dev/null +++ b/0439-net-af_xdp-allow-probing-when-multiprocess-is-disabl.patch @@ -0,0 +1,37 @@ +From 17ec9678d4647f0e90b04dc2a091e36451d16ad4 Mon Sep 17 00:00:00 2001 +From: Junxiao Shi +Date: Thu, 17 Feb 2022 11:09:06 +0000 +Subject: [PATCH] net/af_xdp: allow probing when multiprocess is disabled + +[ upstream commit 17ec9678d4647f0e90b04dc2a091e36451d16ad4 ] + +If EAL multiprocess feature has been disabled via rte_mp_disable() +function, AF_XDP driver may not be able to register its IPC callback. +Previously this leads to probe failure. +This commit adds a check for this condition so that AF_XDP can still be +used even if multiprocess is disabled. 
+ +Fixes: 9876cf8316b3 ("net/af_xdp: re-enable secondary process support") + +Signed-off-by: Junxiao Shi +Acked-by: Ciara Loftus +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 6ac710c6bd..2163df7c5c 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1995,7 +1995,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + /* Register IPC callback which shares xsk fds from primary to secondary */ + if (!afxdp_dev_count) { + ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds); +- if (ret < 0) { ++ if (ret < 0 && rte_errno != ENOTSUP) { + AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s", + name, strerror(rte_errno)); + return -1; +-- +2.33.0 + diff --git a/0440-net-af_xdp-add-missing-trailing-newline-in-logs.patch b/0440-net-af_xdp-add-missing-trailing-newline-in-logs.patch new file mode 100644 index 0000000..41fe067 --- /dev/null +++ b/0440-net-af_xdp-add-missing-trailing-newline-in-logs.patch @@ -0,0 +1,87 @@ +From 744fd4126937b7f195fac38ae24cab52d0922b04 Mon Sep 17 00:00:00 2001 +From: David Marchand +Date: Thu, 17 Feb 2022 14:06:14 +0100 +Subject: [PATCH] net/af_xdp: add missing trailing newline in logs + +[ upstream commit 744fd4126937b7f195fac38ae24cab52d0922b04 ] + +Caught while trying --in-memory mode, some log messages in this driver +are not terminated with a newline: +rte_pmd_af_xdp_probe(): net_af_xdp: Failed to register multi-process IPC +callback: Operation not supportedvdev_probe(): failed to initialize +net_af_xdp device + +Other locations in this driver had the same issue, fix all at once. 
+ +Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD") +Fixes: d8a210774e1d ("net/af_xdp: support unaligned umem chunks") +Fixes: 9876cf8316b3 ("net/af_xdp: re-enable secondary process support") +Cc: stable@dpdk.org + +Signed-off-by: David Marchand +Acked-by: Ciara Loftus +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 2163df7c5c..69dfa1b898 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1071,7 +1071,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, + rte_socket_id()); + if (umem == NULL) { +- AF_XDP_LOG(ERR, "Failed to allocate umem info"); ++ AF_XDP_LOG(ERR, "Failed to allocate umem info\n"); + return NULL; + } + +@@ -1084,7 +1084,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + ret = xsk_umem__create(&umem->umem, base_addr, umem_size, + &rxq->fq, &rxq->cq, &usr_config); + if (ret) { +- AF_XDP_LOG(ERR, "Failed to create umem"); ++ AF_XDP_LOG(ERR, "Failed to create umem\n"); + goto err; + } + umem->buffer = base_addr; +@@ -1124,7 +1124,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + + umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id()); + if (umem == NULL) { +- AF_XDP_LOG(ERR, "Failed to allocate umem info"); ++ AF_XDP_LOG(ERR, "Failed to allocate umem info\n"); + return NULL; + } + +@@ -1160,7 +1160,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + &usr_config); + + if (ret) { +- AF_XDP_LOG(ERR, "Failed to create umem"); ++ AF_XDP_LOG(ERR, "Failed to create umem\n"); + goto err; + } + umem->mz = mz; +@@ -1847,7 +1847,7 @@ afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev) + AF_XDP_LOG(DEBUG, "Sending multi-process IPC request for %s\n", name); + ret = 
rte_mp_request_sync(&request, &replies, &timeout); + if (ret < 0 || replies.nb_received != 1) { +- AF_XDP_LOG(ERR, "Failed to request fds from primary: %d", ++ AF_XDP_LOG(ERR, "Failed to request fds from primary: %d\n", + rte_errno); + return -1; + } +@@ -1996,7 +1996,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + if (!afxdp_dev_count) { + ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds); + if (ret < 0 && rte_errno != ENOTSUP) { +- AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s", ++ AF_XDP_LOG(ERR, "%s: Failed to register multi-process IPC callback: %s\n", + name, strerror(rte_errno)); + return -1; + } +-- +2.33.0 + diff --git a/0441-net-af_xdp-make-compatible-with-libbpf-0.7.0.patch b/0441-net-af_xdp-make-compatible-with-libbpf-0.7.0.patch new file mode 100644 index 0000000..3f5b42a --- /dev/null +++ b/0441-net-af_xdp-make-compatible-with-libbpf-0.7.0.patch @@ -0,0 +1,121 @@ +From 8d3d9c72513ac116996a05700f18c10b332e7699 Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Thu, 17 Feb 2022 14:45:24 +0000 +Subject: [PATCH] net/af_xdp: make compatible with libbpf >= 0.7.0 + +[ upstream commit 8d3d9c72513ac116996a05700f18c10b332e7699 ] + +libbpf v0.7.0 deprecates the bpf_prog_load function. Use meson to detect +if libbpf >= v0.7.0 is linked and if so, use the recommended replacement +functions bpf_object__open_file and bpf_object__load. 
+ +Signed-off-by: Ciara Loftus +--- + drivers/net/af_xdp/compat.h | 39 +++++++++++++++++++++++++++++ + drivers/net/af_xdp/meson.build | 5 ++++ + drivers/net/af_xdp/rte_eth_af_xdp.c | 9 +++---- + 3 files changed, 48 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/af_xdp/compat.h b/drivers/net/af_xdp/compat.h +index bf40c6572e..28ea64aeaa 100644 +--- a/drivers/net/af_xdp/compat.h ++++ b/drivers/net/af_xdp/compat.h +@@ -7,6 +7,7 @@ + #else + #include <bpf/xsk.h> + #endif ++#include <bpf/bpf.h> + #include <linux/version.h> + #include <poll.h> + +@@ -58,3 +59,41 @@ tx_syscall_needed(struct xsk_ring_prod *q __rte_unused) + return 1; + } + #endif ++ ++#ifdef RTE_NET_AF_XDP_LIBBPF_OBJ_OPEN ++static int load_program(const char *prog_path, struct bpf_object **obj) ++{ ++ struct bpf_program *prog; ++ int err; ++ ++ *obj = bpf_object__open_file(prog_path, NULL); ++ err = libbpf_get_error(*obj); ++ if (err) ++ return -1; ++ ++ err = bpf_object__load(*obj); ++ if (err) ++ goto out; ++ ++ prog = bpf_object__next_program(*obj, NULL); ++ if (!prog) ++ goto out; ++ ++ return bpf_program__fd(prog); ++ ++out: ++ bpf_object__close(*obj); ++ return -1; ++} ++#else ++static int load_program(const char *prog_path, struct bpf_object **obj) ++{ ++ int ret, prog_fd; ++ ++ ret = bpf_prog_load(prog_path, BPF_PROG_TYPE_XDP, obj, &prog_fd); ++ if (ret) ++ return -1; ++ ++ return prog_fd; ++} ++#endif +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 93e895eab9..1e0de23705 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -22,6 +22,11 @@ if cc.has_header('linux/if_xdp.h') + cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] + ext_deps += xdp_dep + ext_deps += bpf_dep ++ bpf_ver_dep = dependency('libbpf', version : '>=0.7.0', ++ required: false, method: 'pkg-config') ++ if bpf_ver_dep.found() ++ cflags += ['-DRTE_NET_AF_XDP_LIBBPF_OBJ_OPEN'] ++ endif + else + build = false + reason = 'missing dependency, libbpf' +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c 
b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 69dfa1b898..2477f31c7b 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -15,7 +15,6 @@ + #include <linux/ethtool.h> + #include <linux/sockios.h> + #include "af_xdp_deps.h" +-#include <bpf/bpf.h> + + #include <rte_ethdev.h> + #include <ethdev_driver.h> +@@ -1176,13 +1175,13 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + static int + load_custom_xdp_prog(const char *prog_path, int if_index, struct bpf_map **map) + { +- int ret, prog_fd = -1; ++ int ret, prog_fd; + struct bpf_object *obj; + +- ret = bpf_prog_load(prog_path, BPF_PROG_TYPE_XDP, &obj, &prog_fd); +- if (ret) { ++ prog_fd = load_program(prog_path, &obj); ++ if (prog_fd < 0) { + AF_XDP_LOG(ERR, "Failed to load program %s\n", prog_path); +- return ret; ++ return -1; + } + + /* +-- +2.33.0 + diff --git a/0442-net-af_xdp-ensure-socket-is-deleted-on-Rx-queue-setu.patch b/0442-net-af_xdp-ensure-socket-is-deleted-on-Rx-queue-setu.patch new file mode 100644 index 0000000..d5e24a5 --- /dev/null +++ b/0442-net-af_xdp-ensure-socket-is-deleted-on-Rx-queue-setu.patch @@ -0,0 +1,93 @@ +From b26431a617e4039e6c0f65c5ee56f62f347b686b Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Fri, 18 Feb 2022 11:20:36 +0000 +Subject: [PATCH] net/af_xdp: ensure socket is deleted on Rx queue setup error + +[ upstream commit b26431a617e4039e6c0f65c5ee56f62f347b686b ] + +The Rx queue setup can fail for many reasons eg. failure to setup the +custom program, failure to allocate or reserve fill queue buffers, +failure to configure busy polling etc. When a failure like one of these +occurs, if the xsk is already set up it should be deleted before +returning. This commit ensures this happens. 
+ +Fixes: d8a210774e1d ("net/af_xdp: support unaligned umem chunks") +Fixes: 288a85aef192 ("net/af_xdp: enable custom XDP program loading") +Fixes: 055a393626ed ("net/af_xdp: prefer busy polling") +Fixes: 01fa83c94d7e ("net/af_xdp: workaround custom program loading") +Cc: stable@dpdk.org + +Signed-off-by: Ciara Loftus +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 2477f31c7b..7d5e2887b8 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1301,7 +1301,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + if (ret) { + AF_XDP_LOG(ERR, "Failed to load custom XDP program %s\n", + internals->prog_path); +- goto err; ++ goto out_umem; + } + internals->custom_prog_configured = 1; + cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; +@@ -1318,7 +1318,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + + if (ret) { + AF_XDP_LOG(ERR, "Failed to create xsk socket.\n"); +- goto err; ++ goto out_umem; + } + + /* insert the xsk into the xsks_map */ +@@ -1330,7 +1330,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + &rxq->xsk_queue_idx, &fd, 0); + if (err) { + AF_XDP_LOG(ERR, "Failed to insert xsk in map.\n"); +- goto err; ++ goto out_xsk; + } + } + +@@ -1338,7 +1338,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + ret = rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size); + if (ret) { + AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n"); +- goto err; ++ goto out_xsk; + } + #endif + +@@ -1346,20 +1346,21 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + ret = configure_preferred_busy_poll(rxq); + if (ret) { + AF_XDP_LOG(ERR, "Failed configure busy polling.\n"); +- goto err; ++ goto out_xsk; + } + } + + 
ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq); + if (ret) { +- xsk_socket__delete(rxq->xsk); + AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n"); +- goto err; ++ goto out_xsk; + } + + return 0; + +-err: ++out_xsk: ++ xsk_socket__delete(rxq->xsk); ++out_umem: + if (__atomic_sub_fetch(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) == 0) + xdp_umem_destroy(rxq->umem); + +-- +2.33.0 + diff --git a/0443-net-af_xdp-reserve-fill-queue-before-socket-create.patch b/0443-net-af_xdp-reserve-fill-queue-before-socket-create.patch new file mode 100644 index 0000000..14fb920 --- /dev/null +++ b/0443-net-af_xdp-reserve-fill-queue-before-socket-create.patch @@ -0,0 +1,79 @@ +From 81fe6720f84fde2a9fe65f688d7895ca348f0738 Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Fri, 18 Feb 2022 11:20:37 +0000 +Subject: [PATCH] net/af_xdp: reserve fill queue before socket create + +[ upstream commit 81fe6720f84fde2a9fe65f688d7895ca348f0738 ] + +Some zero copy AF_XDP drivers eg. ice require that there are addresses +already in the fill queue before the socket is created. Otherwise you may +see log messages such as: + +XSK buffer pool does not provide enough addresses to fill 2047 buffers on +Rx ring 0 + +This commit ensures that the addresses are available before creating the +socket, instead of after. 
+ +Signed-off-by: Ciara Loftus +Tested-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 7d5e2887b8..65479138d3 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1283,6 +1283,20 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + return -ENOMEM; + txq->umem = rxq->umem; + ++#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) ++ ret = rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size); ++ if (ret) { ++ AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n"); ++ goto out_umem; ++ } ++#endif ++ ++ ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq); ++ if (ret) { ++ AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n"); ++ goto out_umem; ++ } ++ + cfg.rx_size = ring_size; + cfg.tx_size = ring_size; + cfg.libbpf_flags = 0; +@@ -1334,14 +1348,6 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + } + } + +-#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) +- ret = rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size); +- if (ret) { +- AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n"); +- goto out_xsk; +- } +-#endif +- + if (rxq->busy_budget) { + ret = configure_preferred_busy_poll(rxq); + if (ret) { +@@ -1350,12 +1356,6 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + } + } + +- ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq); +- if (ret) { +- AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n"); +- goto out_xsk; +- } +- + return 0; + + out_xsk: +-- +2.33.0 + diff --git a/0444-net-af_xdp-fix-custom-program-loading-with-multiple-.patch b/0444-net-af_xdp-fix-custom-program-loading-with-multiple-.patch new file mode 100644 index 0000000..de1b773 --- /dev/null +++ 
b/0444-net-af_xdp-fix-custom-program-loading-with-multiple-.patch @@ -0,0 +1,62 @@ +From 6f6134c35e3d2340e07f86414c75413e3ac7a0bc Mon Sep 17 00:00:00 2001 +From: Junxiao Shi +Date: Wed, 9 Mar 2022 21:18:43 +0000 +Subject: [PATCH] net/af_xdp: fix custom program loading with multiple queues + +[ upstream commit 6f6134c35e3d2340e07f86414c75413e3ac7a0bc ] + +When the PMD is configured to load a custom XDP program, it sets +XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD flag to prevent libbpf from +loading its default XDP program. However, when queue_count is set to +greater than 1, this flag is only set for the first XSK socket but not +for subsequent XSK sockets. This causes XSK socket creation failure. + +This commit ensures that XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD flag is +set for all XSK socket creations when custom XDP program is being used. + +Fixes: 01fa83c94d7e ("net/af_xdp: workaround custom program loading") +Cc: stable@dpdk.org + +Signed-off-by: Junxiao Shi +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 65479138d3..9920f49870 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1307,18 +1307,19 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + cfg.bind_flags |= XDP_USE_NEED_WAKEUP; + #endif + +- if (strnlen(internals->prog_path, PATH_MAX) && +- !internals->custom_prog_configured) { +- ret = load_custom_xdp_prog(internals->prog_path, +- internals->if_index, +- &internals->map); +- if (ret) { +- AF_XDP_LOG(ERR, "Failed to load custom XDP program %s\n", +- internals->prog_path); +- goto out_umem; ++ if (strnlen(internals->prog_path, PATH_MAX)) { ++ if (!internals->custom_prog_configured) { ++ ret = load_custom_xdp_prog(internals->prog_path, ++ internals->if_index, ++ &internals->map); ++ if (ret) { ++ AF_XDP_LOG(ERR, "Failed to 
load custom XDP program %s\n", ++ internals->prog_path); ++ goto out_umem; ++ } ++ internals->custom_prog_configured = 1; + } +- internals->custom_prog_configured = 1; +- cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; ++ cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; + } + + if (internals->shared_umem) +-- +2.33.0 + diff --git a/0445-net-af_xdp-fix-shared-UMEM-fill-queue-reserve.patch b/0445-net-af_xdp-fix-shared-UMEM-fill-queue-reserve.patch new file mode 100644 index 0000000..09f58ea --- /dev/null +++ b/0445-net-af_xdp-fix-shared-UMEM-fill-queue-reserve.patch @@ -0,0 +1,86 @@ +From 6dd3286f3a62be287899551b9c422e579af95b0b Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Fri, 11 Mar 2022 13:45:13 +0000 +Subject: [PATCH] net/af_xdp: fix shared UMEM fill queue reserve + +[ upstream commit 6dd3286f3a62be287899551b9c422e579af95b0b ] + +Commit 81fe6720f84f ("net/af_xdp: reserve fill queue before socket create") +moves the fill queue reserve logic to before the creation of the socket in +order to suppress kernel logs like: + +XSK buffer pool does not provide enough addresses to fill 2047 buffers on +Rx ring 0 + +However, for queues that share umem, the fill queue reserve must occur +after the socket creation, because the fill queue is not valid until +that point. + +This commit uses the umem refcnt value to determine whether the queue is +sharing a umem, and performs the fill queue reservation either before or +after the socket creation, depending on the refcnt value. + +The kernel logs will still be seen for the shared umem queues. 
+ +Fixes: 81fe6720f84f ("net/af_xdp: reserve fill queue before socket create") + +Signed-off-by: Ciara Loftus +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 9920f49870..1e37da6e84 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1277,11 +1277,13 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + int ret = 0; + int reserve_size = ETH_AF_XDP_DFLT_NUM_DESCS; + struct rte_mbuf *fq_bufs[reserve_size]; ++ bool reserve_before; + + rxq->umem = xdp_umem_configure(internals, rxq); + if (rxq->umem == NULL) + return -ENOMEM; + txq->umem = rxq->umem; ++ reserve_before = __atomic_load_n(&rxq->umem->refcnt, __ATOMIC_ACQUIRE) <= 1; + + #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) + ret = rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size); +@@ -1291,10 +1293,13 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + } + #endif + +- ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq); +- if (ret) { +- AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n"); +- goto out_umem; ++ /* reserve fill queue of queues not (yet) sharing UMEM */ ++ if (reserve_before) { ++ ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq); ++ if (ret) { ++ AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n"); ++ goto out_umem; ++ } + } + + cfg.rx_size = ring_size; +@@ -1336,6 +1341,15 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + goto out_umem; + } + ++ if (!reserve_before) { ++ /* reserve fill queue of queues sharing UMEM */ ++ ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq); ++ if (ret) { ++ AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n"); ++ goto out_xsk; ++ } ++ } ++ + /* insert the xsk into the xsks_map */ + if 
(internals->custom_prog_configured) { + int err, fd; +-- +2.33.0 + diff --git a/0446-net-af_xdp-allow-using-copy-mode-in-XSK.patch b/0446-net-af_xdp-allow-using-copy-mode-in-XSK.patch new file mode 100644 index 0000000..1595dc3 --- /dev/null +++ b/0446-net-af_xdp-allow-using-copy-mode-in-XSK.patch @@ -0,0 +1,148 @@ +From b275e298c11aad100cf7966cdcc6d6cb5d36549a Mon Sep 17 00:00:00 2001 +From: Xiaoyun Li +Date: Tue, 14 Jun 2022 17:10:13 +0800 +Subject: [PATCH] net/af_xdp: allow using copy mode in XSK + +[ upstream commit b275e298c11aad100cf7966cdcc6d6cb5d36549a ] + + +DPDK assumes that users only want AF_XDP socket (XSK) into zero copy +mode when the kernel supports it. However, sometimes kernel driver +doesn't support it well and copy mode is more stable and preferred. + +This patch allows using devarg "-a xx:xx.x,force_copy=1" to force the +AF_XDP socket into copy mode. + +Signed-off-by: Xiaoyun Li +Reviewed-by: Andrew Rybchenko +--- + doc/guides/nics/af_xdp.rst | 2 ++ + drivers/net/af_xdp/rte_eth_af_xdp.c | 25 ++++++++++++++++++++----- + 2 files changed, 22 insertions(+), 5 deletions(-) + +diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst +index 56681c8365..d42e0f1f79 100644 +--- a/doc/guides/nics/af_xdp.rst ++++ b/doc/guides/nics/af_xdp.rst +@@ -36,6 +36,8 @@ The following options can be provided to set up an af_xdp port in DPDK. 
+ default 0); + * ``xdp_prog`` - path to custom xdp program (optional, default none); + * ``busy_budget`` - busy polling budget (optional, default 64); ++* ``force_copy`` - PMD will force AF_XDP socket into copy mode (optional, ++ default 0); + + Prerequisites + ------------- +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 1e37da6e84..fce649c2a1 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -150,6 +150,7 @@ struct pmd_internals { + bool shared_umem; + char prog_path[PATH_MAX]; + bool custom_prog_configured; ++ bool force_copy; + struct bpf_map *map; + + struct rte_ether_addr eth_addr; +@@ -168,6 +169,7 @@ struct pmd_process_private { + #define ETH_AF_XDP_SHARED_UMEM_ARG "shared_umem" + #define ETH_AF_XDP_PROG_ARG "xdp_prog" + #define ETH_AF_XDP_BUDGET_ARG "busy_budget" ++#define ETH_AF_XDP_FORCE_COPY_ARG "force_copy" + + static const char * const valid_arguments[] = { + ETH_AF_XDP_IFACE_ARG, +@@ -176,6 +178,7 @@ static const char * const valid_arguments[] = { + ETH_AF_XDP_SHARED_UMEM_ARG, + ETH_AF_XDP_PROG_ARG, + ETH_AF_XDP_BUDGET_ARG, ++ ETH_AF_XDP_FORCE_COPY_ARG, + NULL + }; + +@@ -1308,6 +1311,10 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + cfg.bind_flags = 0; + ++ /* Force AF_XDP socket into copy mode when users want it */ ++ if (internals->force_copy) ++ cfg.bind_flags |= XDP_COPY; ++ + #if defined(XDP_USE_NEED_WAKEUP) + cfg.bind_flags |= XDP_USE_NEED_WAKEUP; + #endif +@@ -1655,7 +1662,7 @@ xdp_get_channels_info(const char *if_name, int *max_queues, + static int + parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + int *queue_cnt, int *shared_umem, char *prog_path, +- int *busy_budget) ++ int *busy_budget, int *force_copy) + { + int ret; + +@@ -1691,6 +1698,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + if (ret < 0) + 
goto free_kvlist; + ++ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_FORCE_COPY_ARG, ++ &parse_integer_arg, force_copy); ++ if (ret < 0) ++ goto free_kvlist; ++ + free_kvlist: + rte_kvargs_free(kvlist); + return ret; +@@ -1729,7 +1741,7 @@ get_iface_info(const char *if_name, + static struct rte_eth_dev * + init_internals(struct rte_vdev_device *dev, const char *if_name, + int start_queue_idx, int queue_cnt, int shared_umem, +- const char *prog_path, int busy_budget) ++ const char *prog_path, int busy_budget, int force_copy) + { + const char *name = rte_vdev_device_name(dev); + const unsigned int numa_node = dev->device.numa_node; +@@ -1757,6 +1769,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + } + #endif + internals->shared_umem = shared_umem; ++ internals->force_copy = force_copy; + + if (xdp_get_channels_info(if_name, &internals->max_queue_cnt, + &internals->combined_queue_cnt)) { +@@ -1941,6 +1954,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + int shared_umem = 0; + char prog_path[PATH_MAX] = {'\0'}; + int busy_budget = -1, ret; ++ int force_copy = 0; + struct rte_eth_dev *eth_dev = NULL; + const char *name = rte_vdev_device_name(dev); + +@@ -1986,7 +2000,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, + &xsk_queue_cnt, &shared_umem, prog_path, +- &busy_budget) < 0) { ++ &busy_budget, &force_copy) < 0) { + AF_XDP_LOG(ERR, "Invalid kvargs value\n"); + return -EINVAL; + } +@@ -2001,7 +2015,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, + xsk_queue_cnt, shared_umem, prog_path, +- busy_budget); ++ busy_budget, force_copy); + if (eth_dev == NULL) { + AF_XDP_LOG(ERR, "Failed to init internals\n"); + return -1; +@@ -2060,4 +2074,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, + "queue_count= " + "shared_umem= " + "xdp_prog= " +- "busy_budget="); ++ "busy_budget= " ++ "force_copy= "); +-- +2.33.0 + 
diff --git a/0450-net-af_xdp-move-XDP-library-presence-flag-setting.patch b/0450-net-af_xdp-move-XDP-library-presence-flag-setting.patch new file mode 100644 index 0000000..8e5480d --- /dev/null +++ b/0450-net-af_xdp-move-XDP-library-presence-flag-setting.patch @@ -0,0 +1,44 @@ +From 50b855fc47f07e98895703fcb324266d1454be3e Mon Sep 17 00:00:00 2001 +From: Andrew Rybchenko +Date: Thu, 6 Oct 2022 09:26:49 +0300 +Subject: [PATCH] net/af_xdp: move XDP library presence flag setting + +[ upstream commit 50b855fc47f07e98895703fcb324266d1454be3e ] + + +RTE_NET_AF_XDP_LIBXDP is a conditional to include xdp/xsk.h and should +be set as soon as we know that the header is present. +RTE_NET_AF_XDP_SHARED_UMEM is one of conditions to use +xsk_socket__create_shared(). +Both do not depend on libbpf and bpf/bpf.h presence. + +Since else branch below returns error, there is no functional changes, +just style which will help on further rework. + +Signed-off-by: Andrew Rybchenko +Reviewed-by: Ferruh Yigit +--- + drivers/net/af_xdp/meson.build | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 1e0de23705..882d0b9518 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -17,10 +17,10 @@ endif + + if cc.has_header('linux/if_xdp.h') + if xdp_dep.found() and cc.has_header('xdp/xsk.h') ++ cflags += ['-DRTE_NET_AF_XDP_LIBXDP'] ++ cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] ++ ext_deps += xdp_dep + if bpf_dep.found() and cc.has_header('bpf/bpf.h') +- cflags += ['-DRTE_NET_AF_XDP_LIBXDP'] +- cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] +- ext_deps += xdp_dep + ext_deps += bpf_dep + bpf_ver_dep = dependency('libbpf', version : '>=0.7.0', + required: false, method: 'pkg-config') +-- +2.33.0 + diff --git a/0451-net-af_xdp-make-clear-which-libxdp-version-is-requir.patch b/0451-net-af_xdp-make-clear-which-libxdp-version-is-requir.patch new file mode 100644 index 
0000000..62bc533 --- /dev/null +++ b/0451-net-af_xdp-make-clear-which-libxdp-version-is-requir.patch @@ -0,0 +1,47 @@ +From f76dc44dedeead72c785ad911946818065a9f389 Mon Sep 17 00:00:00 2001 +From: Andrew Rybchenko +Date: Thu, 6 Oct 2022 09:26:50 +0300 +Subject: [PATCH] net/af_xdp: make clear which libxdp version is required + +[ upstream commit f76dc44dedeead72c785ad911946818065a9f389 ] + + +Include checked libxdp version in driver build skip reason. + +Signed-off-by: Andrew Rybchenko +Reviewed-by: Ferruh Yigit +--- + drivers/net/af_xdp/meson.build | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 882d0b9518..fa011c357d 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -9,7 +9,8 @@ endif + + sources = files('rte_eth_af_xdp.c') + +-xdp_dep = dependency('libxdp', version : '>=1.2.2', required: false, method: 'pkg-config') ++libxdp_ver = '>=1.2.2' ++xdp_dep = dependency('libxdp', version : libxdp_ver, required: false, method: 'pkg-config') + bpf_dep = dependency('libbpf', required: false, method: 'pkg-config') + if not bpf_dep.found() + bpf_dep = cc.find_library('bpf', required: false) +@@ -45,11 +46,11 @@ if cc.has_header('linux/if_xdp.h') + endif + else + build = false +- reason = 'missing dependency, "libxdp" or "libbpf <= v0.6.0"' ++ reason = 'missing dependency, "libxdp ' + libxdp_ver + '" or "libbpf <= v0.6.0"' + endif + else + build = false +- reason = 'missing dependency, "libxdp" and "libbpf"' ++ reason = 'missing dependency, "libxdp ' + libxdp_ver + '" and "libbpf"' + endif + else + build = false +-- +2.33.0 + diff --git a/0452-net-af_xdp-avoid-version-based-check-for-shared-UMEM.patch b/0452-net-af_xdp-avoid-version-based-check-for-shared-UMEM.patch new file mode 100644 index 0000000..97819f4 --- /dev/null +++ b/0452-net-af_xdp-avoid-version-based-check-for-shared-UMEM.patch @@ -0,0 +1,62 @@ +From 
e024c7e838fc4e4315f61b1eeb0f147b0353f708 Mon Sep 17 00:00:00 2001 +From: Andrew Rybchenko +Date: Thu, 6 Oct 2022 09:26:51 +0300 +Subject: [PATCH] net/af_xdp: avoid version-based check for shared UMEM + +[ upstream commit e024c7e838fc4e4315f61b1eeb0f147b0353f708 ] + + +Check for xsk_socket__create_shared() function instead. + +Signed-off-by: Andrew Rybchenko +Reviewed-by: Ferruh Yigit +--- + drivers/net/af_xdp/meson.build | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index fa011c357d..a01a67c7e7 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -19,7 +19,6 @@ endif + if cc.has_header('linux/if_xdp.h') + if xdp_dep.found() and cc.has_header('xdp/xsk.h') + cflags += ['-DRTE_NET_AF_XDP_LIBXDP'] +- cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] + ext_deps += xdp_dep + if bpf_dep.found() and cc.has_header('bpf/bpf.h') + ext_deps += bpf_dep +@@ -39,11 +38,6 @@ if cc.has_header('linux/if_xdp.h') + required: false, method: 'pkg-config') + if bpf_ver_dep.found() + ext_deps += bpf_dep +- bpf_shumem_ver_dep = dependency('libbpf', version : '>=0.2.0', +- required: false, method: 'pkg-config') +- if bpf_shumem_ver_dep.found() +- cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] +- endif + else + build = false + reason = 'missing dependency, "libxdp ' + libxdp_ver + '" or "libbpf <= v0.6.0"' +@@ -56,3 +50,18 @@ else + build = false + reason = 'missing header, "linux/if_xdp.h"' + endif ++ ++if build ++ xsk_check_prefix = ''' ++#ifdef RTE_NET_AF_XDP_LIBXDP ++#include <xdp/xsk.h> ++#else ++#include <bpf/xsk.h> ++#endif ++ ''' ++ ++ if cc.has_function('xsk_socket__create_shared', prefix : xsk_check_prefix, ++ dependencies : ext_deps) ++ cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] ++ endif ++endif +-- +2.33.0 + diff --git a/0453-net-af_xdp-avoid-version-based-check-for-program-loa.patch b/0453-net-af_xdp-avoid-version-based-check-for-program-loa.patch new file mode 100644 
index 0000000..0705fe2 --- /dev/null +++ b/0453-net-af_xdp-avoid-version-based-check-for-program-loa.patch @@ -0,0 +1,48 @@ +From 0ed0bc383428042c45d7d77f689bcd87b3700d5b Mon Sep 17 00:00:00 2001 +From: Andrew Rybchenko +Date: Thu, 6 Oct 2022 09:26:52 +0300 +Subject: [PATCH] net/af_xdp: avoid version-based check for program load + +[ upstream commit 0ed0bc383428042c45d7d77f689bcd87b3700d5b ] + + +Version-based checks are bad. It is better to check for required +functions. Check for bpf_object__next_program() in this case since +it appears last in libbpf among functions used to load program +without bpf_prog_load() which is deprecated in libbpf v0.7.0. + +Signed-off-by: Andrew Rybchenko +Reviewed-by: Ferruh Yigit +--- + drivers/net/af_xdp/meson.build | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index a01a67c7e7..9d5ffab96b 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -22,11 +22,6 @@ if cc.has_header('linux/if_xdp.h') + ext_deps += xdp_dep + if bpf_dep.found() and cc.has_header('bpf/bpf.h') + ext_deps += bpf_dep +- bpf_ver_dep = dependency('libbpf', version : '>=0.7.0', +- required: false, method: 'pkg-config') +- if bpf_ver_dep.found() +- cflags += ['-DRTE_NET_AF_XDP_LIBBPF_OBJ_OPEN'] +- endif + else + build = false + reason = 'missing dependency, libbpf' +@@ -64,4 +59,9 @@ if build + dependencies : ext_deps) + cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] + endif ++ if cc.has_function('bpf_object__next_program', ++ prefix : '#include ', ++ dependencies : bpf_dep) ++ cflags += ['-DRTE_NET_AF_XDP_LIBBPF_OBJ_OPEN'] ++ endif + endif +-- +2.33.0 + diff --git a/0454-net-af_xdp-add-log-on-XDP-program-removal-failures.patch b/0454-net-af_xdp-add-log-on-XDP-program-removal-failures.patch new file mode 100644 index 0000000..5a7b1e4 --- /dev/null +++ b/0454-net-af_xdp-add-log-on-XDP-program-removal-failures.patch @@ -0,0 +1,64 @@ +From 
5ff3dbe6ce4c8c15fbb09fdf2ee64d09ce3be3cc Mon Sep 17 00:00:00 2001 +From: Andrew Rybchenko +Date: Thu, 6 Oct 2022 09:26:53 +0300 +Subject: [PATCH] net/af_xdp: add log on XDP program removal failures + +[ upstream commit 5ff3dbe6ce4c8c15fbb09fdf2ee64d09ce3be3cc ] + +Make it visible in logs if something goes wrong on XDP program +removal failure. + +Signed-off-by: Andrew Rybchenko +Reviewed-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 9957de2314..f7c2321a18 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -866,18 +866,24 @@ eth_stats_reset(struct rte_eth_dev *dev) + return 0; + } + +-static void ++static int + remove_xdp_program(struct pmd_internals *internals) + { + uint32_t curr_prog_id = 0; ++ int ret; + +- if (bpf_get_link_xdp_id(internals->if_index, &curr_prog_id, +- XDP_FLAGS_UPDATE_IF_NOEXIST)) { ++ ret = bpf_get_link_xdp_id(internals->if_index, &curr_prog_id, ++ XDP_FLAGS_UPDATE_IF_NOEXIST); ++ if (ret != 0) { + AF_XDP_LOG(ERR, "bpf_get_link_xdp_id failed\n"); +- return; ++ return ret; + } +- bpf_set_link_xdp_fd(internals->if_index, -1, +- XDP_FLAGS_UPDATE_IF_NOEXIST); ++ ++ ret = bpf_set_link_xdp_fd(internals->if_index, -1, ++ XDP_FLAGS_UPDATE_IF_NOEXIST); ++ if (ret != 0) ++ AF_XDP_LOG(ERR, "bpf_set_link_xdp_fd failed\n"); ++ return ret; + } + + static void +@@ -932,7 +938,8 @@ eth_dev_close(struct rte_eth_dev *dev) + */ + dev->data->mac_addrs = NULL; + +- remove_xdp_program(internals); ++ if (remove_xdp_program(internals) != 0) ++ AF_XDP_LOG(ERR, "Error while removing XDP program.\n"); + + if (internals->shared_umem) { + struct internal_list *list; +-- +2.33.0 + diff --git a/0455-net-af_xdp-make-compatible-with-libbpf-0.8.0.patch b/0455-net-af_xdp-make-compatible-with-libbpf-0.8.0.patch new file mode 100644 index 
0000000..2bc1507 --- /dev/null +++ b/0455-net-af_xdp-make-compatible-with-libbpf-0.8.0.patch @@ -0,0 +1,101 @@ +From 1eb1846b1a2caaf77d0697747d3ecd1272ce0bfc Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Thu, 6 Oct 2022 09:26:54 +0300 +Subject: [PATCH] net/af_xdp: make compatible with libbpf 0.8.0 + +[ upstream commit 1eb1846b1a2caaf77d0697747d3ecd1272ce0bfc ] + +libbpf v0.8.0 deprecates the bpf_get_link_xdp_id() and +bpf_set_link_xdp_fd() functions. Use meson to detect if +bpf_xdp_attach() is available and if so, use the recommended +replacement functions bpf_xdp_query_id(), bpf_xdp_attach() +and bpf_xdp_detach(). + +Signed-off-by: Ciara Loftus +Signed-off-by: Andrew Rybchenko +Reviewed-by: Ferruh Yigit +--- + drivers/net/af_xdp/meson.build | 5 ++++ + drivers/net/af_xdp/rte_eth_af_xdp.c | 38 +++++++++++++++++++++++++- + 2 files changed, 42 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 9d5ffab96b..858047989e 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -64,4 +64,9 @@ if build + dependencies : bpf_dep) + cflags += ['-DRTE_NET_AF_XDP_LIBBPF_OBJ_OPEN'] + endif ++ if cc.has_function('bpf_xdp_attach', ++ prefix : '#include ', ++ dependencies : bpf_dep) ++ cflags += ['-DRTE_NET_AF_XDP_LIBBPF_XDP_ATTACH'] ++ endif + endif +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index f7c2321a18..b6ec9bf490 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -866,6 +866,40 @@ eth_stats_reset(struct rte_eth_dev *dev) + return 0; + } + ++#ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH ++ ++static int link_xdp_prog_with_dev(int ifindex, int fd, __u32 flags) ++{ ++ return bpf_xdp_attach(ifindex, fd, flags, NULL); ++} ++ ++static int ++remove_xdp_program(struct pmd_internals *internals) ++{ ++ uint32_t curr_prog_id = 0; ++ int ret; ++ ++ ret = bpf_xdp_query_id(internals->if_index, 
XDP_FLAGS_UPDATE_IF_NOEXIST, ++ &curr_prog_id); ++ if (ret != 0) { ++ AF_XDP_LOG(ERR, "bpf_xdp_query_id failed\n"); ++ return ret; ++ } ++ ++ ret = bpf_xdp_detach(internals->if_index, XDP_FLAGS_UPDATE_IF_NOEXIST, ++ NULL); ++ if (ret != 0) ++ AF_XDP_LOG(ERR, "bpf_xdp_detach failed\n"); ++ return ret; ++} ++ ++#else ++ ++static int link_xdp_prog_with_dev(int ifindex, int fd, __u32 flags) ++{ ++ return bpf_set_link_xdp_fd(ifindex, fd, flags); ++} ++ + static int + remove_xdp_program(struct pmd_internals *internals) + { +@@ -886,6 +920,8 @@ remove_xdp_program(struct pmd_internals *internals) + return ret; + } + ++#endif ++ + static void + xdp_umem_destroy(struct xsk_umem_info *umem) + { +@@ -1205,7 +1241,7 @@ load_custom_xdp_prog(const char *prog_path, int if_index, struct bpf_map **map) + } + + /* Link the program with the given network device */ +- ret = bpf_set_link_xdp_fd(if_index, prog_fd, ++ ret = link_xdp_prog_with_dev(if_index, prog_fd, + XDP_FLAGS_UPDATE_IF_NOEXIST); + if (ret) { + AF_XDP_LOG(ERR, "Failed to set prog fd %d on interface\n", +-- +2.33.0 + diff --git a/0456-drivers-mark-SW-PMDs-to-support-disabling-IOVA-as-PA.patch b/0456-drivers-mark-SW-PMDs-to-support-disabling-IOVA-as-PA.patch new file mode 100644 index 0000000..663f33f --- /dev/null +++ b/0456-drivers-mark-SW-PMDs-to-support-disabling-IOVA-as-PA.patch @@ -0,0 +1,39 @@ +From b1ae367ab8794da0d46dc7e29eef22e1ec265157 Mon Sep 17 00:00:00 2001 +From: Shijith Thotton +Date: Sat, 8 Oct 2022 02:32:11 +0530 +Subject: [PATCH] drivers: mark SW PMDs to support disabling IOVA as PA + +[ upstream commit b1ae367ab8794da0d46dc7e29eef22e1ec265157 ] + +Enabled software PMDs in IOVA as PA disabled build +as they work with IOVA as VA. 
+ +Signed-off-by: Shijith Thotton +Acked-by: Olivier Matz +--- + drivers/net/af_packet/meson.build | 1 + + drivers/net/af_xdp/meson.build | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/drivers/net/af_packet/meson.build b/drivers/net/af_packet/meson.build +index c014e9b61b..bab008d083 100644 +--- a/drivers/net/af_packet/meson.build ++++ b/drivers/net/af_packet/meson.build +@@ -6,3 +6,4 @@ if not is_linux + reason = 'only supported on Linux' + endif + sources = files('rte_eth_af_packet.c') ++pmd_supports_disable_iova_as_pa = true +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 858047989e..979b914bb6 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -70,3 +70,5 @@ if build + cflags += ['-DRTE_NET_AF_XDP_LIBBPF_XDP_ATTACH'] + endif + endif ++ ++pmd_supports_disable_iova_as_pa = true +-- +2.33.0 + diff --git a/0458-net-af_xdp-parse-NUMA-node-ID-from-sysfs.patch b/0458-net-af_xdp-parse-NUMA-node-ID-from-sysfs.patch new file mode 100644 index 0000000..f72ad98 --- /dev/null +++ b/0458-net-af_xdp-parse-NUMA-node-ID-from-sysfs.patch @@ -0,0 +1,60 @@ +From 3d28387cbc480a12b861294d36deaa26978419e1 Mon Sep 17 00:00:00 2001 +From: Frank Du +Date: Mon, 12 Dec 2022 08:48:57 +0800 +Subject: [PATCH] net/af_xdp: parse NUMA node ID from sysfs + +[ upstream commit 3d28387cbc480a12b861294d36deaa26978419e1 ] + +Get from /sys/class/net/{if}/device/numa_node. 
+ +Signed-off-by: Frank Du +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index b6ec9bf490..38b9d36ab5 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -39,6 +39,7 @@ + #include + + #include "compat.h" ++#include "eal_filesystem.h" + + #ifndef SO_PREFER_BUSY_POLL + #define SO_PREFER_BUSY_POLL 69 +@@ -2038,9 +2039,6 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + return -EINVAL; + } + +- if (dev->device.numa_node == SOCKET_ID_ANY) +- dev->device.numa_node = rte_socket_id(); +- + if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, + &xsk_queue_cnt, &shared_umem, prog_path, + &busy_budget, &force_copy) < 0) { +@@ -2053,6 +2051,19 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + return -EINVAL; + } + ++ /* get numa node id from net sysfs */ ++ if (dev->device.numa_node == SOCKET_ID_ANY) { ++ unsigned long numa = 0; ++ char numa_path[PATH_MAX]; ++ ++ snprintf(numa_path, sizeof(numa_path), "/sys/class/net/%s/device/numa_node", ++ if_name); ++ if (eal_parse_sysfs_value(numa_path, &numa) != 0) ++ dev->device.numa_node = rte_socket_id(); ++ else ++ dev->device.numa_node = numa; ++ } ++ + busy_budget = busy_budget == -1 ? 
ETH_AF_XDP_DFLT_BUSY_BUDGET : + busy_budget; + +-- +2.33.0 + diff --git a/0459-net-af_xdp-support-CNI-Integration.patch b/0459-net-af_xdp-support-CNI-Integration.patch new file mode 100644 index 0000000..80e0415 --- /dev/null +++ b/0459-net-af_xdp-support-CNI-Integration.patch @@ -0,0 +1,809 @@ +From 7fc6ae50369d75b9aa550072182fa92f8c4e13a4 Mon Sep 17 00:00:00 2001 +From: Shibin Koikkara Reeny +Date: Wed, 15 Feb 2023 16:30:14 +0000 +Subject: [PATCH] net/af_xdp: support CNI Integration + +[ upstream commit 7fc6ae50369d75b9aa550072182fa92f8c4e13a4 ] + +Integrate support for the AF_XDP CNI and device plugin [1] so that the +DPDK AF_XDP PMD can work in an unprivileged container environment. + +Part of the AF_XDP PMD initialization process involves loading an eBPF +program onto the given netdev. This operation requires privileges, which +prevents the PMD from being able to work in an unprivileged container +(without root access). The plugin CNI handles the program loading. CNI +open Unix Domain Socket (UDS) and waits listening for a client to make +requests over that UDS. The client(DPDK) connects and a "handshake" +occurs, then the File Descriptor which points to the XSKMAP associated +with the loaded eBPF program is handed over to the client. The client +can then proceed with creating an AF_XDP socket and inserting the socket +into the XSKMAP pointed to by the FD received on the UDS. + +A new vdev arg "use_cni" is created to indicate user wishes to run +the PMD in unprivileged mode and to receive the XSKMAP FD from the CNI. + +When this flag is set, the XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD libbpf +flag should be used when creating the socket, which tells libbpf not to +load the default libbpf program on the netdev. We tell libbpf not to do +this because the loading is handled by the CNI in this scenario. + +Patch include howto doc explain how to configure AF_XDP CNI to working +with DPDK. 
+ +[1]: https://github.com/intel/afxdp-plugins-for-kubernetes + +Signed-off-by: Shibin Koikkara Reeny +Tested-by: Anatoly Burakov +Reviewed-by: Qi Zhang +Acked-by: John McNamara +Acked-by: Ferruh Yigit +--- + doc/guides/howto/af_xdp_cni.rst | 255 ++++++++++++++++++++ + doc/guides/howto/index.rst | 1 + + drivers/net/af_xdp/rte_eth_af_xdp.c | 349 +++++++++++++++++++++++++++- + 3 files changed, 613 insertions(+), 11 deletions(-) + create mode 100644 doc/guides/howto/af_xdp_cni.rst + +diff --git a/doc/guides/howto/af_xdp_cni.rst b/doc/guides/howto/af_xdp_cni.rst +new file mode 100644 +index 0000000000..a38f02450e +--- /dev/null ++++ b/doc/guides/howto/af_xdp_cni.rst +@@ -0,0 +1,255 @@ ++.. SPDX-License-Identifier: BSD-3-Clause ++ Copyright(c) 2023 Intel Corporation. ++ ++Using a CNI with the AF_XDP driver ++================================== ++ ++Introduction ++------------ ++ ++CNI, the Container Network Interface, is a technology for configuring ++container network interfaces ++and which can be used to setup Kubernetes networking. ++AF_XDP is a Linux socket Address Family that enables an XDP program ++to redirect packets to a memory buffer in userspace. ++ ++This document explains how to enable the `AF_XDP Plugin for Kubernetes`_ within ++a DPDK application using the `AF_XDP PMD`_ to connect and use these technologies. ++ ++.. _AF_XDP Plugin for Kubernetes: https://github.com/intel/afxdp-plugins-for-kubernetes ++ ++ ++Background ++---------- ++ ++The standard `AF_XDP PMD`_ initialization process involves loading an eBPF program ++onto the kernel netdev to be used by the PMD. ++This operation requires root or escalated Linux privileges ++and thus prevents the PMD from working in an unprivileged container. ++The AF_XDP CNI plugin handles this situation ++by providing a device plugin that performs the program loading. ++ ++At a technical level the CNI opens a Unix Domain Socket and listens for a client ++to make requests over that socket. 
++A DPDK application acting as a client connects and initiates a configuration "handshake". ++The client then receives a file descriptor which points to the XSKMAP ++associated with the loaded eBPF program. ++The XSKMAP is a BPF map of AF_XDP sockets (XSK). ++The client can then proceed with creating an AF_XDP socket ++and inserting that socket into the XSKMAP pointed to by the descriptor. ++ ++The EAL vdev argument ``use_cni`` is used to indicate that the user wishes ++to run the PMD in unprivileged mode and to receive the XSKMAP file descriptor ++from the CNI. ++When this flag is set, ++the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag ++should be used when creating the socket ++to instruct libbpf not to load the default libbpf program on the netdev. ++Instead the loading is handled by the CNI. ++ ++.. _AF_XDP PMD: https://doc.dpdk.org/guides/nics/af_xdp.html ++ ++.. note:: ++ ++ The Unix Domain Socket file path appear in the end user is "/tmp/afxdp.sock". ++ ++ ++Prerequisites ++------------- ++ ++Docker and container prerequisites: ++ ++* Set up the device plugin ++ as described in the instructions for `AF_XDP Plugin for Kubernetes`_. ++ ++* The Docker image should contain the libbpf and libxdp libraries, ++ which are dependencies for AF_XDP, ++ and should include support for the ``ethtool`` command. ++ ++* The Pod should have enabled the capabilities ``CAP_NET_RAW`` and ``CAP_BPF`` ++ for AF_XDP along with support for hugepages. ++ ++* Increase locked memory limit so containers have enough memory for packet buffers. ++ For example: ++ ++ .. code-block:: console ++ ++ cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/limits.conf ++ [Service] ++ LimitMEMLOCK=infinity ++ EOF ++ ++* dpdk-testpmd application should have AF_XDP feature enabled. ++ ++ For further information see the docs for the: :doc:`../../nics/af_xdp`. ++ ++ ++Example ++------- ++ ++Howto run dpdk-testpmd with CNI plugin: ++ ++* Clone the CNI plugin ++ ++ .. 
code-block:: console ++ ++ # git clone https://github.com/intel/afxdp-plugins-for-kubernetes.git ++ ++* Build the CNI plugin ++ ++ .. code-block:: console ++ ++ # cd afxdp-plugins-for-kubernetes/ ++ # make build ++ ++ .. note:: ++ ++ CNI plugin has a dependence on the config.json. ++ ++ Sample Config.json ++ ++ .. code-block:: json ++ ++ { ++ "logLevel":"debug", ++ "logFile":"afxdp-dp-e2e.log", ++ "pools":[ ++ { ++ "name":"e2e", ++ "mode":"primary", ++ "timeout":30, ++ "ethtoolCmds" : ["-L -device- combined 1"], ++ "devices":[ ++ { ++ "name":"ens785f0" ++ } ++ ] ++ } ++ ] ++ } ++ ++ For further reference please use the `config.json`_ ++ ++ .. _config.json: https://github.com/intel/afxdp-plugins-for-kubernetes/blob/v0.0.2/test/e2e/config.json ++ ++* Create the Network Attachment definition ++ ++ .. code-block:: console ++ ++ # kubectl create -f nad.yaml ++ ++ Sample nad.yml ++ ++ .. code-block:: yaml ++ ++ apiVersion: "k8s.cni.cncf.io/v1" ++ kind: NetworkAttachmentDefinition ++ metadata: ++ name: afxdp-e2e-test ++ annotations: ++ k8s.v1.cni.cncf.io/resourceName: afxdp/e2e ++ spec: ++ config: '{ ++ "cniVersion": "0.3.0", ++ "type": "afxdp", ++ "mode": "cdq", ++ "logFile": "afxdp-cni-e2e.log", ++ "logLevel": "debug", ++ "ipam": { ++ "type": "host-local", ++ "subnet": "192.168.1.0/24", ++ "rangeStart": "192.168.1.200", ++ "rangeEnd": "192.168.1.216", ++ "routes": [ ++ { "dst": "0.0.0.0/0" } ++ ], ++ "gateway": "192.168.1.1" ++ } ++ }' ++ ++ For further reference please use the `nad.yaml`_ ++ ++ .. _nad.yaml: https://github.com/intel/afxdp-plugins-for-kubernetes/blob/v0.0.2/test/e2e/nad.yaml ++ ++* Build the Docker image ++ ++ .. code-block:: console ++ ++ # docker build -t afxdp-e2e-test -f Dockerfile . ++ ++ Sample Dockerfile: ++ ++ .. 
code-block:: console ++ ++ FROM ubuntu:20.04 ++ RUN apt-get update -y ++ RUN apt install build-essential libelf-dev -y ++ RUN apt-get install iproute2 acl -y ++ RUN apt install python3-pyelftools ethtool -y ++ RUN apt install libnuma-dev libjansson-dev libpcap-dev net-tools -y ++ RUN apt-get install clang llvm -y ++ COPY ./libbpf.tar.gz /tmp ++ RUN cd /tmp && tar -xvmf libbpf.tar.gz && cd libbpf/src && make install ++ COPY ./libxdp.tar.gz /tmp ++ RUN cd /tmp && tar -xvmf libxdp.tar.gz && cd libxdp && make install ++ ++ .. note:: ++ ++ All the files that need to COPY-ed should be in the same directory as the Dockerfile ++ ++* Run the Pod ++ ++ .. code-block:: console ++ ++ # kubectl create -f pod.yaml ++ ++ Sample pod.yaml: ++ ++ .. code-block:: yaml ++ ++ apiVersion: v1 ++ kind: Pod ++ metadata: ++ name: afxdp-e2e-test ++ annotations: ++ k8s.v1.cni.cncf.io/networks: afxdp-e2e-test ++ spec: ++ containers: ++ - name: afxdp ++ image: afxdp-e2e-test:latest ++ imagePullPolicy: Never ++ env: ++ - name: LD_LIBRARY_PATH ++ value: /usr/lib64/:/usr/local/lib/ ++ command: ["tail", "-f", "/dev/null"] ++ securityContext: ++ capabilities: ++ add: ++ - CAP_NET_RAW ++ - CAP_BPF ++ resources: ++ requests: ++ hugepages-2Mi: 2Gi ++ memory: 2Gi ++ afxdp/e2e: '1' ++ limits: ++ hugepages-2Mi: 2Gi ++ memory: 2Gi ++ afxdp/e2e: '1' ++ ++ For further reference please use the `pod.yaml`_ ++ ++ .. _pod.yaml: https://github.com/intel/afxdp-plugins-for-kubernetes/blob/v0.0.2/test/e2e/pod-1c1d.yaml ++ ++* Run DPDK with a command like the following: ++ ++ .. code-block:: console ++ ++ kubectl exec -i --container -- \ ++ //dpdk-testpmd -l 0,1 --no-pci \ ++ --vdev=net_af_xdp0,use_cni=1,iface= \ ++ -- --no-mlockall --in-memory ++ ++For further reference please use the `e2e`_ test case in `AF_XDP Plugin for Kubernetes`_ ++ ++ .. 
_e2e: https://github.com/intel/afxdp-plugins-for-kubernetes/tree/v0.0.2/test/e2e +diff --git a/doc/guides/howto/index.rst b/doc/guides/howto/index.rst +index bf6337d021..71a3381c36 100644 +--- a/doc/guides/howto/index.rst ++++ b/doc/guides/howto/index.rst +@@ -8,6 +8,7 @@ HowTo Guides + :maxdepth: 2 + :numbered: + ++ af_xdp_cni + lm_bond_virtio_sriov + lm_virtio_vhost_user + flow_bifurcation +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 38b9d36ab5..02c874d51e 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -82,6 +83,23 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE); + + #define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds" + ++#define MAX_LONG_OPT_SZ 64 ++#define UDS_MAX_FD_NUM 2 ++#define UDS_MAX_CMD_LEN 64 ++#define UDS_MAX_CMD_RESP 128 ++#define UDS_XSK_MAP_FD_MSG "/xsk_map_fd" ++#define UDS_SOCK "/tmp/afxdp.sock" ++#define UDS_CONNECT_MSG "/connect" ++#define UDS_HOST_OK_MSG "/host_ok" ++#define UDS_HOST_NAK_MSG "/host_nak" ++#define UDS_VERSION_MSG "/version" ++#define UDS_XSK_MAP_FD_MSG "/xsk_map_fd" ++#define UDS_XSK_SOCKET_MSG "/xsk_socket" ++#define UDS_FD_ACK_MSG "/fd_ack" ++#define UDS_FD_NAK_MSG "/fd_nak" ++#define UDS_FIN_MSG "/fin" ++#define UDS_FIN_ACK_MSG "/fin_ack" ++ + static int afxdp_dev_count; + + /* Message header to synchronize fds via IPC */ +@@ -152,6 +170,7 @@ struct pmd_internals { + char prog_path[PATH_MAX]; + bool custom_prog_configured; + bool force_copy; ++ bool use_cni; + struct bpf_map *map; + + struct rte_ether_addr eth_addr; +@@ -171,6 +190,7 @@ struct pmd_process_private { + #define ETH_AF_XDP_PROG_ARG "xdp_prog" + #define ETH_AF_XDP_BUDGET_ARG "busy_budget" + #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy" ++#define ETH_AF_XDP_USE_CNI_ARG "use_cni" + + static const char * const valid_arguments[] = { + ETH_AF_XDP_IFACE_ARG, +@@ -180,6 +200,7 @@ 
static const char * const valid_arguments[] = { + ETH_AF_XDP_PROG_ARG, + ETH_AF_XDP_BUDGET_ARG, + ETH_AF_XDP_FORCE_COPY_ARG, ++ ETH_AF_XDP_USE_CNI_ARG, + NULL + }; + +@@ -1130,7 +1151,8 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + ret = xsk_umem__create(&umem->umem, base_addr, umem_size, + &rxq->fq, &rxq->cq, &usr_config); + if (ret) { +- AF_XDP_LOG(ERR, "Failed to create umem\n"); ++ AF_XDP_LOG(ERR, "Failed to create umem [%d]: [%s]\n", ++ errno, strerror(errno)); + goto err; + } + umem->buffer = base_addr; +@@ -1315,6 +1337,245 @@ configure_preferred_busy_poll(struct pkt_rx_queue *rxq) + return 0; + } + ++static int ++init_uds_sock(struct sockaddr_un *server) ++{ ++ int sock; ++ ++ sock = socket(AF_UNIX, SOCK_SEQPACKET, 0); ++ if (sock < 0) { ++ AF_XDP_LOG(ERR, "Failed to opening stream socket\n"); ++ return -1; ++ } ++ ++ server->sun_family = AF_UNIX; ++ strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path)); ++ ++ if (connect(sock, (struct sockaddr *)server, sizeof(struct sockaddr_un)) < 0) { ++ close(sock); ++ AF_XDP_LOG(ERR, "Error connecting stream socket errno = [%d]: [%s]\n", ++ errno, strerror(errno)); ++ return -1; ++ } ++ ++ return sock; ++} ++ ++struct msg_internal { ++ char response[UDS_MAX_CMD_RESP]; ++ int len_param; ++ int num_fds; ++ int fds[UDS_MAX_FD_NUM]; ++}; ++ ++static int ++send_msg(int sock, char *request, int *fd) ++{ ++ int snd; ++ struct iovec iov; ++ struct msghdr msgh; ++ struct cmsghdr *cmsg; ++ struct sockaddr_un dst; ++ char control[CMSG_SPACE(sizeof(*fd))]; ++ ++ memset(&dst, 0, sizeof(dst)); ++ dst.sun_family = AF_UNIX; ++ strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path)); ++ ++ /* Initialize message header structure */ ++ memset(&msgh, 0, sizeof(msgh)); ++ memset(control, 0, sizeof(control)); ++ iov.iov_base = request; ++ iov.iov_len = strlen(request); ++ ++ msgh.msg_name = &dst; ++ msgh.msg_namelen = sizeof(dst); ++ msgh.msg_iov = &iov; ++ msgh.msg_iovlen = 1; ++ msgh.msg_control = control; 
++ msgh.msg_controllen = sizeof(control); ++ ++ /* Translate the FD. */ ++ cmsg = CMSG_FIRSTHDR(&msgh); ++ cmsg->cmsg_len = CMSG_LEN(sizeof(*fd)); ++ cmsg->cmsg_level = SOL_SOCKET; ++ cmsg->cmsg_type = SCM_RIGHTS; ++ memcpy(CMSG_DATA(cmsg), fd, sizeof(*fd)); ++ ++ /* Send the request message. */ ++ do { ++ snd = sendmsg(sock, &msgh, 0); ++ } while (snd < 0 && errno == EINTR); ++ ++ return snd; ++} ++ ++static int ++read_msg(int sock, char *response, struct sockaddr_un *s, int *fd) ++{ ++ int msglen; ++ struct msghdr msgh; ++ struct iovec iov; ++ char control[CMSG_SPACE(sizeof(*fd))]; ++ struct cmsghdr *cmsg; ++ ++ /* Initialize message header structure */ ++ memset(&msgh, 0, sizeof(msgh)); ++ iov.iov_base = response; ++ iov.iov_len = UDS_MAX_CMD_RESP; ++ ++ msgh.msg_name = s; ++ msgh.msg_namelen = sizeof(*s); ++ msgh.msg_iov = &iov; ++ msgh.msg_iovlen = 1; ++ msgh.msg_control = control; ++ msgh.msg_controllen = sizeof(control); ++ ++ msglen = recvmsg(sock, &msgh, 0); ++ ++ /* zero length message means socket was closed */ ++ if (msglen == 0) ++ return 0; ++ ++ if (msglen < 0) { ++ AF_XDP_LOG(ERR, "recvmsg failed, %s\n", strerror(errno)); ++ return -1; ++ } ++ ++ /* read auxiliary FDs if any */ ++ for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; ++ cmsg = CMSG_NXTHDR(&msgh, cmsg)) { ++ if (cmsg->cmsg_level == SOL_SOCKET && ++ cmsg->cmsg_type == SCM_RIGHTS) { ++ memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd)); ++ break; ++ } ++ } ++ ++ response[msglen] = '\0'; ++ return msglen; ++} ++ ++static int ++make_request_cni(int sock, struct sockaddr_un *server, char *request, ++ int *req_fd, char *response, int *out_fd) ++{ ++ int rval; ++ ++ AF_XDP_LOG(DEBUG, "Request: [%s]\n", request); ++ ++ /* if no file descriptor to send then directly write to socket. ++ * else use sendmsg() to send the file descriptor. 
++ */ ++ if (req_fd == NULL) ++ rval = write(sock, request, strlen(request)); ++ else ++ rval = send_msg(sock, request, req_fd); ++ ++ if (rval < 0) { ++ AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); ++ return -1; ++ } ++ ++ rval = read_msg(sock, response, server, out_fd); ++ if (rval <= 0) { ++ AF_XDP_LOG(ERR, "Read error %d\n", rval); ++ return -1; ++ } ++ AF_XDP_LOG(DEBUG, "Response: [%s]\n", request); ++ ++ return 0; ++} ++ ++static int ++check_response(char *response, char *exp_resp, long size) ++{ ++ return strncmp(response, exp_resp, size); ++} ++ ++static int ++get_cni_fd(char *if_name) ++{ ++ char request[UDS_MAX_CMD_LEN], response[UDS_MAX_CMD_RESP]; ++ char hostname[MAX_LONG_OPT_SZ], exp_resp[UDS_MAX_CMD_RESP]; ++ struct sockaddr_un server; ++ int xsk_map_fd = -1, out_fd = 0; ++ int sock, err; ++ ++ err = gethostname(hostname, MAX_LONG_OPT_SZ - 1); ++ if (err) ++ return -1; ++ ++ memset(&server, 0, sizeof(server)); ++ sock = init_uds_sock(&server); ++ ++ /* Initiates handshake to CNI send: /connect,hostname */ ++ snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname); ++ memset(response, 0, sizeof(response)); ++ if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); ++ goto err_close; ++ } ++ ++ /* Expect /host_ok */ ++ strlcpy(exp_resp, UDS_HOST_OK_MSG, UDS_MAX_CMD_LEN); ++ if (check_response(response, exp_resp, strlen(exp_resp)) < 0) { ++ AF_XDP_LOG(ERR, "Unexpected response [%s]\n", response); ++ goto err_close; ++ } ++ /* Request for "/version" */ ++ strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN); ++ memset(response, 0, sizeof(response)); ++ if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); ++ goto err_close; ++ } ++ ++ /* Request for file descriptor for netdev name*/ ++ snprintf(request, sizeof(request), "%s,%s", UDS_XSK_MAP_FD_MSG, if_name); ++ 
memset(response, 0, sizeof(response)); ++ if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); ++ goto err_close; ++ } ++ ++ if (out_fd < 0) { ++ AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); ++ goto err_close; ++ } ++ ++ xsk_map_fd = out_fd; ++ ++ /* Expect fd_ack with file descriptor */ ++ strlcpy(exp_resp, UDS_FD_ACK_MSG, UDS_MAX_CMD_LEN); ++ if (check_response(response, exp_resp, strlen(exp_resp)) < 0) { ++ AF_XDP_LOG(ERR, "Unexpected response [%s]\n", response); ++ goto err_close; ++ } ++ ++ /* Initiate close connection */ ++ strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN); ++ memset(response, 0, sizeof(response)); ++ if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); ++ goto err_close; ++ } ++ ++ /* Connection close */ ++ strlcpy(exp_resp, UDS_FIN_ACK_MSG, UDS_MAX_CMD_LEN); ++ if (check_response(response, exp_resp, strlen(exp_resp)) < 0) { ++ AF_XDP_LOG(ERR, "Unexpected response [%s]\n", response); ++ goto err_close; ++ } ++ close(sock); ++ ++ return xsk_map_fd; ++ ++err_close: ++ close(sock); ++ return -1; ++} ++ + static int + xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + int ring_size) +@@ -1363,6 +1624,10 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + cfg.bind_flags |= XDP_USE_NEED_WAKEUP; + #endif + ++ /* Disable libbpf from loading XDP program */ ++ if (internals->use_cni) ++ cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; ++ + if (strnlen(internals->prog_path, PATH_MAX)) { + if (!internals->custom_prog_configured) { + ret = load_custom_xdp_prog(internals->prog_path, +@@ -1414,7 +1679,23 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + } + } + +- if (rxq->busy_budget) { ++ if (internals->use_cni) { ++ int err, fd, map_fd; ++ ++ /* get socket fd from CNI plugin */ ++ 
map_fd = get_cni_fd(internals->if_name); ++ if (map_fd < 0) { ++ AF_XDP_LOG(ERR, "Failed to receive CNI plugin fd\n"); ++ goto out_xsk; ++ } ++ /* get socket fd */ ++ fd = xsk_socket__fd(rxq->xsk); ++ err = bpf_map_update_elem(map_fd, &rxq->xsk_queue_idx, &fd, 0); ++ if (err) { ++ AF_XDP_LOG(ERR, "Failed to insert unprivileged xsk in map.\n"); ++ goto out_xsk; ++ } ++ } else if (rxq->busy_budget) { + ret = configure_preferred_busy_poll(rxq); + if (ret) { + AF_XDP_LOG(ERR, "Failed configure busy polling.\n"); +@@ -1585,6 +1866,27 @@ static const struct eth_dev_ops ops = { + .get_monitor_addr = eth_get_monitor_addr, + }; + ++/* CNI option works in unprivileged container environment ++ * and ethernet device functionality will be reduced. So ++ * additional customiszed eth_dev_ops struct is needed ++ * for cni. Promiscuous enable and disable functionality ++ * is removed. ++ **/ ++static const struct eth_dev_ops ops_cni = { ++ .dev_start = eth_dev_start, ++ .dev_stop = eth_dev_stop, ++ .dev_close = eth_dev_close, ++ .dev_configure = eth_dev_configure, ++ .dev_infos_get = eth_dev_info, ++ .mtu_set = eth_dev_mtu_set, ++ .rx_queue_setup = eth_rx_queue_setup, ++ .tx_queue_setup = eth_tx_queue_setup, ++ .link_update = eth_link_update, ++ .stats_get = eth_stats_get, ++ .stats_reset = eth_stats_reset, ++ .get_monitor_addr = eth_get_monitor_addr, ++}; ++ + /** parse busy_budget argument */ + static int + parse_budget_arg(const char *key __rte_unused, +@@ -1705,8 +2007,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues, + + static int + parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, +- int *queue_cnt, int *shared_umem, char *prog_path, +- int *busy_budget, int *force_copy) ++ int *queue_cnt, int *shared_umem, char *prog_path, ++ int *busy_budget, int *force_copy, int *use_cni) + { + int ret; + +@@ -1747,6 +2049,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + if (ret < 0) + goto free_kvlist; + ++ ret 
= rte_kvargs_process(kvlist, ETH_AF_XDP_USE_CNI_ARG, ++ &parse_integer_arg, use_cni); ++ if (ret < 0) ++ goto free_kvlist; ++ + free_kvlist: + rte_kvargs_free(kvlist); + return ret; +@@ -1784,8 +2091,9 @@ get_iface_info(const char *if_name, + + static struct rte_eth_dev * + init_internals(struct rte_vdev_device *dev, const char *if_name, +- int start_queue_idx, int queue_cnt, int shared_umem, +- const char *prog_path, int busy_budget, int force_copy) ++ int start_queue_idx, int queue_cnt, int shared_umem, ++ const char *prog_path, int busy_budget, int force_copy, ++ int use_cni) + { + const char *name = rte_vdev_device_name(dev); + const unsigned int numa_node = dev->device.numa_node; +@@ -1814,6 +2122,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + #endif + internals->shared_umem = shared_umem; + internals->force_copy = force_copy; ++ internals->use_cni = use_cni; + + if (xdp_get_channels_info(if_name, &internals->max_queue_cnt, + &internals->combined_queue_cnt)) { +@@ -1872,7 +2181,11 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + eth_dev->data->dev_link = pmd_link; + eth_dev->data->mac_addrs = &internals->eth_addr; + eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; +- eth_dev->dev_ops = &ops; ++ if (!internals->use_cni) ++ eth_dev->dev_ops = &ops; ++ else ++ eth_dev->dev_ops = &ops_cni; ++ + eth_dev->rx_pkt_burst = eth_af_xdp_rx; + eth_dev->tx_pkt_burst = eth_af_xdp_tx; + eth_dev->process_private = process_private; +@@ -1999,6 +2312,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + char prog_path[PATH_MAX] = {'\0'}; + int busy_budget = -1, ret; + int force_copy = 0; ++ int use_cni = 0; + struct rte_eth_dev *eth_dev = NULL; + const char *name = rte_vdev_device_name(dev); + +@@ -2041,11 +2355,23 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, + &xsk_queue_cnt, &shared_umem, prog_path, +- &busy_budget, &force_copy) < 0) { ++ 
&busy_budget, &force_copy, &use_cni) < 0) { + AF_XDP_LOG(ERR, "Invalid kvargs value\n"); + return -EINVAL; + } + ++ if (use_cni && busy_budget > 0) { ++ AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n", ++ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_BUDGET_ARG); ++ return -EINVAL; ++ } ++ ++ if (use_cni && strnlen(prog_path, PATH_MAX)) { ++ AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n", ++ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_PROG_ARG); ++ return -EINVAL; ++ } ++ + if (strlen(if_name) == 0) { + AF_XDP_LOG(ERR, "Network interface must be specified\n"); + return -EINVAL; +@@ -2068,8 +2394,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + busy_budget; + + eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, +- xsk_queue_cnt, shared_umem, prog_path, +- busy_budget, force_copy); ++ xsk_queue_cnt, shared_umem, prog_path, ++ busy_budget, force_copy, use_cni); + if (eth_dev == NULL) { + AF_XDP_LOG(ERR, "Failed to init internals\n"); + return -1; +@@ -2129,4 +2455,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, + "shared_umem= " + "xdp_prog= " + "busy_budget= " +- "force_copy= "); ++ "force_copy= " ++ "use_cni= "); +-- +2.33.0 + diff --git a/0460-net-af_xdp-fix-socket-handler-validation.patch b/0460-net-af_xdp-fix-socket-handler-validation.patch new file mode 100644 index 0000000..333fb65 --- /dev/null +++ b/0460-net-af_xdp-fix-socket-handler-validation.patch @@ -0,0 +1,36 @@ +From cd2729e49d14b8f8411a9c39312ff60bc7200f8a Mon Sep 17 00:00:00 2001 +From: Shibin Koikkara Reeny +Date: Tue, 21 Feb 2023 11:14:26 +0000 +Subject: [PATCH] net/af_xdp: fix socket handler validation + +[ upstream commit cd2729e49d14b8f8411a9c39312ff60bc7200f8a ] + +Fix integer handling issues (NEGATIVE_RETURNS) reported +in coverity scan. Add a check to sock variable if the +value is negative return -1. 
+ +Coverity issue: 383245 +Fixes: 7fc6ae50369d ("net/af_xdp: support CNI Integration") + +Signed-off-by: Shibin Koikkara Reeny +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 02c874d51e..2a20a6960c 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1508,6 +1508,8 @@ get_cni_fd(char *if_name) + + memset(&server, 0, sizeof(server)); + sock = init_uds_sock(&server); ++ if (sock < 0) ++ return -1; + + /* Initiates handshake to CNI send: /connect,hostname */ + snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname); +-- +2.33.0 + diff --git a/0461-build-clarify-configuration-without-IOVA-field-in-mb.patch b/0461-build-clarify-configuration-without-IOVA-field-in-mb.patch new file mode 100644 index 0000000..ded92e8 --- /dev/null +++ b/0461-build-clarify-configuration-without-IOVA-field-in-mb.patch @@ -0,0 +1,48 @@ +From d5d9e8fe0906f24579b9ff8c24442c235cc1819a Mon Sep 17 00:00:00 2001 +From: Thomas Monjalon +Date: Mon, 6 Mar 2023 17:13:27 +0100 +Subject: [PATCH] build: clarify configuration without IOVA field in mbuf + +[ upstream commit d5d9e8fe0906f24579b9ff8c24442c235cc1819a ] + +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The impact of the option "enable_iova_as_pa" is explained for users. + +Also the code flag "RTE_IOVA_AS_PA" is renamed as "RTE_IOVA_IN_MBUF" +in order to be more accurate (IOVA mode is decided at runtime), +and more readable in the code. + +Similarly the drivers are using the variable "require_iova_in_mbuf" +instead of "pmd_supports_disable_iova_as_pa" with an opposite meaning. +By default, it is assumed that drivers require the IOVA field in mbuf. +The drivers which support removing this field have to declare themselves. +Some bus drivers are declared compatible. 
+ +If the option "enable_iova_as_pa" is disabled, the unsupported drivers +will be listed with the new reason text "requires IOVA in mbuf". + +Suggested-by: Bruce Richardson +Signed-off-by: Thomas Monjalon +Reviewed-by: Bruce Richardson +Acked-by: Morten Brørup +Acked-by: Chengwen Feng +--- + drivers/net/af_xdp/meson.build | 2 +- + 1 files changed, 81 insertions(+), 67 deletions(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 979b914bb6..9a8dbb4d49 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -71,4 +71,4 @@ if build + endif + endif + +-pmd_supports_disable_iova_as_pa = true ++require_iova_in_mbuf = false +-- +2.33.0 + diff --git a/0462-rework-atomic-intrinsics-fetch-operations.patch b/0462-rework-atomic-intrinsics-fetch-operations.patch new file mode 100644 index 0000000..ee54327 --- /dev/null +++ b/0462-rework-atomic-intrinsics-fetch-operations.patch @@ -0,0 +1,45 @@ +From ed090599c8bdcf5788d3459a752f1bbac0cef176 Mon Sep 17 00:00:00 2001 +From: Tyler Retzlaff +Date: Mon, 20 Mar 2023 12:00:21 -0700 +Subject: [PATCH] rework atomic intrinsics fetch operations + +[ upstream commit ed090599c8bdcf5788d3459a752f1bbac0cef176 ] + +Use __atomic_fetch_{add,and,or,sub,xor} instead of +__atomic_{add,and,or,sub,xor}_fetch adding the necessary code to +allow consumption of the resulting value. 
+ +Signed-off-by: Tyler Retzlaff +Reviewed-by: Ruifeng Wang +Acked-by: Pavan Nikhilesh +Acked-by: Nithin Dabilpuram +Reviewed-by: David Marchand +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 4 ++-- + 1 files changed, 77 insertions(+), 77 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 2a20a6960c..c7786cc53a 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -979,7 +979,7 @@ eth_dev_close(struct rte_eth_dev *dev) + break; + xsk_socket__delete(rxq->xsk); + +- if (__atomic_sub_fetch(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) ++ if (__atomic_fetch_sub(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) - 1 + == 0) { + (void)xsk_umem__delete(rxq->umem->umem); + xdp_umem_destroy(rxq->umem); +@@ -1710,7 +1710,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + out_xsk: + xsk_socket__delete(rxq->xsk); + out_umem: +- if (__atomic_sub_fetch(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) == 0) ++ if (__atomic_fetch_sub(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) - 1 == 0) + xdp_umem_destroy(rxq->umem); + + return ret; +-- +2.33.0 + diff --git a/0463-net-af_xdp-fix-missing-UMEM-feature.patch b/0463-net-af_xdp-fix-missing-UMEM-feature.patch new file mode 100644 index 0000000..cd2f8a0 --- /dev/null +++ b/0463-net-af_xdp-fix-missing-UMEM-feature.patch @@ -0,0 +1,90 @@ +From 349e2ee77e114850a23c670fa7d6e380af509cc3 Mon Sep 17 00:00:00 2001 +From: Shibin Koikkara Reeny +Date: Thu, 28 Sep 2023 09:25:53 +0000 +Subject: [PATCH] net/af_xdp: fix missing UMEM feature + +[ upstream commit 349e2ee77e114850a23c670fa7d6e380af509cc3 ] + +Shared UMEM feature is missing in the af_xdp driver build +after the commit 33d66940e9ba ("build: use C11 standard"). + +Runtime Error log while using Shared UMEM feature: +rte_pmd_af_xdp_probe(): Initializing pmd_af_xdp for net_af_xdp0 +init_internals(): Shared UMEM feature not available. 
Check kernel +and libbpf version +rte_pmd_af_xdp_probe(): Failed to init internals +vdev_probe(): failed to initialize net_af_xdp0 device +EAL: Bus (vdev) probe failed. + +Reason for the missing UMEM feature is because the C11 standard +doesn't include the GNU compiler extensions typeof and asm, used +by the libbpf and libxdp header files. + +Meson error log: + In file included from +dpdk/build/meson-private/tmpf74nkhqd/testfile.c:5: +/usr/local/include/bpf/xsk.h: In function 'xsk_prod_nb_free': +/usr/local/include/bpf/xsk.h:165:26: error: expected ';' before '___p1' + 165 | r->cached_cons = libbpf_smp_load_acquire(r->consumer); + | ^~~~~~~~~~~~~~~~~~~~~~~ +/usr/local/include/bpf/xsk.h:165:26: error: 'asm' undeclared (first use +in this function) +... +/usr/local/include/bpf/xsk.h:199:9: error: unknown type name 'typeof' + 199 | libbpf_smp_store_release(prod->producer, *prod->producer + + nb); + | ^~~~~~~~~~~~~~~~~~~~~~~~ + +Fix is to provide alternative keywords by C macros. + +Fixes: 33d66940e9ba ("build: use C11 standard") + +Signed-off-by: Shibin Koikkara Reeny +Acked-by: Bruce Richardson +--- + drivers/net/af_xdp/meson.build | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 9a8dbb4d49..9f33e57fa2 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -48,6 +48,14 @@ endif + + if build + xsk_check_prefix = ''' ++#ifndef typeof ++#define typeof __typeof__ ++#endif ++ ++#ifndef asm ++#define asm __asm__ ++#endif ++ + #ifdef RTE_NET_AF_XDP_LIBXDP + #include + #else +@@ -56,17 +64,17 @@ if build + ''' + + if cc.has_function('xsk_socket__create_shared', prefix : xsk_check_prefix, +- dependencies : ext_deps) ++ dependencies : ext_deps, args: cflags) + cflags += ['-DRTE_NET_AF_XDP_SHARED_UMEM'] + endif + if cc.has_function('bpf_object__next_program', + prefix : '#include ', +- dependencies : bpf_dep) ++ dependencies : bpf_dep, 
args: cflags) + cflags += ['-DRTE_NET_AF_XDP_LIBBPF_OBJ_OPEN'] + endif + if cc.has_function('bpf_xdp_attach', + prefix : '#include ', +- dependencies : bpf_dep) ++ dependencies : bpf_dep, args: cflags) + cflags += ['-DRTE_NET_AF_XDP_LIBBPF_XDP_ATTACH'] + endif + endif +-- +2.33.0 + diff --git a/0464-net-af_xdp-fix-Rx-and-Tx-queue-state.patch b/0464-net-af_xdp-fix-Rx-and-Tx-queue-state.patch new file mode 100644 index 0000000..2532711 --- /dev/null +++ b/0464-net-af_xdp-fix-Rx-and-Tx-queue-state.patch @@ -0,0 +1,59 @@ +From 9a204f7e4e86d8270cae3cdec2b7949f5954fde2 Mon Sep 17 00:00:00 2001 +From: Jie Hai +Date: Fri, 8 Sep 2023 19:28:28 +0800 +Subject: [PATCH] net/af_xdp: fix Rx and Tx queue state + +[ upstream commit 9a204f7e4e86d8270cae3cdec2b7949f5954fde2 ] + +The DPDK framework reports the queue state, which is stored in +dev->data->tx_queue_state and dev->data->rx_queue_state. The +state is maintained by the driver. Users may determine whether +a queue participates in packet forwarding based on the state. +Therefore, the driver needs to modify the queue state in time +according to the actual situation. 
+ +Fixes: 9ad9ff476cac ("ethdev: add queue state in queried queue information") +Cc: stable@dpdk.org + +Signed-off-by: Jie Hai +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index c7786cc53a..0cc51223ba 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -694,7 +694,13 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + static int + eth_dev_start(struct rte_eth_dev *dev) + { ++ uint16_t i; ++ + dev->data->dev_link.link_status = RTE_ETH_LINK_UP; ++ for (i = 0; i < dev->data->nb_rx_queues; i++) { ++ dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; ++ dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; ++ } + + return 0; + } +@@ -703,7 +709,14 @@ eth_dev_start(struct rte_eth_dev *dev) + static int + eth_dev_stop(struct rte_eth_dev *dev) + { ++ uint16_t i; ++ + dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN; ++ for (i = 0; i < dev->data->nb_rx_queues; i++) { ++ dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; ++ dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; ++ } ++ + return 0; + } + +-- +2.33.0 + diff --git a/0465-net-af_xdp-avoid-error-log-for-virtual-interfaces.patch b/0465-net-af_xdp-avoid-error-log-for-virtual-interfaces.patch new file mode 100644 index 0000000..5ed8fba --- /dev/null +++ b/0465-net-af_xdp-avoid-error-log-for-virtual-interfaces.patch @@ -0,0 +1,42 @@ +From 140bc789d8ac0e316269bf3a5b69ddc599ff35af Mon Sep 17 00:00:00 2001 +From: David Marchand +Date: Fri, 27 Oct 2023 16:32:41 +0200 +Subject: [PATCH] net/af_xdp: avoid error log for virtual interfaces + +[ upstream commit 140bc789d8ac0e316269bf3a5b69ddc599ff35af ] + +For virtual interfaces, like a veth or a tap used in OVS unit tests, +plugging a af_xdp port on them results in an error level message: + +dpdk|ERR|EAL: 
eal_parse_sysfs_value(): cannot open sysfs value + /sys/class/net/ovs-tap1/device/numa_node +netdev_dpdk|INFO|Device 'net_af_xdptap1,iface=ovs-tap1' attached to DPDK + +Avoid this error by checking if the sysfs file exists, like what is done +in DPDK bus drivers using eal_parse_sysfs_value(). + +Fixes: 3d28387cbc48 ("net/af_xdp: parse NUMA node ID from sysfs") +Cc: stable@dpdk.org + +Signed-off-by: David Marchand +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 0cc51223ba..353c8688ec 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -2399,7 +2399,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + snprintf(numa_path, sizeof(numa_path), "/sys/class/net/%s/device/numa_node", + if_name); +- if (eal_parse_sysfs_value(numa_path, &numa) != 0) ++ if (access(numa_path, R_OK) != 0 || eal_parse_sysfs_value(numa_path, &numa) != 0) + dev->device.numa_node = rte_socket_id(); + else + dev->device.numa_node = numa; +-- +2.33.0 + diff --git a/0466-net-af_xdp-fix-memzone-leak-on-config-failure.patch b/0466-net-af_xdp-fix-memzone-leak-on-config-failure.patch new file mode 100644 index 0000000..0df8621 --- /dev/null +++ b/0466-net-af_xdp-fix-memzone-leak-on-config-failure.patch @@ -0,0 +1,45 @@ +From 960775fc5b7d43cfbf9fa31e4c0b36c4da79615a Mon Sep 17 00:00:00 2001 +From: Yunjian Wang +Date: Tue, 5 Dec 2023 20:23:51 +0800 +Subject: [PATCH] net/af_xdp: fix memzone leak on config failure + +[ upstream commit 960775fc5b7d43cfbf9fa31e4c0b36c4da79615a ] + +In xdp_umem_configure() allocated memzone for the 'umem', we should +free it when xsk_umem__create() call fails, otherwise it will lead to +memory zone leak. To fix it move 'umem->mz = mz;' assignment after +'mz == NULL' check. 
+ +Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD") +Cc: stable@dpdk.org + +Signed-off-by: Yunjian Wang +Acked-by: Ciara Loftus +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 353c8688ec..2d151e45c7 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1234,6 +1234,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + AF_XDP_LOG(ERR, "Failed to reserve memzone for af_xdp umem.\n"); + goto err; + } ++ umem->mz = mz; + + ret = xsk_umem__create(&umem->umem, mz->addr, + ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE, +@@ -1244,7 +1245,6 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + AF_XDP_LOG(ERR, "Failed to create umem\n"); + goto err; + } +- umem->mz = mz; + + return umem; + +-- +2.33.0 + diff --git a/0467-net-af_xdp-fix-leak-on-XSK-configuration-failure.patch b/0467-net-af_xdp-fix-leak-on-XSK-configuration-failure.patch new file mode 100644 index 0000000..48e817c --- /dev/null +++ b/0467-net-af_xdp-fix-leak-on-XSK-configuration-failure.patch @@ -0,0 +1,51 @@ +From 955acb9594cec548ae57319bfc99d4297d773675 Mon Sep 17 00:00:00 2001 +From: Yunjian Wang +Date: Fri, 23 Feb 2024 09:45:45 +0800 +Subject: [PATCH] net/af_xdp: fix leak on XSK configuration failure + +[ upstream commit 955acb9594cec548ae57319bfc99d4297d773675 ] + +In xdp_umem_configure() allocated some resources for the +xsk umem, we should delete them when xsk configure fails, +otherwise it will lead to resources leak. 
+ +Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD") +Cc: stable@dpdk.org + +Signed-off-by: Yunjian Wang +Reviewed-by: Ciara Loftus +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 2d151e45c7..268a130c49 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -960,6 +960,9 @@ remove_xdp_program(struct pmd_internals *internals) + static void + xdp_umem_destroy(struct xsk_umem_info *umem) + { ++ (void)xsk_umem__delete(umem->umem); ++ umem->umem = NULL; ++ + #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) + umem->mb_pool = NULL; + #else +@@ -992,11 +995,8 @@ eth_dev_close(struct rte_eth_dev *dev) + break; + xsk_socket__delete(rxq->xsk); + +- if (__atomic_fetch_sub(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) - 1 +- == 0) { +- (void)xsk_umem__delete(rxq->umem->umem); ++ if (__atomic_fetch_sub(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) - 1 == 0) + xdp_umem_destroy(rxq->umem); +- } + + /* free pkt_tx_queue */ + rte_free(rxq->pair); +-- +2.33.0 + diff --git a/0469-net-af_xdp-fix-multi-interface-support-for-k8s.patch b/0469-net-af_xdp-fix-multi-interface-support-for-k8s.patch new file mode 100644 index 0000000..050f34e --- /dev/null +++ b/0469-net-af_xdp-fix-multi-interface-support-for-k8s.patch @@ -0,0 +1,388 @@ +From 9c1323736cf91aa46d43def8e8d2349f7498a203 Mon Sep 17 00:00:00 2001 +From: Maryam Tahhan +Date: Mon, 8 Apr 2024 09:09:21 -0400 +Subject: [PATCH] net/af_xdp: fix multi-interface support for k8s + +[ upstream commit 9c1323736cf91aa46d43def8e8d2349f7498a203 ] + +The original 'use_cni' implementation, was added +to enable support for the AF_XDP PMD in a K8s env +without any escalated privileges. +However 'use_cni' used a hardcoded socket rather +than a configurable one. 
If a DPDK pod is requesting +multiple net devices and these devices are from +different pools, then the AF_XDP PMD attempts to +mount all the netdev UDSes in the pod as /tmp/afxdp.sock. +Which means that at best only 1 netdev will handshake +correctly with the AF_XDP DP. This patch addresses +this by making the socket parameter configurable using +a new vdev param called 'dp_path' alongside the +original 'use_cni' param. If the 'dp_path' parameter +is not set alongside the 'use_cni' parameter, then +it's configured inside the AF_XDP PMD (transparently +to the user). This change has been tested +with the AF_XDP DP PR 81[1], with both single and +multiple interfaces. + +[1] https://github.com/intel/afxdp-plugins-for-kubernetes/pull/81 + +Fixes: 7fc6ae50369d ("net/af_xdp: support CNI Integration") +Cc: stable@dpdk.org + +Signed-off-by: Maryam Tahhan +Acked-by: Ciara Loftus +--- + drivers/net/af_xdp/compat.h | 15 ++++ + drivers/net/af_xdp/meson.build | 4 ++ + drivers/net/af_xdp/rte_eth_af_xdp.c | 97 ++++++++++++++++---------- + 3 files changed, 141 insertions(+), 55 deletions(-) + +diff --git a/drivers/net/af_xdp/compat.h b/drivers/net/af_xdp/compat.h +index 28ea64aeaa..3b5a5c1ed5 100644 +--- a/drivers/net/af_xdp/compat.h ++++ b/drivers/net/af_xdp/compat.h +@@ -46,6 +46,21 @@ create_shared_socket(struct xsk_socket **xsk_ptr __rte_unused, + } + #endif + ++#ifdef ETH_AF_XDP_UPDATE_XSKMAP ++static __rte_always_inline int ++update_xskmap(struct xsk_socket *xsk, int map_fd, int xsk_queue_idx __rte_unused) ++{ ++ return xsk_socket__update_xskmap(xsk, map_fd); ++} ++#else ++static __rte_always_inline int ++update_xskmap(struct xsk_socket *xsk, int map_fd, int xsk_queue_idx) ++{ ++ int fd = xsk_socket__fd(xsk); ++ return bpf_map_update_elem(map_fd, &xsk_queue_idx, &fd, 0); ++} ++#endif ++ + #ifdef XDP_USE_NEED_WAKEUP + static int + tx_syscall_needed(struct xsk_ring_prod *q) +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 
9f33e57fa2..280bfa8f80 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -77,6 +77,10 @@ if build + dependencies : bpf_dep, args: cflags) + cflags += ['-DRTE_NET_AF_XDP_LIBBPF_XDP_ATTACH'] + endif ++ if cc.has_function('xsk_socket__update_xskmap', prefix : xsk_check_prefix, ++ dependencies : ext_deps, args: cflags) ++ cflags += ['-DETH_AF_XDP_UPDATE_XSKMAP'] ++ endif + endif + + require_iova_in_mbuf = false +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 6ba455bb9b..dcd590569e 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -83,12 +83,13 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE); + + #define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds" + ++#define DP_BASE_PATH "/tmp/afxdp_dp" ++#define DP_UDS_SOCK "afxdp.sock" + #define MAX_LONG_OPT_SZ 64 + #define UDS_MAX_FD_NUM 2 + #define UDS_MAX_CMD_LEN 64 + #define UDS_MAX_CMD_RESP 128 + #define UDS_XSK_MAP_FD_MSG "/xsk_map_fd" +-#define UDS_SOCK "/tmp/afxdp.sock" + #define UDS_CONNECT_MSG "/connect" + #define UDS_HOST_OK_MSG "/host_ok" + #define UDS_HOST_NAK_MSG "/host_nak" +@@ -171,6 +172,7 @@ struct pmd_internals { + bool custom_prog_configured; + bool force_copy; + bool use_cni; ++ char dp_path[PATH_MAX]; + struct bpf_map *map; + + struct rte_ether_addr eth_addr; +@@ -191,6 +193,7 @@ struct pmd_process_private { + #define ETH_AF_XDP_BUDGET_ARG "busy_budget" + #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy" + #define ETH_AF_XDP_USE_CNI_ARG "use_cni" ++#define ETH_AF_XDP_DP_PATH_ARG "dp_path" + + static const char * const valid_arguments[] = { + ETH_AF_XDP_IFACE_ARG, +@@ -201,6 +204,7 @@ static const char * const valid_arguments[] = { + ETH_AF_XDP_BUDGET_ARG, + ETH_AF_XDP_FORCE_COPY_ARG, + ETH_AF_XDP_USE_CNI_ARG, ++ ETH_AF_XDP_DP_PATH_ARG, + NULL + }; + +@@ -1352,7 +1356,7 @@ configure_preferred_busy_poll(struct pkt_rx_queue *rxq) + } + + static int +-init_uds_sock(struct sockaddr_un *server) 
++init_uds_sock(struct sockaddr_un *server, const char *dp_path) + { + int sock; + +@@ -1363,7 +1367,7 @@ init_uds_sock(struct sockaddr_un *server) + } + + server->sun_family = AF_UNIX; +- strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path)); ++ strlcpy(server->sun_path, dp_path, sizeof(server->sun_path)); + + if (connect(sock, (struct sockaddr *)server, sizeof(struct sockaddr_un)) < 0) { + close(sock); +@@ -1383,7 +1387,7 @@ struct msg_internal { + }; + + static int +-send_msg(int sock, char *request, int *fd) ++send_msg(int sock, char *request, int *fd, const char *dp_path) + { + int snd; + struct iovec iov; +@@ -1394,7 +1398,7 @@ send_msg(int sock, char *request, int *fd) + + memset(&dst, 0, sizeof(dst)); + dst.sun_family = AF_UNIX; +- strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path)); ++ strlcpy(dst.sun_path, dp_path, sizeof(dst.sun_path)); + + /* Initialize message header structure */ + memset(&msgh, 0, sizeof(msgh)); +@@ -1471,8 +1475,8 @@ read_msg(int sock, char *response, struct sockaddr_un *s, int *fd) + } + + static int +-make_request_cni(int sock, struct sockaddr_un *server, char *request, +- int *req_fd, char *response, int *out_fd) ++make_request_dp(int sock, struct sockaddr_un *server, char *request, ++ int *req_fd, char *response, int *out_fd, const char *dp_path) + { + int rval; + +@@ -1484,7 +1488,7 @@ make_request_cni(int sock, struct sockaddr_un *server, char *request, + if (req_fd == NULL) + rval = write(sock, request, strlen(request)); + else +- rval = send_msg(sock, request, req_fd); ++ rval = send_msg(sock, request, req_fd, dp_path); + + if (rval < 0) { + AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); +@@ -1508,7 +1512,7 @@ check_response(char *response, char *exp_resp, long size) + } + + static int +-get_cni_fd(char *if_name) ++uds_get_xskmap_fd(char *if_name, const char *dp_path) + { + char request[UDS_MAX_CMD_LEN], response[UDS_MAX_CMD_RESP]; + char hostname[MAX_LONG_OPT_SZ], exp_resp[UDS_MAX_CMD_RESP]; +@@ -1521,14 
+1525,14 @@ get_cni_fd(char *if_name) + return -1; + + memset(&server, 0, sizeof(server)); +- sock = init_uds_sock(&server); ++ sock = init_uds_sock(&server, dp_path); + if (sock < 0) + return -1; + +- /* Initiates handshake to CNI send: /connect,hostname */ ++ /* Initiates handshake to the AF_XDP Device Plugin send: /connect,hostname */ + snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname); + memset(response, 0, sizeof(response)); +- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { + AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); + goto err_close; + } +@@ -1542,7 +1546,7 @@ get_cni_fd(char *if_name) + /* Request for "/version" */ + strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN); + memset(response, 0, sizeof(response)); +- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { + AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); + goto err_close; + } +@@ -1550,7 +1554,7 @@ get_cni_fd(char *if_name) + /* Request for file descriptor for netdev name*/ + snprintf(request, sizeof(request), "%s,%s", UDS_XSK_MAP_FD_MSG, if_name); + memset(response, 0, sizeof(response)); +- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { + AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request); + goto err_close; + } +@@ -1572,7 +1576,7 @@ get_cni_fd(char *if_name) + /* Initiate close connection */ + strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN); + memset(response, 0, sizeof(response)); +- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) { ++ if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) { + AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", 
request); + goto err_close; + } +@@ -1697,21 +1701,21 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + } + + if (internals->use_cni) { +- int err, fd, map_fd; ++ int err, map_fd; + +- /* get socket fd from CNI plugin */ +- map_fd = get_cni_fd(internals->if_name); ++ /* get socket fd from AF_XDP Device Plugin */ ++ map_fd = uds_get_xskmap_fd(internals->if_name, internals->dp_path); + if (map_fd < 0) { +- AF_XDP_LOG(ERR, "Failed to receive CNI plugin fd\n"); ++ AF_XDP_LOG(ERR, "Failed to receive xskmap fd from AF_XDP Device Plugin\n"); + goto out_xsk; + } +- /* get socket fd */ +- fd = xsk_socket__fd(rxq->xsk); +- err = bpf_map_update_elem(map_fd, &rxq->xsk_queue_idx, &fd, 0); ++ ++ err = update_xskmap(rxq->xsk, map_fd, rxq->xsk_queue_idx); + if (err) { +- AF_XDP_LOG(ERR, "Failed to insert unprivileged xsk in map.\n"); ++ AF_XDP_LOG(ERR, "Failed to insert xsk in map.\n"); + goto out_xsk; + } ++ + } else if (rxq->busy_budget) { + ret = configure_preferred_busy_poll(rxq); + if (ret) { +@@ -1883,13 +1887,13 @@ static const struct eth_dev_ops ops = { + .get_monitor_addr = eth_get_monitor_addr, + }; + +-/* CNI option works in unprivileged container environment +- * and ethernet device functionality will be reduced. So +- * additional customiszed eth_dev_ops struct is needed +- * for cni. Promiscuous enable and disable functionality +- * is removed. ++/* AF_XDP Device Plugin option works in unprivileged ++ * container environments and ethernet device functionality ++ * will be reduced. So additional customised eth_dev_ops ++ * struct is needed for the Device Plugin. Promiscuous ++ * enable and disable functionality is removed. 
+ **/ +-static const struct eth_dev_ops ops_cni = { ++static const struct eth_dev_ops ops_afxdp_dp = { + .dev_start = eth_dev_start, + .dev_stop = eth_dev_stop, + .dev_close = eth_dev_close, +@@ -2025,7 +2029,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues, + static int + parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + int *queue_cnt, int *shared_umem, char *prog_path, +- int *busy_budget, int *force_copy, int *use_cni) ++ int *busy_budget, int *force_copy, int *use_cni, ++ char *dp_path) + { + int ret; + +@@ -2071,6 +2076,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + if (ret < 0) + goto free_kvlist; + ++ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_DP_PATH_ARG, ++ &parse_prog_arg, dp_path); ++ if (ret < 0) ++ goto free_kvlist; ++ + free_kvlist: + rte_kvargs_free(kvlist); + return ret; +@@ -2110,7 +2120,7 @@ static struct rte_eth_dev * + init_internals(struct rte_vdev_device *dev, const char *if_name, + int start_queue_idx, int queue_cnt, int shared_umem, + const char *prog_path, int busy_budget, int force_copy, +- int use_cni) ++ int use_cni, const char *dp_path) + { + const char *name = rte_vdev_device_name(dev); + const unsigned int numa_node = dev->device.numa_node; +@@ -2140,6 +2150,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + internals->shared_umem = shared_umem; + internals->force_copy = force_copy; + internals->use_cni = use_cni; ++ strlcpy(internals->dp_path, dp_path, PATH_MAX); + + if (xdp_get_channels_info(if_name, &internals->max_queue_cnt, + &internals->combined_queue_cnt)) { +@@ -2201,7 +2212,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + if (!internals->use_cni) + eth_dev->dev_ops = &ops; + else +- eth_dev->dev_ops = &ops_cni; ++ eth_dev->dev_ops = &ops_afxdp_dp; + + eth_dev->rx_pkt_burst = eth_af_xdp_rx; + eth_dev->tx_pkt_burst = eth_af_xdp_tx; +@@ -2330,6 +2341,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device 
*dev) + int busy_budget = -1, ret; + int force_copy = 0; + int use_cni = 0; ++ char dp_path[PATH_MAX] = {'\0'}; + struct rte_eth_dev *eth_dev = NULL; + const char *name = rte_vdev_device_name(dev); + +@@ -2372,7 +2384,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, + &xsk_queue_cnt, &shared_umem, prog_path, +- &busy_budget, &force_copy, &use_cni) < 0) { ++ &busy_budget, &force_copy, &use_cni, dp_path) < 0) { + AF_XDP_LOG(ERR, "Invalid kvargs value\n"); + return -EINVAL; + } +@@ -2386,7 +2398,19 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + if (use_cni && strnlen(prog_path, PATH_MAX)) { + AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n", + ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_PROG_ARG); +- return -EINVAL; ++ return -EINVAL; ++ } ++ ++ if (use_cni && !strnlen(dp_path, PATH_MAX)) { ++ snprintf(dp_path, sizeof(dp_path), "%s/%s/%s", DP_BASE_PATH, if_name, DP_UDS_SOCK); ++ AF_XDP_LOG(INFO, "'%s' parameter not provided, setting value to '%s'\n", ++ ETH_AF_XDP_DP_PATH_ARG, dp_path); ++ } ++ ++ if (!use_cni && strnlen(dp_path, PATH_MAX)) { ++ AF_XDP_LOG(ERR, "'%s' parameter is set, but '%s' was not enabled\n", ++ ETH_AF_XDP_DP_PATH_ARG, ETH_AF_XDP_USE_CNI_ARG); ++ return -EINVAL; + } + + if (strlen(if_name) == 0) { +@@ -2412,7 +2436,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, + xsk_queue_cnt, shared_umem, prog_path, +- busy_budget, force_copy, use_cni); ++ busy_budget, force_copy, use_cni, dp_path); + if (eth_dev == NULL) { + AF_XDP_LOG(ERR, "Failed to init internals\n"); + return -1; +@@ -2473,4 +2497,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, + "xdp_prog= " + "busy_budget= " + "force_copy= " +- "use_cni= "); ++ "use_cni= " ++ "dp_path= "); +-- +2.33.0 + diff --git a/0470-net-af_xdp-support-AF_XDP-device-plugin-pinned-maps.patch 
b/0470-net-af_xdp-support-AF_XDP-device-plugin-pinned-maps.patch new file mode 100644 index 0000000..80ec520 --- /dev/null +++ b/0470-net-af_xdp-support-AF_XDP-device-plugin-pinned-maps.patch @@ -0,0 +1,246 @@ +From 8a324b1c6464a1bc7eb36b8473b3f6286ba00f62 Mon Sep 17 00:00:00 2001 +From: Maryam Tahhan +Date: Mon, 8 Apr 2024 09:09:22 -0400 +Subject: [PATCH] net/af_xdp: support AF_XDP device plugin pinned maps + +[ upstream commit 8a324b1c6464a1bc7eb36b8473b3f6286ba00f62 ] + +Enable the AF_XDP PMD to retrieve the xskmap +from a pinned eBPF map. This map is expected +to be pinned by an external entity like the +AF_XDP Device Plugin. This enabled unprivileged +pods to create and use AF_XDP sockets. + +Signed-off-by: Maryam Tahhan +Acked-by: Ciara Loftus +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 93 ++++++++++++++++++++------ + 1 files changed, 142 insertions(+), 38 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index dcd590569e..2d6f64337e 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -85,6 +85,7 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE); + + #define DP_BASE_PATH "/tmp/afxdp_dp" + #define DP_UDS_SOCK "afxdp.sock" ++#define DP_XSK_MAP "xsks_map" + #define MAX_LONG_OPT_SZ 64 + #define UDS_MAX_FD_NUM 2 + #define UDS_MAX_CMD_LEN 64 +@@ -172,6 +173,7 @@ struct pmd_internals { + bool custom_prog_configured; + bool force_copy; + bool use_cni; ++ bool use_pinned_map; + char dp_path[PATH_MAX]; + struct bpf_map *map; + +@@ -193,6 +195,7 @@ struct pmd_process_private { + #define ETH_AF_XDP_BUDGET_ARG "busy_budget" + #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy" + #define ETH_AF_XDP_USE_CNI_ARG "use_cni" ++#define ETH_AF_XDP_USE_PINNED_MAP_ARG "use_pinned_map" + #define ETH_AF_XDP_DP_PATH_ARG "dp_path" + + static const char * const valid_arguments[] = { +@@ -204,6 +207,7 @@ static const char * const valid_arguments[] = { + ETH_AF_XDP_BUDGET_ARG, + 
ETH_AF_XDP_FORCE_COPY_ARG, + ETH_AF_XDP_USE_CNI_ARG, ++ ETH_AF_XDP_USE_PINNED_MAP_ARG, + ETH_AF_XDP_DP_PATH_ARG, + NULL + }; +@@ -1259,6 +1263,21 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + } + #endif + ++static int ++get_pinned_map(const char *dp_path, int *map_fd) ++{ ++ *map_fd = bpf_obj_get(dp_path); ++ if (*map_fd < 0) { /* bpf_obj_get() is negative on failure; 0 is a valid fd */ ++ AF_XDP_LOG(ERR, "Failed to find xsks_map in %s\n", dp_path); ++ return -1; ++ } ++ ++ AF_XDP_LOG(INFO, "Successfully retrieved map %s with fd %d\n", ++ dp_path, *map_fd); ++ ++ return 0; ++} ++ + static int + load_custom_xdp_prog(const char *prog_path, int if_index, struct bpf_map **map) + { +@@ -1646,7 +1665,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + #endif + + /* Disable libbpf from loading XDP program */ +- if (internals->use_cni) ++ if (internals->use_cni || internals->use_pinned_map) + cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; + + if (strnlen(internals->prog_path, PATH_MAX)) { +@@ -1700,14 +1719,23 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + } + } + +- if (internals->use_cni) { ++ if (internals->use_cni || internals->use_pinned_map) { + int err, map_fd; + +- /* get socket fd from AF_XDP Device Plugin */ +- map_fd = uds_get_xskmap_fd(internals->if_name, internals->dp_path); +- if (map_fd < 0) { +- AF_XDP_LOG(ERR, "Failed to receive xskmap fd from AF_XDP Device Plugin\n"); +- goto out_xsk; ++ if (internals->use_cni) { ++ /* get socket fd from AF_XDP Device Plugin */ ++ map_fd = uds_get_xskmap_fd(internals->if_name, internals->dp_path); ++ if (map_fd < 0) { ++ AF_XDP_LOG(ERR, "Failed to receive xskmap fd from AF_XDP Device Plugin\n"); ++ goto out_xsk; ++ } ++ } else { ++ /* get socket fd from AF_XDP plugin */ ++ err = get_pinned_map(internals->dp_path, &map_fd); ++ if (err < 0 || map_fd < 0) { ++ AF_XDP_LOG(ERR, "Failed to retrieve pinned map fd\n"); ++ goto out_xsk; ++ } + } + + err = update_xskmap(rxq->xsk, map_fd,
rxq->xsk_queue_idx); +@@ -2030,7 +2058,7 @@ static int + parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + int *queue_cnt, int *shared_umem, char *prog_path, + int *busy_budget, int *force_copy, int *use_cni, +- char *dp_path) ++ int *use_pinned_map, char *dp_path) + { + int ret; + +@@ -2076,6 +2104,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + if (ret < 0) + goto free_kvlist; + ++ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_USE_PINNED_MAP_ARG, ++ &parse_integer_arg, use_pinned_map); ++ if (ret < 0) ++ goto free_kvlist; ++ + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_DP_PATH_ARG, + &parse_prog_arg, dp_path); + if (ret < 0) +@@ -2120,7 +2153,7 @@ static struct rte_eth_dev * + init_internals(struct rte_vdev_device *dev, const char *if_name, + int start_queue_idx, int queue_cnt, int shared_umem, + const char *prog_path, int busy_budget, int force_copy, +- int use_cni, const char *dp_path) ++ int use_cni, int use_pinned_map, const char *dp_path) + { + const char *name = rte_vdev_device_name(dev); + const unsigned int numa_node = dev->device.numa_node; +@@ -2150,6 +2183,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + internals->shared_umem = shared_umem; + internals->force_copy = force_copy; + internals->use_cni = use_cni; ++ internals->use_pinned_map = use_pinned_map; + strlcpy(internals->dp_path, dp_path, PATH_MAX); + + if (xdp_get_channels_info(if_name, &internals->max_queue_cnt, +@@ -2209,7 +2243,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, + eth_dev->data->dev_link = pmd_link; + eth_dev->data->mac_addrs = &internals->eth_addr; + eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; +- if (!internals->use_cni) ++ if (!internals->use_cni && !internals->use_pinned_map) + eth_dev->dev_ops = &ops; + else + eth_dev->dev_ops = &ops_afxdp_dp; +@@ -2341,6 +2375,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + int busy_budget = -1, ret; + int 
force_copy = 0; + int use_cni = 0; ++ int use_pinned_map = 0; + char dp_path[PATH_MAX] = {'\0'}; + struct rte_eth_dev *eth_dev = NULL; + const char *name = rte_vdev_device_name(dev); +@@ -2384,20 +2419,29 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, + &xsk_queue_cnt, &shared_umem, prog_path, +- &busy_budget, &force_copy, &use_cni, dp_path) < 0) { ++ &busy_budget, &force_copy, &use_cni, &use_pinned_map, ++ dp_path) < 0) { + AF_XDP_LOG(ERR, "Invalid kvargs value\n"); + return -EINVAL; + } + +- if (use_cni && busy_budget > 0) { ++ if (use_cni && use_pinned_map) { + AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n", +- ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_BUDGET_ARG); ++ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_USE_PINNED_MAP_ARG); + return -EINVAL; + } + +- if (use_cni && strnlen(prog_path, PATH_MAX)) { +- AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' parameter is not valid\n", +- ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_PROG_ARG); ++ if ((use_cni || use_pinned_map) && busy_budget > 0) { ++ AF_XDP_LOG(ERR, "When '%s' or '%s' parameter is used, '%s' parameter is not valid\n", ++ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_USE_PINNED_MAP_ARG, ++ ETH_AF_XDP_BUDGET_ARG); ++ return -EINVAL; ++ } ++ ++ if ((use_cni || use_pinned_map) && strnlen(prog_path, PATH_MAX)) { ++ AF_XDP_LOG(ERR, "When '%s' or '%s' parameter is used, '%s' parameter is not valid\n", ++ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_USE_PINNED_MAP_ARG, ++ ETH_AF_XDP_PROG_ARG); + return -EINVAL; + } + +@@ -2407,9 +2451,16 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + ETH_AF_XDP_DP_PATH_ARG, dp_path); + } + +- if (!use_cni && strnlen(dp_path, PATH_MAX)) { +- AF_XDP_LOG(ERR, "'%s' parameter is set, but '%s' was not enabled\n", +- ETH_AF_XDP_DP_PATH_ARG, ETH_AF_XDP_USE_CNI_ARG); ++ if (use_pinned_map && !strnlen(dp_path, PATH_MAX)) { ++ snprintf(dp_path, sizeof(dp_path), "%s/%s/%s", DP_BASE_PATH, if_name, DP_XSK_MAP); ++ 
AF_XDP_LOG(INFO, "'%s' parameter not provided, setting value to '%s'\n", ++ ETH_AF_XDP_DP_PATH_ARG, dp_path); ++ } ++ ++ if ((!use_cni && !use_pinned_map) && strnlen(dp_path, PATH_MAX)) { ++ AF_XDP_LOG(ERR, "'%s' parameter is set, but '%s' or '%s' were not enabled\n", ++ ETH_AF_XDP_DP_PATH_ARG, ETH_AF_XDP_USE_CNI_ARG, ++ ETH_AF_XDP_USE_PINNED_MAP_ARG); + return -EINVAL; + } + +@@ -2436,7 +2487,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + + eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, + xsk_queue_cnt, shared_umem, prog_path, +- busy_budget, force_copy, use_cni, dp_path); ++ busy_budget, force_copy, use_cni, use_pinned_map, ++ dp_path); + if (eth_dev == NULL) { + AF_XDP_LOG(ERR, "Failed to init internals\n"); + return -1; +@@ -2498,4 +2550,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, + "busy_budget= " + "force_copy= " + "use_cni= " ++ "use_pinned_map= " + "dp_path= "); +-- +2.33.0 + diff --git a/0471-net-af_xdp-fix-port-ID-in-Rx-mbuf.patch b/0471-net-af_xdp-fix-port-ID-in-Rx-mbuf.patch new file mode 100644 index 0000000..9e1cdac --- /dev/null +++ b/0471-net-af_xdp-fix-port-ID-in-Rx-mbuf.patch @@ -0,0 +1,62 @@ +From 9bab1d2667aec4942ae1d384671e5148960bd88f Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Tue, 14 May 2024 08:41:52 +0000 +Subject: [PATCH] net/af_xdp: fix port ID in Rx mbuf + +[ upstream commit 9bab1d2667aec4942ae1d384671e5148960bd88f ] + +Record the port id in the af_xdp rx queue structure and use it +to set the port id of the mbuf of a received packet.
+ +Bugzilla ID: 1428 +Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD") +Cc: stable@dpdk.org + +Reported-by: Stephen Hemminger +Signed-off-by: Ciara Loftus +Acked-by: Maryam Tahhan +Acked-by: Stephen Hemminger +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 2d6f64337e..0997543ee5 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -133,6 +133,7 @@ struct pkt_rx_queue { + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + struct rte_mempool *mb_pool; ++ uint16_t port; + + struct rx_stats stats; + +@@ -368,6 +369,7 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + bufs[i]->data_off = offset - sizeof(struct rte_mbuf) - + rte_pktmbuf_priv_size(umem->mb_pool) - + umem->mb_pool->header_size; ++ bufs[i]->port = rxq->port; + + rte_pktmbuf_pkt_len(bufs[i]) = len; + rte_pktmbuf_data_len(bufs[i]) = len; +@@ -434,6 +436,7 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + rte_pktmbuf_data_len(mbufs[i]) = len; + rx_bytes += len; + bufs[i] = mbufs[i]; ++ bufs[i]->port = rxq->port; + } + + xsk_ring_cons__release(rx, nb_pkts); +@@ -1813,6 +1816,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, + + process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd; + ++ rxq->port = dev->data->port_id; ++ + dev->data->rx_queues[rx_queue_id] = rxq; + return 0; + +-- +2.33.0 + diff --git a/0472-net-af_xdp-count-mbuf-allocation-failures.patch b/0472-net-af_xdp-count-mbuf-allocation-failures.patch new file mode 100644 index 0000000..bb2e820 --- /dev/null +++ b/0472-net-af_xdp-count-mbuf-allocation-failures.patch @@ -0,0 +1,61 @@ +From f294405ab98594aa41269507dde95cc89bb20a61 Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Tue, 14 May 2024 08:41:53 +0000 +Subject: [PATCH] net/af_xdp: count mbuf allocation failures + +[ upstream commit 
f294405ab98594aa41269507dde95cc89bb20a61 ] + +Failures to allocate mbufs in the receive path were not being +accounted for in the ethdev statistics. Fix this. + +Bugzilla ID: 1429 +Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD") +Cc: stable@dpdk.org + +Reported-by: Stephen Hemminger +Signed-off-by: Ciara Loftus +Acked-by: Stephen Hemminger +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 0997543ee5..0db761a204 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -320,6 +320,7 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + unsigned long rx_bytes = 0; + int i; + struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE]; ++ struct rte_eth_dev *dev = &rte_eth_devices[rxq->port]; + + nb_pkts = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx); + +@@ -347,6 +348,8 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + * xsk_ring_cons__peek + */ + rx->cached_cons -= nb_pkts; ++ dev->data->rx_mbuf_alloc_failed += nb_pkts; ++ + return 0; + } + +@@ -398,6 +401,7 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + int i; + uint32_t free_thresh = fq->size >> 1; + struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE]; ++ struct rte_eth_dev *dev = &rte_eth_devices[rxq->port]; + + if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh) + (void)reserve_fill_queue(umem, nb_pkts, NULL, fq); +@@ -416,6 +420,7 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + * xsk_ring_cons__peek + */ + rx->cached_cons -= nb_pkts; ++ dev->data->rx_mbuf_alloc_failed += nb_pkts; + return 0; + } + +-- +2.33.0 + diff --git a/0473-net-af_xdp-fix-stats-reset.patch b/0473-net-af_xdp-fix-stats-reset.patch new file mode 100644 index 0000000..6899866 --- /dev/null +++ b/0473-net-af_xdp-fix-stats-reset.patch @@ -0,0 +1,75 @@ +From 
3577b1ee97dcfe9ff1b9e050bed55f17c435a93d Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Tue, 14 May 2024 08:41:54 +0000 +Subject: [PATCH] net/af_xdp: fix stats reset + +[ upstream commit 3577b1ee97dcfe9ff1b9e050bed55f17c435a93d ] + +The imissed statistic was not properly reset because it was +read directly from the kernel statistics. To fix this, take note +of the kernel statistic when the stats are reset and deduct this +value from the kernel statistic read during statistics get. + +Bugzilla ID: 1430 +Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD") +Cc: stable@dpdk.org + +Reported-by: Stephen Hemminger +Signed-off-by: Ciara Loftus +Acked-by: Maryam Tahhan +Acked-by: Stephen Hemminger +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 0db761a204..25e67851f6 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -126,6 +126,7 @@ struct rx_stats { + uint64_t rx_pkts; + uint64_t rx_bytes; + uint64_t rx_dropped; ++ uint64_t imissed_offset; + }; + + struct pkt_rx_queue { +@@ -892,7 +893,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n"); + return -1; + } +- stats->imissed += xdp_stats.rx_dropped; ++ stats->imissed += xdp_stats.rx_dropped - rxq->stats.imissed_offset; + + stats->opackets += stats->q_opackets[i]; + stats->obytes += stats->q_obytes[i]; +@@ -905,13 +906,25 @@ static int + eth_stats_reset(struct rte_eth_dev *dev) + { + struct pmd_internals *internals = dev->data->dev_private; +- int i; ++ struct pmd_process_private *process_private = dev->process_private; ++ struct xdp_statistics xdp_stats; ++ socklen_t optlen; ++ int i, ret, fd; + + for (i = 0; i < internals->queue_cnt; i++) { + memset(&internals->rx_queues[i].stats, 0, + sizeof(struct rx_stats)); + 
memset(&internals->tx_queues[i].stats, 0, + sizeof(struct tx_stats)); ++ fd = process_private->rxq_xsk_fds[i]; ++ optlen = sizeof(struct xdp_statistics); ++ ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS, ++ &xdp_stats, &optlen) : -1; ++ if (ret != 0) { ++ AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n"); ++ return -1; ++ } ++ internals->rx_queues[i].stats.imissed_offset = xdp_stats.rx_dropped; + } + + return 0; +-- +2.33.0 + diff --git a/0474-net-af_xdp-remove-unused-local-statistic.patch b/0474-net-af_xdp-remove-unused-local-statistic.patch new file mode 100644 index 0000000..1af5cad --- /dev/null +++ b/0474-net-af_xdp-remove-unused-local-statistic.patch @@ -0,0 +1,43 @@ +From abdabad636a54660f4a260e1e9dc573900582bd4 Mon Sep 17 00:00:00 2001 +From: Ciara Loftus +Date: Tue, 14 May 2024 08:41:55 +0000 +Subject: [PATCH] net/af_xdp: remove unused local statistic + +[ upstream commit abdabad636a54660f4a260e1e9dc573900582bd4 ] + +The rx_dropped statistic is never incremented so its existence +is pointless. Remove it. + +Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD") +Cc: stable@dpdk.org + +Reported-by: Stephen Hemminger +Signed-off-by: Ciara Loftus +Acked-by: Stephen Hemminger +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 25e67851f6..4b282adb03 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -125,7 +125,6 @@ struct xsk_umem_info { + struct rx_stats { + uint64_t rx_pkts; + uint64_t rx_bytes; +- uint64_t rx_dropped; + uint64_t imissed_offset; + }; + +@@ -884,7 +883,6 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + + stats->ipackets += stats->q_ipackets[i]; + stats->ibytes += stats->q_ibytes[i]; +- stats->imissed += rxq->stats.rx_dropped; + stats->oerrors += txq->stats.tx_dropped; + fd = process_private->rxq_xsk_fds[i]; + ret = fd >= 0 ? 
getsockopt(fd, SOL_XDP, XDP_STATISTICS, +-- +2.33.0 + diff --git a/0475-bpf-disable-on-32-bit-x86.patch b/0475-bpf-disable-on-32-bit-x86.patch new file mode 100644 index 0000000..8a157b2 --- /dev/null +++ b/0475-bpf-disable-on-32-bit-x86.patch @@ -0,0 +1,56 @@ +From 4edbcc7b5313949083f7694847342b1a45659d6b Mon Sep 17 00:00:00 2001 +From: Luca Boccassi +Date: Thu, 25 Apr 2024 16:05:58 +0100 +Subject: [PATCH] bpf: disable on 32-bit x86 + +[ upstream commit 4edbcc7b5313949083f7694847342b1a45659d6b ] + +As per Intel, this is not supported, and the librte-bpf test fails on +32bit x86 kernels, so disable the library and the pmd. + +Cc: stable@dpdk.org + +Signed-off-by: Luca Boccassi +Acked-by: Stephen Hemminger +--- + drivers/net/af_xdp/meson.build | 6 ++++++ + lib/bpf/meson.build | 6 ++++++ + 2 files changed, 12 insertions(+) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 280bfa8f80..69d109ff46 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -7,6 +7,12 @@ if is_windows + subdir_done() + endif + ++if arch_subdir == 'x86' and dpdk_conf.get('RTE_ARCH_32') ++ build = false ++ reason = 'not supported on 32-bit x86' ++ subdir_done() ++endif ++ + sources = files('rte_eth_af_xdp.c') + + libxdp_ver = '>=1.2.2' +diff --git a/lib/bpf/meson.build b/lib/bpf/meson.build +index cd739bb827..aa258a9061 100644 +--- a/lib/bpf/meson.build ++++ b/lib/bpf/meson.build +@@ -7,6 +7,12 @@ if is_windows + subdir_done() + endif + ++if arch_subdir == 'x86' and dpdk_conf.get('RTE_ARCH_32') ++ build = false ++ reason = 'not supported on 32-bit x86' ++ subdir_done() ++endif ++ + sources = files('bpf.c', + 'bpf_dump.c', + 'bpf_exec.c', +-- +2.33.0 + diff --git a/0476-net-af_xdp-parse-UMEM-map-info-from-mempool.patch b/0476-net-af_xdp-parse-UMEM-map-info-from-mempool.patch new file mode 100644 index 0000000..8309eed --- /dev/null +++ b/0476-net-af_xdp-parse-UMEM-map-info-from-mempool.patch @@ -0,0 +1,104 @@ +From
97039941b217c7a5aa490099379ddc7f8665f3a4 Mon Sep 17 00:00:00 2001 +From: Frank Du +Date: Thu, 20 Jun 2024 11:25:23 +0800 +Subject: [PATCH] net/af_xdp: parse UMEM map info from mempool + +[ upstream commit 97039941b217c7a5aa490099379ddc7f8665f3a4 ] + +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current calculation assumes that the mbufs are contiguous. However, +this assumption is incorrect when the mbuf memory spans across huge +page. + +Correct to directly read with mempool get range API. + +Fixes: d8a210774e1d ("net/af_xdp: support unaligned umem chunks") +Cc: stable@dpdk.org + +Signed-off-by: Frank Du +Acked-by: Morten Brørup +Acked-by: Ferruh Yigit +--- + drivers/net/af_xdp/rte_eth_af_xdp.c | 42 ++++++++++++++--------------- + 1 file changed, 20 insertions(+), 22 deletions(-) + +diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c +index 4b282adb03..0bc0d9a55a 100644 +--- a/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -1067,19 +1067,6 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused, + } + + #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) +-static inline uintptr_t get_base_addr(struct rte_mempool *mp, uint64_t *align) +-{ +- struct rte_mempool_memhdr *memhdr; +- uintptr_t memhdr_addr, aligned_addr; +- +- memhdr = STAILQ_FIRST(&mp->mem_list); +- memhdr_addr = (uintptr_t)memhdr->addr; +- aligned_addr = memhdr_addr & ~(getpagesize() - 1); +- *align = memhdr_addr - aligned_addr; +- +- return aligned_addr; +-} +- + /* Check if the netdev,qid context already exists */ + static inline bool + ctx_exists(struct pkt_rx_queue *rxq, const char *ifname, +@@ -1150,9 +1137,10 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + .fill_size = ETH_AF_XDP_DFLT_NUM_DESCS * 2, + .comp_size = ETH_AF_XDP_DFLT_NUM_DESCS, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG}; +- void *base_addr = NULL; + struct rte_mempool *mb_pool = rxq->mb_pool; +- 
uint64_t umem_size, align = 0; ++ void *aligned_addr; ++ uint64_t umem_size; ++ struct rte_mempool_mem_range_info range; + + if (internals->shared_umem) { + if (get_shared_umem(rxq, internals->if_name, &umem) < 0) +@@ -1184,19 +1172,29 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + } + + umem->mb_pool = mb_pool; +- base_addr = (void *)get_base_addr(mb_pool, &align); +- umem_size = (uint64_t)mb_pool->populated_size * +- (uint64_t)usr_config.frame_size + +- align; +- +- ret = xsk_umem__create(&umem->umem, base_addr, umem_size, ++ ret = rte_mempool_get_mem_range(mb_pool, &range); ++ if (ret < 0) { ++ AF_XDP_LOG(ERR, "Failed(%d) to get range from mempool\n", ret); ++ goto err; ++ } ++ if (!range.is_contiguous) { ++ AF_XDP_LOG(ERR, "Can't mapped to umem as mempool is not contiguous\n"); ++ goto err; ++ } ++ /* ++ * umem requires the memory area be page aligned, safe to map with a large area as ++ * the memory pointer for each XSK TX/RX descriptor is derived from mbuf data area. 
++ */ ++ aligned_addr = (void *)RTE_ALIGN_FLOOR((uintptr_t)range.start, getpagesize()); ++ umem_size = range.length + RTE_PTR_DIFF(range.start, aligned_addr); ++ ret = xsk_umem__create(&umem->umem, aligned_addr, umem_size, + &rxq->fq, &rxq->cq, &usr_config); + if (ret) { + AF_XDP_LOG(ERR, "Failed to create umem [%d]: [%s]\n", + errno, strerror(errno)); + goto err; + } +- umem->buffer = base_addr; ++ umem->buffer = aligned_addr; + + if (internals->shared_umem) { + umem->max_xsks = mb_pool->populated_size / +-- +2.33.0 + diff --git a/0477-ethdev-move-driver-interface-functions-to-its-own-fi.patch b/0477-ethdev-move-driver-interface-functions-to-its-own-fi.patch new file mode 100644 index 0000000..2e7b4b1 --- /dev/null +++ b/0477-ethdev-move-driver-interface-functions-to-its-own-fi.patch @@ -0,0 +1,199 @@ +From 226962a2920959977ecc1cf7ab17f250dea60b29 Mon Sep 17 00:00:00 2001 +From: Ferruh Yigit +Date: Fri, 11 Feb 2022 19:11:43 +0000 +Subject: [PATCH] ethdev: move driver interface functions to its own file + +[ upstream commit 4b4f810e47647f9deeacf04aa1e332b548f61461 ] + +ethdev has two interfaces, one interface between applications and +library, these APIs are declared in the rte_ethdev.h public header. +Other interface is between drivers and library, these functions are +declared in ethdev_driver.h and marked as internal. + +But all functions are defined in rte_ethdev.c file. This patch moves +functions for drivers to its own file, ethdev_driver.c for cleanup, no +functional change in functions. + +Some public APIs and driver helpers call common internal functions, +which were mostly static since both were in same file. To be able to +move driver helpers, common functions are moved to ethdev_private.c. +(ethdev_private.c is used for functions that are internal to the library +and shared by multiple .c files in the ethdev library.) 
+ +Signed-off-by: Ferruh Yigit +Acked-by: Thomas Monjalon +--- + lib/ethdev/ethdev_driver.c | 11 +++++++++++ + lib/ethdev/ethdev_driver.h | 18 ++++++++++++++++++ + lib/ethdev/version.map | 1 + + lib/mempool/rte_mempool.c | 34 ++++++++++++++++++++++++++++++++++ + lib/mempool/rte_mempool.h | 33 +++++++++++++++++++++++++++++++++ + lib/mempool/version.map | 2 ++ + 6 files changed, 99 insertions(+) + +diff --git a/lib/ethdev/ethdev_driver.c b/lib/ethdev/ethdev_driver.c +index fb7323f..b2fa68d 100644 +--- a/lib/ethdev/ethdev_driver.c ++++ b/lib/ethdev/ethdev_driver.c +@@ -11,3 +11,14 @@ rte_eth_pkt_burst_dummy(void *queue __rte_unused, + { + return 0; + } ++ ++struct rte_eth_dev * ++rte_eth_dev_get_by_name(const char *name) ++{ ++ uint16_t pid; ++ ++ if (rte_eth_dev_get_port_by_name(name, &pid)) ++ return NULL; ++ ++ return &rte_eth_devices[pid]; ++} +\ No newline at end of file +diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h +index 6f539d4..e057348 100644 +--- a/lib/ethdev/ethdev_driver.h ++++ b/lib/ethdev/ethdev_driver.h +@@ -1726,6 +1726,24 @@ rte_eth_hairpin_queue_peer_bind(uint16_t cur_port, uint16_t cur_queue, + struct rte_hairpin_peer_info *peer_info, + uint32_t direction); + ++/** ++ * @internal ++ * Get rte_eth_dev from device name. 
The device name should be specified ++ * as below: ++ * - PCIe address (Domain:Bus:Device.Function), for example 0000:2:00.0 ++ * - SoC device name, for example fsl-gmac0 ++ * - vdev dpdk name, for example net_[pcap0|null0|tap0] ++ * ++ * @param name ++ * PCI address or name of the device ++ * @return ++ * - rte_eth_dev if successful ++ * - NULL on failure ++ */ ++__rte_internal ++struct rte_eth_dev* ++rte_eth_dev_get_by_name(const char *name); ++ + /** + * @internal + * Reset the current queue state and configuration to disconnect (unbind) it +diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map +index 44cbe04..40dca3d 100644 +--- a/lib/ethdev/version.map ++++ b/lib/ethdev/version.map +@@ -282,6 +282,7 @@ INTERNAL { + rte_eth_dev_callback_process; + rte_eth_dev_create; + rte_eth_dev_destroy; ++ rte_eth_dev_get_by_name; + rte_eth_dev_is_rx_hairpin_queue; + rte_eth_dev_is_tx_hairpin_queue; + rte_eth_dev_probing_finish; +diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c +index 871f4d1..14c4e34 100644 +--- a/lib/mempool/rte_mempool.c ++++ b/lib/mempool/rte_mempool.c +@@ -1383,6 +1383,40 @@ struct mempool_callback_data { + void *user_data; + }; + ++int rte_mempool_get_mem_range(const struct rte_mempool *mp, ++ struct rte_mempool_mem_range_info *mem_range) ++{ ++ void *address_low = (void *)UINTPTR_MAX; ++ void *address_high = 0; ++ size_t address_diff = 0; ++ size_t total_size = 0; ++ struct rte_mempool_memhdr *hdr; ++ ++ if (mp == NULL || mem_range == NULL) ++ return -EINVAL; ++ ++ /* go through memory chunks and find the lowest and highest addresses */ ++ STAILQ_FOREACH(hdr, &mp->mem_list, next) { ++ if (address_low > hdr->addr) ++ address_low = hdr->addr; ++ if (address_high < RTE_PTR_ADD(hdr->addr, hdr->len)) ++ address_high = RTE_PTR_ADD(hdr->addr, hdr->len); ++ total_size += hdr->len; ++ } ++ ++ /* check if mempool was not populated yet (no memory chunks) */ ++ if (address_low == (void *)UINTPTR_MAX) ++ return -EINVAL; ++ ++ address_diff 
= (size_t)RTE_PTR_DIFF(address_high, address_low); ++ ++ mem_range->start = address_low; ++ mem_range->length = address_diff; ++ mem_range->is_contiguous = (total_size == address_diff) ? true : false; ++ ++ return 0; ++} ++ + static void + mempool_event_callback_invoke(enum rte_mempool_event event, + struct rte_mempool *mp) +diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h +index 1e7a3c1..9933498 100644 +--- a/lib/mempool/rte_mempool.h ++++ b/lib/mempool/rte_mempool.h +@@ -1864,6 +1864,39 @@ int + rte_mempool_event_callback_register(rte_mempool_event_callback *func, + void *user_data); + ++/** ++ * A structure used to retrieve information about the memory range ++ * of the mempool. ++ */ ++struct rte_mempool_mem_range_info { ++ /** Start of the memory range used by mempool objects */ ++ void *start; ++ /** Length of the memory range used by mempool objects */ ++ size_t length; ++ /** Are all memory addresses used by mempool objects contiguous */ ++ bool is_contiguous; ++}; ++ ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Get information about the memory range used to store objects in the mempool. ++ * ++ * @param[in] mp ++ * Pointer to an initialized mempool. ++ * @param[out] mem_range ++ * Pointer to struct which is used to return lowest address, ++ * length of the memory range containing all the addresses, ++ * and whether these addresses are contiguous. ++ * @return ++ * 0 on success, -EINVAL if mempool is not valid or mem_range is NULL. ++ **/ ++__rte_experimental ++int ++rte_mempool_get_mem_range(const struct rte_mempool *mp, ++ struct rte_mempool_mem_range_info *mem_range); ++ + /** + * @internal + * Unregister a callback added with rte_mempool_event_callback_register(). 
+diff --git a/lib/mempool/version.map b/lib/mempool/version.map +index 1b7d7c5..b1240da 100644 +--- a/lib/mempool/version.map ++++ b/lib/mempool/version.map +@@ -63,6 +63,8 @@ EXPERIMENTAL { + __rte_mempool_trace_ops_alloc; + __rte_mempool_trace_ops_free; + __rte_mempool_trace_set_ops_byname; ++ # added in 24.07 ++ rte_mempool_get_mem_range; + }; + + INTERNAL { +-- +2.33.0 + diff --git a/0478-adapt-libbpf-0.8.0.patch b/0478-adapt-libbpf-0.8.0.patch new file mode 100644 index 0000000..dc2ea1c --- /dev/null +++ b/0478-adapt-libbpf-0.8.0.patch @@ -0,0 +1,26 @@ +From 1183699a4305850fd10c766f24adbe989a7dc39b Mon Sep 17 00:00:00 2001 +From: hkk +Date: Fri, 20 Sep 2024 16:18:05 +0800 +Subject: [PATCH] adapt libbpf 0.8.0 + +--- + drivers/net/af_xdp/meson.build | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build +index 69d109f..3759f8b 100644 +--- a/drivers/net/af_xdp/meson.build ++++ b/drivers/net/af_xdp/meson.build +@@ -34,8 +34,7 @@ if cc.has_header('linux/if_xdp.h') + endif + elif bpf_dep.found() and cc.has_header('bpf/xsk.h') and cc.has_header('bpf/bpf.h') + # libxdp not found. Rely solely on libbpf for xsk functionality +- # which is only available in versions <= v0.6.0. 
+- bpf_ver_dep = dependency('libbpf', version : '<=0.6.0', ++ bpf_ver_dep = dependency('libbpf', + required: false, method: 'pkg-config') + if bpf_ver_dep.found() + ext_deps += bpf_dep +-- +2.33.0 + diff --git a/dpdk.spec b/dpdk.spec index df9bfa9..2193c96 100644 --- a/dpdk.spec +++ b/dpdk.spec @@ -1,6 +1,6 @@ Name: dpdk Version: 21.11 -Release: 69 +Release: 70 Packager: packaging@6wind.com URL: http://dpdk.org %global source_version 21.11 @@ -467,8 +467,50 @@ Patch6429: 0430-app-testpmd-fix-crash-in-multi-process-forwarding.patch Patch6430: 0431-dma-hisilicon-remove-support-for-HIP09-platform.patch Patch6431: 0432-net-hns3-support-new-device.patch Patch6432: 0433-fix-mode4-with-dedicated-queues.patch - Patch6433: 0434-net-hns3-check-Rx-DMA-address-alignmnent.patch + +patch6434: 0434-net-af_xdp-fix-build-with-Wunused-function.patch +patch6435: 0435-net-af_xdp-use-libxdp-if-available.patch +patch6436: 0436-net-af_xdp-make-UMEM-configure-more-readable.patch +patch6437: 0437-net-af_xdp-re-enable-secondary-process-support.patch +patch6438: 0438-ethdev-introduce-generic-dummy-packet-burst-function.patch +patch6439: 0439-net-af_xdp-allow-probing-when-multiprocess-is-disabl.patch +patch6440: 0440-net-af_xdp-add-missing-trailing-newline-in-logs.patch +patch6441: 0441-net-af_xdp-make-compatible-with-libbpf-0.7.0.patch +patch6442: 0442-net-af_xdp-ensure-socket-is-deleted-on-Rx-queue-setu.patch +patch6443: 0443-net-af_xdp-reserve-fill-queue-before-socket-create.patch +patch6444: 0444-net-af_xdp-fix-custom-program-loading-with-multiple-.patch +patch6445: 0445-net-af_xdp-fix-shared-UMEM-fill-queue-reserve.patch +patch6446: 0446-net-af_xdp-allow-using-copy-mode-in-XSK.patch + +patch6450: 0450-net-af_xdp-move-XDP-library-presence-flag-setting.patch +patch6451: 0451-net-af_xdp-make-clear-which-libxdp-version-is-requir.patch +patch6452: 0452-net-af_xdp-avoid-version-based-check-for-shared-UMEM.patch +patch6453: 0453-net-af_xdp-avoid-version-based-check-for-program-loa.patch 
+patch6454: 0454-net-af_xdp-add-log-on-XDP-program-removal-failures.patch +patch6455: 0455-net-af_xdp-make-compatible-with-libbpf-0.8.0.patch +patch6456: 0456-drivers-mark-SW-PMDs-to-support-disabling-IOVA-as-PA.patch +patch6458: 0458-net-af_xdp-parse-NUMA-node-ID-from-sysfs.patch +patch6459: 0459-net-af_xdp-support-CNI-Integration.patch +patch6460: 0460-net-af_xdp-fix-socket-handler-validation.patch +patch6461: 0461-build-clarify-configuration-without-IOVA-field-in-mb.patch +patch6462: 0462-rework-atomic-intrinsics-fetch-operations.patch +patch6463: 0463-net-af_xdp-fix-missing-UMEM-feature.patch +patch6464: 0464-net-af_xdp-fix-Rx-and-Tx-queue-state.patch +patch6465: 0465-net-af_xdp-avoid-error-log-for-virtual-interfaces.patch +patch6466: 0466-net-af_xdp-fix-memzone-leak-on-config-failure.patch +patch6467: 0467-net-af_xdp-fix-leak-on-XSK-configuration-failure.patch + +patch6469: 0469-net-af_xdp-fix-multi-interface-support-for-k8s.patch +patch6470: 0470-net-af_xdp-support-AF_XDP-device-plugin-pinned-maps.patch +patch6471: 0471-net-af_xdp-fix-port-ID-in-Rx-mbuf.patch +patch6472: 0472-net-af_xdp-count-mbuf-allocation-failures.patch +patch6473: 0473-net-af_xdp-fix-stats-reset.patch +patch6474: 0474-net-af_xdp-remove-unused-local-statistic.patch +patch6475: 0475-bpf-disable-on-32-bit-x86.patch +patch6476: 0476-net-af_xdp-parse-UMEM-map-info-from-mempool.patch +patch6477: 0477-ethdev-move-driver-interface-functions-to-its-own-fi.patch +patch6478: 0478-adapt-libbpf-0.8.0.patch Summary: Data Plane Development Kit core Group: System Environment/Libraries @@ -484,6 +526,7 @@ BuildRequires: uname-build-checks BuildRequires: chrpath BuildRequires: groff-base BuildRequires: libibverbs +BuildRequires: libbpf-devel libbpf %define kern_devel_ver %(uname -r) %define arch_type %(uname -m) @@ -520,6 +563,7 @@ This package contains the pdump tool for capture the dpdk network packets. 
%build export CFLAGS="%{optflags}" +export CFLAGS+=" -Wno-error=deprecated-declarations" %ifarch sw_64 meson build -Dplatform=generic -Dexamples=l3fwd-power,ethtool,kni,dma,ptpclient %else @@ -625,6 +669,47 @@ strip -g $RPM_BUILD_ROOT/lib/modules/%{kern_devel_ver}/extra/dpdk/igb_uio.ko /usr/sbin/depmod %changelog +* Fri Sep 6 2024 hankangknag - 21.11-70 + Sync some patches from upstream about bugfix, modifications are as follows: +- net/af_xdp: parse UMEM map info from mempool +- bpf: disable on 32-bit x86 +- net/af_xdp: remove unused local statistic +- net/af_xdp: fix stats reset +- net/af_xdp: count mbuf allocation failures +- net/af_xdp: fix port ID in Rx mbuf +- net/af_xdp: support AF_XDP device plugin pinned maps +- net/af_xdp: fix multi-interface support for k8s +- net/af_xdp: fix leak on XSK configuration failure +- net/af_xdp: fix memzone leak on config failure +- net/af_xdp: avoid error log for virtual interfaces +- net/af_xdp: fix Rx and Tx queue state +- net/af_xdp: fix missing UMEM feature +- rework atomic intrinsics fetch operations +- build: clarify configuration without IOVA field in mbuf +- net/af_xdp: fix socket handler validation +- net/af_xdp: support CNI Integration +- net/af_xdp: parse NUMA node ID from sysfs +- drivers: mark SW PMDs to support disabling IOVA as PA +- net/af_xdp: make compatible with libbpf 0.8.0 +- net/af_xdp: add log on XDP program removal failures +- net/af_xdp: avoid version-based check for program load +- net/af_xdp: avoid version-based check for shared UMEM +- net/af_xdp: make clear which libxdp version is required +- net/af_xdp: move XDP library presence flag setting +- net/af_xdp: allow using copy mode in XSK +- net/af_xdp: fix shared UMEM fill queue reserve +- net/af_xdp: fix custom program loading with multiple queues +- net/af_xdp: reserve fill queue before socket create +- net/af_xdp: ensure socket is deleted on Rx queue setup error +- net/af_xdp: make compatible with libbpf >= 0.7.0 +- net/af_xdp: add missing trailing
newline in logs +- net/af_xdp: allow probing when multiprocess is disabled +- ethdev: introduce generic dummy packet burst function +- net/af_xdp: re-enable secondary process support +- net/af_xdp: make UMEM configure more readable +- net/af_xdp: use libxdp if available +- net/af_xdp: fix build with -Wunused-function + * Tue Aug 13 2024 chenyi - 21.11-69 Sync some patches from upstream about bugfix, modifies are as follow: @@ -633,7 +718,7 @@ strip -g $RPM_BUILD_ROOT/lib/modules/%{kern_devel_ver}/extra/dpdk/igb_uio.ko * Thu Jul 11 2024 hankangknag - 21.11-68 Sync some patches from upstream about bugfix, modifies are as follow: - - mlx5: device should be started to set the flow + - mlx5: device should be started to set the flow * Mon Jun 3 2024 chenyi - 21.11-67 Sync some patches from upstream about bugfix, modifies @@ -930,11 +1015,11 @@ strip -g $RPM_BUILD_ROOT/lib/modules/%{kern_devel_ver}/extra/dpdk/igb_uio.ko - firmware version - RSS RETA - module eeprom information - - Rx/Tx burst mode + - Rx/Tx burst mode - Rx/Tx descriptor * Wed Nov 16 2022 chenjiji - 21.11-25 - Sync some patches for bonding PMD and testpmd. And patchs + Sync some patches for bonding PMD and testpmd. And patchs are as follows: - app/testpmd: revert MAC update in checksum forwarding - net/bonding: fix bond4 drop valid MAC packets @@ -946,7 +1031,7 @@ strip -g $RPM_BUILD_ROOT/lib/modules/%{kern_devel_ver}/extra/dpdk/igb_uio.ko - app/testpmd: add SW L4 checksum in multi-segments - app/testpmd: fix L4 checksum in multi-segments - net/bonding: fix mbuf fast free handling - + * Tue Nov 15 2022 jiangheng - 21.11-24 - proc-info: add gazelle-proc-info support in dpdk @@ -957,7 +1042,7 @@ strip -g $RPM_BUILD_ROOT/lib/modules/%{kern_devel_ver}/extra/dpdk/igb_uio.ko - set platform to generic for compatibility * Sat Oct 29 2022 chenjiji - 21.11-21 - Sync some patches for bonding PMD and testpmd. And patchs + Sync some patches for bonding PMD and testpmd. 
And patchs are as follows: - net/bonding: fix Tx hash for TCP - net/bonding: add link speeds configuration -- Gitee