diff --git a/0001-tests-Ensure-graceful-resource-cleaning.patch b/0001-tests-Ensure-graceful-resource-cleaning.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b42364d01014b20735e0af1b128777b963c354a --- /dev/null +++ b/0001-tests-Ensure-graceful-resource-cleaning.patch @@ -0,0 +1,89 @@ +From a9eb7409fb0b6af5dc54ae222286926b6a028ef0 Mon Sep 17 00:00:00 2001 +From: Edward Srouji +Date: Tue, 4 Mar 2025 16:02:35 +0200 +Subject: [PATCH] tests: Ensure graceful resource cleaning + +A SEGFAULT was observed recently on some environments with +python 3.12.X. +To work around the issue and ensure that the python garbage collector exits +gracefully, a new decorator was added that catches SkipTest unittest +exceptions and closes the context and its underlying resources. + +An example of a segmentation fault occurrence that this test fixes: +$ python3 tests/run_tests.py test_mlx5_dma_memcpy +sTraceback (most recent call last): + File "cq.pyx", line 359, in pyverbs.cq.CQEX.close +pyverbs.pyverbs_error.PyverbsRDMAError: Failed to destroy CQEX. Errno: +9, Bad file descriptor +Exception ignored in: 'pyverbs.cq.CQEX.__dealloc__' +Traceback (most recent call last): + File "cq.pyx", line 359, in pyverbs.cq.CQEX.close +pyverbs.pyverbs_error.PyverbsRDMAError: Failed to destroy CQEX. Errno: +9, Bad file descriptor +Segmentation fault (core dumped) + +Signed-off-by: Edward Srouji +--- + tests/base.py | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/tests/base.py b/tests/base.py +index 2738714612ec..c6ffa1beca1a 100644 +--- a/tests/base.py ++++ b/tests/base.py +@@ -3,6 +3,7 @@ + + import multiprocessing as mp + import subprocess ++import functools + import unittest + import tempfile + import random +@@ -532,7 +533,35 @@ class RDMACMBaseTest(RDMATestCase): + sys.exit(2) + + +-class BaseResources(object): ++def catch_skiptest(func): ++ """ ++ Decorator to catch unittest.SkipTest in __init__ resource functions. 
++ It gracefully closes the context and all of its underlying resources. ++ """ ++ @functools.wraps(func) ++ def wrapper(self, *args, **kwargs): ++ try: ++ func(self, *args, **kwargs) ++ except unittest.SkipTest as e: ++ if hasattr(self, 'ctx') and self.ctx: ++ self.ctx.close() ++ raise e ++ return wrapper ++ ++ ++class SkipTestMeta(type): ++ """ ++ Metaclass to automatically wrap __init__ in catch_skiptest. ++ It should only be used in resource classes, such as those inheriting from ++ BaseResources. ++ """ ++ def __new__(cls, name, bases, dct): ++ if "__init__" in dct: ++ dct["__init__"] = catch_skiptest(dct["__init__"]) ++ return super().__new__(cls, name, bases, dct) ++ ++ ++class BaseResources(object, metaclass=SkipTestMeta): + """ + BaseResources class is a base aggregator object which contains basic + resources like Context and PD. It opens a context over the given device +@@ -548,6 +577,7 @@ class BaseResources(object): + self.dev_name = dev_name + self.gid_index = gid_index + self.ib_port = ib_port ++ self.ctx = None + self.create_context() + self.create_pd() + +-- +2.49.0 + diff --git a/9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch b/9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ee61e6bd91230787f908449d7026de3556a6f9d --- /dev/null +++ b/9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch @@ -0,0 +1,42 @@ +From 1c63f25b55ca4f5317e1c85b548469bbc747e147 Mon Sep 17 00:00:00 2001 +From: "Goldman, Adam" +Date: Tue, 4 Feb 2020 08:55:20 -0500 +Subject: [PATCH] kernel-boot: Do not perform device rename on OPA devices + +PSM2 will not run with recent rdma-core releases. Several tools and +libraries like PSM2, require the hfi1 name to be present. + +Recent rdma-core releases added a new feature to rename kernel devices, +but the default configuration will not work with hfi1 fabrics. 
+ +Related opa-psm2 github issue: + https://github.com/intel/opa-psm2/issues/43 + +Fixes: 5b4099d47be3 ("kernel-boot: Perform device rename to make stable names") +Reviewed-by: Mike Marciniszyn +Signed-off-by: Goldman, Adam +--- + kernel-boot/rdma-persistent-naming.rules | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel-boot/rdma-persistent-naming.rules b/kernel-boot/rdma-persistent-naming.rules +index 6f9c53a5..3ce34ea9 100644 +--- a/kernel-boot/rdma-persistent-naming.rules ++++ b/kernel-boot/rdma-persistent-naming.rules +@@ -26,10 +26,10 @@ + # Device type = RoCE + # mlx5_0 -> rocex525400c0fe123455 + # +-ACTION=="add", SUBSYSTEM=="infiniband", PROGRAM="rdma_rename %k NAME_FALLBACK" ++ACTION=="add", SUBSYSTEM=="infiniband", KERNEL!="hfi1*", PROGRAM="rdma_rename %k NAME_FALLBACK" + + # Example: + # * NAME_FIXED + # fixed name for specific board_id + # +-#ACTION=="add", ATTR{board_id}=="MSF0010110035", SUBSYSTEM=="infiniband", PROGRAM="rdma_rename %k NAME_FIXED myib" +\ No newline at end of file ++#ACTION=="add", ATTR{board_id}=="MSF0010110035", SUBSYSTEM=="infiniband", PROGRAM="rdma_rename %k NAME_FIXED myib" +-- +2.30.1 + diff --git a/rdma-core-54.0.tar.gz b/rdma-core-54.0.tar.gz deleted file mode 100644 index 11ef5085f591f37ca776ea0d881a2aadd83c6477..0000000000000000000000000000000000000000 Binary files a/rdma-core-54.0.tar.gz and /dev/null differ diff --git a/rdma-core-57.0.tar.gz b/rdma-core-57.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d9ef0187e99508a0f70e816e89aba0c31aeed9b0 Binary files /dev/null and b/rdma-core-57.0.tar.gz differ diff --git a/rdma-core.spec b/rdma-core.spec index 88ae94f04c77974c5754b89d30b35e3b4581c8dd..e2a7f80b28a80f2be983a5d40f2b3a31a577108d 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,15 +1,18 @@ -%define anolis_release 2 +%define anolis_release 1 %bcond_with pandoc %bcond_with static %bcond_without pyverbs Name: rdma-core -Version: 54.0 +Version: 57.0 Release: 
%{anolis_release}%{?dist} Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core Source0: %{url}/releases/download/v%{version}/%{name}-%{version}.tar.gz +Source: https://github.com/linux-rdma/rdma-core/releases/download/v57.0/rdma-core-57.0.tar.gz +Patch1: 0001-tests-Ensure-graceful-resource-cleaning.patch +Patch2: 9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch BuildRequires: binutils cmake >= 2.8.11 gcc ninja-build BuildRequires: /usr/bin/rst2man perl-generators python3-docutils @@ -70,6 +73,11 @@ Obsoletes: infiniband-diags-devel < %{EVR} Obsoletes: libibmad-devel < %{EVR} BuildRequires: pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) +BuildRequires: python +BuildRequires: python-docutils +BuildRequires: make +BuildRequires: pkgconfig +BuildRequires: valgrind-devel %description devel RDMA core development libraries and headers. @@ -627,6 +635,11 @@ fi %endif %changelog +* Thu Aug 21 2025 wenyuzifang - 57.0-1 +- Updated to version 57.0 +- Apply patch to ensure graceful resource cleanup and prevent SEGFAULT during test skips +- Apply patch to ensure PSM2 works correctly with rdma-core by preserving hfi1 device names + * Sun Apr 27 2025 Shangtong Guo - 54.0-2 - add support for riscv64 build