diff --git a/0099-libhns-Add-debug-log-for-lock-free-mode.patch b/0099-libhns-Add-debug-log-for-lock-free-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..f96487d4c4f3815aa5fc015005501f520d45667c --- /dev/null +++ b/0099-libhns-Add-debug-log-for-lock-free-mode.patch @@ -0,0 +1,47 @@ +From 16fe23412aa1f7042359ce0a3ffd6e15d771d316 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 24 Apr 2025 20:32:12 +0800 +Subject: [PATCH] libhns: Add debug log for lock-free mode + +Currently there is no way to observe whether the lock-free mode is +configured from the driver's perspective. Add debug log for this. + +Signed-off-by: Junxian Huang +Signed-off-by: hbmm +--- + providers/hns/hns_roce_u_verbs.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 7418d2c6..8f5ad9a0 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -232,6 +232,7 @@ err: + struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr) + { ++ struct hns_roce_pd *protection_domain; + struct hns_roce_pad *pad; + + if (ibv_check_alloc_parent_domain(attr)) +@@ -248,12 +249,16 @@ struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, + return NULL; + } + ++ protection_domain = to_hr_pd(attr->pd); + if (attr->td) { + pad->td = to_hr_td(attr->td); + atomic_fetch_add(&pad->td->refcount, 1); ++ verbs_debug(verbs_get_ctx(context), ++ "set PAD(0x%x) to lock-free mode.\n", ++ protection_domain->pdn); + } + +- pad->pd.protection_domain = to_hr_pd(attr->pd); ++ pad->pd.protection_domain = protection_domain; + atomic_fetch_add(&pad->pd.protection_domain->refcount, 1); + + ibv_initialize_parent_domain(&pad->pd.ibv_pd, +-- +2.43.0 + diff --git a/0100-libhns-Fix-ret-not-assigned-in-create-srq.patch b/0100-libhns-Fix-ret-not-assigned-in-create-srq.patch new file mode 100644 index 
0000000000000000000000000000000000000000..075d8e80736bb6371331ffbefcde76c6af697278 --- /dev/null +++ b/0100-libhns-Fix-ret-not-assigned-in-create-srq.patch @@ -0,0 +1,50 @@ +From c007ca259ee550e8617dd507cae21d544ec34962 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:14 +0800 +Subject: [PATCH] libhns: Fix ret not assigned in create srq() + +Fix the problem that ret may not be assigned in the error flow +of create_srq(). + +Fixes: b38bae4b5b9e ("libhns: Add support for lock-free SRQ") +Fixes: b914c76318f5 ("libhns: Refactor the process of create_srq") +Signed-off-by: Junxian Huang +Signed-off-by: hbmm +--- + providers/hns/hns_roce_u_verbs.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 8f5ad9a0..fc61b0ec 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1075,16 +1075,23 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + goto err; + } + +- if (hns_roce_srq_spinlock_init(context, srq, init_attr)) ++ if (pad) ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ ++ ret = hns_roce_srq_spinlock_init(srq, init_attr); ++ if (ret) + goto err_free_srq; + + set_srq_param(context, srq, init_attr); +- if (alloc_srq_buf(srq)) ++ ret = alloc_srq_buf(srq); ++ if (ret) + goto err_destroy_lock; + + srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB); +- if (!srq->rdb) ++ if (!srq->rdb) { ++ ret = ENOMEM; + goto err_srq_buf; ++ } + + ret = exec_srq_create_cmd(context, srq, init_attr); + if (ret) +-- +2.43.0 + diff --git a/0101-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch b/0101-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch new file mode 100644 index 0000000000000000000000000000000000000000..9b8366638ce9d6b983f44eb87fbe75506274179a --- /dev/null +++ b/0101-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch @@ -0,0 +1,88 @@ +From 
4fb4ece2cc721270cec6cd34e8ab510613e7746f Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:15 +0800 +Subject: [PATCH] libhns: Fix pad refcnt leaking in error flow of create + qp/cq/srq + +Decrease pad refcnt by 1 in error flow of create qp/cq/srq. + +Fixes: 8c865c315c34 ("libhns: Add support for lock-free CQ") +Fixes: 179f015e090d ("libhns: Add support for lock-free QP") +Fixes: b38bae4b5b9e ("libhns: Add support for lock-free SRQ") +Signed-off-by: Junxian Huang +Signed-off-by: hbmm +--- + providers/hns/hns_roce_u_verbs.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index fc61b0ec..f2b9665a 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -541,11 +541,9 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) + return -EOPNOTSUPP; + +- if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { +- if (!to_hr_pad(attr->parent_domain)) { +- verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); +- return EINVAL; +- } ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD && !pad) { ++ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); ++ return EINVAL; + } + + attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, +@@ -685,6 +683,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr, + struct hnsdv_cq_init_attr *hns_cq_attr) + { ++ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); + struct hns_roce_context *hr_ctx = to_hr_ctx(context); + struct hns_roce_cq *cq; + int ret; +@@ -699,7 +698,12 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + goto err; + } + +- ret = hns_roce_cq_spinlock_init(context, cq, attr); ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { ++ cq->parent_domain = attr->parent_domain; ++ 
atomic_fetch_add(&pad->pd.refcount, 1); ++ } ++ ++ ret = hns_roce_cq_spinlock_init(cq, attr); + if (ret) + goto err_lock; + +@@ -737,6 +741,8 @@ err_db: + err_buf: + hns_roce_spinlock_destroy(&cq->hr_lock); + err_lock: ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) ++ atomic_fetch_sub(&pad->pd.refcount, 1); + free(cq); + err: + errno = abs(ret); +@@ -1122,6 +1128,8 @@ err_destroy_lock: + hns_roce_spinlock_destroy(&srq->hr_lock); + + err_free_srq: ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); + free(srq); + + err: +@@ -2078,6 +2086,8 @@ err_cmd: + err_buf: + hns_roce_qp_spinlock_destroy(qp); + err_spinlock: ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); + free(qp); + err: + if (ret < 0) +-- +2.43.0 + diff --git a/0102-libhns-Fix-freeing-pad-without-checking-refcnt.patch b/0102-libhns-Fix-freeing-pad-without-checking-refcnt.patch new file mode 100644 index 0000000000000000000000000000000000000000..02cc871b074c77e875965c533296336035333794 --- /dev/null +++ b/0102-libhns-Fix-freeing-pad-without-checking-refcnt.patch @@ -0,0 +1,56 @@ +From 8696a5644f54d31fa55d4483104ed3d6a4253652 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:16 +0800 +Subject: [PATCH] libhns: Fix freeing pad without checking refcnt + +Currently pad refcnt will be added when creating qp/cq/srq, but it is +not checked when freeing pad. Add a check to prevent freeing pad when +it is still used by any qp/cq/srq. 
+ +Fixes: ae35032532fb ("libhns: Add support for thread domain and parent domain") +Signed-off-by: Junxian Huang +Signed-off-by: hbmm +--- + providers/hns/hns_roce_u_verbs.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index f2b9665a..28e9255b 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -267,14 +267,18 @@ struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, + return &pad->pd.ibv_pd; + } + +-static void hns_roce_free_pad(struct hns_roce_pad *pad) ++static int hns_roce_free_pad(struct hns_roce_pad *pad) + { ++ if (atomic_load(&pad->pd.refcount) > 1) ++ return EBUSY; ++ + atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1); + + if (pad->td) + atomic_fetch_sub(&pad->td->refcount, 1); + + free(pad); ++ return 0; + } + + static int hns_roce_free_pd(struct hns_roce_pd *pd) +@@ -303,10 +307,8 @@ int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd) + struct hns_roce_pad *pad = to_hr_pad(ibv_pd); + struct hns_roce_pd *pd = to_hr_pd(ibv_pd); + +- if (pad) { +- hns_roce_free_pad(pad); +- return 0; +- } ++ if (pad) ++ return hns_roce_free_pad(pad); + + return hns_roce_free_pd(pd); + } +-- +2.43.0 + diff --git a/0103-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch b/0103-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..adf52fa1b5921d98540557c906a4e0da8e359991 --- /dev/null +++ b/0103-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch @@ -0,0 +1,32 @@ +From e4806c2a3adfd2b7212dccf80de45060c6dd8580 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:17 +0800 +Subject: [PATCH] verbs: Assign ibv srq->pd when creating SRQ + +Some providers need to access ibv_srq->pd during SRQ destruction, but +it may not be assigned currently when using ibv_create_srq_ex(). This +may lead to some SRQ-related resource leaks. 
Assign ibv_srq->pd when +creating SRQ to ensure pd can be obtained correctly. + +Fixes: 40c1365b2198 ("Add support for XRC SRQs") +Signed-off-by: Junxian Huang +Signed-off-by: hbmm +--- + libibverbs/cmd_srq.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/libibverbs/cmd_srq.c b/libibverbs/cmd_srq.c +index dfaaa6aa..259ea0d1 100644 +--- a/libibverbs/cmd_srq.c ++++ b/libibverbs/cmd_srq.c +@@ -63,6 +63,7 @@ static int ibv_icmd_create_srq(struct ibv_pd *pd, struct verbs_srq *vsrq, + struct verbs_xrcd *vxrcd = NULL; + enum ibv_srq_type srq_type; + ++ srq->pd = pd; + srq->context = pd->context; + pthread_mutex_init(&srq->mutex, NULL); + pthread_cond_init(&srq->cond, NULL); +-- +2.43.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 131cbd6a67a740033ba3e732fcd5fd0628cfcd1b..35631c44fc8596c32994b6905ae808bdfb7c7f15 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 41.0 -Release: 35 +Release: 36 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -104,6 +104,11 @@ patch95: 0095-libhns-Adapt-UD-inline-data-size-for-UCX.patch patch96: 0096-libhns-Fix-wrong-order-of-spin_unlock-in-modify_qp.patch patch97: 0097-libxscale-Match-dev-by-vid-and-did.patch patch98: 0098-libxscale-update-to-version-2412GA.patch +patch99: 0099-libhns-Add-debug-log-for-lock-free-mode.patch +patch100: 0100-libhns-Fix-ret-not-assigned-in-create-srq.patch +patch101: 0101-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch +patch102: 0102-libhns-Fix-freeing-pad-without-checking-refcnt.patch +patch103: 0103-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -354,6 +359,12 @@ fi %{_mandir}/* %changelog +* Sat Jul 5 2025 hbmm - 41.0-36 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: libhns: Bugfixes and one debug improvement; verbs: Assign ibv_srq->pd when creating SRQ
+ * Wed May 14 2025 Xin Tian - 41.0-35 - Type: feature - ID: NA