From b6ac52e2e9d130956d8675d85dc629bd01d91fd4 Mon Sep 17 00:00:00 2001 From: Xinghai Cen Date: Tue, 7 Jan 2025 19:36:49 +0800 Subject: [PATCH] libhns: Fixed several bugs in libhns to openEuler-24.03-LTS Fixed several bugs in libhns: libhns: Add error logs to help diagnosis libhns: Fix coredump during QP destruction when send_cq == recv_cq libhns: Fix memory leakage when DCA is enabled libhns: Fix the exception branch of wr_start() is not locked libhns: Fix reference to uninitialized cq pointer libhns: Fix out-of-order issue of requester when setting FENCE --- ...f-order-issue-of-requester-when-sett.patch | 50 ++++ ...eference-to-uninitialized-cq-pointer.patch | 68 +++++ ...xception-branch-of-wr_start-is-not-l.patch | 46 ++++ ...x-memory-leakage-when-DCA-is-enabled.patch | 39 +++ ...ump-during-QP-destruction-when-send_.patch | 54 ++++ ...hns-Add-error-logs-to-help-diagnosis.patch | 243 ++++++++++++++++++ rdma-core.spec | 54 +++- 7 files changed, 545 insertions(+), 9 deletions(-) create mode 100644 0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch create mode 100644 0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch create mode 100644 0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch create mode 100644 0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch create mode 100644 0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch create mode 100644 0042-libhns-Add-error-logs-to-help-diagnosis.patch diff --git a/0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch b/0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch new file mode 100644 index 0000000..18722b8 --- /dev/null +++ b/0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch @@ -0,0 +1,50 @@ +From 1e168586452dc8ee4b41384d4ba5030408ba28f7 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Fri, 8 Nov 2024 17:04:09 +0800 +Subject: [PATCH] libhns: Fix out-of-order issue of requester when + setting FENCE + +mainline inclusion +from 
mainline-master +commit c4119911c212aaa552c9cb928fba0a696640c9b5 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB3ZHQ +CVE: NA +Reference: https://github.com/linux-rdma/rdma-core/pull/1513/commits/c4119911c212aaa552c9cb928fba0a696640c9b5 + +---------------------------------------------------------------------- + +The FENCE indicator in hns WQE doesn't ensure that response data from +a previous Read/Atomic operation has been written to the requester's +memory before the subsequent Send/Write operation is processed. This +may result in the subsequent Send/Write operation accessing the original +data in memory instead of the expected response data. + +Unlike FENCE, the SO (Strong Order) indicator blocks the subsequent +operation until the previous response data is written to memory and a +bresp is returned. Set the SO indicator instead of FENCE to maintain +strict order. + +Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE") +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 9371150..2debcb3 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1527,7 +1527,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + + hr_reg_write_bool(wqe, RCWQE_CQE, + !!(wr->send_flags & IBV_SEND_SIGNALED)); +- hr_reg_write_bool(wqe, RCWQE_FENCE, ++ hr_reg_write_bool(wqe, RCWQE_SO, + !!(wr->send_flags & IBV_SEND_FENCE)); + hr_reg_write_bool(wqe, RCWQE_SE, + !!(wr->send_flags & IBV_SEND_SOLICITED)); +-- +2.25.1 + diff --git a/0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch b/0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch new file mode 100644 index 0000000..21771f5 --- /dev/null +++ 
b/0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch @@ -0,0 +1,68 @@ +From 18ec396f9e56062a97207643a4c0c453f24e07b1 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Fri, 8 Nov 2024 17:04:08 +0800 +Subject: [PATCH] libhns: Fix reference to uninitialized cq pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: 8bit + +mainline inclusion +from mainline-master +commit 18e3117cdd161a3f40b8a917f24cfb5227a1d75a +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB3ZHQ +CVE: NA +Reference: https://github.com/linux-rdma/rdma-core/pull/1513/commits/18e3117cdd161a3f40b8a917f24cfb5227a1d75a + +---------------------------------------------------------------------- + +For QPs which do not have an SQ, such as XRC TGT, the send_cq +pointer will not be initialized. Since the supported max_gs +will be 0 in this case, check it and return before referencing +the send_cq pointer. + +Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE") +Signed-off-by: Chengchang Tang +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 2debcb3..465ef1e 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1579,7 +1579,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); + struct hns_roce_qp *qp = to_hr_qp(ibvqp); + struct hns_roce_sge_info sge_info = {}; +- struct hns_roce_rc_sq_wqe *wqe; ++ struct hns_roce_rc_sq_wqe *wqe = NULL; + struct ibv_qp_attr attr = {}; + unsigned int wqe_idx, nreq; + int ret; +@@ -1595,15 +1595,15 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + sge_info.start_idx = qp->next_sge; /* start index of extend 
sge */ + + for (nreq = 0; wr; ++nreq, wr = wr->next) { +- if (hns_roce_v2_wq_overflow(&qp->sq, nreq, +- to_hr_cq(qp->verbs_qp.qp.send_cq))) { +- ret = ENOMEM; ++ if (wr->num_sge > (int)qp->sq.max_gs) { ++ ret = qp->sq.max_gs > 0 ? EINVAL : EOPNOTSUPP; + *bad_wr = wr; + goto out; + } + +- if (wr->num_sge > qp->sq.max_gs) { +- ret = EINVAL; ++ if (hns_roce_v2_wq_overflow(&qp->sq, nreq, ++ to_hr_cq(qp->verbs_qp.qp.send_cq))) { ++ ret = ENOMEM; + *bad_wr = wr; + goto out; + } +-- +2.25.1 + diff --git a/0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch b/0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch new file mode 100644 index 0000000..1b915fc --- /dev/null +++ b/0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch @@ -0,0 +1,46 @@ +From 816341b19c5e7138d44f3366afcc92d6edf8f434 Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Wed, 12 Jun 2024 17:11:13 +0800 +Subject: [PATCH] libhns: Fix the exception branch of wr_start() is not locked + +mainline inclusion +from mainline-master +commit d1d0ac518e20dc1eca4fcc62d091c69e20b114da +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB66RT +CVE: NA +Reference: +https://github.com/linux-rdma/rdma-core/pull/commits/d1d0ac518e20dc1eca4fcc62d091c69e20b114da + +------------------------------------------------------------------ + +The provider should provide locking to ensure that ibv_wr_start() +and ibv_wr_complete()/abort() form a per-QP critical section +where no other threads can enter. + +The exception branch of wr_start() is not locked, fix it here. +Because check_qp_send () does not require lock protection, +hns_roce_spin_lock () is placed after check_qp_send (). 
+ +Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") +Signed-off-by: wenglianfa +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 465ef1e..e4232ea 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2930,6 +2930,7 @@ static void wr_start(struct ibv_qp_ex *ibv_qp) + + ret = check_qp_send(qp, ctx); + if (ret) { ++ hns_roce_spin_lock(&qp->sq.hr_lock); + qp->err = ret; + return; + } +-- +2.25.1 + diff --git a/0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch b/0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch new file mode 100644 index 0000000..80a9c3b --- /dev/null +++ b/0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch @@ -0,0 +1,39 @@ +From a73b31041073a45b04535c4183b39195bbebe514 Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Thu, 25 Jul 2024 11:06:01 +0800 +Subject: [PATCH] libhns: Fix memory leakage when DCA is enabled + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB66RT + +------------------------------------------------------------------ + +After DCA is enabled and a QP is created, the memory block +applied for DCA is not free when the QP is destroyed. Here +fix it. 
+ +Fixes:26e79aa7eb8d ("libhns: Add support for attaching QP's WQE buffer") +Signed-off-by: wenglianfa +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index e30880c..c733b21 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1357,7 +1357,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp) + + if (qp->rq.wqe_cnt) + free(qp->rq.wrid); +- hns_roce_free_buf(&qp->buf); ++ else ++ hns_roce_free_buf(&qp->buf); + } + + static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, +-- +2.25.1 + diff --git a/0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch b/0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch new file mode 100644 index 0000000..68e5aaf --- /dev/null +++ b/0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch @@ -0,0 +1,54 @@ +From acd954b18ca6075ae13f7a10f65bcf7eb9ac107e Mon Sep 17 00:00:00 2001 +From: Yuyu Li +Date: Mon, 25 Nov 2024 16:13:48 +0800 +Subject: [PATCH] libhns: Fix coredump during QP destruction when send_cq + == recv_cq + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB7JZL + +------------------------------------------------------------------ + +If the specified send CQ and recv CQ are both +the same CQ, the QP node in SCQ is not deleted. +which causes a segfault to occur when recreating +the QP. Here fix it. 
+ +coredump info: +0x0000ffff8fbc37d4 in list_add_before_ +0x0000ffff8fbc381c in list_add_tail_ +0x0000ffff8fbc9d9c in add_qp_to_cq_list +0x0000ffff8fbca008 in create_qp +0x0000ffff8fbca110 in hns_roce_u_create_qp +0x0000ffff8feae39c in __ibv_create_qp_1_1 +0x0000000000401420 in test_ctrl_path + +Fixes:e95b5da96721 ("libhns: Support reporting wc as software mode") +Signed-off-by: Yuyu Li +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index e4232ea..c746e03 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2006,9 +2006,10 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + list_del(&qp->rcq_node); + } + +- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) { +- __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, +- NULL); ++ if (ibqp->send_cq) { ++ if (ibqp->send_cq != ibqp->recv_cq) ++ __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, ++ NULL); + list_del(&qp->scq_node); + } + +-- +2.25.1 + diff --git a/0042-libhns-Add-error-logs-to-help-diagnosis.patch b/0042-libhns-Add-error-logs-to-help-diagnosis.patch new file mode 100644 index 0000000..fe0f42b --- /dev/null +++ b/0042-libhns-Add-error-logs-to-help-diagnosis.patch @@ -0,0 +1,243 @@ +From 71c49ce25111c8e853ad509430f14d22dbe94303 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Fri, 27 Dec 2024 14:02:29 +0800 +Subject: [PATCH] libhns: Add error logs to help diagnosis + +mainline inclusion +from mainline-master +commit 7849f1b17f89b8baa0065adaf9cd04204698ea82 +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IBFGPH +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1533/commits/7849f1b17f89b8baa0065adaf9cd04204698ea82 + +---------------------------------------------------------------------- + +Add error logs to help 
diagnosis. + +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u.c | 4 +- + providers/hns/hns_roce_u_hw_v2.c | 3 ++ + providers/hns/hns_roce_u_verbs.c | 79 ++++++++++++++++++++++++++------ + 3 files changed, 70 insertions(+), 16 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index e219b9e..ec995e7 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -424,8 +424,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); +- if (context->uar == MAP_FAILED) ++ if (context->uar == MAP_FAILED) { ++ verbs_err(&context->ibv_ctx, "failed to mmap uar page.\n"); + goto err_set_attr; ++ } + + if (init_dca_context(context, cmd_fd, + &resp, ctx_attr, hr_dev->page_size)) +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index c746e03..0628646 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -3057,6 +3057,9 @@ static int fill_send_wr_ops(const struct ibv_qp_init_attr_ex *attr, + fill_send_wr_ops_ud(qp_ex); + break; + default: ++ verbs_err(verbs_get_ctx(qp_ex->qp_base.context), ++ "QP type %d not supported for qp_ex send ops.\n", ++ attr->qp_type); + return -EOPNOTSUPP; + } + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index c733b21..e9acfab 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -422,8 +422,11 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + { + struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); + +- if (!attr->cqe || attr->cqe > context->max_cqe) ++ if (!attr->cqe || attr->cqe > context->max_cqe) { ++ verbs_err(&context->ibv_ctx, "unsupported cq depth %u.\n", ++ attr->cqe); + return EINVAL; ++ } + + if (!check_comp_mask(attr->comp_mask, 
CREATE_CQ_SUPPORTED_COMP_MASK)) { + verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n", +@@ -431,8 +434,11 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + return EOPNOTSUPP; + } + +- if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) ++ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) { ++ verbs_err(&context->ibv_ctx, "unsupported wc flags 0x%llx.\n", ++ attr->wc_flags); + return EOPNOTSUPP; ++ } + + if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { + if (!pad) { +@@ -504,8 +510,11 @@ static int exec_cq_create_cmd(struct ibv_context *context, + ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex), 0); +- if (ret) ++ if (ret) { ++ verbs_err(verbs_get_ctx(context), ++ "failed to exec create cq cmd, ret = %d.\n", ret); + return ret; ++ } + + cq->cqn = resp_drv->cqn; + cq->flags = resp_drv->cap_flags; +@@ -724,13 +733,20 @@ static int verify_srq_create_attr(struct hns_roce_context *context, + struct ibv_srq_init_attr_ex *attr) + { + if (attr->srq_type != IBV_SRQT_BASIC && +- attr->srq_type != IBV_SRQT_XRC) ++ attr->srq_type != IBV_SRQT_XRC) { ++ verbs_err(&context->ibv_ctx, ++ "unsupported srq type, type = %d.\n", attr->srq_type); + return -EINVAL; ++ } + + if (!attr->attr.max_sge || + attr->attr.max_wr > context->max_srq_wr || +- attr->attr.max_sge > context->max_srq_sge) ++ attr->attr.max_sge > context->max_srq_sge) { ++ verbs_err(&context->ibv_ctx, ++ "invalid srq attr size, max_wr = %u, max_sge = %u.\n", ++ attr->attr.max_wr, attr->attr.max_sge); + return -EINVAL; ++ } + + attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr, + HNS_ROCE_MIN_SRQ_WQE_NUM); +@@ -862,8 +878,12 @@ static int exec_srq_create_cmd(struct ibv_context *context, + ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, init_attr, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex)); +- if (ret) ++ if (ret) { ++ 
verbs_err(verbs_get_ctx(context), ++ "failed to exec create srq cmd, ret = %d.\n", ++ ret); + return ret; ++ } + + srq->srqn = resp_ex.srqn; + srq->cap_flags = resp_ex.cap_flags; +@@ -1086,9 +1106,12 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, + struct ibv_qp_init_attr_ex *attr) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); ++ int ret = 0; + +- if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) +- return EOPNOTSUPP; ++ if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) { ++ ret = EOPNOTSUPP; ++ goto out; ++ } + + if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS && + !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK)) +@@ -1102,17 +1125,21 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, + case IBV_QPT_RC: + case IBV_QPT_XRC_SEND: + if (!(attr->comp_mask & IBV_QP_INIT_ATTR_PD)) +- return EINVAL; ++ ret = EINVAL; + break; + case IBV_QPT_XRC_RECV: + if (!(attr->comp_mask & IBV_QP_INIT_ATTR_XRCD)) +- return EINVAL; ++ ret = EINVAL; + break; + default: + return EOPNOTSUPP; + } + +- return 0; ++out: ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, "invalid comp_mask 0x%x.\n", ++ attr->comp_mask); ++ return ret; + } + + static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr) +@@ -1137,8 +1164,13 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, + if (cap->max_send_wr > ctx->max_qp_wr || + cap->max_recv_wr > ctx->max_qp_wr || + cap->max_send_sge > ctx->max_sge || +- cap->max_recv_sge > ctx->max_sge) ++ cap->max_recv_sge > ctx->max_sge) { ++ verbs_err(&ctx->ibv_ctx, ++ "invalid qp cap size, max_send/recv_wr = {%u, %u}, max_send/recv_sge = {%u, %u}.\n", ++ cap->max_send_wr, cap->max_recv_wr, ++ cap->max_send_sge, cap->max_recv_sge); + return -EINVAL; ++ } + + has_rq = hns_roce_qp_has_rq(attr); + if (!has_rq) { +@@ -1147,12 +1179,20 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, + } + + min_wqe_num = HNS_ROCE_V2_MIN_WQE_NUM; +- if 
(cap->max_send_wr < min_wqe_num) ++ if (cap->max_send_wr < min_wqe_num) { ++ verbs_debug(&ctx->ibv_ctx, ++ "change sq depth from %u to minimum %u.\n", ++ cap->max_send_wr, min_wqe_num); + cap->max_send_wr = min_wqe_num; ++ } + + if (cap->max_recv_wr) { +- if (cap->max_recv_wr < min_wqe_num) ++ if (cap->max_recv_wr < min_wqe_num) { ++ verbs_debug(&ctx->ibv_ctx, ++ "change rq depth from %u to minimum %u.\n", ++ cap->max_recv_wr, min_wqe_num); + cap->max_recv_wr = min_wqe_num; ++ } + + if (!cap->max_recv_sge) + return -EINVAL; +@@ -1646,6 +1686,11 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, + ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex)); ++ if (ret) { ++ verbs_err(&ctx->ibv_ctx, ++ "failed to exec create qp cmd, ret = %d.\n", ret); ++ return ret; ++ } + + qp->flags = resp_ex.drv_payload.cap_flags; + *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key; +@@ -1707,8 +1752,12 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, + { + qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE, + MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key); +- if (qp->dwqe_page == MAP_FAILED) ++ if (qp->dwqe_page == MAP_FAILED) { ++ verbs_err(verbs_get_ctx(ibv_ctx), ++ "failed to mmap direct wqe page, QPN = %u.\n", ++ qp->verbs_qp.qp.qp_num); + return -EINVAL; ++ } + + return 0; + } +-- +2.25.1 + diff --git a/rdma-core.spec b/rdma-core.spec index 8ee3dba..c5fdf9d 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,8 +1,8 @@ Name: rdma-core Version: 50.0 -Release: 12 +Release: 18 Summary: RDMA core userspace libraries and daemons -License: GPLv2 or BSD +License: GPL-2.0-only OR BSD-2-Clause AND BSD-3-Clause Url: https://github.com/linux-rdma/rdma-core Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -41,6 +41,12 @@ patch32: 
0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch patch33: 0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch patch34: 0034-librdmacm-Fix-an-overflow-bug-in-qsort-comparison-function.patch patch35: 0035-Fix-the-stride-calculation-for-MSN-PSN-area.patch +patch37: 0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch +patch38: 0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch +patch39: 0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch +patch40: 0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch +patch41: 0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch +patch42: 0042-libhns-Add-error-logs-to-help-diagnosis.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) systemd systemd-devel @@ -251,7 +257,6 @@ Obsoletes: infiniband-diags-help < %{version}-%{release} Man pages and other related documents for %{name}. %prep -%setup %autosetup -v -p1 %build @@ -313,12 +318,6 @@ install -D -m 0644 ibacm_opts.cfg %{buildroot}%{_sysconfdir}/rdma/ rm -rf %{buildroot}/%{_initrddir}/ rm -f %{buildroot}/%{_sbindir}/srp_daemon.sh -%ldconfig_scriptlets -n libibverbs - -%ldconfig_scriptlets -n libibumad - -%ldconfig_scriptlets -n librdmacm - %post if [ -x /sbin/udevadm ];then /sbin/udevadm trigger --subsystem-match=infiniband --action=change || true @@ -623,6 +622,43 @@ fi %doc %{_docdir}/%{name}-%{version}/70-persistent-ipoib.rules %changelog +* Fri Jan 3 2025 Xinghai Cen - 50.0-18 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add error logs to help diagnosis + +* Thu Nov 28 2024 Xinghai Cen - 50.0-17 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fix coredump during QP destruction when send_cq == recv_cq + +* Mon Nov 25 2024 Xinghai Cen - 50.0-16 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fixes several bugs for libhns + +* Fri Nov 15 2024 Xinghai Cen - 50.0-15 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Two bugfixes 
in post_send flow + +* Fri Nov 08 2024 Funda Wang - 50.0-14 +- adopt to new cmake macro +- migrated to SPDX license +- drop useless setup macro, cause it duplicates with autosetup +- drop useless ldconfig_scriptlets, it has been done through glibc's + filetriggers since openeuler 1.0 + +* Tue Jan 7 2025 Xinghai Cen - 50.0-13 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: No code modification, just a pile + * Wed Jul 17 2024 dfh - 50.0-12 - Type: bugfix - ID: NA - SUG: NA -- Gitee