diff --git a/0052-libhns-Bugfix-for-wrong-timing-of-modifying-ibv_qp-s.patch b/0052-libhns-Bugfix-for-wrong-timing-of-modifying-ibv_qp-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..b83e99fc5fe4432f95968b0a096c3458bd8222f2 --- /dev/null +++ b/0052-libhns-Bugfix-for-wrong-timing-of-modifying-ibv_qp-s.patch @@ -0,0 +1,43 @@ +From ef63fff534db1e8c7d4537c543a9dc8b9773923d Mon Sep 17 00:00:00 2001 +From: Yangyang Li +Date: Fri, 1 Dec 2023 10:43:23 +0800 +Subject: [PATCH 52/54] libhns: Bugfix for wrong timing of modifying ibv_qp + state to err + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8L4YU + +-------------------------------------------------------------------------- + +Currently the QPC state in HW is modified inside the critical section of +spinlock but the ibv_qp state is modified outside. There will be a short +period when QPC state has been modified to err with ibv_qp state still +remaining RTS. WQEs during this period will still be post-send by RTS-state +ibv_qp but then dropped by err-state HW with no flush CQEs generated. + +To fix this problem, the QPC state in HW and ibv_qp state should be both +modified to err inside the critical section of spinlock. 
+ +Fixes: f1a80cc3dfe2 ("libhns: Bugfix for flush cqe in case multi-process") +Signed-off-by: Yangyang Li +--- + providers/hns/hns_roce_u_hw_v2.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index ee2fffe..78bb7e0 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1742,6 +1742,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + sizeof(resp_ex)); + + if (flag) { ++ if (!ret) ++ qp->state = IBV_QPS_ERR; + hns_roce_spin_unlock(&hr_qp->sq.hr_lock); + hns_roce_spin_unlock(&hr_qp->rq.hr_lock); + } +-- +2.25.1 + diff --git a/0053-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch b/0053-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch new file mode 100644 index 0000000000000000000000000000000000000000..77e18dfc1aa0e8f2f3ad671d92f52f993f9296d1 --- /dev/null +++ b/0053-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch @@ -0,0 +1,95 @@ +From 32842498c7b507a8f27ae404cf5e6dc5caf55192 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Thu, 7 Dec 2023 09:48:02 +0800 +Subject: [PATCH 53/54] libhns: Fix owner bit when SQ wraps around in new IO + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF9Q + +-------------------------------------------------------------------------- + +The owner bit has been written in init_rc_wqe() or init_ud_wqe() +with the correct value. And it will be overwritten by some subsequent +operations. When the SQ wraps around, the overwritten value will be +an incorrect value. + +For example, driver will assign the owner bit in the second step, +and overwrite it in the third step. + +```c +ibv_wr_start(); +ibv_wr_rdma_write(); +if (inline) + ibv_wr_set_inline_data_list(); +else + ibv_wr_set_sge_list(); +ibv_wr_complete(); +``` + +This patch removes the redundant owner bit assignment operations +in new IO. 
+ +Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") +Fixes: 163d62ca6196 ("libhns: Fix the owner bit error of sq in new io") +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u_hw_v2.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 78bb7e0..695d565 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2314,8 +2314,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge, + + wqe->msg_len = htole32(qp->sge_info.total_len); + hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); +- +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_send_rc(struct ibv_qp_ex *ibv_qp) +@@ -2507,7 +2505,6 @@ static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr, + + qp->sge_info.total_len = length; + set_inline_data_list_rc(qp, wqe, 1, &buff); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, +@@ -2525,7 +2522,6 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, + qp->sge_info.total_len += buf_list[i].length; + + set_inline_data_list_rc(qp, wqe, num_buf, buf_list); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static struct hns_roce_ud_sq_wqe * +@@ -2662,7 +2658,6 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, + hr_reg_write(wqe, UDWQE_SGE_NUM, cnt); + + qp->sge_info.start_idx += cnt; +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void set_inline_data_list_ud(struct hns_roce_qp *qp, +@@ -2728,7 +2723,6 @@ static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr, + + qp->sge_info.total_len = length; + set_inline_data_list_ud(qp, wqe, 1, &buff); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, +@@ -2746,7 +2740,6 @@ static void 
wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, + qp->sge_info.total_len += buf_list[i].length; + + set_inline_data_list_ud(qp, wqe, num_buf, buf_list); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_start(struct ibv_qp_ex *ibv_qp) +-- +2.25.1 + diff --git a/0054-libhns-Fix-missing-DB-when-compiler-does-not-support.patch b/0054-libhns-Fix-missing-DB-when-compiler-does-not-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..7bcf80c7e494d0d1b8788c77ae20fe0db1357932 --- /dev/null +++ b/0054-libhns-Fix-missing-DB-when-compiler-does-not-support.patch @@ -0,0 +1,84 @@ +From 72c68907fbeba58b306c512f1bd8e1e52b46d0f0 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Fri, 8 Dec 2023 09:49:42 +0800 +Subject: [PATCH 54/54] libhns: Fix missing DB when compiler does not support + SVE + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I8MPTX + +---------------------------------------------------------------------- + +Currently, if compiler does not support SVE, hns_roce_sve_write512() will +be an empty function, which means that this doorbell will be missed when +HNS_ROCE_QP_CAP_SVE_DIRECT_WQE is set in qp flag. + +This patch ensures that driver will at least generate the DB regardless +of whether SVE DWQE is supported or not. 
+ +Fixes: 7b1f5c5654c2 ("libhns: Add support for SVE Direct WQE function") +Signed-off-by: Chengchang Tang +Signed-off-by: Ran Zhou +--- + providers/hns/hns_roce_u_hw_v2.c | 33 +++++++++++++------------------- + 1 file changed, 13 insertions(+), 20 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 695d565..a76e67c 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -318,26 +318,22 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, + hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db); + } + +-static void hns_roce_write512(uint64_t *dest, uint64_t *val) ++static void hns_roce_qp_write512(struct hns_roce_qp *qp, uint64_t *val) + { +- mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe)); +-} ++ uint64_t *dest = qp->sq.db_reg; + + #if defined(HNS_SVE) +-static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val) +-{ +- asm volatile( +- "ldr z0, [%0]\n" +- "str z0, [%1]\n" +- ::"r" (val), "r"(dest):"cc", "memory" +- ); +-} +-#else +-static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val) +-{ +- return; +-} ++ if (qp->flags & HNS_ROCE_QP_CAP_SVE_DIRECT_WQE) { ++ asm volatile( ++ "ldr z0, [%0]\n" ++ "str z0, [%1]\n" ++ ::"r" (val), "r"(dest):"cc", "memory" ++ ); ++ return; ++ } + #endif ++ mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe)); ++} + + static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) + { +@@ -355,10 +351,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) + hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT); + hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head); + +- if (qp->flags & HNS_ROCE_QP_CAP_SVE_DIRECT_WQE) +- hns_roce_sve_write512(qp->sq.db_reg, wqe); +- else +- hns_roce_write512(qp->sq.db_reg, wqe); ++ hns_roce_qp_write512(qp, wqe); + } + + static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq) +-- +2.25.1 + diff --git 
a/rdma-core.spec b/rdma-core.spec index a516049a6fc6b4b295fb1147c5f26be0095ea1a9..ea951db632d49b6835ae20d50233ab782e23dded 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 41.0 -Release: 14 +Release: 15 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -57,6 +57,9 @@ Patch47: 0048-libhns-Fix-incorrect-post-send-with-direct-wqe-of-wr.patch Patch48: 0049-libhns-Add-a-judgment-to-the-congestion-control-algo.patch Patch49: 0050-libhns-Support-user-to-choose-using-UD-sl-or-pktype-.patch Patch50: 0051-libhns-Get-dmac-from-kernel-driver.patch +Patch51: 0052-libhns-Bugfix-for-wrong-timing-of-modifying-ibv_qp-s.patch +Patch52: 0053-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch +Patch53: 0054-libhns-Fix-missing-DB-when-compiler-does-not-support.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -304,6 +307,12 @@ fi %{_mandir}/* %changelog +* Mon Jan 15 2024 Ran Zhou - 41.0-15 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Backport bugfix from SP3 + * Thu Jan 11 2024 Ran Zhou - 41.0-14 - Type: bugfix - ID: NA