diff --git a/0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch b/0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch
index e53cc220522ddbe0c12cb46d6316090dc880d307..a111ed5fbe1498867397d92951d95e3c22625199 100644
--- a/0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch
+++ b/0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch
@@ -1,8 +1,8 @@
-From da7f5d66f410f226f1cc0437bb4fc3124fcbb3f3 Mon Sep 17 00:00:00 2001
+From 4deb1a1a9b181d481f51a989b5c173857da87c44 Mon Sep 17 00:00:00 2001
 From: Junxian Huang
 Date: Tue, 5 Mar 2024 13:57:23 +0800
-Subject: [PATCH 6/7] libhns: Encapsulate context attribute setting into a
- single function
+Subject: [PATCH] libhns: Encapsulate context attribute setting into a single
+ function
 
 driver inclusion
 category: feature
@@ -14,7 +14,7 @@ attribute setting into a single function set_context_attr()
 to make hns_roce_alloc_context() more readable.
 
 Signed-off-by: Junxian Huang
-Signed-off-by: Ran Zhou dd
+Signed-off-by: Ran Zhou
 ---
 providers/hns/hns_roce_u.c | 69 ++++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 29 deletions(-)
diff --git a/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch b/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch
index 876602fb193eb8556b9278d398de5296fae10e56..137e4a3e14bcb57608227e1009303fb9f38a7884 100644
--- a/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch
+++ b/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch
@@ -1,13 +1,22 @@
-From bb6a6264246a2a51680a2d4b104a296a9cdf4fab Mon Sep 17 00:00:00 2001
+From 510ebb10167a964ddb02bc1a6df90ea767d611e9 Mon Sep 17 00:00:00 2001
 From: zzry <1245464216@qq.com>
 Date: Fri, 8 Mar 2024 15:05:55 +0800
 Subject: [PATCH 07/10] libhns: Add support for thread domain and parent domain
 
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST
+
+------------------------------------------------------------------
+
 Add support for thread domain (TD) and parent domain (PAD).
 Extend the original hns_roce_pd struct to hns_roce_pad by
 adding the new hns_roce_td struct. When a parent domain
 holds a thread domain, the associated data path will be set
 to lock-free mode to improve performance.
+
+Signed-off-by: Yixing Liu
+Signed-off-by: Junxian Huang
 ---
 providers/hns/hns_roce_u.c | 5 +-
 providers/hns/hns_roce_u.h | 69 +++++++++++++-
@@ -359,5 +368,5 @@ index dcdc722..ecf8666 100644
 struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context,
 --
-2.30.0
+2.33.0
diff --git a/0008-libhns-Add-support-for-lock-free-QP.patch b/0008-libhns-Add-support-for-lock-free-QP.patch
index b66bdfacaafdd81f4b7a6adeddf713a9ac769179..b2d6afe5630d15d2fb0f3c4e079dd7b4d3771226 100644
--- a/0008-libhns-Add-support-for-lock-free-QP.patch
+++ b/0008-libhns-Add-support-for-lock-free-QP.patch
@@ -1,9 +1,18 @@
-From fc7cb76b5b56d67182e6fa1cb7a3c19aa09ef90a Mon Sep 17 00:00:00 2001
+From f5f54bf889825da254b2a5df859da1c471a40314 Mon Sep 17 00:00:00 2001
 From: zzry <1245464216@qq.com>
 Date: Fri, 8 Mar 2024 15:56:09 +0800
 Subject: [PATCH 08/10] libhns: Add support for lock-free QP
 
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST
+
+------------------------------------------------------------------
+
 Drop QP locks when associated with a PAD holding a TD.
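+
+As a usage sketch (not part of this patch; standard verbs calls with
+error handling omitted, "ctx" being an open device context), an
+application opts in to the lock-free mode roughly like this:
+
+```c
+struct ibv_td_init_attr td_attr = {};
+struct ibv_parent_domain_init_attr pad_attr = {};
+struct ibv_td *td = ibv_alloc_td(ctx, &td_attr);
+struct ibv_pd *pd = ibv_alloc_pd(ctx);
+
+pad_attr.pd = pd;
+pad_attr.td = td;	/* a PAD holding a TD selects lock-free mode */
+struct ibv_pd *pad = ibv_alloc_parent_domain(ctx, &pad_attr);
+
+/* QPs/CQs created with pad as their PD then skip the internal locks */
+```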
+
+Signed-off-by: Yixing Liu
+Signed-off-by: Junxian Huang
 ---
 providers/hns/hns_roce_u.h | 2 +-
 providers/hns/hns_roce_u_hw_v2.c | 26 ++++++++++-------
@@ -219,5 +228,5 @@ index ecf8666..d503031 100644
 err:
 	if (ret < 0)
 --
-2.30.0
+2.33.0
diff --git a/0009-libhns-Add-support-for-lock-free-CQ.patch b/0009-libhns-Add-support-for-lock-free-CQ.patch
index 97873e02cf10068850eaeb9cb7f8f6fa2183135b..12ddd86d4dbfb126b9d8d8d65d6b22289be9db83 100644
--- a/0009-libhns-Add-support-for-lock-free-CQ.patch
+++ b/0009-libhns-Add-support-for-lock-free-CQ.patch
@@ -1,9 +1,18 @@
-From 41d0630d763bd39631331c76a9ecdbb245ce9502 Mon Sep 17 00:00:00 2001
+From cac8fdd87cd6e222ab5184f3d91dfc99bb922627 Mon Sep 17 00:00:00 2001
 From: zzry <1245464216@qq.com>
 Date: Fri, 8 Mar 2024 16:29:34 +0800
 Subject: [PATCH 09/10] libhns: Add support for lock-free CQ
 
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST
+
+------------------------------------------------------------------
+
 Drop CQ locks when associated with a PAD holding a TD.
+
+Signed-off-by: Yixing Liu
+Signed-off-by: Junxian Huang
 ---
 providers/hns/hns_roce_u.h | 3 +-
 providers/hns/hns_roce_u_hw_v2.c | 46 +++++++++++++-------------
@@ -269,5 +278,5 @@ index d503031..afde313 100644
 	return ret;
 }
 --
-2.30.0
+2.33.0
diff --git a/0010-libhns-Add-support-for-lock-free-SRQ.patch b/0010-libhns-Add-support-for-lock-free-SRQ.patch
index 64e66973591dd4cfe2f5f5861632d2eaccbffb6e..bf7a392a74b95aed1b1f64660f6b327800cf4153 100644
--- a/0010-libhns-Add-support-for-lock-free-SRQ.patch
+++ b/0010-libhns-Add-support-for-lock-free-SRQ.patch
@@ -1,9 +1,18 @@
-From c252a18578d12fd27b726b7b376fbebc3f2c98c3 Mon Sep 17 00:00:00 2001
+From 19f2857b3bb6b5b6992ae7314b52c7b84e08780d Mon Sep 17 00:00:00 2001
 From: zzry <1245464216@qq.com>
 Date: Fri, 8 Mar 2024 16:33:48 +0800
 Subject: [PATCH 10/10] libhns: Add support for lock-free SRQ
 
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST
+
+------------------------------------------------------------------
+
 Drop SRQ locks when associated with a PAD holding a TD.
+
+Signed-off-by: Yixing Liu
+Signed-off-by: Junxian Huang
 ---
 providers/hns/hns_roce_u.h | 2 +-
 providers/hns/hns_roce_u_hw_v2.c | 8 ++++----
@@ -144,5 +153,5 @@ index afde313..00e59dc 100644
 	return 0;
 --
-2.30.0
+2.33.0
diff --git a/0011-libhns-Support-flexible-WQE-buffer-page-size.patch b/0011-libhns-Support-flexible-WQE-buffer-page-size.patch
new file mode 100644
index 0000000000000000000000000000000000000000..f16e317cb4f055506f19846ff21d6411fc433d48
--- /dev/null
+++ b/0011-libhns-Support-flexible-WQE-buffer-page-size.patch
@@ -0,0 +1,186 @@
+From b05879f0287aa5b4bd315fea3ef0e0b82238e935 Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Mon, 23 Oct 2023 21:13:03 +0800
+Subject: [PATCH 11/18] libhns: Support flexible WQE buffer page size
+
+driver inclusion
+category: bugfix
+bugzilla: https://gitee.com/openeuler/kernel/issues/I98HIN
+
+--------------------------------------------------------------------------
+
+Currently, the driver always allocates 4K pages for the user space WQE
+buffer even in a 64K system. This results in HW reading WQEs with a
+granularity of 4K even in a 64K system. Considering that we support
+1024-byte inline, in the scenario of using SQ inline, HW will switch
+pages every 4 WQEs. This will introduce a delay of about 400ns, which is
+an average delay of 100ns per packet.
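+
+(To make the arithmetic explicit: with 1024-byte inline WQEs, one 4K
+page holds 4096 / 1024 = 4 WQEs, so paying ~400ns for a page switch
+every 4 WQEs averages out to the ~100ns per packet quoted above.)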
+
+In order to improve performance, we allow user-mode drivers to use a
+larger page size to allocate WQE buffers, thereby reducing the latency
+introduced by HW page switching. User-mode drivers will be allowed to
+allocate WQE buffers between 4K and the system page size. During
+ibv_create_qp(), the driver will dynamically select the appropriate page
+size based on ibv_qp_cap, thus reducing memory consumption while improving
+performance.
+
+This feature needs to be used in conjunction with the kernel-mode driver.
+In order to ensure forward compatibility, if the kernel-mode driver does
+not support this feature, the user-mode driver will continue to use a
+fixed 4K page size to allocate WQE buffers.
+
+Signed-off-by: Chengchang Tang
+---
+ kernel-headers/rdma/hns-abi.h | 5 +++-
+ providers/hns/hns_roce_u.h | 1 +
+ providers/hns/hns_roce_u_verbs.c | 51 ++++++++++++++++++++++++++++----
+ 3 files changed, 50 insertions(+), 7 deletions(-)
+
+diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
+index 39ed8a4..f33d876 100644
+--- a/kernel-headers/rdma/hns-abi.h
++++ b/kernel-headers/rdma/hns-abi.h
+@@ -90,7 +90,8 @@ struct hns_roce_ib_create_qp {
+ 	__u8 log_sq_bb_count;
+ 	__u8 log_sq_stride;
+ 	__u8 sq_no_prefetch;
+-	__u8 reserved[5];
++	__u8 pageshift;
++	__u8 reserved[4];
+ 	__aligned_u64 sdb_addr;
+ 	__aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */
+ 	__aligned_u64 create_flags;
+@@ -119,12 +120,14 @@ enum {
+ 	HNS_ROCE_EXSGE_FLAGS = 1 << 0,
+ 	HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
+ 	HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
++	HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4,
+ };
+ 
+ enum {
+ 	HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
+ 	HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
+ 	HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
++	HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ,
+ };
+ 
+ struct hns_roce_ib_alloc_ucontext_resp {
+diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
+index 21a6e28..56851b0 100644
+--- a/providers/hns/hns_roce_u.h
++++ b/providers/hns/hns_roce_u.h
+@@ -349,6 +349,7 @@ struct hns_roce_qp {
+ 	uint8_t sl;
+ 	uint8_t tc_mode;
+ 	uint8_t priority;
++	uint8_t pageshift;
+ 	unsigned int qkey;
+ 	enum ibv_mtu path_mtu;
+ 
+diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
+index 00e59dc..fc255ed 100644
+--- a/providers/hns/hns_roce_u_verbs.c
++++ b/providers/hns/hns_roce_u_verbs.c
+@@ -1178,31 +1178,69 @@ static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf)
+ 	}
+ }
+ 
++static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev,
++					   struct hns_roce_context *ctx,
++					   struct hns_roce_qp *qp)
++{
++	uint32_t ext_sge_size;
++	uint32_t sq_size;
++	uint32_t rq_size;
++	uint8_t pg_shift;
++
++	if (!(ctx->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ)) {
++		qp->pageshift = HNS_HW_PAGE_SHIFT;
++		return;
++	}
++
++	/*
++	 * The larger the pagesize used, the better the performance, but it
++	 * may waste more memory. Therefore, we use the least common multiple
++	 * (aligned to power of 2) of sq wqe buffer size, rq wqe buffer size,
++	 * and ext_sge buffer size as the pagesize. Additionally, since the
++	 * kernel cannot guarantee the allocation of contiguous memory larger
++	 * than the system page, the pagesize must be smaller than the system
++	 * page.
++	 */
++	sq_size = qp->sq.wqe_cnt << qp->sq.wqe_shift;
++	ext_sge_size = qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift;
++	rq_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
++
++	pg_shift = max_t(uint8_t, sq_size ? hr_ilog32(sq_size) : 0,
++			 ext_sge_size ? hr_ilog32(ext_sge_size) : 0);
++	pg_shift = max_t(uint8_t, pg_shift, rq_size ? hr_ilog32(rq_size) : 0);
++	pg_shift = max_t(uint8_t, pg_shift, HNS_HW_PAGE_SHIFT);
++	qp->pageshift = min_t(uint8_t, pg_shift, hr_ilog32(hr_dev->page_size));
++}
++
+ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
++			     struct hns_roce_context *ctx,
+ 			     struct hns_roce_qp *qp)
+ {
+ 	struct hns_roce_wq *sq = &qp->sq;
+ 	struct hns_roce_wq *rq = &qp->rq;
++	unsigned int page_size;
+ 	unsigned int size;
+ 
+ 	qp->buf_size = 0;
++	get_best_multi_region_pg_shift(hr_dev, ctx, qp);
++	page_size = 1 << qp->pageshift;
+ 
+ 	/* SQ WQE */
+ 	sq->offset = 0;
+-	size = to_hr_hem_entries_size(sq->wqe_cnt, sq->wqe_shift);
++	size = align(sq->wqe_cnt << sq->wqe_shift, page_size);
+ 	qp->buf_size += size;
+ 
+ 	/* extend SGE WQE in SQ */
+ 	qp->ex_sge.offset = qp->buf_size;
+ 	if (qp->ex_sge.sge_cnt > 0) {
+-		size = to_hr_hem_entries_size(qp->ex_sge.sge_cnt,
+-					      qp->ex_sge.sge_shift);
++		size = align(qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift,
++			     page_size);
+ 		qp->buf_size += size;
+ 	}
+ 
+ 	/* RQ WQE */
+ 	rq->offset = qp->buf_size;
+-	size = to_hr_hem_entries_size(rq->wqe_cnt, rq->wqe_shift);
++	size = align(rq->wqe_cnt << rq->wqe_shift, page_size);
+ 	qp->buf_size += size;
+ 
+ 	if (qp->buf_size < 1)
+@@ -1227,7 +1265,7 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
+ {
+ 	struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+ 
+-	if (calc_qp_buff_size(hr_dev, qp))
++	if (calc_qp_buff_size(hr_dev, ctx, qp))
+ 		return -EINVAL;
+ 
+ 	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(uint64_t));
+@@ -1245,7 +1283,7 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
+ 		goto err_alloc;
+ 	}
+ 
+-	if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE))
++	if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, 1 << qp->pageshift))
+ 		goto err_alloc;
+ 
+ 	return 0;
+@@ -1482,6 +1520,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
+ 	cmd_ex.buf_addr = (uintptr_t)qp->buf.buf;
+ 	cmd_ex.log_sq_stride = qp->sq.wqe_shift;
+ 	cmd_ex.log_sq_bb_count = hr_ilog32(qp->sq.wqe_cnt);
++	cmd_ex.pageshift = qp->pageshift;
+ 
+ 	if (hns_attr &&
+ 	    hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE) {
+-- 
+2.33.0
+
diff --git a/0012-Update-kernel-headers.patch b/0012-Update-kernel-headers.patch
new file mode 100644
index 0000000000000000000000000000000000000000..43c7af3352239ce88d293d32437d5fbe2c504831
--- /dev/null
+++ b/0012-Update-kernel-headers.patch
@@ -0,0 +1,41 @@
+From 1c2b95fe3fc64075178935bb3e1bf2086694fba3 Mon Sep 17 00:00:00 2001
+From: Yixing Liu
+Date: Wed, 14 Dec 2022 16:37:26 +0800
+Subject: [PATCH 12/18] Update kernel headers
+
+To commit ?? ("RDMA/hns: Kernel notify usr space to stop ring db").
+ +Signed-off-by: Yixing Liu +--- + kernel-headers/rdma/hns-abi.h | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index f33d876..1d51612 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -111,9 +111,9 @@ struct hns_roce_ib_create_qp_resp { + }; + + struct hns_roce_ib_modify_qp_resp { +- __u8 tc_mode; +- __u8 priority; +- __u8 reserved[6]; ++ __u8 tc_mode; ++ __u8 priority; ++ __u8 reserved[6]; + }; + + enum { +@@ -139,6 +139,8 @@ struct hns_roce_ib_alloc_ucontext_resp { + __u32 max_inline_data; + __u8 congest_type; + __u8 reserved0[7]; ++ __aligned_u64 rsv_for_dca[2]; ++ __aligned_u64 reset_mmap_key; + }; + + struct hns_roce_ib_alloc_ucontext { +-- +2.33.0 + diff --git a/0013-libhns-Add-reset-stop-flow-mechanism.patch b/0013-libhns-Add-reset-stop-flow-mechanism.patch new file mode 100644 index 0000000000000000000000000000000000000000..518957500873f82910c40971c81f673940f0c784 --- /dev/null +++ b/0013-libhns-Add-reset-stop-flow-mechanism.patch @@ -0,0 +1,189 @@ +From 13d5c1bd7192d75f27aba97e556fb83bd182c561 Mon Sep 17 00:00:00 2001 +From: Guofeng Yue +Date: Mon, 9 May 2022 16:03:38 +0800 +Subject: [PATCH 13/18] libhns: Add reset stop flow mechanism + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I65WI7 + +------------------------------------------------------------------ + +Add an interface to the user space, which is used to receive +the kernel reset state. After receiving the reset flag, the +user space stops sending db. + +Signed-off-by: Yixing Liu +Signed-off-by: Guofeng Yue +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 25 +++++++++++++++++++++++++ + providers/hns/hns_roce_u.h | 5 +++++ + providers/hns/hns_roce_u_db.h | 8 +++++++- + providers/hns/hns_roce_u_hw_v2.c | 19 ++++++++++++++----- + 4 files changed, 51 insertions(+), 6 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index e1c2659..0e4f4c1 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -100,6 +100,24 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + return count_shift > size_shift ? count_shift - size_shift : 0; + } + ++static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, ++ struct hns_roce_alloc_ucontext_resp *resp, ++ int page_size) ++{ ++ uint64_t reset_mmap_key = resp->reset_mmap_key; ++ ++ /* The reset mmap key is 0, which means it is not supported. 
*/ ++ if (reset_mmap_key == 0) ++ return 0; ++ ++ ctx->reset_state = mmap(NULL, page_size, PROT_READ, MAP_SHARED, ++ cmd_fd, reset_mmap_key); ++ if (ctx->reset_state == MAP_FAILED) ++ return -ENOMEM; ++ ++ return 0; ++} ++ + static int set_context_attr(struct hns_roce_device *hr_dev, + struct hns_roce_context *context, + struct hns_roce_alloc_ucontext_resp *resp) +@@ -176,6 +194,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + ++ if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) ++ goto reset_free; ++ + pthread_mutex_init(&context->qp_table_mutex, NULL); + pthread_mutex_init(&context->srq_table_mutex, NULL); + pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); +@@ -185,6 +206,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + return &context->ibv_ctx; + ++reset_free: ++ munmap(context->uar, hr_dev->page_size); + err_free: + verbs_uninit_context(&context->ibv_ctx); + free(context); +@@ -197,6 +220,8 @@ static void hns_roce_free_context(struct ibv_context *ibctx) + struct hns_roce_context *context = to_hr_ctx(ibctx); + + munmap(context->uar, hr_dev->page_size); ++ if (context->reset_state) ++ munmap(context->reset_state, hr_dev->page_size); + verbs_uninit_context(&context->ibv_ctx); + free(context); + } +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 56851b0..49de0f9 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -201,9 +201,14 @@ struct hns_roce_spinlock { + int need_lock; + }; + ++struct hns_roce_v2_reset_state { ++ uint32_t is_reset; ++}; ++ + struct hns_roce_context { + struct verbs_context ibv_ctx; + void *uar; ++ void *reset_state; + pthread_spinlock_t uar_lock; + + struct { +diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h +index 8c47a53..de288de 100644 +--- a/providers/hns/hns_roce_u_db.h ++++ b/providers/hns/hns_roce_u_db.h +@@ -40,8 +40,14 @@ + + #define HNS_ROCE_WORD_NUM 2 + +-static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM]) ++static inline void hns_roce_write64(struct hns_roce_context *ctx, void *dest, ++ __le32 val[HNS_ROCE_WORD_NUM]) + { ++ struct hns_roce_v2_reset_state *state = ctx->reset_state; ++ ++ if (state && state->is_reset) ++ return; ++ + mmio_write64_le(dest, *(__le64 *)val); + } + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 1d7a304..1855d83 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -284,7 +284,8 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, + hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB); + hr_reg_write(&rq_db, DB_PI, rq_head); + +- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db); ++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, ++ (__le32 *)&rq_db); + } + + static void hns_roce_update_sq_db(struct hns_roce_context *ctx, +@@ -298,7 +299,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, + hr_reg_write(&sq_db, DB_PI, qp->sq.head); + hr_reg_write(&sq_db, DB_SL, qp->sl); + +- hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db); ++ hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db); + } + + static void hns_roce_write512(uint64_t *dest, uint64_t *val) +@@ -309,6 +310,12 @@ static void hns_roce_write512(uint64_t *dest, uint64_t *val) + static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) + { + struct hns_roce_rc_sq_wqe 
*rc_sq_wqe = wqe;
++	struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
++	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
++	struct hns_roce_v2_reset_state *state = ctx->reset_state;
++
++	if (state && state->is_reset)
++		return;
+ 
+ 	/* All kinds of DirectWQE have the same header field layout */
+ 	hr_reg_enable(rc_sq_wqe, RCWQE_FLAG);
+@@ -328,7 +335,8 @@ static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
+ 	hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index);
+ 	hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
+ 
+-	hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
++	hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
++			 (__le32 *)&cq_db);
+ }
+ 
+ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
+@@ -762,7 +770,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
+ 	hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn);
+ 	hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag);
+ 
+-	hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
++	hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
++			 (__le32 *)&cq_db);
+ 
+ 	return 0;
+ }
+@@ -1741,7 +1750,7 @@ static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db,
+ 	hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
+ 	hr_reg_write(db, DB_PI, srq->idx_que.head);
+ 
+-	hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
++	hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ 			 (__le32 *)db);
+ }
+ 
+-- 
+2.33.0
+
diff --git a/0014-libhns-Support-reporting-wc-as-software-mode.patch b/0014-libhns-Support-reporting-wc-as-software-mode.patch
new file mode 100644
index 0000000000000000000000000000000000000000..5734965a4ea8922c1f48df23f26fca0d4b995ba3
--- /dev/null
+++ b/0014-libhns-Support-reporting-wc-as-software-mode.patch
@@ -0,0 +1,542 @@
+From 3344ba5dc2240ae4ce43b6df2cbef78539c38e0c Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Tue, 26 Sep 2023 19:19:06 +0800
+Subject: [PATCH 14/18] libhns: Support reporting wc as software mode
+
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
+
+----------------------------------------------------------
+
+When the HW is in the resetting stage, we cannot poll back all the
+expected work completions as the HW won't generate CQEs anymore.
+
+This patch allows the driver to compose the expected WCs instead of the
+HW during the resetting stage. Once the hardware has finished resetting,
+we can poll the CQ from hardware again.
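+
+From the application's point of view, the software-composed
+completions surface as flush errors (an illustrative sketch only;
+release_buffer() is a hypothetical application hook):
+
+```c
+struct ibv_wc wc;
+
+while (ibv_poll_cq(cq, 1, &wc) > 0) {
+	/* during reset, outstanding WRs are completed in software */
+	if (wc.status == IBV_WC_WR_FLUSH_ERR)
+		release_buffer(wc.wr_id);
+}
+```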
+ +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u.h | 12 ++ + providers/hns/hns_roce_u_hw_v2.c | 217 +++++++++++++++++++++++++++++-- + providers/hns/hns_roce_u_hw_v2.h | 2 + + providers/hns/hns_roce_u_verbs.c | 91 +++++++++++++ + 4 files changed, 310 insertions(+), 12 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 49de0f9..5adf6bd 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -238,6 +238,8 @@ struct hns_roce_context { + unsigned int cqe_size; + uint32_t config; + unsigned int max_inline_data; ++ ++ bool reseted; + }; + + struct hns_roce_td { +@@ -271,6 +273,11 @@ struct hns_roce_cq { + unsigned int cqe_size; + struct hns_roce_v2_cqe *cqe; + struct ibv_pd *parent_domain; ++ struct list_head list_sq; ++ struct list_head list_rq; ++ struct list_head list_srq; ++ struct list_head list_xrc_srq; ++ struct hns_roce_v2_cqe *sw_cqe; + }; + + struct hns_roce_idx_que { +@@ -307,6 +314,7 @@ struct hns_roce_srq { + unsigned int *rdb; + unsigned int cap_flags; + unsigned short counter; ++ struct list_node xrc_srcq_node; + }; + + struct hns_roce_wq { +@@ -368,6 +376,10 @@ struct hns_roce_qp { + void *cur_wqe; + unsigned int rb_sq_head; /* roll back sq head */ + struct hns_roce_sge_info sge_info; ++ ++ struct list_node rcq_node; ++ struct list_node scq_node; ++ struct list_node srcq_node; + }; + + struct hns_roce_av { +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 1855d83..2119c4c 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -723,6 +723,180 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx, + return hns_roce_flush_cqe(*cur_qp, status); + } + ++static void hns_roce_fill_swc(struct hns_roce_cq *cq, struct ibv_wc *wc, ++ uint64_t wr_id, uint32_t qp_num) ++{ ++ if (!wc) { ++ cq->verbs_cq.cq_ex.status = IBV_WC_WR_FLUSH_ERR; ++ cq->verbs_cq.cq_ex.wr_id = wr_id; ++ hr_reg_write(cq->sw_cqe, CQE_LCL_QPN, qp_num); ++ return; ++ } ++ ++ wc->wr_id = wr_id; ++ wc->status = IBV_WC_WR_FLUSH_ERR; ++ wc->vendor_err = 0; ++ wc->qp_num = qp_num; ++} ++ ++static int hns_roce_get_wq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp, ++ struct ibv_wc *wc, bool is_sq) ++{ ++ struct hns_roce_wq *wq = is_sq ? &qp->sq : &qp->rq; ++ unsigned int left_wr; ++ uint64_t wr_id; ++ ++ left_wr = wq->head - wq->tail; ++ if (left_wr == 0) { ++ if (is_sq) ++ list_del_init(&qp->scq_node); ++ else ++ list_del_init(&qp->rcq_node); ++ ++ return ENOENT; ++ } ++ ++ wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ hns_roce_fill_swc(cq, wc, wr_id, qp->verbs_qp.qp.qp_num); ++ wq->tail++; ++ return V2_CQ_OK; ++} ++ ++static int hns_roce_gen_sq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ struct hns_roce_qp *next, *qp = NULL; ++ ++ list_for_each_safe(&cq->list_sq, qp, next, scq_node) { ++ if (hns_roce_get_wq_swc(cq, qp, wc, true) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_gen_rq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ struct hns_roce_qp *next, *qp = NULL; ++ ++ list_for_each_safe(&cq->list_rq, qp, next, rcq_node) { ++ if (hns_roce_get_wq_swc(cq, qp, wc, false) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? 
V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_get_srq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp, ++ struct hns_roce_srq *srq, struct ibv_wc *wc) ++{ ++ unsigned int left_wr; ++ uint64_t wr_id; ++ ++ hns_roce_spin_lock(&srq->hr_lock); ++ left_wr = srq->idx_que.head - srq->idx_que.tail; ++ if (left_wr == 0) { ++ if (qp) ++ list_del_init(&qp->srcq_node); ++ else ++ list_del_init(&srq->xrc_srcq_node); ++ ++ hns_roce_spin_unlock(&srq->hr_lock); ++ return ENOENT; ++ } ++ ++ wr_id = srq->wrid[srq->idx_que.tail & (srq->wqe_cnt - 1)]; ++ hns_roce_fill_swc(cq, wc, wr_id, srq->srqn); ++ srq->idx_que.tail++; ++ hns_roce_spin_unlock(&srq->hr_lock); ++ ++ return V2_CQ_OK; ++} ++ ++static int hns_roce_gen_common_srq_swc(struct hns_roce_cq *cq, ++ struct ibv_wc *wc) ++{ ++ struct hns_roce_qp *next, *qp = NULL; ++ struct hns_roce_srq *srq; ++ ++ list_for_each_safe(&cq->list_srq, qp, next, srcq_node) { ++ srq = to_hr_srq(qp->verbs_qp.qp.srq); ++ if (hns_roce_get_srq_swc(cq, qp, srq, wc) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_gen_xrc_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ struct hns_roce_srq *next, *srq = NULL; ++ ++ list_for_each_safe(&cq->list_xrc_srq, srq, next, xrc_srcq_node) { ++ if (hns_roce_get_srq_swc(cq, NULL, srq, wc) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_gen_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ int err; ++ ++ err = hns_roce_gen_common_srq_swc(cq, wc); ++ if (err == V2_CQ_OK) ++ return err; ++ ++ return hns_roce_gen_xrc_srq_swc(cq, wc); ++} ++ ++static int hns_roce_poll_one_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ int err; ++ ++ err = hns_roce_gen_sq_swc(cq, wc); ++ if (err == V2_CQ_OK) ++ return err; ++ ++ err = hns_roce_gen_rq_swc(cq, wc); ++ if (err == V2_CQ_OK) ++ return err; ++ ++ return hns_roce_gen_srq_swc(cq, wc); ++} ++ ++static int hns_roce_poll_swc(struct hns_roce_cq *cq, int ne, struct ibv_wc *wc) ++{ ++ int npolled; ++ int err; ++ ++ for (npolled = 0; npolled < ne; npolled++) { ++ err = hns_roce_poll_one_swc(cq, wc + npolled); ++ if (err == V2_CQ_EMPTY) ++ break; ++ } ++ ++ return npolled; ++} ++ ++static bool hns_roce_reseted(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_v2_reset_state *state = ctx->reset_state; ++ ++ if (state && state->is_reset) ++ ctx->reseted = true; ++ ++ return ctx->reseted; ++} ++ + static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + struct ibv_wc *wc) + { +@@ -734,6 +908,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + hns_roce_spin_lock(&cq->hr_lock); + ++ if (unlikely(hns_roce_reseted(ctx))) { ++ npolled = hns_roce_poll_swc(cq, ne, wc); ++ hns_roce_spin_unlock(&cq->hr_lock); ++ return npolled; ++ } ++ + for (npolled = 0; npolled < ne; ++npolled) { + err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); + if (err != V2_CQ_OK) +@@ -1602,11 +1782,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + return ret; + } + +-static void hns_roce_lock_cqs(struct ibv_qp *qp) ++void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) + { +- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); +- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); +- + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { + hns_roce_spin_lock(&send_cq->hr_lock); +@@ -1624,11 +1801,8 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp) + } + } + +-static void 
hns_roce_unlock_cqs(struct ibv_qp *qp) ++void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) + { +- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); +- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); +- + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { + hns_roce_spin_unlock(&send_cq->hr_lock); +@@ -1662,17 +1836,22 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + + hns_roce_v2_clear_qp(ctx, qp); + +- hns_roce_lock_cqs(ibqp); ++ hns_roce_lock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq)); + +- if (ibqp->recv_cq) ++ if (ibqp->recv_cq) { + __hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num, + ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); ++ list_del(&qp->srcq_node); ++ list_del(&qp->rcq_node); ++ } + +- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) ++ if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) { + __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, + NULL); ++ list_del(&qp->scq_node); ++ } + +- hns_roce_unlock_cqs(ibqp); ++ hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq)); + + hns_roce_free_qp_buf(qp, ctx); + +@@ -1822,7 +2001,14 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current, + + hns_roce_spin_lock(&cq->hr_lock); + ++ if (unlikely(hns_roce_reseted(ctx))) { ++ err = hns_roce_poll_one_swc(cq, NULL); ++ goto start_poll_done; ++ } ++ + err = hns_roce_poll_one(ctx, &qp, cq, NULL); ++ ++start_poll_done: + if (err != V2_CQ_OK) + hns_roce_spin_unlock(&cq->hr_lock); + +@@ -1836,6 +2022,9 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current) + struct hns_roce_qp *qp = NULL; + int err; + ++ if (unlikely(hns_roce_reseted(ctx))) ++ return hns_roce_poll_one_swc(cq, NULL); ++ + err = hns_roce_poll_one(ctx, &qp, cq, NULL); + if (err != V2_CQ_OK) + return err; +@@ -1853,11 +2042,15 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current) + struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); + struct hns_roce_context *ctx = to_hr_ctx(current->context); + ++ if (unlikely(hns_roce_reseted(ctx))) ++ goto end_poll_done; ++ + if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) + *cq->db = cq->cons_index & RECORD_DB_CI_MASK; + else + update_cq_db(ctx, cq); + ++end_poll_done: + hns_roce_spin_unlock(&cq->hr_lock); + } + +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index abf9467..1a7b828 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -344,5 +344,7 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); + void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags); + int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr, + struct hns_roce_qp *qp); ++void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); ++void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); + + #endif /* _HNS_ROCE_U_HW_V2_H */ +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index fc255ed..3f23715 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -513,6 +513,32 @@ static int exec_cq_create_cmd(struct ibv_context *context, + return 0; + } + ++static int hns_roce_init_cq_swc(struct hns_roce_cq *cq, ++ struct ibv_cq_init_attr_ex *attr) ++{ ++ list_head_init(&cq->list_sq); ++ list_head_init(&cq->list_rq); ++ list_head_init(&cq->list_srq); ++ list_head_init(&cq->list_xrc_srq); ++ ++ if (!(attr->wc_flags & CREATE_CQ_SUPPORTED_WC_FLAGS)) ++ return 0; ++ ++ 
cq->sw_cqe = calloc(1, sizeof(struct hns_roce_v2_cqe)); ++ if (!cq->sw_cqe) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq) ++{ ++ if (cq->sw_cqe) { ++ free(cq->sw_cqe); ++ cq->sw_cqe = NULL; ++ } ++} ++ + static struct ibv_cq_ex *create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr) + { +@@ -552,6 +578,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + + *cq->db = 0; + ++ ret = hns_roce_init_cq_swc(cq, attr); ++ if (ret) ++ goto err_swc; ++ + ret = exec_cq_create_cmd(context, cq, attr); + if (ret) + goto err_cmd; +@@ -561,6 +591,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + return &cq->verbs_cq.cq_ex; + + err_cmd: ++ hns_roce_uninit_cq_swc(cq); ++err_swc: + hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB); + err_db: + hns_roce_free_buf(&cq->buf); +@@ -625,6 +657,8 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq) + if (ret) + return ret; + ++ hns_roce_uninit_cq_swc(to_hr_cq(cq)); ++ + hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB); + hns_roce_free_buf(&hr_cq->buf); + +@@ -839,6 +873,22 @@ static int exec_srq_create_cmd(struct ibv_context *context, + return 0; + } + ++static void init_srq_cq_list(struct hns_roce_srq *srq, ++ struct ibv_srq_init_attr_ex *init_attr) ++{ ++ struct hns_roce_cq *srq_cq; ++ ++ list_node_init(&srq->xrc_srcq_node); ++ ++ if (!init_attr->cq) ++ return; ++ ++ srq_cq = to_hr_cq(init_attr->cq); ++ hns_roce_spin_lock(&srq_cq->hr_lock); ++ list_add_tail(&srq_cq->list_xrc_srq, &srq->xrc_srcq_node); ++ hns_roce_spin_unlock(&srq_cq->hr_lock); ++} ++ + static struct ibv_srq *create_srq(struct ibv_context *context, + struct ibv_srq_init_attr_ex *init_attr) + { +@@ -885,6 +935,8 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + init_attr->attr.max_sge = + min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge); + ++ init_srq_cq_list(srq, init_attr); ++ + return &srq->verbs_srq.srq; + + err_destroy_srq: +@@ -960,6 +1012,18 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) + return ret; + } + ++static void del_srq_from_cq_list(struct hns_roce_srq *srq) ++{ ++ struct hns_roce_cq *srq_cq = to_hr_cq(srq->verbs_srq.cq); ++ ++ if (!srq_cq) ++ return; ++ ++ hns_roce_spin_lock(&srq_cq->hr_lock); ++ list_del(&srq->xrc_srcq_node); ++ hns_roce_spin_unlock(&srq_cq->hr_lock); ++} ++ + int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + { + struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context); +@@ -967,6 +1031,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + struct hns_roce_srq *srq = to_hr_srq(ibv_srq); + int ret; + ++ del_srq_from_cq_list(srq); ++ + ret = ibv_cmd_destroy_srq(ibv_srq); + if (ret) + return ret; +@@ -1600,6 +1666,30 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, + return 0; + } + ++static void add_qp_to_cq_list(struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_cq *send_cq, *recv_cq; ++ ++ send_cq = attr->send_cq ? to_hr_cq(attr->send_cq) : NULL; ++ recv_cq = attr->recv_cq ? 
to_hr_cq(attr->recv_cq) : NULL;
++
++	list_node_init(&qp->scq_node);
++	list_node_init(&qp->rcq_node);
++	list_node_init(&qp->srcq_node);
++
++	hns_roce_lock_cqs(send_cq, recv_cq);
++	if (send_cq)
++		list_add_tail(&send_cq->list_sq, &qp->scq_node);
++	if (recv_cq) {
++		if (attr->srq)
++			list_add_tail(&recv_cq->list_srq, &qp->srcq_node);
++		else
++			list_add_tail(&recv_cq->list_rq, &qp->rcq_node);
++	}
++	hns_roce_unlock_cqs(send_cq, recv_cq);
++}
++
+ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+ 				struct ibv_qp_init_attr_ex *attr,
+ 				struct hnsdv_qp_init_attr *hns_attr)
+@@ -1652,6 +1742,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+ 	}
+ 
+ 	qp_setup_config(attr, qp, context);
++	add_qp_to_cq_list(attr, qp);
+ 
+ 	return &qp->verbs_qp.qp;
+ 
+-- 
+2.33.0
+
diff --git a/0015-libhns-return-error-when-post-send-in-reset-state.patch b/0015-libhns-return-error-when-post-send-in-reset-state.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7b5067bad478ff3c8cd4a35ad90a5df5a5dcaca8
--- /dev/null
+++ b/0015-libhns-return-error-when-post-send-in-reset-state.patch
@@ -0,0 +1,155 @@
+From 0b33b387d5b806804ae9278d3911289f8619dfd2 Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Tue, 26 Sep 2023 19:19:07 +0800
+Subject: [PATCH 15/18] libhns: return error when post send in reset state
+
+driver inclusion
+category: bugfix
+bugzilla: https://gitee.com/openeuler/kernel/issues/I98HQV
+
+--------------------------------------------------------------------------
+
+If the device has been reset, the original business will not be able
+to continue. The current design is to allow users to continue issuing IO.
+Such a design is meaningless; the user should perceive the exception and
+restore the business as soon as possible.
+
+The current kernel mode directly returns an error when the device has
+been reset, and this patch can unify the behavior of the kernel mode and
+user mode.
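+
+A minimal caller-side sketch of the new behavior (illustrative only;
+teardown_and_recover() stands in for an application recovery path):
+
+```c
+struct ibv_send_wr *bad_wr;
+int ret = ibv_post_send(qp, &wr, &bad_wr);
+
+if (ret == EIO) {
+	/* the device has been reset: stop issuing IO and recover */
+	teardown_and_recover();
+}
+```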
+ +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u_hw_v2.c | 66 ++++++++++++++++++++++++-------- + 1 file changed, 51 insertions(+), 15 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 2119c4c..fe22b43 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -956,14 +956,24 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) + return 0; + } + +-static inline int check_qp_send(struct ibv_qp *qp) ++static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { +- if (unlikely(qp->state == IBV_QPS_RESET || +- qp->state == IBV_QPS_INIT || +- qp->state == IBV_QPS_RTR)) ++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp; ++ int ret = 0; ++ ++ if (unlikely(ibvqp->state == IBV_QPS_RESET || ++ ibvqp->state == IBV_QPS_INIT || ++ ibvqp->state == IBV_QPS_RTR)){ ++ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), ++ "unsupported qp state, state = %d.\n", ibvqp->state); + return EINVAL; ++ } else if (unlikely(hns_roce_reseted(ctx))) { ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "failed to send, device has been reseted!\n"); ++ return EIO; ++ } + +- return 0; ++ return ret; + } + + static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg, +@@ -1453,7 +1463,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + unsigned int wqe_idx, nreq; + int ret; + +- ret = check_qp_send(ibvqp); ++ ret = check_qp_send(qp, ctx); + if (unlikely(ret)) { + *bad_wr = wr; + return ret; +@@ -1531,12 +1541,22 @@ out: + return ret; + } + +-static inline int check_qp_recv(struct ibv_qp *qp) ++static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { +- if (qp->state == IBV_QPS_RESET) ++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp; ++ int ret = 0; ++ ++ if (ibvqp->state == IBV_QPS_RESET) { ++ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), ++ "unsupported qp state, state = %d.\n", ibvqp->state); + return EINVAL; ++ } else if (unlikely(hns_roce_reseted(ctx))) { ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "fail to recv, device has been reseted!\n"); ++ return EIO; ++ } + +- return 0; ++ return ret; + } + + static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe, +@@ -1603,7 +1623,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + struct ibv_qp_attr attr = {}; + int ret; + +- ret = check_qp_recv(ibvqp); ++ ret = check_qp_recv(qp, ctx); + if (unlikely(ret)) { + *bad_wr = wr; + return ret; +@@ -1933,6 +1953,16 @@ static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db, + (__le32 *)db); + } + ++static int check_srq_recv(struct hns_roce_context *ctx) ++{ ++ if (hns_roce_reseted(ctx)) { ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "srq failed to recv, device has been reseted!\n"); ++ return EIO; ++ } ++ return 0; ++} ++ + static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +@@ -1944,6 +1974,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + int ret = 0; + void *wqe; + ++ ret = check_srq_recv(ctx); ++ if (ret) { ++ *bad_wr = wr; ++ return ret; ++ } ++ + hns_roce_spin_lock(&srq->hr_lock); + + max_sge = srq->max_gs - srq->rsv_sge; +@@ -2751,13 +2787,13 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, + + static void wr_start(struct ibv_qp_ex *ibv_qp) + { ++ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context); + struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); 
+-	enum ibv_qp_state state = ibv_qp->qp_base.state;
++	int ret;
+ 
+-	if (state == IBV_QPS_RESET ||
+-	    state == IBV_QPS_INIT ||
+-	    state == IBV_QPS_RTR) {
+-		qp->err = EINVAL;
++	ret = check_qp_send(qp, ctx);
++	if (ret) {
++		qp->err = ret;
+ 		return;
+ 	}
+ 
+-- 
+2.33.0
+
diff --git a/0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch b/0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7777e81f3654b5cf836faf5066b6492c881a1ced
--- /dev/null
+++ b/0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch
@@ -0,0 +1,76 @@
+From 2c11318d6a06ba6afd6efd91b2881b9fa05f35f6 Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Tue, 26 Sep 2023 19:19:09 +0800
+Subject: [PATCH 16/18] libhns: assign doorbell to zero when allocate it
+
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/openeuler/kernel/issues/I98HQV
+
+--------------------------------------------------------------------------
+
+Clear the doorbell when getting it to avoid clearing it in each
+function that uses hns_roce_alloc_db().
+
+Signed-off-by: Chengchang Tang
+---
+ providers/hns/hns_roce_u_db.c | 2 ++
+ providers/hns/hns_roce_u_verbs.c | 8 --------
+ 2 files changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/providers/hns/hns_roce_u_db.c b/providers/hns/hns_roce_u_db.c
+index 0314254..bbef988 100644
+--- a/providers/hns/hns_roce_u_db.c
++++ b/providers/hns/hns_roce_u_db.c
+@@ -116,6 +116,8 @@ found:
+ 
+ out:
+ 	pthread_mutex_unlock((pthread_mutex_t *)&ctx->db_list_mutex);
++	if (db)
++		*((unsigned int *)db) = 0;
+ 
+ 	return db;
+ }
+diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
+index 3f23715..69bcc13 100644
+--- a/providers/hns/hns_roce_u_verbs.c
++++ b/providers/hns/hns_roce_u_verbs.c
+@@ -576,8 +576,6 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+ 		goto err_db;
+ 	}
+ 
+-	*cq->db = 0;
+-
+ 	ret = hns_roce_init_cq_swc(cq, attr);
+ 	if (ret)
+ 		goto err_swc;
+@@ -921,8 +919,6 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
+ 	if (!srq->rdb)
+ 		goto err_srq_buf;
+ 
+-	*srq->rdb = 0;
+-
+ 	ret = exec_srq_create_cmd(context, srq, init_attr);
+ 	if (ret)
+ 		goto err_srq_db;
+@@ -1505,8 +1501,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
+ 		qp->sdb = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
+ 		if (!qp->sdb)
+ 			return -ENOMEM;
+-
+-		*qp->sdb = 0;
+ 	}
+ 
+ 	if (attr->cap.max_recv_sge) {
+@@ -1518,8 +1512,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
+ 
+ 			return -ENOMEM;
+ 		}
+-
+-		*qp->rdb = 0;
+ 	}
+ 
+ 	return 0;
+-- 
+2.33.0
+
diff --git a/0017-libhns-Fix-missing-reset-notification.patch b/0017-libhns-Fix-missing-reset-notification.patch
new file mode 100644
index 0000000000000000000000000000000000000000..0b3e2c6512b1dbf03dc942679338215d200232fb
--- /dev/null
+++ b/0017-libhns-Fix-missing-reset-notification.patch
@@ -0,0 +1,92 @@
+From 8b922418b18fefe2a60e122374b3bc8096672661 Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Tue, 26 Sep 2023 19:19:10 +0800
+Subject: [PATCH 17/18] libhns: Fix missing reset notification.
+
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/openeuler/kernel/issues/I98HQV
+
+--------------------------------------------------------------------------
+
+Currently, the userspace driver gets the reset notification by reading
+a shared variable which would be set to non-zero during reset. However,
+if the user does not call the driver's IO interface during reset, the
+reset notification will be ignored, because this variable will be
+cleared after the reset completes.
+
+This patch uses a new reset flag to tell whether the driver has been
+reset at any time. A non-zero value will be assigned to this new reset
+flag by default, which will permanently become 0 once a reset occurs.
+During reset, the kernel space driver will assign 0 to this variable.
+After reset, this variable will be remapped to a page of all zeros. The
+userspace driver can judge whether the driver has been reset by checking
+whether this variable is 0.
+
+Fixes: 34f2ad8085c2 ("libhns: Add reset stop flow mechanism")
+Signed-off-by: Chengchang Tang
+---
+ providers/hns/hns_roce_u.c | 4 ++++
+ providers/hns/hns_roce_u.h | 2 ++
+ providers/hns/hns_roce_u_hw_v2.c | 3 +++
+ 3 files changed, 9 insertions(+)
+
+diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
+index 0e4f4c1..810b650 100644
+--- a/providers/hns/hns_roce_u.c
++++ b/providers/hns/hns_roce_u.c
+@@ -105,6 +105,7 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
+ 			      int page_size)
+ {
+ 	uint64_t reset_mmap_key = resp->reset_mmap_key;
++	struct hns_roce_v2_reset_state *state;
+ 
+ 	/* The reset mmap key is 0, which means it is not supported. */
+ 	if (reset_mmap_key == 0)
+@@ -115,6 +116,9 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
+ 	if (ctx->reset_state == MAP_FAILED)
+ 		return -ENOMEM;
+ 
++	state = ctx->reset_state;
++	ctx->use_new_reset_flag = state->hw_ready;
++
+ 	return 0;
+ }
+ 
+diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
+index 5adf6bd..024932a 100644
+--- a/providers/hns/hns_roce_u.h
++++ b/providers/hns/hns_roce_u.h
+@@ -203,6 +203,7 @@ struct hns_roce_spinlock {
+ 
+ struct hns_roce_v2_reset_state {
+ 	uint32_t is_reset;
++	uint32_t hw_ready;
+ };
+ 
+ struct hns_roce_context {
+@@ -239,6 +240,7 @@ struct hns_roce_context {
+ 	uint32_t config;
+ 	unsigned int max_inline_data;
+ 
++	bool use_new_reset_flag;
+ 	bool reseted;
+ };
+ 
+diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
+index fe22b43..a0dce1c 100644
+--- a/providers/hns/hns_roce_u_hw_v2.c
++++ b/providers/hns/hns_roce_u_hw_v2.c
+@@ -891,6 +891,9 @@ static bool hns_roce_reseted(struct hns_roce_context *ctx)
+ {
+ 	struct hns_roce_v2_reset_state *state = ctx->reset_state;
+ 
++	if (ctx->use_new_reset_flag)
++		return !state->hw_ready;
++
+ 	if (state && state->is_reset)
+ 		ctx->reseted = true;
+ 
+-- 
+2.33.0
+
diff --git a/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch b/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7957daea82ec2adf1133a404053b1e3327dc52a0
--- /dev/null
+++ b/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch
@@ -0,0 +1,94 @@
+From 64e8d59358cfdb05d7b172bb1b60f18fb7f3d844 Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Thu, 7 Dec 2023 09:48:02 +0800
+Subject: [PATCH 18/18] libhns: Fix owner bit when SQ wraps around in new IO
+
+driver inclusion
+category: bugfix
+bugzilla: https://gitee.com/openeuler/kernel/issues/I98YNG
+
+--------------------------------------------------------------------------
+
+The owner bit has been written in init_rc_wqe() or init_ud_wqe()
+with a correct value. It will then be overwritten by some subsequent
+operations, and when the SQ wraps around, the overwritten value will be
+an incorrect value.
+
+For example, the driver will assign the owner bit in the second step,
+and overwrite it in the third step.
+
+```c
+ibv_wr_start();
+ibv_wr_rdma_write();
+if (inline)
+	ibv_wr_set_inline_data_list();
+else
+	ibv_wr_set_sge_list();
+ibv_wr_complete();
+```
+
+This patch removes the redundant owner bit assignment operations
+in new IO.
+
+Fixes: ("libhns: Fix the owner bit error of sq in new io")
+Signed-off-by: Chengchang Tang
+---
+ providers/hns/hns_roce_u_hw_v2.c | 7 -------
+ 1 file changed, 7 deletions(-)
+
+diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
+index a0dce1c..9016978 100644
+--- a/providers/hns/hns_roce_u_hw_v2.c
++++ b/providers/hns/hns_roce_u_hw_v2.c
+@@ -2353,8 +2353,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
+ 
+ 	wqe->msg_len = htole32(qp->sge_info.total_len);
+ 	hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num);
+-
+-	enable_wqe(qp, wqe, qp->sq.head);
+ }
+ 
+ static void wr_send_rc(struct ibv_qp_ex *ibv_qp)
+@@ -2546,7 +2544,6 @@ static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr,
+ 
+ 	qp->sge_info.total_len = length;
+ 	set_inline_data_list_rc(qp, wqe, 1, &buff);
+-	enable_wqe(qp, wqe, qp->sq.head);
+ }
+ 
+ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+@@ -2564,7 +2561,6 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ 		qp->sge_info.total_len += buf_list[i].length;
+ 
+ 	set_inline_data_list_rc(qp, wqe, num_buf, buf_list);
+-	enable_wqe(qp, wqe, qp->sq.head);
+ }
+ 
+ static struct hns_roce_ud_sq_wqe *
+@@ -2701,7 +2697,6 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
+ 	hr_reg_write(wqe, UDWQE_SGE_NUM, cnt);
+ 
+ 	qp->sge_info.start_idx += cnt;
+-	enable_wqe(qp, wqe, qp->sq.head);
+ }
+ 
+ static void set_inline_data_list_ud(struct hns_roce_qp *qp,
+@@ -2767,7 +2762,6 @@ static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr,
+ 
+ 	qp->sge_info.total_len = length;
+ 	set_inline_data_list_ud(qp, wqe, 1, &buff);
+-	enable_wqe(qp, wqe, qp->sq.head);
+ }
+ 
+ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+@@ -2785,7 +2779,6 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ 		qp->sge_info.total_len += buf_list[i].length;
+ 
+ 	set_inline_data_list_ud(qp, wqe, num_buf, buf_list);
+-	enable_wqe(qp, wqe, qp->sq.head);
+ }
+ 
+ static void wr_start(struct ibv_qp_ex *ibv_qp)
+-- 
+2.33.0
+
diff --git a/rdma-core.spec b/rdma-core.spec
index 2d77d03feb910540db4d768ef9e8c32f0ad4010d..0e282e2e074470ff334c7cc3c10b97840210a109 100644
--- a/rdma-core.spec
+++ b/rdma-core.spec
@@ -1,6 +1,6 @@
 Name: rdma-core
 Version: 50.0
-Release: 4
+Release: 5
 Summary: RDMA core userspace libraries and daemons
 License: GPLv2 or BSD
 Url: https://github.com/linux-rdma/rdma-core
@@ -16,6 +16,14 @@ patch7: 0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch
 patch8: 0008-libhns-Add-support-for-lock-free-QP.patch
 patch9: 0009-libhns-Add-support-for-lock-free-CQ.patch
 patch10: 0010-libhns-Add-support-for-lock-free-SRQ.patch
+patch11: 0011-libhns-Support-flexible-WQE-buffer-page-size.patch
+patch12: 0012-Update-kernel-headers.patch
+patch13: 0013-libhns-Add-reset-stop-flow-mechanism.patch
+patch14: 0014-libhns-Support-reporting-wc-as-software-mode.patch
+patch15: 0015-libhns-return-error-when-post-send-in-reset-state.patch
+patch16: 0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch
+patch17: 0017-libhns-Fix-missing-reset-notification.patch
+patch18: 0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch
 
 BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
 BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
@@ -594,6 +602,12 @@ fi
 %{_mandir}/*
 
 %changelog
+* Tue Mar 26 2024 Ran Zhou - 50.0-5
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support software wc and fix commit info of previous patches
+
 * Thu Mar 21 2024 Ran Zhou - 50.0-4
 - Type: requirement
 - ID: NA