From b731de85ba2f9c52b834159c0ad5d8bc385cdf12 Mon Sep 17 00:00:00 2001
From: Chengchang Tang
Date: Wed, 30 Nov 2022 17:03:44 +0800
Subject: [PATCH] Support hns roce DCA

DCA (Dynamic Context Attachment) allows many RC QPs to share WQE buffers
in a memory pool, which helps reduce memory consumption when many QPs
are inactive.

Signed-off-by: Chengchang Tang
(cherry picked from commit b88a370b79cd9b93f9a587c1e4e0d9c89d0b1d1e)
---
 0031-Update-kernel-headers.patch              | 152 ++++
 0032-libhns-Introduce-DCA-for-RC-QP.patch     | 342 ++++++++
 ...upport-for-shrinking-DCA-memory-pool.patch | 204 +++++
 ...upport-for-attaching-QP-s-WQE-buffer.patch | 618 ++++++++++++++
 ...Use-shared-memory-to-sync-DCA-status.patch | 167 ++++
 ...hns-Sync-DCA-status-by-shared-memory.patch | 222 +++++
 ...d-direct-verbs-support-to-config-DCA.patch | 766 ++++++++++++++++++
 rdma-core.spec                                |  18 +-
 8 files changed, 2488 insertions(+), 1 deletion(-)
 create mode 100644 0031-Update-kernel-headers.patch
 create mode 100644 0032-libhns-Introduce-DCA-for-RC-QP.patch
 create mode 100644 0033-libhns-Add-support-for-shrinking-DCA-memory-pool.patch
 create mode 100644 0034-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch
 create mode 100644 0035-libhns-Use-shared-memory-to-sync-DCA-status.patch
 create mode 100644 0036-libhns-Sync-DCA-status-by-shared-memory.patch
 create mode 100644 0037-libhns-Add-direct-verbs-support-to-config-DCA.patch

diff --git a/0031-Update-kernel-headers.patch b/0031-Update-kernel-headers.patch
new file mode 100644
index 0000000..311287c
--- /dev/null
+++ b/0031-Update-kernel-headers.patch
@@ -0,0 +1,152 @@
+From 7d72b40d311875677135289874d4a69e4891b0de Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Mon, 28 Nov 2022 21:52:20 +0800
+Subject: Update kernel headers
+
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
+
+----------------------------------------------------------
+
+To commit ?? ("RDMA/hns: Fixes concurrent ressetting and post_recv in DCA
+mode").
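For orientation, every DCA object, method and attribute ID added below is
placed in a driver-private namespace via UVERBS_ID_NS_SHIFT. A minimal
sketch of what that encoding means (illustration only, not part of the
header; the helper name is hypothetical):

/* Driver-specific uverbs IDs set bit 12, so the first DCA attribute,
 * HNS_IB_ATTR_DCA_MEM_REG_HANDLE, starts at 0x1000 and can never collide
 * with generic attribute IDs below 0x1000. */
#include <stdint.h>

#define UVERBS_ID_NS_MASK 0xF000
#define UVERBS_ID_NS_SHIFT 12

static inline uint16_t uverbs_id_namespace(uint16_t id)
{
	return (id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;
}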
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + kernel-headers/rdma/hns-abi.h | 84 ++++++++++++++++++++++++++++++++--- + 1 file changed, 78 insertions(+), 6 deletions(-) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index 9866c51..6950841 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -77,7 +77,9 @@ enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, ++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4, + HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, ++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, + }; + + struct hns_roce_ib_create_qp_resp { +@@ -95,33 +97,46 @@ struct hns_roce_ib_modify_qp_resp { + __u8 tc_mode; + __u8 priority; + __u8 reserved[6]; ++ __u32 dcan; ++ __u32 rsv2; + }; + + enum { + HNS_ROCE_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3, + }; + + enum { + HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA, + }; + + struct hns_roce_ib_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 cqe_size; +- __u32 srq_tab_size; +- __u32 reserved; +- __u32 config; +- __u32 max_inline_data; +- __u8 mac_type; +- __u8 rsv1[7]; ++ __u32 srq_tab_size; ++ __u32 reserved; ++ __u32 config; ++ __u32 max_inline_data; ++ __u8 mac_type; ++ __u8 rsv1[7]; ++ __u32 dca_qps; ++ __u32 dca_mmap_size; ++ __aligned_u64 dca_mmap_key; ++}; ++ ++enum hns_roce_uctx_comp_mask { ++ HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0, + }; + + struct hns_roce_ib_alloc_ucontext { + __u32 config; ++ __u32 comp; /* use hns_roce_uctx_comp_mask */ ++ __u32 dca_max_qps; + __u32 reserved; + }; + +@@ -129,4 +144,61 @@ struct hns_roce_ib_alloc_pd_resp { + __u32 pdn; + }; + ++#define UVERBS_ID_NS_MASK 0xF000 ++#define UVERBS_ID_NS_SHIFT 12 ++ ++enum hns_ib_objects { ++ HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum hns_ib_dca_mem_methods { ++ HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_METHOD_DCA_MEM_DEREG, ++ HNS_IB_METHOD_DCA_MEM_SHRINK, ++ HNS_IB_METHOD_DCA_MEM_ATTACH, ++ HNS_IB_METHOD_DCA_MEM_DETACH, ++ HNS_IB_METHOD_DCA_MEM_QUERY, ++}; ++ ++enum hns_ib_dca_mem_reg_attrs { ++ HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_REG_FLAGS, ++ HNS_IB_ATTR_DCA_MEM_REG_LEN, ++ HNS_IB_ATTR_DCA_MEM_REG_ADDR, ++ HNS_IB_ATTR_DCA_MEM_REG_KEY, ++}; ++ ++enum hns_ib_dca_mem_dereg_attrs { ++ HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum hns_ib_dca_mem_shrink_attrs { ++ HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, ++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, ++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, ++}; ++ ++enum hns_ib_dca_mem_attach_attrs { ++ HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, ++}; ++ ++enum hns_ib_dca_mem_detach_attrs { ++ HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, ++}; ++ ++enum hns_ib_dca_mem_query_attrs { ++ HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ 
HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, ++}; ++ + #endif /* HNS_ABI_USER_H */ +-- +2.30.0 + diff --git a/0032-libhns-Introduce-DCA-for-RC-QP.patch b/0032-libhns-Introduce-DCA-for-RC-QP.patch new file mode 100644 index 0000000..4799e19 --- /dev/null +++ b/0032-libhns-Introduce-DCA-for-RC-QP.patch @@ -0,0 +1,342 @@ +From 58de0f69573e8b76affe401a261f17f1a5cedc01 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:09 +0800 +Subject: libhns: Introduce DCA for RC QP + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M + +---------------------------------------------------------- + +The HIP09 introduces the DCA(Dynamic context attachment) feature which +supports many RC QPs to share the WQE buffer in a memory pool, this will +reduce the memory consumption when there are too many QPs inactive. + +Two functions are defined for adding buffers to memory pool and removing +buffers from memory pool by calling ib cmd implemented in hns kernelspace +driver. + +If a QP enables DCA feature, the WQE's buffer will be attached to the +memory pool when the users start to post WRs and be detached when all CQEs +has been polled. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 59 ++++++++++++- + providers/hns/hns_roce_u.h | 21 +++++ + providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++ + 3 files changed, 223 insertions(+), 4 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 3428bda..bd2b251 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -95,6 +95,53 @@ static const struct verbs_context_ops hns_common_ops = { + .alloc_parent_domain = hns_roce_u_alloc_pad, + }; + ++static int init_dca_context(struct hns_roce_context *ctx, int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ int ret; ++ ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) ++ return 0; ++ ++ list_head_init(&dca_ctx->mem_list); ++ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) ++ return ret; ++ ++ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; ++ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; ++ dca_ctx->mem_cnt = 0; ++ ++ return 0; ++} ++ ++static void uninit_dca_context(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) ++ return; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ hns_roce_cleanup_dca_mem(ctx); ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ pthread_spin_destroy(&dca_ctx->lock); ++} ++ ++static int hns_roce_mmap(struct hns_roce_device *hr_dev, ++ struct hns_roce_context *context, int cmd_fd) ++{ ++ int page_size = hr_dev->page_size; ++ ++ context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, cmd_fd, 0); ++ if (context->uar == MAP_FAILED) ++ return -ENOMEM; ++ ++ return 0; ++} ++ + static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + { + uint32_t count_shift = hr_ilog32(entry_count); +@@ -119,7 +166,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + return NULL; + + cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS; ++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, 
sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto err_free; +@@ -165,11 +212,12 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + context->max_srq_wr = dev_attrs.max_srq_wr; + context->max_srq_sge = dev_attrs.max_srq_sge; + +- context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, +- MAP_SHARED, cmd_fd, 0); +- if (context->uar == MAP_FAILED) ++ if (init_dca_context(context, hr_dev->page_size)) + goto err_free; + ++ if (hns_roce_mmap(hr_dev, context, cmd_fd)) ++ goto dca_free; ++ + pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); + + verbs_set_ops(&context->ibv_ctx, &hns_common_ops); +@@ -177,6 +225,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + return &context->ibv_ctx; + ++dca_free: ++ uninit_dca_context(context); + err_free: + verbs_uninit_context(&context->ibv_ctx); + free(context); +@@ -189,6 +239,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx) + struct hns_roce_context *context = to_hr_ctx(ibctx); + + munmap(context->uar, hr_dev->page_size); ++ uninit_dca_context(context); + verbs_uninit_context(&context->ibv_ctx); + free(context); + } +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 2edb07e..0e25ce5 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -147,6 +147,10 @@ + + #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) + ++enum { ++ HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15), ++}; ++ + #define HNS_ROCE_QP_TABLE_BITS 8 + #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS) + +@@ -199,6 +203,18 @@ struct hns_roce_spinlock { + int need_lock; + }; + ++#define HNS_DCA_MAX_MEM_SIZE ~0UL ++#define HNS_DCA_DEFAULT_UNIT_PAGES 16 ++ ++struct hns_roce_dca_ctx { ++ struct list_head mem_list; ++ pthread_spinlock_t lock; ++ int mem_cnt; ++ unsigned int unit_size; ++ uint64_t max_size; ++ uint64_t curr_size; ++}; ++ + struct hns_roce_context { + struct verbs_context ibv_ctx; + void *uar; +@@ -231,6 +247,8 @@ struct hns_roce_context { + unsigned int cqe_size; + uint32_t config; + unsigned int max_inline_data; ++ ++ struct hns_roce_dca_ctx dca_ctx; + }; + + struct hns_roce_td { +@@ -562,6 +580,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); ++int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); ++ + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + + extern const struct hns_roce_u_hw hns_roce_u_hw_v2; +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 471dd9c..02c43ae 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf) + + munmap(buf->buf, buf->length); + } ++ ++struct hns_roce_dca_mem { ++ uint32_t handle; ++ struct list_node entry; ++ struct hns_roce_buf buf; ++ struct hns_roce_context *ctx; ++}; ++ ++static void free_dca_mem(struct hns_roce_context *ctx, ++ struct hns_roce_dca_mem *mem) ++{ ++ hns_roce_free_buf(&mem->buf); ++ free(mem); ++} ++ ++static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size) ++{ ++ struct hns_roce_dca_mem *mem = NULL; ++ int ret; ++ ++ mem = malloc(sizeof(struct hns_roce_dca_mem)); ++ if (!mem) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE); ++ if (ret) { ++ errno = ENOMEM; ++ free(mem); ++ return NULL; 
++ } ++ ++ return mem; ++} ++ ++static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) ++{ ++ return (uintptr_t)dca_mem; ++} ++ ++static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) ++{ ++ return dca_mem->buf.buf + offset; ++} ++ ++static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key, ++ void *addr, uint32_t size, uint32_t *handle) ++{ ++ struct ib_uverbs_attr *attr; ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_REG, 4); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR, ++ ioctl_ptr_to_u64(addr)); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key); ++ attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE); ++ ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) { ++ verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n", ++ ret); ++ return ret; ++ } ++ ++ *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr); ++ ++ return 0; ++} ++ ++static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_DEREG, 1); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_warn(&ctx->ibv_ctx, ++ "failed to dereg DCA mem-%u, ret = %d.\n", ++ handle, ret); ++} ++ ++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_roce_dca_mem *mem; ++ struct hns_roce_dca_mem *tmp; ++ ++ list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry) ++ deregister_dca_mem(ctx, mem->handle); ++} ++ ++static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, ++ uint32_t alloc_size) ++{ ++ bool enable; ++ ++ pthread_spin_lock(&ctx->lock); ++ ++ if (ctx->unit_size == 0) /* Pool size can't be increased */ ++ enable = false; ++ else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */ ++ enable = true; ++ else /* Pool size doesn't exceed max size */ ++ enable = (ctx->curr_size + alloc_size) < ctx->max_size; ++ ++ pthread_spin_unlock(&ctx->lock); ++ ++ return enable; ++} ++ ++int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_roce_dca_mem *mem; ++ int ret; ++ ++ if (!add_dca_mem_enabled(&ctx->dca_ctx, size)) ++ return -ENOMEM; ++ ++ /* Step 1: Alloc DCA mem address */ ++ mem = alloc_dca_mem( ++ DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size); ++ if (!mem) ++ return -ENOMEM; ++ ++ /* Step 2: Register DCA mem uobject to pin user address */ ++ ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0), ++ mem->buf.length, &mem->handle); ++ if (ret) { ++ free_dca_mem(ctx, mem); ++ return ret; ++ } ++ ++ /* Step 3: Add DCA mem node to pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ list_add_tail(&dca_ctx->mem_list, &mem->entry); ++ dca_ctx->mem_cnt++; ++ dca_ctx->curr_size += mem->buf.length; ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ return 0; ++} +-- +2.30.0 + diff --git a/0033-libhns-Add-support-for-shrinking-DCA-memory-pool.patch b/0033-libhns-Add-support-for-shrinking-DCA-memory-pool.patch new file mode 100644 index 0000000..b2da30b --- /dev/null +++ b/0033-libhns-Add-support-for-shrinking-DCA-memory-pool.patch @@ -0,0 +1,204 @@ +From c8d7a2dc811a18ffd314b8764c961234e5f2ec77 Mon Sep 17 00:00:00 2001 
+From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:13 +0800 +Subject: libhns: Add support for shrinking DCA memory pool + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M + +---------------------------------------------------------- + +The QP's WQE buffer may be detached after QP is modified or CQE is polled, +and the state of DCA mem object may be changed as clean for no QP is using +it. So shrink the clean DCA mem from the memory pool and destroy the DCA +mem's buffer to reduce the memory consumption. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 2 + + providers/hns/hns_roce_u_buf.c | 103 +++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.c | 7 +++ + 3 files changed, 112 insertions(+) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 0e25ce5..7b5c5c9 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -212,6 +212,7 @@ struct hns_roce_dca_ctx { + int mem_cnt; + unsigned int unit_size; + uint64_t max_size; ++ uint64_t min_size; + uint64_t curr_size; + }; + +@@ -580,6 +581,7 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); + int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 02c43ae..c0f86e9 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -101,6 +101,20 @@ static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) + return (uintptr_t)dca_mem; + } + ++static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx, ++ uint64_t key) ++{ ++ struct hns_roce_dca_mem *mem; ++ struct hns_roce_dca_mem *tmp; ++ ++ list_for_each_safe(&ctx->mem_list, mem, tmp, entry) { ++ if (dca_mem_to_key(mem) == key) ++ return mem; ++ } ++ ++ return NULL; ++} ++ + static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) + { + return dca_mem->buf.buf + offset; +@@ -156,6 +170,32 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) + deregister_dca_mem(ctx, mem->handle); + } + ++struct hns_dca_mem_shrink_resp { ++ uint32_t free_mems; ++ uint64_t free_key; ++}; ++ ++static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ uint64_t size, struct hns_dca_mem_shrink_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_SHRINK, 4); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, ++ &resp->free_key, sizeof(resp->free_key)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, ++ &resp->free_mems, sizeof(resp->free_mems)); ++ ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n", ++ ret); ++ ++ return ret; ++} + static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + uint32_t alloc_size) + { +@@ -175,6 +215,17 @@ static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + return enable; + } + ++static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) ++{ ++ bool enable; ++ ++ 
pthread_spin_lock(&ctx->lock); ++ enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size; ++ pthread_spin_unlock(&ctx->lock); ++ ++ return enable; ++} ++ + int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; +@@ -207,3 +258,55 @@ int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + + return 0; + } ++ ++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_dca_mem_shrink_resp resp = {}; ++ struct hns_roce_dca_mem *mem; ++ int dca_mem_cnt; ++ uint32_t handle; ++ int ret; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ dca_mem_cnt = ctx->dca_ctx.mem_cnt; ++ pthread_spin_unlock(&dca_ctx->lock); ++ while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) { ++ resp.free_mems = 0; ++ /* Step 1: Use any DCA mem uobject to shrink pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = list_tail(&dca_ctx->mem_list, ++ struct hns_roce_dca_mem, entry); ++ handle = mem ? mem->handle : 0; ++ pthread_spin_unlock(&dca_ctx->lock); ++ if (!mem) ++ break; ++ ++ ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp); ++ if (ret || likely(resp.free_mems < 1)) ++ break; ++ ++ /* Step 2: Remove shrunk DCA mem node from pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = key_to_dca_mem(dca_ctx, resp.free_key); ++ if (mem) { ++ list_del(&mem->entry); ++ dca_ctx->mem_cnt--; ++ dca_ctx->curr_size -= mem->buf.length; ++ } ++ ++ handle = mem ? mem->handle : 0; ++ pthread_spin_unlock(&dca_ctx->lock); ++ if (!mem) ++ break; ++ ++ /* Step 3: Destroy DCA mem uobject */ ++ deregister_dca_mem(ctx, handle); ++ free_dca_mem(ctx, mem); ++ /* No any free memory after deregister 1 DCA mem */ ++ if (resp.free_mems <= 1) ++ break; ++ ++ dca_mem_cnt--; ++ } ++} +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 7b2f2d1..f3a7e6b 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -738,6 +738,10 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + hns_roce_spin_unlock(&cq->hr_lock); + ++ /* Try to shrink the DCA mem */ ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + return err == V2_CQ_POLL_ERR ? err : npolled; + } + +@@ -1674,6 +1678,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + + free(qp); + ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + return ret; + } + +-- +2.30.0 + diff --git a/0034-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch b/0034-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch new file mode 100644 index 0000000..65aa2b2 --- /dev/null +++ b/0034-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch @@ -0,0 +1,618 @@ +From 835bc1a62dfc3398ef9da23de07348a353f67214 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:17 +0800 +Subject: libhns: Add support for attaching QP's WQE buffer + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M + +---------------------------------------------------------- + +If a uQP works in DCA mode, the WQE's buffer will be split as many blocks +and be stored into a list. The blocks are allocated from the DCA's memory +pool before posting WRs and are dropped when the QP's CI is equal to PI +after polling CQ. 
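The address translation this page list enables is sketched below
(hypothetical standalone helper for illustration; the series implements
the same logic in get_wqe() in hns_roce_u_hw_v2.c, with the shift set to
HNS_HW_PAGE_SHIFT):

/* Resolve a logical WQE offset against the DCA page list: the high bits
 * select a kernel-attached page, the low bits index into that page. */
static inline void *dca_offset_to_addr(void **bufs, unsigned int shift,
				       unsigned int offset)
{
	return (char *)bufs[offset >> shift] +
	       (offset & ((1U << shift) - 1));
}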
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 26 ++++- + providers/hns/hns_roce_u_buf.c | 173 ++++++++++++++++++++++++++++++- + providers/hns/hns_roce_u_hw_v2.c | 151 ++++++++++++++++++++++++--- + providers/hns/hns_roce_u_hw_v2.h | 2 + + providers/hns/hns_roce_u_verbs.c | 32 ++++-- + 5 files changed, 358 insertions(+), 26 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 7b5c5c9..44a733f 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -348,11 +348,18 @@ struct hns_roce_sge_ex { + unsigned int sge_shift; + }; + ++struct hns_roce_dca_buf { ++ void **bufs; ++ unsigned int max_cnt; ++ unsigned int shift; ++}; ++ + struct hns_roce_qp { + struct verbs_qp verbs_qp; + struct hns_roce_buf buf; ++ struct hns_roce_dca_buf dca_wqe; + int max_inline_data; +- int buf_size; ++ unsigned int buf_size; + unsigned int sq_signal_bits; + struct hns_roce_wq sq; + struct hns_roce_wq rq; +@@ -401,11 +408,22 @@ struct hns_roce_u_hw { + struct verbs_context_ops hw_ops; + }; + ++struct hns_roce_dca_attach_attr { ++ uint32_t sq_offset; ++ uint32_t sge_offset; ++ uint32_t rq_offset; ++}; ++ ++struct hns_roce_dca_detach_attr { ++ uint32_t sq_index; ++}; ++ + /* + * The entries's buffer should be aligned to a multiple of the hardware's + * minimum page size. + */ + #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE) ++#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE) + + static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift) + { +@@ -581,9 +599,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ uint32_t size, struct hns_roce_dca_buf *buf); ++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_detach_attr *attr); + void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); +-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); + + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index c0f86e9..3d41b89 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + + return ret; + } ++ ++struct hns_dca_mem_query_resp { ++ uint64_t key; ++ uint32_t offset; ++ uint32_t page_count; ++}; ++ ++static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ uint32_t index, struct hns_dca_mem_query_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_QUERY, 5); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, ++ &resp->key, sizeof(resp->key)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, ++ &resp->offset, sizeof(resp->offset)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, ++ &resp->page_count, sizeof(resp->page_count)); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, ++ "failed to query DCA mem-%u, ret = %d.\n", ++ handle, ret); ++ 
++ return ret; ++} ++ ++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_detach_attr *attr) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_DETACH, 4); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, ++ attr->sq_index); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_warn(&ctx->ibv_ctx, ++ "failed to detach DCA mem-%u, ret = %d.\n", ++ handle, ret); ++} ++ ++struct hns_dca_mem_attach_resp { ++#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0) ++ uint32_t alloc_flags; ++ uint32_t alloc_pages; ++}; ++ ++static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ struct hns_dca_mem_attach_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_ATTACH, 6); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, ++ attr->sq_offset); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, ++ attr->sge_offset); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, ++ attr->rq_offset); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, ++ &resp->alloc_flags, sizeof(resp->alloc_flags)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, ++ &resp->alloc_pages, sizeof(resp->alloc_pages)); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, ++ "failed to attach DCA mem-%u, ret = %d.\n", ++ handle, ret); ++ ++ return ret; ++} ++ + static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + uint32_t alloc_size) + { +@@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) + return enable; + } + +-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) ++static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + struct hns_roce_dca_mem *mem; +@@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) + dca_mem_cnt--; + } + } ++ ++static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf, ++ uint32_t page_index, int page_count) ++{ ++ void **pages = &buf->bufs[page_index]; ++ int page_size = 1 << buf->shift; ++ int i; ++ ++ for (i = 0; i < page_count; i++) { ++ pages[i] = addr; ++ addr += page_size; ++ } ++} ++ ++static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_buf *buf, uint32_t page_count) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_dca_mem_query_resp resp = {}; ++ struct hns_roce_dca_mem *mem; ++ uint32_t idx = 0; ++ int ret; ++ ++ while (idx < page_count && idx < buf->max_cnt) { ++ resp.page_count = 0; ++ ret = query_dca_mem(ctx, handle, idx, &resp); ++ if (ret) ++ return -ENOMEM; ++ if (resp.page_count < 1) ++ break; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = key_to_dca_mem(dca_ctx, resp.key); ++ if (mem && resp.offset < mem->buf.length) { ++ config_dca_pages(dca_mem_addr(mem, resp.offset), ++ buf, idx, resp.page_count); ++ } else { ++ pthread_spin_unlock(&dca_ctx->lock); ++ break; ++ } ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ idx += resp.page_count; ++ } ++ ++ return (idx >= page_count) ? 
0 : -ENOMEM; ++} ++ ++#define DCA_EXPAND_MEM_TRY_TIMES 3 ++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ uint32_t size, struct hns_roce_dca_buf *buf) ++{ ++ uint32_t buf_pages = size >> buf->shift; ++ struct hns_dca_mem_attach_resp resp = {}; ++ bool is_new_buf = true; ++ int try_times = 0; ++ int ret = 0; ++ ++ do { ++ resp.alloc_pages = 0; ++ ret = attach_dca_mem(ctx, handle, attr, &resp); ++ if (ret) ++ break; ++ ++ if (resp.alloc_pages >= buf_pages) { ++ is_new_buf = !!(resp.alloc_flags & ++ HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER); ++ break; ++ } ++ ++ ret = add_dca_mem(ctx, size); ++ if (ret) ++ break; ++ } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES); ++ ++ if (ret || resp.alloc_pages < buf_pages) { ++ verbs_err(&ctx->ibv_ctx, ++ "failed to attach, size %u count %u != %u, ret = %d.\n", ++ size, buf_pages, resp.alloc_pages, ret); ++ return -ENOMEM; ++ } ++ ++ /* No need config user address if DCA config not changed */ ++ if (!is_new_buf && buf->bufs[0]) ++ return 0; ++ ++ return setup_dca_buf(ctx, handle, buf, buf_pages); ++} +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index f3a7e6b..7e3ad92 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -197,19 +197,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) + return get_sw_cqe_v2(cq, cq->cons_index); + } + ++static inline bool check_qp_dca_enable(struct hns_roce_qp *qp) ++{ ++ return !!qp->dca_wqe.bufs; ++} ++ ++static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset) ++{ ++ if (likely(qp->buf.buf)) ++ return qp->buf.buf + offset; ++ else if (unlikely(check_qp_dca_enable(qp))) ++ return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] + ++ (offset & ((1 << qp->dca_wqe.shift) - 1)); ++ else ++ return NULL; ++} ++ + static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift); ++ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); + } + + static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift); ++ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); + } + + static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift); ++ return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift)); + } + + static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) +@@ -569,6 +585,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + wc->opcode = wc_send_op_map[opcode]; + } + ++static bool check_dca_attach_enable(struct hns_roce_qp *qp) ++{ ++ return check_qp_dca_enable(qp) && ++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH); ++} ++ ++static bool check_dca_detach_enable(struct hns_roce_qp *qp) ++{ ++ return check_qp_dca_enable(qp) && ++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH); ++} ++ ++static int dca_attach_qp_buf(struct hns_roce_context *ctx, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_dca_attach_attr attr = {}; ++ uint32_t idx; ++ int ret; ++ ++ hns_roce_spin_lock(&qp->sq.hr_lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); ++ ++ if (qp->sq.wqe_cnt > 0) { ++ idx = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ attr.sq_offset = idx << qp->sq.wqe_shift; ++ } ++ ++ if (qp->ex_sge.sge_cnt > 0) { ++ idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1); ++ attr.sge_offset = idx 
<< qp->ex_sge.sge_shift; ++ } ++ ++ if (qp->rq.wqe_cnt > 0) { ++ idx = qp->rq.head & (qp->rq.wqe_cnt - 1); ++ attr.rq_offset = idx << qp->rq.wqe_shift; ++ } ++ ++ ++ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr, ++ qp->buf_size, &qp->dca_wqe); ++ ++ hns_roce_spin_unlock(&qp->rq.hr_lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ return ret; ++} ++ ++static void dca_detach_qp_buf(struct hns_roce_context *ctx, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_dca_detach_attr attr; ++ bool is_empty; ++ ++ hns_roce_spin_lock(&qp->sq.hr_lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); ++ ++ is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail; ++ if (is_empty && qp->sq.wqe_cnt > 0) ++ attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ ++ hns_roce_spin_unlock(&qp->rq.hr_lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ if (is_empty) ++ hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr); ++} ++ + static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, + struct hns_roce_cq *cq) + { +@@ -725,6 +808,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + for (npolled = 0; npolled < ne; ++npolled) { + err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); ++ if (qp && check_dca_detach_enable(qp)) ++ dca_detach_qp_buf(ctx, qp); ++ + if (err != V2_CQ_OK) + break; + } +@@ -768,19 +854,30 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) + return 0; + } + +-static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx) ++static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { +- if (unlikely(qp->qp_type != IBV_QPT_RC && +- qp->qp_type != IBV_QPT_UD) && +- qp->qp_type != IBV_QPT_XRC_SEND) ++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp; ++ int ret = 0; ++ ++ if (unlikely(ibvqp->qp_type != IBV_QPT_RC && ++ ibvqp->qp_type != IBV_QPT_UD) && ++ ibvqp->qp_type != IBV_QPT_XRC_SEND) + return -EINVAL; + +- if (unlikely(qp->state == IBV_QPS_RESET || +- qp->state == IBV_QPS_INIT || +- qp->state == IBV_QPS_RTR)) ++ if (unlikely(ibvqp->state == IBV_QPS_RESET || ++ ibvqp->state == IBV_QPS_INIT || ++ ibvqp->state == IBV_QPS_RTR)) + return -EINVAL; + +- return 0; ++ if (check_dca_attach_enable(qp)) { ++ ret = dca_attach_qp_buf(ctx, qp); ++ if (ret) ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "failed to attach QP-%u send, ret = %d.\n", ++ qp->verbs_qp.qp.qp_num, ret); ++ } ++ ++ return ret; + } + + static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg, +@@ -1148,6 +1245,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + return 0; + } + ++static inline void fill_rc_dca_fields(uint32_t qp_num, ++ struct hns_roce_rc_sq_wqe *wqe) ++{ ++ hr_reg_write(wqe, RCWQE_SQPN_L, qp_num); ++ hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH); ++} ++ + static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe, + const struct ibv_send_wr *wr) + { +@@ -1259,6 +1363,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + return ret; + + wqe_valid: ++ if (check_qp_dca_enable(qp)) ++ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe); ++ + enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq); + + return 0; +@@ -1275,7 +1382,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + struct ibv_qp_attr attr; + int ret; + +- ret = check_qp_send(ibvqp, ctx); ++ ret = check_qp_send(qp, ctx); + if (unlikely(ret)) { + *bad_wr = wr; + return ret; +@@ -1352,15 +1459,20 @@ out: + return ret; + } + +-static int 
check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx) ++static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { +- if (unlikely(qp->qp_type != IBV_QPT_RC && +- qp->qp_type != IBV_QPT_UD)) ++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp; ++ ++ if (unlikely(ibvqp->qp_type != IBV_QPT_RC && ++ ibvqp->qp_type != IBV_QPT_UD)) + return -EINVAL; + +- if (qp->state == IBV_QPS_RESET || qp->srq) ++ if (ibvqp->state == IBV_QPS_RESET || ibvqp->srq) + return -EINVAL; + ++ if (check_dca_attach_enable(qp)) ++ return dca_attach_qp_buf(ctx, qp); ++ + return 0; + } + +@@ -1428,7 +1540,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + struct ibv_qp_attr attr; + int ret; + +- ret = check_qp_recv(ibvqp, ctx); ++ ret = check_qp_recv(qp, ctx); + if (unlikely(ret)) { + *bad_wr = wr; + return ret; +@@ -1551,6 +1663,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, + static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask) + { ++ struct hns_roce_context *ctx = to_hr_ctx(qp->context); + struct hns_roce_modify_qp_ex_resp resp_ex = {}; + struct hns_roce_modify_qp_ex cmd_ex = {}; + struct hns_roce_qp *hr_qp = to_hr_qp(qp); +@@ -1598,6 +1711,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (hr_qp->tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP) + hr_qp->sl = hr_qp->priority; + ++ /* Try to shrink the DCA mem */ ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + record_qp_attr(qp, attr, attr_mask); + + return ret; +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index d71c695..a22995d 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -239,6 +239,8 @@ struct hns_roce_rc_sq_wqe { + #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259) + #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260) + ++#define RCWQE_SQPN_L_WIDTH 2 ++ + struct hns_roce_v2_wqe_data_seg { + __le32 len; + __le32 lkey; +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index f6c7423..749b01b 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1165,6 +1165,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev, + return 0; + } + ++static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type) ++{ ++ if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND)) ++ return true; ++ ++ return false; ++} ++ + static void qp_free_wqe(struct hns_roce_qp *qp) + { + free_recv_rinl_buf(&qp->rq_rinl_buf); +@@ -1176,8 +1184,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp) + hns_roce_free_buf(&qp->buf); + } + +-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, +- struct hns_roce_context *ctx) ++static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); + +@@ -1195,12 +1203,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, + } + + if (qp->rq_rinl_buf.wqe_cnt) { +- if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf)) ++ if (alloc_recv_rinl_buf(attr->cap.max_recv_sge, ++ &qp->rq_rinl_buf)) + goto err_alloc; + } + +- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE)) +- goto err_alloc; ++ if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) { ++ /* when DCA is enabled, use a buffer list to 
store page addr */ ++ qp->buf.buf = NULL; ++ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size); ++ qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT; ++ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); ++ if (!qp->dca_wqe.bufs) ++ goto err_alloc; ++ } else { ++ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, ++ HNS_HW_PAGE_SIZE)) ++ goto err_alloc; ++ } + + return 0; + +@@ -1467,7 +1487,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, + pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE)) + return -ENOMEM; + +- ret = qp_alloc_wqe(&attr->cap, qp, ctx); ++ ret = qp_alloc_wqe(attr, qp, ctx); + if (ret) + return ret; + +-- +2.30.0 + diff --git a/0035-libhns-Use-shared-memory-to-sync-DCA-status.patch b/0035-libhns-Use-shared-memory-to-sync-DCA-status.patch new file mode 100644 index 0000000..635c080 --- /dev/null +++ b/0035-libhns-Use-shared-memory-to-sync-DCA-status.patch @@ -0,0 +1,167 @@ +From a5e62921afc2fcc152e8b0584f2d04d1a4db4f10 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 29 Jun 2021 20:06:47 +0800 +Subject: libhns: Use shared memory to sync DCA status + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M + +---------------------------------------------------------- + +The user DCA needs to check the QP attaching state before filling wqe +buffer by the response from uverbs 'HNS_IB_METHOD_DCA_MEM_ATTACH', but +this will result in too much time being wasted on system calls, so use a +shared table between user driver and kernel driver to sync DCA status. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++--- + providers/hns/hns_roce_u.h | 10 ++++++++ + 2 files changed, 57 insertions(+), 4 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index bd2b251..fe30cda 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -95,9 +95,33 @@ static const struct verbs_context_ops hns_common_ops = { + .alloc_parent_domain = hns_roce_u_alloc_pad, + }; + +-static int init_dca_context(struct hns_roce_context *ctx, int page_size) ++static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, ++ int page_size, size_t size, uint64_t mmap_key) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ void *addr; ++ ++ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd, ++ mmap_key); ++ if (addr == MAP_FAILED) { ++ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n"); ++ return -EINVAL; ++ } ++ ++ dca_ctx->buf_status = addr; ++ dca_ctx->sync_status = addr + size / 2; ++ ++ return 0; ++} ++ ++static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, ++ struct hns_roce_alloc_ucontext_resp *resp, ++ int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ uint64_t mmap_key = resp->dca_mmap_key; ++ int mmap_size = resp->dca_mmap_size; ++ int max_qps = resp->dca_qps; + int ret; + + if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) +@@ -112,6 +136,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size) + dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; + dca_ctx->mem_cnt = 0; + ++ if (mmap_key) { ++ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; ++ ++ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) { ++ dca_ctx->status_size = mmap_size; ++ dca_ctx->max_qps = min_t(int, max_qps, ++ mmap_size * 8 / bits_per_qp); ++ } ++ } ++ + return 0; + } + +@@ -125,6 +159,8 @@ static void 
uninit_dca_context(struct hns_roce_context *ctx) + pthread_spin_lock(&dca_ctx->lock); + hns_roce_cleanup_dca_mem(ctx); + pthread_spin_unlock(&dca_ctx->lock); ++ if (dca_ctx->buf_status) ++ munmap(dca_ctx->buf_status, dca_ctx->status_size); + + pthread_spin_destroy(&dca_ctx->lock); + } +@@ -149,6 +185,14 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + return count_shift > size_shift ? count_shift - size_shift : 0; + } + ++static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) ++{ ++ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | ++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; ++ cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; ++ cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; ++} ++ + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -165,8 +209,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (!context) + return NULL; + +- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; ++ ucontext_set_cmd(&cmd, hr_dev->page_size); + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto err_free; +@@ -212,7 +255,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + context->max_srq_wr = dev_attrs.max_srq_wr; + context->max_srq_sge = dev_attrs.max_srq_sge; + +- if (init_dca_context(context, hr_dev->page_size)) ++ if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) + goto err_free; + + if (hns_roce_mmap(hr_dev, context, cmd_fd)) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 44a733f..a8f811e 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -35,6 +35,7 @@ + + #include + #include ++#include + #include + + #include +@@ -44,6 +45,7 @@ + #include + #include + #include ++#include + #include + #include "hns_roce_u_abi.h" + +@@ -52,6 +54,8 @@ + + #define PFX "hns: " + ++typedef _Atomic(uint64_t) atomic_bitmap_t; ++ + /* The minimum page size is 4K for hardware */ + #define HNS_HW_PAGE_SHIFT 12 + #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) +@@ -214,6 +218,12 @@ struct hns_roce_dca_ctx { + uint64_t max_size; + uint64_t min_size; + uint64_t curr_size; ++ ++#define HNS_DCA_BITS_PER_STATUS 1 ++ unsigned int max_qps; ++ unsigned int status_size; ++ atomic_bitmap_t *buf_status; ++ atomic_bitmap_t *sync_status; + }; + + struct hns_roce_context { +-- +2.30.0 + diff --git a/0036-libhns-Sync-DCA-status-by-shared-memory.patch b/0036-libhns-Sync-DCA-status-by-shared-memory.patch new file mode 100644 index 0000000..e9108f8 --- /dev/null +++ b/0036-libhns-Sync-DCA-status-by-shared-memory.patch @@ -0,0 +1,222 @@ +From 13d4b60fcd0880fae54b1af627eeb7297d7b086d Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 29 Jun 2021 21:01:27 +0800 +Subject: libhns: Sync DCA status by shared memory + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M + +---------------------------------------------------------- + +Use DCA num from the resp of modify_qp() and indicate the DCA status bit in +the shared memory, if the num is valid, the user DCA can get the DCA status +by testing the bit in the shared memory for each QP, othewise invoke the +verbs 'HNS_IB_METHOD_DCA_MEM_ATTACH' to check the DCA status. 
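A rough sketch of this shared-bitmap protocol (illustration only; the
real helpers are the test_and_set_bit_lock()/clear_bit_unlock() additions
below, operating on the two halves of the mmap()ed DCA status area):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef _Atomic(uint64_t) atomic_bitmap_t;

/* Take the per-QP sync bit before posting WRs; returns false when the
 * kernel driver (or another poster) currently holds it. */
static bool dca_try_lock(atomic_bitmap_t *sync_status, uint32_t dcan)
{
	atomic_bitmap_t *p = sync_status + dcan / 64;
	uint64_t mask = 1ULL << (dcan % 64);

	return (atomic_fetch_or(p, mask) & mask) == 0;
}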
+ +Each QP has 2 bits in shared memory, 1 bit is used to lock the DCA status +changing by kernel driver or user driver, another bit is used to indicate +the DCA attaching status. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 31 +++++++++++++++++++++++ + providers/hns/hns_roce_u_buf.c | 42 ++++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++- + 3 files changed, 92 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index a8f811e..91b0c8f 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -362,6 +362,7 @@ struct hns_roce_dca_buf { + void **bufs; + unsigned int max_cnt; + unsigned int shift; ++ unsigned int dcan; + }; + + struct hns_roce_qp { +@@ -422,6 +423,7 @@ struct hns_roce_dca_attach_attr { + uint32_t sq_offset; + uint32_t sge_offset; + uint32_t rq_offset; ++ bool force; + }; + + struct hns_roce_dca_detach_attr { +@@ -534,6 +536,32 @@ static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock) + return 0; + } + ++#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64)) ++#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64) ++ ++static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr) ++{ ++ p += HNS_ROCE_BIT_WORD(nr); ++ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr)); ++} ++ ++static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr) ++{ ++ uint64_t mask = HNS_ROCE_BIT_MASK(nr); ++ ++ p += HNS_ROCE_BIT_WORD(nr); ++ if (atomic_load(p) & mask) ++ return true; ++ ++ return (atomic_fetch_or(p, mask) & mask) != 0; ++} ++ ++static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr) ++{ ++ p += HNS_ROCE_BIT_WORD(nr); ++ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr)); ++} ++ + int hns_roce_u_query_device(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size); +@@ -614,6 +642,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + uint32_t size, struct hns_roce_dca_buf *buf); + void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_detach_attr *attr); ++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); ++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); ++ + void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 3d41b89..08c0fbc 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -440,6 +440,45 @@ static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, + return (idx >= page_count) ? 
0 : -ENOMEM;
+ }
+
++#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS)
++#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n)
++
++#define MAX_DCA_TRY_LOCK_TIMES 10
++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
++{
++ atomic_bitmap_t *st = ctx->sync_status;
++ int try_times = 0;
++
++ if (!st || dcan >= ctx->max_qps)
++ return true;
++
++ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan)))
++ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES)
++ return false;
++
++ return true;
++}
++
++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
++{
++ atomic_bitmap_t *st = ctx->sync_status;
++
++ if (!st || dcan >= ctx->max_qps)
++ return;
++
++ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan));
++}
++
++static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
++{
++ atomic_bitmap_t *st = ctx->buf_status;
++
++ if (!st || dcan >= ctx->max_qps)
++ return false;
++
++ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan));
++}
++
+ #define DCA_EXPAND_MEM_TRY_TIMES 3
+ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_attach_attr *attr,
+@@ -451,6 +490,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ int try_times = 0;
+ int ret = 0;
+
++ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan))
++ return 0;
++
+ do {
+ resp.alloc_pages = 0;
+ ret = attach_dca_mem(ctx, handle, attr, &resp);
+diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
+index 7e3ad92..028d20c 100644
+--- a/providers/hns/hns_roce_u_hw_v2.c
++++ b/providers/hns/hns_roce_u_hw_v2.c
+@@ -601,6 +601,7 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp)
+ {
+ struct hns_roce_dca_attach_attr attr = {};
++ bool enable_detach;
+ uint32_t idx;
+ int ret;
+
+@@ -622,9 +623,16 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
+ attr.rq_offset = idx << qp->rq.wqe_shift;
+ }
+
++ enable_detach = check_dca_detach_enable(qp);
++ if (enable_detach &&
++ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan))
++ /* Force attach if failed to sync dca status */
++ attr.force = true;
+
+ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
+- qp->buf_size, &qp->dca_wqe);
++ qp->buf_size, &qp->dca_wqe);
++ if (ret && enable_detach)
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+@@ -1450,6 +1458,9 @@ out:
+
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+
++ if (check_dca_detach_enable(qp))
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
++
+ if (ibvqp->state == IBV_QPS_ERR) {
+ attr.qp_state = IBV_QPS_ERR;
+
+@@ -1582,6 +1593,9 @@ out:
+
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
+
++ if (check_dca_detach_enable(qp))
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
++
+ if (ibvqp->state == IBV_QPS_ERR) {
+ attr.qp_state = IBV_QPS_ERR;
+ hns_roce_u_v2_modify_qp(ibvqp, &attr, IBV_QP_STATE);
+@@ -1693,6 +1707,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ if (attr->qp_state == IBV_QPS_RTR) {
+ hr_qp->tc_mode = resp_ex.drv_payload.tc_mode;
+ hr_qp->priority = resp_ex.drv_payload.priority;
++ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan;
+ }
+ }
+
+@@ -2721,6 +2736,9 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
+ out:
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+
++ if (check_dca_detach_enable(qp))
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
++
+ if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
+ attr.qp_state = IBV_QPS_ERR;
+ hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
+--
+2.30.0
+
diff --git a/0037-libhns-Add-direct-verbs-support-to-config-DCA.patch b/0037-libhns-Add-direct-verbs-support-to-config-DCA.patch
new file mode 100644
index 0000000..87e9ddf
--- /dev/null
+++ b/0037-libhns-Add-direct-verbs-support-to-config-DCA.patch
@@ -0,0 +1,766 @@
+From 6aa5efb3059c66d3d0f49804551b38c5ed827ec1 Mon Sep 17 00:00:00 2001
+From: Chengchang Tang
+Date: Mon, 10 May 2021 17:13:49 +0800
+Subject: libhns: Add direct verbs support to config DCA
+
+driver inclusion
+category: feature
+bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
+
+----------------------------------------------------------
+
+Add two direct verbs to configure DCA:
+1. hnsdv_open_device() is used to configure the DCA memory pool.
+2. hnsdv_create_qp() is used to create a DCA QP.
+
+Signed-off-by: Chengchang Tang
+Reviewed-by: Yangyang Li
+---
+ debian/control | 2 +-
+ debian/ibverbs-providers.install | 1 +
+ debian/ibverbs-providers.lintian-overrides | 4 +-
+ debian/ibverbs-providers.symbols | 6 ++
+ debian/libibverbs-dev.install | 4 +
+ providers/hns/CMakeLists.txt | 9 ++-
+ providers/hns/hns_roce_u.c | 92 +++++++++++++++++++---
+ providers/hns/hns_roce_u.h | 2 +
+ providers/hns/hns_roce_u_abi.h | 1 +
+ providers/hns/hns_roce_u_buf.c | 3 +
+ providers/hns/hns_roce_u_hw_v2.c | 33 +++++++-
+ providers/hns/hns_roce_u_verbs.c | 58 ++++++++++++--
+ providers/hns/hnsdv.h | 65 +++++++++++++++
+ providers/hns/libhns.map | 9 +++
+ redhat/rdma-core.spec | 5 +-
+ suse/rdma-core.spec | 21 ++++-
+ 16 files changed, 289 insertions(+), 26 deletions(-)
+ create mode 100644 providers/hns/hnsdv.h
+ create mode 100644 providers/hns/libhns.map
+
+diff --git a/debian/control b/debian/control
+index 7485ad3..22eb6cd 100644
+--- a/debian/control
++++ b/debian/control
+@@ -94,7 +94,7 @@ Description: User space provider drivers for libibverbs
+ - cxgb4: Chelsio T4 iWARP HCAs
+ - efa: Amazon Elastic Fabric Adapter
+ - hfi1verbs: Intel Omni-Path HFI
+- - hns: HiSilicon Hip06 SoC
++ - hns: HiSilicon Hip08+ SoC
+ - ipathverbs: QLogic InfiniPath HCAs
+ - irdma: Intel Ethernet Connection RDMA
+ - mlx4: Mellanox ConnectX-3 InfiniBand HCAs
+diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
+index 4f971fb..c6ecbbc 100644
+--- a/debian/ibverbs-providers.install
++++ b/debian/ibverbs-providers.install
+@@ -1,5 +1,6 @@
+ etc/libibverbs.d/
+ usr/lib/*/libefa.so.*
+ usr/lib/*/libibverbs/lib*-rdmav*.so
++usr/lib/*/libhns.so.*
+ usr/lib/*/libmlx4.so.*
+ usr/lib/*/libmlx5.so.*
+diff --git a/debian/ibverbs-providers.lintian-overrides b/debian/ibverbs-providers.lintian-overrides
+index 8a44d54..f6afb70 100644
+--- a/debian/ibverbs-providers.lintian-overrides
++++ b/debian/ibverbs-providers.lintian-overrides
+@@ -1,2 +1,2 @@
+-# libefa, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
+-ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmlx4-1 libmlx5-1
++# libefa, libhns, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
++ibverbs-providers: package-name-doesnt-match-sonames libefa1 libhns-1 libmlx4-1 libmlx5-1
+diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols
+index 2c6b330..1844369 100644
+--- a/debian/ibverbs-providers.symbols
++++ b/debian/ibverbs-providers.symbols
+@@ -162,3 +162,9 @@ libefa.so.1 ibverbs-providers #MINVER#
+ efadv_create_qp_ex@EFA_1.1 26
+ efadv_query_device@EFA_1.1 26
+ efadv_query_ah@EFA_1.1 26
++libhns.so.1 ibverbs-providers #MINVER#
++* Build-Depends-Package: libibverbs-dev
++ HNS_1.0@HNS_1.0 36
++ hnsdv_is_supported@HNS_1.0 36
++ hnsdv_open_device@HNS_1.0 36
++ hnsdv_create_qp@HNS_1.0 36
+diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
+index bc8caa5..7d6e6a2 100644
+--- a/debian/libibverbs-dev.install
++++ b/debian/libibverbs-dev.install
+@@ -1,5 +1,6 @@
+ usr/include/infiniband/arch.h
+ usr/include/infiniband/efadv.h
++usr/include/infiniband/hnsdv.h
+ usr/include/infiniband/ib_user_ioctl_verbs.h
+ usr/include/infiniband/mlx4dv.h
+ usr/include/infiniband/mlx5_api.h
+@@ -14,6 +15,8 @@ usr/include/infiniband/verbs_api.h
+ usr/lib/*/lib*-rdmav*.a
+ usr/lib/*/libefa.a
+ usr/lib/*/libefa.so
++usr/lib/*/libhns.a
++usr/lib/*/libhns.so
+ usr/lib/*/libibverbs*.so
+ usr/lib/*/libibverbs.a
+ usr/lib/*/libmlx4.a
+@@ -21,6 +24,7 @@ usr/lib/*/libmlx4.so
+ usr/lib/*/libmlx5.a
+ usr/lib/*/libmlx5.so
+ usr/lib/*/pkgconfig/libefa.pc
++usr/lib/*/pkgconfig/libhns.pc
+ usr/lib/*/pkgconfig/libibverbs.pc
+ usr/lib/*/pkgconfig/libmlx4.pc
+ usr/lib/*/pkgconfig/libmlx5.pc
+diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
+index 7aaca75..160e1ff 100644
+--- a/providers/hns/CMakeLists.txt
++++ b/providers/hns/CMakeLists.txt
+@@ -1,7 +1,14 @@
+-rdma_provider(hns
++rdma_shared_provider(hns libhns.map
++ 1 1.0.${PACKAGE_VERSION}
+ hns_roce_u.c
+ hns_roce_u_buf.c
+ hns_roce_u_db.c
+ hns_roce_u_hw_v2.c
+ hns_roce_u_verbs.c
+ )
++
++publish_headers(infiniband
++ hnsdv.h
++)
++
++rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
+diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
+index fe30cda..0cf6d4b 100644
+--- a/providers/hns/hns_roce_u.c
++++ b/providers/hns/hns_roce_u.c
+@@ -114,8 +114,60 @@ static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
+ return 0;
+ }
+
++bool hnsdv_is_supported(struct ibv_device *device)
++{
++ return is_hns_dev(device);
++}
++
++struct ibv_context *hnsdv_open_device(struct ibv_device *device,
++ struct hnsdv_context_attr *attr)
++{
++ if (!is_hns_dev(device)) {
++ errno = EOPNOTSUPP;
++ return NULL;
++ }
++
++ return verbs_open_device(device, attr);
++}
++
++static void set_dca_pool_param(struct hns_roce_context *ctx,
++ struct hnsdv_context_attr *attr, int page_size)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE)
++ dca_ctx->unit_size = align(attr->dca_unit_size, page_size);
++ else
++ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
++
++ /* The memory pool cannot be expanded, only init the DCA context. */
++ if (dca_ctx->unit_size == 0)
++ return;
++
++ /* If not set, the memory pool can be expanded unlimitedly. */
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE)
++ dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size,
++ dca_ctx->unit_size) *
++ dca_ctx->unit_size;
++ else
++ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
++
++ /* If not set, the memory pool cannot be shrunk. */
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE)
++ dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size,
++ dca_ctx->unit_size) *
++ dca_ctx->unit_size;
++ else
++ dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE;
++
++ verbs_debug(&ctx->ibv_ctx,
++ "Support DCA, unit %d, max %ld, min %ld Bytes.\n",
++ dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size);
++}
++
+ static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
+ struct hns_roce_alloc_ucontext_resp *resp,
++ struct hnsdv_context_attr *attr,
+ int page_size)
+ {
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+@@ -127,14 +179,18 @@ static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+ return 0;
+
++ dca_ctx->unit_size = 0;
++ dca_ctx->mem_cnt = 0;
++
+ list_head_init(&dca_ctx->mem_list);
+ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
+ if (ret)
+ return ret;
+
+- dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+- dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+- dca_ctx->mem_cnt = 0;
++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
++ return 0;
++
++ set_dca_pool_param(ctx, attr, page_size);
+
+ if (mmap_key) {
+ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
+@@ -185,18 +241,28 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
+ return count_shift > size_shift ? count_shift - size_shift : 0;
+ }
+
+-static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size)
++static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd,
++ struct hnsdv_context_attr *attr)
+ {
+ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
+- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
+- cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
+- cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS;
++ HNS_ROCE_CQE_INLINE_FLAGS;
++
++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
++ return;
++
++ cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA;
++
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) {
++ cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
++ cmd->dca_max_qps = attr->dca_prime_qps;
++ }
+ }
+
+ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
+ int cmd_fd,
+ void *private_data)
+ {
++ struct hnsdv_context_attr *ctx_attr = private_data;
+ struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_alloc_ucontext_resp resp = {};
+ struct hns_roce_alloc_ucontext cmd = {};
+@@ -209,7 +275,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
+ if (!context)
+ return NULL;
+
+- ucontext_set_cmd(&cmd, hr_dev->page_size);
++ ucontext_set_cmd(&cmd, ctx_attr);
+ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
+ &resp.ibv_resp, sizeof(resp)))
+ goto err_free;
+@@ -255,7 +321,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
+ context->max_srq_wr = dev_attrs.max_srq_wr;
+ context->max_srq_sge = dev_attrs.max_srq_sge;
+
+- if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size))
++ if (init_dca_context(context, cmd_fd,
++ &resp, ctx_attr, hr_dev->page_size))
+ goto err_free;
+
+ if (hns_roce_mmap(hr_dev, context, cmd_fd))
+@@ -317,4 +384,11 @@ static const struct verbs_device_ops hns_roce_dev_ops = {
+ .uninit_device = hns_uninit_device,
+ .alloc_context = hns_roce_alloc_context,
+ };
++
++bool is_hns_dev(struct ibv_device *device)
++{
++ struct verbs_device *verbs_device = verbs_get_device(device);
++
++ return verbs_device->ops == &hns_roce_dev_ops;
++}
+ PROVIDER_DRIVER(hns, hns_roce_dev_ops);
+diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
+index 91b0c8f..71c35c5 100644
+--- a/providers/hns/hns_roce_u.h
++++ b/providers/hns/hns_roce_u.h
+@@ -562,6 +562,8 @@ static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
+ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
+ }
+
++bool is_hns_dev(struct ibv_device *device);
++
+ int hns_roce_u_query_device(struct ibv_context *context,
+ const struct ibv_query_device_ex_input *input,
+ struct ibv_device_attr_ex *attr, size_t attr_size);
+diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
+index 0519ac7..1eaf62d 100644
+--- a/providers/hns/hns_roce_u_abi.h
++++ b/providers/hns/hns_roce_u_abi.h
+@@ -36,6 +36,7 @@
+ #include
+ #include
+ #include
++#include "hnsdv.h"
+
+ DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
+ empty, hns_roce_ib_alloc_pd_resp);
+diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
+index 08c0fbc..780683e 100644
+--- a/providers/hns/hns_roce_u_buf.c
++++ b/providers/hns/hns_roce_u_buf.c
+@@ -56,6 +56,9 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
+
+ void hns_roce_free_buf(struct hns_roce_buf *buf)
+ {
++ if (!buf->buf)
++ return;
++
+ ibv_dofork_range(buf->buf, buf->length);
+
+ munmap(buf->buf, buf->length);
+diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
+index 028d20c..7661863 100644
+--- a/providers/hns/hns_roce_u_hw_v2.c
++++ b/providers/hns/hns_roce_u_hw_v2.c
+@@ -1473,6 +1473,7 @@ out:
+ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ {
+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
++ int ret = 0;
+
+ if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
+ ibvqp->qp_type != IBV_QPT_UD))
+@@ -1481,10 +1482,15 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ if (ibvqp->state == IBV_QPS_RESET || ibvqp->srq)
+ return -EINVAL;
+
+- if (check_dca_attach_enable(qp))
+- return dca_attach_qp_buf(ctx, qp);
++ if (check_dca_attach_enable(qp)) {
++ ret = dca_attach_qp_buf(ctx, qp);
++ if (ret)
++ verbs_err_datapath(&ctx->ibv_ctx,
++ "failed to attach QP-%u recv, ret = %d.\n",
++ qp->verbs_qp.qp.qp_num, ret);
++ }
+
+- return 0;
++ return ret;
+ }
+
+ static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
+@@ -1951,6 +1957,9 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
+ hns_roce_spin_lock(&cq->hr_lock);
+
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
++ if (qp && check_dca_detach_enable(qp))
++ dca_detach_qp_buf(ctx, qp);
++
+ if (err != V2_CQ_OK)
+ hns_roce_spin_unlock(&cq->hr_lock);
+
+@@ -1965,6 +1974,8 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
+ int err;
+
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
++ if (qp && check_dca_detach_enable(qp))
++ dca_detach_qp_buf(ctx, qp);
+ if (err != V2_CQ_OK)
+ return err;
+
+@@ -2159,6 +2170,9 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
+ hr_reg_clear(wqe, RCWQE_INLINE);
+ hr_reg_clear(wqe, RCWQE_SO);
+
++ if (check_qp_dca_enable(qp))
++ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, wqe);
++
+ qp->sq.wrid[wqe_idx] = wr_id;
+ qp->cur_wqe = wqe;
+ qp->sq.head++;
+@@ -2691,8 +2705,10 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+
+ static void wr_start(struct ibv_qp_ex *ibv_qp)
+ {
++ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ enum ibv_qp_state state = ibv_qp->qp_base.state;
++ int ret;
+
+ if (state == IBV_QPS_RESET ||
+ state == IBV_QPS_INIT ||
+@@ -2701,6 +2717,17 @@ static void wr_start(struct ibv_qp_ex *ibv_qp)
+ return;
+ }
+
++ if (check_qp_dca_enable(qp)) {
++ ret = dca_attach_qp_buf(ctx, qp);
++ if (ret) {
++ verbs_err_datapath(&ctx->ibv_ctx,
++ "failed to attach QP-%u send, ret = %d.\n",
++ qp->verbs_qp.qp.qp_num, ret);
++ qp->err = ret;
++ return;
++ }
++ }
++
+ hns_roce_spin_lock(&qp->sq.hr_lock);
+ qp->sge_info.start_idx = qp->next_sge;
+ qp->rb_sq_head = qp->sq.head;
+diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
+index 749b01b..282ab74 100644
+--- a/providers/hns/hns_roce_u_verbs.c
++++ b/providers/hns/hns_roce_u_verbs.c
+@@ -961,6 +961,15 @@ enum {
+ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
+ };
+
++enum {
++ SEND_OPS_FLAG_MASK =
++ IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
++ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM |
++ IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
++ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV |
++ IBV_QP_EX_WITH_SEND_WITH_INV,
++};
++
+ static int check_qp_create_mask(struct hns_roce_context *ctx,
+ struct ibv_qp_init_attr_ex *attr)
+ {
+@@ -969,6 +978,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
+ if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK))
+ return -EOPNOTSUPP;
+
++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS &&
++ !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK))
++ return -EOPNOTSUPP;
++
+ switch (attr->qp_type) {
+ case IBV_QPT_UD:
+ if (hr_dev->hw_version == HNS_ROCE_HW_VER2)
+@@ -1165,9 +1178,21 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
+ return 0;
+ }
+
+-static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
++static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx,
++ struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr)
+ {
+- if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
++ /* DCA pool disabled */
++ if (!dca_ctx->unit_size)
++ return false;
++
++ /* Unsupported type */
++ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND)
++ return false;
++
++ if (hns_attr &&
++ (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) &&
++ (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE))
+ return true;
+
+ return false;
+@@ -1185,6 +1210,7 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
+ }
+
+ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr,
+ struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ {
+ struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+@@ -1208,7 +1234,8 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+ goto err_alloc;
+ }
+
+- if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
++ if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) &&
++ ctx->dca_ctx.max_size > 0) {
+ /* when DCA is enabled, use a buffer list to store page addr */
+ qp->buf.buf = NULL;
+ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
+@@ -1216,6 +1243,7 @@
+ qp->dca_wqe.shift = qp->pages_shift;
+ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
+ if (!qp->dca_wqe.bufs)
+ goto err_alloc;
++ verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n");
+ } else {
+ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
+ HNS_HW_PAGE_SIZE))
+@@ -1478,6 +1506,7 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ }
+
+ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr,
+ struct hns_roce_qp *qp,
+ struct hns_roce_context *ctx)
+ {
+@@ -1487,7 +1516,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
+ pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
+ return -ENOMEM;
+
+- ret = qp_alloc_wqe(attr, qp, ctx);
++ ret = qp_alloc_wqe(attr, hns_attr, qp, ctx);
+ if (ret)
+ return ret;
+
+@@ -1510,7 +1539,8 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
+ }
+
+ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+- struct ibv_qp_init_attr_ex *attr)
++ struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr)
+ {
+ struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
+ struct hns_roce_qp *qp;
+@@ -1533,7 +1563,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+ if (ret)
+ goto err_spinlock;
+
+- ret = hns_roce_alloc_qp_buf(attr, qp, context);
++ ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context);
+ if (ret)
+ goto err_buf;
+
+@@ -1587,7 +1617,7 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
+ attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
+ attrx.pd = pd;
+
+- qp = create_qp(pd->context, &attrx);
++ qp = create_qp(pd->context, &attrx, NULL);
+ if (qp)
+ memcpy(attr, &attrx, sizeof(*attr));
+
+@@ -1597,7 +1627,19 @@ struct ibv_qp *hns_roce_u_create_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr)
+ {
+- return create_qp(context, attr);
++ return create_qp(context, attr, NULL);
++}
++
++struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
++ struct ibv_qp_init_attr_ex *qp_attr,
++ struct hnsdv_qp_init_attr *hns_attr)
++{
++ if (!is_hns_dev(context->device)) {
++ errno = EOPNOTSUPP;
++ return NULL;
++ }
++
++ return create_qp(context, qp_attr, hns_attr);
+ }
+
+ struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context,
+diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
+new file mode 100644
+index 0000000..cfe1611
+--- /dev/null
++++ b/providers/hns/hnsdv.h
+@@ -0,0 +1,65 @@
++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
++/*
++ * Copyright (c) 2021 HiSilicon Limited.
++ */
++
++#ifndef __HNSDV_H__
++#define __HNSDV_H__
++
++#include
++#include
++
++#include
++
++#include
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++enum hnsdv_context_attr_flags {
++ HNSDV_CONTEXT_FLAGS_DCA = 1 << 0,
++};
++
++enum hnsdv_context_comp_mask {
++ HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0,
++ HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1,
++ HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2,
++ HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3,
++};
++
++struct hnsdv_context_attr {
++ uint64_t flags; /* Use enum hnsdv_context_attr_flags */
++ uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */
++ uint32_t dca_prime_qps;
++ uint32_t dca_unit_size;
++ uint64_t dca_max_size;
++ uint64_t dca_min_size;
++};
++
++bool hnsdv_is_supported(struct ibv_device *device);
++struct ibv_context *hnsdv_open_device(struct ibv_device *device,
++ struct hnsdv_context_attr *attr);
++
++enum hnsdv_qp_create_flags {
++ HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
++};
++
++enum hnsdv_qp_init_attr_mask {
++ HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0,
++};
++
++struct hnsdv_qp_init_attr {
++ uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */
++ uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */
++};
++
++struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
++ struct ibv_qp_init_attr_ex *qp_attr,
++ struct hnsdv_qp_init_attr *hns_qp_attr);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* __HNSDV_H__ */
+diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
+new file mode 100644
+index 0000000..aed491c
+--- /dev/null
++++ b/providers/hns/libhns.map
+@@ -0,0 +1,9 @@
++/* Export symbols should be added below according to
++ Documentation/versioning.md document. */
++HNS_1.0 {
++ global:
++ hnsdv_is_supported;
++ hnsdv_open_device;
++ hnsdv_create_qp;
++ local: *;
++};
+diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
+index f1b196a..321578c 100644
+--- a/redhat/rdma-core.spec
++++ b/redhat/rdma-core.spec
+@@ -150,6 +150,8 @@ Provides: libefa = %{version}-%{release}
+ Obsoletes: libefa < %{version}-%{release}
+ Provides: libhfi1 = %{version}-%{release}
+ Obsoletes: libhfi1 < %{version}-%{release}
++Provides: libhns = %{version}-%{release}
++Obsoletes: libhns < %{version}-%{release}
+ Provides: libipathverbs = %{version}-%{release}
+ Obsoletes: libipathverbs < %{version}-%{release}
+ Provides: libirdma = %{version}-%{release}
+@@ -177,7 +179,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
+ - libcxgb4: Chelsio T4 iWARP HCA
+ - libefa: Amazon Elastic Fabric Adapter
+ - libhfi1: Intel Omni-Path HFI
+-- libhns: HiSilicon Hip06 SoC
++- libhns: HiSilicon Hip08+ SoC
+ - libipathverbs: QLogic InfiniPath HCA
+ - libirdma: Intel Ethernet Connection RDMA
+ - libmlx4: Mellanox ConnectX-3 InfiniBand HCA
+@@ -562,6 +564,7 @@ fi
+ %dir %{_sysconfdir}/libibverbs.d
+ %dir %{_libdir}/libibverbs
+ %{_libdir}/libefa.so.*
++%{_libdir}/libhns.so.*
+ %{_libdir}/libibverbs*.so.*
+ %{_libdir}/libibverbs/*.so
+ %{_libdir}/libmlx5.so.*
+diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec
+index bd1faec..ce19db1 100644
+--- a/suse/rdma-core.spec
++++ b/suse/rdma-core.spec
+@@ -35,6 +35,7 @@ License: BSD-2-Clause OR GPL-2.0-only
+ Group: Productivity/Networking/Other
+
+ %define efa_so_major 1
++%define hns_so_major 1
+ %define verbs_so_major 1
+ %define rdmacm_so_major 1
+ %define umad_so_major 3
+@@ -44,6 +45,7 @@ Group: Productivity/Networking/Other
+ %define mad_major 5
+
+ %define efa_lname libefa%{efa_so_major}
++%define hns_lname libhns%{hns_so_major}
+ %define verbs_lname libibverbs%{verbs_so_major}
+ %define rdmacm_lname librdmacm%{rdmacm_so_major}
+ %define umad_lname libibumad%{umad_so_major}
+@@ -157,6 +159,7 @@ Requires: %{umad_lname} = %{version}-%{release}
+ Requires: %{verbs_lname} = %{version}-%{release}
+ %if 0%{?dma_coherent}
+ Requires: %{efa_lname} = %{version}-%{release}
++Requires: %{hns_lname} = %{version}-%{release}
+ Requires: %{mlx4_lname} = %{version}-%{release}
+ Requires: %{mlx5_lname} = %{version}-%{release}
+ %endif
+@@ -197,6 +200,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Obsoletes: libcxgb4-rdmav2 < %{version}-%{release}
+ Obsoletes: libefa-rdmav2 < %{version}-%{release}
+ Obsoletes: libhfi1verbs-rdmav2 < %{version}-%{release}
++Obsoletes: libhns-rdmav2 < %{version}-%{release}
+ Obsoletes: libipathverbs-rdmav2 < %{version}-%{release}
+ Obsoletes: libmlx4-rdmav2 < %{version}-%{release}
+ Obsoletes: libmlx5-rdmav2 < %{version}-%{release}
+@@ -205,6 +209,7 @@ Obsoletes: libocrdma-rdmav2 < %{version}-%{release}
+ Obsoletes: librxe-rdmav2 < %{version}-%{release}
+ %if 0%{?dma_coherent}
+ Requires: %{efa_lname} = %{version}-%{release}
++Requires: %{hns_lname} = %{version}-%{release}
+ Requires: %{mlx4_lname} = %{version}-%{release}
+ Requires: %{mlx5_lname} = %{version}-%{release}
+ %endif
+@@ -223,7 +228,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
+ - libcxgb4: Chelsio T4 iWARP HCA
+ - libefa: Amazon Elastic Fabric Adapter
+ - libhfi1: Intel Omni-Path HFI
+-- libhns: HiSilicon Hip06 SoC
++- libhns: HiSilicon Hip08+ SoC
+ - libipathverbs: QLogic InfiniPath HCA
+ - libirdma: Intel Ethernet Connection RDMA
+ - libmlx4: Mellanox ConnectX-3 InfiniBand HCA
+@@ -250,6 +255,13 @@ Group: System/Libraries
+ %description -n %efa_lname
+ This package contains the efa runtime library.
+
++%package -n %hns_lname
++Summary: HNS runtime library
++Group: System/Libraries
++
++%description -n %hns_lname
++This package contains the hns runtime library.
++
+ %package -n %mlx4_lname
+ Summary: MLX4 runtime library
+ Group: System/Libraries
+@@ -493,6 +505,9 @@ rm -rf %{buildroot}/%{_sbindir}/srp_daemon.sh
+ %post -n %efa_lname -p /sbin/ldconfig
+ %postun -n %efa_lname -p /sbin/ldconfig
+
++%post -n %hns_lname -p /sbin/ldconfig
++%postun -n %hns_lname -p /sbin/ldconfig
++
+ %post -n %mlx4_lname -p /sbin/ldconfig
+ %postun -n %mlx4_lname -p /sbin/ldconfig
+
+@@ -689,6 +704,10 @@ done
+ %defattr(-,root,root)
+ %{_libdir}/libefa*.so.*
+
++%files -n %hns_lname
++%defattr(-,root,root)
++%{_libdir}/libhns*.so.*
++
+ %files -n %mlx4_lname
+ %defattr(-,root,root)
+ %{_libdir}/libmlx4*.so.*
+--
+2.30.0
+
diff --git a/rdma-core.spec b/rdma-core.spec
index 69702db..971ad45 100644
--- a/rdma-core.spec
+++ b/rdma-core.spec
@@ -1,6 +1,6 @@
 Name: rdma-core
 Version: 41.0
-Release: 6
+Release: 7
 Summary: RDMA core userspace libraries and daemons
 License: GPLv2 or BSD
 Url: https://github.com/linux-rdma/rdma-core
@@ -36,6 +36,13 @@ Patch26: 0027-libhns-Add-RoH-device-IDs.patch
 Patch27: 0028-Update-kernel-headers.patch
 Patch28: 0029-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch
 Patch29: 0030-libhns-Add-support-for-the-thread-domain-and-the-par.patch
+Patch30: 0031-Update-kernel-headers.patch
+Patch31: 0032-libhns-Introduce-DCA-for-RC-QP.patch
+Patch32: 0033-libhns-Add-support-for-shrinking-DCA-memory-pool.patch
+Patch33: 0034-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch
+Patch34: 0035-libhns-Use-shared-memory-to-sync-DCA-status.patch
+Patch35: 0036-libhns-Sync-DCA-status-by-shared-memory.patch
+Patch36: 0037-libhns-Add-direct-verbs-support-to-config-DCA.patch
 BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
 BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
@@ -59,6 +66,8 @@ Provides: libefa = %{version}-%{release}
 Obsoletes: libefa < %{version}-%{release}
 Provides: libhfi1 = %{version}-%{release}
 Obsoletes: libhfi1 < %{version}-%{release}
+Provides: libhns = %{version}-%{release}
+Obsoletes: libhns < %{version}-%{release}
 Provides: libi40iw = %{version}-%{release}
 Obsoletes: libi40iw < %{version}-%{release}
 Provides: libipathverbs = %{version}-%{release}
 Obsoletes: libipathverbs < %{version}-%{release}
@@ -243,6 +252,7 @@ fi
 %{_libdir}/libibmad*.so.*
 %{_libdir}/libibnetdisc*.so.*
 %{_libdir}/libefa.so.*
+%{_libdir}/libhns.so.*
 %{_libdir}/libibverbs*.so.*
 %{_libdir}/libibverbs/*.so
 %{_libdir}/libmlx5.so.*
@@ -280,6 +290,12 @@ fi
 %{_mandir}/*
 %changelog
+* Wed Nov 30 2022 tangchengchang - 41.0-7
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Add support for hns DCA
+
 * Mon Nov 28 2022 Yixing Liu - 41.0-6
 - Type: requirement
 - ID: NA
--
Gitee
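Editor's usage note (appended after the series, not part of any patch above): the
sketch below shows how an application is expected to drive the two new direct
verbs once this series is applied. Only hnsdv_is_supported(), hnsdv_open_device(),
hnsdv_create_qp() and the HNSDV_* constants come from hnsdv.h as added by patch
0037; the device selection, queue sizes, dca_prime_qps value and error handling
are illustrative assumptions.

/* Minimal sketch: open a DCA-enabled hns context, then create an RC QP
 * whose WQE buffer lives in the shared DCA memory pool. */
#include <errno.h>
#include <stdio.h>
#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>

int main(void)
{
    struct ibv_device **list = ibv_get_device_list(NULL);
    struct hnsdv_context_attr ctx_attr = {
        .flags = HNSDV_CONTEXT_FLAGS_DCA,       /* arm the DCA pool */
        .comp_mask = HNSDV_CONTEXT_MASK_DCA_PRIME_QPS,
        .dca_prime_qps = 64,    /* assumed value; sizes the status bitmaps */
    };
    struct hnsdv_qp_init_attr hns_attr = {
        .comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
        .create_flags = HNSDV_QP_CREATE_ENABLE_DCA_MODE,
    };
    struct ibv_context *ctx;
    struct ibv_pd *pd;
    struct ibv_cq *cq;
    struct ibv_qp *qp;

    if (!list || !list[0] || !hnsdv_is_supported(list[0]))
        return 1;       /* not an hns device: hnsdv_* would fail with EOPNOTSUPP */

    ctx = hnsdv_open_device(list[0], &ctx_attr);
    if (!ctx)
        return 1;

    pd = ibv_alloc_pd(ctx);
    cq = ibv_create_cq(ctx, 64, NULL, NULL, 0);
    if (!pd || !cq)
        return 1;

    /* DCA is only honored for RC (and XRC_SEND) QPs; other QP types keep
     * a privately allocated WQE buffer. */
    struct ibv_qp_init_attr_ex attr = {
        .qp_type = IBV_QPT_RC,
        .send_cq = cq,
        .recv_cq = cq,
        .comp_mask = IBV_QP_INIT_ATTR_PD,
        .pd = pd,
        .cap = { .max_send_wr = 16, .max_recv_wr = 16,
                 .max_send_sge = 1, .max_recv_sge = 1 },
    };
    qp = hnsdv_create_qp(ctx, &attr, &hns_attr);
    if (!qp) {
        fprintf(stderr, "failed to create DCA QP, errno %d\n", errno);
        return 1;
    }

    /* ... connect and post as usual; the provider attaches the WQE buffer
     * before posting and detaches it when completions are polled ... */

    ibv_destroy_qp(qp);
    ibv_destroy_cq(cq);
    ibv_dealloc_pd(pd);
    ibv_close_device(ctx);
    ibv_free_device_list(list);
    return 0;
}

Plain ibv_create_qp()/ibv_create_qp_ex() still allocate a private WQE buffer even
in a DCA-enabled context: per check_qp_support_dca() above, a QP only joins the
pool when HNSDV_QP_CREATE_ENABLE_DCA_MODE is set, and ucontext_set_cmd() only
requests HNS_ROCE_UCTX_CONFIG_DCA from the kernel when HNSDV_CONTEXT_FLAGS_DCA
was passed at open time.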