From b95203b1fa98845c760b0dbe887ec4f631a844f7 Mon Sep 17 00:00:00 2001 From: Ran Zhou Date: Thu, 11 Apr 2024 09:22:46 +0800 Subject: [PATCH] Support hns roce DCA DCA(Dynamic context attachment) support many RC QPs to share the WQE buffer in a memory pool, this help reducing the memory consumption when there are many QPs are inactive. Signed-off-by: Ran Zhou (cherry picked from commit 994c08d7e68ba906b7f7c16e8528700508af94b1) --- 0019-Update-kernel-headers.patch | 145 +++++ 0020-libhns-Introduce-DCA-for-RC-QP.patch | 346 +++++++++++ ...upport-for-shrinking-DCA-memory-pool.patch | 204 +++++++ ...upport-for-attaching-QP-s-WQE-buffer.patch | 575 ++++++++++++++++++ ...Use-shared-memory-to-sync-DCA-status.patch | 167 +++++ ...hns-Sync-DCA-status-by-shared-memory.patch | 223 +++++++ ...d-direct-verbs-support-to-config-DCA.patch | 386 ++++++++++++ rdma-core.spec | 18 +- 8 files changed, 2062 insertions(+), 2 deletions(-) create mode 100644 0019-Update-kernel-headers.patch create mode 100644 0020-libhns-Introduce-DCA-for-RC-QP.patch create mode 100644 0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch create mode 100644 0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch create mode 100644 0023-libhns-Use-shared-memory-to-sync-DCA-status.patch create mode 100644 0024-libhns-Sync-DCA-status-by-shared-memory.patch create mode 100644 0025-libhns-Add-direct-verbs-support-to-config-DCA.patch diff --git a/0019-Update-kernel-headers.patch b/0019-Update-kernel-headers.patch new file mode 100644 index 0000000..a2f881e --- /dev/null +++ b/0019-Update-kernel-headers.patch @@ -0,0 +1,145 @@ +From 12067eedd348988f882f707555239d692f6c13c4 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 28 Nov 2022 21:52:20 +0800 +Subject: [PATCH 19/25] Update kernel headers + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +To commit ?? ("RDMA/hns: Fixes concurrent ressetting and post_recv in DCA +mode"). 
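+
+For illustration only (not part of this header update): with the fields
+added below, a userspace provider would negotiate DCA roughly as in the
+following sketch. The helper names and the <rdma/hns-abi.h> include path
+are assumptions for the example; the structures and flags come from this
+header, the ibv_cmd_get_context() exchange that carries them is taken as
+given, and the response's config word is assumed to carry the
+HNS_ROCE_UCTX_RSP_* flags, as the provider patches later in this series
+expect.
+
+    #include <stdbool.h>
+    #include <stdint.h>
+    #include <rdma/hns-abi.h>
+
+    /* Hypothetical helper: request DCA before ibv_cmd_get_context(). */
+    static void hns_fill_dca_request(struct hns_roce_ib_alloc_ucontext *cmd,
+                                     uint32_t max_qps)
+    {
+            cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA;
+            cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
+            cmd->dca_max_qps = max_qps;
+    }
+
+    /* Hypothetical helper: check whether the kernel granted DCA. */
+    static bool hns_dca_granted(const struct hns_roce_ib_alloc_ucontext_resp *resp)
+    {
+            /* When set, dca_qps, dca_mmap_size and dca_mmap_key are valid. */
+            return !!(resp->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS);
+    }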
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + kernel-headers/rdma/hns-abi.h | 73 ++++++++++++++++++++++++++++++++++- + 1 file changed, 72 insertions(+), 1 deletion(-) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index 1d51612..8a8f2e4 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -102,7 +102,9 @@ enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, ++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4, + HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, ++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, + }; + + struct hns_roce_ib_create_qp_resp { +@@ -114,12 +116,15 @@ struct hns_roce_ib_modify_qp_resp { + __u8 tc_mode; + __u8 priority; + __u8 reserved[6]; ++ __u32 dcan; ++ __u32 rsv2; + }; + + enum { + HNS_ROCE_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3, + HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4, + }; + +@@ -127,6 +132,7 @@ enum { + HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA, + HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ, + }; + +@@ -139,12 +145,20 @@ struct hns_roce_ib_alloc_ucontext_resp { + __u32 max_inline_data; + __u8 congest_type; + __u8 reserved0[7]; +- __aligned_u64 rsv_for_dca[2]; ++ __u32 dca_qps; ++ __u32 dca_mmap_size; ++ __aligned_u64 dca_mmap_key; + __aligned_u64 reset_mmap_key; + }; + ++enum hns_roce_uctx_comp_mask { ++ HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0, ++}; ++ + struct hns_roce_ib_alloc_ucontext { + __u32 config; ++ __u32 comp; /* use hns_roce_uctx_comp_mask */ ++ __u32 dca_max_qps; + __u32 reserved; + }; + +@@ -158,4 +172,61 @@ struct hns_roce_ib_create_ah_resp { + __u8 tc_mode; + }; + ++#define UVERBS_ID_NS_MASK 0xF000 ++#define UVERBS_ID_NS_SHIFT 12 ++ ++enum hns_ib_objects { ++ HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum hns_ib_dca_mem_methods { ++ HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_METHOD_DCA_MEM_DEREG, ++ HNS_IB_METHOD_DCA_MEM_SHRINK, ++ HNS_IB_METHOD_DCA_MEM_ATTACH, ++ HNS_IB_METHOD_DCA_MEM_DETACH, ++ HNS_IB_METHOD_DCA_MEM_QUERY, ++}; ++ ++enum hns_ib_dca_mem_reg_attrs { ++ HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_REG_FLAGS, ++ HNS_IB_ATTR_DCA_MEM_REG_LEN, ++ HNS_IB_ATTR_DCA_MEM_REG_ADDR, ++ HNS_IB_ATTR_DCA_MEM_REG_KEY, ++}; ++ ++enum hns_ib_dca_mem_dereg_attrs { ++ HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum hns_ib_dca_mem_shrink_attrs { ++ HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, ++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, ++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, ++}; ++ ++enum hns_ib_dca_mem_attach_attrs { ++ HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, ++}; ++ ++enum hns_ib_dca_mem_detach_attrs { ++ HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, ++}; ++ ++enum hns_ib_dca_mem_query_attrs { ++ HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ 
HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, ++}; ++ + #endif /* HNS_ABI_USER_H */ +-- +2.33.0 + diff --git a/0020-libhns-Introduce-DCA-for-RC-QP.patch b/0020-libhns-Introduce-DCA-for-RC-QP.patch new file mode 100644 index 0000000..721d391 --- /dev/null +++ b/0020-libhns-Introduce-DCA-for-RC-QP.patch @@ -0,0 +1,346 @@ +From f0d70762b8c69e735a1d15f8379b649bcad3929c Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:09 +0800 +Subject: [PATCH 20/25] libhns: Introduce DCA for RC QP + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +The HIP09 introduces the DCA(Dynamic context attachment) feature which +supports many RC QPs to share the WQE buffer in a memory pool, this will +reduce the memory consumption when there are too many QPs inactive. + +Two functions are defined for adding buffers to memory pool and removing +buffers from memory pool by calling ib cmd implemented in hns kernelspace +driver. + +If a QP enables DCA feature, the WQE's buffer will be attached to the +memory pool when the users start to post WRs and be detached when all CQEs +has been polled. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 61 +++++++++++++- + providers/hns/hns_roce_u.h | 21 ++++- + providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++ + 3 files changed, 226 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 810b650..2272431 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -100,6 +100,53 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + return count_shift > size_shift ? 
count_shift - size_shift : 0; + } + ++static int hns_roce_mmap(struct hns_roce_device *hr_dev, ++ struct hns_roce_context *context, int cmd_fd) ++{ ++ int page_size = hr_dev->page_size; ++ ++ context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, cmd_fd, 0); ++ if (context->uar == MAP_FAILED) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static int init_dca_context(struct hns_roce_context *ctx, int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ int ret; ++ ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) ++ return 0; ++ ++ list_head_init(&dca_ctx->mem_list); ++ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) ++ return ret; ++ ++ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; ++ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; ++ dca_ctx->mem_cnt = 0; ++ ++ return 0; ++} ++ ++static void uninit_dca_context(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) ++ return; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ hns_roce_cleanup_dca_mem(ctx); ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ pthread_spin_destroy(&dca_ctx->lock); ++} ++ + static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, + struct hns_roce_alloc_ucontext_resp *resp, + int page_size) +@@ -185,7 +232,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + return NULL; + + cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS; ++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto err_free; +@@ -198,9 +245,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + ++ if (init_dca_context(context, hr_dev->page_size)) ++ goto err_free; ++ + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) + goto reset_free; + ++ if (hns_roce_mmap(hr_dev, context, cmd_fd)) ++ goto uar_free; ++ + pthread_mutex_init(&context->qp_table_mutex, NULL); + pthread_mutex_init(&context->srq_table_mutex, NULL); + pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); +@@ -210,8 +263,11 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + return &context->ibv_ctx; + ++uar_free: ++ if (context->reset_state) ++ munmap(context->reset_state, hr_dev->page_size); + reset_free: +- munmap(context->uar, hr_dev->page_size); ++ uninit_dca_context(context); + err_free: + verbs_uninit_context(&context->ibv_ctx); + free(context); +@@ -226,6 +282,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx) + munmap(context->uar, hr_dev->page_size); + if (context->reset_state) + munmap(context->reset_state, hr_dev->page_size); ++ uninit_dca_context(context); + verbs_uninit_context(&context->ibv_ctx); + free(context); + } +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 024932a..90b2205 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -147,6 +147,10 @@ + + #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) + ++enum { ++ HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15), ++}; ++ + #define HNS_ROCE_QP_TABLE_BITS 8 + #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS) + +@@ -201,6 +205,18 @@ struct hns_roce_spinlock { + int need_lock; + }; + ++#define HNS_DCA_MAX_MEM_SIZE ~0UL ++#define HNS_DCA_DEFAULT_UNIT_PAGES 16 
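++
++/*
++ * Note: with a typical 4 KiB page size the default DCA growth unit is
++ * 16 pages (64 KiB), and HNS_DCA_MAX_MEM_SIZE (~0UL) is the "no limit"
++ * sentinel for the pool size.
++ */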
++ ++struct hns_roce_dca_ctx { ++ struct list_head mem_list; ++ pthread_spinlock_t lock; ++ int mem_cnt; ++ unsigned int unit_size; ++ uint64_t max_size; ++ uint64_t curr_size; ++}; ++ + struct hns_roce_v2_reset_state { + uint32_t is_reset; + uint32_t hw_ready; +@@ -239,7 +255,7 @@ struct hns_roce_context { + unsigned int cqe_size; + uint32_t config; + unsigned int max_inline_data; +- ++ struct hns_roce_dca_ctx dca_ctx; + bool use_new_reset_flag; + bool reseted; + }; +@@ -586,6 +602,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); ++int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); ++ + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + + bool is_hns_dev(struct ibv_device *device); +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 471dd9c..02c43ae 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf) + + munmap(buf->buf, buf->length); + } ++ ++struct hns_roce_dca_mem { ++ uint32_t handle; ++ struct list_node entry; ++ struct hns_roce_buf buf; ++ struct hns_roce_context *ctx; ++}; ++ ++static void free_dca_mem(struct hns_roce_context *ctx, ++ struct hns_roce_dca_mem *mem) ++{ ++ hns_roce_free_buf(&mem->buf); ++ free(mem); ++} ++ ++static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size) ++{ ++ struct hns_roce_dca_mem *mem = NULL; ++ int ret; ++ ++ mem = malloc(sizeof(struct hns_roce_dca_mem)); ++ if (!mem) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE); ++ if (ret) { ++ errno = ENOMEM; ++ free(mem); ++ return NULL; ++ } ++ ++ return mem; ++} ++ ++static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) ++{ ++ return (uintptr_t)dca_mem; ++} ++ ++static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) ++{ ++ return dca_mem->buf.buf + offset; ++} ++ ++static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key, ++ void *addr, uint32_t size, uint32_t *handle) ++{ ++ struct ib_uverbs_attr *attr; ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_REG, 4); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR, ++ ioctl_ptr_to_u64(addr)); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key); ++ attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE); ++ ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) { ++ verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n", ++ ret); ++ return ret; ++ } ++ ++ *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr); ++ ++ return 0; ++} ++ ++static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_DEREG, 1); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_warn(&ctx->ibv_ctx, ++ "failed to dereg DCA mem-%u, ret = %d.\n", ++ handle, ret); ++} ++ ++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_roce_dca_mem *mem; ++ struct hns_roce_dca_mem *tmp; ++ ++ list_for_each_safe(&dca_ctx->mem_list, mem, 
tmp, entry) ++ deregister_dca_mem(ctx, mem->handle); ++} ++ ++static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, ++ uint32_t alloc_size) ++{ ++ bool enable; ++ ++ pthread_spin_lock(&ctx->lock); ++ ++ if (ctx->unit_size == 0) /* Pool size can't be increased */ ++ enable = false; ++ else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */ ++ enable = true; ++ else /* Pool size doesn't exceed max size */ ++ enable = (ctx->curr_size + alloc_size) < ctx->max_size; ++ ++ pthread_spin_unlock(&ctx->lock); ++ ++ return enable; ++} ++ ++int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_roce_dca_mem *mem; ++ int ret; ++ ++ if (!add_dca_mem_enabled(&ctx->dca_ctx, size)) ++ return -ENOMEM; ++ ++ /* Step 1: Alloc DCA mem address */ ++ mem = alloc_dca_mem( ++ DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size); ++ if (!mem) ++ return -ENOMEM; ++ ++ /* Step 2: Register DCA mem uobject to pin user address */ ++ ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0), ++ mem->buf.length, &mem->handle); ++ if (ret) { ++ free_dca_mem(ctx, mem); ++ return ret; ++ } ++ ++ /* Step 3: Add DCA mem node to pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ list_add_tail(&dca_ctx->mem_list, &mem->entry); ++ dca_ctx->mem_cnt++; ++ dca_ctx->curr_size += mem->buf.length; ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ return 0; ++} +-- +2.33.0 + diff --git a/0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch b/0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch new file mode 100644 index 0000000..aa6797e --- /dev/null +++ b/0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch @@ -0,0 +1,204 @@ +From c104e33f0c4466f0c4b163984736eac18e9c8357 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:13 +0800 +Subject: [PATCH 21/25] libhns: Add support for shrinking DCA memory pool + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +The QP's WQE buffer may be detached after QP is modified or CQE is polled, +and the state of DCA mem object may be changed as clean for no QP is using +it. So shrink the clean DCA mem from the memory pool and destroy the DCA +mem's buffer to reduce the memory consumption. 
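+
+Whether the pool may shrink at all is governed by its lower bound
+(min_size). As a usage sketch only (it relies on the hnsdv direct verbs
+added later in this series in patch 25/25, the device is assumed to come
+from ibv_get_device_list(), and error handling is omitted), an
+application that wants the pool to shrink back to empty would open the
+device like this:
+
+    struct hnsdv_context_attr attr = {
+            .flags = HNSDV_CONTEXT_FLAGS_DCA,
+            .comp_mask = HNSDV_CONTEXT_MASK_DCA_MIN_SIZE,
+            .dca_min_size = 0,      /* allow the pool to shrink to empty */
+    };
+    struct ibv_context *ctx = hnsdv_open_device(device, &attr);
+
+Once patch 25/25 is applied, leaving dca_min_size unset keeps min_size at
+HNS_DCA_MAX_MEM_SIZE, so hns_roce_shrink_dca_mem() leaves the pool
+untouched.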
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 2 + + providers/hns/hns_roce_u_buf.c | 103 +++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.c | 7 +++ + 3 files changed, 112 insertions(+) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 90b2205..e3fa24d 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -214,6 +214,7 @@ struct hns_roce_dca_ctx { + int mem_cnt; + unsigned int unit_size; + uint64_t max_size; ++ uint64_t min_size; + uint64_t curr_size; + }; + +@@ -602,6 +603,7 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); + int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 02c43ae..c0f86e9 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -101,6 +101,20 @@ static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) + return (uintptr_t)dca_mem; + } + ++static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx, ++ uint64_t key) ++{ ++ struct hns_roce_dca_mem *mem; ++ struct hns_roce_dca_mem *tmp; ++ ++ list_for_each_safe(&ctx->mem_list, mem, tmp, entry) { ++ if (dca_mem_to_key(mem) == key) ++ return mem; ++ } ++ ++ return NULL; ++} ++ + static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) + { + return dca_mem->buf.buf + offset; +@@ -156,6 +170,32 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) + deregister_dca_mem(ctx, mem->handle); + } + ++struct hns_dca_mem_shrink_resp { ++ uint32_t free_mems; ++ uint64_t free_key; ++}; ++ ++static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ uint64_t size, struct hns_dca_mem_shrink_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_SHRINK, 4); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, ++ &resp->free_key, sizeof(resp->free_key)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, ++ &resp->free_mems, sizeof(resp->free_mems)); ++ ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n", ++ ret); ++ ++ return ret; ++} + static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + uint32_t alloc_size) + { +@@ -175,6 +215,17 @@ static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + return enable; + } + ++static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) ++{ ++ bool enable; ++ ++ pthread_spin_lock(&ctx->lock); ++ enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size; ++ pthread_spin_unlock(&ctx->lock); ++ ++ return enable; ++} ++ + int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; +@@ -207,3 +258,55 @@ int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + + return 0; + } ++ ++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_dca_mem_shrink_resp resp = {}; ++ struct 
hns_roce_dca_mem *mem; ++ int dca_mem_cnt; ++ uint32_t handle; ++ int ret; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ dca_mem_cnt = ctx->dca_ctx.mem_cnt; ++ pthread_spin_unlock(&dca_ctx->lock); ++ while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) { ++ resp.free_mems = 0; ++ /* Step 1: Use any DCA mem uobject to shrink pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = list_tail(&dca_ctx->mem_list, ++ struct hns_roce_dca_mem, entry); ++ handle = mem ? mem->handle : 0; ++ pthread_spin_unlock(&dca_ctx->lock); ++ if (!mem) ++ break; ++ ++ ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp); ++ if (ret || likely(resp.free_mems < 1)) ++ break; ++ ++ /* Step 2: Remove shrunk DCA mem node from pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = key_to_dca_mem(dca_ctx, resp.free_key); ++ if (mem) { ++ list_del(&mem->entry); ++ dca_ctx->mem_cnt--; ++ dca_ctx->curr_size -= mem->buf.length; ++ } ++ ++ handle = mem ? mem->handle : 0; ++ pthread_spin_unlock(&dca_ctx->lock); ++ if (!mem) ++ break; ++ ++ /* Step 3: Destroy DCA mem uobject */ ++ deregister_dca_mem(ctx, handle); ++ free_dca_mem(ctx, mem); ++ /* No any free memory after deregister 1 DCA mem */ ++ if (resp.free_mems <= 1) ++ break; ++ ++ dca_mem_cnt--; ++ } ++} +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 9016978..0a100b8 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -932,6 +932,10 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + hns_roce_spin_unlock(&cq->hr_lock); + ++ /* Try to shrink the DCA mem */ ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + return err == V2_CQ_POLL_ERR ? err : npolled; + } + +@@ -1883,6 +1887,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + + free(qp); + ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + return ret; + } + +-- +2.33.0 + diff --git a/0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch b/0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch new file mode 100644 index 0000000..1238770 --- /dev/null +++ b/0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch @@ -0,0 +1,575 @@ +From a1a5d42a2c48660c040695bd8316538a9ce83ab2 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:17 +0800 +Subject: [PATCH 22/25] libhns: Add support for attaching QP's WQE buffer + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +If a uQP works in DCA mode, the WQE's buffer will be split as many blocks +and be stored into a list. The blocks are allocated from the DCA's memory +pool before posting WRs and are dropped when the QP's CI is equal to PI +after polling CQ. 
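+
+For context, a QP only takes this attach/detach path when it was created
+in DCA mode. A minimal creation sketch follows; it uses the hnsdv direct
+verb and flags added in patch 25/25 of this series, and the context, PD,
+CQs and remaining caps are assumed to be set up as usual:
+
+    struct hnsdv_qp_init_attr dca_attr = {
+            .comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
+            .create_flags = HNSDV_QP_CREATE_ENABLE_DCA_MODE,
+    };
+    struct ibv_qp_init_attr_ex attr_ex = {
+            .qp_type = IBV_QPT_RC,  /* DCA covers only RC and XRC_SEND */
+            .comp_mask = IBV_QP_INIT_ATTR_PD,
+            .pd = pd,               /* send_cq/recv_cq/cap filled as usual */
+    };
+    struct ibv_qp *qp = hnsdv_create_qp(ctx, &attr_ex, &dca_attr);
+
+When created this way, the QP's WQE buffer is represented by the
+page-pointer array dca_wqe.bufs, which stays empty until the attach step
+below populates it from the DCA memory pool.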
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 26 ++++- + providers/hns/hns_roce_u_buf.c | 173 ++++++++++++++++++++++++++++++- + providers/hns/hns_roce_u_hw_v2.c | 125 +++++++++++++++++++++- + providers/hns/hns_roce_u_hw_v2.h | 2 + + providers/hns/hns_roce_u_verbs.c | 32 ++++-- + 5 files changed, 345 insertions(+), 13 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index e3fa24d..ba646d3 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -365,11 +365,18 @@ struct hns_roce_sge_ex { + unsigned int sge_shift; + }; + ++struct hns_roce_dca_buf { ++ void **bufs; ++ unsigned int max_cnt; ++ unsigned int shift; ++}; ++ + struct hns_roce_qp { + struct verbs_qp verbs_qp; + struct hns_roce_buf buf; ++ struct hns_roce_dca_buf dca_wqe; + int max_inline_data; +- int buf_size; ++ unsigned int buf_size; + unsigned int sq_signal_bits; + struct hns_roce_wq sq; + struct hns_roce_wq rq; +@@ -423,11 +430,22 @@ struct hns_roce_u_hw { + struct verbs_context_ops hw_ops; + }; + ++struct hns_roce_dca_attach_attr { ++ uint32_t sq_offset; ++ uint32_t sge_offset; ++ uint32_t rq_offset; ++}; ++ ++struct hns_roce_dca_detach_attr { ++ uint32_t sq_index; ++}; ++ + /* + * The entries's buffer should be aligned to a multiple of the hardware's + * minimum page size. + */ + #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE) ++#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE) + + static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift) + { +@@ -603,9 +621,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ uint32_t size, struct hns_roce_dca_buf *buf); ++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_detach_attr *attr); + void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); +-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); + + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index c0f86e9..3d41b89 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + + return ret; + } ++ ++struct hns_dca_mem_query_resp { ++ uint64_t key; ++ uint32_t offset; ++ uint32_t page_count; ++}; ++ ++static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ uint32_t index, struct hns_dca_mem_query_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_QUERY, 5); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, ++ &resp->key, sizeof(resp->key)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, ++ &resp->offset, sizeof(resp->offset)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, ++ &resp->page_count, sizeof(resp->page_count)); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, ++ "failed to query DCA mem-%u, ret = %d.\n", ++ handle, ret); ++ ++ 
return ret; ++} ++ ++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_detach_attr *attr) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_DETACH, 4); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, ++ attr->sq_index); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_warn(&ctx->ibv_ctx, ++ "failed to detach DCA mem-%u, ret = %d.\n", ++ handle, ret); ++} ++ ++struct hns_dca_mem_attach_resp { ++#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0) ++ uint32_t alloc_flags; ++ uint32_t alloc_pages; ++}; ++ ++static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ struct hns_dca_mem_attach_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_ATTACH, 6); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, ++ attr->sq_offset); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, ++ attr->sge_offset); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, ++ attr->rq_offset); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, ++ &resp->alloc_flags, sizeof(resp->alloc_flags)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, ++ &resp->alloc_pages, sizeof(resp->alloc_pages)); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, ++ "failed to attach DCA mem-%u, ret = %d.\n", ++ handle, ret); ++ ++ return ret; ++} ++ + static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + uint32_t alloc_size) + { +@@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) + return enable; + } + +-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) ++static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + struct hns_roce_dca_mem *mem; +@@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) + dca_mem_cnt--; + } + } ++ ++static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf, ++ uint32_t page_index, int page_count) ++{ ++ void **pages = &buf->bufs[page_index]; ++ int page_size = 1 << buf->shift; ++ int i; ++ ++ for (i = 0; i < page_count; i++) { ++ pages[i] = addr; ++ addr += page_size; ++ } ++} ++ ++static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_buf *buf, uint32_t page_count) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_dca_mem_query_resp resp = {}; ++ struct hns_roce_dca_mem *mem; ++ uint32_t idx = 0; ++ int ret; ++ ++ while (idx < page_count && idx < buf->max_cnt) { ++ resp.page_count = 0; ++ ret = query_dca_mem(ctx, handle, idx, &resp); ++ if (ret) ++ return -ENOMEM; ++ if (resp.page_count < 1) ++ break; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = key_to_dca_mem(dca_ctx, resp.key); ++ if (mem && resp.offset < mem->buf.length) { ++ config_dca_pages(dca_mem_addr(mem, resp.offset), ++ buf, idx, resp.page_count); ++ } else { ++ pthread_spin_unlock(&dca_ctx->lock); ++ break; ++ } ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ idx += resp.page_count; ++ } ++ ++ return (idx >= page_count) ? 
0 : -ENOMEM; ++} ++ ++#define DCA_EXPAND_MEM_TRY_TIMES 3 ++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ uint32_t size, struct hns_roce_dca_buf *buf) ++{ ++ uint32_t buf_pages = size >> buf->shift; ++ struct hns_dca_mem_attach_resp resp = {}; ++ bool is_new_buf = true; ++ int try_times = 0; ++ int ret = 0; ++ ++ do { ++ resp.alloc_pages = 0; ++ ret = attach_dca_mem(ctx, handle, attr, &resp); ++ if (ret) ++ break; ++ ++ if (resp.alloc_pages >= buf_pages) { ++ is_new_buf = !!(resp.alloc_flags & ++ HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER); ++ break; ++ } ++ ++ ret = add_dca_mem(ctx, size); ++ if (ret) ++ break; ++ } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES); ++ ++ if (ret || resp.alloc_pages < buf_pages) { ++ verbs_err(&ctx->ibv_ctx, ++ "failed to attach, size %u count %u != %u, ret = %d.\n", ++ size, buf_pages, resp.alloc_pages, ret); ++ return -ENOMEM; ++ } ++ ++ /* No need config user address if DCA config not changed */ ++ if (!is_new_buf && buf->bufs[0]) ++ return 0; ++ ++ return setup_dca_buf(ctx, handle, buf, buf_pages); ++} +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 0a100b8..7a93456 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -199,19 +199,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) + return get_sw_cqe_v2(cq, cq->cons_index); + } + ++static inline bool check_qp_dca_enable(struct hns_roce_qp *qp) ++{ ++ return !!qp->dca_wqe.bufs; ++} ++ ++static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset) ++{ ++ if (likely(qp->buf.buf)) ++ return qp->buf.buf + offset; ++ else if (unlikely(check_qp_dca_enable(qp))) ++ return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] + ++ (offset & ((1 << qp->dca_wqe.shift) - 1)); ++ else ++ return NULL; ++} ++ + static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift); ++ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); + } + + static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift); ++ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); + } + + static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift); ++ return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift)); + } + + static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) +@@ -580,6 +596,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + wc->opcode = wc_send_op_map[opcode]; + } + ++static bool check_dca_attach_enable(struct hns_roce_qp *qp) ++{ ++ return check_qp_dca_enable(qp) && ++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH); ++} ++ ++static bool check_dca_detach_enable(struct hns_roce_qp *qp) ++{ ++ return check_qp_dca_enable(qp) && ++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH); ++} ++ ++static int dca_attach_qp_buf(struct hns_roce_context *ctx, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_dca_attach_attr attr = {}; ++ uint32_t idx; ++ int ret; ++ ++ hns_roce_spin_lock(&qp->sq.hr_lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); ++ ++ if (qp->sq.wqe_cnt > 0) { ++ idx = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ attr.sq_offset = idx << qp->sq.wqe_shift; ++ } ++ ++ if (qp->ex_sge.sge_cnt > 0) { ++ idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1); ++ attr.sge_offset = idx 
<< qp->ex_sge.sge_shift; ++ } ++ ++ if (qp->rq.wqe_cnt > 0) { ++ idx = qp->rq.head & (qp->rq.wqe_cnt - 1); ++ attr.rq_offset = idx << qp->rq.wqe_shift; ++ } ++ ++ ++ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr, ++ qp->buf_size, &qp->dca_wqe); ++ ++ hns_roce_spin_unlock(&qp->rq.hr_lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ return ret; ++} ++ ++static void dca_detach_qp_buf(struct hns_roce_context *ctx, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_dca_detach_attr attr; ++ bool is_empty; ++ ++ hns_roce_spin_lock(&qp->sq.hr_lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); ++ ++ is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail; ++ if (is_empty && qp->sq.wqe_cnt > 0) ++ attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ ++ hns_roce_spin_unlock(&qp->rq.hr_lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ if (is_empty && qp->sq.wqe_cnt > 0) ++ hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr); ++} ++ + static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, + struct hns_roce_cq *cq) + { +@@ -919,6 +1002,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + for (npolled = 0; npolled < ne; ++npolled) { + err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); ++ if (qp && check_dca_detach_enable(qp)) ++ dca_detach_qp_buf(ctx, qp); ++ + if (err != V2_CQ_OK) + break; + } +@@ -970,7 +1056,7 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + + if (unlikely(ibvqp->state == IBV_QPS_RESET || + ibvqp->state == IBV_QPS_INIT || +- ibvqp->state == IBV_QPS_RTR)){ ++ ibvqp->state == IBV_QPS_RTR)) { + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "unsupported qp state, state = %d.\n", ibvqp->state); + return EINVAL; +@@ -980,6 +1066,14 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + return EIO; + } + ++ if (check_dca_attach_enable(qp)) { ++ ret = dca_attach_qp_buf(ctx, qp); ++ if (ret) ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "failed to attach QP-%u send, ret = %d.\n", ++ qp->verbs_qp.qp.qp_num, ret); ++ } ++ + return ret; + } + +@@ -1347,6 +1441,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + return 0; + } + ++static inline void fill_rc_dca_fields(uint32_t qp_num, ++ struct hns_roce_rc_sq_wqe *wqe) ++{ ++ hr_reg_write(wqe, RCWQE_SQPN_L, qp_num); ++ hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH); ++} ++ + static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe, + const struct ibv_send_wr *wr) + { +@@ -1454,6 +1555,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + return ret; + + wqe_valid: ++ if (check_qp_dca_enable(qp)) ++ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe); ++ + enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq); + + return 0; +@@ -1563,6 +1667,14 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + return EIO; + } + ++ if (check_dca_attach_enable(qp)) { ++ ret = dca_attach_qp_buf(ctx, qp); ++ if (ret) ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "failed to attach QP-%u recv, ret = %d.\n", ++ qp->verbs_qp.qp.qp_num, ret); ++ } ++ + return ret; + } + +@@ -1758,6 +1870,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, + static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask) + { ++ struct hns_roce_context *ctx = to_hr_ctx(qp->context); + struct hns_roce_modify_qp_ex_resp resp_ex = {}; + struct hns_roce_modify_qp_ex cmd_ex = {}; + struct 
hns_roce_qp *hr_qp = to_hr_qp(qp); +@@ -1804,6 +1917,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + hns_roce_init_qp_indices(to_hr_qp(qp)); + } + ++ /* Try to shrink the DCA mem */ ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + record_qp_attr(qp, attr, attr_mask); + + return ret; +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 1a7b828..50a920f 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -237,6 +237,8 @@ struct hns_roce_rc_sq_wqe { + #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259) + #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260) + ++#define RCWQE_SQPN_L_WIDTH 2 ++ + struct hns_roce_v2_wqe_data_seg { + __le32 len; + __le32 lkey; +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 69bcc13..248d862 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1311,6 +1311,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev, + return 0; + } + ++static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type) ++{ ++ if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND)) ++ return true; ++ ++ return false; ++} ++ + static void qp_free_wqe(struct hns_roce_qp *qp) + { + free_recv_rinl_buf(&qp->rq_rinl_buf); +@@ -1322,8 +1330,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp) + hns_roce_free_buf(&qp->buf); + } + +-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, +- struct hns_roce_context *ctx) ++static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); + +@@ -1341,12 +1349,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, + } + + if (qp->rq_rinl_buf.wqe_cnt) { +- if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf)) ++ if (alloc_recv_rinl_buf(attr->cap.max_recv_sge, ++ &qp->rq_rinl_buf)) + goto err_alloc; + } + +- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, 1 << qp->pageshift)) +- goto err_alloc; ++ if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) { ++ /* when DCA is enabled, use a buffer list to store page addr */ ++ qp->buf.buf = NULL; ++ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size); ++ qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT; ++ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); ++ if (!qp->dca_wqe.bufs) ++ goto err_alloc; ++ } else { ++ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, ++ HNS_HW_PAGE_SIZE)) ++ goto err_alloc; ++ } + + return 0; + +@@ -1636,7 +1656,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, + { + int ret; + +- ret = qp_alloc_wqe(&attr->cap, qp, ctx); ++ ret = qp_alloc_wqe(attr, qp, ctx); + if (ret) + return ret; + +-- +2.33.0 + diff --git a/0023-libhns-Use-shared-memory-to-sync-DCA-status.patch b/0023-libhns-Use-shared-memory-to-sync-DCA-status.patch new file mode 100644 index 0000000..9cf722e --- /dev/null +++ b/0023-libhns-Use-shared-memory-to-sync-DCA-status.patch @@ -0,0 +1,167 @@ +From 831683cc6bb077ab409cb6a1b7252a6e1762bc11 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 29 Jun 2021 20:06:47 +0800 +Subject: [PATCH 23/25] libhns: Use shared memory to sync DCA status + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ 
+ +The user DCA needs to check the QP attaching state before filling wqe +buffer by the response from uverbs 'HNS_IB_METHOD_DCA_MEM_ATTACH', but +this will result in too much time being wasted on system calls, so use a +shared table between user driver and kernel driver to sync DCA status. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++--- + providers/hns/hns_roce_u.h | 10 ++++++++ + 2 files changed, 57 insertions(+), 4 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 2272431..56ff201 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -113,9 +113,33 @@ static int hns_roce_mmap(struct hns_roce_device *hr_dev, + return 0; + } + +-static int init_dca_context(struct hns_roce_context *ctx, int page_size) ++static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, ++ int page_size, size_t size, uint64_t mmap_key) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ void *addr; ++ ++ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd, ++ mmap_key); ++ if (addr == MAP_FAILED) { ++ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n"); ++ return -EINVAL; ++ } ++ ++ dca_ctx->buf_status = addr; ++ dca_ctx->sync_status = addr + size / 2; ++ ++ return 0; ++} ++ ++static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, ++ struct hns_roce_alloc_ucontext_resp *resp, ++ int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ uint64_t mmap_key = resp->dca_mmap_key; ++ int mmap_size = resp->dca_mmap_size; ++ int max_qps = resp->dca_qps; + int ret; + + if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) +@@ -130,6 +154,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size) + dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; + dca_ctx->mem_cnt = 0; + ++ if (mmap_key) { ++ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; ++ ++ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) { ++ dca_ctx->status_size = mmap_size; ++ dca_ctx->max_qps = min_t(int, max_qps, ++ mmap_size * 8 / bits_per_qp); ++ } ++ } ++ + return 0; + } + +@@ -143,6 +177,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx) + pthread_spin_lock(&dca_ctx->lock); + hns_roce_cleanup_dca_mem(ctx); + pthread_spin_unlock(&dca_ctx->lock); ++ if (dca_ctx->buf_status) ++ munmap(dca_ctx->buf_status, dca_ctx->status_size); + + pthread_spin_destroy(&dca_ctx->lock); + } +@@ -217,6 +253,14 @@ static int set_context_attr(struct hns_roce_device *hr_dev, + return 0; + } + ++static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) ++{ ++ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | ++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; ++ cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; ++ cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; ++} ++ + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -231,8 +275,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (!context) + return NULL; + +- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; ++ ucontext_set_cmd(&cmd, hr_dev->page_size); + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto err_free; +@@ -245,7 +288,7 @@ static struct verbs_context 
*hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + +- if (init_dca_context(context, hr_dev->page_size)) ++ if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) + goto err_free; + + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index ba646d3..e808ff3 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -35,6 +35,7 @@ + + #include + #include ++#include + #include + + #include +@@ -44,6 +45,7 @@ + #include + #include + #include ++#include + #include + #include "hns_roce_u_abi.h" + +@@ -52,6 +54,8 @@ + + #define PFX "hns: " + ++typedef _Atomic(uint64_t) atomic_bitmap_t; ++ + /* The minimum page size is 4K for hardware */ + #define HNS_HW_PAGE_SHIFT 12 + #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) +@@ -216,6 +220,12 @@ struct hns_roce_dca_ctx { + uint64_t max_size; + uint64_t min_size; + uint64_t curr_size; ++ ++#define HNS_DCA_BITS_PER_STATUS 1 ++ unsigned int max_qps; ++ unsigned int status_size; ++ atomic_bitmap_t *buf_status; ++ atomic_bitmap_t *sync_status; + }; + + struct hns_roce_v2_reset_state { +-- +2.33.0 + diff --git a/0024-libhns-Sync-DCA-status-by-shared-memory.patch b/0024-libhns-Sync-DCA-status-by-shared-memory.patch new file mode 100644 index 0000000..3005ba4 --- /dev/null +++ b/0024-libhns-Sync-DCA-status-by-shared-memory.patch @@ -0,0 +1,223 @@ +From 5b151e86c6004c11913fc9a8086f0fc63902af45 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 29 Jun 2021 21:01:27 +0800 +Subject: [PATCH 24/25] libhns: Sync DCA status by shared memory + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +Use DCA num from the resp of modify_qp() and indicate the DCA status bit in +the shared memory, if the num is valid, the user DCA can get the DCA status +by testing the bit in the shared memory for each QP, othewise invoke the +verbs 'HNS_IB_METHOD_DCA_MEM_ATTACH' to check the DCA status. + +Each QP has 2 bits in shared memory, 1 bit is used to lock the DCA status +changing by kernel driver or user driver, another bit is used to indicate +the DCA attaching status. 
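+
+As a quick capacity check of this layout (illustrative only, the helper
+name is hypothetical): each QP consumes two status bits in the shared
+table, one lock bit in the sync half and one attach-state bit in the
+buffer half, so a single 4 KiB status area can track 16384 QPs:
+
+    #define HNS_DCA_BITS_PER_STATUS 1
+
+    /* Hypothetical helper: QPs trackable by a shared status area. */
+    static unsigned int dca_trackable_qps(unsigned int status_bytes)
+    {
+            unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
+
+            return status_bytes * 8 / bits_per_qp; /* 4096 B -> 16384 QPs */
+    }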
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 31 +++++++++++++++++++++++ + providers/hns/hns_roce_u_buf.c | 42 ++++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.c | 21 +++++++++++++++- + 3 files changed, 93 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index e808ff3..5bddb00 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -379,6 +379,7 @@ struct hns_roce_dca_buf { + void **bufs; + unsigned int max_cnt; + unsigned int shift; ++ unsigned int dcan; + }; + + struct hns_roce_qp { +@@ -444,6 +445,7 @@ struct hns_roce_dca_attach_attr { + uint32_t sq_offset; + uint32_t sge_offset; + uint32_t rq_offset; ++ bool force; + }; + + struct hns_roce_dca_detach_attr { +@@ -556,6 +558,32 @@ static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock) + return 0; + } + ++#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64)) ++#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64) ++ ++static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr) ++{ ++ p += HNS_ROCE_BIT_WORD(nr); ++ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr)); ++} ++ ++static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr) ++{ ++ uint64_t mask = HNS_ROCE_BIT_MASK(nr); ++ ++ p += HNS_ROCE_BIT_WORD(nr); ++ if (atomic_load(p) & mask) ++ return true; ++ ++ return (atomic_fetch_or(p, mask) & mask) != 0; ++} ++ ++static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr) ++{ ++ p += HNS_ROCE_BIT_WORD(nr); ++ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr)); ++} ++ + int hns_roce_u_query_device(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size); +@@ -636,6 +664,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + uint32_t size, struct hns_roce_dca_buf *buf); + void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_detach_attr *attr); ++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); ++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); ++ + void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 3d41b89..08c0fbc 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -440,6 +440,45 @@ static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, + return (idx >= page_count) ? 
0 : -ENOMEM; + } + ++#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS) ++#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n) ++ ++#define MAX_DCA_TRY_LOCK_TIMES 10 ++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan) ++{ ++ atomic_bitmap_t *st = ctx->sync_status; ++ int try_times = 0; ++ ++ if (!st || dcan >= ctx->max_qps) ++ return true; ++ ++ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan))) ++ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES) ++ return false; ++ ++ return true; ++} ++ ++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan) ++{ ++ atomic_bitmap_t *st = ctx->sync_status; ++ ++ if (!st || dcan >= ctx->max_qps) ++ return; ++ ++ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan)); ++} ++ ++static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan) ++{ ++ atomic_bitmap_t *st = ctx->buf_status; ++ ++ if (!st || dcan >= ctx->max_qps) ++ return false; ++ ++ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan)); ++} ++ + #define DCA_EXPAND_MEM_TRY_TIMES 3 + int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_attach_attr *attr, +@@ -451,6 +490,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + int try_times = 0; + int ret = 0; + ++ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan)) ++ return 0; ++ + do { + resp.alloc_pages = 0; + ret = attach_dca_mem(ctx, handle, attr, &resp); +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 7a93456..15d9108 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -612,6 +612,7 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx, + struct hns_roce_qp *qp) + { + struct hns_roce_dca_attach_attr attr = {}; ++ bool enable_detach; + uint32_t idx; + int ret; + +@@ -633,9 +634,16 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx, + attr.rq_offset = idx << qp->rq.wqe_shift; + } + ++ enable_detach = check_dca_detach_enable(qp); ++ if (enable_detach && ++ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan)) ++ /* Force attach if failed to sync dca status */ ++ attr.force = true; + + ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr, +- qp->buf_size, &qp->dca_wqe); ++ qp->buf_size, &qp->dca_wqe); ++ if (ret && enable_detach) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); + + hns_roce_spin_unlock(&qp->rq.hr_lock); + hns_roce_spin_unlock(&qp->sq.hr_lock); +@@ -1643,6 +1651,9 @@ out: + + hns_roce_spin_unlock(&qp->sq.hr_lock); + ++ if (check_dca_detach_enable(qp)) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); ++ + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; + +@@ -1784,6 +1795,9 @@ out: + + hns_roce_spin_unlock(&qp->rq.hr_lock); + ++ if (check_dca_detach_enable(qp)) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); ++ + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; + hns_roce_u_v2_modify_qp(ibvqp, &attr, IBV_QP_STATE); +@@ -1902,6 +1916,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (attr->qp_state == IBV_QPS_RTR) { + hr_qp->tc_mode = resp_ex.drv_payload.tc_mode; + hr_qp->priority = resp_ex.drv_payload.priority; ++ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan; + } + } + +@@ -2951,6 +2966,10 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp) + + out: + hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ if (check_dca_detach_enable(qp)) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, 
qp->dca_wqe.dcan); ++ + if (ibv_qp->qp_base.state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; + hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE); +-- +2.33.0 + diff --git a/0025-libhns-Add-direct-verbs-support-to-config-DCA.patch b/0025-libhns-Add-direct-verbs-support-to-config-DCA.patch new file mode 100644 index 0000000..b5a4c71 --- /dev/null +++ b/0025-libhns-Add-direct-verbs-support-to-config-DCA.patch @@ -0,0 +1,386 @@ +From 08b80f6450477832b1a194f18fbed60367da46de Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:49 +0800 +Subject: [PATCH 25/25] libhns: Add direct verbs support to config DCA + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +Add two direct verbs to config DCA: +1. hnsdv_open_device() is used to config DCA memory pool. +2. hnsdv_create_qp() is used to create a DCA QP. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + debian/control | 2 +- + providers/hns/hns_roce_u.c | 80 ++++++++++++++++++++++++++++---- + providers/hns/hns_roce_u.h | 4 +- + providers/hns/hns_roce_u_buf.c | 3 ++ + providers/hns/hns_roce_u_verbs.c | 39 ++++++++++++++-- + providers/hns/hnsdv.h | 29 +++++++++++- + providers/hns/libhns.map | 1 + + 7 files changed, 140 insertions(+), 18 deletions(-) + +diff --git a/debian/control b/debian/control +index 160824f..2a55372 100644 +--- a/debian/control ++++ b/debian/control +@@ -87,7 +87,7 @@ Description: User space provider drivers for libibverbs + - efa: Amazon Elastic Fabric Adapter + - erdma: Alibaba Elastic RDMA (iWarp) Adapter + - hfi1verbs: Intel Omni-Path HFI +- - hns: HiSilicon Hip06 SoC ++ - hns: HiSilicon Hip08+ SoC + - ipathverbs: QLogic InfiniPath HCAs + - irdma: Intel Ethernet Connection RDMA + - mana: Microsoft Azure Network Adapter +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 56ff201..93a0312 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -132,8 +132,55 @@ static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, + return 0; + } + ++struct ibv_context *hnsdv_open_device(struct ibv_device *device, ++ struct hnsdv_context_attr *attr) ++{ ++ if (!is_hns_dev(device)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ return verbs_open_device(device, attr); ++} ++ ++static void set_dca_pool_param(struct hns_roce_context *ctx, ++ struct hnsdv_context_attr *attr, int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE) ++ dca_ctx->unit_size = align(attr->dca_unit_size, page_size); ++ else ++ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; ++ ++ /* The memory pool cannot be expanded, only init the DCA context. */ ++ if (dca_ctx->unit_size == 0) ++ return; ++ ++ /* If not set, the memory pool can be expanded unlimitedly. */ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE) ++ dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size, ++ dca_ctx->unit_size) * ++ dca_ctx->unit_size; ++ else ++ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; ++ ++ /* If not set, the memory pool cannot be shrunk. 
*/ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE) ++ dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size, ++ dca_ctx->unit_size) * ++ dca_ctx->unit_size; ++ else ++ dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE; ++ ++ verbs_debug(&ctx->ibv_ctx, ++ "Support DCA, unit %u, max %lu, min %lu Bytes.\n", ++ dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size); ++} ++ + static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, + struct hns_roce_alloc_ucontext_resp *resp, ++ struct hnsdv_context_attr *attr, + int page_size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; +@@ -145,14 +192,18 @@ static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, + if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) + return 0; + ++ dca_ctx->unit_size = 0; ++ dca_ctx->mem_cnt = 0; ++ + list_head_init(&dca_ctx->mem_list); + ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) + return ret; + +- dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; +- dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; +- dca_ctx->mem_cnt = 0; ++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA)) ++ return 0; ++ ++ set_dca_pool_param(ctx, attr, page_size); + + if (mmap_key) { + const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; +@@ -253,18 +304,28 @@ static int set_context_attr(struct hns_roce_device *hr_dev, + return 0; + } + +-static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) ++static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, ++ struct hnsdv_context_attr *attr) + { + cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; +- cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; +- cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; ++ HNS_ROCE_CQE_INLINE_FLAGS; ++ ++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA)) ++ return; ++ ++ cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA; ++ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) { ++ cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; ++ cmd->dca_max_qps = attr->dca_prime_qps; ++ } + } + + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) + { ++ struct hnsdv_context_attr *ctx_attr = private_data; + struct hns_roce_device *hr_dev = to_hr_dev(ibdev); + struct hns_roce_alloc_ucontext_resp resp = {}; + struct hns_roce_alloc_ucontext cmd = {}; +@@ -275,7 +336,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (!context) + return NULL; + +- ucontext_set_cmd(&cmd, hr_dev->page_size); ++ ucontext_set_cmd(&cmd, ctx_attr); + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto err_free; +@@ -288,7 +349,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + +- if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) ++ if (init_dca_context(context, cmd_fd, ++ &resp, ctx_attr, hr_dev->page_size)) + goto err_free; + + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5bddb00..691bf61 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -584,6 +584,8 @@ static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr) + atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr)); + } + ++bool is_hns_dev(struct ibv_device *device); ++ + int 
hns_roce_u_query_device(struct ibv_context *context,
+ 			    const struct ibv_query_device_ex_input *input,
+ 			    struct ibv_device_attr_ex *attr, size_t attr_size);
+@@ -672,8 +674,6 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
+ 
+ void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
+ 
+-bool is_hns_dev(struct ibv_device *device);
+-
+ extern const struct hns_roce_u_hw hns_roce_u_hw_v2;
+ 
+ #endif /* _HNS_ROCE_U_H */
+diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
+index 08c0fbc..780683e 100644
+--- a/providers/hns/hns_roce_u_buf.c
++++ b/providers/hns/hns_roce_u_buf.c
+@@ -56,6 +56,9 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
+ 
+ void hns_roce_free_buf(struct hns_roce_buf *buf)
+ {
++	if (!buf->buf)
++		return;
++
+ 	ibv_dofork_range(buf->buf, buf->length);
+ 
+ 	munmap(buf->buf, buf->length);
+diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
+index 248d862..8964d53 100644
+--- a/providers/hns/hns_roce_u_verbs.c
++++ b/providers/hns/hns_roce_u_verbs.c
+@@ -1072,6 +1072,15 @@ enum {
+ 	IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
+ };
+ 
++enum {
++	SEND_OPS_FLAG_MASK =
++		IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
++		IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM |
++		IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
++		IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV |
++		IBV_QP_EX_WITH_SEND_WITH_INV,
++};
++
+ static int check_qp_create_mask(struct hns_roce_context *ctx,
+ 				struct ibv_qp_init_attr_ex *attr)
+ {
+@@ -1080,6 +1089,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
+ 	if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK))
+ 		return EOPNOTSUPP;
+ 
++	if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS &&
++	    !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK))
++		return -EOPNOTSUPP;
++
+ 	switch (attr->qp_type) {
+ 	case IBV_QPT_UD:
+ 		if (hr_dev->hw_version == HNS_ROCE_HW_VER2)
+@@ -1311,9 +1324,21 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
+ 	return 0;
+ }
+ 
+-static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
++static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx,
++					struct ibv_qp_init_attr_ex *attr,
++					struct hnsdv_qp_init_attr *hns_attr)
+ {
+-	if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
++	/* DCA pool is disabled */
++	if (!dca_ctx->unit_size)
++		return false;
++
++	/* Unsupported QP type */
++	if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND)
++		return false;
++
++	if (hns_attr &&
++	    (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) &&
++	    (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE))
+ 		return true;
+ 
+ 	return false;
+@@ -1331,6 +1356,7 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
+ }
+ 
+ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
++			struct hnsdv_qp_init_attr *hns_attr,
+ 			struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ {
+ 	struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+@@ -1354,7 +1380,8 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+ 		goto err_alloc;
+ 	}
+ 
+-	if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
++	if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) &&
++	    ctx->dca_ctx.max_size > 0) {
+ 		/* when DCA is enabled, use a buffer list to store page addr */
+ 		qp->buf.buf = NULL;
+ 		qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
+@@ -1362,6 +1389,7 @@ static int 
qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); + if (!qp->dca_wqe.bufs) + goto err_alloc; ++ verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n"); + } else { + if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, + HNS_HW_PAGE_SIZE)) +@@ -1651,12 +1679,13 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + } + + static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, ++ struct hnsdv_qp_init_attr *hns_attr, + struct hns_roce_qp *qp, + struct hns_roce_context *ctx) + { + int ret; + +- ret = qp_alloc_wqe(attr, qp, ctx); ++ ret = qp_alloc_wqe(attr, hns_attr, qp, ctx); + if (ret) + return ret; + +@@ -1731,7 +1760,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + if (ret) + goto err_spinlock; + +- ret = hns_roce_alloc_qp_buf(attr, qp, context); ++ ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context); + if (ret) + goto err_buf; + +diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h +index 451b26e..68bf001 100644 +--- a/providers/hns/hnsdv.h ++++ b/providers/hns/hnsdv.h +@@ -22,17 +22,42 @@ enum hnsdv_qp_congest_ctrl_type { + HNSDV_QP_CREATE_ENABLE_DIP = 1 << 3, + }; + ++enum hnsdv_qp_create_flags { ++ HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0, ++}; ++ ++enum hnsdv_context_comp_mask { ++ HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0, ++ HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1, ++ HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2, ++ HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3, ++}; ++ + enum hnsdv_qp_init_attr_mask { ++ HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0, + HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE = 1 << 1, + }; + ++struct hnsdv_context_attr { ++ uint64_t flags; /* Use enum hnsdv_context_attr_flags */ ++ uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */ ++ uint32_t dca_prime_qps; ++ uint32_t dca_unit_size; ++ uint64_t dca_max_size; ++ uint64_t dca_min_size; ++}; ++ + struct hnsdv_qp_init_attr { + uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */ +- uint32_t create_flags; ++ uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */ + uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */ + uint8_t reserved[3]; + }; + ++enum hnsdv_context_attr_flags { ++ HNSDV_CONTEXT_FLAGS_DCA = 1 << 0, ++}; ++ + enum hnsdv_query_context_comp_mask { + HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0, + }; +@@ -50,6 +75,8 @@ int hnsdv_query_device(struct ibv_context *ctx_in, + struct ibv_qp *hnsdv_create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_attr, + struct hnsdv_qp_init_attr *hns_qp_attr); ++struct ibv_context *hnsdv_open_device(struct ibv_device *device, ++ struct hnsdv_context_attr *attr); + + #ifdef __cplusplus + } +diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map +index e9bf417..a955346 100644 +--- a/providers/hns/libhns.map ++++ b/providers/hns/libhns.map +@@ -5,5 +5,6 @@ HNS_1.0 { + hnsdv_is_supported; + hnsdv_create_qp; + hnsdv_query_device; ++ hnsdv_open_device; + local: *; + }; +-- +2.33.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 0e282e2..7ad39a1 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 50.0 -Release: 5 +Release: 6 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -24,6 +24,13 @@ patch15: 0015-libhns-return-error-when-post-send-in-reset-state.patch patch16: 0016-libhns-assign-doorbell-to-zero-when-allocate-it.patch patch17: 0017-libhns-Fix-missing-reset-notification.patch patch18: 
0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch +patch19: 0019-Update-kernel-headers.patch +patch20: 0020-libhns-Introduce-DCA-for-RC-QP.patch +patch21: 0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch +patch22: 0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch +patch23: 0023-libhns-Use-shared-memory-to-sync-DCA-status.patch +patch24: 0024-libhns-Sync-DCA-status-by-shared-memory.patch +patch25: 0025-libhns-Add-direct-verbs-support-to-config-DCA.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -376,9 +383,11 @@ fi %{_mandir}/man3/umad* %{_mandir}/man3/*_to_ibv_rate.* %{_mandir}/man7/rdma_cm.* +%{_mandir}/man3/manadv* %{_mandir}/man3/mlx5dv* %{_mandir}/man3/mlx4dv* %{_mandir}/man7/efadv* +%{_mandir}/man7/manadv* %{_mandir}/man7/mlx5dv* %{_mandir}/man7/mlx4dv* %{_mandir}/man3/ibnd_* @@ -599,9 +608,14 @@ fi %doc %{_docdir}/%{name}-%{version}/libibverbs.md %doc %{_docdir}/%{name}-%{version}/tag_matching.md %doc %{_docdir}/%{name}-%{version}/70-persistent-ipoib.rules -%{_mandir}/* %changelog +* Thu Apr 11 2024 Ran Zhou - 50.0-6 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add support for DCA + * Tue Mar 26 2024 Ran Zhou - 50.0-5 - Type: requirement - ID: NA -- Gitee
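Note: the hunks above only add the provider-side plumbing. For reference, below is a minimal usage sketch of the new direct verbs exported by these patches (hnsdv_open_device() with HNSDV_CONTEXT_FLAGS_DCA, then hnsdv_create_qp() with HNSDV_QP_CREATE_ENABLE_DCA_MODE). It is not part of the patch series: the helper name, the DCA pool sizing value and the queue depths are arbitrary examples, and error unwinding is omitted for brevity.

/* Sketch: open an hns device with DCA enabled and create an RC QP whose
 * WQE buffer is served from the shared DCA memory pool on demand.
 */
#include <stddef.h>
#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>

static struct ibv_qp *create_dca_rc_qp(struct ibv_device *dev)
{
	struct hnsdv_context_attr ctx_attr = {
		.flags = HNSDV_CONTEXT_FLAGS_DCA,
		.comp_mask = HNSDV_CONTEXT_MASK_DCA_PRIME_QPS,
		.dca_prime_qps = 64,	/* expected number of active DCA QPs (example value) */
	};
	struct hnsdv_qp_init_attr dca_attr = {
		.comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
		.create_flags = HNSDV_QP_CREATE_ENABLE_DCA_MODE,
	};
	struct ibv_qp_init_attr_ex attr_ex = {};
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_cq *cq;

	if (!hnsdv_is_supported(dev))
		return NULL;

	/* DCA is negotiated with the kernel when the ucontext is allocated. */
	ctx = hnsdv_open_device(dev, &ctx_attr);
	if (!ctx)
		return NULL;

	pd = ibv_alloc_pd(ctx);
	cq = ibv_create_cq(ctx, 64, NULL, NULL, 0);
	if (!pd || !cq)
		return NULL;	/* error unwinding omitted for brevity */

	attr_ex.qp_type = IBV_QPT_RC;	/* DCA applies to RC and XRC_SEND QPs only */
	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
	attr_ex.pd = pd;
	attr_ex.send_cq = cq;
	attr_ex.recv_cq = cq;
	attr_ex.cap.max_send_wr = 64;
	attr_ex.cap.max_recv_wr = 64;
	attr_ex.cap.max_send_sge = 1;
	attr_ex.cap.max_recv_sge = 1;

	/* The WQE buffer is attached from and detached to the DCA pool at
	 * runtime instead of being allocated per QP.
	 */
	return hnsdv_create_qp(ctx, &attr_ex, &dca_attr);
}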