0035-libhns-Use-shared-memory-to-sync-DCA-status.patch

From a5e62921afc2fcc152e8b0584f2d04d1a4db4f10 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 29 Jun 2021 20:06:47 +0800
Subject: libhns: Use shared memory to sync DCA status

driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M

----------------------------------------------------------

The user DCA needs to check a QP's attach state before filling the
WQE buffer, based on the response to the uverbs command
'HNS_IB_METHOD_DCA_MEM_ATTACH'. Issuing that command on every post
wastes too much time on system calls, so use a table shared between
the user driver and the kernel driver to sync the DCA status instead
(sketched after the diffstat below).

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++---
providers/hns/hns_roce_u.h | 10 ++++++++
2 files changed, 57 insertions(+), 4 deletions(-)
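
The shared table is a pair of atomic bitmaps mapped into user space:
buf_status and sync_status each carry HNS_DCA_BITS_PER_STATUS bit(s)
per QP, so the user driver can test a QP's bit with an atomic load
instead of a system call. A minimal sketch of such a bit test,
assuming C11 atomics; the helper test_dca_status_bit() and the
BITS_PER_WORD constant are illustrative, not part of this patch:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef _Atomic(uint64_t) atomic_bitmap_t;

#define BITS_PER_WORD 64

/* Illustrative only: test one QP's status bit with an acquire load,
 * mirroring how a shared DCA status table avoids a syscall. */
static bool test_dca_status_bit(atomic_bitmap_t *table, unsigned int bit)
{
	uint64_t word = atomic_load_explicit(&table[bit / BITS_PER_WORD],
					     memory_order_acquire);

	return !!(word & (1ULL << (bit % BITS_PER_WORD)));
}
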
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index bd2b251..fe30cda 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -95,9 +95,33 @@ static const struct verbs_context_ops hns_common_ops = {
.alloc_parent_domain = hns_roce_u_alloc_pad,
};
-static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
+ int page_size, size_t size, uint64_t mmap_key)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ void *addr;
+
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
+ mmap_key);
+ if (addr == MAP_FAILED) {
+ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n");
+ return -EINVAL;
+ }
+
+ dca_ctx->buf_status = addr;
+ dca_ctx->sync_status = addr + size / 2;
+
+ return 0;
+}
+
+static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
+ struct hns_roce_alloc_ucontext_resp *resp,
+ int page_size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ uint64_t mmap_key = resp->dca_mmap_key;
+ int mmap_size = resp->dca_mmap_size;
+ int max_qps = resp->dca_qps;
int ret;
if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
@@ -112,6 +136,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size)
dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
dca_ctx->mem_cnt = 0;
+ if (mmap_key) {
+ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
+
+ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) {
+ dca_ctx->status_size = mmap_size;
+ dca_ctx->max_qps = min_t(int, max_qps,
+ mmap_size * 8 / bits_per_qp);
+ }
+ }
+
return 0;
}
@@ -125,6 +159,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx)
pthread_spin_lock(&dca_ctx->lock);
hns_roce_cleanup_dca_mem(ctx);
pthread_spin_unlock(&dca_ctx->lock);
+ if (dca_ctx->buf_status)
+ munmap(dca_ctx->buf_status, dca_ctx->status_size);
pthread_spin_destroy(&dca_ctx->lock);
}
@@ -149,6 +185,14 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
return count_shift > size_shift ? count_shift - size_shift : 0;
}
+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size)
+{
+ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
+ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
+ cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
+ cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS;
+}
+
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
@@ -165,8 +209,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
+ ucontext_set_cmd(&cmd, hr_dev->page_size);
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -212,7 +255,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->max_srq_wr = dev_attrs.max_srq_wr;
context->max_srq_sge = dev_attrs.max_srq_sge;
- if (init_dca_context(context, hr_dev->page_size))
+ if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size))
goto err_free;
if (hns_roce_mmap(hr_dev, context, cmd_fd))
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 44a733f..a8f811e 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -35,6 +35,7 @@
#include <stddef.h>
#include <endian.h>
+#include <stdatomic.h>
#include <util/compiler.h>
#include <infiniband/driver.h>
@@ -44,6 +45,7 @@
#include <ccan/array_size.h>
#include <util/bitmap.h>
#include <ccan/container_of.h>
+#include <ccan/minmax.h>
#include <linux/if_ether.h>
#include "hns_roce_u_abi.h"
@@ -52,6 +54,8 @@
#define PFX "hns: "
+typedef _Atomic(uint64_t) atomic_bitmap_t;
+
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
@@ -214,6 +218,12 @@ struct hns_roce_dca_ctx {
uint64_t max_size;
uint64_t min_size;
uint64_t curr_size;
+
+#define HNS_DCA_BITS_PER_STATUS 1
+ unsigned int max_qps;
+ unsigned int status_size;
+ atomic_bitmap_t *buf_status;
+ atomic_bitmap_t *sync_status;
};
struct hns_roce_context {
--
2.30.0
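
For sizing, each QP consumes 2 * HNS_DCA_BITS_PER_STATUS bits of the
mapped area (one status bit in each half), so init_dca_context() caps
max_qps at mmap_size * 8 / bits_per_qp. A self-contained sketch of
that arithmetic, assuming a single 4 KiB mapping:

#include <stdio.h>

#define HNS_DCA_BITS_PER_STATUS 1

int main(void)
{
	unsigned int mmap_size = 4096; /* assumed: one 4 KiB page */
	unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;

	/* 4096 bytes * 8 bits / 2 bits per QP = 16384 trackable QPs */
	printf("max_qps = %u\n", mmap_size * 8 / bits_per_qp);
	return 0;
}

Note that the related expression in ucontext_set_cmd(),
page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS, parses as
(page_size * 8 / 2) * HNS_DCA_BITS_PER_STATUS under C precedence;
it matches the intended page_size * 8 / (2 * HNS_DCA_BITS_PER_STATUS)
only while HNS_DCA_BITS_PER_STATUS is 1.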