diff --git a/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch b/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch new file mode 100644 index 0000000000000000000000000000000000000000..876602fb193eb8556b9278d398de5296fae10e56 --- /dev/null +++ b/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch @@ -0,0 +1,363 @@ +From bb6a6264246a2a51680a2d4b104a296a9cdf4fab Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 15:05:55 +0800 +Subject: [PATCH 07/10] libhns: Add support for thread domain and parent domain + +Add support for thread domain (TD) and parent domain (PAD). +Extend the orginal hns_roce_pd struct to hns_roce_pad by +adding the new hns_roce_td struct. When a parent domain +holds a thread domain, the associated data path will be set +to lock-free mode to improve performance. +--- + providers/hns/hns_roce_u.c | 5 +- + providers/hns/hns_roce_u.h | 69 +++++++++++++- + providers/hns/hns_roce_u_verbs.c | 156 ++++++++++++++++++++++++++++--- + 3 files changed, 215 insertions(+), 15 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 90f250e..e1c2659 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -67,7 +67,7 @@ static const struct verbs_context_ops hns_common_ops = { + .create_qp = hns_roce_u_create_qp, + .create_qp_ex = hns_roce_u_create_qp_ex, + .dealloc_mw = hns_roce_u_dealloc_mw, +- .dealloc_pd = hns_roce_u_free_pd, ++ .dealloc_pd = hns_roce_u_dealloc_pd, + .dereg_mr = hns_roce_u_dereg_mr, + .destroy_cq = hns_roce_u_destroy_cq, + .modify_cq = hns_roce_u_modify_cq, +@@ -88,6 +88,9 @@ static const struct verbs_context_ops hns_common_ops = { + .close_xrcd = hns_roce_u_close_xrcd, + .open_qp = hns_roce_u_open_qp, + .get_srq_num = hns_roce_u_get_srq_num, ++ .alloc_td = hns_roce_u_alloc_td, ++ .dealloc_td = hns_roce_u_dealloc_td, ++ .alloc_parent_domain = hns_roce_u_alloc_pad, + }; + + static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index c73e5c0..5d3f480 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -196,6 +196,11 @@ struct hns_roce_db_page { + unsigned long *bitmap; + }; + ++struct hns_roce_spinlock { ++ pthread_spinlock_t lock; ++ int need_lock; ++}; ++ + struct hns_roce_context { + struct verbs_context ibv_ctx; + void *uar; +@@ -230,9 +235,21 @@ struct hns_roce_context { + unsigned int max_inline_data; + }; + ++struct hns_roce_td { ++ struct ibv_td ibv_td; ++ atomic_int refcount; ++}; ++ + struct hns_roce_pd { + struct ibv_pd ibv_pd; + unsigned int pdn; ++ atomic_int refcount; ++ struct hns_roce_pd *protection_domain; ++}; ++ ++struct hns_roce_pad { ++ struct hns_roce_pd pd; ++ struct hns_roce_td *td; + }; + + struct hns_roce_cq { +@@ -406,9 +423,35 @@ static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx) + return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx.context); + } + ++static inline struct hns_roce_td *to_hr_td(struct ibv_td *ibv_td) ++{ ++ return container_of(ibv_td, struct hns_roce_td, ibv_td); ++} ++ ++/* to_hr_pd always returns the real hns_roce_pd obj. 
*/ + static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd) + { +- return container_of(ibv_pd, struct hns_roce_pd, ibv_pd); ++ struct hns_roce_pd *pd = ++ container_of(ibv_pd, struct hns_roce_pd, ibv_pd); ++ ++ if (pd->protection_domain) ++ return pd->protection_domain; ++ ++ return pd; ++} ++ ++static inline struct hns_roce_pad *to_hr_pad(struct ibv_pd *ibv_pd) ++{ ++ struct hns_roce_pad *pad = ++ ibv_pd ? ++ container_of(ibv_pd, struct hns_roce_pad, pd.ibv_pd) : ++ NULL; ++ ++ if (pad && pad->pd.protection_domain) ++ return pad; ++ ++ /* Otherwise ibv_pd isn't a parent_domain */ ++ return NULL; + } + + static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq) +@@ -431,14 +474,35 @@ static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah) + return container_of(ibv_ah, struct hns_roce_ah, ibv_ah); + } + ++static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock) ++{ ++ if (hr_lock->need_lock) ++ return pthread_spin_lock(&hr_lock->lock); ++ ++ return 0; ++} ++ ++static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock) ++{ ++ if (hr_lock->need_lock) ++ return pthread_spin_unlock(&hr_lock->lock); ++ ++ return 0; ++} ++ + int hns_roce_u_query_device(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size); + int hns_roce_u_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr); + ++struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context, ++ struct ibv_td_init_attr *attr); ++int hns_roce_u_dealloc_td(struct ibv_td *ibv_td); ++struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, ++ struct ibv_parent_domain_init_attr *attr); + struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context); +-int hns_roce_u_free_pd(struct ibv_pd *pd); ++int hns_roce_u_dealloc_pd(struct ibv_pd *pd); + + struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t hca_va, int access); +@@ -497,6 +561,7 @@ int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd); + int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size, + int page_size); + void hns_roce_free_buf(struct hns_roce_buf *buf); ++void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index dcdc722..ecf8666 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -42,6 +43,37 @@ + #include "hns_roce_u_db.h" + #include "hns_roce_u_hw_v2.h" + ++static bool hns_roce_whether_need_lock(struct ibv_pd *pd) ++{ ++ struct hns_roce_pad *pad; ++ ++ pad = to_hr_pad(pd); ++ if (pad && pad->td) ++ return false; ++ ++ return true; ++} ++ ++static int hns_roce_spinlock_init(struct hns_roce_spinlock *hr_lock, ++ bool need_lock) ++{ ++ hr_lock->need_lock = need_lock; ++ ++ if (need_lock) ++ return pthread_spin_init(&hr_lock->lock, ++ PTHREAD_PROCESS_PRIVATE); ++ ++ return 0; ++} ++ ++static int hns_roce_spinlock_destroy(struct hns_roce_spinlock *hr_lock) ++{ ++ if (hr_lock->need_lock) ++ return pthread_spin_destroy(&hr_lock->lock); ++ ++ return 0; ++} ++ + void hns_roce_init_qp_indices(struct hns_roce_qp *qp) + { + qp->sq.head = 0; +@@ -85,38 +117,138 @@ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port, + return ibv_cmd_query_port(context, 
port, attr, &cmd, sizeof(cmd)); + } + ++struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context, ++ struct ibv_td_init_attr *attr) ++{ ++ struct hns_roce_td *td; ++ ++ if (attr->comp_mask) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ td = calloc(1, sizeof(*td)); ++ if (!td) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ td->ibv_td.context = context; ++ atomic_init(&td->refcount, 1); ++ ++ return &td->ibv_td; ++} ++ ++int hns_roce_u_dealloc_td(struct ibv_td *ibv_td) ++{ ++ struct hns_roce_td *td; ++ ++ td = to_hr_td(ibv_td); ++ if (atomic_load(&td->refcount) > 1) ++ return EBUSY; ++ ++ free(td); ++ ++ return 0; ++} ++ + struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context) + { ++ struct hns_roce_alloc_pd_resp resp = {}; + struct ibv_alloc_pd cmd; + struct hns_roce_pd *pd; +- struct hns_roce_alloc_pd_resp resp = {}; +- +- pd = malloc(sizeof(*pd)); +- if (!pd) +- return NULL; + +- if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd), +- &resp.ibv_resp, sizeof(resp))) { +- free(pd); ++ pd = calloc(1, sizeof(*pd)); ++ if (!pd) { ++ errno = ENOMEM; + return NULL; + } ++ errno = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); + ++ if (errno) ++ goto err; ++ ++ atomic_init(&pd->refcount, 1); + pd->pdn = resp.pdn; + + return &pd->ibv_pd; ++ ++err: ++ free(pd); ++ return NULL; + } + +-int hns_roce_u_free_pd(struct ibv_pd *pd) ++struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, ++ struct ibv_parent_domain_init_attr *attr) ++{ ++ struct hns_roce_pad *pad; ++ ++ if (ibv_check_alloc_parent_domain(attr)) ++ return NULL; ++ ++ if (attr->comp_mask) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ pad = calloc(1, sizeof(*pad)); ++ if (!pad) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if (attr->td) { ++ pad->td = to_hr_td(attr->td); ++ atomic_fetch_add(&pad->td->refcount, 1); ++ } ++ ++ pad->pd.protection_domain = to_hr_pd(attr->pd); ++ atomic_fetch_add(&pad->pd.protection_domain->refcount, 1); ++ ++ atomic_init(&pad->pd.refcount, 1); ++ ibv_initialize_parent_domain(&pad->pd.ibv_pd, ++ &pad->pd.protection_domain->ibv_pd); ++ ++ return &pad->pd.ibv_pd; ++} ++ ++static void hns_roce_free_pad(struct hns_roce_pad *pad) ++{ ++ atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1); ++ ++ if (pad->td) ++ atomic_fetch_sub(&pad->td->refcount, 1); ++ ++ free(pad); ++} ++ ++static int hns_roce_free_pd(struct hns_roce_pd *pd) + { + int ret; + +- ret = ibv_cmd_dealloc_pd(pd); ++ if (atomic_load(&pd->refcount) > 1) ++ return EBUSY; ++ ++ ret = ibv_cmd_dealloc_pd(&pd->ibv_pd); + if (ret) + return ret; + +- free(to_hr_pd(pd)); ++ free(pd); ++ return 0; ++} + +- return ret; ++int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd) ++{ ++ struct hns_roce_pad *pad = to_hr_pad(ibv_pd); ++ struct hns_roce_pd *pd = to_hr_pd(ibv_pd); ++ ++ if (pad) { ++ hns_roce_free_pad(pad); ++ return 0; ++ } ++ ++ return hns_roce_free_pd(pd); + } + + struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context, +-- +2.30.0 + diff --git a/0008-libhns-Add-support-for-lock-free-QP.patch b/0008-libhns-Add-support-for-lock-free-QP.patch new file mode 100644 index 0000000000000000000000000000000000000000..b66bdfacaafdd81f4b7a6adeddf713a9ac769179 --- /dev/null +++ b/0008-libhns-Add-support-for-lock-free-QP.patch @@ -0,0 +1,223 @@ +From fc7cb76b5b56d67182e6fa1cb7a3c19aa09ef90a Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 15:56:09 +0800 +Subject: [PATCH 08/10] libhns: Add support for lock-free QP + +Drop QP locks when 
associated to a PAD holding a TD. +--- + providers/hns/hns_roce_u.h | 2 +- + providers/hns/hns_roce_u_hw_v2.c | 26 ++++++++++------- + providers/hns/hns_roce_u_verbs.c | 49 +++++++++++++++++++++++++++++--- + 3 files changed, 61 insertions(+), 16 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5d3f480..5732e39 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -305,7 +305,7 @@ struct hns_roce_srq { + + struct hns_roce_wq { + unsigned long *wrid; +- pthread_spinlock_t lock; ++ struct hns_roce_spinlock hr_lock; + unsigned int wqe_cnt; + int max_post; + unsigned int head; +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index dd13049..90a76e2 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1270,7 +1270,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + return ret; + } + +- pthread_spin_lock(&qp->sq.lock); ++ hns_roce_spin_lock(&qp->sq.hr_lock); + + sge_info.start_idx = qp->next_sge; /* start index of extend sge */ + +@@ -1331,7 +1331,7 @@ out: + *(qp->sdb) = qp->sq.head & 0xffff; + } + +- pthread_spin_unlock(&qp->sq.lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); + + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; +@@ -1420,7 +1420,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + return ret; + } + +- pthread_spin_lock(&qp->rq.lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); + + max_sge = qp->rq.max_gs - qp->rq.rsv_sge; + for (nreq = 0; wr; ++nreq, wr = wr->next) { +@@ -1454,7 +1454,7 @@ out: + hns_roce_update_rq_db(ctx, ibvqp->qp_num, qp->rq.head); + } + +- pthread_spin_unlock(&qp->rq.lock); ++ hns_roce_spin_unlock(&qp->rq.hr_lock); + + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; +@@ -1549,8 +1549,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int ret; + + if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) { +- pthread_spin_lock(&hr_qp->sq.lock); +- pthread_spin_lock(&hr_qp->rq.lock); ++ hns_roce_spin_lock(&hr_qp->sq.hr_lock); ++ hns_roce_spin_lock(&hr_qp->rq.hr_lock); + flag = true; + } + +@@ -1561,8 +1561,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (flag) { + if (!ret) + qp->state = IBV_QPS_ERR; +- pthread_spin_unlock(&hr_qp->rq.lock); +- pthread_spin_unlock(&hr_qp->sq.lock); ++ hns_roce_spin_unlock(&hr_qp->sq.hr_lock); ++ hns_roce_spin_unlock(&hr_qp->rq.hr_lock); + } + + if (ret) +@@ -1640,6 +1640,7 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp) + static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + { + struct hns_roce_context *ctx = to_hr_ctx(ibqp->context); ++ struct hns_roce_pad *pad = to_hr_pad(ibqp->pd); + struct hns_roce_qp *qp = to_hr_qp(ibqp); + int ret; + +@@ -1666,6 +1667,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + + hns_roce_free_qp_buf(qp, ctx); + ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); ++ + free(qp); + + return ret; +@@ -2555,7 +2559,7 @@ static void wr_start(struct ibv_qp_ex *ibv_qp) + return; + } + +- pthread_spin_lock(&qp->sq.lock); ++ hns_roce_spin_lock(&qp->sq.hr_lock); + qp->sge_info.start_idx = qp->next_sge; + qp->rb_sq_head = qp->sq.head; + qp->err = 0; +@@ -2588,7 +2592,7 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp) + } + + out: +- pthread_spin_unlock(&qp->sq.lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); + if (ibv_qp->qp_base.state == IBV_QPS_ERR) { + 
attr.qp_state = IBV_QPS_ERR; + hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE); +@@ -2603,7 +2607,7 @@ static void wr_abort(struct ibv_qp_ex *ibv_qp) + + qp->sq.head = qp->rb_sq_head; + +- pthread_spin_unlock(&qp->sq.lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); + } + + enum { +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index ecf8666..d503031 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1033,6 +1033,41 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx, + return verify_qp_create_cap(ctx, attr); + } + ++static int hns_roce_qp_spinlock_init(struct hns_roce_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp) ++{ ++ bool sq_need_lock; ++ bool rq_need_lock; ++ int ret; ++ ++ sq_need_lock = hns_roce_whether_need_lock(attr->pd); ++ if (!sq_need_lock) ++ verbs_info(&ctx->ibv_ctx, "configure sq as no lock.\n"); ++ ++ rq_need_lock = hns_roce_whether_need_lock(attr->pd); ++ if (!rq_need_lock) ++ verbs_info(&ctx->ibv_ctx, "configure rq as no lock.\n"); ++ ++ ret = hns_roce_spinlock_init(&qp->sq.hr_lock, sq_need_lock); ++ if (ret) ++ return ret; ++ ++ ret = hns_roce_spinlock_init(&qp->rq.hr_lock, rq_need_lock); ++ if (ret) { ++ hns_roce_spinlock_destroy(&qp->sq.hr_lock); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp) ++{ ++ hns_roce_spinlock_destroy(&qp->rq.hr_lock); ++ hns_roce_spinlock_destroy(&qp->sq.hr_lock); ++} ++ + static int alloc_recv_rinl_buf(uint32_t max_sge, + struct hns_roce_rinl_buf *rinl_buf) + { +@@ -1435,10 +1470,6 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, + { + int ret; + +- if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) || +- pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE)) +- return -ENOMEM; +- + ret = qp_alloc_wqe(&attr->cap, qp, ctx); + if (ret) + return ret; +@@ -1466,6 +1497,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + struct hnsdv_qp_init_attr *hns_attr) + { + struct hns_roce_context *context = to_hr_ctx(ibv_ctx); ++ struct hns_roce_pad *pad = to_hr_pad(attr->pd); + struct hns_roce_qp *qp; + uint64_t dwqe_mmap_key; + int ret; +@@ -1482,6 +1514,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + + hns_roce_set_qp_params(attr, qp, context); + ++ if (pad) ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ ++ ret = hns_roce_qp_spinlock_init(context, attr, qp); ++ if (ret) ++ goto err_spinlock; ++ + ret = hns_roce_alloc_qp_buf(attr, qp, context); + if (ret) + goto err_buf; +@@ -1515,6 +1554,8 @@ err_ops: + err_cmd: + hns_roce_free_qp_buf(qp, context); + err_buf: ++ hns_roce_qp_spinlock_destroy(qp); ++err_spinlock: + free(qp); + err: + if (ret < 0) +-- +2.30.0 + diff --git a/0009-libhns-Add-support-for-lock-free-CQ.patch b/0009-libhns-Add-support-for-lock-free-CQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..97873e02cf10068850eaeb9cb7f8f6fa2183135b --- /dev/null +++ b/0009-libhns-Add-support-for-lock-free-CQ.patch @@ -0,0 +1,273 @@ +From 41d0630d763bd39631331c76a9ecdbb245ce9502 Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 16:29:34 +0800 +Subject: [PATCH 09/10] libhns: Add support for lock-free CQ + +Drop CQ locks when associated to a PAD holding a TD. 
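
For reference, a minimal user-space sketch of how an application opts in to this lock-free path. It is not part of the patch: it only uses the generic verbs thread-domain/parent-domain API, error handling is abbreviated, and the names create_lockfree_cq, ctx and pd are illustrative.

#include <infiniband/verbs.h>

/* Hypothetical helper (not part of this patch): create a CQ bound to a
 * parent domain that holds a thread domain, so the provider may skip the
 * CQ spinlock. "ctx" and "pd" are assumed to exist already. */
static struct ibv_cq_ex *create_lockfree_cq(struct ibv_context *ctx,
					    struct ibv_pd *pd)
{
	struct ibv_td_init_attr td_attr = {};
	struct ibv_parent_domain_init_attr pad_attr = {};
	struct ibv_cq_init_attr_ex cq_attr = {};
	struct ibv_td *td;
	struct ibv_pd *pad;

	/* The TD is the application's promise of single-threaded use. */
	td = ibv_alloc_td(ctx, &td_attr);
	if (!td)
		return NULL;

	pad_attr.pd = pd;	/* the real protection domain */
	pad_attr.td = td;	/* PAD holding a TD -> lock-free data path */
	pad = ibv_alloc_parent_domain(ctx, &pad_attr);
	if (!pad) {
		ibv_dealloc_td(td);
		return NULL;
	}

	cq_attr.cqe = 64;
	cq_attr.comp_mask = IBV_CQ_INIT_ATTR_MASK_PD;
	cq_attr.parent_domain = pad;	/* CQ inherits the PAD's locking mode */
	return ibv_create_cq_ex(ctx, &cq_attr);
}
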
+--- + providers/hns/hns_roce_u.h | 3 +- + providers/hns/hns_roce_u_hw_v2.c | 46 +++++++++++++------------- + providers/hns/hns_roce_u_verbs.c | 56 ++++++++++++++++++++++++++++---- + 3 files changed, 74 insertions(+), 31 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5732e39..0035e36 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -255,7 +255,7 @@ struct hns_roce_pad { + struct hns_roce_cq { + struct verbs_cq verbs_cq; + struct hns_roce_buf buf; +- pthread_spinlock_t lock; ++ struct hns_roce_spinlock hr_lock; + unsigned int cqn; + unsigned int cq_depth; + unsigned int cons_index; +@@ -265,6 +265,7 @@ struct hns_roce_cq { + unsigned long flags; + unsigned int cqe_size; + struct hns_roce_v2_cqe *cqe; ++ struct ibv_pd *parent_domain; + }; + + struct hns_roce_idx_que { +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 90a76e2..2fb4d72 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -268,9 +268,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq, + if (cur + nreq < wq->max_post) + return 0; + +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + cur = wq->head - wq->tail; +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + + return cur + nreq >= wq->max_post; + } +@@ -724,7 +724,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + int err = V2_CQ_OK; + int npolled; + +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + + for (npolled = 0; npolled < ne; ++npolled) { + err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); +@@ -739,7 +739,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + update_cq_db(ctx, cq); + } + +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + + return err == V2_CQ_POLL_ERR ? 
err : npolled; + } +@@ -1510,9 +1510,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn, + struct hns_roce_srq *srq) + { +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + __hns_roce_v2_cq_clean(cq, qpn, srq); +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + } + + static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, +@@ -1600,18 +1600,18 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp) + + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { +- pthread_spin_lock(&send_cq->lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); + } else if (send_cq->cqn < recv_cq->cqn) { +- pthread_spin_lock(&send_cq->lock); +- pthread_spin_lock(&recv_cq->lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); ++ hns_roce_spin_lock(&recv_cq->hr_lock); + } else { +- pthread_spin_lock(&recv_cq->lock); +- pthread_spin_lock(&send_cq->lock); ++ hns_roce_spin_lock(&recv_cq->hr_lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); + } + } else if (send_cq) { +- pthread_spin_lock(&send_cq->lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); + } else if (recv_cq) { +- pthread_spin_lock(&recv_cq->lock); ++ hns_roce_spin_lock(&recv_cq->hr_lock); + } + } + +@@ -1622,18 +1622,18 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp) + + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { +- pthread_spin_unlock(&send_cq->lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); + } else if (send_cq->cqn < recv_cq->cqn) { +- pthread_spin_unlock(&recv_cq->lock); +- pthread_spin_unlock(&send_cq->lock); ++ hns_roce_spin_unlock(&recv_cq->hr_lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); + } else { +- pthread_spin_unlock(&send_cq->lock); +- pthread_spin_unlock(&recv_cq->lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); ++ hns_roce_spin_unlock(&recv_cq->hr_lock); + } + } else if (send_cq) { +- pthread_spin_unlock(&send_cq->lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); + } else if (recv_cq) { +- pthread_spin_unlock(&recv_cq->lock); ++ hns_roce_spin_unlock(&recv_cq->hr_lock); + } + } + +@@ -1811,11 +1811,11 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current, + if (attr->comp_mask) + return EINVAL; + +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + + err = hns_roce_poll_one(ctx, &qp, cq, NULL); + if (err != V2_CQ_OK) +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + + return err; + } +@@ -1849,7 +1849,7 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current) + else + update_cq_db(ctx, cq); + +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + } + + static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current) +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index d503031..afde313 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -407,6 +407,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw) + return 0; + } + ++enum { ++ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS | ++ IBV_CQ_INIT_ATTR_MASK_PD, ++}; ++ + enum { + CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | + IBV_WC_EX_WITH_CVLAN, +@@ -415,21 +420,47 @@ enum { + static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + struct hns_roce_context *context) + { ++ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); ++ + if (!attr->cqe || attr->cqe > context->max_cqe) + return EINVAL; + +- if (attr->comp_mask) ++ if 
(!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) { ++ verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n", ++ attr->comp_mask); + return EOPNOTSUPP; ++ } + + if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) + return EOPNOTSUPP; + ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { ++ if (!pad) { ++ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); ++ return EINVAL; ++ } ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ } ++ + attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, + roundup_pow_of_two(attr->cqe)); + + return 0; + } + ++static int hns_roce_cq_spinlock_init(struct ibv_context *context, ++ struct hns_roce_cq *cq, ++ struct ibv_cq_init_attr_ex *attr) ++{ ++ bool need_lock; ++ ++ need_lock = hns_roce_whether_need_lock(attr->parent_domain); ++ if (!need_lock) ++ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n"); ++ ++ return hns_roce_spinlock_init(&cq->hr_lock, need_lock); ++} ++ + static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq) + { + int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size); +@@ -486,7 +517,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + goto err; + } + +- ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE); ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) ++ cq->parent_domain = attr->parent_domain; ++ ++ ret = hns_roce_cq_spinlock_init(context, cq, attr); + if (ret) + goto err_lock; + +@@ -517,8 +551,9 @@ err_cmd: + hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB); + err_db: + hns_roce_free_buf(&cq->buf); +-err_lock: + err_buf: ++ hns_roce_spinlock_destroy(&cq->hr_lock); ++err_lock: + free(cq); + err: + if (ret < 0) +@@ -569,16 +604,23 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) + + int hns_roce_u_destroy_cq(struct ibv_cq *cq) + { ++ struct hns_roce_cq *hr_cq = to_hr_cq(cq); ++ struct hns_roce_pad *pad = to_hr_pad(hr_cq->parent_domain); + int ret; + + ret = ibv_cmd_destroy_cq(cq); + if (ret) + return ret; + +- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db, +- HNS_ROCE_CQ_TYPE_DB); +- hns_roce_free_buf(&to_hr_cq(cq)->buf); +- free(to_hr_cq(cq)); ++ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB); ++ hns_roce_free_buf(&hr_cq->buf); ++ ++ hns_roce_spinlock_destroy(&hr_cq->hr_lock); ++ ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); ++ ++ free(hr_cq); + + return ret; + } +-- +2.30.0 + diff --git a/0010-libhns-Add-support-for-lock-free-SRQ.patch b/0010-libhns-Add-support-for-lock-free-SRQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..64e66973591dd4cfe2f5f5861632d2eaccbffb6e --- /dev/null +++ b/0010-libhns-Add-support-for-lock-free-SRQ.patch @@ -0,0 +1,148 @@ +From c252a18578d12fd27b726b7b376fbebc3f2c98c3 Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 16:33:48 +0800 +Subject: [PATCH 10/10] libhns: Add support for lock-free SRQ + +Drop SRQ locks when associated to a PAD holding a TD. 
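
As with the CQ change, a usage sketch (not part of the patch) of creating an SRQ on such a parent domain; create_lockfree_srq, ctx and pad are illustrative names, and the PAD is assumed to have been allocated from a TD as in the previous patch.

#include <infiniband/verbs.h>

/* Hypothetical helper (not part of this patch): an SRQ created on a parent
 * domain that holds a thread domain is posted to without taking the SRQ
 * spinlock. "ctx" and "pad" come from the TD/PAD setup shown earlier. */
static struct ibv_srq *create_lockfree_srq(struct ibv_context *ctx,
					   struct ibv_pd *pad)
{
	struct ibv_srq_init_attr_ex attr = {};

	attr.attr.max_wr = 128;
	attr.attr.max_sge = 1;
	attr.srq_type = IBV_SRQT_BASIC;
	attr.pd = pad;		/* PAD with a TD -> lock-free post_srq_recv */
	attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_PD;

	return ibv_create_srq_ex(ctx, &attr);
}
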
+--- + providers/hns/hns_roce_u.h | 2 +- + providers/hns/hns_roce_u_hw_v2.c | 8 ++++---- + providers/hns/hns_roce_u_verbs.c | 31 +++++++++++++++++++++++++++++-- + 3 files changed, 34 insertions(+), 7 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 0035e36..21a6e28 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -292,7 +292,7 @@ struct hns_roce_srq { + struct hns_roce_idx_que idx_que; + struct hns_roce_buf wqe_buf; + struct hns_roce_rinl_buf srq_rinl_buf; +- pthread_spinlock_t lock; ++ struct hns_roce_spinlock hr_lock; + unsigned long *wrid; + unsigned int srqn; + unsigned int wqe_cnt; +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 2fb4d72..1d7a304 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -229,14 +229,14 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind) + uint32_t bitmap_num; + int bit_num; + +- pthread_spin_lock(&srq->lock); ++ hns_roce_spin_lock(&srq->hr_lock); + + bitmap_num = ind / BIT_CNT_PER_LONG; + bit_num = ind % BIT_CNT_PER_LONG; + srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); + srq->idx_que.tail++; + +- pthread_spin_unlock(&srq->lock); ++ hns_roce_spin_unlock(&srq->hr_lock); + } + + static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe, +@@ -1756,7 +1756,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + int ret = 0; + void *wqe; + +- pthread_spin_lock(&srq->lock); ++ hns_roce_spin_lock(&srq->hr_lock); + + max_sge = srq->max_gs - srq->rsv_sge; + for (nreq = 0; wr; ++nreq, wr = wr->next) { +@@ -1795,7 +1795,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + update_srq_db(ctx, &srq_db, srq); + } + +- pthread_spin_unlock(&srq->lock); ++ hns_roce_spin_unlock(&srq->hr_lock); + + return ret; + } +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index afde313..00e59dc 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -461,6 +461,19 @@ static int hns_roce_cq_spinlock_init(struct ibv_context *context, + return hns_roce_spinlock_init(&cq->hr_lock, need_lock); + } + ++static int hns_roce_srq_spinlock_init(struct ibv_context *context, ++ struct hns_roce_srq *srq, ++ struct ibv_srq_init_attr_ex *attr) ++{ ++ bool need_lock; ++ ++ need_lock = hns_roce_whether_need_lock(attr->pd); ++ if (!need_lock) ++ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n"); ++ ++ return hns_roce_spinlock_init(&srq->hr_lock, need_lock); ++} ++ + static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq) + { + int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size); +@@ -830,6 +843,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + struct ibv_srq_init_attr_ex *init_attr) + { + struct hns_roce_context *hr_ctx = to_hr_ctx(context); ++ struct hns_roce_pad *pad = to_hr_pad(init_attr->pd); + struct hns_roce_srq *srq; + int ret; + +@@ -843,12 +857,15 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + goto err; + } + +- if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) ++ if (pad) ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ ++ if (hns_roce_srq_spinlock_init(context, srq, init_attr)) + goto err_free_srq; + + set_srq_param(context, srq, init_attr); + if (alloc_srq_buf(srq)) +- goto err_free_srq; ++ goto err_destroy_lock; + + srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB); + if (!srq->rdb) +@@ -879,6 +896,9 @@ err_srq_db: + err_srq_buf: + 
free_srq_buf(srq); + ++err_destroy_lock: ++ hns_roce_spinlock_destroy(&srq->hr_lock); ++ + err_free_srq: + free(srq); + +@@ -943,6 +963,7 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) + int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + { + struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context); ++ struct hns_roce_pad *pad = to_hr_pad(ibv_srq->pd); + struct hns_roce_srq *srq = to_hr_srq(ibv_srq); + int ret; + +@@ -954,6 +975,12 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + + hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB); + free_srq_buf(srq); ++ ++ hns_roce_spinlock_destroy(&srq->hr_lock); ++ ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); ++ + free(srq); + + return 0; +-- +2.30.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 01b45ffdcfbd43d8f5dd1a53c96cb9f4d4efb25d..2d77d03feb910540db4d768ef9e8c32f0ad4010d 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 50.0 -Release: 3 +Release: 4 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -12,6 +12,10 @@ patch3: 0003-Update-kernel-headers.patch patch4: 0004-libhns-Introduce-hns-direct-verbs.patch patch5: 0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch patch6: 0006-libhns-Support-congestion-control-algorithm-configur.patch +patch7: 0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch +patch8: 0008-libhns-Add-support-for-lock-free-QP.patch +patch9: 0009-libhns-Add-support-for-lock-free-CQ.patch +patch10: 0010-libhns-Add-support-for-lock-free-SRQ.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -590,6 +594,12 @@ fi %{_mandir}/* %changelog +* Thu Mar 21 2024 Ran Zhou - 50.0-4 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Support td lock-free + * Tue Mar 12 2024 Ran Zhou - 50.0-3 - Type: requirement - ID: NA