diff --git a/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch b/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch new file mode 100644 index 0000000000000000000000000000000000000000..f68570161a76d4254ad420909beaa5c581689964 --- /dev/null +++ b/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch @@ -0,0 +1,70 @@ +From 5cc1a047c4d71ced86b0f71f66adf12475a3c788 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Tue, 11 May 2021 19:06:35 +0800 +Subject: libhns: Bugfix for checking whether the SRQ is full when posting WR + +If the user post a list of WRs, the head in the for loop is not updated in +time, and the judgment of if (head == tail) becomes invalid. + +Signed-off-by: Wenpeng Liang +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u_hw_v2.c | 17 +++++++++++++---- + providers/hns/hns_roce_u_verbs.c | 2 +- + 2 files changed, 14 insertions(+), 5 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 82124082..0c15bdbe 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1527,6 +1527,15 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + return ret; + } + ++static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) ++{ ++ struct hns_roce_idx_que *idx_que = &srq->idx_que; ++ unsigned int cur; ++ ++ cur = idx_que->head - idx_que->tail; ++ return cur >= srq->wqe_cnt - 1; ++} ++ + static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) + { + struct hns_roce_idx_que *idx_que = &srq->idx_que; +@@ -1577,14 +1586,14 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + + max_sge = srq->max_gs - srq->rsv_sge; + for (nreq = 0; wr; ++nreq, wr = wr->next) { +- if (wr->num_sge > max_sge) { +- ret = -EINVAL; ++ if (hns_roce_v2_srqwq_overflow(srq)) { ++ ret = -ENOMEM; + *bad_wr = wr; + break; + } + +- if (srq->idx_que.head == srq->idx_que.tail) { +- ret = -ENOMEM; ++ if (wr->num_sge > max_sge) { ++ ret = 
-EINVAL; + *bad_wr = wr; + break; + } +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 3abf7b48..dace35fd 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -492,7 +492,7 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) + idx_que->bitmap[i] = ~(0UL); + + idx_que->head = 0; +- idx_que->tail = srq->wqe_cnt - 1; ++ idx_que->tail = 0; + + return 0; + } +-- +2.30.0 + diff --git a/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch b/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch new file mode 100644 index 0000000000000000000000000000000000000000..205419a31bcf5e0423f96d0f3e4f32ebdef1a880 --- /dev/null +++ b/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch @@ -0,0 +1,30 @@ +From a79800afbbc48e5c5274bf3fc0e890705b3a596d Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Tue, 11 May 2021 19:06:36 +0800 +Subject: libhns: Allow users to create a 0-depth SRQs + +Users are allowed to create 0-depth SRQs, so the judgement about whether +max_wr is zero should be removed.
+ +Signed-off-by: Wenpeng Liang +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u_verbs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index dace35fd..2d1a6de3 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -525,7 +525,7 @@ static int hns_roce_verify_srq(struct hns_roce_context *context, + init_attr->srq_type != IBV_SRQT_XRC) + return -EINVAL; + +- if (!init_attr->attr.max_wr || !init_attr->attr.max_sge || ++ if (!init_attr->attr.max_sge || + init_attr->attr.max_wr > context->max_srq_wr || + init_attr->attr.max_sge > context->max_srq_sge) + return -EINVAL; +-- +2.30.0 + diff --git a/0011-libhns-Refactor-the-process-of-post_srq_recv.patch b/0011-libhns-Refactor-the-process-of-post_srq_recv.patch new file mode 100644 index 0000000000000000000000000000000000000000..693fc83c6a6e89b0da1833723d468213adccd2bf --- /dev/null +++ b/0011-libhns-Refactor-the-process-of-post_srq_recv.patch @@ -0,0 +1,176 @@ +From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Tue, 11 May 2021 19:06:37 +0800 +Subject: libhns: Refactor the process of post_srq_recv + +SRQ is a shared queue, it mainly consists of four parts: + +1. wqe buf: wqe buf is used to store wqe data. + +2. wqe_idx buf: the cqe of SRQ is not generated in the order of wqe, so +the wqe_idx corresponding to the idle WQE needs to be placed in a FIFO +queue, it can instruct the hardware to obtain the corresponding WQE. + +3. bitmap: bitmap is used to generate and release wqe_idx. When the user +has a new WR, the driver finds the idx of the idle wqe in bitmap. When the +CQE of wqe is generated, the driver releases the idx. + +4. wr_id buf: wr_id buf is used to store the user's wr_id, then return it +to the user when ibv_poll_cq() is invoked. + +After the refactoring, the functions of the four parts are clearer.
+ +Signed-off-by: Wenpeng Liang +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++------------- + 1 file changed, 57 insertions(+), 38 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 0c15bdbe..b622eaef 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) + return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift); + } + +-static void *get_srq_wqe(struct hns_roce_srq *srq, int n) ++static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) + { + return srq->buf.buf + (n << srq->wqe_shift); + } +@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) + return cur >= srq->wqe_cnt - 1; + } + +-static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) ++static int check_post_srq_valid(struct hns_roce_srq *srq, ++ struct ibv_recv_wr *wr) ++{ ++ unsigned int max_sge = srq->max_gs - srq->rsv_sge; ++ ++ if (hns_roce_v2_srqwq_overflow(srq)) ++ return -ENOMEM; ++ ++ if (wr->num_sge > max_sge) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx) + { + struct hns_roce_idx_que *idx_que = &srq->idx_que; + int bit_num; +@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) + return 0; + } + ++static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx, ++ struct ibv_recv_wr *wr) ++{ ++ struct hns_roce_v2_wqe_data_seg *dseg; ++ int i; ++ ++ dseg = get_srq_wqe(srq, wqe_idx); ++ ++ for (i = 0; i < wr->num_sge; ++i) { ++ dseg[i].len = htole32(wr->sg_list[i].length); ++ dseg[i].lkey = htole32(wr->sg_list[i].lkey); ++ dseg[i].addr = htole64(wr->sg_list[i].addr); ++ } ++ ++ /* hw stop reading when identify the last one */ ++ if (srq->rsv_sge) { ++ dseg[i].len = htole32(INVALID_SGE_LENGTH); ++ dseg[i].lkey = 
htole32(0x0); ++ dseg[i].addr = 0; ++ } ++} ++ ++static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) ++{ ++ struct hns_roce_idx_que *idx_que = &srq->idx_que; ++ unsigned int head; ++ __le32 *idx_buf; ++ ++ head = idx_que->head & (srq->wqe_cnt - 1); ++ ++ idx_buf = get_idx_buf(idx_que, head); ++ *idx_buf = htole32(wqe_idx); ++ ++ idx_que->head++; ++} ++ + static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) + { + struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context); + struct hns_roce_srq *srq = to_hr_srq(ib_srq); +- struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_db srq_db; +- unsigned int max_sge; +- __le32 *srq_idx; ++ unsigned int wqe_idx; + int ret = 0; +- int wqe_idx; +- void *wqe; + int nreq; +- int ind; +- int i; + + pthread_spin_lock(&srq->lock); + +- /* current idx of srqwq */ +- ind = srq->idx_que.head & (srq->wqe_cnt - 1); +- +- max_sge = srq->max_gs - srq->rsv_sge; + for (nreq = 0; wr; ++nreq, wr = wr->next) { +- if (hns_roce_v2_srqwq_overflow(srq)) { +- ret = -ENOMEM; +- *bad_wr = wr; +- break; +- } +- +- if (wr->num_sge > max_sge) { +- ret = -EINVAL; ++ ret = check_post_srq_valid(srq, wr); ++ if (ret) { + *bad_wr = wr; + break; + } +@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + break; + } + +- wqe = get_srq_wqe(srq, wqe_idx); +- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; +- +- for (i = 0; i < wr->num_sge; ++i) { +- set_data_seg_v2(dseg, wr->sg_list + i); +- dseg++; +- } +- +- /* hw stop reading when identify the last one */ +- if (srq->rsv_sge) +- set_ending_data_seg(dseg); +- +- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind); +- *srq_idx = htole32(wqe_idx); ++ fill_srq_wqe(srq, wqe_idx, wr); ++ fill_wqe_idx(srq, wqe_idx); + + srq->wrid[wqe_idx] = wr->wr_id; +- ind = (ind + 1) & (srq->wqe_cnt - 1); + } + + if (nreq) { +- srq->idx_que.head += nreq; +- + /* + * Make sure that descriptors are written 
before + * we write doorbell record. +-- +2.30.0 + diff --git a/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch b/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..b8569f2d05dbba8d78d918fbfcc8473c4eb25e27 --- /dev/null +++ b/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch @@ -0,0 +1,33 @@ +From a18b0ee409d3382aa556b8f06a6cd6bfbef3f5c8 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Tue, 11 May 2021 19:06:38 +0800 +Subject: libhns: Set srqlimit to 0 when creating SRQ + +According to the IB specification, the srq_limit parameter should not be +configured when creating srq. But the libhns does not set attr.srq_limit +to 0 currently. As a result, when attr.srq_limit provided by the user is +not 0, the value of attr.srq_limit returned to the user will be different +from that obtained by ibv_query_srq(). Therefore, the driver should set +attr.srq_limit to 0 when creating SRQ. + +Signed-off-by: Wenpeng Liang +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u_verbs.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 2d1a6de3..107da753 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -580,6 +580,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1); + srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); + attr->max_sge = srq->max_gs; ++ attr->srq_limit = 0; + + ret = hns_roce_create_idx_que(srq); + if (ret) +-- +2.30.0 + diff --git a/0013-libhns-Refactor-the-process-of-create_srq.patch b/0013-libhns-Refactor-the-process-of-create_srq.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2a08551149c1019c9cad344a605067dab7d299d --- /dev/null +++ b/0013-libhns-Refactor-the-process-of-create_srq.patch @@ -0,0 +1,367 @@ +From
b914c76318f5b95e3157c3cbf1ccb49ec6d27635 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Tue, 11 May 2021 19:06:39 +0800 +Subject: libhns: Refactor the process of create_srq + +Reorganize create_srq() as several sub-functions to make the process +clearer. + +Signed-off-by: Wenpeng Liang +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u.h | 7 +- + providers/hns/hns_roce_u_hw_v2.c | 2 +- + providers/hns/hns_roce_u_verbs.c | 178 ++++++++++++++++++------------- + 3 files changed, 105 insertions(+), 82 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index b3f48113..a437727c 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -211,7 +211,8 @@ struct hns_roce_idx_que { + + struct hns_roce_srq { + struct verbs_srq verbs_srq; +- struct hns_roce_buf buf; ++ struct hns_roce_idx_que idx_que; ++ struct hns_roce_buf wqe_buf; + pthread_spinlock_t lock; + unsigned long *wrid; + unsigned int srqn; +@@ -221,7 +222,6 @@ struct hns_roce_srq { + unsigned int wqe_shift; + unsigned int *db; + unsigned short counter; +- struct hns_roce_idx_que idx_que; + }; + + struct hns_roce_wq { +@@ -343,8 +343,7 @@ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq) + + static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq) + { +- return container_of(container_of(ibv_srq, struct verbs_srq, srq), +- struct hns_roce_srq, verbs_srq); ++ return container_of(ibv_srq, struct hns_roce_srq, verbs_srq.srq); + } + + static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp) +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index b622eaef..d4e7e4f9 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -244,7 +244,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) + + static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) + { +- return srq->buf.buf + (n << srq->wqe_shift); ++ return 
srq->wqe_buf.buf + (n << srq->wqe_shift); + } + + static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 107da753..75b9e530 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -432,17 +432,23 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx, + uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >> + ctx->srq_table_shift; + ++ pthread_mutex_lock(&ctx->srq_table_mutex); ++ + if (!ctx->srq_table[tind].refcnt) { + ctx->srq_table[tind].table = + calloc(ctx->srq_table_mask + 1, + sizeof(struct hns_roce_srq *)); +- if (!ctx->srq_table[tind].table) ++ if (!ctx->srq_table[tind].table) { ++ pthread_mutex_unlock(&ctx->srq_table_mutex); + return -ENOMEM; ++ } + } + + ++ctx->srq_table[tind].refcnt; + ctx->srq_table[tind].table[srq->srqn & ctx->srq_table_mask] = srq; + ++ pthread_mutex_unlock(&ctx->srq_table_mutex); ++ + return 0; + } + +@@ -461,13 +467,46 @@ static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn) + { + uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift; + ++ pthread_mutex_lock(&ctx->srq_table_mutex); ++ + if (!--ctx->srq_table[tind].refcnt) + free(ctx->srq_table[tind].table); + else + ctx->srq_table[tind].table[srqn & ctx->srq_table_mask] = NULL; ++ ++ pthread_mutex_unlock(&ctx->srq_table_mutex); ++} ++ ++static int verify_srq_create_attr(struct hns_roce_context *context, ++ struct ibv_srq_init_attr_ex *attr) ++{ ++ if (attr->srq_type != IBV_SRQT_BASIC && ++ attr->srq_type != IBV_SRQT_XRC) ++ return -EINVAL; ++ ++ if (!attr->attr.max_sge || ++ attr->attr.max_wr > context->max_srq_wr || ++ attr->attr.max_sge > context->max_srq_sge) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq, ++ struct ibv_srq_init_attr_ex *attr) ++{ ++ if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) ++ srq->rsv_sge 
= 1; ++ ++ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1); ++ srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge); ++ srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * ++ srq->max_gs)); ++ attr->attr.max_sge = srq->max_gs; ++ attr->attr.srq_limit = 0; + } + +-static int hns_roce_create_idx_que(struct hns_roce_srq *srq) ++static int alloc_srq_idx_que(struct hns_roce_srq *srq) + { + struct hns_roce_idx_que *idx_que = &srq->idx_que; + unsigned int buf_size; +@@ -478,13 +517,13 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) + BIT_CNT_PER_LONG; + idx_que->bitmap = calloc(idx_que->bitmap_cnt, sizeof(unsigned long)); + if (!idx_que->bitmap) +- return ENOMEM; ++ return -ENOMEM; + + buf_size = to_hr_hem_entries_size(srq->wqe_cnt, idx_que->entry_shift); + if (hns_roce_alloc_buf(&idx_que->buf, buf_size, HNS_HW_PAGE_SIZE)) { + free(idx_que->bitmap); + idx_que->bitmap = NULL; +- return ENOMEM; ++ return -ENOMEM; + } + + /* init the idx_que bitmap */ +@@ -497,40 +536,48 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) + return 0; + } + +-static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq) ++static int alloc_srq_wqe_buf(struct hns_roce_srq *srq) + { +- int srq_buf_size; ++ int buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); + +- srq->wrid = calloc(srq->wqe_cnt, sizeof(unsigned long)); +- if (!srq->wrid) +- return ENOMEM; ++ return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE); ++} + +- srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * +- srq->max_gs)); +- srq_buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); ++static int alloc_srq_buf(struct hns_roce_srq *srq) ++{ ++ int ret; + +- /* allocate srq wqe buf */ +- if (hns_roce_alloc_buf(&srq->buf, srq_buf_size, HNS_HW_PAGE_SIZE)) { +- free(srq->wrid); +- return ENOMEM; ++ ret = alloc_srq_idx_que(srq); ++ if (ret) ++ return ret; ++ ++ ret = alloc_srq_wqe_buf(srq); ++ if (ret) ++ goto 
err_idx_que; ++ ++ srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid)); ++ if (!srq->wrid) { ++ ret = -ENOMEM; ++ goto err_wqe_buf; + } + + return 0; +-} + +-static int hns_roce_verify_srq(struct hns_roce_context *context, +- struct ibv_srq_init_attr_ex *init_attr) +-{ +- if (init_attr->srq_type != IBV_SRQT_BASIC && +- init_attr->srq_type != IBV_SRQT_XRC) +- return -EINVAL; ++err_wqe_buf: ++ hns_roce_free_buf(&srq->wqe_buf); ++err_idx_que: ++ hns_roce_free_buf(&srq->idx_que.buf); ++ free(srq->idx_que.bitmap); + +- if (!init_attr->attr.max_sge || +- init_attr->attr.max_wr > context->max_srq_wr || +- init_attr->attr.max_sge > context->max_srq_sge) +- return -EINVAL; ++ return ret; ++} + +- return 0; ++static void free_srq_buf(struct hns_roce_srq *srq) ++{ ++ free(srq->wrid); ++ hns_roce_free_buf(&srq->wqe_buf); ++ hns_roce_free_buf(&srq->idx_que.buf); ++ free(srq->idx_que.bitmap); + } + + static int exec_srq_create_cmd(struct ibv_context *context, +@@ -541,7 +588,7 @@ static int exec_srq_create_cmd(struct ibv_context *context, + struct hns_roce_create_srq_ex cmd_ex = {}; + int ret; + +- cmd_ex.buf_addr = (uintptr_t)srq->buf.buf; ++ cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf; + cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf; + cmd_ex.db_addr = (uintptr_t)srq->db; + +@@ -559,57 +606,44 @@ static int exec_srq_create_cmd(struct ibv_context *context, + static struct ibv_srq *create_srq(struct ibv_context *context, + struct ibv_srq_init_attr_ex *init_attr) + { +- struct hns_roce_context *ctx = to_hr_ctx(context); +- struct ibv_srq_attr *attr = &init_attr->attr; ++ struct hns_roce_context *hr_ctx = to_hr_ctx(context); + struct hns_roce_srq *srq; + int ret; + +- if (hns_roce_verify_srq(ctx, init_attr)) +- return NULL; ++ ret = verify_srq_create_attr(hr_ctx, init_attr); ++ if (ret) ++ goto err; + + srq = calloc(1, sizeof(*srq)); +- if (!srq) +- return NULL; ++ if (!srq) { ++ ret = -ENOMEM; ++ goto err; ++ } + + if (pthread_spin_init(&srq->lock, 
PTHREAD_PROCESS_PRIVATE)) + goto err_free_srq; + +- if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) +- srq->rsv_sge = 1; +- +- srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1); +- srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); +- attr->max_sge = srq->max_gs; +- attr->srq_limit = 0; +- +- ret = hns_roce_create_idx_que(srq); +- if (ret) ++ set_srq_param(context, srq, init_attr); ++ if (alloc_srq_buf(srq)) + goto err_free_srq; + +- ret = hns_roce_alloc_srq_buf(srq); +- if (ret) +- goto err_idx_que; +- +- srq->db = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB); ++ srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB); + if (!srq->db) + goto err_srq_buf; + +- *(srq->db) = 0; +- +- pthread_mutex_lock(&ctx->srq_table_mutex); ++ *srq->db = 0; + + ret = exec_srq_create_cmd(context, srq, init_attr); + if (ret) + goto err_srq_db; + +- ret = hns_roce_store_srq(ctx, srq); ++ ret = hns_roce_store_srq(hr_ctx, srq); + if (ret) + goto err_destroy_srq; + +- pthread_mutex_unlock(&ctx->srq_table_mutex); +- +- srq->max_gs = attr->max_sge; +- attr->max_sge = min(attr->max_sge - srq->rsv_sge, ctx->max_srq_sge); ++ srq->max_gs = init_attr->attr.max_sge; ++ init_attr->attr.max_sge = ++ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge); + + return &srq->verbs_srq.srq; + +@@ -617,20 +651,19 @@ err_destroy_srq: + ibv_cmd_destroy_srq(&srq->verbs_srq.srq); + + err_srq_db: +- pthread_mutex_unlock(&ctx->srq_table_mutex); +- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB); ++ hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB); + + err_srq_buf: +- free(srq->wrid); +- hns_roce_free_buf(&srq->buf); +- +-err_idx_que: +- free(srq->idx_que.bitmap); +- hns_roce_free_buf(&srq->idx_que.buf); ++ free_srq_buf(srq); + + err_free_srq: + free(srq); + ++err: ++ if (ret < 0) ++ ret = -ret; ++ ++ errno = ret; + return NULL; + } + +@@ -690,23 +723,14 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + struct hns_roce_srq *srq = to_hr_srq(ibv_srq); + 
int ret; + +- pthread_mutex_lock(&ctx->srq_table_mutex); +- + ret = ibv_cmd_destroy_srq(ibv_srq); +- if (ret) { +- pthread_mutex_unlock(&ctx->srq_table_mutex); ++ if (ret) + return ret; +- } + + hns_roce_clear_srq(ctx, srq->srqn); + +- pthread_mutex_unlock(&ctx->srq_table_mutex); +- + hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB); +- hns_roce_free_buf(&srq->buf); +- free(srq->wrid); +- hns_roce_free_buf(&srq->idx_que.buf); +- free(srq->idx_que.bitmap); ++ free_srq_buf(srq); + free(srq); + + return 0; +-- +2.30.0 + diff --git a/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch b/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..31dbe9f58e3dc7cc13440fb781b683fae674b3c9 --- /dev/null +++ b/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch @@ -0,0 +1,69 @@ +From d68ac72a8e4f2cf9754d3fcbbb8ff2a03e514c2f Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Tue, 11 May 2021 19:06:40 +0800 +Subject: libhns: Remove the reserved wqe of SRQ + +There is an unreasonable reserved WQE in SRQ, it should be removed. 
+ +Signed-off-by: Wenpeng Liang +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u.h | 1 + + providers/hns/hns_roce_u_hw_v2.c | 4 +--- + providers/hns/hns_roce_u_verbs.c | 5 ++++- + 3 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index a437727c..0d7abd81 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -64,6 +64,7 @@ + #define HNS_ROCE_MIN_CQE_NUM 0x40 + #define HNS_ROCE_V1_MIN_WQE_NUM 0x20 + #define HNS_ROCE_V2_MIN_WQE_NUM 0x40 ++#define HNS_ROCE_MIN_SRQ_WQE_NUM 1 + + #define HNS_ROCE_CQE_SIZE 0x20 + #define HNS_ROCE_V3_CQE_SIZE 0x40 +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index d4e7e4f9..2fb6cdaf 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1530,10 +1530,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) + { + struct hns_roce_idx_que *idx_que = &srq->idx_que; +- unsigned int cur; + +- cur = idx_que->head - idx_que->tail; +- return cur >= srq->wqe_cnt - 1; ++ return idx_que->head - idx_que->tail >= srq->wqe_cnt; + } + + static int check_post_srq_valid(struct hns_roce_srq *srq, +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 75b9e530..4847639b 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -489,6 +489,9 @@ static int verify_srq_create_attr(struct hns_roce_context *context, + attr->attr.max_sge > context->max_srq_sge) + return -EINVAL; + ++ attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr, ++ HNS_ROCE_MIN_SRQ_WQE_NUM); ++ + return 0; + } + +@@ -498,7 +501,7 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq, + if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) + srq->rsv_sge = 1; + +- srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1); ++ srq->wqe_cnt = 
roundup_pow_of_two(attr->attr.max_wr); + srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge); + srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * + srq->max_gs)); +-- +2.30.0 + diff --git a/0015-libhns-Refactor-process-of-setting-extended-sge.patch b/0015-libhns-Refactor-process-of-setting-extended-sge.patch new file mode 100644 index 0000000000000000000000000000000000000000..3555177f7d5be107ed0fbb016b841d0e1557bc26 --- /dev/null +++ b/0015-libhns-Refactor-process-of-setting-extended-sge.patch @@ -0,0 +1,89 @@ +From 11c81d0e3a987f95b74e03b5e592a45029302f1d Mon Sep 17 00:00:00 2001 +From: Weihang Li +Date: Fri, 14 May 2021 10:02:56 +0800 +Subject: libhns: Refactor process of setting extended sge + +Refactor and encapsulate the parts of getting number of extended sge a WQE +can use to make it easier to understand. + +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++------------ + 1 file changed, 29 insertions(+), 16 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 30ab072a..a8508fc5 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -920,31 +920,44 @@ err_alloc: + return -ENOMEM; + } + +-static void set_extend_sge_param(struct hns_roce_device *hr_dev, +- struct ibv_qp_init_attr_ex *attr, +- struct hns_roce_qp *qp, unsigned int wr_cnt) ++static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) + { +- int cnt = 0; ++ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD) ++ return qp->sq.max_gs; ++ ++ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) ++ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; ++ ++ return 0; ++} ++ ++static void set_ext_sge_param(struct hns_roce_device *hr_dev, ++ struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp, unsigned int wr_cnt) ++{ ++ unsigned int total_sge_cnt; ++ unsigned int wqe_sge_cnt; ++ ++ qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; + + if (hr_dev->hw_version == 
HNS_ROCE_HW_VER1) { + qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; +- } else { +- qp->sq.max_gs = attr->cap.max_send_sge; +- if (attr->qp_type == IBV_QPT_UD) +- cnt = roundup_pow_of_two(wr_cnt * qp->sq.max_gs); +- else if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) +- cnt = roundup_pow_of_two(wr_cnt * +- (qp->sq.max_gs - +- HNS_ROCE_SGE_IN_WQE)); ++ return; + } + +- qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; ++ qp->sq.max_gs = attr->cap.max_send_sge; ++ ++ wqe_sge_cnt = get_wqe_ext_sge_cnt(qp); + + /* If the number of extended sge is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ +- qp->ex_sge.sge_cnt = cnt ? +- max(cnt, HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0; ++ if (wqe_sge_cnt) { ++ total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt); ++ qp->ex_sge.sge_cnt = ++ max(total_sge_cnt, ++ (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); ++ } + } + + static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, +@@ -988,7 +1001,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, + qp->sq.wqe_cnt = cnt; + qp->sq.shift = hr_ilog32(cnt); + +- set_extend_sge_param(hr_dev, attr, qp, cnt); ++ set_ext_sge_param(hr_dev, attr, qp, cnt); + + qp->sq.max_post = min(ctx->max_qp_wr, cnt); + qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs); +-- +2.30.0 + diff --git a/0016-libhns-Optimize-set_sge-process.patch b/0016-libhns-Optimize-set_sge-process.patch new file mode 100644 index 0000000000000000000000000000000000000000..7754801a38d74427e2fee563dcc75f61571c2612 --- /dev/null +++ b/0016-libhns-Optimize-set_sge-process.patch @@ -0,0 +1,139 @@ +From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001 +From: Lang Cheng +Date: Fri, 14 May 2021 10:02:57 +0800 +Subject: libhns: Optimize set_sge process + +Use local variables to avoid frequent ldr/str operations. And because UD's +process of setting sge is simpler than RC's, set_sge() can be split +into two functions for compiler optimization.
+ +Signed-off-by: Lang Cheng +Signed-off-by: Weihang Li +--- + providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++--------- + 1 file changed, 61 insertions(+), 22 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 4988943a..dc79a6f8 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx) + return 0; + } + +-static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg, +- struct hns_roce_qp *qp, struct ibv_send_wr *wr, +- struct hns_roce_sge_info *sge_info) ++static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg, ++ struct hns_roce_qp *qp, struct ibv_send_wr *wr, ++ struct hns_roce_sge_info *sge_info) + { ++ uint32_t mask = qp->ex_sge.sge_cnt - 1; ++ uint32_t index = sge_info->start_idx; ++ struct ibv_sge *sge = wr->sg_list; ++ uint32_t len = 0; ++ uint32_t cnt = 0; ++ int flag; + int i; + +- sge_info->valid_num = 0; +- sge_info->total_len = 0; ++ flag = (wr->send_flags & IBV_SEND_INLINE && ++ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD && ++ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP); + +- for (i = 0; i < wr->num_sge; i++) { +- if (unlikely(!wr->sg_list[i].length)) ++ for (i = 0; i < wr->num_sge; i++, sge++) { ++ if (unlikely(!sge->length)) + continue; + +- sge_info->total_len += wr->sg_list[i].length; +- sge_info->valid_num++; ++ len += sge->length; ++ cnt++; + +- if (wr->send_flags & IBV_SEND_INLINE && +- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD && +- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP) ++ if (flag) + continue; + +- /* No inner sge in UD wqe */ +- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE && +- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) { +- set_data_seg_v2(dseg, wr->sg_list + i); ++ if (cnt <= HNS_ROCE_SGE_IN_WQE) { ++ set_data_seg_v2(dseg, sge); + dseg++; + } else { +- dseg = get_send_sge_ex(qp, sge_info->start_idx & +- (qp->ex_sge.sge_cnt - 1)); +- 
set_data_seg_v2(dseg, wr->sg_list + i); +- sge_info->start_idx++; ++ dseg = get_send_sge_ex(qp, index & mask); ++ set_data_seg_v2(dseg, sge); ++ index++; + } + } ++ ++ sge_info->start_idx = index; ++ sge_info->valid_num = cnt; ++ sge_info->total_len = len; ++} ++ ++static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg, ++ struct hns_roce_qp *qp, struct ibv_send_wr *wr, ++ struct hns_roce_sge_info *sge_info) ++{ ++ int flag = wr->send_flags & IBV_SEND_INLINE; ++ uint32_t mask = qp->ex_sge.sge_cnt - 1; ++ uint32_t index = sge_info->start_idx; ++ struct ibv_sge *sge = wr->sg_list; ++ uint32_t len = 0; ++ uint32_t cnt = 0; ++ int i; ++ ++ for (i = 0; i < wr->num_sge; i++, sge++) { ++ if (unlikely(!sge->length)) ++ continue; ++ ++ len += sge->length; ++ cnt++; ++ ++ if (flag) ++ continue; ++ ++ /* No inner sge in UD wqe */ ++ dseg = get_send_sge_ex(qp, index & mask); ++ set_data_seg_v2(dseg, sge); ++ index++; ++ } ++ ++ sge_info->start_idx = index; ++ sge_info->valid_num = cnt; ++ sge_info->total_len = len; + } + + static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, +@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, + UD_SQ_WQE_MSG_START_SGE_IDX_S, + sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); + +- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); ++ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); + + ud_sq_wqe->msg_len = htole32(sge_info->total_len); + +@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + wqe += sizeof(struct hns_roce_rc_sq_wqe); + dseg = wqe; + +- set_sge(dseg, qp, wr, sge_info); ++ set_rc_sge(dseg, qp, wr, sge_info); + + rc_sq_wqe->msg_len = htole32(sge_info->total_len); + +-- +2.30.0 + diff --git a/rdma-core.spec b/rdma-core.spec index a90f81deeefbadf34ef478a853f75eceb50f61f7..436a5067f6b5de3ccee218278d80330646e55d88 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core 
Version: 35.1 -Release: 2 +Release: 3 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -15,6 +15,14 @@ Patch5: 0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch Patch6: 0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch Patch7: 0007-libhns-Add-support-for-direct-wqe.patch Patch8: 0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch +Patch9: 0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch +Patch10: 0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch +Patch11: 0011-libhns-Refactor-the-process-of-post_srq_recv.patch +Patch12: 0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch +Patch13: 0013-libhns-Refactor-the-process-of-create_srq.patch +Patch14: 0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch +Patch15: 0015-libhns-Refactor-process-of-setting-extended-sge.patch +Patch16: 0016-libhns-Optimize-set_sge-process.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -259,6 +267,12 @@ fi %{_mandir}/* %changelog +* Mon Jul 11 2022 luozhengfeng - 35.1-3 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: bugfix and refactor for hns SRQ and SGE + * Mon Jan 10 2022 tangchengchang - 35.1-2 - Type: requirement - ID: NA