diff --git a/0001-Update-kernel-headers.patch b/0001-Update-kernel-headers.patch deleted file mode 100644 index 8a96d70a8fcd4636b0d88dce6d2972929c71ea39..0000000000000000000000000000000000000000 --- a/0001-Update-kernel-headers.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 693d55e80976217215844258e5b78bc115382689 Mon Sep 17 00:00:00 2001 -From: Guofeng Yue -Date: Mon, 10 Jan 2022 10:44:23 +0800 -Subject: [PATCH 1/8] Update kernel headers - -To commit 62c4d8878d13 ("RDMA/hns: Remove support for HIP06"). - -Signed-off-by: Guofeng Yue ---- - kernel-headers/rdma/hns-abi.h | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h -index 42b17765..abfd36e2 100644 ---- a/kernel-headers/rdma/hns-abi.h -+++ b/kernel-headers/rdma/hns-abi.h -@@ -77,17 +77,19 @@ enum hns_roce_qp_cap_flags { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, - HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, - HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, -+ HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, - }; - - struct hns_roce_ib_create_qp_resp { - __aligned_u64 cap_flags; -+ __aligned_u64 dwqe_mmap_key; - }; - - struct hns_roce_ib_alloc_ucontext_resp { - __u32 qp_tab_size; - __u32 cqe_size; -- __u32 srq_tab_size; -- __u32 reserved; -+ __u32 srq_tab_size; -+ __u32 reserved; - }; - - struct hns_roce_ib_alloc_pd_resp { --- -2.33.0 - diff --git a/0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch b/0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch deleted file mode 100644 index 7dcc9b88e9113c24a20889e1f8de7449bce436de..0000000000000000000000000000000000000000 --- a/0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 08ec3c43bf9710fdf3ca664f7cd63436e67339d7 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:34 +0800 -Subject: [PATCH 2/8] libhns: Fix the ownership of the head/tail pointer of SRQ - WQE - -The CQE of SRQ is not generated in the order of wqe, so the wqe_idx -corresponding to the idle WQE should be placed in a FIFO, then the hardware -will be instructed to obtain the corresponding WQE. Therefore, the WQ -of SRQ has no concept of head pointer and tail pointer, but the queue of -wqe_idx does. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u.h | 4 ++-- - providers/hns/hns_roce_u_hw_v2.c | 12 ++++++------ - providers/hns/hns_roce_u_verbs.c | 6 +++--- - 3 files changed, 11 insertions(+), 11 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 8f805dd1..b3f48113 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -205,6 +205,8 @@ struct hns_roce_idx_que { - int entry_shift; - unsigned long *bitmap; - int bitmap_cnt; -+ unsigned int head; -+ unsigned int tail; - }; - - struct hns_roce_srq { -@@ -217,8 +219,6 @@ struct hns_roce_srq { - unsigned int max_gs; - unsigned int rsv_sge; - unsigned int wqe_shift; -- int head; -- int tail; - unsigned int *db; - unsigned short counter; - struct hns_roce_idx_que idx_que; -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 4988943a..f947dbd7 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -262,7 +262,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind) - bitmap_num = ind / BIT_CNT_PER_LONG; - bit_num = ind % BIT_CNT_PER_LONG; - srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); -- srq->tail++; -+ srq->idx_que.tail++; - - pthread_spin_unlock(&srq->lock); - } -@@ -1564,7 +1564,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - pthread_spin_lock(&srq->lock); - - /* current idx of srqwq */ -- ind = srq->head & (srq->wqe_cnt - 1); -+ ind = srq->idx_que.head & (srq->wqe_cnt - 1); - - max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { -@@ -1574,7 +1574,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - break; - } - -- if (srq->head == srq->tail) { -+ if (srq->idx_que.head == srq->idx_que.tail) { - ret = -ENOMEM; - *bad_wr = wr; - break; -@@ -1607,7 +1607,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - } - - if (nreq) { -- srq->head += nreq; -+ srq->idx_que.head += nreq; - - /* - * Make sure that descriptors are written before -@@ -1617,8 +1617,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - - srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S | - srq->srqn); -- srq_db.parameter = -- htole32(srq->head & DB_PARAM_SRQ_PRODUCER_COUNTER_M); -+ srq_db.parameter = htole32(srq->idx_que.head & -+ DB_PARAM_SRQ_PRODUCER_COUNTER_M); - - hns_roce_write64((uint32_t *)&srq_db, ctx, - ROCEE_VF_DB_CFG0_OFFSET); -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 30ab072a..9b4934b9 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -491,6 +491,9 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - for (i = 0; i < idx_que->bitmap_cnt; ++i) - idx_que->bitmap[i] = ~(0UL); - -+ idx_que->head = 0; -+ idx_que->tail = srq->wqe_cnt - 1; -+ - return 0; - } - -@@ -512,9 +515,6 @@ static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq) - return ENOMEM; - } - -- srq->head = 0; -- srq->tail = srq->wqe_cnt - 1; -- - return 0; - } - --- -2.33.0 - diff --git a/0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch b/0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch deleted file mode 100644 index 6385c36b0ee9c9b764cf3d9791dd0530cbecc104..0000000000000000000000000000000000000000 --- a/0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch +++ /dev/null @@ -1,180 +0,0 @@ -From 9cc4c4b8d31b35428859ef626d4428fc393aace4 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Thu, 11 Nov 2021 21:08:35 +0800 -Subject: [PATCH 3/8] libhns: Fix wrong data type when writing doorbell - -The DB data is a __le32[] value instead of uint32_t[], and the DB register -should be written with a little-endian data instead of uint64_t. - -Fixes: 1523fbb1ea8e ("libhns: Add verbs of cq support") -Signed-off-by: Lang Cheng -Signed-off-by: Yixing Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_db.h | 14 ++++---------- - providers/hns/hns_roce_u_hw_v1.c | 17 +++++++++-------- - providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++++++----------- - 3 files changed, 25 insertions(+), 29 deletions(-) - -diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h -index b44e64d4..13df9b52 100644 ---- a/providers/hns/hns_roce_u_db.h -+++ b/providers/hns/hns_roce_u_db.h -@@ -32,23 +32,17 @@ - - #include - -+#include - #include "hns_roce_u.h" - - #ifndef _HNS_ROCE_U_DB_H - #define _HNS_ROCE_U_DB_H - --#if __BYTE_ORDER == __LITTLE_ENDIAN --#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[1] << 32 | val[0]) --#elif __BYTE_ORDER == __BIG_ENDIAN --#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[0] << 32 | val[1]) --#else --#error __BYTE_ORDER not defined --#endif -+#define HNS_ROCE_WORD_NUM 2 - --static inline void hns_roce_write64(uint32_t val[2], -- struct hns_roce_context *ctx, int offset) -+static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM]) - { -- *(volatile uint64_t *) (ctx->uar + offset) = HNS_ROCE_PAIR_TO_64(val); -+ mmio_write64_le(dest, *(__le64 *)val); - } - - void *hns_roce_alloc_db(struct hns_roce_context *ctx, -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 8f0a71aa..14ee4817 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -65,7 +65,7 @@ static void hns_roce_update_rq_head(struct hns_roce_context *ctx, - - udma_to_device_barrier(); - -- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_DB_OTHERS_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&rq_db); - } - - static void hns_roce_update_sq_head(struct hns_roce_context *ctx, -@@ -84,7 +84,7 @@ static void hns_roce_update_sq_head(struct hns_roce_context *ctx, - - udma_to_device_barrier(); - -- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_DB_SQ_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_SQ_L_0_REG, (__le32 *)&sq_db); - } - - static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx, -@@ -102,7 +102,7 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx, - CQ_DB_U32_4_CONS_IDX_S, - cq->cons_index & ((cq->cq_depth << 1) - 1)); - -- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_DB_OTHERS_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db); - } - - static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe, -@@ -422,10 +422,11 @@ static int hns_roce_u_v1_poll_cq(struct ibv_cq *ibvcq, int ne, - */ - static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited) - { -- uint32_t ci; -- uint32_t solicited_flag; -- struct hns_roce_cq_db cq_db = {}; -+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); - struct hns_roce_cq *cq = to_hr_cq(ibvcq); -+ struct hns_roce_cq_db cq_db = {}; -+ uint32_t solicited_flag; -+ uint32_t ci; - - ci = cq->cons_index & ((cq->cq_depth << 1) - 1); - solicited_flag = solicited ? HNS_ROCE_CQ_DB_REQ_SOL : -@@ -441,8 +442,8 @@ static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited) - roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M, - CQ_DB_U32_4_CONS_IDX_S, ci); - -- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context), -- ROCEE_DB_OTHERS_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db); -+ - return 0; - } - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index f947dbd7..efd949f4 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -293,7 +293,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, - HNS_ROCE_V2_RQ_DB); - rq_db.parameter = htole32(rq_head); - -- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db); - } - - static void hns_roce_update_sq_db(struct hns_roce_context *ctx, -@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, - sq_db.parameter = htole32(sq_head); - roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl); - -- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db); - } - - static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, -@@ -325,7 +325,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, - roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M, - DB_PARAM_CQ_CMD_SN_S, 1); - -- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); - } - - static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, -@@ -659,11 +659,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, - - static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) - { -- uint32_t ci; -- uint32_t cmd_sn; -- uint32_t solicited_flag; -- struct hns_roce_db cq_db = {}; -+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); - struct hns_roce_cq *cq = to_hr_cq(ibvcq); -+ struct hns_roce_db cq_db = {}; -+ uint32_t solicited_flag; -+ uint32_t cmd_sn; -+ uint32_t ci; - - ci = cq->cons_index & ((cq->cq_depth << 1) - 1); - cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK; -@@ -681,8 +682,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) - DB_PARAM_CQ_CMD_SN_S, cmd_sn); - roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag); - -- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context), -- ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); -+ - return 0; - } - -@@ -1620,8 +1621,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - srq_db.parameter = htole32(srq->idx_que.head & - DB_PARAM_SRQ_PRODUCER_COUNTER_M); - -- hns_roce_write64((uint32_t *)&srq_db, ctx, -- ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, -+ (__le32 *)&srq_db); - } - - pthread_spin_unlock(&srq->lock); --- -2.33.0 - diff --git a/0004-libhns-Remove-unsupported-QP-type.patch b/0004-libhns-Remove-unsupported-QP-type.patch deleted file mode 100644 index e144b050b6b376db6e91723db6a091c0572066b9..0000000000000000000000000000000000000000 --- a/0004-libhns-Remove-unsupported-QP-type.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 60d82566fc94b11280be26733bc306e6af3d2697 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 9 Nov 2021 20:40:58 +0800 -Subject: [PATCH 4/8] libhns: Remove unsupported QP type - -Currently, user space does not support UC type QP. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u_hw_v1.c | 1 - - providers/hns/hns_roce_u_hw_v2.c | 3 +-- - 2 files changed, 1 insertion(+), 3 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 14ee4817..279c9b0f 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -532,7 +532,6 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, - ctrl->flag |= htole32(ps_opcode); - wqe += sizeof(struct hns_roce_wqe_raddr_seg); - break; -- case IBV_QPT_UC: - case IBV_QPT_UD: - default: - break; -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index efd949f4..c62f74b5 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -460,8 +460,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, - struct hns_roce_qp **cur_qp, - struct ibv_wc *wc, uint32_t opcode) - { -- if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC || -- (*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_UC) && -+ if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) && - (opcode == HNS_ROCE_RECV_OP_SEND || - opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM || - opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) && --- -2.33.0 - diff --git a/0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch b/0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch deleted file mode 100644 index 4ae71d1c8622acffb3e68d1d96d91cb478991e0b..0000000000000000000000000000000000000000 --- a/0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch +++ /dev/null @@ -1,67 +0,0 @@ -From e460a4208d1821b1477e621ad5a7b72068e844f9 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:32 +0800 -Subject: [PATCH 5/8] libhns: Avoid using WQE indexes that exceed the SRQ size - -The index of SRQ WQE got from bitmap may be greater than the capability, -so a check for that should be added. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++------ - 1 file changed, 14 insertions(+), 6 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index c62f74b5..1169b64b 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1527,8 +1527,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - return ret; - } - --static int find_empty_entry(struct hns_roce_idx_que *idx_que) -+static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) - { -+ struct hns_roce_idx_que *idx_que = &srq->idx_que; - int bit_num; - int i; - -@@ -1536,12 +1537,20 @@ static int find_empty_entry(struct hns_roce_idx_que *idx_que) - for (i = 0; i < idx_que->bitmap_cnt && idx_que->bitmap[i] == 0; ++i) - ; - if (i == idx_que->bitmap_cnt) -- return ENOMEM; -+ return -ENOMEM; - - bit_num = ffsl(idx_que->bitmap[i]); - idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); - -- return i * BIT_CNT_PER_LONG + (bit_num - 1); -+ *wqe_idx = i * BIT_CNT_PER_LONG + (bit_num - 1); -+ -+ /* If wqe_cnt is less than BIT_CNT_PER_LONG, wqe_idx may be greater -+ * than wqe_cnt. -+ */ -+ if (*wqe_idx >= srq->wqe_cnt) -+ return -ENOMEM; -+ -+ return 0; - } - - static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, -@@ -1580,9 +1589,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - break; - } - -- wqe_idx = find_empty_entry(&srq->idx_que); -- if (wqe_idx < 0 || wqe_idx >= srq->wqe_cnt) { -- ret = -ENOMEM; -+ ret = get_wqe_idx(srq, &wqe_idx); -+ if (ret) { - *bad_wr = wr; - break; - } --- -2.33.0 - diff --git a/0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch b/0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch deleted file mode 100644 index 9105a5d969a62354e1a57df3153e84c0c494bbc4..0000000000000000000000000000000000000000 --- a/0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 91034654bdb2fd6e1fce81b4c1aea41bb4b6bf98 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:33 +0800 -Subject: [PATCH 6/8] libhns: Don't create RQ for a QP that associated with a - SRQ - -If a QP is associated with a SRQ, it's RQ should not be created. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 9b4934b9..125858d2 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -760,6 +760,11 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, - cap->max_recv_sge > ctx->max_sge) - return -EINVAL; - -+ if (attr->srq) { -+ cap->max_recv_wr = 0; -+ cap->max_recv_sge = 0; -+ } -+ - min_wqe_num = hr_dev->hw_version == HNS_ROCE_HW_VER1 ? - HNS_ROCE_V1_MIN_WQE_NUM : HNS_ROCE_V2_MIN_WQE_NUM; - --- -2.33.0 - diff --git a/0007-libhns-Add-support-for-direct-wqe.patch b/0007-libhns-Add-support-for-direct-wqe.patch deleted file mode 100644 index e310d4922dbc6f9f22bc11b2bed828745b767033..0000000000000000000000000000000000000000 --- a/0007-libhns-Add-support-for-direct-wqe.patch +++ /dev/null @@ -1,368 +0,0 @@ -From 64c66455fef1c908cc8f06a2b71aa2fd71806218 Mon Sep 17 00:00:00 2001 -From: Yixing Liu -Date: Wed, 15 Dec 2021 16:42:30 +0800 -Subject: [PATCH 7/8] libhns: Add support for direct wqe - -The current write wqe mechanism is to write to DDR first, and then notify -the hardware through doorbell to read the data. Direct wqe is a mechanism -to fill wqe directly into the hardware. In the case of light load, the wqe -will be filled into pcie bar space of the hardware, this will reduce one -memory access operation and therefore reduce the latency. SIMD instructions -allows cpu to write the 512 bits at one time to device memory, thus it can -be used for posting direct wqe. - -The process of post send of HIP08/09: - - +-----------+ - | post send | - +-----+-----+ - | - +-----+-----+ - | write WQE | - +-----+-----+ - | - | udma_to_device_barrier() - | - +-----+-----+ Y +-----------+ N - | HIP09 ? +------+ multi WR ?+-------------+ - +-----+-----+ +-----+-----+ | - | N | Y | - +-----+-----+ +-----+-----+ +--------+--------+ - | ring DB | | ring DB | |direct WQE (ST4) | - +-----------+ +-----------+ +-----------------+ - -Signed-off-by: Yixing Liu -Signed-off-by: Lang Cheng -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 5 +++- - providers/hns/hns_roce_u_hw_v2.c | 43 ++++++++++++++++++++++++++------ - providers/hns/hns_roce_u_hw_v2.h | 31 +++++++++++++---------- - providers/hns/hns_roce_u_verbs.c | 26 +++++++++++++++++-- - util/mmio.h | 27 +++++++++++++++++++- - 5 files changed, 107 insertions(+), 25 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index b3f48113..37711363 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -80,6 +80,8 @@ - - #define INVALID_SGE_LENGTH 0x80000000 - -+#define HNS_ROCE_DWQE_PAGE_SIZE 65536 -+ - #define HNS_ROCE_ADDRESS_MASK 0xFFFFFFFF - #define HNS_ROCE_ADDRESS_SHIFT 32 - -@@ -279,13 +281,14 @@ struct hns_roce_qp { - struct hns_roce_sge_ex ex_sge; - unsigned int next_sge; - int port_num; -- int sl; -+ uint8_t sl; - unsigned int qkey; - enum ibv_mtu path_mtu; - - struct hns_roce_rinl_buf rq_rinl_buf; - unsigned long flags; - int refcnt; /* specially used for XRC */ -+ void *dwqe_page; - }; - - struct hns_roce_av { -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 1169b64b..f102fd61 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -33,6 +33,7 @@ - #define _GNU_SOURCE - #include - #include -+#include - #include "hns_roce_u.h" - #include "hns_roce_u_db.h" - #include "hns_roce_u_hw_v2.h" -@@ -297,20 +298,40 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, - } - - static void hns_roce_update_sq_db(struct hns_roce_context *ctx, -- unsigned int qpn, unsigned int sl, -- unsigned int sq_head) -+ struct hns_roce_qp *qp) - { - struct hns_roce_db sq_db = {}; - -- sq_db.byte_4 = htole32(qpn); -+ sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num); - roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, - HNS_ROCE_V2_SQ_DB); -- sq_db.parameter = htole32(sq_head); -- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl); - -+ sq_db.parameter = htole32(qp->sq.head); -+ roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl); - hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db); - } - -+static void hns_roce_write512(uint64_t *dest, uint64_t *val) -+{ -+ mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe)); -+} -+ -+static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) -+{ -+ struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe; -+ -+ /* All kinds of DirectWQE have the same header field layout */ -+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1); -+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M, -+ RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl); -+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M, -+ RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT); -+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M, -+ RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); -+ -+ hns_roce_write512(qp->dwqe_page, wqe); -+} -+ - static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, - struct hns_roce_cq *cq) - { -@@ -339,8 +360,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, - return NULL; - } - --static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, -- struct hns_roce_qp *qp) -+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp) - { - uint32_t qpn = qp->verbs_qp.qp.qp_num; - uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -@@ -1196,6 +1216,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, - break; - case IBV_QPT_UD: - ret = set_ud_wqe(wqe, qp, wr, nreq, &sge_info); -+ qp->sl = to_hr_ah(wr->wr.ud.ah)->av.sl; - break; - default: - ret = EINVAL; -@@ -1214,7 +1235,10 @@ out: - - udma_to_device_barrier(); - -- hns_roce_update_sq_db(ctx, ibvqp->qp_num, qp->sl, qp->sq.head); -+ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) -+ hns_roce_write_dwqe(qp, wqe); -+ else -+ hns_roce_update_sq_db(ctx, qp); - - if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) - *(qp->sdb) = qp->sq.head & 0xffff; -@@ -1506,6 +1530,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - if (ret) - return ret; - -+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) -+ munmap(qp->dwqe_page, HNS_ROCE_DWQE_PAGE_SIZE); -+ - hns_roce_v2_clear_qp(ctx, qp); - - hns_roce_lock_cqs(ibqp); -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index c13d82e3..af72cd70 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -40,6 +40,8 @@ - - #define HNS_ROCE_CMDSN_MASK 0x3 - -+#define HNS_ROCE_SL_SHIFT 2 -+ - /* V2 REG DEFINITION */ - #define ROCEE_VF_DB_CFG0_OFFSET 0x0230 - -@@ -133,6 +135,8 @@ struct hns_roce_db { - #define DB_BYTE_4_CMD_S 24 - #define DB_BYTE_4_CMD_M GENMASK(27, 24) - -+#define DB_BYTE_4_FLAG_S 31 -+ - #define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0 - #define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0) - -@@ -216,8 +220,16 @@ struct hns_roce_rc_sq_wqe { - }; - - #define RC_SQ_WQE_BYTE_4_OPCODE_S 0 --#define RC_SQ_WQE_BYTE_4_OPCODE_M \ -- (((1UL << 5) - 1) << RC_SQ_WQE_BYTE_4_OPCODE_S) -+#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) -+ -+#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5 -+#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) -+ -+#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13 -+#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) -+ -+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15 -+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) - - #define RC_SQ_WQE_BYTE_4_OWNER_S 7 - -@@ -239,6 +251,8 @@ struct hns_roce_rc_sq_wqe { - - #define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22 - -+#define RC_SQ_WQE_BYTE_4_FLAG_S 31 -+ - #define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0 - #define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \ - (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S) -@@ -311,23 +325,12 @@ struct hns_roce_ud_sq_wqe { - #define UD_SQ_WQE_OPCODE_S 0 - #define UD_SQ_WQE_OPCODE_M GENMASK(4, 0) - --#define UD_SQ_WQE_DB_SL_L_S 5 --#define UD_SQ_WQE_DB_SL_L_M GENMASK(6, 5) -- --#define UD_SQ_WQE_DB_SL_H_S 13 --#define UD_SQ_WQE_DB_SL_H_M GENMASK(14, 13) -- --#define UD_SQ_WQE_INDEX_S 15 --#define UD_SQ_WQE_INDEX_M GENMASK(30, 15) -- - #define UD_SQ_WQE_OWNER_S 7 - - #define UD_SQ_WQE_CQE_S 8 - - #define UD_SQ_WQE_SE_S 11 - --#define UD_SQ_WQE_FLAG_S 31 -- - #define UD_SQ_WQE_PD_S 0 - #define UD_SQ_WQE_PD_M GENMASK(23, 0) - -@@ -376,4 +379,6 @@ struct hns_roce_ud_sq_wqe { - - #define MAX_SERVICE_LEVEL 0x7 - -+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); -+ - #endif /* _HNS_ROCE_U_HW_V2_H */ -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 125858d2..fc902815 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -1076,7 +1076,8 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx, - - static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, - struct hns_roce_qp *qp, -- struct hns_roce_context *ctx) -+ struct hns_roce_context *ctx, -+ uint64_t *dwqe_mmap_key) - { - struct hns_roce_create_qp_ex_resp resp_ex = {}; - struct hns_roce_create_qp_ex cmd_ex = {}; -@@ -1093,6 +1094,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, - &resp_ex.ibv_resp, sizeof(resp_ex)); - - qp->flags = resp_ex.drv_payload.cap_flags; -+ *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key; - - return ret; - } -@@ -1144,11 +1146,23 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, - return ret; - } - -+static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, -+ uint64_t dwqe_mmap_key) -+{ -+ qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE, -+ MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key); -+ if (qp->dwqe_page == MAP_FAILED) -+ return -EINVAL; -+ -+ return 0; -+} -+ - static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - struct ibv_qp_init_attr_ex *attr) - { - struct hns_roce_context *context = to_hr_ctx(ibv_ctx); - struct hns_roce_qp *qp; -+ uint64_t dwqe_mmap_key; - int ret; - - ret = verify_qp_create_attr(context, attr); -@@ -1167,7 +1181,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - if (ret) - goto err_buf; - -- ret = qp_exec_create_cmd(attr, qp, context); -+ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key); - if (ret) - goto err_cmd; - -@@ -1175,10 +1189,18 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - if (ret) - goto err_store; - -+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) { -+ ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key); -+ if (ret) -+ goto err_dwqe; -+ } -+ - qp_setup_config(attr, qp, context); - - return &qp->verbs_qp.qp; - -+err_dwqe: -+ hns_roce_v2_clear_qp(context, qp); - err_store: - ibv_cmd_destroy_qp(&qp->verbs_qp.qp); - err_cmd: -diff --git a/util/mmio.h b/util/mmio.h -index 101af9dd..01d1455e 100644 ---- a/util/mmio.h -+++ b/util/mmio.h -@@ -210,8 +210,33 @@ static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) - { - s390_mmio_write(dest, src, bytecnt); - } --#else - -+#elif defined(__aarch64__) || defined(__arm__) -+#include -+ -+static inline void _mmio_memcpy_x64_64b(void *dest, const void *src) -+{ -+ vst4q_u64(dest, vld4q_u64(src)); -+} -+ -+static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) -+{ -+ do { -+ _mmio_memcpy_x64_64b(dest, src); -+ bytecnt -= sizeof(uint64x2x4_t); -+ src += sizeof(uint64x2x4_t); -+ } while (bytecnt > 0); -+} -+ -+#define mmio_memcpy_x64(dest, src, bytecount) \ -+ ({ \ -+ if (__builtin_constant_p((bytecount) == 64)) \ -+ _mmio_memcpy_x64_64b((dest), (src)); \ -+ else \ -+ _mmio_memcpy_x64((dest), (src), (bytecount)); \ -+ }) -+ -+#else - /* Transfer is some multiple of 64 bytes */ - static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) - { --- -2.33.0 - diff --git a/0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch b/0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch deleted file mode 100644 index b19d4c1b7662a37dc60dd3abf78645bbf9374e67..0000000000000000000000000000000000000000 --- a/0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 608c142e7cbac2a6c02071022fe87b081a6ddc4f Mon Sep 17 00:00:00 2001 -From: Yixing Liu -Date: Tue, 21 Dec 2021 21:38:08 +0800 -Subject: [PATCH 8/8] libhns: Use new SQ doorbell register for HIP09 - -HIP09 set a new BAR space for SQ doorbell. Each SQ doorbell has an -independent BAR space and the size is 64KB. SQ doorbell share -the same BAR space with direct WQE. - -Signed-off-by: Yixing Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 1 + - providers/hns/hns_roce_u_hw_v2.c | 4 ++-- - providers/hns/hns_roce_u_verbs.c | 5 +++++ - 3 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 37711363..460363b7 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -238,6 +238,7 @@ struct hns_roce_wq { - unsigned int wqe_shift; - unsigned int shift; /* wq size is 2^shift */ - int offset; -+ void *db_reg; - }; - - /* record the result of sge process */ -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index f102fd61..9cbc0aac 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, - - sq_db.parameter = htole32(qp->sq.head); - roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl); -- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db); -+ hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db); - } - - static void hns_roce_write512(uint64_t *dest, uint64_t *val) -@@ -329,7 +329,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) - roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M, - RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); - -- hns_roce_write512(qp->dwqe_page, wqe); -+ hns_roce_write512(qp->sq.db_reg, wqe); - } - - static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index fc902815..c5022c83 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -1117,6 +1117,11 @@ static void qp_setup_config(struct ibv_qp_init_attr_ex *attr, - } - - qp->max_inline_data = attr->cap.max_inline_data; -+ -+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) -+ qp->sq.db_reg = qp->dwqe_page; -+ else -+ qp->sq.db_reg = ctx->uar + ROCEE_VF_DB_CFG0_OFFSET; - } - - void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx) --- -2.33.0 - diff --git a/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch b/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch deleted file mode 100644 index f68570161a76d4254ad420909beaa5c581689964..0000000000000000000000000000000000000000 --- a/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 5cc1a047c4d71ced86b0f71f66adf12475a3c788 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:35 +0800 -Subject: libhns: Bugfix for checking whether the SRQ is full when posting WR - -If the user post a list of WRs, the head in the for loop is not updated in -time, and the judgment of if (head == tail) becomes invalid. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 17 +++++++++++++---- - providers/hns/hns_roce_u_verbs.c | 2 +- - 2 files changed, 14 insertions(+), 5 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 82124082..0c15bdbe 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1527,6 +1527,15 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - return ret; - } - -+static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) -+{ -+ struct hns_roce_idx_que *idx_que = &srq->idx_que; -+ unsigned int cur; -+ -+ cur = idx_que->head - idx_que->tail; -+ return cur >= srq->wqe_cnt - 1; -+} -+ - static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; -@@ -1577,14 +1586,14 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - - max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { -- if (wr->num_sge > max_sge) { -- ret = -EINVAL; -+ if (hns_roce_v2_srqwq_overflow(srq)) { -+ ret = -ENOMEM; - *bad_wr = wr; - break; - } - -- if (srq->idx_que.head == srq->idx_que.tail) { -- ret = -ENOMEM; -+ if (wr->num_sge > max_sge) { -+ ret = -EINVAL; - *bad_wr = wr; - break; - } -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 3abf7b48..dace35fd 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -492,7 +492,7 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - idx_que->bitmap[i] = ~(0UL); - - idx_que->head = 0; -- idx_que->tail = srq->wqe_cnt - 1; -+ idx_que->tail = 0; - - return 0; - } --- -2.30.0 - diff --git a/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch b/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch deleted file mode 100644 index 205419a31bcf5e0423f96d0f3e4f32ebdef1a880..0000000000000000000000000000000000000000 --- a/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch +++ /dev/null @@ -1,30 +0,0 @@ -From a79800afbbc48e5c5274bf3fc0e890705b3a596d Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:36 +0800 -Subject: libhns: Allow users to create a 0-depth SRQs - -Users is allowed to create 0-depth SRQs, so the judgement about whether -max_wr is zero should be removed. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index dace35fd..2d1a6de3 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -525,7 +525,7 @@ static int hns_roce_verify_srq(struct hns_roce_context *context, - init_attr->srq_type != IBV_SRQT_XRC) - return -EINVAL; - -- if (!init_attr->attr.max_wr || !init_attr->attr.max_sge || -+ if (!init_attr->attr.max_sge || - init_attr->attr.max_wr > context->max_srq_wr || - init_attr->attr.max_sge > context->max_srq_sge) - return -EINVAL; --- -2.30.0 - diff --git a/0011-libhns-Refactor-the-process-of-post_srq_recv.patch b/0011-libhns-Refactor-the-process-of-post_srq_recv.patch deleted file mode 100644 index 693fc83c6a6e89b0da1833723d468213adccd2bf..0000000000000000000000000000000000000000 --- a/0011-libhns-Refactor-the-process-of-post_srq_recv.patch +++ /dev/null @@ -1,176 +0,0 @@ -From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:37 +0800 -Subject: libhns: Refactor the process of post_srq_recv - -SRQ is a shared queue, it mainly consists of four parts: - -1. wqe buf: wqe buf is used to store wqe data. - -2. wqe_idx buf: the cqe of SRQ is not generated in the order of wqe, so -the wqe_idx corresponding to the idle WQE needs to be placed in an FIFO -queue, it can instruct the hardware to obtain the corresponding WQE. - -3.bitmap: bitmap is used to generate and release wqe_idx. When the user -has a new WR, the driver finds the idx of the idle wqe in bitmap. When the -CQE of wqe is generated, the driver releases the idx. - -4. wr_id buf: wr_id buf is used to store the user's wr_id, then return it -to the user when ibv_poll_cq() is invoked. - -After refactor, the functions of the four parts are more clearer. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++------------- - 1 file changed, 57 insertions(+), 38 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 0c15bdbe..b622eaef 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) - return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift); - } - --static void *get_srq_wqe(struct hns_roce_srq *srq, int n) -+static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) - { - return srq->buf.buf + (n << srq->wqe_shift); - } -@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) - return cur >= srq->wqe_cnt - 1; - } - --static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) -+static int check_post_srq_valid(struct hns_roce_srq *srq, -+ struct ibv_recv_wr *wr) -+{ -+ unsigned int max_sge = srq->max_gs - srq->rsv_sge; -+ -+ if (hns_roce_v2_srqwq_overflow(srq)) -+ return -ENOMEM; -+ -+ if (wr->num_sge > max_sge) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; - int bit_num; -@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) - return 0; - } - -+static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx, -+ struct ibv_recv_wr *wr) -+{ -+ struct hns_roce_v2_wqe_data_seg *dseg; -+ int i; -+ -+ dseg = get_srq_wqe(srq, wqe_idx); -+ -+ for (i = 0; i < wr->num_sge; ++i) { -+ dseg[i].len = htole32(wr->sg_list[i].length); -+ dseg[i].lkey = htole32(wr->sg_list[i].lkey); -+ dseg[i].addr = htole64(wr->sg_list[i].addr); -+ } -+ -+ /* hw stop reading when identify the last one */ -+ if (srq->rsv_sge) { -+ dseg[i].len = htole32(INVALID_SGE_LENGTH); -+ dseg[i].lkey = htole32(0x0); -+ dseg[i].addr = 0; -+ } -+} -+ -+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) -+{ -+ struct hns_roce_idx_que *idx_que = &srq->idx_que; -+ unsigned int head; -+ __le32 *idx_buf; -+ -+ head = idx_que->head & (srq->wqe_cnt - 1); -+ -+ idx_buf = get_idx_buf(idx_que, head); -+ *idx_buf = htole32(wqe_idx); -+ -+ idx_que->head++; -+} -+ - static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - struct ibv_recv_wr *wr, - struct ibv_recv_wr **bad_wr) - { - struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context); - struct hns_roce_srq *srq = to_hr_srq(ib_srq); -- struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_db srq_db; -- unsigned int max_sge; -- __le32 *srq_idx; -+ unsigned int wqe_idx; - int ret = 0; -- int wqe_idx; -- void *wqe; - int nreq; -- int ind; -- int i; - - pthread_spin_lock(&srq->lock); - -- /* current idx of srqwq */ -- ind = srq->idx_que.head & (srq->wqe_cnt - 1); -- -- max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { -- if (hns_roce_v2_srqwq_overflow(srq)) { -- ret = -ENOMEM; -- *bad_wr = wr; -- break; -- } -- -- if (wr->num_sge > max_sge) { -- ret = -EINVAL; -+ ret = check_post_srq_valid(srq, wr); -+ if (ret) { - *bad_wr = wr; - break; - } -@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - break; - } - -- wqe = get_srq_wqe(srq, wqe_idx); -- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; -- -- for (i = 0; i < wr->num_sge; ++i) { -- set_data_seg_v2(dseg, wr->sg_list + i); -- dseg++; -- } -- -- /* hw stop reading when identify the last one */ -- if (srq->rsv_sge) -- set_ending_data_seg(dseg); -- -- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind); -- *srq_idx = htole32(wqe_idx); -+ fill_srq_wqe(srq, wqe_idx, wr); -+ fill_wqe_idx(srq, wqe_idx); - - srq->wrid[wqe_idx] = wr->wr_id; -- ind = (ind + 1) & (srq->wqe_cnt - 1); - } - - if (nreq) { -- srq->idx_que.head += nreq; -- - /* - * Make sure that descriptors are written before - * we write doorbell record. --- -2.30.0 - diff --git a/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch b/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch deleted file mode 100644 index b8569f2d05dbba8d78d918fbfcc8473c4eb25e27..0000000000000000000000000000000000000000 --- a/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch +++ /dev/null @@ -1,33 +0,0 @@ -From a18b0ee409d3382aa556b8f06a6cd6bfbef3f5c8 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:38 +0800 -Subject: libhns: Set srqlimit to 0 when creating SRQ - -According to the IB specification, the srq_limt parameter should not be -configured when creating srq. But the libhns does not set attr.srq_limit -to 0 currently. As a result, when attr.srq_limit provided by the user is -not 0, the value of attr.srq_limit returned to the user will be different -from that obtained by ibv_query_srq(). Therefore, the driver should set -attr.srq_limit to 0 when creating SRQ. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 2d1a6de3..107da753 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -580,6 +580,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context, - srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1); - srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); - attr->max_sge = srq->max_gs; -+ attr->srq_limit = 0; - - ret = hns_roce_create_idx_que(srq); - if (ret) --- -2.30.0 - diff --git a/0013-libhns-Refactor-the-process-of-create_srq.patch b/0013-libhns-Refactor-the-process-of-create_srq.patch deleted file mode 100644 index a2a08551149c1019c9cad344a605067dab7d299d..0000000000000000000000000000000000000000 --- a/0013-libhns-Refactor-the-process-of-create_srq.patch +++ /dev/null @@ -1,367 +0,0 @@ -From b914c76318f5b95e3157c3cbf1ccb49ec6d27635 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:39 +0800 -Subject: libhns: Refactor the process of create_srq - -Reorganize create_srq() as several sub-functions to make the process -clearer. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u.h | 7 +- - providers/hns/hns_roce_u_hw_v2.c | 2 +- - providers/hns/hns_roce_u_verbs.c | 178 ++++++++++++++++++------------- - 3 files changed, 105 insertions(+), 82 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index b3f48113..a437727c 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -211,7 +211,8 @@ struct hns_roce_idx_que { - - struct hns_roce_srq { - struct verbs_srq verbs_srq; -- struct hns_roce_buf buf; -+ struct hns_roce_idx_que idx_que; -+ struct hns_roce_buf wqe_buf; - pthread_spinlock_t lock; - unsigned long *wrid; - unsigned int srqn; -@@ -221,7 +222,6 @@ struct hns_roce_srq { - unsigned int wqe_shift; - unsigned int *db; - unsigned short counter; -- struct hns_roce_idx_que idx_que; - }; - - struct hns_roce_wq { -@@ -343,8 +343,7 @@ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq) - - static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq) - { -- return container_of(container_of(ibv_srq, struct verbs_srq, srq), -- struct hns_roce_srq, verbs_srq); -+ return container_of(ibv_srq, struct hns_roce_srq, verbs_srq.srq); - } - - static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp) -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index b622eaef..d4e7e4f9 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -244,7 +244,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) - - static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) - { -- return srq->buf.buf + (n << srq->wqe_shift); -+ return srq->wqe_buf.buf + (n << srq->wqe_shift); - } - - static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 107da753..75b9e530 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -432,17 +432,23 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx, - uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >> - ctx->srq_table_shift; - -+ pthread_mutex_lock(&ctx->srq_table_mutex); -+ - if (!ctx->srq_table[tind].refcnt) { - ctx->srq_table[tind].table = - calloc(ctx->srq_table_mask + 1, - sizeof(struct hns_roce_srq *)); -- if (!ctx->srq_table[tind].table) -+ if (!ctx->srq_table[tind].table) { -+ pthread_mutex_unlock(&ctx->srq_table_mutex); - return -ENOMEM; -+ } - } - - ++ctx->srq_table[tind].refcnt; - ctx->srq_table[tind].table[srq->srqn & ctx->srq_table_mask] = srq; - -+ pthread_mutex_unlock(&ctx->srq_table_mutex); -+ - return 0; - } - -@@ -461,13 +467,46 @@ static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn) - { - uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift; - -+ pthread_mutex_lock(&ctx->srq_table_mutex); -+ - if (!--ctx->srq_table[tind].refcnt) - free(ctx->srq_table[tind].table); - else - ctx->srq_table[tind].table[srqn & ctx->srq_table_mask] = NULL; -+ -+ pthread_mutex_unlock(&ctx->srq_table_mutex); -+} -+ -+static int verify_srq_create_attr(struct hns_roce_context *context, -+ struct ibv_srq_init_attr_ex *attr) -+{ -+ if (attr->srq_type != IBV_SRQT_BASIC && -+ attr->srq_type != IBV_SRQT_XRC) -+ return -EINVAL; -+ -+ if (!attr->attr.max_sge || -+ attr->attr.max_wr > context->max_srq_wr || -+ attr->attr.max_sge > context->max_srq_sge) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq, -+ struct ibv_srq_init_attr_ex *attr) -+{ -+ if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) -+ srq->rsv_sge = 1; -+ -+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1); -+ srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge); -+ srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * -+ srq->max_gs)); -+ attr->attr.max_sge = srq->max_gs; -+ attr->attr.srq_limit = 0; - } - --static int hns_roce_create_idx_que(struct hns_roce_srq *srq) -+static int alloc_srq_idx_que(struct hns_roce_srq *srq) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; - unsigned int buf_size; -@@ -478,13 +517,13 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - BIT_CNT_PER_LONG; - idx_que->bitmap = calloc(idx_que->bitmap_cnt, sizeof(unsigned long)); - if (!idx_que->bitmap) -- return ENOMEM; -+ return -ENOMEM; - - buf_size = to_hr_hem_entries_size(srq->wqe_cnt, idx_que->entry_shift); - if (hns_roce_alloc_buf(&idx_que->buf, buf_size, HNS_HW_PAGE_SIZE)) { - free(idx_que->bitmap); - idx_que->bitmap = NULL; -- return ENOMEM; -+ return -ENOMEM; - } - - /* init the idx_que bitmap */ -@@ -497,40 +536,48 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - return 0; - } - --static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq) -+static int alloc_srq_wqe_buf(struct hns_roce_srq *srq) - { -- int srq_buf_size; -+ int buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); - -- srq->wrid = calloc(srq->wqe_cnt, sizeof(unsigned long)); -- if (!srq->wrid) -- return ENOMEM; -+ return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE); -+} - -- srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * -- srq->max_gs)); -- srq_buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); -+static int alloc_srq_buf(struct hns_roce_srq *srq) -+{ -+ int ret; - -- /* allocate srq wqe buf */ -- if (hns_roce_alloc_buf(&srq->buf, srq_buf_size, HNS_HW_PAGE_SIZE)) { -- free(srq->wrid); -- return ENOMEM; -+ ret = alloc_srq_idx_que(srq); -+ if (ret) -+ return ret; -+ -+ ret = alloc_srq_wqe_buf(srq); -+ if (ret) -+ goto err_idx_que; -+ -+ srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid)); -+ if (!srq->wrid) { -+ ret = -ENOMEM; -+ goto err_wqe_buf; - } - - return 0; --} - --static int hns_roce_verify_srq(struct hns_roce_context *context, -- struct ibv_srq_init_attr_ex *init_attr) --{ -- if (init_attr->srq_type != IBV_SRQT_BASIC && -- init_attr->srq_type != IBV_SRQT_XRC) -- return -EINVAL; -+err_wqe_buf: -+ hns_roce_free_buf(&srq->wqe_buf); -+err_idx_que: -+ hns_roce_free_buf(&srq->idx_que.buf); -+ free(srq->idx_que.bitmap); - -- if (!init_attr->attr.max_sge || -- init_attr->attr.max_wr > context->max_srq_wr || -- init_attr->attr.max_sge > context->max_srq_sge) -- return -EINVAL; -+ return ret; -+} - -- return 0; -+static void free_srq_buf(struct hns_roce_srq *srq) -+{ -+ free(srq->wrid); -+ hns_roce_free_buf(&srq->wqe_buf); -+ hns_roce_free_buf(&srq->idx_que.buf); -+ free(srq->idx_que.bitmap); - } - - static int exec_srq_create_cmd(struct ibv_context *context, -@@ -541,7 +588,7 @@ static int exec_srq_create_cmd(struct ibv_context *context, - struct hns_roce_create_srq_ex cmd_ex = {}; - int ret; - -- cmd_ex.buf_addr = (uintptr_t)srq->buf.buf; -+ cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf; - cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf; - cmd_ex.db_addr = (uintptr_t)srq->db; - -@@ -559,57 +606,44 @@ static int exec_srq_create_cmd(struct ibv_context *context, - static struct ibv_srq *create_srq(struct ibv_context *context, - struct ibv_srq_init_attr_ex *init_attr) - { -- struct hns_roce_context *ctx = to_hr_ctx(context); -- struct ibv_srq_attr *attr = &init_attr->attr; -+ struct hns_roce_context *hr_ctx = to_hr_ctx(context); - struct hns_roce_srq *srq; - int ret; - -- if (hns_roce_verify_srq(ctx, init_attr)) -- return NULL; -+ ret = verify_srq_create_attr(hr_ctx, init_attr); -+ if (ret) -+ goto err; - - srq = calloc(1, sizeof(*srq)); -- if (!srq) -- return NULL; -+ if (!srq) { -+ ret = -ENOMEM; -+ goto err; -+ } - - if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) - goto err_free_srq; - -- if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) -- srq->rsv_sge = 1; -- -- srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1); -- srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); -- attr->max_sge = srq->max_gs; -- attr->srq_limit = 0; -- -- ret = hns_roce_create_idx_que(srq); -- if (ret) -+ set_srq_param(context, srq, init_attr); -+ if (alloc_srq_buf(srq)) - goto err_free_srq; - -- ret = hns_roce_alloc_srq_buf(srq); -- if (ret) -- goto err_idx_que; -- -- srq->db = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB); -+ srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB); - if (!srq->db) - goto err_srq_buf; - -- *(srq->db) = 0; -- -- pthread_mutex_lock(&ctx->srq_table_mutex); -+ *srq->db = 0; - - ret = exec_srq_create_cmd(context, srq, init_attr); - if (ret) - goto err_srq_db; - -- ret = hns_roce_store_srq(ctx, srq); -+ ret = hns_roce_store_srq(hr_ctx, srq); - if (ret) - goto err_destroy_srq; - -- pthread_mutex_unlock(&ctx->srq_table_mutex); -- -- srq->max_gs = attr->max_sge; -- attr->max_sge = min(attr->max_sge - srq->rsv_sge, ctx->max_srq_sge); -+ srq->max_gs = init_attr->attr.max_sge; -+ init_attr->attr.max_sge = -+ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge); - - return &srq->verbs_srq.srq; - -@@ -617,20 +651,19 @@ err_destroy_srq: - ibv_cmd_destroy_srq(&srq->verbs_srq.srq); - - err_srq_db: -- pthread_mutex_unlock(&ctx->srq_table_mutex); -- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB); -+ hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB); - - err_srq_buf: -- free(srq->wrid); -- hns_roce_free_buf(&srq->buf); -- --err_idx_que: -- free(srq->idx_que.bitmap); -- hns_roce_free_buf(&srq->idx_que.buf); -+ free_srq_buf(srq); - - err_free_srq: - free(srq); - -+err: -+ if (ret < 0) -+ ret = -ret; -+ -+ errno = ret; - return NULL; - } - -@@ -690,23 +723,14 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) - struct hns_roce_srq *srq = to_hr_srq(ibv_srq); - int ret; - -- pthread_mutex_lock(&ctx->srq_table_mutex); -- - ret = ibv_cmd_destroy_srq(ibv_srq); -- if (ret) { -- pthread_mutex_unlock(&ctx->srq_table_mutex); -+ if (ret) - return ret; -- } - - hns_roce_clear_srq(ctx, srq->srqn); - -- pthread_mutex_unlock(&ctx->srq_table_mutex); -- - hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB); -- hns_roce_free_buf(&srq->buf); -- free(srq->wrid); -- hns_roce_free_buf(&srq->idx_que.buf); -- free(srq->idx_que.bitmap); -+ free_srq_buf(srq); - free(srq); - - return 0; --- -2.30.0 - diff --git a/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch b/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch deleted file mode 100644 index 31dbe9f58e3dc7cc13440fb781b683fae674b3c9..0000000000000000000000000000000000000000 --- a/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch +++ /dev/null @@ -1,69 +0,0 @@ -From d68ac72a8e4f2cf9754d3fcbbb8ff2a03e514c2f Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:40 +0800 -Subject: libhns: Remove the reserved wqe of SRQ - -There is an unreasonable reserved WQE in SRQ, it should be removed. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u.h | 1 + - providers/hns/hns_roce_u_hw_v2.c | 4 +--- - providers/hns/hns_roce_u_verbs.c | 5 ++++- - 3 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index a437727c..0d7abd81 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -64,6 +64,7 @@ - #define HNS_ROCE_MIN_CQE_NUM 0x40 - #define HNS_ROCE_V1_MIN_WQE_NUM 0x20 - #define HNS_ROCE_V2_MIN_WQE_NUM 0x40 -+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1 - - #define HNS_ROCE_CQE_SIZE 0x20 - #define HNS_ROCE_V3_CQE_SIZE 0x40 -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index d4e7e4f9..2fb6cdaf 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1530,10 +1530,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; -- unsigned int cur; - -- cur = idx_que->head - idx_que->tail; -- return cur >= srq->wqe_cnt - 1; -+ return idx_que->head - idx_que->tail >= srq->wqe_cnt; - } - - static int check_post_srq_valid(struct hns_roce_srq *srq, -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 75b9e530..4847639b 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -489,6 +489,9 @@ static int verify_srq_create_attr(struct hns_roce_context *context, - attr->attr.max_sge > context->max_srq_sge) - return -EINVAL; - -+ attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr, -+ HNS_ROCE_MIN_SRQ_WQE_NUM); -+ - return 0; - } - -@@ -498,7 +501,7 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq, - if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) - srq->rsv_sge = 1; - -- srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1); -+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr); - srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge); - srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * - srq->max_gs)); --- -2.30.0 - diff --git a/0015-libhns-Refactor-process-of-setting-extended-sge.patch b/0015-libhns-Refactor-process-of-setting-extended-sge.patch deleted file mode 100644 index 3555177f7d5be107ed0fbb016b841d0e1557bc26..0000000000000000000000000000000000000000 --- a/0015-libhns-Refactor-process-of-setting-extended-sge.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 11c81d0e3a987f95b74e03b5e592a45029302f1d Mon Sep 17 00:00:00 2001 -From: Weihang Li -Date: Fri, 14 May 2021 10:02:56 +0800 -Subject: libhns: Refactor process of setting extended sge - -Refactor and encapsulate the parts of getting number of extended sge a WQE -can use to make it easier to understand. - -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++------------ - 1 file changed, 29 insertions(+), 16 deletions(-) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 30ab072a..a8508fc5 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -920,31 +920,44 @@ err_alloc: - return -ENOMEM; - } - --static void set_extend_sge_param(struct hns_roce_device *hr_dev, -- struct ibv_qp_init_attr_ex *attr, -- struct hns_roce_qp *qp, unsigned int wr_cnt) -+static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) - { -- int cnt = 0; -+ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD) -+ return qp->sq.max_gs; -+ -+ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) -+ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; -+ -+ return 0; -+} -+ -+static void set_ext_sge_param(struct hns_roce_device *hr_dev, -+ struct ibv_qp_init_attr_ex *attr, -+ struct hns_roce_qp *qp, unsigned int wr_cnt) -+{ -+ unsigned int total_sge_cnt; -+ unsigned int wqe_sge_cnt; -+ -+ qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; - - if (hr_dev->hw_version == HNS_ROCE_HW_VER1) { - qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; -- } else { -- qp->sq.max_gs = attr->cap.max_send_sge; -- if (attr->qp_type == IBV_QPT_UD) -- cnt = roundup_pow_of_two(wr_cnt * qp->sq.max_gs); -- else if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) -- cnt = roundup_pow_of_two(wr_cnt * -- (qp->sq.max_gs - -- HNS_ROCE_SGE_IN_WQE)); -+ return; - } - -- qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; -+ qp->sq.max_gs = attr->cap.max_send_sge; -+ -+ wqe_sge_cnt = get_wqe_ext_sge_cnt(qp); - - /* If the number of extended sge is not zero, they MUST use the - * space of HNS_HW_PAGE_SIZE at least. - */ -- qp->ex_sge.sge_cnt = cnt ? -- max(cnt, HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0; -+ if (wqe_sge_cnt) { -+ total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt); -+ qp->ex_sge.sge_cnt = -+ max(total_sge_cnt, -+ (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); -+ } - } - - static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, -@@ -988,7 +1001,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, - qp->sq.wqe_cnt = cnt; - qp->sq.shift = hr_ilog32(cnt); - -- set_extend_sge_param(hr_dev, attr, qp, cnt); -+ set_ext_sge_param(hr_dev, attr, qp, cnt); - - qp->sq.max_post = min(ctx->max_qp_wr, cnt); - qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs); --- -2.30.0 - diff --git a/0016-libhns-Optimize-set_sge-process.patch b/0016-libhns-Optimize-set_sge-process.patch deleted file mode 100644 index 7754801a38d74427e2fee563dcc75f61571c2612..0000000000000000000000000000000000000000 --- a/0016-libhns-Optimize-set_sge-process.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Fri, 14 May 2021 10:02:57 +0800 -Subject: libhns: Optimize set_sge process - -Use local variables to avoid frequent ldr/str operations. And because UD's -process of setting sge is more simple then RC, set_sge() can be splited -into two functions for compiler optimization. - -Signed-off-by: Lang Cheng -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++--------- - 1 file changed, 61 insertions(+), 22 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 4988943a..dc79a6f8 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx) - return 0; - } - --static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg, -- struct hns_roce_qp *qp, struct ibv_send_wr *wr, -- struct hns_roce_sge_info *sge_info) -+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg, -+ struct hns_roce_qp *qp, struct ibv_send_wr *wr, -+ struct hns_roce_sge_info *sge_info) - { -+ uint32_t mask = qp->ex_sge.sge_cnt - 1; -+ uint32_t index = sge_info->start_idx; -+ struct ibv_sge *sge = wr->sg_list; -+ uint32_t len = 0; -+ uint32_t cnt = 0; -+ int flag; - int i; - -- sge_info->valid_num = 0; -- sge_info->total_len = 0; -+ flag = (wr->send_flags & IBV_SEND_INLINE && -+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD && -+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP); - -- for (i = 0; i < wr->num_sge; i++) { -- if (unlikely(!wr->sg_list[i].length)) -+ for (i = 0; i < wr->num_sge; i++, sge++) { -+ if (unlikely(!sge->length)) - continue; - -- sge_info->total_len += wr->sg_list[i].length; -- sge_info->valid_num++; -+ len += sge->length; -+ cnt++; - -- if (wr->send_flags & IBV_SEND_INLINE && -- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD && -- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP) -+ if (flag) - continue; - -- /* No inner sge in UD wqe */ -- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE && -- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) { -- set_data_seg_v2(dseg, wr->sg_list + i); -+ if (cnt <= HNS_ROCE_SGE_IN_WQE) { -+ set_data_seg_v2(dseg, sge); - dseg++; - } else { -- dseg = get_send_sge_ex(qp, sge_info->start_idx & -- (qp->ex_sge.sge_cnt - 1)); -- set_data_seg_v2(dseg, wr->sg_list + i); -- sge_info->start_idx++; -+ dseg = get_send_sge_ex(qp, index & mask); -+ set_data_seg_v2(dseg, sge); -+ index++; - } - } -+ -+ sge_info->start_idx = index; -+ sge_info->valid_num = cnt; -+ sge_info->total_len = len; -+} -+ -+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg, -+ struct hns_roce_qp *qp, struct ibv_send_wr *wr, -+ struct hns_roce_sge_info *sge_info) -+{ -+ int flag = wr->send_flags & IBV_SEND_INLINE; -+ uint32_t mask = qp->ex_sge.sge_cnt - 1; -+ uint32_t index = sge_info->start_idx; -+ struct ibv_sge *sge = wr->sg_list; -+ uint32_t len = 0; -+ uint32_t cnt = 0; -+ int i; -+ -+ for (i = 0; i < wr->num_sge; i++, sge++) { -+ if (unlikely(!sge->length)) -+ continue; -+ -+ len += sge->length; -+ cnt++; -+ -+ if (flag) -+ continue; -+ -+ /* No inner sge in UD wqe */ -+ dseg = get_send_sge_ex(qp, index & mask); -+ set_data_seg_v2(dseg, sge); -+ index++; -+ } -+ -+ sge_info->start_idx = index; -+ sge_info->valid_num = cnt; -+ sge_info->total_len = len; - } - - static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, -@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, - UD_SQ_WQE_MSG_START_SGE_IDX_S, - sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); - -- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); -+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); - - ud_sq_wqe->msg_len = htole32(sge_info->total_len); - -@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - wqe += sizeof(struct hns_roce_rc_sq_wqe); - dseg = wqe; - -- set_sge(dseg, qp, wr, sge_info); -+ set_rc_sge(dseg, qp, wr, sge_info); - - rc_sq_wqe->msg_len = htole32(sge_info->total_len); - --- -2.30.0 - diff --git a/backport-fixbug-increase-maximum-number-of-cpus-rdma.patch b/backport-fixbug-increase-maximum-number-of-cpus-rdma.patch deleted file mode 100644 index bba1d01e227eea3ee95ce96b5f5099cde301fbc1..0000000000000000000000000000000000000000 --- a/backport-fixbug-increase-maximum-number-of-cpus-rdma.patch +++ /dev/null @@ -1,27 +0,0 @@ -From c381cfa26ba6163b9cc51212702e64bf1d83f838 Mon Sep 17 00:00:00 2001 -From: swimlessbird <52704385+swimlessbird@users.noreply.github.com> -Date: Fri, 17 Sep 2021 14:35:05 +0800 -Subject: [PATCH] ibdiags: Increase maximum number of CPUs - -In modern systems, the old limit (8) is small enough, so increase -to something larger (256). - -Signed-off-by: Suwan Sun -Signed-off-by: Leon Romanovsky ---- - infiniband-diags/ibsysstat.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/infiniband-diags/ibsysstat.c b/infiniband-diags/ibsysstat.c -index 6ff7ca0c4..73972d039 100644 ---- a/infiniband-diags/ibsysstat.c -+++ b/infiniband-diags/ibsysstat.c -@@ -41,7 +41,7 @@ - - #include "ibdiag_common.h" - --#define MAX_CPUS 8 -+#define MAX_CPUS 256 - - static struct ibmad_port *srcport; - diff --git a/rdma-core-35.1.tar.gz b/rdma-core-35.1.tar.gz deleted file mode 100644 index 504375077359244632cf12625777c65076935774..0000000000000000000000000000000000000000 Binary files a/rdma-core-35.1.tar.gz and /dev/null differ diff --git a/rdma-core-41.0.tar.gz b/rdma-core-41.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19fbd78398613efd77dfd570feb6c558a04666dc Binary files /dev/null and b/rdma-core-41.0.tar.gz differ diff --git a/rdma-core.spec b/rdma-core.spec index f2b7bd0687137e6c5299b7b46d3535c393d9373c..d3ae5ff0e3e63c97b6943402b409327660896fe0 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,29 +1,11 @@ Name: rdma-core -Version: 35.1 -Release: 3 +Version: 41.0 +Release: 1 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz -Patch0: backport-fixbug-increase-maximum-number-of-cpus-rdma.patch -Patch1: 0001-Update-kernel-headers.patch -Patch2: 0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch -Patch3: 0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch -Patch4: 0004-libhns-Remove-unsupported-QP-type.patch -Patch5: 0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch -Patch6: 0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch -Patch7: 0007-libhns-Add-support-for-direct-wqe.patch -Patch8: 0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch -Patch9: 0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch -Patch10: 0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch -Patch11: 0011-libhns-Refactor-the-process-of-post_srq_recv.patch -Patch12: 0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch -Patch13: 0013-libhns-Refactor-the-process-of-create_srq.patch -Patch14: 0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch -Patch15: 0015-libhns-Refactor-process-of-setting-extended-sge.patch -Patch16: 0016-libhns-Optimize-set_sge-process.patch - BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel BuildRequires: python3-devel python3-Cython python3 python3-docutils perl-generators @@ -267,6 +249,12 @@ fi %{_mandir}/* %changelog +* Mon Jul 25 2022 tangchengchang - 41.0-1 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: update to 41.0 + * Mon Jul 11 2022 luozhengfeng - 35.1-3 - Type: bugfix - ID: NA