diff --git a/0001-Update-kernel-headers.patch b/0001-Update-kernel-headers.patch deleted file mode 100644 index 8a96d70a8fcd4636b0d88dce6d2972929c71ea39..0000000000000000000000000000000000000000 --- a/0001-Update-kernel-headers.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 693d55e80976217215844258e5b78bc115382689 Mon Sep 17 00:00:00 2001 -From: Guofeng Yue -Date: Mon, 10 Jan 2022 10:44:23 +0800 -Subject: [PATCH 1/8] Update kernel headers - -To commit 62c4d8878d13 ("RDMA/hns: Remove support for HIP06"). - -Signed-off-by: Guofeng Yue ---- - kernel-headers/rdma/hns-abi.h | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h -index 42b17765..abfd36e2 100644 ---- a/kernel-headers/rdma/hns-abi.h -+++ b/kernel-headers/rdma/hns-abi.h -@@ -77,17 +77,19 @@ enum hns_roce_qp_cap_flags { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, - HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, - HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, -+ HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, - }; - - struct hns_roce_ib_create_qp_resp { - __aligned_u64 cap_flags; -+ __aligned_u64 dwqe_mmap_key; - }; - - struct hns_roce_ib_alloc_ucontext_resp { - __u32 qp_tab_size; - __u32 cqe_size; -- __u32 srq_tab_size; -- __u32 reserved; -+ __u32 srq_tab_size; -+ __u32 reserved; - }; - - struct hns_roce_ib_alloc_pd_resp { --- -2.33.0 - diff --git a/0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch b/0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch deleted file mode 100644 index 7dcc9b88e9113c24a20889e1f8de7449bce436de..0000000000000000000000000000000000000000 --- a/0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 08ec3c43bf9710fdf3ca664f7cd63436e67339d7 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:34 +0800 -Subject: [PATCH 2/8] libhns: Fix the ownership of the head/tail pointer of SRQ - WQE - -The CQE of SRQ is not generated in the order of wqe, so the wqe_idx -corresponding to the idle WQE should be placed in a FIFO, then the hardware -will be instructed to obtain the corresponding WQE. Therefore, the WQ -of SRQ has no concept of head pointer and tail pointer, but the queue of -wqe_idx does. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u.h | 4 ++-- - providers/hns/hns_roce_u_hw_v2.c | 12 ++++++------ - providers/hns/hns_roce_u_verbs.c | 6 +++--- - 3 files changed, 11 insertions(+), 11 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 8f805dd1..b3f48113 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -205,6 +205,8 @@ struct hns_roce_idx_que { - int entry_shift; - unsigned long *bitmap; - int bitmap_cnt; -+ unsigned int head; -+ unsigned int tail; - }; - - struct hns_roce_srq { -@@ -217,8 +219,6 @@ struct hns_roce_srq { - unsigned int max_gs; - unsigned int rsv_sge; - unsigned int wqe_shift; -- int head; -- int tail; - unsigned int *db; - unsigned short counter; - struct hns_roce_idx_que idx_que; -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 4988943a..f947dbd7 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -262,7 +262,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind) - bitmap_num = ind / BIT_CNT_PER_LONG; - bit_num = ind % BIT_CNT_PER_LONG; - srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); -- srq->tail++; -+ srq->idx_que.tail++; - - pthread_spin_unlock(&srq->lock); - } -@@ -1564,7 +1564,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - pthread_spin_lock(&srq->lock); - - /* current idx of srqwq */ -- ind = srq->head & (srq->wqe_cnt - 1); -+ ind = srq->idx_que.head & (srq->wqe_cnt - 1); - - max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { -@@ -1574,7 +1574,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - break; - } - -- if (srq->head == srq->tail) { -+ if (srq->idx_que.head == srq->idx_que.tail) { - ret = -ENOMEM; - *bad_wr = wr; - break; -@@ -1607,7 +1607,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - } - - if (nreq) { -- srq->head += nreq; -+ srq->idx_que.head += nreq; - - /* - * Make sure that descriptors are written before -@@ -1617,8 +1617,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - - srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S | - srq->srqn); -- srq_db.parameter = -- htole32(srq->head & DB_PARAM_SRQ_PRODUCER_COUNTER_M); -+ srq_db.parameter = htole32(srq->idx_que.head & -+ DB_PARAM_SRQ_PRODUCER_COUNTER_M); - - hns_roce_write64((uint32_t *)&srq_db, ctx, - ROCEE_VF_DB_CFG0_OFFSET); -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 30ab072a..9b4934b9 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -491,6 +491,9 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - for (i = 0; i < idx_que->bitmap_cnt; ++i) - idx_que->bitmap[i] = ~(0UL); - -+ idx_que->head = 0; -+ idx_que->tail = srq->wqe_cnt - 1; -+ - return 0; - } - -@@ -512,9 +515,6 @@ static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq) - return ENOMEM; - } - -- srq->head = 0; -- srq->tail = srq->wqe_cnt - 1; -- - return 0; - } - --- -2.33.0 - diff --git a/0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch b/0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch deleted file mode 100644 index 6385c36b0ee9c9b764cf3d9791dd0530cbecc104..0000000000000000000000000000000000000000 --- a/0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch +++ /dev/null @@ -1,180 +0,0 @@ -From 9cc4c4b8d31b35428859ef626d4428fc393aace4 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Thu, 11 Nov 2021 21:08:35 +0800 -Subject: [PATCH 3/8] libhns: Fix wrong data type when writing doorbell - -The DB data is a __le32[] value instead of uint32_t[], and the DB register -should be written with a little-endian data instead of uint64_t. - -Fixes: 1523fbb1ea8e ("libhns: Add verbs of cq support") -Signed-off-by: Lang Cheng -Signed-off-by: Yixing Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_db.h | 14 ++++---------- - providers/hns/hns_roce_u_hw_v1.c | 17 +++++++++-------- - providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++++++----------- - 3 files changed, 25 insertions(+), 29 deletions(-) - -diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h -index b44e64d4..13df9b52 100644 ---- a/providers/hns/hns_roce_u_db.h -+++ b/providers/hns/hns_roce_u_db.h -@@ -32,23 +32,17 @@ - - #include - -+#include - #include "hns_roce_u.h" - - #ifndef _HNS_ROCE_U_DB_H - #define _HNS_ROCE_U_DB_H - --#if __BYTE_ORDER == __LITTLE_ENDIAN --#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[1] << 32 | val[0]) --#elif __BYTE_ORDER == __BIG_ENDIAN --#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[0] << 32 | val[1]) --#else --#error __BYTE_ORDER not defined --#endif -+#define HNS_ROCE_WORD_NUM 2 - --static inline void hns_roce_write64(uint32_t val[2], -- struct hns_roce_context *ctx, int offset) -+static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM]) - { -- *(volatile uint64_t *) (ctx->uar + offset) = HNS_ROCE_PAIR_TO_64(val); -+ mmio_write64_le(dest, *(__le64 *)val); - } - - void *hns_roce_alloc_db(struct hns_roce_context *ctx, -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 8f0a71aa..14ee4817 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -65,7 +65,7 @@ static void hns_roce_update_rq_head(struct hns_roce_context *ctx, - - udma_to_device_barrier(); - -- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_DB_OTHERS_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&rq_db); - } - - static void hns_roce_update_sq_head(struct hns_roce_context *ctx, -@@ -84,7 +84,7 @@ static void hns_roce_update_sq_head(struct hns_roce_context *ctx, - - udma_to_device_barrier(); - -- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_DB_SQ_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_SQ_L_0_REG, (__le32 *)&sq_db); - } - - static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx, -@@ -102,7 +102,7 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx, - CQ_DB_U32_4_CONS_IDX_S, - cq->cons_index & ((cq->cq_depth << 1) - 1)); - -- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_DB_OTHERS_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db); - } - - static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe, -@@ -422,10 +422,11 @@ static int hns_roce_u_v1_poll_cq(struct ibv_cq *ibvcq, int ne, - */ - static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited) - { -- uint32_t ci; -- uint32_t solicited_flag; -- struct hns_roce_cq_db cq_db = {}; -+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); - struct hns_roce_cq *cq = to_hr_cq(ibvcq); -+ struct hns_roce_cq_db cq_db = {}; -+ uint32_t solicited_flag; -+ uint32_t ci; - - ci = cq->cons_index & ((cq->cq_depth << 1) - 1); - solicited_flag = solicited ? HNS_ROCE_CQ_DB_REQ_SOL : -@@ -441,8 +442,8 @@ static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited) - roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M, - CQ_DB_U32_4_CONS_IDX_S, ci); - -- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context), -- ROCEE_DB_OTHERS_L_0_REG); -+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db); -+ - return 0; - } - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index f947dbd7..efd949f4 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -293,7 +293,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, - HNS_ROCE_V2_RQ_DB); - rq_db.parameter = htole32(rq_head); - -- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db); - } - - static void hns_roce_update_sq_db(struct hns_roce_context *ctx, -@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, - sq_db.parameter = htole32(sq_head); - roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl); - -- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db); - } - - static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, -@@ -325,7 +325,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, - roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M, - DB_PARAM_CQ_CMD_SN_S, 1); - -- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); - } - - static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, -@@ -659,11 +659,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, - - static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) - { -- uint32_t ci; -- uint32_t cmd_sn; -- uint32_t solicited_flag; -- struct hns_roce_db cq_db = {}; -+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); - struct hns_roce_cq *cq = to_hr_cq(ibvcq); -+ struct hns_roce_db cq_db = {}; -+ uint32_t solicited_flag; -+ uint32_t cmd_sn; -+ uint32_t ci; - - ci = cq->cons_index & ((cq->cq_depth << 1) - 1); - cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK; -@@ -681,8 +682,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) - DB_PARAM_CQ_CMD_SN_S, cmd_sn); - roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag); - -- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context), -- ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); -+ - return 0; - } - -@@ -1620,8 +1621,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - srq_db.parameter = htole32(srq->idx_que.head & - DB_PARAM_SRQ_PRODUCER_COUNTER_M); - -- hns_roce_write64((uint32_t *)&srq_db, ctx, -- ROCEE_VF_DB_CFG0_OFFSET); -+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, -+ (__le32 *)&srq_db); - } - - pthread_spin_unlock(&srq->lock); --- -2.33.0 - diff --git a/0004-libhns-Remove-unsupported-QP-type.patch b/0004-libhns-Remove-unsupported-QP-type.patch deleted file mode 100644 index e144b050b6b376db6e91723db6a091c0572066b9..0000000000000000000000000000000000000000 --- a/0004-libhns-Remove-unsupported-QP-type.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 60d82566fc94b11280be26733bc306e6af3d2697 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 9 Nov 2021 20:40:58 +0800 -Subject: [PATCH 4/8] libhns: Remove unsupported QP type - -Currently, user space does not support UC type QP. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u_hw_v1.c | 1 - - providers/hns/hns_roce_u_hw_v2.c | 3 +-- - 2 files changed, 1 insertion(+), 3 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 14ee4817..279c9b0f 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -532,7 +532,6 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, - ctrl->flag |= htole32(ps_opcode); - wqe += sizeof(struct hns_roce_wqe_raddr_seg); - break; -- case IBV_QPT_UC: - case IBV_QPT_UD: - default: - break; -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index efd949f4..c62f74b5 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -460,8 +460,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, - struct hns_roce_qp **cur_qp, - struct ibv_wc *wc, uint32_t opcode) - { -- if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC || -- (*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_UC) && -+ if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) && - (opcode == HNS_ROCE_RECV_OP_SEND || - opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM || - opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) && --- -2.33.0 - diff --git a/0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch b/0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch deleted file mode 100644 index 4ae71d1c8622acffb3e68d1d96d91cb478991e0b..0000000000000000000000000000000000000000 --- a/0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch +++ /dev/null @@ -1,67 +0,0 @@ -From e460a4208d1821b1477e621ad5a7b72068e844f9 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:32 +0800 -Subject: [PATCH 5/8] libhns: Avoid using WQE indexes that exceed the SRQ size - -The index of SRQ WQE got from bitmap may be greater than the capability, -so a check for that should be added. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++------ - 1 file changed, 14 insertions(+), 6 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index c62f74b5..1169b64b 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1527,8 +1527,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - return ret; - } - --static int find_empty_entry(struct hns_roce_idx_que *idx_que) -+static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) - { -+ struct hns_roce_idx_que *idx_que = &srq->idx_que; - int bit_num; - int i; - -@@ -1536,12 +1537,20 @@ static int find_empty_entry(struct hns_roce_idx_que *idx_que) - for (i = 0; i < idx_que->bitmap_cnt && idx_que->bitmap[i] == 0; ++i) - ; - if (i == idx_que->bitmap_cnt) -- return ENOMEM; -+ return -ENOMEM; - - bit_num = ffsl(idx_que->bitmap[i]); - idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); - -- return i * BIT_CNT_PER_LONG + (bit_num - 1); -+ *wqe_idx = i * BIT_CNT_PER_LONG + (bit_num - 1); -+ -+ /* If wqe_cnt is less than BIT_CNT_PER_LONG, wqe_idx may be greater -+ * than wqe_cnt. -+ */ -+ if (*wqe_idx >= srq->wqe_cnt) -+ return -ENOMEM; -+ -+ return 0; - } - - static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, -@@ -1580,9 +1589,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - break; - } - -- wqe_idx = find_empty_entry(&srq->idx_que); -- if (wqe_idx < 0 || wqe_idx >= srq->wqe_cnt) { -- ret = -ENOMEM; -+ ret = get_wqe_idx(srq, &wqe_idx); -+ if (ret) { - *bad_wr = wr; - break; - } --- -2.33.0 - diff --git a/0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch b/0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch deleted file mode 100644 index 9105a5d969a62354e1a57df3153e84c0c494bbc4..0000000000000000000000000000000000000000 --- a/0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 91034654bdb2fd6e1fce81b4c1aea41bb4b6bf98 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:33 +0800 -Subject: [PATCH 6/8] libhns: Don't create RQ for a QP that associated with a - SRQ - -If a QP is associated with a SRQ, it's RQ should not be created. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 9b4934b9..125858d2 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -760,6 +760,11 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, - cap->max_recv_sge > ctx->max_sge) - return -EINVAL; - -+ if (attr->srq) { -+ cap->max_recv_wr = 0; -+ cap->max_recv_sge = 0; -+ } -+ - min_wqe_num = hr_dev->hw_version == HNS_ROCE_HW_VER1 ? - HNS_ROCE_V1_MIN_WQE_NUM : HNS_ROCE_V2_MIN_WQE_NUM; - --- -2.33.0 - diff --git a/0007-libhns-Add-support-for-direct-wqe.patch b/0007-libhns-Add-support-for-direct-wqe.patch deleted file mode 100644 index e310d4922dbc6f9f22bc11b2bed828745b767033..0000000000000000000000000000000000000000 --- a/0007-libhns-Add-support-for-direct-wqe.patch +++ /dev/null @@ -1,368 +0,0 @@ -From 64c66455fef1c908cc8f06a2b71aa2fd71806218 Mon Sep 17 00:00:00 2001 -From: Yixing Liu -Date: Wed, 15 Dec 2021 16:42:30 +0800 -Subject: [PATCH 7/8] libhns: Add support for direct wqe - -The current write wqe mechanism is to write to DDR first, and then notify -the hardware through doorbell to read the data. Direct wqe is a mechanism -to fill wqe directly into the hardware. In the case of light load, the wqe -will be filled into pcie bar space of the hardware, this will reduce one -memory access operation and therefore reduce the latency. SIMD instructions -allows cpu to write the 512 bits at one time to device memory, thus it can -be used for posting direct wqe. - -The process of post send of HIP08/09: - - +-----------+ - | post send | - +-----+-----+ - | - +-----+-----+ - | write WQE | - +-----+-----+ - | - | udma_to_device_barrier() - | - +-----+-----+ Y +-----------+ N - | HIP09 ? +------+ multi WR ?+-------------+ - +-----+-----+ +-----+-----+ | - | N | Y | - +-----+-----+ +-----+-----+ +--------+--------+ - | ring DB | | ring DB | |direct WQE (ST4) | - +-----------+ +-----------+ +-----------------+ - -Signed-off-by: Yixing Liu -Signed-off-by: Lang Cheng -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 5 +++- - providers/hns/hns_roce_u_hw_v2.c | 43 ++++++++++++++++++++++++++------ - providers/hns/hns_roce_u_hw_v2.h | 31 +++++++++++++---------- - providers/hns/hns_roce_u_verbs.c | 26 +++++++++++++++++-- - util/mmio.h | 27 +++++++++++++++++++- - 5 files changed, 107 insertions(+), 25 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index b3f48113..37711363 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -80,6 +80,8 @@ - - #define INVALID_SGE_LENGTH 0x80000000 - -+#define HNS_ROCE_DWQE_PAGE_SIZE 65536 -+ - #define HNS_ROCE_ADDRESS_MASK 0xFFFFFFFF - #define HNS_ROCE_ADDRESS_SHIFT 32 - -@@ -279,13 +281,14 @@ struct hns_roce_qp { - struct hns_roce_sge_ex ex_sge; - unsigned int next_sge; - int port_num; -- int sl; -+ uint8_t sl; - unsigned int qkey; - enum ibv_mtu path_mtu; - - struct hns_roce_rinl_buf rq_rinl_buf; - unsigned long flags; - int refcnt; /* specially used for XRC */ -+ void *dwqe_page; - }; - - struct hns_roce_av { -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 1169b64b..f102fd61 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -33,6 +33,7 @@ - #define _GNU_SOURCE - #include - #include -+#include - #include "hns_roce_u.h" - #include "hns_roce_u_db.h" - #include "hns_roce_u_hw_v2.h" -@@ -297,20 +298,40 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, - } - - static void hns_roce_update_sq_db(struct hns_roce_context *ctx, -- unsigned int qpn, unsigned int sl, -- unsigned int sq_head) -+ struct hns_roce_qp *qp) - { - struct hns_roce_db sq_db = {}; - -- sq_db.byte_4 = htole32(qpn); -+ sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num); - roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, - HNS_ROCE_V2_SQ_DB); -- sq_db.parameter = htole32(sq_head); -- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl); - -+ sq_db.parameter = htole32(qp->sq.head); -+ roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl); - hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db); - } - -+static void hns_roce_write512(uint64_t *dest, uint64_t *val) -+{ -+ mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe)); -+} -+ -+static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) -+{ -+ struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe; -+ -+ /* All kinds of DirectWQE have the same header field layout */ -+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1); -+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M, -+ RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl); -+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M, -+ RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT); -+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M, -+ RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); -+ -+ hns_roce_write512(qp->dwqe_page, wqe); -+} -+ - static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, - struct hns_roce_cq *cq) - { -@@ -339,8 +360,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, - return NULL; - } - --static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, -- struct hns_roce_qp *qp) -+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp) - { - uint32_t qpn = qp->verbs_qp.qp.qp_num; - uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -@@ -1196,6 +1216,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, - break; - case IBV_QPT_UD: - ret = set_ud_wqe(wqe, qp, wr, nreq, &sge_info); -+ qp->sl = to_hr_ah(wr->wr.ud.ah)->av.sl; - break; - default: - ret = EINVAL; -@@ -1214,7 +1235,10 @@ out: - - udma_to_device_barrier(); - -- hns_roce_update_sq_db(ctx, ibvqp->qp_num, qp->sl, qp->sq.head); -+ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) -+ hns_roce_write_dwqe(qp, wqe); -+ else -+ hns_roce_update_sq_db(ctx, qp); - - if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) - *(qp->sdb) = qp->sq.head & 0xffff; -@@ -1506,6 +1530,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - if (ret) - return ret; - -+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) -+ munmap(qp->dwqe_page, HNS_ROCE_DWQE_PAGE_SIZE); -+ - hns_roce_v2_clear_qp(ctx, qp); - - hns_roce_lock_cqs(ibqp); -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index c13d82e3..af72cd70 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -40,6 +40,8 @@ - - #define HNS_ROCE_CMDSN_MASK 0x3 - -+#define HNS_ROCE_SL_SHIFT 2 -+ - /* V2 REG DEFINITION */ - #define ROCEE_VF_DB_CFG0_OFFSET 0x0230 - -@@ -133,6 +135,8 @@ struct hns_roce_db { - #define DB_BYTE_4_CMD_S 24 - #define DB_BYTE_4_CMD_M GENMASK(27, 24) - -+#define DB_BYTE_4_FLAG_S 31 -+ - #define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0 - #define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0) - -@@ -216,8 +220,16 @@ struct hns_roce_rc_sq_wqe { - }; - - #define RC_SQ_WQE_BYTE_4_OPCODE_S 0 --#define RC_SQ_WQE_BYTE_4_OPCODE_M \ -- (((1UL << 5) - 1) << RC_SQ_WQE_BYTE_4_OPCODE_S) -+#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) -+ -+#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5 -+#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) -+ -+#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13 -+#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) -+ -+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15 -+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) - - #define RC_SQ_WQE_BYTE_4_OWNER_S 7 - -@@ -239,6 +251,8 @@ struct hns_roce_rc_sq_wqe { - - #define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22 - -+#define RC_SQ_WQE_BYTE_4_FLAG_S 31 -+ - #define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0 - #define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \ - (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S) -@@ -311,23 +325,12 @@ struct hns_roce_ud_sq_wqe { - #define UD_SQ_WQE_OPCODE_S 0 - #define UD_SQ_WQE_OPCODE_M GENMASK(4, 0) - --#define UD_SQ_WQE_DB_SL_L_S 5 --#define UD_SQ_WQE_DB_SL_L_M GENMASK(6, 5) -- --#define UD_SQ_WQE_DB_SL_H_S 13 --#define UD_SQ_WQE_DB_SL_H_M GENMASK(14, 13) -- --#define UD_SQ_WQE_INDEX_S 15 --#define UD_SQ_WQE_INDEX_M GENMASK(30, 15) -- - #define UD_SQ_WQE_OWNER_S 7 - - #define UD_SQ_WQE_CQE_S 8 - - #define UD_SQ_WQE_SE_S 11 - --#define UD_SQ_WQE_FLAG_S 31 -- - #define UD_SQ_WQE_PD_S 0 - #define UD_SQ_WQE_PD_M GENMASK(23, 0) - -@@ -376,4 +379,6 @@ struct hns_roce_ud_sq_wqe { - - #define MAX_SERVICE_LEVEL 0x7 - -+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); -+ - #endif /* _HNS_ROCE_U_HW_V2_H */ -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 125858d2..fc902815 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -1076,7 +1076,8 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx, - - static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, - struct hns_roce_qp *qp, -- struct hns_roce_context *ctx) -+ struct hns_roce_context *ctx, -+ uint64_t *dwqe_mmap_key) - { - struct hns_roce_create_qp_ex_resp resp_ex = {}; - struct hns_roce_create_qp_ex cmd_ex = {}; -@@ -1093,6 +1094,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, - &resp_ex.ibv_resp, sizeof(resp_ex)); - - qp->flags = resp_ex.drv_payload.cap_flags; -+ *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key; - - return ret; - } -@@ -1144,11 +1146,23 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, - return ret; - } - -+static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, -+ uint64_t dwqe_mmap_key) -+{ -+ qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE, -+ MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key); -+ if (qp->dwqe_page == MAP_FAILED) -+ return -EINVAL; -+ -+ return 0; -+} -+ - static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - struct ibv_qp_init_attr_ex *attr) - { - struct hns_roce_context *context = to_hr_ctx(ibv_ctx); - struct hns_roce_qp *qp; -+ uint64_t dwqe_mmap_key; - int ret; - - ret = verify_qp_create_attr(context, attr); -@@ -1167,7 +1181,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - if (ret) - goto err_buf; - -- ret = qp_exec_create_cmd(attr, qp, context); -+ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key); - if (ret) - goto err_cmd; - -@@ -1175,10 +1189,18 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - if (ret) - goto err_store; - -+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) { -+ ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key); -+ if (ret) -+ goto err_dwqe; -+ } -+ - qp_setup_config(attr, qp, context); - - return &qp->verbs_qp.qp; - -+err_dwqe: -+ hns_roce_v2_clear_qp(context, qp); - err_store: - ibv_cmd_destroy_qp(&qp->verbs_qp.qp); - err_cmd: -diff --git a/util/mmio.h b/util/mmio.h -index 101af9dd..01d1455e 100644 ---- a/util/mmio.h -+++ b/util/mmio.h -@@ -210,8 +210,33 @@ static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) - { - s390_mmio_write(dest, src, bytecnt); - } --#else - -+#elif defined(__aarch64__) || defined(__arm__) -+#include -+ -+static inline void _mmio_memcpy_x64_64b(void *dest, const void *src) -+{ -+ vst4q_u64(dest, vld4q_u64(src)); -+} -+ -+static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) -+{ -+ do { -+ _mmio_memcpy_x64_64b(dest, src); -+ bytecnt -= sizeof(uint64x2x4_t); -+ src += sizeof(uint64x2x4_t); -+ } while (bytecnt > 0); -+} -+ -+#define mmio_memcpy_x64(dest, src, bytecount) \ -+ ({ \ -+ if (__builtin_constant_p((bytecount) == 64)) \ -+ _mmio_memcpy_x64_64b((dest), (src)); \ -+ else \ -+ _mmio_memcpy_x64((dest), (src), (bytecount)); \ -+ }) -+ -+#else - /* Transfer is some multiple of 64 bytes */ - static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) - { --- -2.33.0 - diff --git a/0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch b/0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch deleted file mode 100644 index b19d4c1b7662a37dc60dd3abf78645bbf9374e67..0000000000000000000000000000000000000000 --- a/0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 608c142e7cbac2a6c02071022fe87b081a6ddc4f Mon Sep 17 00:00:00 2001 -From: Yixing Liu -Date: Tue, 21 Dec 2021 21:38:08 +0800 -Subject: [PATCH 8/8] libhns: Use new SQ doorbell register for HIP09 - -HIP09 set a new BAR space for SQ doorbell. Each SQ doorbell has an -independent BAR space and the size is 64KB. SQ doorbell share -the same BAR space with direct WQE. - -Signed-off-by: Yixing Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 1 + - providers/hns/hns_roce_u_hw_v2.c | 4 ++-- - providers/hns/hns_roce_u_verbs.c | 5 +++++ - 3 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 37711363..460363b7 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -238,6 +238,7 @@ struct hns_roce_wq { - unsigned int wqe_shift; - unsigned int shift; /* wq size is 2^shift */ - int offset; -+ void *db_reg; - }; - - /* record the result of sge process */ -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index f102fd61..9cbc0aac 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, - - sq_db.parameter = htole32(qp->sq.head); - roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl); -- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db); -+ hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db); - } - - static void hns_roce_write512(uint64_t *dest, uint64_t *val) -@@ -329,7 +329,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) - roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M, - RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); - -- hns_roce_write512(qp->dwqe_page, wqe); -+ hns_roce_write512(qp->sq.db_reg, wqe); - } - - static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index fc902815..c5022c83 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -1117,6 +1117,11 @@ static void qp_setup_config(struct ibv_qp_init_attr_ex *attr, - } - - qp->max_inline_data = attr->cap.max_inline_data; -+ -+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) -+ qp->sq.db_reg = qp->dwqe_page; -+ else -+ qp->sq.db_reg = ctx->uar + ROCEE_VF_DB_CFG0_OFFSET; - } - - void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx) --- -2.33.0 - diff --git a/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch b/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch deleted file mode 100644 index f68570161a76d4254ad420909beaa5c581689964..0000000000000000000000000000000000000000 --- a/0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 5cc1a047c4d71ced86b0f71f66adf12475a3c788 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:35 +0800 -Subject: libhns: Bugfix for checking whether the SRQ is full when posting WR - -If the user post a list of WRs, the head in the for loop is not updated in -time, and the judgment of if (head == tail) becomes invalid. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 17 +++++++++++++---- - providers/hns/hns_roce_u_verbs.c | 2 +- - 2 files changed, 14 insertions(+), 5 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 82124082..0c15bdbe 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1527,6 +1527,15 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - return ret; - } - -+static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) -+{ -+ struct hns_roce_idx_que *idx_que = &srq->idx_que; -+ unsigned int cur; -+ -+ cur = idx_que->head - idx_que->tail; -+ return cur >= srq->wqe_cnt - 1; -+} -+ - static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; -@@ -1577,14 +1586,14 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - - max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { -- if (wr->num_sge > max_sge) { -- ret = -EINVAL; -+ if (hns_roce_v2_srqwq_overflow(srq)) { -+ ret = -ENOMEM; - *bad_wr = wr; - break; - } - -- if (srq->idx_que.head == srq->idx_que.tail) { -- ret = -ENOMEM; -+ if (wr->num_sge > max_sge) { -+ ret = -EINVAL; - *bad_wr = wr; - break; - } -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 3abf7b48..dace35fd 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -492,7 +492,7 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - idx_que->bitmap[i] = ~(0UL); - - idx_que->head = 0; -- idx_que->tail = srq->wqe_cnt - 1; -+ idx_que->tail = 0; - - return 0; - } --- -2.30.0 - diff --git a/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch b/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch deleted file mode 100644 index 205419a31bcf5e0423f96d0f3e4f32ebdef1a880..0000000000000000000000000000000000000000 --- a/0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch +++ /dev/null @@ -1,30 +0,0 @@ -From a79800afbbc48e5c5274bf3fc0e890705b3a596d Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:36 +0800 -Subject: libhns: Allow users to create a 0-depth SRQs - -Users is allowed to create 0-depth SRQs, so the judgement about whether -max_wr is zero should be removed. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index dace35fd..2d1a6de3 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -525,7 +525,7 @@ static int hns_roce_verify_srq(struct hns_roce_context *context, - init_attr->srq_type != IBV_SRQT_XRC) - return -EINVAL; - -- if (!init_attr->attr.max_wr || !init_attr->attr.max_sge || -+ if (!init_attr->attr.max_sge || - init_attr->attr.max_wr > context->max_srq_wr || - init_attr->attr.max_sge > context->max_srq_sge) - return -EINVAL; --- -2.30.0 - diff --git a/0011-libhns-Refactor-the-process-of-post_srq_recv.patch b/0011-libhns-Refactor-the-process-of-post_srq_recv.patch deleted file mode 100644 index 693fc83c6a6e89b0da1833723d468213adccd2bf..0000000000000000000000000000000000000000 --- a/0011-libhns-Refactor-the-process-of-post_srq_recv.patch +++ /dev/null @@ -1,176 +0,0 @@ -From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:37 +0800 -Subject: libhns: Refactor the process of post_srq_recv - -SRQ is a shared queue, it mainly consists of four parts: - -1. wqe buf: wqe buf is used to store wqe data. - -2. wqe_idx buf: the cqe of SRQ is not generated in the order of wqe, so -the wqe_idx corresponding to the idle WQE needs to be placed in an FIFO -queue, it can instruct the hardware to obtain the corresponding WQE. - -3.bitmap: bitmap is used to generate and release wqe_idx. When the user -has a new WR, the driver finds the idx of the idle wqe in bitmap. When the -CQE of wqe is generated, the driver releases the idx. - -4. wr_id buf: wr_id buf is used to store the user's wr_id, then return it -to the user when ibv_poll_cq() is invoked. - -After refactor, the functions of the four parts are more clearer. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++------------- - 1 file changed, 57 insertions(+), 38 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 0c15bdbe..b622eaef 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) - return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift); - } - --static void *get_srq_wqe(struct hns_roce_srq *srq, int n) -+static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) - { - return srq->buf.buf + (n << srq->wqe_shift); - } -@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) - return cur >= srq->wqe_cnt - 1; - } - --static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) -+static int check_post_srq_valid(struct hns_roce_srq *srq, -+ struct ibv_recv_wr *wr) -+{ -+ unsigned int max_sge = srq->max_gs - srq->rsv_sge; -+ -+ if (hns_roce_v2_srqwq_overflow(srq)) -+ return -ENOMEM; -+ -+ if (wr->num_sge > max_sge) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; - int bit_num; -@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx) - return 0; - } - -+static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx, -+ struct ibv_recv_wr *wr) -+{ -+ struct hns_roce_v2_wqe_data_seg *dseg; -+ int i; -+ -+ dseg = get_srq_wqe(srq, wqe_idx); -+ -+ for (i = 0; i < wr->num_sge; ++i) { -+ dseg[i].len = htole32(wr->sg_list[i].length); -+ dseg[i].lkey = htole32(wr->sg_list[i].lkey); -+ dseg[i].addr = htole64(wr->sg_list[i].addr); -+ } -+ -+ /* hw stop reading when identify the last one */ -+ if (srq->rsv_sge) { -+ dseg[i].len = htole32(INVALID_SGE_LENGTH); -+ dseg[i].lkey = htole32(0x0); -+ dseg[i].addr = 0; -+ } -+} -+ -+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) -+{ -+ struct hns_roce_idx_que *idx_que = &srq->idx_que; -+ unsigned int head; -+ __le32 *idx_buf; -+ -+ head = idx_que->head & (srq->wqe_cnt - 1); -+ -+ idx_buf = get_idx_buf(idx_que, head); -+ *idx_buf = htole32(wqe_idx); -+ -+ idx_que->head++; -+} -+ - static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - struct ibv_recv_wr *wr, - struct ibv_recv_wr **bad_wr) - { - struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context); - struct hns_roce_srq *srq = to_hr_srq(ib_srq); -- struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_db srq_db; -- unsigned int max_sge; -- __le32 *srq_idx; -+ unsigned int wqe_idx; - int ret = 0; -- int wqe_idx; -- void *wqe; - int nreq; -- int ind; -- int i; - - pthread_spin_lock(&srq->lock); - -- /* current idx of srqwq */ -- ind = srq->idx_que.head & (srq->wqe_cnt - 1); -- -- max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { -- if (hns_roce_v2_srqwq_overflow(srq)) { -- ret = -ENOMEM; -- *bad_wr = wr; -- break; -- } -- -- if (wr->num_sge > max_sge) { -- ret = -EINVAL; -+ ret = check_post_srq_valid(srq, wr); -+ if (ret) { - *bad_wr = wr; - break; - } -@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - break; - } - -- wqe = get_srq_wqe(srq, wqe_idx); -- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; -- -- for (i = 0; i < wr->num_sge; ++i) { -- set_data_seg_v2(dseg, wr->sg_list + i); -- dseg++; -- } -- -- /* hw stop reading when identify the last one */ -- if (srq->rsv_sge) -- set_ending_data_seg(dseg); -- -- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind); -- *srq_idx = htole32(wqe_idx); -+ fill_srq_wqe(srq, wqe_idx, wr); -+ fill_wqe_idx(srq, wqe_idx); - - srq->wrid[wqe_idx] = wr->wr_id; -- ind = (ind + 1) & (srq->wqe_cnt - 1); - } - - if (nreq) { -- srq->idx_que.head += nreq; -- - /* - * Make sure that descriptors are written before - * we write doorbell record. --- -2.30.0 - diff --git a/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch b/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch deleted file mode 100644 index b8569f2d05dbba8d78d918fbfcc8473c4eb25e27..0000000000000000000000000000000000000000 --- a/0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch +++ /dev/null @@ -1,33 +0,0 @@ -From a18b0ee409d3382aa556b8f06a6cd6bfbef3f5c8 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:38 +0800 -Subject: libhns: Set srqlimit to 0 when creating SRQ - -According to the IB specification, the srq_limt parameter should not be -configured when creating srq. But the libhns does not set attr.srq_limit -to 0 currently. As a result, when attr.srq_limit provided by the user is -not 0, the value of attr.srq_limit returned to the user will be different -from that obtained by ibv_query_srq(). Therefore, the driver should set -attr.srq_limit to 0 when creating SRQ. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 2d1a6de3..107da753 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -580,6 +580,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context, - srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1); - srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); - attr->max_sge = srq->max_gs; -+ attr->srq_limit = 0; - - ret = hns_roce_create_idx_que(srq); - if (ret) --- -2.30.0 - diff --git a/0013-libhns-Refactor-the-process-of-create_srq.patch b/0013-libhns-Refactor-the-process-of-create_srq.patch deleted file mode 100644 index a2a08551149c1019c9cad344a605067dab7d299d..0000000000000000000000000000000000000000 --- a/0013-libhns-Refactor-the-process-of-create_srq.patch +++ /dev/null @@ -1,367 +0,0 @@ -From b914c76318f5b95e3157c3cbf1ccb49ec6d27635 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:39 +0800 -Subject: libhns: Refactor the process of create_srq - -Reorganize create_srq() as several sub-functions to make the process -clearer. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u.h | 7 +- - providers/hns/hns_roce_u_hw_v2.c | 2 +- - providers/hns/hns_roce_u_verbs.c | 178 ++++++++++++++++++------------- - 3 files changed, 105 insertions(+), 82 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index b3f48113..a437727c 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -211,7 +211,8 @@ struct hns_roce_idx_que { - - struct hns_roce_srq { - struct verbs_srq verbs_srq; -- struct hns_roce_buf buf; -+ struct hns_roce_idx_que idx_que; -+ struct hns_roce_buf wqe_buf; - pthread_spinlock_t lock; - unsigned long *wrid; - unsigned int srqn; -@@ -221,7 +222,6 @@ struct hns_roce_srq { - unsigned int wqe_shift; - unsigned int *db; - unsigned short counter; -- struct hns_roce_idx_que idx_que; - }; - - struct hns_roce_wq { -@@ -343,8 +343,7 @@ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq) - - static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq) - { -- return container_of(container_of(ibv_srq, struct verbs_srq, srq), -- struct hns_roce_srq, verbs_srq); -+ return container_of(ibv_srq, struct hns_roce_srq, verbs_srq.srq); - } - - static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp) -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index b622eaef..d4e7e4f9 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -244,7 +244,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) - - static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) - { -- return srq->buf.buf + (n << srq->wqe_shift); -+ return srq->wqe_buf.buf + (n << srq->wqe_shift); - } - - static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 107da753..75b9e530 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -432,17 +432,23 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx, - uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >> - ctx->srq_table_shift; - -+ pthread_mutex_lock(&ctx->srq_table_mutex); -+ - if (!ctx->srq_table[tind].refcnt) { - ctx->srq_table[tind].table = - calloc(ctx->srq_table_mask + 1, - sizeof(struct hns_roce_srq *)); -- if (!ctx->srq_table[tind].table) -+ if (!ctx->srq_table[tind].table) { -+ pthread_mutex_unlock(&ctx->srq_table_mutex); - return -ENOMEM; -+ } - } - - ++ctx->srq_table[tind].refcnt; - ctx->srq_table[tind].table[srq->srqn & ctx->srq_table_mask] = srq; - -+ pthread_mutex_unlock(&ctx->srq_table_mutex); -+ - return 0; - } - -@@ -461,13 +467,46 @@ static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn) - { - uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift; - -+ pthread_mutex_lock(&ctx->srq_table_mutex); -+ - if (!--ctx->srq_table[tind].refcnt) - free(ctx->srq_table[tind].table); - else - ctx->srq_table[tind].table[srqn & ctx->srq_table_mask] = NULL; -+ -+ pthread_mutex_unlock(&ctx->srq_table_mutex); -+} -+ -+static int verify_srq_create_attr(struct hns_roce_context *context, -+ struct ibv_srq_init_attr_ex *attr) -+{ -+ if (attr->srq_type != IBV_SRQT_BASIC && -+ attr->srq_type != IBV_SRQT_XRC) -+ return -EINVAL; -+ -+ if (!attr->attr.max_sge || -+ attr->attr.max_wr > context->max_srq_wr || -+ attr->attr.max_sge > context->max_srq_sge) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq, -+ struct ibv_srq_init_attr_ex *attr) -+{ -+ if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) -+ srq->rsv_sge = 1; -+ -+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1); -+ srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge); -+ srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * -+ srq->max_gs)); -+ attr->attr.max_sge = srq->max_gs; -+ attr->attr.srq_limit = 0; - } - --static int hns_roce_create_idx_que(struct hns_roce_srq *srq) -+static int alloc_srq_idx_que(struct hns_roce_srq *srq) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; - unsigned int buf_size; -@@ -478,13 +517,13 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - BIT_CNT_PER_LONG; - idx_que->bitmap = calloc(idx_que->bitmap_cnt, sizeof(unsigned long)); - if (!idx_que->bitmap) -- return ENOMEM; -+ return -ENOMEM; - - buf_size = to_hr_hem_entries_size(srq->wqe_cnt, idx_que->entry_shift); - if (hns_roce_alloc_buf(&idx_que->buf, buf_size, HNS_HW_PAGE_SIZE)) { - free(idx_que->bitmap); - idx_que->bitmap = NULL; -- return ENOMEM; -+ return -ENOMEM; - } - - /* init the idx_que bitmap */ -@@ -497,40 +536,48 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq) - return 0; - } - --static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq) -+static int alloc_srq_wqe_buf(struct hns_roce_srq *srq) - { -- int srq_buf_size; -+ int buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); - -- srq->wrid = calloc(srq->wqe_cnt, sizeof(unsigned long)); -- if (!srq->wrid) -- return ENOMEM; -+ return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE); -+} - -- srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * -- srq->max_gs)); -- srq_buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); -+static int alloc_srq_buf(struct hns_roce_srq *srq) -+{ -+ int ret; - -- /* allocate srq wqe buf */ -- if (hns_roce_alloc_buf(&srq->buf, srq_buf_size, HNS_HW_PAGE_SIZE)) { -- free(srq->wrid); -- return ENOMEM; -+ ret = alloc_srq_idx_que(srq); -+ if (ret) -+ return ret; -+ -+ ret = alloc_srq_wqe_buf(srq); -+ if (ret) -+ goto err_idx_que; -+ -+ srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid)); -+ if (!srq->wrid) { -+ ret = -ENOMEM; -+ goto err_wqe_buf; - } - - return 0; --} - --static int hns_roce_verify_srq(struct hns_roce_context *context, -- struct ibv_srq_init_attr_ex *init_attr) --{ -- if (init_attr->srq_type != IBV_SRQT_BASIC && -- init_attr->srq_type != IBV_SRQT_XRC) -- return -EINVAL; -+err_wqe_buf: -+ hns_roce_free_buf(&srq->wqe_buf); -+err_idx_que: -+ hns_roce_free_buf(&srq->idx_que.buf); -+ free(srq->idx_que.bitmap); - -- if (!init_attr->attr.max_sge || -- init_attr->attr.max_wr > context->max_srq_wr || -- init_attr->attr.max_sge > context->max_srq_sge) -- return -EINVAL; -+ return ret; -+} - -- return 0; -+static void free_srq_buf(struct hns_roce_srq *srq) -+{ -+ free(srq->wrid); -+ hns_roce_free_buf(&srq->wqe_buf); -+ hns_roce_free_buf(&srq->idx_que.buf); -+ free(srq->idx_que.bitmap); - } - - static int exec_srq_create_cmd(struct ibv_context *context, -@@ -541,7 +588,7 @@ static int exec_srq_create_cmd(struct ibv_context *context, - struct hns_roce_create_srq_ex cmd_ex = {}; - int ret; - -- cmd_ex.buf_addr = (uintptr_t)srq->buf.buf; -+ cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf; - cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf; - cmd_ex.db_addr = (uintptr_t)srq->db; - -@@ -559,57 +606,44 @@ static int exec_srq_create_cmd(struct ibv_context *context, - static struct ibv_srq *create_srq(struct ibv_context *context, - struct ibv_srq_init_attr_ex *init_attr) - { -- struct hns_roce_context *ctx = to_hr_ctx(context); -- struct ibv_srq_attr *attr = &init_attr->attr; -+ struct hns_roce_context *hr_ctx = to_hr_ctx(context); - struct hns_roce_srq *srq; - int ret; - -- if (hns_roce_verify_srq(ctx, init_attr)) -- return NULL; -+ ret = verify_srq_create_attr(hr_ctx, init_attr); -+ if (ret) -+ goto err; - - srq = calloc(1, sizeof(*srq)); -- if (!srq) -- return NULL; -+ if (!srq) { -+ ret = -ENOMEM; -+ goto err; -+ } - - if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) - goto err_free_srq; - -- if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) -- srq->rsv_sge = 1; -- -- srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1); -- srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); -- attr->max_sge = srq->max_gs; -- attr->srq_limit = 0; -- -- ret = hns_roce_create_idx_que(srq); -- if (ret) -+ set_srq_param(context, srq, init_attr); -+ if (alloc_srq_buf(srq)) - goto err_free_srq; - -- ret = hns_roce_alloc_srq_buf(srq); -- if (ret) -- goto err_idx_que; -- -- srq->db = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB); -+ srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB); - if (!srq->db) - goto err_srq_buf; - -- *(srq->db) = 0; -- -- pthread_mutex_lock(&ctx->srq_table_mutex); -+ *srq->db = 0; - - ret = exec_srq_create_cmd(context, srq, init_attr); - if (ret) - goto err_srq_db; - -- ret = hns_roce_store_srq(ctx, srq); -+ ret = hns_roce_store_srq(hr_ctx, srq); - if (ret) - goto err_destroy_srq; - -- pthread_mutex_unlock(&ctx->srq_table_mutex); -- -- srq->max_gs = attr->max_sge; -- attr->max_sge = min(attr->max_sge - srq->rsv_sge, ctx->max_srq_sge); -+ srq->max_gs = init_attr->attr.max_sge; -+ init_attr->attr.max_sge = -+ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge); - - return &srq->verbs_srq.srq; - -@@ -617,20 +651,19 @@ err_destroy_srq: - ibv_cmd_destroy_srq(&srq->verbs_srq.srq); - - err_srq_db: -- pthread_mutex_unlock(&ctx->srq_table_mutex); -- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB); -+ hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB); - - err_srq_buf: -- free(srq->wrid); -- hns_roce_free_buf(&srq->buf); -- --err_idx_que: -- free(srq->idx_que.bitmap); -- hns_roce_free_buf(&srq->idx_que.buf); -+ free_srq_buf(srq); - - err_free_srq: - free(srq); - -+err: -+ if (ret < 0) -+ ret = -ret; -+ -+ errno = ret; - return NULL; - } - -@@ -690,23 +723,14 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) - struct hns_roce_srq *srq = to_hr_srq(ibv_srq); - int ret; - -- pthread_mutex_lock(&ctx->srq_table_mutex); -- - ret = ibv_cmd_destroy_srq(ibv_srq); -- if (ret) { -- pthread_mutex_unlock(&ctx->srq_table_mutex); -+ if (ret) - return ret; -- } - - hns_roce_clear_srq(ctx, srq->srqn); - -- pthread_mutex_unlock(&ctx->srq_table_mutex); -- - hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB); -- hns_roce_free_buf(&srq->buf); -- free(srq->wrid); -- hns_roce_free_buf(&srq->idx_que.buf); -- free(srq->idx_que.bitmap); -+ free_srq_buf(srq); - free(srq); - - return 0; --- -2.30.0 - diff --git a/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch b/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch deleted file mode 100644 index 31dbe9f58e3dc7cc13440fb781b683fae674b3c9..0000000000000000000000000000000000000000 --- a/0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch +++ /dev/null @@ -1,69 +0,0 @@ -From d68ac72a8e4f2cf9754d3fcbbb8ff2a03e514c2f Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 11 May 2021 19:06:40 +0800 -Subject: libhns: Remove the reserved wqe of SRQ - -There is an unreasonable reserved WQE in SRQ, it should be removed. - -Signed-off-by: Wenpeng Liang -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u.h | 1 + - providers/hns/hns_roce_u_hw_v2.c | 4 +--- - providers/hns/hns_roce_u_verbs.c | 5 ++++- - 3 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index a437727c..0d7abd81 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -64,6 +64,7 @@ - #define HNS_ROCE_MIN_CQE_NUM 0x40 - #define HNS_ROCE_V1_MIN_WQE_NUM 0x20 - #define HNS_ROCE_V2_MIN_WQE_NUM 0x40 -+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1 - - #define HNS_ROCE_CQE_SIZE 0x20 - #define HNS_ROCE_V3_CQE_SIZE 0x40 -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index d4e7e4f9..2fb6cdaf 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1530,10 +1530,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) - static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; -- unsigned int cur; - -- cur = idx_que->head - idx_que->tail; -- return cur >= srq->wqe_cnt - 1; -+ return idx_que->head - idx_que->tail >= srq->wqe_cnt; - } - - static int check_post_srq_valid(struct hns_roce_srq *srq, -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 75b9e530..4847639b 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -489,6 +489,9 @@ static int verify_srq_create_attr(struct hns_roce_context *context, - attr->attr.max_sge > context->max_srq_sge) - return -EINVAL; - -+ attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr, -+ HNS_ROCE_MIN_SRQ_WQE_NUM); -+ - return 0; - } - -@@ -498,7 +501,7 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq, - if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2) - srq->rsv_sge = 1; - -- srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1); -+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr); - srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge); - srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE * - srq->max_gs)); --- -2.30.0 - diff --git a/0015-libhns-Refactor-process-of-setting-extended-sge.patch b/0015-libhns-Refactor-process-of-setting-extended-sge.patch deleted file mode 100644 index 3555177f7d5be107ed0fbb016b841d0e1557bc26..0000000000000000000000000000000000000000 --- a/0015-libhns-Refactor-process-of-setting-extended-sge.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 11c81d0e3a987f95b74e03b5e592a45029302f1d Mon Sep 17 00:00:00 2001 -From: Weihang Li -Date: Fri, 14 May 2021 10:02:56 +0800 -Subject: libhns: Refactor process of setting extended sge - -Refactor and encapsulate the parts of getting number of extended sge a WQE -can use to make it easier to understand. - -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++------------ - 1 file changed, 29 insertions(+), 16 deletions(-) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 30ab072a..a8508fc5 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -920,31 +920,44 @@ err_alloc: - return -ENOMEM; - } - --static void set_extend_sge_param(struct hns_roce_device *hr_dev, -- struct ibv_qp_init_attr_ex *attr, -- struct hns_roce_qp *qp, unsigned int wr_cnt) -+static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) - { -- int cnt = 0; -+ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD) -+ return qp->sq.max_gs; -+ -+ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) -+ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; -+ -+ return 0; -+} -+ -+static void set_ext_sge_param(struct hns_roce_device *hr_dev, -+ struct ibv_qp_init_attr_ex *attr, -+ struct hns_roce_qp *qp, unsigned int wr_cnt) -+{ -+ unsigned int total_sge_cnt; -+ unsigned int wqe_sge_cnt; -+ -+ qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; - - if (hr_dev->hw_version == HNS_ROCE_HW_VER1) { - qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; -- } else { -- qp->sq.max_gs = attr->cap.max_send_sge; -- if (attr->qp_type == IBV_QPT_UD) -- cnt = roundup_pow_of_two(wr_cnt * qp->sq.max_gs); -- else if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) -- cnt = roundup_pow_of_two(wr_cnt * -- (qp->sq.max_gs - -- HNS_ROCE_SGE_IN_WQE)); -+ return; - } - -- qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; -+ qp->sq.max_gs = attr->cap.max_send_sge; -+ -+ wqe_sge_cnt = get_wqe_ext_sge_cnt(qp); - - /* If the number of extended sge is not zero, they MUST use the - * space of HNS_HW_PAGE_SIZE at least. - */ -- qp->ex_sge.sge_cnt = cnt ? -- max(cnt, HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0; -+ if (wqe_sge_cnt) { -+ total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt); -+ qp->ex_sge.sge_cnt = -+ max(total_sge_cnt, -+ (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); -+ } - } - - static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, -@@ -988,7 +1001,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, - qp->sq.wqe_cnt = cnt; - qp->sq.shift = hr_ilog32(cnt); - -- set_extend_sge_param(hr_dev, attr, qp, cnt); -+ set_ext_sge_param(hr_dev, attr, qp, cnt); - - qp->sq.max_post = min(ctx->max_qp_wr, cnt); - qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs); --- -2.30.0 - diff --git a/0016-libhns-Optimize-set_sge-process.patch b/0016-libhns-Optimize-set_sge-process.patch deleted file mode 100644 index 7754801a38d74427e2fee563dcc75f61571c2612..0000000000000000000000000000000000000000 --- a/0016-libhns-Optimize-set_sge-process.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Fri, 14 May 2021 10:02:57 +0800 -Subject: libhns: Optimize set_sge process - -Use local variables to avoid frequent ldr/str operations. And because UD's -process of setting sge is more simple then RC, set_sge() can be splited -into two functions for compiler optimization. - -Signed-off-by: Lang Cheng -Signed-off-by: Weihang Li ---- - providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++--------- - 1 file changed, 61 insertions(+), 22 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 4988943a..dc79a6f8 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx) - return 0; - } - --static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg, -- struct hns_roce_qp *qp, struct ibv_send_wr *wr, -- struct hns_roce_sge_info *sge_info) -+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg, -+ struct hns_roce_qp *qp, struct ibv_send_wr *wr, -+ struct hns_roce_sge_info *sge_info) - { -+ uint32_t mask = qp->ex_sge.sge_cnt - 1; -+ uint32_t index = sge_info->start_idx; -+ struct ibv_sge *sge = wr->sg_list; -+ uint32_t len = 0; -+ uint32_t cnt = 0; -+ int flag; - int i; - -- sge_info->valid_num = 0; -- sge_info->total_len = 0; -+ flag = (wr->send_flags & IBV_SEND_INLINE && -+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD && -+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP); - -- for (i = 0; i < wr->num_sge; i++) { -- if (unlikely(!wr->sg_list[i].length)) -+ for (i = 0; i < wr->num_sge; i++, sge++) { -+ if (unlikely(!sge->length)) - continue; - -- sge_info->total_len += wr->sg_list[i].length; -- sge_info->valid_num++; -+ len += sge->length; -+ cnt++; - -- if (wr->send_flags & IBV_SEND_INLINE && -- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD && -- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP) -+ if (flag) - continue; - -- /* No inner sge in UD wqe */ -- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE && -- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) { -- set_data_seg_v2(dseg, wr->sg_list + i); -+ if (cnt <= HNS_ROCE_SGE_IN_WQE) { -+ set_data_seg_v2(dseg, sge); - dseg++; - } else { -- dseg = get_send_sge_ex(qp, sge_info->start_idx & -- (qp->ex_sge.sge_cnt - 1)); -- set_data_seg_v2(dseg, wr->sg_list + i); -- sge_info->start_idx++; -+ dseg = get_send_sge_ex(qp, index & mask); -+ set_data_seg_v2(dseg, sge); -+ index++; - } - } -+ -+ sge_info->start_idx = index; -+ sge_info->valid_num = cnt; -+ sge_info->total_len = len; -+} -+ -+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg, -+ struct hns_roce_qp *qp, struct ibv_send_wr *wr, -+ struct hns_roce_sge_info *sge_info) -+{ -+ int flag = wr->send_flags & IBV_SEND_INLINE; -+ uint32_t mask = qp->ex_sge.sge_cnt - 1; -+ uint32_t index = sge_info->start_idx; -+ struct ibv_sge *sge = wr->sg_list; -+ uint32_t len = 0; -+ uint32_t cnt = 0; -+ int i; -+ -+ for (i = 0; i < wr->num_sge; i++, sge++) { -+ if (unlikely(!sge->length)) -+ continue; -+ -+ len += sge->length; -+ cnt++; -+ -+ if (flag) -+ continue; -+ -+ /* No inner sge in UD wqe */ -+ dseg = get_send_sge_ex(qp, index & mask); -+ set_data_seg_v2(dseg, sge); -+ index++; -+ } -+ -+ sge_info->start_idx = index; -+ sge_info->valid_num = cnt; -+ sge_info->total_len = len; - } - - static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, -@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, - UD_SQ_WQE_MSG_START_SGE_IDX_S, - sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); - -- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); -+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); - - ud_sq_wqe->msg_len = htole32(sge_info->total_len); - -@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - wqe += sizeof(struct hns_roce_rc_sq_wqe); - dseg = wqe; - -- set_sge(dseg, qp, wr, sge_info); -+ set_rc_sge(dseg, qp, wr, sge_info); - - rc_sq_wqe->msg_len = htole32(sge_info->total_len); - --- -2.30.0 - diff --git a/0017-verbs-Add-generic-logging-API.patch b/0017-verbs-Add-generic-logging-API.patch deleted file mode 100644 index 6734c8fcd24fd00cb7fdada3b79b75f35fcef08b..0000000000000000000000000000000000000000 --- a/0017-verbs-Add-generic-logging-API.patch +++ /dev/null @@ -1,258 +0,0 @@ -From 1ea1524950b8bc4e4dfe06865e1e5c47a657b6e4 Mon Sep 17 00:00:00 2001 -From: Gal Pressman -Date: Sun, 6 Jun 2021 14:48:07 +0300 -Subject: verbs: Add generic logging API - -A debug prints mechanism is useful when debugging application failures. -This patch adds a generic API that can be used by all providers and -replace provider-specific counterparts. - -The debug messages are controlled through an environment variable named -VERBS_LOG_LEVEL, where the value indicates which prints should be -enabled: - -enum { - VERBS_LOG_LEVEL_NONE, - VERBS_LOG_ERR, - VERBS_LOG_WARN, - VERBS_LOG_INFO, - VERBS_LOG_DEBUG, -}; - -For example, to enable prints with level warn or higher, VERBS_LOG_LEVEL -shall be set to 2. - -The output shall be written to the file provided in the VERBS_LOG_FILE -environment variable. When the library is compiled in debug mode and no -file is provided the output shall be written to stderr. - -For data-path flows, where the overhead of the additional if statement -matters, the verbs_*_datapath() macros can be used, which will be -compiled out when the library is compiled for release. - -Signed-off-by: Gal Pressman ---- - Documentation/libibverbs.md | 18 ++++++++++ - buildlib/RDMA_BuildType.cmake | 4 +++ - libibverbs/driver.h | 50 +++++++++++++++++++++++++++ - libibverbs/init.c | 65 +++++++++++++++++++++++++++++++++++ - libibverbs/libibverbs.map.in | 1 + - 5 files changed, 138 insertions(+) - -diff --git a/Documentation/libibverbs.md b/Documentation/libibverbs.md -index cbe076e..980f354 100644 ---- a/Documentation/libibverbs.md -+++ b/Documentation/libibverbs.md -@@ -56,3 +56,21 @@ need to increase this limit. This is usually done for ordinary users - via the file /etc/security/limits.conf. More configuration may be - necessary if you are logging in via OpenSSH and your sshd is - configured to use privilege separation. -+ -+# Debugging -+ -+### Enabling debug prints -+ -+Library and providers debug prints can be enabled using the `VERBS_LOG_LEVEL` -+environment variable, the output shall be written to the file provided in the -+`VERBS_LOG_FILE` environment variable. When the library is compiled in debug -+mode and no file is provided the output will be written to stderr. -+ -+Note: some of the debug prints are only available when the library is compiled -+in debug mode. -+ -+The following table describes the expected behavior when VERBS_LOG_LEVEL is set: -+| | Release | Debug | -+|-----------------|---------------------------------|------------------------------------------------| -+| Regular prints | Output to VERBS_LOG_FILE if set | Output to VERBS_LOG_FILE, or stderr if not set | -+| Datapath prints | Compiled out, no output | Output to VERBS_LOG_FILE, or stderr if not set | -diff --git a/buildlib/RDMA_BuildType.cmake b/buildlib/RDMA_BuildType.cmake -index 17206f5..7a4f6a4 100644 ---- a/buildlib/RDMA_BuildType.cmake -+++ b/buildlib/RDMA_BuildType.cmake -@@ -39,4 +39,8 @@ function(RDMA_BuildType) - CACHE STRING "Default flags for RelWithDebInfo configuration" FORCE) - endif() - endforeach() -+ -+ if (CMAKE_BUILD_TYPE STREQUAL Debug OR CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo) -+ add_definitions("-DVERBS_DEBUG") -+ endif() - endfunction() -diff --git a/libibverbs/driver.h b/libibverbs/driver.h -index 926023b..bdb1aa4 100644 ---- a/libibverbs/driver.h -+++ b/libibverbs/driver.h -@@ -49,6 +49,56 @@ - - struct verbs_device; - -+enum { -+ VERBS_LOG_LEVEL_NONE, -+ VERBS_LOG_ERR, -+ VERBS_LOG_WARN, -+ VERBS_LOG_INFO, -+ VERBS_LOG_DEBUG, -+}; -+ -+void __verbs_log(struct verbs_context *ctx, uint32_t level, -+ const char *fmt, ...); -+ -+#define verbs_log(ctx, level, format, arg...) \ -+do { \ -+ int tmp = errno; \ -+ __verbs_log(ctx, level, "%s: %s:%d: " format, \ -+ (ctx)->context.device->name, __func__, __LINE__, ##arg); \ -+ errno = tmp; \ -+} while (0) -+ -+#define verbs_debug(ctx, format, arg...) \ -+ verbs_log(ctx, VERBS_LOG_DEBUG, format, ##arg) -+ -+#define verbs_info(ctx, format, arg...) \ -+ verbs_log(ctx, VERBS_LOG_INFO, format, ##arg) -+ -+#define verbs_warn(ctx, format, arg...) \ -+ verbs_log(ctx, VERBS_LOG_WARN, format, ##arg) -+ -+#define verbs_err(ctx, format, arg...) \ -+ verbs_log(ctx, VERBS_LOG_ERR, format, ##arg) -+ -+#ifdef VERBS_DEBUG -+#define verbs_log_datapath(ctx, level, format, arg...) \ -+ verbs_log(ctx, level, format, ##arg) -+#else -+#define verbs_log_datapath(ctx, level, format, arg...) {} -+#endif -+ -+#define verbs_debug_datapath(ctx, format, arg...) \ -+ verbs_log_datapath(ctx, VERBS_LOG_DEBUG, format, ##arg) -+ -+#define verbs_info_datapath(ctx, format, arg...) \ -+ verbs_log_datapath(ctx, VERBS_LOG_INFO, format, ##arg) -+ -+#define verbs_warn_datapath(ctx, format, arg...) \ -+ verbs_log_datapath(ctx, VERBS_LOG_WARN, format, ##arg) -+ -+#define verbs_err_datapath(ctx, format, arg...) \ -+ verbs_log_datapath(ctx, VERBS_LOG_ERR, format, ##arg) -+ - enum verbs_xrcd_mask { - VERBS_XRCD_HANDLE = 1 << 0, - VERBS_XRCD_RESERVED = 1 << 1 -diff --git a/libibverbs/init.c b/libibverbs/init.c -index f5340ea..52b166a 100644 ---- a/libibverbs/init.c -+++ b/libibverbs/init.c -@@ -36,6 +36,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -52,11 +53,30 @@ - #include - - #include -+#include "driver.h" - #include "ibverbs.h" - #include - - int abi_ver; - -+static uint32_t verbs_log_level; -+static FILE *verbs_log_fp; -+ -+__attribute__((format(printf, 3, 4))) -+void __verbs_log(struct verbs_context *ctx, uint32_t level, -+ const char *fmt, ...) -+{ -+ va_list args; -+ -+ if (level <= verbs_log_level) { -+ int tmp = errno; -+ va_start(args, fmt); -+ vfprintf(verbs_log_fp, fmt, args); -+ va_end(args); -+ errno = tmp; -+ } -+} -+ - struct ibv_driver { - struct list_node entry; - const struct verbs_device_ops *ops; -@@ -600,6 +620,49 @@ out: - return num_devices; - } - -+static void verbs_set_log_level(void) -+{ -+ char *env; -+ -+ env = getenv("VERBS_LOG_LEVEL"); -+ if (env) -+ verbs_log_level = strtol(env, NULL, 0); -+} -+ -+/* -+ * Fallback in case log file is not provided or can't be opened. -+ * Release mode: disable debug prints. -+ * Debug mode: Use stderr instead of a file. -+ */ -+static void verbs_log_file_fallback(void) -+{ -+#ifdef VERBS_DEBUG -+ verbs_log_fp = stderr; -+#else -+ verbs_log_level = VERBS_LOG_LEVEL_NONE; -+#endif -+} -+ -+static void verbs_set_log_file(void) -+{ -+ char *env; -+ -+ if (verbs_log_level == VERBS_LOG_LEVEL_NONE) -+ return; -+ -+ env = getenv("VERBS_LOG_FILE"); -+ if (!env) { -+ verbs_log_file_fallback(); -+ return; -+ } -+ -+ verbs_log_fp = fopen(env, "aw+"); -+ if (!verbs_log_fp) { -+ verbs_log_file_fallback(); -+ return; -+ } -+} -+ - int ibverbs_init(void) - { - char *env_value; -@@ -621,6 +684,8 @@ int ibverbs_init(void) - return -errno; - - check_memlock_limit(); -+ verbs_set_log_level(); -+ verbs_set_log_file(); - - return 0; - } -diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in -index 7c0fb6a..905f58f 100644 ---- a/libibverbs/libibverbs.map.in -+++ b/libibverbs/libibverbs.map.in -@@ -167,6 +167,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ { - global: - /* These historical symbols are now private to libibverbs */ - __ioctl_final_num_attrs; -+ __verbs_log; - _verbs_init_and_alloc_context; - execute_ioctl; - ibv_cmd_advise_mr; --- -2.27.0 - diff --git a/0018-libhns-Use-the-verbs-logging-API-instead-of-printf-f.patch b/0018-libhns-Use-the-verbs-logging-API-instead-of-printf-f.patch deleted file mode 100644 index e53fb41fa6bad59c74f01d11f5bdc0017e2818f4..0000000000000000000000000000000000000000 --- a/0018-libhns-Use-the-verbs-logging-API-instead-of-printf-f.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 7c9a7a5848d19b792d1b108da55fa48611142a9b Mon Sep 17 00:00:00 2001 -From: Gal Pressman -Date: Tue, 29 Jun 2021 10:43:29 +0300 -Subject: libhns: Use the verbs logging API instead of printf/fprintf - -Use the generic verbs logging API instead of calling printf/fprintf -directly. -This means that by default the prints will no longer be seen, but can be -enabled by setting VERBS_LOG_LEVEL appropriately. - -Signed-off-by: Gal Pressman ---- - providers/hns/hns_roce_u_hw_v1.c | 34 +++++++++++++++++++++----------- - providers/hns/hns_roce_u_hw_v2.c | 4 ++-- - providers/hns/hns_roce_u_verbs.c | 6 ++++-- - 3 files changed, 28 insertions(+), 16 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 279c9b0..6e107af 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -108,7 +108,6 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx, - static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe, - struct ibv_wc *wc) - { -- fprintf(stderr, PFX "error cqe!\n"); - switch (roce_get_field(cqe->cqe_byte_4, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) & -@@ -176,7 +175,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq) - static void *get_recv_wqe(struct hns_roce_qp *qp, int n) - { - if ((n < 0) || (n > qp->rq.wqe_cnt)) { -- printf("rq wqe index:%d,rq wqe cnt:%d\r\n", n, qp->rq.wqe_cnt); -+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), -+ "rq wqe index:%d,rq wqe cnt:%d\r\n", n, -+ qp->rq.wqe_cnt); - return NULL; - } - -@@ -186,7 +187,9 @@ static void *get_recv_wqe(struct hns_roce_qp *qp, int n) - static void *get_send_wqe(struct hns_roce_qp *qp, int n) - { - if ((n < 0) || (n > qp->sq.wqe_cnt)) { -- printf("sq wqe index:%d,sq wqe cnt:%d\r\n", n, qp->sq.wqe_cnt); -+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), -+ "sq wqe index:%d,sq wqe cnt:%d\r\n", n, -+ qp->sq.wqe_cnt); - return NULL; - } - -@@ -207,8 +210,9 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq, - cur = wq->head - wq->tail; - pthread_spin_unlock(&cq->lock); - -- printf("wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n", -- wq->head, wq->tail, wq->max_post, nreq); -+ verbs_err(verbs_get_ctx(cq->ibv_cq.context), -+ "wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n", -+ wq->head, wq->tail, wq->max_post, nreq); - - return cur + nreq >= wq->max_post; - } -@@ -221,7 +225,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, - if (ctx->qp_table[tind].refcnt) { - return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; - } else { -- printf("hns_roce_find_qp fail!\n"); -+ verbs_err(&ctx->ibv_ctx, "hns_roce_find_qp fail!\n"); - return NULL; - } - } -@@ -273,7 +277,8 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq, - *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context), - qpn & 0xffffff); - if (!*cur_qp) { -- fprintf(stderr, PFX "can't find qp!\n"); -+ verbs_err(verbs_get_ctx(cq->ibv_cq.context), -+ PFX "can't find qp!\n"); - return CQ_POLL_ERR; - } - } -@@ -312,6 +317,8 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq, - if (roce_get_field(cqe->cqe_byte_4, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) { -+ verbs_err(verbs_get_ctx(cq->ibv_cq.context), -+ PFX "error cqe!\n"); - hns_roce_handle_error_cqe(cqe, wc); - return CQ_OK; - } -@@ -475,8 +482,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, - if (wr->num_sge > qp->sq.max_gs) { - ret = -1; - *bad_wr = wr; -- printf("wr->num_sge(<=%d) = %d, check failed!\r\n", -- qp->sq.max_gs, wr->num_sge); -+ verbs_err(verbs_get_ctx(ibvqp->context), -+ "wr->num_sge(<=%d) = %d, check failed!\r\n", -+ qp->sq.max_gs, wr->num_sge); - goto out; - } - -@@ -544,8 +552,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, - if (le32toh(ctrl->msg_length) > qp->max_inline_data) { - ret = -1; - *bad_wr = wr; -- printf("inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n", -- wr->send_flags, ctrl->msg_length); -+ verbs_err(verbs_get_ctx(ibvqp->context), -+ "inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n", -+ wr->send_flags, ctrl->msg_length); - return ret; - } - -@@ -650,7 +659,8 @@ static int hns_roce_u_v1_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, - - if (!ret && (attr_mask & IBV_QP_PORT)) { - hr_qp->port_num = attr->port_num; -- printf("hr_qp->port_num= 0x%x\n", hr_qp->port_num); -+ verbs_err(verbs_get_ctx(qp->context), "hr_qp->port_num= 0x%x\n", -+ hr_qp->port_num); - } - - hr_qp->sl = attr->ah_attr.sl; -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 4c21720..d4b76b5 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -629,8 +629,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - - ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc, opcode); - if (ret) { -- fprintf(stderr, -- PFX "failed to handle recv inline wqe!\n"); -+ verbs_err(verbs_get_ctx(cq->ibv_cq.context), -+ PFX "failed to handle recv inline wqe!\n"); - return ret; - } - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 2a9e880..8840a9d 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -164,12 +164,14 @@ struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length, - struct ib_uverbs_reg_mr_resp resp; - - if (!addr) { -- fprintf(stderr, "2nd parm addr is NULL!\n"); -+ verbs_err(verbs_get_ctx(pd->context), -+ "2nd parm addr is NULL!\n"); - return NULL; - } - - if (!length) { -- fprintf(stderr, "3st parm length is 0!\n"); -+ verbs_err(verbs_get_ctx(pd->context), -+ "3st parm length is 0!\n"); - return NULL; - } - --- -2.27.0 - diff --git a/0019-libhns-The-function-declaration-should-be-the-same-a.patch b/0019-libhns-The-function-declaration-should-be-the-same-a.patch deleted file mode 100644 index 68c440a87233a7006fc3f24a898868750505fca6..0000000000000000000000000000000000000000 --- a/0019-libhns-The-function-declaration-should-be-the-same-a.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 4780e0a4c8cf2112425d04b939825a30603d87e6 Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Tue, 9 Nov 2021 20:41:03 +0800 -Subject: libhns: The function declaration should be the same as the definition - -The parameter names should be the same when the function is declared and -defined. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 21a5a6b..a5aa469 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -372,9 +372,9 @@ int hns_roce_u_free_pd(struct ibv_pd *pd); - - struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length, - uint64_t hca_va, int access); --int hns_roce_u_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, -+int hns_roce_u_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, - void *addr, size_t length, int access); --int hns_roce_u_dereg_mr(struct verbs_mr *mr); -+int hns_roce_u_dereg_mr(struct verbs_mr *vmr); - - struct ibv_mw *hns_roce_u_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type); - int hns_roce_u_dealloc_mw(struct ibv_mw *mw); --- -2.27.0 - diff --git a/0020-libhns-The-content-of-the-header-file-should-be-prot.patch b/0020-libhns-The-content-of-the-header-file-should-be-prot.patch deleted file mode 100644 index 4b3d72ea1a22e8ce72040a058f9a8aa1cafe24aa..0000000000000000000000000000000000000000 --- a/0020-libhns-The-content-of-the-header-file-should-be-prot.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 46c810472a1a6e3e093c21b6bcd43af0a0eda10b Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Tue, 9 Nov 2021 20:41:02 +0800 -Subject: libhns: The content of the header file should be protected with - #define - -Header files should be protected with #define to prevent repeated -inclusion. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u_db.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h -index 13df9b5..ca056c3 100644 ---- a/providers/hns/hns_roce_u_db.h -+++ b/providers/hns/hns_roce_u_db.h -@@ -29,14 +29,14 @@ - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -+#ifndef _HNS_ROCE_U_DB_H -+#define _HNS_ROCE_U_DB_H - - #include - - #include - #include "hns_roce_u.h" - --#ifndef _HNS_ROCE_U_DB_H --#define _HNS_ROCE_U_DB_H - - #define HNS_ROCE_WORD_NUM 2 - --- -2.27.0 - diff --git a/0021-libhns-Fix-wrong-type-of-variables-and-fields.patch b/0021-libhns-Fix-wrong-type-of-variables-and-fields.patch deleted file mode 100644 index c6bf051dbada36c4567b8024bc8dd8931b2e7d92..0000000000000000000000000000000000000000 --- a/0021-libhns-Fix-wrong-type-of-variables-and-fields.patch +++ /dev/null @@ -1,124 +0,0 @@ -From dc29ea131407fbbe93497059b61e3ef22a675df1 Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Tue, 9 Nov 2021 20:41:01 +0800 -Subject: libhns: Fix wrong type of variables and fields - -Some variables and fields should be in type of unsigned instead of signed. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u.h | 6 +++--- - providers/hns/hns_roce_u_hw_v1.c | 6 +++--- - providers/hns/hns_roce_u_hw_v2.c | 11 +++++------ - 3 files changed, 11 insertions(+), 12 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index a5aa469..92dc26c 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -101,7 +101,7 @@ - #define roce_set_bit(origin, shift, val) \ - roce_set_field((origin), (1ul << (shift)), (shift), (val)) - --#define hr_ilog32(n) ilog32((n) - 1) -+#define hr_ilog32(n) ilog32((unsigned int)(n) - 1) - - enum { - HNS_ROCE_QP_TABLE_BITS = 8, -@@ -205,7 +205,7 @@ struct hns_roce_cq { - - struct hns_roce_idx_que { - struct hns_roce_buf buf; -- int entry_shift; -+ unsigned int entry_shift; - unsigned long *bitmap; - int bitmap_cnt; - unsigned int head; -@@ -252,7 +252,7 @@ struct hns_roce_sge_info { - struct hns_roce_sge_ex { - int offset; - unsigned int sge_cnt; -- int sge_shift; -+ unsigned int sge_shift; - }; - - struct hns_roce_rinl_sge { -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 6e107af..838e004 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -220,7 +220,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq, - static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, - uint32_t qpn) - { -- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; - - if (ctx->qp_table[tind].refcnt) { - return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; -@@ -232,7 +232,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, - - static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn) - { -- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; - - if (!--ctx->qp_table[tind].refcnt) - free(ctx->qp_table[tind].table); -@@ -740,7 +740,7 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, - struct ibv_recv_wr **bad_wr) - { - int ret = 0; -- int nreq; -+ unsigned int nreq; - struct ibv_sge *sg; - struct hns_roce_rc_rq_wqe *rq_wqe; - struct hns_roce_qp *qp = to_hr_qp(ibvqp); -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index d4b76b5..d0df51a 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -248,7 +248,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) - return srq->wqe_buf.buf + (n << srq->wqe_shift); - } - --static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) -+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) - { - return idx_que->buf.buf + (n << idx_que->entry_shift); - } -@@ -352,7 +352,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, - static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, - uint32_t qpn) - { -- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; - - if (ctx->qp_table[tind].refcnt) - return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; -@@ -982,9 +982,8 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, - return ret; - } - --static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, -- struct ibv_send_wr *wr, int nreq, -- struct hns_roce_sge_info *sge_info) -+static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, -+ unsigned int nreq, struct hns_roce_sge_info *sge_info) - { - struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah); - struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe; -@@ -1140,7 +1139,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe, - } - - static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, -- int nreq, struct hns_roce_sge_info *sge_info) -+ unsigned int nreq, struct hns_roce_sge_info *sge_info) - { - struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe; - struct hns_roce_v2_wqe_data_seg *dseg; --- -2.27.0 - diff --git a/0022-libhns-Fix-wrong-print-format-for-unsigned-type.patch b/0022-libhns-Fix-wrong-print-format-for-unsigned-type.patch deleted file mode 100644 index 2188ab949a4b24c138b32c18d89d27408ab1f8dc..0000000000000000000000000000000000000000 --- a/0022-libhns-Fix-wrong-print-format-for-unsigned-type.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 031ccf570369d820dab067cf29fb17e338cd4b28 Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Tue, 9 Nov 2021 20:41:00 +0800 -Subject: libhns: Fix wrong print format for unsigned type - -Change %d printf fortmat to %u for unsigned int variant. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u_verbs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 8840a9d..923c005 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -73,7 +73,7 @@ int hns_roce_u_query_device(struct ibv_context *context, - sub_minor = raw_fw_ver & 0xffff; - - snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver), -- "%d.%d.%03d", major, minor, sub_minor); -+ "%u.%u.%03u", major, minor, sub_minor); - - return 0; - } --- -2.27.0 - diff --git a/0023-libhns-Remove-redundant-variable-initialization.patch b/0023-libhns-Remove-redundant-variable-initialization.patch deleted file mode 100644 index 8391fad10b6a01c3610d28515e77e3aa0efc9667..0000000000000000000000000000000000000000 --- a/0023-libhns-Remove-redundant-variable-initialization.patch +++ /dev/null @@ -1,33 +0,0 @@ -From e451dbaff5f0dd1715b6411169e970021cd43f4f Mon Sep 17 00:00:00 2001 -From: Yixing Liu -Date: Tue, 9 Nov 2021 20:40:59 +0800 -Subject: libhns: Remove redundant variable initialization - -The variable of owner_bit has been assigned before the reference, so there -is no need to initialize. - -Signed-off-by: Yixing Liu -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u_hw_v2.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index d0df51a..5fb6477 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1399,9 +1399,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, - { - int nfreed = 0; - bool is_recv_cqe; -+ uint8_t owner_bit; - uint16_t wqe_index; - uint32_t prod_index; -- uint8_t owner_bit = 0; - struct hns_roce_v2_cqe *cqe, *dest; - struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); - --- -2.27.0 - diff --git a/0024-libhns-Remove-unused-macros.patch b/0024-libhns-Remove-unused-macros.patch deleted file mode 100644 index 875ccec9eb582c35baeb3a72a2af9746666df1fb..0000000000000000000000000000000000000000 --- a/0024-libhns-Remove-unused-macros.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 21d81f659d801230a1ccf1aadf9b1ecba5a3ccd8 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Tue, 9 Nov 2021 20:40:57 +0800 -Subject: libhns: Remove unused macros - -These macros used to work, but are no longer used, they should be removed. - -Fixes: 516b8d4e4ebe ("providers: Use the new match_device and allocate_device ops") -Fixes: 887b78c80224 ("libhns: Add initial main frame") -Signed-off-by: Lang Cheng -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c -index 3b31ad3..9dc4905 100644 ---- a/providers/hns/hns_roce_u.c -+++ b/providers/hns/hns_roce_u.c -@@ -41,9 +41,6 @@ - - static void hns_roce_free_context(struct ibv_context *ibctx); - --#define HID_LEN 15 --#define DEV_MATCH_LEN 128 -- - #ifndef PCI_VENDOR_ID_HUAWEI - #define PCI_VENDOR_ID_HUAWEI 0x19E5 - #endif --- -2.27.0 - diff --git a/0025-libhns-Refactor-the-poll-one-interface.patch b/0025-libhns-Refactor-the-poll-one-interface.patch deleted file mode 100644 index dbcdf371185e0d8525e8762a6ae9c8912055c061..0000000000000000000000000000000000000000 --- a/0025-libhns-Refactor-the-poll-one-interface.patch +++ /dev/null @@ -1,545 +0,0 @@ -From 0851ae661c4fe4dd285c22c6acce462fc8004b8d Mon Sep 17 00:00:00 2001 -From: Yixian Liu -Date: Thu, 18 Nov 2021 22:46:10 +0800 -Subject: libhns: Refactor the poll one interface - -Mainly about: - -1. Separate the differences between various objects (such as sq, rq, srq) - into functions. -2. Optimize function names, variable names, and comments to increase code - readability. -3. Use map instead of switch branch to simplify the code. - -Signed-off-by: Yixian Liu -Signed-off-by: Yangyang Li -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 373 +++++++++++++++---------------- - providers/hns/hns_roce_u_hw_v2.h | 10 +- - 2 files changed, 189 insertions(+), 194 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 5fb6477..1b4e91b 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -154,59 +154,37 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr, - return 0; - } - --static void hns_roce_v2_handle_error_cqe(struct hns_roce_v2_cqe *cqe, -- struct ibv_wc *wc) --{ -- unsigned int status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, -- CQE_BYTE_4_STATUS_S); -- unsigned int cqe_status = status & HNS_ROCE_V2_CQE_STATUS_MASK; -+static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, -+ uint8_t status) -+{ -+ static const struct { -+ unsigned int cqe_status; -+ enum ibv_wc_status wc_status; -+ } map[] = { -+ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, -+ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, -+ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, -+ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR }, -+ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR }, -+ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR }, -+ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR }, -+ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR }, -+ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, -+ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, -+ }; - -- switch (cqe_status) { -- case HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR: -- wc->status = IBV_WC_LOC_LEN_ERR; -- break; -- case HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR: -- wc->status = IBV_WC_LOC_QP_OP_ERR; -- break; -- case HNS_ROCE_V2_CQE_LOCAL_PROT_ERR: -- wc->status = IBV_WC_LOC_PROT_ERR; -- break; -- case HNS_ROCE_V2_CQE_WR_FLUSH_ERR: -- wc->status = IBV_WC_WR_FLUSH_ERR; -- break; -- case HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR: -- wc->status = IBV_WC_MW_BIND_ERR; -- break; -- case HNS_ROCE_V2_CQE_BAD_RESP_ERR: -- wc->status = IBV_WC_BAD_RESP_ERR; -- break; -- case HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR: -- wc->status = IBV_WC_LOC_ACCESS_ERR; -- break; -- case HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR: -- wc->status = IBV_WC_REM_INV_REQ_ERR; -- break; -- case HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR: -- wc->status = IBV_WC_REM_ACCESS_ERR; -- break; -- case HNS_ROCE_V2_CQE_REMOTE_OP_ERR: -- wc->status = IBV_WC_REM_OP_ERR; -- break; -- case HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR: -- wc->status = IBV_WC_RETRY_EXC_ERR; -- break; -- case HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR: -- wc->status = IBV_WC_RNR_RETRY_EXC_ERR; -- break; -- case HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR: -- wc->status = IBV_WC_REM_ABORT_ERR; -- break; -- case HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR: -- wc->status = IBV_WC_REM_INV_RD_REQ_ERR; -- break; -- default: -- wc->status = IBV_WC_GENERAL_ERR; -- break; -+ int i; -+ -+ wc->status = IBV_WC_GENERAL_ERR; -+ for (i = 0; i < ARRAY_SIZE(map); i++) { -+ if (status == map[i].cqe_status) { -+ wc->status = map[i].wc_status; -+ break; -+ } - } - } - -@@ -268,6 +246,27 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind) - pthread_spin_unlock(&srq->lock); - } - -+static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe, -+ struct hns_roce_context *ctx, -+ struct hns_roce_qp *hr_qp, -+ struct hns_roce_srq **srq) -+{ -+ uint32_t srqn; -+ -+ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) { -+ srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M, -+ CQE_BYTE_12_XRC_SRQN_S); -+ -+ *srq = hns_roce_find_srq(ctx, srqn); -+ if (!*srq) -+ return -EINVAL; -+ } else if (hr_qp->verbs_qp.qp.srq) { -+ *srq = to_hr_srq(hr_qp->verbs_qp.qp.srq); -+ } -+ -+ return 0; -+} -+ - static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq, - struct hns_roce_cq *cq) - { -@@ -332,7 +331,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) - hns_roce_write512(qp->sq.db_reg, wqe); - } - --static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, -+static void update_cq_db(struct hns_roce_context *ctx, - struct hns_roce_cq *cq) - { - struct hns_roce_db cq_db = {}; -@@ -378,19 +377,17 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp) - static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, - int attr_mask); - --static int hns_roce_flush_cqe(struct hns_roce_qp **cur_qp, struct ibv_wc *wc) -+static int hns_roce_flush_cqe(struct hns_roce_qp *hr_qp, uint8_t status) - { - struct ibv_qp_attr attr; - int attr_mask; - -- if ((wc->status != IBV_WC_SUCCESS) && -- (wc->status != IBV_WC_WR_FLUSH_ERR)) { -+ if (status != HNS_ROCE_V2_CQE_WR_FLUSH_ERR) { - attr_mask = IBV_QP_STATE; - attr.qp_state = IBV_QPS_ERR; -- hns_roce_u_v2_modify_qp(&(*cur_qp)->verbs_qp.qp, &attr, -- attr_mask); -+ hns_roce_u_v2_modify_qp(&hr_qp->verbs_qp.qp, &attr, attr_mask); - -- (*cur_qp)->verbs_qp.qp.state = IBV_QPS_ERR; -+ hr_qp->verbs_qp.qp.state = IBV_QPS_ERR; - } - - return V2_CQ_OK; -@@ -409,41 +406,6 @@ static const unsigned int wc_send_op_map[] = { - [HNS_ROCE_SQ_OP_BIND_MW] = IBV_WC_BIND_MW, - }; - --static void hns_roce_v2_get_opcode_from_sender(struct hns_roce_v2_cqe *cqe, -- struct ibv_wc *wc) --{ -- uint32_t opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, -- CQE_BYTE_4_OPCODE_S); -- -- switch (opcode) { -- case HNS_ROCE_SQ_OP_SEND: -- case HNS_ROCE_SQ_OP_SEND_WITH_INV: -- case HNS_ROCE_SQ_OP_RDMA_WRITE: -- case HNS_ROCE_SQ_OP_BIND_MW: -- wc->wc_flags = 0; -- break; -- case HNS_ROCE_SQ_OP_SEND_WITH_IMM: -- case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM: -- wc->wc_flags = IBV_WC_WITH_IMM; -- break; -- case HNS_ROCE_SQ_OP_LOCAL_INV: -- wc->wc_flags = IBV_WC_WITH_INV; -- break; -- case HNS_ROCE_SQ_OP_RDMA_READ: -- case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP: -- case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD: -- wc->wc_flags = 0; -- wc->byte_len = le32toh(cqe->byte_cnt); -- break; -- default: -- wc->status = IBV_WC_GENERAL_ERR; -- wc->wc_flags = 0; -- return; -- } -- -- wc->opcode = wc_send_op_map[opcode]; --} -- - static const unsigned int wc_rcv_op_map[] = { - [HNS_ROCE_RECV_OP_RDMA_WRITE_IMM] = IBV_WC_RECV_RDMA_WITH_IMM, - [HNS_ROCE_RECV_OP_SEND] = IBV_WC_RECV, -@@ -451,9 +413,8 @@ static const unsigned int wc_rcv_op_map[] = { - [HNS_ROCE_RECV_OP_SEND_WITH_INV] = IBV_WC_RECV, - }; - --static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe, -- struct ibv_wc *wc, -- uint32_t opcode) -+static void get_opcode_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, -+ uint32_t opcode) - { - switch (opcode) { - case HNS_ROCE_RECV_OP_SEND: -@@ -476,9 +437,8 @@ static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe, - wc->opcode = wc_rcv_op_map[opcode]; - } - --static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, -- struct hns_roce_qp **cur_qp, -- struct ibv_wc *wc, uint32_t opcode) -+static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, -+ struct hns_roce_qp **cur_qp, uint32_t opcode) - { - if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) && - (opcode == HNS_ROCE_RECV_OP_SEND || -@@ -521,26 +481,117 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, - return V2_CQ_OK; - } - -+static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc) -+{ -+ wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M, -+ CQE_BYTE_32_SL_S); -+ wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M, -+ CQE_BYTE_32_RMT_QPN_S); -+ wc->slid = 0; -+ wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ? -+ IBV_WC_GRH : 0; -+ wc->pkey_index = 0; -+} -+ -+static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, -+ struct hns_roce_srq *srq) -+{ -+ uint32_t wqe_idx; -+ -+ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, -+ CQE_BYTE_4_WQE_IDX_S); -+ wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; -+ hns_roce_free_srq_wqe(srq, wqe_idx); -+} -+ -+static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, -+ struct hns_roce_qp *hr_qp, uint8_t opcode) -+{ -+ struct hns_roce_wq *wq; -+ int ret; -+ -+ wq = &hr_qp->rq; -+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -+ ++wq->tail; -+ -+ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD) -+ parse_for_ud_qp(cqe, wc); -+ -+ ret = handle_recv_inl_wqe(cqe, wc, &hr_qp, opcode); -+ if (ret) { -+ verbs_err(verbs_get_ctx(hr_qp->verbs_qp.qp.context), -+ PFX "failed to handle recv inline wqe!\n"); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, -+ struct hns_roce_qp *hr_qp, uint8_t opcode) -+{ -+ struct hns_roce_wq *wq; -+ uint32_t wqe_idx; -+ -+ wq = &hr_qp->sq; -+ /* -+ * in case of signalling, the tail pointer needs to be updated -+ * according to the wqe idx in the current cqe first -+ */ -+ if (hr_qp->sq_signal_bits) { -+ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, -+ CQE_BYTE_4_WQE_IDX_S); -+ /* get the processed wqes num since last signalling */ -+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); -+ } -+ /* write the wr_id of wq into the wc */ -+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -+ ++wq->tail; -+ -+ switch (opcode) { -+ case HNS_ROCE_SQ_OP_SEND: -+ case HNS_ROCE_SQ_OP_SEND_WITH_INV: -+ case HNS_ROCE_SQ_OP_RDMA_WRITE: -+ case HNS_ROCE_SQ_OP_BIND_MW: -+ wc->wc_flags = 0; -+ break; -+ case HNS_ROCE_SQ_OP_SEND_WITH_IMM: -+ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM: -+ wc->wc_flags = IBV_WC_WITH_IMM; -+ break; -+ case HNS_ROCE_SQ_OP_LOCAL_INV: -+ wc->wc_flags = IBV_WC_WITH_INV; -+ break; -+ case HNS_ROCE_SQ_OP_RDMA_READ: -+ case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP: -+ case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD: -+ wc->wc_flags = 0; -+ wc->byte_len = le32toh(cqe->byte_cnt); -+ break; -+ default: -+ wc->status = IBV_WC_GENERAL_ERR; -+ wc->wc_flags = 0; -+ return; -+ } -+ -+ wc->opcode = wc_send_op_map[opcode]; -+} -+ - static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - struct hns_roce_qp **cur_qp, struct ibv_wc *wc) - { - struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); - struct hns_roce_srq *srq = NULL; -- struct hns_roce_wq *wq = NULL; - struct hns_roce_v2_cqe *cqe; -- uint16_t wqe_ctr; -- uint32_t opcode; -- uint32_t srqn; -+ uint8_t opcode; -+ uint8_t status; - uint32_t qpn; -- int is_send; -- int ret; -+ bool is_send; - -- /* According to CI, find the relative cqe */ - cqe = next_cqe_sw_v2(cq); - if (!cqe) - return V2_CQ_EMPTY; - -- /* Get the next cqe, CI will be added gradually */ - ++cq->cons_index; - - udma_from_device_barrier(); -@@ -548,102 +599,48 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M, - CQE_BYTE_16_LCL_QPN_S); - -- is_send = (roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == -- HNS_ROCE_V2_CQE_IS_SQ); -- -- /* if qp is zero, it will not get the correct qpn */ -+ /* if cur qp is null, then could not get the correct qpn */ - if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) { - *cur_qp = hns_roce_v2_find_qp(ctx, qpn); - if (!*cur_qp) - return V2_CQ_POLL_ERR; - } -- wc->qp_num = qpn; - -- if ((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) { -- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M, -- CQE_BYTE_12_XRC_SRQN_S); -+ status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, -+ CQE_BYTE_4_STATUS_S); -+ opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, -+ CQE_BYTE_4_OPCODE_S); -+ is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ; -+ if (is_send) { -+ parse_cqe_for_req(cqe, wc, *cur_qp, opcode); -+ } else { -+ wc->byte_len = le32toh(cqe->byte_cnt); -+ get_opcode_for_resp(cqe, wc, opcode); - -- srq = hns_roce_find_srq(ctx, srqn); -- if (!srq) -+ if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq)) - return V2_CQ_POLL_ERR; -- } else if ((*cur_qp)->verbs_qp.qp.srq) { -- srq = to_hr_srq((*cur_qp)->verbs_qp.qp.srq); -- } - -- if (is_send) { -- wq = &(*cur_qp)->sq; -- /* -- * if sq_signal_bits is 1, the tail pointer first update to -- * the wqe corresponding the current cqe -- */ -- if ((*cur_qp)->sq_signal_bits) { -- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4, -- CQE_BYTE_4_WQE_IDX_M, -- CQE_BYTE_4_WQE_IDX_S)); -- /* -- * wq->tail will plus a positive number every time, -- * when wq->tail exceeds 32b, it is 0 and acc -- */ -- wq->tail += (wqe_ctr - (uint16_t) wq->tail) & -- (wq->wqe_cnt - 1); -+ if (srq) { -+ parse_cqe_for_srq(cqe, wc, srq); -+ } else { -+ if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode)) -+ return V2_CQ_POLL_ERR; - } -- /* write the wr_id of wq into the wc */ -- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -- ++wq->tail; -- } else if (srq) { -- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4, -- CQE_BYTE_4_WQE_IDX_M, -- CQE_BYTE_4_WQE_IDX_S)); -- wc->wr_id = srq->wrid[wqe_ctr & (srq->wqe_cnt - 1)]; -- hns_roce_free_srq_wqe(srq, wqe_ctr); -- } else { -- wq = &(*cur_qp)->rq; -- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -- ++wq->tail; - } - -+ wc->qp_num = qpn; -+ - /* -- * HW maintains wc status, set the err type and directly return, after -- * generated the incorrect CQE -+ * once a cqe in error status, the driver needs to help the HW to -+ * generated flushed cqes for all subsequent wqes - */ -- if (roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, -- CQE_BYTE_4_STATUS_S) != HNS_ROCE_V2_CQE_SUCCESS) { -- hns_roce_v2_handle_error_cqe(cqe, wc); -- return hns_roce_flush_cqe(cur_qp, wc); -+ if (status != HNS_ROCE_V2_CQE_SUCCESS) { -+ handle_error_cqe(cqe, wc, status); -+ return hns_roce_flush_cqe(*cur_qp, status); - } - - wc->status = IBV_WC_SUCCESS; - -- /* -- * According to the opcode type of cqe, mark the opcode and other -- * information of wc -- */ -- if (is_send) { -- hns_roce_v2_get_opcode_from_sender(cqe, wc); -- } else { -- /* Get opcode and flag in rq&srq */ -- wc->byte_len = le32toh(cqe->byte_cnt); -- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, -- CQE_BYTE_4_OPCODE_S) & HNS_ROCE_V2_CQE_OPCODE_MASK; -- hns_roce_v2_get_opcode_from_receiver(cqe, wc, opcode); -- -- ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc, opcode); -- if (ret) { -- verbs_err(verbs_get_ctx(cq->ibv_cq.context), -- PFX "failed to handle recv inline wqe!\n"); -- return ret; -- } -- -- wc->sl = (uint8_t)roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M, -- CQE_BYTE_32_SL_S); -- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M, -- CQE_BYTE_32_RMT_QPN_S); -- wc->slid = 0; -- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ? -- IBV_WC_GRH : 0; -- wc->pkey_index = 0; -- } -- - return V2_CQ_OK; - } - -@@ -668,7 +665,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, - if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) - *cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M; - else -- hns_roce_v2_update_cq_cons_index(ctx, cq); -+ update_cq_db(ctx, cq); - } - - pthread_spin_unlock(&cq->lock); -@@ -1438,7 +1435,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, - if (nfreed) { - cq->cons_index += nfreed; - udma_to_device_barrier(); -- hns_roce_v2_update_cq_cons_index(ctx, cq); -+ update_cq_db(ctx, cq); - } - } - -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index af72cd7..51a1df4 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -33,7 +33,10 @@ - #ifndef _HNS_ROCE_U_HW_V2_H - #define _HNS_ROCE_U_HW_V2_H - --#define HNS_ROCE_V2_CQE_IS_SQ 0 -+enum { -+ CQE_FOR_SQ, -+ CQE_FOR_RQ, -+}; - - #define HNS_ROCE_V2_CQ_DB_REQ_SOL 1 - #define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0 -@@ -94,11 +97,6 @@ enum { - V2_CQ_POLL_ERR = -2, - }; - --enum { -- HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, -- HNS_ROCE_V2_CQE_OPCODE_MASK = 0x1f, --}; -- - enum { - HNS_ROCE_V2_CQE_SUCCESS = 0x00, - HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR = 0x01, --- -2.27.0 - diff --git a/0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch b/0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch deleted file mode 100644 index efe74c85e5d06e51d25b3d7bbf99acffb2446c39..0000000000000000000000000000000000000000 --- a/0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 72f495e542c1c458e71fd6971f412edec41830e1 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Wed, 24 Nov 2021 19:03:54 +0800 -Subject: libhns: hr ilog32() should be represented by a function instead of a - macro - -The compiler will check whether the modifiers of the function are of the -correct type. - -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 92dc26c..c1ae1c9 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -101,8 +101,6 @@ - #define roce_set_bit(origin, shift, val) \ - roce_set_field((origin), (1ul << (shift)), (shift), (val)) - --#define hr_ilog32(n) ilog32((unsigned int)(n) - 1) -- - enum { - HNS_ROCE_QP_TABLE_BITS = 8, - HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS, -@@ -326,6 +324,11 @@ static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift) - return hr_hw_page_align(count << buf_shift); - } - -+static inline unsigned int hr_ilog32(unsigned int count) -+{ -+ return ilog32(count - 1); -+} -+ - static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev) - { - return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device); --- -2.27.0 - diff --git a/0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch b/0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch deleted file mode 100644 index f1faf9e3f5f0b5f3606702c7f32f3d830fb877cd..0000000000000000000000000000000000000000 --- a/0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 61911051eec0f984537c2762208b8ecbc875d5d3 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Fri, 26 Nov 2021 16:53:18 +0800 -Subject: libhns: Fix the size setting error when copying CQE in clean cq() - -The size of CQE is different for different versions of hardware, so the -driver needs to specify the size of CQE explicitly. - -Fixes: 3546e6b69ac8 ("libhns: Add support for CQE in size of 64 Bytes") -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 1b4e91b..b13b6dc 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1426,7 +1426,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, - (prod_index + nfreed) & cq->ibv_cq.cqe); - owner_bit = roce_get_bit(dest->byte_4, - CQE_BYTE_4_OWNER_S); -- memcpy(dest, cqe, sizeof(*cqe)); -+ memcpy(dest, cqe, cq->cqe_size); - roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S, - owner_bit); - } --- -2.27.0 - diff --git a/0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch b/0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch deleted file mode 100644 index d6ff8c08b5b5cc6369fa4124192d0bf4f0be1053..0000000000000000000000000000000000000000 --- a/0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 8fbb85bae3fd2632da80e77d02bbbe73aac85f88 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Fri, 26 Nov 2021 17:55:32 +0800 -Subject: libhns: Fix the problem that XRC does not need to create RQ - -XRC QP does not require RQ, so RQ should not be created. - -Fixes: 4ed874a5cf30 ("libhns: Add support for XRC for HIP09") -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_verbs.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 923c005..557d075 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -774,12 +774,22 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, - return 0; - } - -+static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr) -+{ -+ if (attr->qp_type == IBV_QPT_XRC_SEND || -+ attr->qp_type == IBV_QPT_XRC_RECV || attr->srq) -+ return 0; -+ -+ return 1; -+} -+ - static int verify_qp_create_cap(struct hns_roce_context *ctx, - struct ibv_qp_init_attr_ex *attr) - { - struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); - struct ibv_qp_cap *cap = &attr->cap; - uint32_t min_wqe_num; -+ int has_rq; - - if (!cap->max_send_wr && attr->qp_type != IBV_QPT_XRC_RECV) - return -EINVAL; -@@ -790,7 +800,8 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, - cap->max_recv_sge > ctx->max_sge) - return -EINVAL; - -- if (attr->srq) { -+ has_rq = hns_roce_qp_has_rq(attr); -+ if (!has_rq) { - cap->max_recv_wr = 0; - cap->max_recv_sge = 0; - } --- -2.27.0 - diff --git a/0029-libhns-Add-vendor_err-information-for-error-WC.patch b/0029-libhns-Add-vendor_err-information-for-error-WC.patch deleted file mode 100644 index 559315cab33a39aa654d8115ef73ba1f0cbe7685..0000000000000000000000000000000000000000 --- a/0029-libhns-Add-vendor_err-information-for-error-WC.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 29fd05367349c7909949206a13092031b689eca7 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Tue, 30 Nov 2021 20:46:14 +0800 -Subject: libhns: Add vendor_err information for error WC - -ULP can get more error information of CQ though verbs. - -Signed-off-by: Lang Cheng -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 3 +++ - providers/hns/hns_roce_u_hw_v2.h | 3 +++ - 2 files changed, 6 insertions(+) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index b13b6dc..18399e9 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -186,6 +186,9 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - break; - } - } -+ -+ wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M, -+ CQE_BYTE_16_SUB_STATUS_S); - } - - static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index 51a1df4..014cb8c 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -184,6 +184,9 @@ struct hns_roce_v2_cqe { - #define CQE_BYTE_16_LCL_QPN_S 0 - #define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S) - -+#define CQE_BYTE_16_SUB_STATUS_S 24 -+#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S) -+ - #define CQE_BYTE_28_SMAC_S 0 - #define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S) - --- -2.27.0 - diff --git a/0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch b/0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch deleted file mode 100644 index 1d045ab0b8a75973dd35db046699edaaba5d8fe0..0000000000000000000000000000000000000000 --- a/0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 46548879b84e8c502198a549d82ec079ebc8b9a0 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Thu, 2 Dec 2021 21:44:26 +0800 -Subject: libhns: Forcibly rewrite the inline flag of WQE - -When a non-inline WR reuses a WQE that was used for inline the last time, -the remaining inline flag should be cleared. - -Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE") -Fixes: 82fc508a6625 ("libhns: Add support for UD inline") -Signed-off-by: Lang Cheng -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 16 +++++++--------- - 1 file changed, 7 insertions(+), 9 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 18399e9..4eaa929 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -876,8 +876,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - if (!check_inl_data_len(qp, sge_info->total_len)) - return -EINVAL; - -- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S, 1); -- - if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) { - roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx, - UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0); -@@ -993,6 +991,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - !!(wr->send_flags & IBV_SEND_SIGNALED)); - roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S, - !!(wr->send_flags & IBV_SEND_SOLICITED)); -+ roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S, -+ !!(wr->send_flags & IBV_SEND_INLINE)); - - ret = check_ud_opcode(ud_sq_wqe, wr); - if (ret) -@@ -1044,8 +1044,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - - dseg += sizeof(struct hns_roce_rc_sq_wqe); - -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, 1); -- - if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) { - roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S, - 0); -@@ -1150,13 +1148,13 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - return ret; - - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S, -- (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0); -- -+ !!(wr->send_flags & IBV_SEND_SIGNALED)); - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S, -- (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0); -- -+ !!(wr->send_flags & IBV_SEND_FENCE)); - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S, -- (wr->send_flags & IBV_SEND_SOLICITED) ? 1 : 0); -+ !!(wr->send_flags & IBV_SEND_SOLICITED)); -+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, -+ !!(wr->send_flags & IBV_SEND_INLINE)); - - roce_set_field(rc_sq_wqe->byte_20, - RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, --- -2.27.0 - diff --git a/0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch b/0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch deleted file mode 100644 index a7d0d74c7e91f3c64b9c091592802c366fa42f1a..0000000000000000000000000000000000000000 --- a/0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 2194680136400d6a5f12298ff4993fa6f51c2e10 Mon Sep 17 00:00:00 2001 -From: Lang Cheng -Date: Wed, 8 Dec 2021 19:03:56 +0800 -Subject: libhns: Forcibly rewrite the strong-order flag of WQE - -The Local Invalid operation sets so flag, otherwise clears so flag. - -Fixes: a9ae7e9bfb5d ("libhns: Add local invalidate MR support for hip08") -Signed-off-by: Lang Cheng -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 4eaa929..cf871ab 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1143,10 +1143,6 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - struct hns_roce_v2_wqe_data_seg *dseg; - int ret; - -- ret = check_rc_opcode(rc_sq_wqe, wr); -- if (ret) -- return ret; -- - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S, - !!(wr->send_flags & IBV_SEND_SIGNALED)); - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S, -@@ -1155,6 +1151,11 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - !!(wr->send_flags & IBV_SEND_SOLICITED)); - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, - !!(wr->send_flags & IBV_SEND_INLINE)); -+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0); -+ -+ ret = check_rc_opcode(rc_sq_wqe, wr); -+ if (ret) -+ return ret; - - roce_set_field(rc_sq_wqe->byte_20, - RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, --- -2.27.0 - diff --git a/0032-util-Fix-mmio-memcpy-on-ARM.patch b/0032-util-Fix-mmio-memcpy-on-ARM.patch deleted file mode 100644 index 8f49e0d506f44224adcc91cfe71b942ee5068e11..0000000000000000000000000000000000000000 --- a/0032-util-Fix-mmio-memcpy-on-ARM.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 2a2e3ece2ff801e8d8e4915a56fe3fff8399d6a0 Mon Sep 17 00:00:00 2001 -From: Firas Jahjah -Date: Tue, 28 Dec 2021 15:58:37 +0200 -Subject: util: Fix mmio memcpy on ARM - -The below commit added a new implementation of mmio_memcpy_x64() for -ARM which was broken. The destination buffer must be advanced so we -don't copy to the same 64 bytes. - -Fixes: 159933c37 ("libhns: Add support for direct wqe") -Reviewed-by: Daniel Kranzdorf -Reviewed-by: Yossi Leybovich -Signed-off-by: Firas Jahjah ---- - util/mmio.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/util/mmio.h b/util/mmio.h -index 01d1455..5974058 100644 ---- a/util/mmio.h -+++ b/util/mmio.h -@@ -225,6 +225,7 @@ static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) - _mmio_memcpy_x64_64b(dest, src); - bytecnt -= sizeof(uint64x2x4_t); - src += sizeof(uint64x2x4_t); -+ dest += sizeof(uint64x2x4_t); - } while (bytecnt > 0); - } - --- -2.27.0 - diff --git a/0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch b/0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch deleted file mode 100644 index d501e6c0ca741c5b74622723e1caa03c7e4346e3..0000000000000000000000000000000000000000 --- a/0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch +++ /dev/null @@ -1,550 +0,0 @@ -From 532c4b6babe97e3023a049f1c6bd8a8e3ad95140 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Sat, 25 Dec 2021 17:42:55 +0800 -Subject: libhns: Use new interfaces hr reg ***() to operate the WQE field - -Use hr_reg_xxx() to simply the codes for filling fields. - -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 170 ++++++++++------------------ - providers/hns/hns_roce_u_hw_v2.h | 184 ++++++++++++++----------------- - 2 files changed, 144 insertions(+), 210 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index cf871ab..0cff12b 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -323,13 +323,10 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) - struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe; - - /* All kinds of DirectWQE have the same header field layout */ -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1); -- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M, -- RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl); -- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M, -- RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT); -- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M, -- RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); -+ hr_reg_enable(rc_sq_wqe, RCWQE_FLAG); -+ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_L, qp->sl); -+ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT); -+ hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head); - - hns_roce_write512(qp->sq.db_reg, wqe); - } -@@ -834,29 +831,15 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, - tmp += wr->sg_list[i].length; - } - -- roce_set_field(ud_sq_wqe->msg_len, -- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M, -- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S, -- *loc & 0xffff); -- -- roce_set_field(ud_sq_wqe->sge_num_pd, -- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M, -- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S, -- (*loc >> 16) & 0xff); -+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_15_0, *loc & 0xffff); -+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_23_16, (*loc >> 16) & 0xff); - - tmp_data = *loc >> 24; - loc++; - tmp_data |= ((*loc & 0xffff) << 8); - -- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx, -- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M, -- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S, -- tmp_data); -- -- roce_set_field(ud_sq_wqe->udpspn_rsv, -- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M, -- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S, -- *loc >> 16); -+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_47_24, tmp_data); -+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_63_48, *loc >> 16); - } - - static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) -@@ -877,13 +860,11 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - return -EINVAL; - - if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) { -- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx, -- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0); -+ hr_reg_clear(ud_sq_wqe, UDWQE_INLINE_TYPE); - - fill_ud_inn_inl_data(wr, ud_sq_wqe); - } else { -- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx, -- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 1); -+ hr_reg_enable(ud_sq_wqe, UDWQE_INLINE_TYPE); - - ret = fill_ext_sge_inl_data(qp, wr, sge_info); - if (ret) -@@ -891,8 +872,7 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - - sge_info->valid_num = sge_info->start_idx - sge_idx; - -- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M, -- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num); -+ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num); - } - - return 0; -@@ -919,8 +899,7 @@ static int check_ud_opcode(struct hns_roce_ud_sq_wqe *ud_sq_wqe, - - ud_sq_wqe->immtdata = get_immtdata(ib_op, wr); - -- roce_set_field(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OPCODE_M, -- UD_SQ_WQE_OPCODE_S, to_hr_opcode(ib_op)); -+ hr_reg_write(ud_sq_wqe, UDWQE_OPCODE, to_hr_opcode(ib_op)); - - return 0; - } -@@ -931,24 +910,12 @@ static int fill_ud_av(struct hns_roce_ud_sq_wqe *ud_sq_wqe, - if (unlikely(ah->av.sl > MAX_SERVICE_LEVEL)) - return EINVAL; - -- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_SL_M, -- UD_SQ_WQE_SL_S, ah->av.sl); -- -- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_PD_M, -- UD_SQ_WQE_PD_S, to_hr_pd(ah->ibv_ah.pd)->pdn); -- -- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_TCLASS_M, -- UD_SQ_WQE_TCLASS_S, ah->av.tclass); -- -- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_HOPLIMIT_M, -- UD_SQ_WQE_HOPLIMIT_S, ah->av.hop_limit); -- -- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_FLOW_LABEL_M, -- UD_SQ_WQE_FLOW_LABEL_S, ah->av.flowlabel); -- -- roce_set_field(ud_sq_wqe->udpspn_rsv, UD_SQ_WQE_UDP_SPN_M, -- UD_SQ_WQE_UDP_SPN_S, ah->av.udp_sport); -- -+ hr_reg_write(ud_sq_wqe, UDWQE_SL, ah->av.sl); -+ hr_reg_write(ud_sq_wqe, UDWQE_PD, to_hr_pd(ah->ibv_ah.pd)->pdn); -+ hr_reg_write(ud_sq_wqe, UDWQE_TCLASS, ah->av.tclass); -+ hr_reg_write(ud_sq_wqe, UDWQE_HOPLIMIT, ah->av.hop_limit); -+ hr_reg_write(ud_sq_wqe, UDWQE_FLOW_LABEL, ah->av.flowlabel); -+ hr_reg_write(ud_sq_wqe, UDWQE_UDPSPN, ah->av.udp_sport); - memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); - ud_sq_wqe->sgid_index = ah->av.gid_index; - memcpy(ud_sq_wqe->dgid, ah->av.dgid, HNS_ROCE_GID_SIZE); -@@ -962,17 +929,14 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, - { - int ret = 0; - -- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx, -- UD_SQ_WQE_MSG_START_SGE_IDX_M, -- UD_SQ_WQE_MSG_START_SGE_IDX_S, -- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); -+ hr_reg_write(ud_sq_wqe, UDWQE_MSG_START_SGE_IDX, -+ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); - - set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); - - ud_sq_wqe->msg_len = htole32(sge_info->total_len); - -- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M, -- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num); -+ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num); - - if (wr->send_flags & IBV_SEND_INLINE) - ret = set_ud_inl(qp, wr, ud_sq_wqe, sge_info); -@@ -987,12 +951,12 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe; - int ret = 0; - -- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_CQE_S, -- !!(wr->send_flags & IBV_SEND_SIGNALED)); -- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S, -- !!(wr->send_flags & IBV_SEND_SOLICITED)); -- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S, -- !!(wr->send_flags & IBV_SEND_INLINE)); -+ hr_reg_write_bool(ud_sq_wqe, UDWQE_CQE, -+ !!(wr->send_flags & IBV_SEND_SIGNALED)); -+ hr_reg_write_bool(ud_sq_wqe, UDWQE_SE, -+ !!(wr->send_flags & IBV_SEND_SOLICITED)); -+ hr_reg_write_bool(ud_sq_wqe, UDWQE_INLINE, -+ !!(wr->send_flags & IBV_SEND_INLINE)); - - ret = check_ud_opcode(ud_sq_wqe, wr); - if (ret) -@@ -1001,8 +965,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - ud_sq_wqe->qkey = htole32(wr->wr.ud.remote_qkey & 0x80000000 ? - qp->qkey : wr->wr.ud.remote_qkey); - -- roce_set_field(ud_sq_wqe->rsv_dqpn, UD_SQ_WQE_DQPN_M, -- UD_SQ_WQE_DQPN_S, wr->wr.ud.remote_qpn); -+ hr_reg_write(ud_sq_wqe, UDWQE_DQPN, wr->wr.ud.remote_qpn); - - ret = fill_ud_av(ud_sq_wqe, ah); - if (ret) -@@ -1021,8 +984,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) - udma_to_device_barrier(); - -- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OWNER_S, -- ~((qp->sq.head + nreq) >> qp->sq.shift)); -+ hr_reg_write_bool(wqe, RCWQE_OWNER, -+ !((qp->sq.head + nreq) & BIT(qp->sq.shift))); - - return ret; - } -@@ -1045,8 +1008,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - dseg += sizeof(struct hns_roce_rc_sq_wqe); - - if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) { -- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S, -- 0); -+ hr_reg_clear(rc_sq_wqe, RCWQE_INLINE_TYPE); - - for (i = 0; i < wr->num_sge; i++) { - memcpy(dseg, (void *)(uintptr_t)(wr->sg_list[i].addr), -@@ -1054,8 +1016,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - dseg += wr->sg_list[i].length; - } - } else { -- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S, -- 1); -+ hr_reg_enable(rc_sq_wqe, RCWQE_INLINE_TYPE); - - ret = fill_ext_sge_inl_data(qp, wr, sge_info); - if (ret) -@@ -1063,9 +1024,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - - sge_info->valid_num = sge_info->start_idx - sge_idx; - -- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M, -- RC_SQ_WQE_BYTE_16_SGE_NUM_S, -- sge_info->valid_num); -+ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num); - } - - return 0; -@@ -1074,17 +1033,16 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe, - const struct ibv_send_wr *wr) - { -- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_MW_TYPE_S, -- wr->bind_mw.mw->type - 1); -- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_ATOMIC_S, -- (wr->bind_mw.bind_info.mw_access_flags & -- IBV_ACCESS_REMOTE_ATOMIC) ? 1 : 0); -- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_READ_S, -- (wr->bind_mw.bind_info.mw_access_flags & -- IBV_ACCESS_REMOTE_READ) ? 1 : 0); -- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_WRITE_S, -- (wr->bind_mw.bind_info.mw_access_flags & -- IBV_ACCESS_REMOTE_WRITE) ? 1 : 0); -+ unsigned int access = wr->bind_mw.bind_info.mw_access_flags; -+ -+ hr_reg_write_bool(wqe, RCWQE_MW_TYPE, wr->bind_mw.mw->type - 1); -+ hr_reg_write_bool(wqe, RCWQE_MW_RA_EN, -+ !!(access & IBV_ACCESS_REMOTE_ATOMIC)); -+ hr_reg_write_bool(wqe, RCWQE_MW_RR_EN, -+ !!(access & IBV_ACCESS_REMOTE_READ)); -+ hr_reg_write_bool(wqe, RCWQE_MW_RW_EN, -+ !!(access & IBV_ACCESS_REMOTE_WRITE)); -+ - wqe->new_rkey = htole32(wr->bind_mw.rkey); - wqe->byte_16 = htole32(wr->bind_mw.bind_info.length & - HNS_ROCE_ADDRESS_MASK); -@@ -1117,7 +1075,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe, - wqe->va = htole64(wr->wr.atomic.remote_addr); - break; - case IBV_WR_LOCAL_INV: -- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 1); -+ hr_reg_enable(wqe, RCWQE_SO); - /* fallthrough */ - case IBV_WR_SEND_WITH_INV: - wqe->inv_key = htole32(wr->invalidate_rkey); -@@ -1130,8 +1088,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe, - break; - } - -- roce_set_field(wqe->byte_4, RC_SQ_WQE_BYTE_4_OPCODE_M, -- RC_SQ_WQE_BYTE_4_OPCODE_S, to_hr_opcode(wr->opcode)); -+ hr_reg_write(wqe, RCWQE_OPCODE, to_hr_opcode(wr->opcode)); - - return ret; - } -@@ -1143,24 +1100,22 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - struct hns_roce_v2_wqe_data_seg *dseg; - int ret; - -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S, -- !!(wr->send_flags & IBV_SEND_SIGNALED)); -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S, -- !!(wr->send_flags & IBV_SEND_FENCE)); -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S, -- !!(wr->send_flags & IBV_SEND_SOLICITED)); -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, -- !!(wr->send_flags & IBV_SEND_INLINE)); -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0); -+ hr_reg_write_bool(wqe, RCWQE_CQE, -+ !!(wr->send_flags & IBV_SEND_SIGNALED)); -+ hr_reg_write_bool(wqe, RCWQE_FENCE, -+ !!(wr->send_flags & IBV_SEND_FENCE)); -+ hr_reg_write_bool(wqe, RCWQE_SE, -+ !!(wr->send_flags & IBV_SEND_SOLICITED)); -+ hr_reg_write_bool(wqe, RCWQE_INLINE, -+ !!(wr->send_flags & IBV_SEND_INLINE)); -+ hr_reg_clear(wqe, RCWQE_SO); - - ret = check_rc_opcode(rc_sq_wqe, wr); - if (ret) - return ret; - -- roce_set_field(rc_sq_wqe->byte_20, -- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, -- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S, -- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); -+ hr_reg_write(rc_sq_wqe, RCWQE_MSG_START_SGE_IDX, -+ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); - - if (wr->opcode == IBV_WR_BIND_MW) - goto wqe_valid; -@@ -1172,8 +1127,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - - rc_sq_wqe->msg_len = htole32(sge_info->total_len); - -- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M, -- RC_SQ_WQE_BYTE_16_SGE_NUM_S, sge_info->valid_num); -+ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num); - - if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD || - wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { -@@ -1196,8 +1150,8 @@ wqe_valid: - if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) - udma_to_device_barrier(); - -- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S, -- ~((qp->sq.head + nreq) >> qp->sq.shift)); -+ hr_reg_write_bool(wqe, RCWQE_OWNER, -+ !((qp->sq.head + nreq) & BIT(qp->sq.shift))); - - return 0; - } -@@ -1243,10 +1197,8 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, - - switch (ibvqp->qp_type) { - case IBV_QPT_XRC_SEND: -- roce_set_field(wqe->byte_16, -- RC_SQ_WQE_BYTE_16_XRC_SRQN_M, -- RC_SQ_WQE_BYTE_16_XRC_SRQN_S, -- wr->qp_type.xrc.remote_srqn); -+ hr_reg_write(wqe, RCWQE_XRC_SRQN, -+ wr->qp_type.xrc.remote_srqn); - SWITCH_FALLTHROUGH; - case IBV_QPT_RC: - ret = set_rc_wqe(wqe, qp, wr, nreq, &sge_info); -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index 014cb8c..4330b7d 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -220,53 +220,44 @@ struct hns_roce_rc_sq_wqe { - __le64 va; - }; - --#define RC_SQ_WQE_BYTE_4_OPCODE_S 0 --#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) -- --#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5 --#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) -- --#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13 --#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) -- --#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15 --#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) -- --#define RC_SQ_WQE_BYTE_4_OWNER_S 7 -- --#define RC_SQ_WQE_BYTE_4_CQE_S 8 -- --#define RC_SQ_WQE_BYTE_4_FENCE_S 9 -- --#define RC_SQ_WQE_BYTE_4_SO_S 10 -- --#define RC_SQ_WQE_BYTE_4_SE_S 11 -- --#define RC_SQ_WQE_BYTE_4_INLINE_S 12 -- --#define RC_SQ_WQE_BYTE_4_MW_TYPE_S 14 -- --#define RC_SQ_WQE_BYTE_4_ATOMIC_S 20 -- --#define RC_SQ_WQE_BYTE_4_RDMA_READ_S 21 -- --#define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22 -- --#define RC_SQ_WQE_BYTE_4_FLAG_S 31 -- --#define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0 --#define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \ -- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S) -- --#define RC_SQ_WQE_BYTE_16_SGE_NUM_S 24 --#define RC_SQ_WQE_BYTE_16_SGE_NUM_M \ -- (((1UL << 8) - 1) << RC_SQ_WQE_BYTE_16_SGE_NUM_S) -- --#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 --#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M \ -- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S) -- --#define RC_SQ_WQE_BYTE_20_INL_TYPE_S 31 -+#define RCWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_rc_sq_wqe, h, l) -+ -+#define RCWQE_OPCODE RCWQE_FIELD_LOC(4, 0) -+#define RCWQE_DB_SL_L RCWQE_FIELD_LOC(6, 5) -+#define RCWQE_SQPN_L RCWQE_FIELD_LOC(6, 5) -+#define RCWQE_OWNER RCWQE_FIELD_LOC(7, 7) -+#define RCWQE_CQE RCWQE_FIELD_LOC(8, 8) -+#define RCWQE_FENCE RCWQE_FIELD_LOC(9, 9) -+#define RCWQE_SO RCWQE_FIELD_LOC(10, 10) -+#define RCWQE_SE RCWQE_FIELD_LOC(11, 11) -+#define RCWQE_INLINE RCWQE_FIELD_LOC(12, 12) -+#define RCWQE_DB_SL_H RCWQE_FIELD_LOC(14, 13) -+#define RCWQE_WQE_IDX RCWQE_FIELD_LOC(30, 15) -+#define RCWQE_SQPN_H RCWQE_FIELD_LOC(30, 13) -+#define RCWQE_FLAG RCWQE_FIELD_LOC(31, 31) -+#define RCWQE_MSG_LEN RCWQE_FIELD_LOC(63, 32) -+#define RCWQE_INV_KEY_IMMTDATA RCWQE_FIELD_LOC(95, 64) -+#define RCWQE_XRC_SRQN RCWQE_FIELD_LOC(119, 96) -+#define RCWQE_SGE_NUM RCWQE_FIELD_LOC(127, 120) -+#define RCWQE_MSG_START_SGE_IDX RCWQE_FIELD_LOC(151, 128) -+#define RCWQE_REDUCE_CODE RCWQE_FIELD_LOC(158, 152) -+#define RCWQE_INLINE_TYPE RCWQE_FIELD_LOC(159, 159) -+#define RCWQE_RKEY RCWQE_FIELD_LOC(191, 160) -+#define RCWQE_VA_L RCWQE_FIELD_LOC(223, 192) -+#define RCWQE_VA_H RCWQE_FIELD_LOC(255, 224) -+#define RCWQE_LEN0 RCWQE_FIELD_LOC(287, 256) -+#define RCWQE_LKEY0 RCWQE_FIELD_LOC(319, 288) -+#define RCWQE_VA0_L RCWQE_FIELD_LOC(351, 320) -+#define RCWQE_VA0_H RCWQE_FIELD_LOC(383, 352) -+#define RCWQE_LEN1 RCWQE_FIELD_LOC(415, 384) -+#define RCWQE_LKEY1 RCWQE_FIELD_LOC(447, 416) -+#define RCWQE_VA1_L RCWQE_FIELD_LOC(479, 448) -+#define RCWQE_VA1_H RCWQE_FIELD_LOC(511, 480) -+ -+#define RCWQE_MW_TYPE RCWQE_FIELD_LOC(256, 256) -+#define RCWQE_MW_RA_EN RCWQE_FIELD_LOC(258, 258) -+#define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259) -+#define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260) - - struct hns_roce_v2_wqe_data_seg { - __le32 len; -@@ -323,60 +314,51 @@ struct hns_roce_ud_sq_wqe { - uint8_t dgid[HNS_ROCE_GID_SIZE]; - }; - --#define UD_SQ_WQE_OPCODE_S 0 --#define UD_SQ_WQE_OPCODE_M GENMASK(4, 0) -- --#define UD_SQ_WQE_OWNER_S 7 -- --#define UD_SQ_WQE_CQE_S 8 -- --#define UD_SQ_WQE_SE_S 11 -- --#define UD_SQ_WQE_PD_S 0 --#define UD_SQ_WQE_PD_M GENMASK(23, 0) -- --#define UD_SQ_WQE_SGE_NUM_S 24 --#define UD_SQ_WQE_SGE_NUM_M GENMASK(31, 24) -- --#define UD_SQ_WQE_MSG_START_SGE_IDX_S 0 --#define UD_SQ_WQE_MSG_START_SGE_IDX_M GENMASK(23, 0) -- --#define UD_SQ_WQE_UDP_SPN_S 16 --#define UD_SQ_WQE_UDP_SPN_M GENMASK(31, 16) -- --#define UD_SQ_WQE_DQPN_S 0 --#define UD_SQ_WQE_DQPN_M GENMASK(23, 0) -- --#define UD_SQ_WQE_VLAN_S 0 --#define UD_SQ_WQE_VLAN_M GENMASK(15, 0) -- --#define UD_SQ_WQE_HOPLIMIT_S 16 --#define UD_SQ_WQE_HOPLIMIT_M GENMASK(23, 16) -- --#define UD_SQ_WQE_TCLASS_S 24 --#define UD_SQ_WQE_TCLASS_M GENMASK(31, 24) -- --#define UD_SQ_WQE_FLOW_LABEL_S 0 --#define UD_SQ_WQE_FLOW_LABEL_M GENMASK(19, 0) -- --#define UD_SQ_WQE_SL_S 20 --#define UD_SQ_WQE_SL_M GENMASK(23, 20) -- --#define UD_SQ_WQE_VLAN_EN_S 30 -- --#define UD_SQ_WQE_LBI_S 31 -- --#define UD_SQ_WQE_BYTE_4_INL_S 12 --#define UD_SQ_WQE_BYTE_20_INL_TYPE_S 31 -- --#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S 16 --#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M GENMASK(31, 16) --#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S 24 --#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M GENMASK(31, 24) --#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S 0 --#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M GENMASK(23, 0) --#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S 0 --#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M GENMASK(15, 0) -+#define UDWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ud_sq_wqe, h, l) -+ -+#define UDWQE_OPCODE UDWQE_FIELD_LOC(4, 0) -+#define UDWQE_DB_SL_L UDWQE_FIELD_LOC(6, 5) -+#define UDWQE_OWNER UDWQE_FIELD_LOC(7, 7) -+#define UDWQE_CQE UDWQE_FIELD_LOC(8, 8) -+#define UDWQE_RSVD1 UDWQE_FIELD_LOC(10, 9) -+#define UDWQE_SE UDWQE_FIELD_LOC(11, 11) -+#define UDWQE_INLINE UDWQE_FIELD_LOC(12, 12) -+#define UDWQE_DB_SL_H UDWQE_FIELD_LOC(14, 13) -+#define UDWQE_WQE_IDX UDWQE_FIELD_LOC(30, 15) -+#define UDWQE_FLAG UDWQE_FIELD_LOC(31, 31) -+#define UDWQE_MSG_LEN UDWQE_FIELD_LOC(63, 32) -+#define UDWQE_IMMTDATA UDWQE_FIELD_LOC(95, 64) -+#define UDWQE_PD UDWQE_FIELD_LOC(119, 96) -+#define UDWQE_SGE_NUM UDWQE_FIELD_LOC(127, 120) -+#define UDWQE_MSG_START_SGE_IDX UDWQE_FIELD_LOC(151, 128) -+#define UDWQE_RSVD3 UDWQE_FIELD_LOC(158, 152) -+#define UDWQE_INLINE_TYPE UDWQE_FIELD_LOC(159, 159) -+#define UDWQE_RSVD4 UDWQE_FIELD_LOC(175, 160) -+#define UDWQE_UDPSPN UDWQE_FIELD_LOC(191, 176) -+#define UDWQE_QKEY UDWQE_FIELD_LOC(223, 192) -+#define UDWQE_DQPN UDWQE_FIELD_LOC(247, 224) -+#define UDWQE_RSVD5 UDWQE_FIELD_LOC(255, 248) -+#define UDWQE_VLAN UDWQE_FIELD_LOC(271, 256) -+#define UDWQE_HOPLIMIT UDWQE_FIELD_LOC(279, 272) -+#define UDWQE_TCLASS UDWQE_FIELD_LOC(287, 280) -+#define UDWQE_FLOW_LABEL UDWQE_FIELD_LOC(307, 288) -+#define UDWQE_SL UDWQE_FIELD_LOC(311, 308) -+#define UDWQE_PORTN UDWQE_FIELD_LOC(314, 312) -+#define UDWQE_RSVD6 UDWQE_FIELD_LOC(317, 315) -+#define UDWQE_UD_VLAN_EN UDWQE_FIELD_LOC(318, 318) -+#define UDWQE_LBI UDWQE_FIELD_LOC(319, 319) -+#define UDWQE_DMAC_L UDWQE_FIELD_LOC(351, 320) -+#define UDWQE_DMAC_H UDWQE_FIELD_LOC(367, 352) -+#define UDWQE_GMV_IDX UDWQE_FIELD_LOC(383, 368) -+#define UDWQE_DGID0 UDWQE_FIELD_LOC(415, 384) -+#define UDWQE_DGID1 UDWQE_FIELD_LOC(447, 416) -+#define UDWQE_DGID2 UDWQE_FIELD_LOC(479, 448) -+#define UDWQE_DGID3 UDWQE_FIELD_LOC(511, 480) -+ -+#define UDWQE_INLINE_DATA_15_0 UDWQE_FIELD_LOC(63, 48) -+#define UDWQE_INLINE_DATA_23_16 UDWQE_FIELD_LOC(127, 120) -+#define UDWQE_INLINE_DATA_47_24 UDWQE_FIELD_LOC(151, 128) -+#define UDWQE_INLINE_DATA_63_48 UDWQE_FIELD_LOC(175, 160) - - #define MAX_SERVICE_LEVEL 0x7 - --- -2.27.0 - diff --git a/0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch b/0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch deleted file mode 100644 index 8fecc9558d67988d6cbd0518bab34fa63e65c56e..0000000000000000000000000000000000000000 --- a/0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch +++ /dev/null @@ -1,186 +0,0 @@ -From 49263de90f77f218710ef45bc0377d3e2019d811 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Sat, 25 Dec 2021 17:42:54 +0800 -Subject: libhns: Use new interfaces hr reg ***() to operate the DB field - -Use hr_reg_xxx() to simply the codes for filling fields. - -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 59 ++++++++++++++------------------ - providers/hns/hns_roce_u_hw_v2.h | 30 ++++++---------- - 2 files changed, 35 insertions(+), 54 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 0cff12b..e7dec0b 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -291,10 +291,9 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, - { - struct hns_roce_db rq_db = {}; - -- rq_db.byte_4 = htole32(qpn); -- roce_set_field(rq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, -- HNS_ROCE_V2_RQ_DB); -- rq_db.parameter = htole32(rq_head); -+ hr_reg_write(&rq_db, DB_TAG, qpn); -+ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB); -+ hr_reg_write(&rq_db, DB_PI, rq_head); - - hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db); - } -@@ -304,12 +303,11 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, - { - struct hns_roce_db sq_db = {}; - -- sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num); -- roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, -- HNS_ROCE_V2_SQ_DB); -+ hr_reg_write(&sq_db, DB_TAG, qp->verbs_qp.qp.qp_num); -+ hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB); -+ hr_reg_write(&sq_db, DB_PI, qp->sq.head); -+ hr_reg_write(&sq_db, DB_SL, qp->sl); - -- sq_db.parameter = htole32(qp->sq.head); -- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl); - hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db); - } - -@@ -336,14 +334,10 @@ static void update_cq_db(struct hns_roce_context *ctx, - { - struct hns_roce_db cq_db = {}; - -- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn); -- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, -- HNS_ROCE_V2_CQ_DB_PTR); -- -- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M, -- DB_PARAM_CQ_CONSUMER_IDX_S, cq->cons_index); -- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M, -- DB_PARAM_CQ_CMD_SN_S, 1); -+ hr_reg_write(&cq_db, DB_TAG, cq->cqn); -+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_PTR); -+ hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index); -+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1); - - hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); - } -@@ -663,7 +657,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, - - if (npolled || err == V2_CQ_POLL_ERR) { - if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) -- *cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M; -+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK; - else - update_cq_db(ctx, cq); - } -@@ -679,24 +673,17 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) - struct hns_roce_cq *cq = to_hr_cq(ibvcq); - struct hns_roce_db cq_db = {}; - uint32_t solicited_flag; -- uint32_t cmd_sn; - uint32_t ci; - - ci = cq->cons_index & ((cq->cq_depth << 1) - 1); -- cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK; - solicited_flag = solicited ? HNS_ROCE_V2_CQ_DB_REQ_SOL : - HNS_ROCE_V2_CQ_DB_REQ_NEXT; - -- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn); -- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, -- HNS_ROCE_V2_CQ_DB_NTR); -- -- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M, -- DB_PARAM_CQ_CONSUMER_IDX_S, ci); -- -- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M, -- DB_PARAM_CQ_CMD_SN_S, cmd_sn); -- roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag); -+ hr_reg_write(&cq_db, DB_TAG, cq->cqn); -+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_NTR); -+ hr_reg_write(&cq_db, DB_CQ_CI, ci); -+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn); -+ hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag); - - hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); - -@@ -1626,6 +1613,13 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) - idx_que->head++; - } - -+static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq) -+{ -+ hr_reg_write(db, DB_TAG, srq->srqn); -+ hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB); -+ hr_reg_write(db, DB_PI, srq->idx_que.head); -+} -+ - static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - struct ibv_recv_wr *wr, - struct ibv_recv_wr **bad_wr) -@@ -1665,10 +1659,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - */ - udma_to_device_barrier(); - -- srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S | -- srq->srqn); -- srq_db.parameter = htole32(srq->idx_que.head & -- DB_PARAM_SRQ_PRODUCER_COUNTER_M); -+ update_srq_db(&srq_db, srq); - - hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, - (__le32 *)&srq_db); -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index 4330b7d..e91b1f7 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -41,8 +41,6 @@ enum { - #define HNS_ROCE_V2_CQ_DB_REQ_SOL 1 - #define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0 - --#define HNS_ROCE_CMDSN_MASK 0x3 -- - #define HNS_ROCE_SL_SHIFT 2 - - /* V2 REG DEFINITION */ -@@ -127,27 +125,19 @@ struct hns_roce_db { - __le32 byte_4; - __le32 parameter; - }; --#define DB_BYTE_4_TAG_S 0 --#define DB_BYTE_4_TAG_M GENMASK(23, 0) -- --#define DB_BYTE_4_CMD_S 24 --#define DB_BYTE_4_CMD_M GENMASK(27, 24) -- --#define DB_BYTE_4_FLAG_S 31 -- --#define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0 --#define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0) -- --#define DB_PARAM_SL_S 16 --#define DB_PARAM_SL_M GENMASK(18, 16) - --#define DB_PARAM_CQ_CONSUMER_IDX_S 0 --#define DB_PARAM_CQ_CONSUMER_IDX_M GENMASK(23, 0) -+#define DB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_db, h, l) - --#define DB_PARAM_CQ_NOTIFY_S 24 -+#define DB_TAG DB_FIELD_LOC(23, 0) -+#define DB_CMD DB_FIELD_LOC(27, 24) -+#define DB_FLAG DB_FIELD_LOC(31, 31) -+#define DB_PI DB_FIELD_LOC(47, 32) -+#define DB_SL DB_FIELD_LOC(50, 48) -+#define DB_CQ_CI DB_FIELD_LOC(55, 32) -+#define DB_CQ_NOTIFY DB_FIELD_LOC(56, 56) -+#define DB_CQ_CMD_SN DB_FIELD_LOC(58, 57) - --#define DB_PARAM_CQ_CMD_SN_S 25 --#define DB_PARAM_CQ_CMD_SN_M GENMASK(26, 25) -+#define RECORD_DB_CI_MASK GENMASK(23, 0) - - struct hns_roce_v2_cqe { - __le32 byte_4; --- -2.27.0 - diff --git a/0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch b/0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch deleted file mode 100644 index 27adaf655ba9df79eba7038aab8b8a8fa489475f..0000000000000000000000000000000000000000 --- a/0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch +++ /dev/null @@ -1,306 +0,0 @@ -From 2da2a94f0ef5b6cf7fb8eacee1814a418d9bde74 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Sat, 25 Dec 2021 17:42:53 +0800 -Subject: libhns: Add new interfaces hr reg ***() to operate the CQE field - -Implement hr_reg_xxx() to simplify the code for filling or extracting -fields. - -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 53 +++++++++++++++++++++++++ - providers/hns/hns_roce_u_hw_v2.c | 58 ++++++++++------------------ - providers/hns/hns_roce_u_hw_v2.h | 66 ++++++++++++-------------------- - 3 files changed, 98 insertions(+), 79 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index c1ae1c9..df7f485 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -101,6 +101,59 @@ - #define roce_set_bit(origin, shift, val) \ - roce_set_field((origin), (1ul << (shift)), (shift), (val)) - -+#define FIELD_LOC(field_type, field_h, field_l) \ -+ field_type, field_h, \ -+ field_l + BUILD_ASSERT_OR_ZERO(((field_h) / 32) == \ -+ ((field_l) / 32)) -+ -+#define _hr_reg_enable(ptr, field_type, field_h, field_l) \ -+ ({ \ -+ const field_type *_ptr = ptr; \ -+ BUILD_ASSERT((field_h) == (field_l)); \ -+ *((__le32 *)_ptr + (field_h) / 32) |= \ -+ htole32(BIT((field_l) % 32)); \ -+ }) -+ -+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) -+ -+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \ -+ ({ \ -+ const field_type *_ptr = ptr; \ -+ BUILD_ASSERT((field_h) >= (field_l)); \ -+ *((__le32 *)_ptr + (field_h) / 32) &= \ -+ ~htole32(GENMASK((field_h) % 32, (field_l) % 32)); \ -+ }) -+ -+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field) -+ -+#define _hr_reg_write_bool(ptr, field_type, field_h, field_l, val) \ -+ ({ \ -+ (val) ? _hr_reg_enable(ptr, field_type, field_h, field_l) : \ -+ _hr_reg_clear(ptr, field_type, field_h, field_l);\ -+ }) -+ -+#define hr_reg_write_bool(ptr, field, val) _hr_reg_write_bool(ptr, field, val) -+ -+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \ -+ ({ \ -+ const uint32_t _val = val; \ -+ _hr_reg_clear(ptr, field_type, field_h, field_l); \ -+ *((__le32 *)ptr + (field_h) / 32) |= htole32(FIELD_PREP( \ -+ GENMASK((field_h) % 32, (field_l) % 32), _val)); \ -+ }) -+ -+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) -+ -+#define _hr_reg_read(ptr, field_type, field_h, field_l) \ -+ ({ \ -+ const field_type *_ptr = ptr; \ -+ BUILD_ASSERT((field_h) >= (field_l)); \ -+ FIELD_GET(GENMASK((field_h) % 32, (field_l) % 32), \ -+ le32toh(*((__le32 *)_ptr + (field_h) / 32))); \ -+ }) -+ -+#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) -+ - enum { - HNS_ROCE_QP_TABLE_BITS = 8, - HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS, -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index e7dec0b..558457a 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -187,8 +187,7 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - } - } - -- wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M, -- CQE_BYTE_16_SUB_STATUS_S); -+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); - } - - static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) -@@ -200,8 +199,8 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n) - { - struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe); - -- return (!!(roce_get_bit(cqe->byte_4, CQE_BYTE_4_OWNER_S)) ^ -- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL; -+ return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ? -+ cqe : NULL; - } - - static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) -@@ -257,8 +256,7 @@ static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe, - uint32_t srqn; - - if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) { -- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M, -- CQE_BYTE_12_XRC_SRQN_S); -+ srqn = hr_reg_read(cqe, CQE_XRC_SRQN); - - *srq = hns_roce_find_srq(ctx, srqn); - if (!*srq) -@@ -438,15 +436,13 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - (opcode == HNS_ROCE_RECV_OP_SEND || - opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM || - opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) && -- (roce_get_bit(cqe->byte_4, CQE_BYTE_4_RQ_INLINE_S))) { -+ hr_reg_read(cqe, CQE_RQ_INLINE)) { - struct hns_roce_rinl_sge *sge_list; - uint32_t wr_num, wr_cnt, sge_num, data_len; - uint8_t *wqe_buf; - uint32_t sge_cnt, size; - -- wr_num = (uint16_t)roce_get_field(cqe->byte_4, -- CQE_BYTE_4_WQE_IDX_M, -- CQE_BYTE_4_WQE_IDX_S) & 0xffff; -+ wr_num = hr_reg_read(cqe, CQE_WQE_IDX); - wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1); - - sge_list = (*cur_qp)->rq_rinl_buf.wqe_list[wr_cnt].sg_list; -@@ -477,13 +473,10 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - - static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc) - { -- wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M, -- CQE_BYTE_32_SL_S); -- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M, -- CQE_BYTE_32_RMT_QPN_S); -+ wc->sl = hr_reg_read(cqe, CQE_SL); -+ wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN); - wc->slid = 0; -- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ? -- IBV_WC_GRH : 0; -+ wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IBV_WC_GRH : 0; - wc->pkey_index = 0; - } - -@@ -492,8 +485,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - { - uint32_t wqe_idx; - -- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, -- CQE_BYTE_4_WQE_IDX_S); -+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); - wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; - hns_roce_free_srq_wqe(srq, wqe_idx); - } -@@ -533,8 +525,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - * according to the wqe idx in the current cqe first - */ - if (hr_qp->sq_signal_bits) { -- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, -- CQE_BYTE_4_WQE_IDX_S); -+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); - /* get the processed wqes num since last signalling */ - wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); - } -@@ -590,8 +581,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - - udma_from_device_barrier(); - -- qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M, -- CQE_BYTE_16_LCL_QPN_S); -+ qpn = hr_reg_read(cqe, CQE_LCL_QPN); - - /* if cur qp is null, then could not get the correct qpn */ - if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) { -@@ -600,11 +590,9 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - return V2_CQ_POLL_ERR; - } - -- status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, -- CQE_BYTE_4_STATUS_S); -- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, -- CQE_BYTE_4_OPCODE_S); -- is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ; -+ status = hr_reg_read(cqe, CQE_STATUS); -+ opcode = hr_reg_read(cqe, CQE_OPCODE); -+ is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ; - if (is_send) { - parse_cqe_for_req(cqe, wc, *cur_qp, opcode); - } else { -@@ -1350,26 +1338,20 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, - - while ((int) --prod_index - (int) cq->cons_index >= 0) { - cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe); -- if (roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M, -- CQE_BYTE_16_LCL_QPN_S) == qpn) { -- is_recv_cqe = roce_get_bit(cqe->byte_4, -- CQE_BYTE_4_S_R_S); -+ if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) { -+ is_recv_cqe = hr_reg_read(cqe, CQE_S_R); - - if (srq && is_recv_cqe) { -- wqe_index = roce_get_field(cqe->byte_4, -- CQE_BYTE_4_WQE_IDX_M, -- CQE_BYTE_4_WQE_IDX_S); -+ wqe_index = hr_reg_read(cqe, CQE_WQE_IDX); - hns_roce_free_srq_wqe(srq, wqe_index); - } - ++nfreed; - } else if (nfreed) { - dest = get_cqe_v2(cq, - (prod_index + nfreed) & cq->ibv_cq.cqe); -- owner_bit = roce_get_bit(dest->byte_4, -- CQE_BYTE_4_OWNER_S); -+ owner_bit = hr_reg_read(dest, CQE_OWNER); - memcpy(dest, cqe, cq->cqe_size); -- roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S, -- owner_bit); -+ hr_reg_write_bool(dest, CQE_OWNER, owner_bit); - } - } - -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index e91b1f7..92e5f1a 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -154,47 +154,31 @@ struct hns_roce_v2_cqe { - __le32 rsv[8]; - }; - --#define CQE_BYTE_4_OPCODE_S 0 --#define CQE_BYTE_4_OPCODE_M (((1UL << 5) - 1) << CQE_BYTE_4_OPCODE_S) -- --#define CQE_BYTE_4_RQ_INLINE_S 5 -- --#define CQE_BYTE_4_S_R_S 6 --#define CQE_BYTE_4_OWNER_S 7 -- --#define CQE_BYTE_4_STATUS_S 8 --#define CQE_BYTE_4_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_4_STATUS_S) -- --#define CQE_BYTE_4_WQE_IDX_S 16 --#define CQE_BYTE_4_WQE_IDX_M (((1UL << 16) - 1) << CQE_BYTE_4_WQE_IDX_S) -- --#define CQE_BYTE_12_XRC_SRQN_S 0 --#define CQE_BYTE_12_XRC_SRQN_M (((1UL << 24) - 1) << CQE_BYTE_12_XRC_SRQN_S) -- --#define CQE_BYTE_16_LCL_QPN_S 0 --#define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S) -- --#define CQE_BYTE_16_SUB_STATUS_S 24 --#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S) -- --#define CQE_BYTE_28_SMAC_S 0 --#define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S) -- --#define CQE_BYTE_28_PORT_TYPE_S 16 --#define CQE_BYTE_28_PORT_TYPE_M (((1UL << 2) - 1) << CQE_BYTE_28_PORT_TYPE_S) -- --#define CQE_BYTE_32_RMT_QPN_S 0 --#define CQE_BYTE_32_RMT_QPN_M (((1UL << 24) - 1) << CQE_BYTE_32_RMT_QPN_S) -- --#define CQE_BYTE_32_SL_S 24 --#define CQE_BYTE_32_SL_M (((1UL << 3) - 1) << CQE_BYTE_32_SL_S) -- --#define CQE_BYTE_32_PORTN_S 27 --#define CQE_BYTE_32_PORTN_M (((1UL << 3) - 1) << CQE_BYTE_32_PORTN_S) -- --#define CQE_BYTE_32_GRH_S 30 -- --#define CQE_BYTE_32_LPK_S 31 -+#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l) -+ -+#define CQE_OPCODE CQE_FIELD_LOC(4, 0) -+#define CQE_RQ_INLINE CQE_FIELD_LOC(5, 5) -+#define CQE_S_R CQE_FIELD_LOC(6, 6) -+#define CQE_OWNER CQE_FIELD_LOC(7, 7) -+#define CQE_STATUS CQE_FIELD_LOC(15, 8) -+#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16) -+#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32) -+#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64) -+#define CQE_RSV0 CQE_FIELD_LOC(95, 88) -+#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96) -+#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120) -+#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128) -+#define CQE_SMAC CQE_FIELD_LOC(207, 160) -+#define CQE_PORT_TYPE CQE_FIELD_LOC(209, 208) -+#define CQE_VID CQE_FIELD_LOC(221, 210) -+#define CQE_VID_VLD CQE_FIELD_LOC(222, 222) -+#define CQE_RSV2 CQE_FIELD_LOC(223, 223) -+#define CQE_RMT_QPN CQE_FIELD_LOC(247, 224) -+#define CQE_SL CQE_FIELD_LOC(250, 248) -+#define CQE_PORTN CQE_FIELD_LOC(253, 251) -+#define CQE_GRH CQE_FIELD_LOC(254, 254) -+#define CQE_LPK CQE_FIELD_LOC(255, 255) -+#define CQE_RSV3 CQE_FIELD_LOC(511, 256) - - struct hns_roce_rc_sq_wqe { - __le32 byte_4; --- -2.27.0 - diff --git a/0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch b/0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch deleted file mode 100644 index fdfab1a29a303f6b61a6f02e6a6413cbeeec415e..0000000000000000000000000000000000000000 --- a/0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch +++ /dev/null @@ -1,202 +0,0 @@ -From 48e8ca01b1e5d033fca6e988d2d280846c95d7e1 Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Fri, 31 Dec 2021 18:01:06 +0800 -Subject: libhns: Fix the calculation of QP/SRQ table size - -The table_size means the maximum number of QP/SRQ. This value may not be -a power of two. The old algorithm will lead to a result that allocates a -mismatched table. - -Fixes: 887b78c80224 ("libhns: Add initial main frame") -Fixes: 9e3df7578153 ("libhns: Support ibv_create_srq_ex") -Signed-off-by: Wenpeng Liang -Signed-off-by: Leon Romanovsky ---- - providers/hns/hns_roce_u.c | 18 +++++++++++------- - providers/hns/hns_roce_u.h | 20 ++++++++++++++------ - providers/hns/hns_roce_u_hw_v1.c | 4 ++-- - providers/hns/hns_roce_u_hw_v2.c | 4 ++-- - providers/hns/hns_roce_u_verbs.c | 9 ++++----- - 5 files changed, 33 insertions(+), 22 deletions(-) - -diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c -index 9dc4905..6eac4ff 100644 ---- a/providers/hns/hns_roce_u.c -+++ b/providers/hns/hns_roce_u.c -@@ -92,6 +92,13 @@ static const struct verbs_context_ops hns_common_ops = { - .get_srq_num = hns_roce_u_get_srq_num, - }; - -+static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) -+{ -+ uint32_t count_shift = hr_ilog32(entry_count); -+ -+ return count_shift > size_shift ? count_shift - size_shift : 0; -+} -+ - static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, - int cmd_fd, - void *private_data) -@@ -120,18 +127,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, - else - context->cqe_size = HNS_ROCE_V3_CQE_SIZE; - -- context->num_qps = resp.qp_tab_size; -- context->num_srqs = resp.srq_tab_size; -- -- context->qp_table_shift = ffs(context->num_qps) - 1 - -- HNS_ROCE_QP_TABLE_BITS; -+ context->qp_table_shift = calc_table_shift(resp.qp_tab_size, -+ HNS_ROCE_QP_TABLE_BITS); - context->qp_table_mask = (1 << context->qp_table_shift) - 1; - pthread_mutex_init(&context->qp_table_mutex, NULL); - for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i) - context->qp_table[i].refcnt = 0; - -- context->srq_table_shift = ffs(context->num_srqs) - 1 - -- HNS_ROCE_SRQ_TABLE_BITS; -+ context->srq_table_shift = calc_table_shift(resp.srq_tab_size, -+ HNS_ROCE_SRQ_TABLE_BITS); - context->srq_table_mask = (1 << context->srq_table_shift) - 1; - pthread_mutex_init(&context->srq_table_mutex, NULL); - for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i) -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index df7f485..9366923 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -154,10 +154,8 @@ - - #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) - --enum { -- HNS_ROCE_QP_TABLE_BITS = 8, -- HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS, --}; -+#define HNS_ROCE_QP_TABLE_BITS 8 -+#define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS) - - #define HNS_ROCE_SRQ_TABLE_BITS 8 - #define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS) -@@ -211,7 +209,6 @@ struct hns_roce_context { - int refcnt; - } qp_table[HNS_ROCE_QP_TABLE_SIZE]; - pthread_mutex_t qp_table_mutex; -- uint32_t num_qps; - uint32_t qp_table_shift; - uint32_t qp_table_mask; - -@@ -220,7 +217,6 @@ struct hns_roce_context { - int refcnt; - } srq_table[HNS_ROCE_SRQ_TABLE_SIZE]; - pthread_mutex_t srq_table_mutex; -- uint32_t num_srqs; - uint32_t srq_table_shift; - uint32_t srq_table_mask; - -@@ -382,6 +378,18 @@ static inline unsigned int hr_ilog32(unsigned int count) - return ilog32(count - 1); - } - -+static inline uint32_t to_hr_qp_table_index(uint32_t qpn, -+ struct hns_roce_context *ctx) -+{ -+ return (qpn >> ctx->qp_table_shift) & (HNS_ROCE_QP_TABLE_SIZE - 1); -+} -+ -+static inline uint32_t to_hr_srq_table_index(uint32_t srqn, -+ struct hns_roce_context *ctx) -+{ -+ return (srqn >> ctx->srq_table_shift) & (HNS_ROCE_SRQ_TABLE_SIZE - 1); -+} -+ - static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev) - { - return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device); -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 838e004..28ad482 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -220,7 +220,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq, - static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, - uint32_t qpn) - { -- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = to_hr_qp_table_index(qpn, ctx); - - if (ctx->qp_table[tind].refcnt) { - return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; -@@ -232,7 +232,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, - - static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn) - { -- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = to_hr_qp_table_index(qpn, ctx); - - if (!--ctx->qp_table[tind].refcnt) - free(ctx->qp_table[tind].table); -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 558457a..e39ee7f 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -343,7 +343,7 @@ static void update_cq_db(struct hns_roce_context *ctx, - static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, - uint32_t qpn) - { -- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = to_hr_qp_table_index(qpn, ctx); - - if (ctx->qp_table[tind].refcnt) - return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; -@@ -354,7 +354,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, - void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp) - { - uint32_t qpn = qp->verbs_qp.qp.qp_num; -- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = to_hr_qp_table_index(qpn, ctx); - - pthread_mutex_lock(&ctx->qp_table_mutex); - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 557d075..5ccb701 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -431,8 +431,7 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq) - static int hns_roce_store_srq(struct hns_roce_context *ctx, - struct hns_roce_srq *srq) - { -- uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >> -- ctx->srq_table_shift; -+ uint32_t tind = to_hr_srq_table_index(srq->srqn, ctx); - - pthread_mutex_lock(&ctx->srq_table_mutex); - -@@ -457,7 +456,7 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx, - struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx, - uint32_t srqn) - { -- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift; -+ uint32_t tind = to_hr_srq_table_index(srqn, ctx); - - if (ctx->srq_table[tind].refcnt) - return ctx->srq_table[tind].table[srqn & ctx->srq_table_mask]; -@@ -467,7 +466,7 @@ struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx, - - static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn) - { -- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift; -+ uint32_t tind = to_hr_srq_table_index(srqn, ctx); - - pthread_mutex_lock(&ctx->srq_table_mutex); - -@@ -1108,7 +1107,7 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx, - struct hns_roce_qp *qp) - { - uint32_t qpn = qp->verbs_qp.qp.qp_num; -- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; -+ uint32_t tind = to_hr_qp_table_index(qpn, ctx); - - pthread_mutex_lock(&ctx->qp_table_mutex); - if (!ctx->qp_table[tind].refcnt) { --- -2.27.0 - diff --git a/0037-libhns-Fix-wrong-HIP08-version-macro.patch b/0037-libhns-Fix-wrong-HIP08-version-macro.patch deleted file mode 100644 index 955e2e63d356c8a4db2fb5d3d6fe38e3842d8c7f..0000000000000000000000000000000000000000 --- a/0037-libhns-Fix-wrong-HIP08-version-macro.patch +++ /dev/null @@ -1,31 +0,0 @@ -From d4766cd11b985f7f798410129a0b204beb13ecef Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Mon, 17 Jan 2022 20:43:39 +0800 -Subject: libhns: Fix wrong HIP08 version macro - -The version macro of HIP08 should be consistent with the version number -queried from the hardware. - -Fixes: b8cb140e9cd6 ("libhns: Refresh version info before using it") -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 9366923..2b4ba18 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -48,8 +48,7 @@ - #include "hns_roce_u_abi.h" - - #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') -- --#define HNS_ROCE_HW_VER2 ('h' << 24 | 'i' << 16 | '0' << 8 | '8') -+#define HNS_ROCE_HW_VER2 0x100 - #define HNS_ROCE_HW_VER3 0x130 - - #define PFX "hns: " --- -2.27.0 - diff --git a/0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch b/0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch deleted file mode 100644 index 23e1c0f36f2307820e6bf199c6ae51e2c8ba40a9..0000000000000000000000000000000000000000 --- a/0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 203675526b14d9556eeb4212536ebcfc81691c1b Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Mon, 17 Jan 2022 20:43:38 +0800 -Subject: libhns: Fix out-of-bounds write when filling inline data into - extended sge space - -If the buf to store inline data is in the last page of the extended sge -space, filling the entire inline data into the extended sge space at one -time may result in out-of-bounds writing. - -When the remaining space at the end of the extended sge is not enough to -accommodate the entire inline data, the inline data needs to be filled -into the extended sge space in two steps: -(1) The front part of the inline data is filled into the remaining space - at the end of the extended sge. -(2) The remaining inline data is filled into the header space of the - extended sge. - -Fixes: b7814b7b9715("libhns: Support inline data in extented sge space for RC") -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 40 ++++++++++++++++++++++---------- - 1 file changed, 28 insertions(+), 12 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index e39ee7f..20745dc 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -772,21 +772,43 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, - struct hns_roce_sge_info *sge_info) - { - unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg); -- void *dseg; -+ unsigned int sge_mask = qp->ex_sge.sge_cnt - 1; -+ void *dst_addr, *src_addr, *tail_bound_addr; -+ uint32_t src_len, tail_len; - int i; - -+ - if (sge_info->total_len > qp->sq.max_gs * sge_sz) - return EINVAL; - -- dseg = get_send_sge_ex(qp, sge_info->start_idx); -+ dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask); -+ tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask); - - for (i = 0; i < wr->num_sge; i++) { -- memcpy(dseg, (void *)(uintptr_t)wr->sg_list[i].addr, -- wr->sg_list[i].length); -- dseg += wr->sg_list[i].length; -+ tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr; -+ -+ src_addr = (void *)(uintptr_t)wr->sg_list[i].addr; -+ src_len = wr->sg_list[i].length; -+ -+ if (src_len < tail_len) { -+ memcpy(dst_addr, src_addr, src_len); -+ dst_addr += src_len; -+ } else if (src_len == tail_len) { -+ memcpy(dst_addr, src_addr, src_len); -+ dst_addr = get_send_sge_ex(qp, 0); -+ } else { -+ memcpy(dst_addr, src_addr, tail_len); -+ dst_addr = get_send_sge_ex(qp, 0); -+ src_addr += tail_len; -+ src_len -= tail_len; -+ -+ memcpy(dst_addr, src_addr, src_len); -+ dst_addr += src_len; -+ } - } - -- sge_info->start_idx += DIV_ROUND_UP(sge_info->total_len, sge_sz); -+ sge_info->valid_num = DIV_ROUND_UP(sge_info->total_len, sge_sz); -+ sge_info->start_idx += sge_info->valid_num; - - return 0; - } -@@ -828,7 +850,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - struct hns_roce_ud_sq_wqe *ud_sq_wqe, - struct hns_roce_sge_info *sge_info) - { -- unsigned int sge_idx = sge_info->start_idx; - int ret; - - if (!check_inl_data_len(qp, sge_info->total_len)) -@@ -845,8 +866,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - if (ret) - return ret; - -- sge_info->valid_num = sge_info->start_idx - sge_idx; -- - hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num); - } - -@@ -969,7 +988,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - struct hns_roce_rc_sq_wqe *rc_sq_wqe, - struct hns_roce_sge_info *sge_info) - { -- unsigned int sge_idx = sge_info->start_idx; - void *dseg = rc_sq_wqe; - int ret; - int i; -@@ -997,8 +1015,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - if (ret) - return ret; - -- sge_info->valid_num = sge_info->start_idx - sge_idx; -- - hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num); - } - --- -2.27.0 - diff --git a/0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch b/0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch deleted file mode 100644 index 954be0252b9c1935bd31888b0083a770ed5d2968..0000000000000000000000000000000000000000 --- a/0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 85a5aa79327f45e4bea8d7ad0e55842225ca676a Mon Sep 17 00:00:00 2001 -From: Wenpeng Liang -Date: Tue, 18 Jan 2022 19:58:51 +0800 -Subject: libhns: Clear remaining unused sges when post recv - -The HIP09 requires the driver to clear the unused data segments in wqe -buffer to make the hns ROCEE stop reading the remaining invalid sges for -RQ. - -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 88 ++++++++++++++------------------ - 1 file changed, 39 insertions(+), 49 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 20745dc..6b0d7f1 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -85,14 +85,6 @@ static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, - dseg->len = htole32(sg->length); - } - --/* Fill an ending sge to make hw stop reading the remaining sges in wqe */ --static inline void set_ending_data_seg(struct hns_roce_v2_wqe_data_seg *dseg) --{ -- dseg->lkey = htole32(0x0); -- dseg->addr = 0; -- dseg->len = htole32(INVALID_SGE_LENGTH); --} -- - static void set_extend_atomic_seg(struct hns_roce_qp *qp, unsigned int sge_cnt, - struct hns_roce_sge_info *sge_info, void *buf) - { -@@ -1247,23 +1239,43 @@ static int check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx) - return 0; - } - --static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr, -- unsigned int wqe_idx) -+static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe, -+ unsigned int max_sge, bool rsv) - { -- struct hns_roce_v2_wqe_data_seg *dseg; -- struct hns_roce_rinl_sge *sge_list; -- int i; -+ struct hns_roce_v2_wqe_data_seg *dseg = wqe; -+ unsigned int i, cnt; - -- dseg = get_recv_wqe_v2(qp, wqe_idx); -- for (i = 0; i < wr->num_sge; i++) { -+ for (i = 0, cnt = 0; i < wr->num_sge; i++) { -+ /* Skip zero-length sge */ - if (!wr->sg_list[i].length) - continue; -- set_data_seg_v2(dseg, wr->sg_list + i); -- dseg++; -+ -+ set_data_seg_v2(dseg + cnt, wr->sg_list + i); -+ cnt++; - } - -- if (qp->rq.rsv_sge) -- set_ending_data_seg(dseg); -+ /* Fill a reserved sge to make ROCEE stop reading remaining segments */ -+ if (rsv) { -+ dseg[cnt].lkey = 0; -+ dseg[cnt].addr = 0; -+ dseg[cnt].len = htole32(INVALID_SGE_LENGTH); -+ } else { -+ /* Clear remaining segments to make ROCEE ignore sges */ -+ if (cnt < max_sge) -+ memset(dseg + cnt, 0, -+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE); -+ } -+} -+ -+static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr, -+ unsigned int wqe_idx, unsigned int max_sge) -+{ -+ struct hns_roce_rinl_sge *sge_list; -+ unsigned int i; -+ void *wqe; -+ -+ wqe = get_recv_wqe_v2(qp, wqe_idx); -+ fill_recv_sge_to_wqe(wr, wqe, max_sge, qp->rq.rsv_sge); - - if (!qp->rq_rinl_buf.wqe_cnt) - return; -@@ -1310,7 +1322,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, - } - - wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1); -- fill_rq_wqe(qp, wr, wqe_idx); -+ fill_rq_wqe(qp, wr, wqe_idx, max_sge); - qp->rq.wrid[wqe_idx] = wr->wr_id; - } - -@@ -1536,10 +1548,8 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) - } - - static int check_post_srq_valid(struct hns_roce_srq *srq, -- struct ibv_recv_wr *wr) -+ struct ibv_recv_wr *wr, unsigned int max_sge) - { -- unsigned int max_sge = srq->max_gs - srq->rsv_sge; -- - if (hns_roce_v2_srqwq_overflow(srq)) - return -ENOMEM; - -@@ -1575,28 +1585,6 @@ static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx) - return 0; - } - --static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx, -- struct ibv_recv_wr *wr) --{ -- struct hns_roce_v2_wqe_data_seg *dseg; -- int i; -- -- dseg = get_srq_wqe(srq, wqe_idx); -- -- for (i = 0; i < wr->num_sge; ++i) { -- dseg[i].len = htole32(wr->sg_list[i].length); -- dseg[i].lkey = htole32(wr->sg_list[i].lkey); -- dseg[i].addr = htole64(wr->sg_list[i].addr); -- } -- -- /* hw stop reading when identify the last one */ -- if (srq->rsv_sge) { -- dseg[i].len = htole32(INVALID_SGE_LENGTH); -- dseg[i].lkey = htole32(0x0); -- dseg[i].addr = 0; -- } --} -- - static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) - { - struct hns_roce_idx_que *idx_que = &srq->idx_que; -@@ -1624,15 +1612,16 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - { - struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context); - struct hns_roce_srq *srq = to_hr_srq(ib_srq); -+ unsigned int wqe_idx, max_sge, nreq; - struct hns_roce_db srq_db; -- unsigned int wqe_idx; - int ret = 0; -- int nreq; -+ void *wqe; - - pthread_spin_lock(&srq->lock); - -+ max_sge = srq->max_gs - srq->rsv_sge; - for (nreq = 0; wr; ++nreq, wr = wr->next) { -- ret = check_post_srq_valid(srq, wr); -+ ret = check_post_srq_valid(srq, wr, max_sge); - if (ret) { - *bad_wr = wr; - break; -@@ -1644,7 +1633,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - break; - } - -- fill_srq_wqe(srq, wqe_idx, wr); -+ wqe = get_srq_wqe(srq, wqe_idx); -+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); - fill_wqe_idx(srq, wqe_idx); - - srq->wrid[wqe_idx] = wr->wr_id; --- -2.27.0 - diff --git a/0040-libhns-Add-support-for-creating-extended-CQ.patch b/0040-libhns-Add-support-for-creating-extended-CQ.patch deleted file mode 100644 index ce6ab1b3275f04ec9df1c10ad0f09b572df32967..0000000000000000000000000000000000000000 --- a/0040-libhns-Add-support-for-creating-extended-CQ.patch +++ /dev/null @@ -1,346 +0,0 @@ -From d8596eff4eb46d1db1b6066e3bbbd03976f49e58 Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Mon, 7 Mar 2022 18:49:35 +0800 -Subject: libhns: Add support for creating extended CQ - -The driver supports ibv_create_cq_ex() to create extended CQ. But the -driver does not yet support the extended attributes specified by -attr->com_mask and attr->wc_flas. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.c | 1 + - providers/hns/hns_roce_u.h | 6 ++- - providers/hns/hns_roce_u_abi.h | 5 +++ - providers/hns/hns_roce_u_hw_v1.c | 20 +++++----- - providers/hns/hns_roce_u_hw_v2.c | 16 ++++---- - providers/hns/hns_roce_u_verbs.c | 63 ++++++++++++++++++++++---------- - 6 files changed, 72 insertions(+), 39 deletions(-) - -diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c -index 6eac4ff0..f8a647b8 100644 ---- a/providers/hns/hns_roce_u.c -+++ b/providers/hns/hns_roce_u.c -@@ -66,6 +66,7 @@ static const struct verbs_context_ops hns_common_ops = { - .bind_mw = hns_roce_u_bind_mw, - .cq_event = hns_roce_u_cq_event, - .create_cq = hns_roce_u_create_cq, -+ .create_cq_ex = hns_roce_u_create_cq_ex, - .create_qp = hns_roce_u_create_qp, - .create_qp_ex = hns_roce_u_create_qp_ex, - .dealloc_mw = hns_roce_u_dealloc_mw, -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 2b4ba181..505e7498 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -236,7 +236,7 @@ struct hns_roce_pd { - }; - - struct hns_roce_cq { -- struct ibv_cq ibv_cq; -+ struct verbs_cq verbs_cq; - struct hns_roce_buf buf; - pthread_spinlock_t lock; - unsigned int cqn; -@@ -406,7 +406,7 @@ static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd) - - static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq) - { -- return container_of(ibv_cq, struct hns_roce_cq, ibv_cq); -+ return container_of(ibv_cq, struct hns_roce_cq, verbs_cq.cq); - } - - static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq) -@@ -447,6 +447,8 @@ int hns_roce_u_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw, - struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, - struct ibv_comp_channel *channel, - int comp_vector); -+struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context, -+ struct ibv_cq_init_attr_ex *cq_attr); - - int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); - int hns_roce_u_destroy_cq(struct ibv_cq *cq); -diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h -index e56f9d35..333f977e 100644 ---- a/providers/hns/hns_roce_u_abi.h -+++ b/providers/hns/hns_roce_u_abi.h -@@ -39,8 +39,13 @@ - - DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, - empty, hns_roce_ib_alloc_pd_resp); -+ - DECLARE_DRV_CMD(hns_roce_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, - hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp); -+ -+DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, -+ hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp); -+ - DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, - empty, hns_roce_ib_alloc_ucontext_resp); - -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -index 28ad482c..d47cba0c 100644 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ b/providers/hns/hns_roce_u_hw_v1.c -@@ -161,10 +161,10 @@ static struct hns_roce_cqe *get_cqe(struct hns_roce_cq *cq, int entry) - - static void *get_sw_cqe(struct hns_roce_cq *cq, int n) - { -- struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe); -+ struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->verbs_cq.cq.cqe); - - return (!!(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S)) ^ -- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL; -+ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL; - } - - static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq) -@@ -210,7 +210,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq, - cur = wq->head - wq->tail; - pthread_spin_unlock(&cq->lock); - -- verbs_err(verbs_get_ctx(cq->ibv_cq.context), -+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), - "wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n", - wq->head, wq->tail, wq->max_post, nreq); - -@@ -274,10 +274,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq, - if (!*cur_qp || - (local_qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->verbs_qp.qp.qp_num) { - -- *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context), -+ *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->verbs_cq.cq.context), - qpn & 0xffffff); - if (!*cur_qp) { -- verbs_err(verbs_get_ctx(cq->ibv_cq.context), -+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), - PFX "can't find qp!\n"); - return CQ_POLL_ERR; - } -@@ -317,7 +317,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq, - if (roce_get_field(cqe->cqe_byte_4, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) { -- verbs_err(verbs_get_ctx(cq->ibv_cq.context), -+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), - PFX "error cqe!\n"); - hns_roce_handle_error_cqe(cqe, wc); - return CQ_OK; -@@ -599,21 +599,21 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, - uint32_t prod_index; - uint8_t owner_bit = 0; - struct hns_roce_cqe *cqe, *dest; -- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); -+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); - - for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); - ++prod_index) -- if (prod_index == cq->cons_index + cq->ibv_cq.cqe) -+ if (prod_index == cq->cons_index + cq->verbs_cq.cq.cqe) - break; - - while ((int) --prod_index - (int) cq->cons_index >= 0) { -- cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe); -+ cqe = get_cqe(cq, prod_index & cq->verbs_cq.cq.cqe); - if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, - CQE_BYTE_16_LOCAL_QPN_S) & 0xffffff) == qpn) { - ++nfreed; - } else if (nfreed) { - dest = get_cqe(cq, -- (prod_index + nfreed) & cq->ibv_cq.cqe); -+ (prod_index + nfreed) & cq->verbs_cq.cq.cqe); - owner_bit = roce_get_bit(dest->cqe_byte_4, - CQE_BYTE_4_OWNER_S); - memcpy(dest, cqe, sizeof(*cqe)); -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index bfd98760..07f3596d 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -189,10 +189,10 @@ static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) - - static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n) - { -- struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe); -+ struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->verbs_cq.cq.cqe); - -- return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ? -- cqe : NULL; -+ return (hr_reg_read(cqe, CQE_OWNER) ^ -+ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL; - } - - static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) -@@ -556,7 +556,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - struct hns_roce_qp **cur_qp, struct ibv_wc *wc) - { -- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); -+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); - struct hns_roce_srq *srq = NULL; - struct hns_roce_v2_cqe *cqe; - uint8_t opcode; -@@ -1356,15 +1356,15 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, - uint16_t wqe_index; - uint32_t prod_index; - struct hns_roce_v2_cqe *cqe, *dest; -- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); -+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); - - for (prod_index = cq->cons_index; get_sw_cqe_v2(cq, prod_index); - ++prod_index) -- if (prod_index > cq->cons_index + cq->ibv_cq.cqe) -+ if (prod_index > cq->cons_index + cq->verbs_cq.cq.cqe) - break; - - while ((int) --prod_index - (int) cq->cons_index >= 0) { -- cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe); -+ cqe = get_cqe_v2(cq, prod_index & cq->verbs_cq.cq.cqe); - if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) { - is_recv_cqe = hr_reg_read(cqe, CQE_S_R); - -@@ -1375,7 +1375,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, - ++nfreed; - } else if (nfreed) { - dest = get_cqe_v2(cq, -- (prod_index + nfreed) & cq->ibv_cq.cqe); -+ (prod_index + nfreed) & cq->verbs_cq.cq.cqe); - owner_bit = hr_reg_read(dest, CQE_OWNER); - memcpy(dest, cqe, cq->cqe_size); - hr_reg_write_bool(dest, CQE_OWNER, owner_bit); -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 3cc9e0c2..a993c39a 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -276,12 +276,17 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw) - return 0; - } - --static int hns_roce_verify_cq(int *cqe, struct hns_roce_context *context) -+static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, -+ struct hns_roce_context *context) - { -- if (*cqe < 1 || *cqe > context->max_cqe) -+ if (!attr->cqe || attr->cqe > context->max_cqe) - return -EINVAL; - -- *cqe = max((uint64_t)HNS_ROCE_MIN_CQE_NUM, roundup_pow_of_two(*cqe)); -+ if (attr->comp_mask || attr->wc_flags) -+ return -EOPNOTSUPP; -+ -+ attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, -+ roundup_pow_of_two(attr->cqe)); - - return 0; - } -@@ -297,25 +302,25 @@ static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq) - } - - static int exec_cq_create_cmd(struct ibv_context *context, -- struct hns_roce_cq *cq, int cqe, -- struct ibv_comp_channel *channel, int comp_vector) -+ struct hns_roce_cq *cq, -+ struct ibv_cq_init_attr_ex *attr) - { -+ struct hns_roce_create_cq_ex_resp resp_ex = {}; - struct hns_roce_ib_create_cq_resp *resp_drv; -- struct hns_roce_create_cq_resp resp = {}; -+ struct hns_roce_create_cq_ex cmd_ex = {}; - struct hns_roce_ib_create_cq *cmd_drv; -- struct hns_roce_create_cq cmd = {}; - int ret; - -- cmd_drv = &cmd.drv_payload; -- resp_drv = &resp.drv_payload; -+ cmd_drv = &cmd_ex.drv_payload; -+ resp_drv = &resp_ex.drv_payload; - - cmd_drv->buf_addr = (uintptr_t)cq->buf.buf; - cmd_drv->db_addr = (uintptr_t)cq->db; - cmd_drv->cqe_size = (uintptr_t)cq->cqe_size; - -- ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector, -- &cq->ibv_cq, &cmd.ibv_cmd, sizeof(cmd), -- &resp.ibv_resp, sizeof(resp)); -+ ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq, -+ &cmd_ex.ibv_cmd, sizeof(cmd_ex), -+ &resp_ex.ibv_resp, sizeof(resp_ex), 0); - if (ret) - return ret; - -@@ -325,16 +330,15 @@ static int exec_cq_create_cmd(struct ibv_context *context, - return 0; - } - --struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, -- struct ibv_comp_channel *channel, -- int comp_vector) -+static struct ibv_cq_ex *create_cq(struct ibv_context *context, -+ struct ibv_cq_init_attr_ex *attr) - { - struct hns_roce_device *hr_dev = to_hr_dev(context->device); - struct hns_roce_context *hr_ctx = to_hr_ctx(context); - struct hns_roce_cq *cq; - int ret; - -- ret = hns_roce_verify_cq(&cqe, hr_ctx); -+ ret = verify_cq_create_attr(attr, hr_ctx); - if (ret) - goto err; - -@@ -348,7 +352,7 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, - if (ret) - goto err_lock; - -- cq->cq_depth = cqe; -+ cq->cq_depth = attr->cqe; - cq->cqe_size = hr_ctx->cqe_size; - - ret = hns_roce_alloc_cq_buf(cq); -@@ -363,13 +367,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, - - *cq->db = 0; - -- ret = exec_cq_create_cmd(context, cq, cqe, channel, comp_vector); -+ ret = exec_cq_create_cmd(context, cq, attr); - if (ret) - goto err_cmd; - - cq->arm_sn = 1; - -- return &cq->ibv_cq; -+ return &cq->verbs_cq.cq_ex; - - err_cmd: - if (hr_dev->hw_version != HNS_ROCE_HW_VER1) -@@ -387,6 +391,27 @@ err: - return NULL; - } - -+struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, -+ struct ibv_comp_channel *channel, -+ int comp_vector) -+{ -+ struct ibv_cq_ex *cq; -+ struct ibv_cq_init_attr_ex attr = { -+ .cqe = cqe, -+ .channel = channel, -+ .comp_vector = comp_vector, -+ }; -+ -+ cq = create_cq(context, &attr); -+ return cq ? ibv_cq_ex_to_cq(cq) : NULL; -+} -+ -+struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context, -+ struct ibv_cq_init_attr_ex *attr) -+{ -+ return create_cq(context, attr); -+} -+ - void hns_roce_u_cq_event(struct ibv_cq *cq) - { - to_hr_cq(cq)->arm_sn++; --- -2.30.0 - diff --git a/0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch b/0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch deleted file mode 100644 index f816bcba9f3e00761b8dffa2f870e5483cbab1e0..0000000000000000000000000000000000000000 --- a/0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch +++ /dev/null @@ -1,415 +0,0 @@ -From 0464e0cb0416d679aba3b58261bbd2cadb74fd03 Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Mon, 7 Mar 2022 18:49:36 +0800 -Subject: libhns: Extended CQ supports the new polling mechanism - -ofed provides new polling APIs for extended CQ. With the new APIs, users -can poll the extended CQ more efficiently. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 1 + - providers/hns/hns_roce_u_hw_v2.c | 319 +++++++++++++++++++++++++++++++ - providers/hns/hns_roce_u_hw_v2.h | 1 + - providers/hns/hns_roce_u_verbs.c | 18 +- - 4 files changed, 337 insertions(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 505e7498..70ac6e5b 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -247,6 +247,7 @@ struct hns_roce_cq { - int arm_sn; - unsigned long flags; - unsigned int cqe_size; -+ struct hns_roce_v2_cqe *cqe; - }; - - struct hns_roce_idx_que { -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 07f3596d..081ab1f3 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -1657,6 +1657,325 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - return ret; - } - -+static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, -+ struct hns_roce_cq *cq) -+{ -+ struct hns_roce_wq *wq = &hr_qp->sq; -+ -+ if (hr_qp->sq_signal_bits) -+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); -+ -+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -+ ++wq->tail; -+} -+ -+static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx, -+ struct hns_roce_cq *cq) -+{ -+ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; -+ hns_roce_free_srq_wqe(srq, wqe_idx); -+} -+ -+static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq) -+{ -+ struct hns_roce_wq *wq = &hr_qp->rq; -+ -+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -+ ++wq->tail; -+} -+ -+static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, -+ struct hns_roce_cq *cq) -+{ -+ struct hns_roce_v2_cqe *cqe = cq->cqe; -+ struct hns_roce_srq *srq = NULL; -+ uint32_t wqe_idx; -+ -+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); -+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { -+ cqe_proc_sq(qp, wqe_idx, cq); -+ } else { -+ if (get_srq_from_cqe(cqe, ctx, qp, &srq)) -+ return V2_CQ_POLL_ERR; -+ -+ if (srq) -+ cqe_proc_srq(srq, wqe_idx, cq); -+ else -+ cqe_proc_rq(qp, cq); -+ } -+ return 0; -+} -+ -+static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status) -+{ -+ int i; -+ -+ static const struct { -+ unsigned int cqe_status; -+ enum ibv_wc_status wc_status; -+ } map[] = { -+ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, -+ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, -+ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, -+ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR }, -+ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR }, -+ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR }, -+ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR }, -+ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, -+ IBV_WC_RETRY_EXC_ERR }, -+ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, -+ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, -+ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, -+ }; -+ -+ cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR; -+ for (i = 0; i < ARRAY_SIZE(map); i++) { -+ if (status == map[i].cqe_status) { -+ cq->verbs_cq.cq_ex.status = map[i].wc_status; -+ break; -+ } -+ } -+} -+ -+static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) -+{ -+ struct hns_roce_qp *qp = NULL; -+ struct hns_roce_v2_cqe *cqe; -+ uint8_t status; -+ uint32_t qpn; -+ -+ cqe = next_cqe_sw_v2(cq); -+ if (!cqe) -+ return ENOENT; -+ -+ ++cq->cons_index; -+ udma_from_device_barrier(); -+ -+ cq->cqe = cqe; -+ qpn = hr_reg_read(cqe, CQE_LCL_QPN); -+ -+ qp = hns_roce_v2_find_qp(ctx, qpn); -+ if (!qp) -+ return V2_CQ_POLL_ERR; -+ -+ if (cqe_proc_wq(ctx, qp, cq)) -+ return V2_CQ_POLL_ERR; -+ -+ status = hr_reg_read(cqe, CQE_STATUS); -+ -+ /* -+ * once a cqe in error status, the driver needs to help the HW to -+ * generated flushed cqes for all subsequent wqes -+ */ -+ if (status != HNS_ROCE_V2_CQE_SUCCESS) { -+ handle_error_cqe_ex(cq, status); -+ return hns_roce_flush_cqe(qp, status); -+ } -+ -+ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; -+ -+ return V2_CQ_OK; -+} -+ -+static int wc_start_poll_cq(struct ibv_cq_ex *current, -+ struct ibv_poll_cq_attr *attr) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ struct hns_roce_context *ctx = to_hr_ctx(current->context); -+ int err; -+ -+ if (attr->comp_mask) -+ return EINVAL; -+ -+ pthread_spin_lock(&cq->lock); -+ -+ err = wc_poll_cqe(ctx, cq); -+ if (err != V2_CQ_OK) -+ pthread_spin_unlock(&cq->lock); -+ -+ return err; -+} -+ -+static int wc_next_poll_cq(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ struct hns_roce_context *ctx = to_hr_ctx(current->context); -+ int err; -+ -+ err = wc_poll_cqe(ctx, cq); -+ if (err != V2_CQ_OK) -+ return err; -+ -+ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) -+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK; -+ else -+ update_cq_db(ctx, cq); -+ -+ return 0; -+} -+ -+static void wc_end_poll_cq(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ struct hns_roce_context *ctx = to_hr_ctx(current->context); -+ -+ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) -+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK; -+ else -+ update_cq_db(ctx, cq); -+ -+ pthread_spin_unlock(&cq->lock); -+} -+ -+static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE); -+ -+ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ) -+ return wc_send_op_map[opcode]; -+ else -+ return wc_rcv_op_map[opcode]; -+} -+ -+static uint32_t wc_read_vendor_err(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ -+ return hr_reg_read(cq->cqe, CQE_SUB_STATUS); -+} -+ -+static uint32_t wc_read_byte_len(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ -+ return le32toh(cq->cqe->byte_cnt); -+} -+ -+static __be32 wc_read_imm_data(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ -+ if (hr_reg_read(cq->cqe, CQE_OPCODE) == HNS_ROCE_RECV_OP_SEND_WITH_INV) -+ /* This is returning invalidate_rkey which is in host order, see -+ * ibv_wc_read_invalidated_rkey. -+ */ -+ return (__force __be32)le32toh(cq->cqe->rkey); -+ -+ return htobe32(le32toh(cq->cqe->immtdata)); -+} -+ -+static uint32_t wc_read_qp_num(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ -+ return hr_reg_read(cq->cqe, CQE_LCL_QPN); -+} -+ -+static uint32_t wc_read_src_qp(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ -+ return hr_reg_read(cq->cqe, CQE_RMT_QPN); -+} -+ -+static unsigned int get_wc_flags_for_sq(uint8_t opcode) -+{ -+ switch (opcode) { -+ case HNS_ROCE_SQ_OP_SEND_WITH_IMM: -+ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM: -+ return IBV_WC_WITH_IMM; -+ case HNS_ROCE_SQ_OP_LOCAL_INV: -+ return IBV_WC_WITH_INV; -+ default: -+ return 0; -+ } -+} -+ -+static unsigned int get_wc_flags_for_rq(uint8_t opcode) -+{ -+ switch (opcode) { -+ case HNS_ROCE_RECV_OP_RDMA_WRITE_IMM: -+ case HNS_ROCE_RECV_OP_SEND_WITH_IMM: -+ return IBV_WC_WITH_IMM; -+ case HNS_ROCE_RECV_OP_SEND_WITH_INV: -+ return IBV_WC_WITH_INV; -+ default: -+ return 0; -+ } -+} -+ -+static unsigned int wc_read_wc_flags(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE); -+ unsigned int wc_flags; -+ -+ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ) { -+ wc_flags = get_wc_flags_for_sq(opcode); -+ } else { -+ wc_flags = get_wc_flags_for_rq(opcode); -+ wc_flags |= hr_reg_read(cq->cqe, CQE_GRH) ? IBV_WC_GRH : 0; -+ } -+ -+ return wc_flags; -+} -+ -+static uint32_t wc_read_slid(struct ibv_cq_ex *current) -+{ -+ return 0; -+} -+ -+static uint8_t wc_read_sl(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ -+ return (uint8_t)hr_reg_read(cq->cqe, CQE_SL); -+} -+ -+static uint8_t wc_read_dlid_path_bits(struct ibv_cq_ex *current) -+{ -+ return 0; -+} -+ -+static uint16_t wc_read_cvlan(struct ibv_cq_ex *current) -+{ -+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); -+ -+ return hr_reg_read(cq->cqe, CQE_VID_VLD) ? -+ hr_reg_read(cq->cqe, CQE_VID) : 0; -+} -+ -+void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags) -+{ -+ cq_ex->start_poll = wc_start_poll_cq; -+ cq_ex->next_poll = wc_next_poll_cq; -+ cq_ex->end_poll = wc_end_poll_cq; -+ cq_ex->read_opcode = wc_read_opcode; -+ cq_ex->read_vendor_err = wc_read_vendor_err; -+ cq_ex->read_wc_flags = wc_read_wc_flags; -+ -+ if (wc_flags & IBV_WC_EX_WITH_BYTE_LEN) -+ cq_ex->read_byte_len = wc_read_byte_len; -+ if (wc_flags & IBV_WC_EX_WITH_IMM) -+ cq_ex->read_imm_data = wc_read_imm_data; -+ if (wc_flags & IBV_WC_EX_WITH_QP_NUM) -+ cq_ex->read_qp_num = wc_read_qp_num; -+ if (wc_flags & IBV_WC_EX_WITH_SRC_QP) -+ cq_ex->read_src_qp = wc_read_src_qp; -+ if (wc_flags & IBV_WC_EX_WITH_SLID) -+ cq_ex->read_slid = wc_read_slid; -+ if (wc_flags & IBV_WC_EX_WITH_SL) -+ cq_ex->read_sl = wc_read_sl; -+ if (wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) -+ cq_ex->read_dlid_path_bits = wc_read_dlid_path_bits; -+ if (wc_flags & IBV_WC_EX_WITH_CVLAN) -+ cq_ex->read_cvlan = wc_read_cvlan; -+} -+ - const struct hns_roce_u_hw hns_roce_u_hw_v2 = { - .hw_version = HNS_ROCE_HW_VER2, - .hw_ops = { -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index 92e5f1a4..0068f4fe 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -337,5 +337,6 @@ struct hns_roce_ud_sq_wqe { - #define MAX_SERVICE_LEVEL 0x7 - - void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); -+void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags); - - #endif /* _HNS_ROCE_U_HW_V2_H */ -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index a993c39a..9ea8a6d3 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -276,13 +276,21 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw) - return 0; - } - -+enum { -+ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | -+ IBV_WC_EX_WITH_CVLAN, -+}; -+ - static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, - struct hns_roce_context *context) - { - if (!attr->cqe || attr->cqe > context->max_cqe) - return -EINVAL; - -- if (attr->comp_mask || attr->wc_flags) -+ if (attr->comp_mask) -+ return -EOPNOTSUPP; -+ -+ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) - return -EOPNOTSUPP; - - attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, -@@ -409,7 +417,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, - struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context, - struct ibv_cq_init_attr_ex *attr) - { -- return create_cq(context, attr); -+ struct ibv_cq_ex *cq; -+ -+ cq = create_cq(context, attr); -+ if (cq) -+ hns_roce_attach_cq_ex_ops(cq, attr->wc_flags); -+ -+ return cq; - } - - void hns_roce_u_cq_event(struct ibv_cq *cq) --- -2.30.0 - diff --git a/0042-libhns-Optimize-the-error-handling-of-CQE.patch b/0042-libhns-Optimize-the-error-handling-of-CQE.patch deleted file mode 100644 index 4ee48826039792791e27d6426b55e5122a7216f4..0000000000000000000000000000000000000000 --- a/0042-libhns-Optimize-the-error-handling-of-CQE.patch +++ /dev/null @@ -1,160 +0,0 @@ -From 2d48954e9b2617cb48f7d5ba47a10ceda4e556ff Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Mon, 7 Mar 2022 18:49:37 +0800 -Subject: libhns: Optimize the error handling of CQE - -Separate the acquisition of wc->status and wc->vendor_err to make the logic -of error handling clearer. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 81 ++++++++------------------------ - 1 file changed, 19 insertions(+), 62 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 081ab1f3..2804450d 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -146,13 +146,13 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr, - return 0; - } - --static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, -- uint8_t status) -+static enum ibv_wc_status get_wc_status(uint8_t status) - { - static const struct { - unsigned int cqe_status; - enum ibv_wc_status wc_status; - } map[] = { -+ { HNS_ROCE_V2_CQE_SUCCESS, IBV_WC_SUCCESS }, - { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, - { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, - { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, -@@ -169,17 +169,12 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, - }; - -- int i; -- -- wc->status = IBV_WC_GENERAL_ERR; -- for (i = 0; i < ARRAY_SIZE(map); i++) { -- if (status == map[i].cqe_status) { -- wc->status = map[i].wc_status; -- break; -- } -+ for (int i = 0; i < ARRAY_SIZE(map); i++) { -+ if (status == map[i].cqe_status) -+ return map[i].wc_status; - } - -- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); -+ return IBV_WC_GENERAL_ERR; - } - - static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) -@@ -581,7 +576,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - return V2_CQ_POLL_ERR; - } - -- status = hr_reg_read(cqe, CQE_STATUS); - opcode = hr_reg_read(cqe, CQE_OPCODE); - is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ; - if (is_send) { -@@ -603,18 +597,18 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - - wc->qp_num = qpn; - -+ status = hr_reg_read(cqe, CQE_STATUS); -+ wc->status = get_wc_status(status); -+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); -+ -+ if (status == HNS_ROCE_V2_CQE_SUCCESS) -+ return V2_CQ_OK; -+ - /* - * once a cqe in error status, the driver needs to help the HW to - * generated flushed cqes for all subsequent wqes - */ -- if (status != HNS_ROCE_V2_CQE_SUCCESS) { -- handle_error_cqe(cqe, wc, status); -- return hns_roce_flush_cqe(*cur_qp, status); -- } -- -- wc->status = IBV_WC_SUCCESS; -- -- return V2_CQ_OK; -+ return hns_roce_flush_cqe(*cur_qp, status); - } - - static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, -@@ -1706,40 +1700,6 @@ static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, - return 0; - } - --static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status) --{ -- int i; -- -- static const struct { -- unsigned int cqe_status; -- enum ibv_wc_status wc_status; -- } map[] = { -- { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, -- { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, -- { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, -- { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR }, -- { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR }, -- { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR }, -- { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR }, -- { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR }, -- { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR }, -- { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR }, -- { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, -- IBV_WC_RETRY_EXC_ERR }, -- { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, -- { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, -- { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, -- }; -- -- cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR; -- for (i = 0; i < ARRAY_SIZE(map); i++) { -- if (status == map[i].cqe_status) { -- cq->verbs_cq.cq_ex.status = map[i].wc_status; -- break; -- } -- } --} -- - static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) - { - struct hns_roce_qp *qp = NULL; -@@ -1765,19 +1725,16 @@ static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) - return V2_CQ_POLL_ERR; - - status = hr_reg_read(cqe, CQE_STATUS); -+ cq->verbs_cq.cq_ex.status = get_wc_status(status); -+ -+ if (status == HNS_ROCE_V2_CQE_SUCCESS) -+ return V2_CQ_OK; - - /* - * once a cqe in error status, the driver needs to help the HW to - * generated flushed cqes for all subsequent wqes - */ -- if (status != HNS_ROCE_V2_CQE_SUCCESS) { -- handle_error_cqe_ex(cq, status); -- return hns_roce_flush_cqe(qp, status); -- } -- -- cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; -- -- return V2_CQ_OK; -+ return hns_roce_flush_cqe(qp, status); - } - - static int wc_start_poll_cq(struct ibv_cq_ex *current, --- -2.30.0 - diff --git a/0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch b/0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch deleted file mode 100644 index 2d90158a343743f3f5ae85bceb61d935f37e62a5..0000000000000000000000000000000000000000 --- a/0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch +++ /dev/null @@ -1,323 +0,0 @@ -From 9dd7b55957ccc720a6844613af9d43680d8fbaad Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Mon, 7 Mar 2022 18:49:38 +0800 -Subject: libhns: Refactor hns roce v2 poll one() and wc poll cqe() - -hns_roce_v2_poll_one() and wc_poll_cqe() have a lot of repetitive code. -Aggregating the repetitive parts of these two functions into one function -hns_roce_poll_one() can reduce the repetitive code. - -Signed-off-by: Xinhao Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 228 +++++++++++++++---------------- - 1 file changed, 107 insertions(+), 121 deletions(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 2804450d..42a77151 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -285,6 +285,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, - - static void hns_roce_update_sq_db(struct hns_roce_context *ctx, - struct hns_roce_qp *qp) -+ - { - struct hns_roce_db sq_db = {}; - -@@ -548,21 +549,101 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, - wc->opcode = wc_send_op_map[opcode]; - } - --static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, -- struct hns_roce_qp **cur_qp, struct ibv_wc *wc) -+static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, -+ struct hns_roce_cq *cq) - { -- struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); -+ struct hns_roce_wq *wq = &hr_qp->sq; -+ -+ if (hr_qp->sq_signal_bits) -+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); -+ -+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -+ ++wq->tail; -+} -+ -+static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx, -+ struct hns_roce_cq *cq) -+{ -+ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; -+ hns_roce_free_srq_wqe(srq, wqe_idx); -+} -+ -+static void cqe_proc_rq(struct hns_roce_wq *wq, struct hns_roce_cq *cq) -+{ -+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -+ ++wq->tail; -+} -+ -+static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, -+ struct hns_roce_cq *cq) -+{ -+ struct hns_roce_v2_cqe *cqe = cq->cqe; -+ struct hns_roce_srq *srq = NULL; -+ uint32_t wqe_idx; -+ -+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); -+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { -+ cqe_proc_sq(qp, wqe_idx, cq); -+ } else { -+ if (get_srq_from_cqe(cqe, ctx, qp, &srq)) -+ return V2_CQ_POLL_ERR; -+ -+ if (srq) -+ cqe_proc_srq(srq, wqe_idx, cq); -+ else -+ cqe_proc_rq(&qp->rq, cq); -+ } -+ -+ return 0; -+} -+ -+static int parse_cqe_for_cq(struct hns_roce_context *ctx, struct hns_roce_cq *cq, -+ struct hns_roce_qp *cur_qp, struct ibv_wc *wc) -+{ -+ struct hns_roce_v2_cqe *cqe = cq->cqe; - struct hns_roce_srq *srq = NULL; -- struct hns_roce_v2_cqe *cqe; - uint8_t opcode; -- uint8_t status; -+ -+ if (!wc) { -+ if (cqe_proc_wq(ctx, cur_qp, cq)) -+ return V2_CQ_POLL_ERR; -+ -+ return 0; -+ } -+ -+ opcode = hr_reg_read(cqe, CQE_OPCODE); -+ -+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { -+ parse_cqe_for_req(cqe, wc, cur_qp, opcode); -+ } else { -+ wc->byte_len = le32toh(cqe->byte_cnt); -+ get_opcode_for_resp(cqe, wc, opcode); -+ -+ if (get_srq_from_cqe(cqe, ctx, cur_qp, &srq)) -+ return V2_CQ_POLL_ERR; -+ -+ if (srq) -+ parse_cqe_for_srq(cqe, wc, srq); -+ else -+ parse_cqe_for_resp(cqe, wc, cur_qp, opcode); -+ } -+ -+ return 0; -+} -+ -+static int hns_roce_poll_one(struct hns_roce_context *ctx, -+ struct hns_roce_qp **cur_qp, struct hns_roce_cq *cq, -+ struct ibv_wc *wc) -+{ -+ struct hns_roce_v2_cqe *cqe; -+ uint8_t status, wc_status; - uint32_t qpn; -- bool is_send; - - cqe = next_cqe_sw_v2(cq); - if (!cqe) -- return V2_CQ_EMPTY; -+ return wc ? V2_CQ_EMPTY : ENOENT; - -+ cq->cqe = cqe; - ++cq->cons_index; - - udma_from_device_barrier(); -@@ -576,31 +657,20 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - return V2_CQ_POLL_ERR; - } - -- opcode = hr_reg_read(cqe, CQE_OPCODE); -- is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ; -- if (is_send) { -- parse_cqe_for_req(cqe, wc, *cur_qp, opcode); -- } else { -- wc->byte_len = le32toh(cqe->byte_cnt); -- get_opcode_for_resp(cqe, wc, opcode); -+ if (parse_cqe_for_cq(ctx, cq, *cur_qp, wc)) -+ return V2_CQ_POLL_ERR; - -- if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq)) -- return V2_CQ_POLL_ERR; -+ status = hr_reg_read(cqe, CQE_STATUS); -+ wc_status = get_wc_status(status); - -- if (srq) { -- parse_cqe_for_srq(cqe, wc, srq); -- } else { -- if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode)) -- return V2_CQ_POLL_ERR; -- } -+ if (wc) { -+ wc->status = wc_status; -+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); -+ wc->qp_num = qpn; -+ } else { -+ cq->verbs_cq.cq_ex.status = wc_status; - } - -- wc->qp_num = qpn; -- -- status = hr_reg_read(cqe, CQE_STATUS); -- wc->status = get_wc_status(status); -- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); -- - if (status == HNS_ROCE_V2_CQE_SUCCESS) - return V2_CQ_OK; - -@@ -614,16 +684,16 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, - static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, - struct ibv_wc *wc) - { -- int npolled; -- int err = V2_CQ_OK; -- struct hns_roce_qp *qp = NULL; -- struct hns_roce_cq *cq = to_hr_cq(ibvcq); - struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); -+ struct hns_roce_cq *cq = to_hr_cq(ibvcq); -+ struct hns_roce_qp *qp = NULL; -+ int err = V2_CQ_OK; -+ int npolled; - - pthread_spin_lock(&cq->lock); - - for (npolled = 0; npolled < ne; ++npolled) { -- err = hns_roce_v2_poll_one(cq, &qp, wc + npolled); -+ err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); - if (err != V2_CQ_OK) - break; - } -@@ -1651,97 +1721,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, - return ret; - } - --static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, -- struct hns_roce_cq *cq) --{ -- struct hns_roce_wq *wq = &hr_qp->sq; -- -- if (hr_qp->sq_signal_bits) -- wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); -- -- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -- ++wq->tail; --} -- --static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx, -- struct hns_roce_cq *cq) --{ -- cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; -- hns_roce_free_srq_wqe(srq, wqe_idx); --} -- --static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq) --{ -- struct hns_roce_wq *wq = &hr_qp->rq; -- -- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -- ++wq->tail; --} -- --static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, -- struct hns_roce_cq *cq) --{ -- struct hns_roce_v2_cqe *cqe = cq->cqe; -- struct hns_roce_srq *srq = NULL; -- uint32_t wqe_idx; -- -- wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); -- if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { -- cqe_proc_sq(qp, wqe_idx, cq); -- } else { -- if (get_srq_from_cqe(cqe, ctx, qp, &srq)) -- return V2_CQ_POLL_ERR; -- -- if (srq) -- cqe_proc_srq(srq, wqe_idx, cq); -- else -- cqe_proc_rq(qp, cq); -- } -- return 0; --} -- --static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) --{ -- struct hns_roce_qp *qp = NULL; -- struct hns_roce_v2_cqe *cqe; -- uint8_t status; -- uint32_t qpn; -- -- cqe = next_cqe_sw_v2(cq); -- if (!cqe) -- return ENOENT; -- -- ++cq->cons_index; -- udma_from_device_barrier(); -- -- cq->cqe = cqe; -- qpn = hr_reg_read(cqe, CQE_LCL_QPN); -- -- qp = hns_roce_v2_find_qp(ctx, qpn); -- if (!qp) -- return V2_CQ_POLL_ERR; -- -- if (cqe_proc_wq(ctx, qp, cq)) -- return V2_CQ_POLL_ERR; -- -- status = hr_reg_read(cqe, CQE_STATUS); -- cq->verbs_cq.cq_ex.status = get_wc_status(status); -- -- if (status == HNS_ROCE_V2_CQE_SUCCESS) -- return V2_CQ_OK; -- -- /* -- * once a cqe in error status, the driver needs to help the HW to -- * generated flushed cqes for all subsequent wqes -- */ -- return hns_roce_flush_cqe(qp, status); --} -- - static int wc_start_poll_cq(struct ibv_cq_ex *current, - struct ibv_poll_cq_attr *attr) - { - struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); - struct hns_roce_context *ctx = to_hr_ctx(current->context); -+ struct hns_roce_qp *qp = NULL; - int err; - - if (attr->comp_mask) -@@ -1749,7 +1734,7 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current, - - pthread_spin_lock(&cq->lock); - -- err = wc_poll_cqe(ctx, cq); -+ err = hns_roce_poll_one(ctx, &qp, cq, NULL); - if (err != V2_CQ_OK) - pthread_spin_unlock(&cq->lock); - -@@ -1760,9 +1745,10 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current) - { - struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); - struct hns_roce_context *ctx = to_hr_ctx(current->context); -+ struct hns_roce_qp *qp = NULL; - int err; - -- err = wc_poll_cqe(ctx, cq); -+ err = hns_roce_poll_one(ctx, &qp, cq, NULL); - if (err != V2_CQ_OK) - return err; - --- -2.30.0 - diff --git a/0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch b/0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch deleted file mode 100644 index c6a7721a5cbc38086f2ccb6bf7dfcf582777211e..0000000000000000000000000000000000000000 --- a/0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch +++ /dev/null @@ -1,1007 +0,0 @@ -From 36446a56eea5db54e229207bf39c796df16f519a Mon Sep 17 00:00:00 2001 -From: Xinhao Liu -Date: Mon, 21 Mar 2022 09:32:04 +0800 -Subject: libhns: Extended QP supports the new post send mechanism - -The ofed provides a new set of post send APIs for extended QP. With the new -APIs, users can post send WR more efficiently. The hns driver provides -support for the new APIs. - -Signed-off-by: Xinhao Liu -Signed-off-by: Yixing Liu -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u.h | 6 + - providers/hns/hns_roce_u_hw_v2.c | 814 +++++++++++++++++++++++++++++-- - providers/hns/hns_roce_u_hw_v2.h | 7 + - providers/hns/hns_roce_u_verbs.c | 11 +- - 4 files changed, 792 insertions(+), 46 deletions(-) - -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 171fe06e..96059172 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -338,6 +338,12 @@ struct hns_roce_qp { - unsigned long flags; - int refcnt; /* specially used for XRC */ - void *dwqe_page; -+ -+ /* specific fields for the new post send APIs */ -+ int err; -+ void *cur_wqe; -+ unsigned int rb_sq_head; /* roll back sq head */ -+ struct hns_roce_sge_info sge_info; - }; - - struct hns_roce_av { -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index fab1939b..0169250d 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -78,7 +78,7 @@ static inline unsigned int mtu_enum_to_int(enum ibv_mtu mtu) - static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n); - - static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, -- struct ibv_sge *sg) -+ const struct ibv_sge *sg) - { - dseg->lkey = htole32(sg->lkey); - dseg->addr = htole64(sg->addr); -@@ -824,9 +824,28 @@ static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg, - sge_info->total_len = len; - } - -+static void get_src_buf_info(void **src_addr, uint32_t *src_len, -+ const void *buf_list, int buf_idx, -+ enum hns_roce_wr_buf_type type) -+{ -+ if (type == WR_BUF_TYPE_POST_SEND) { -+ const struct ibv_sge *sg_list = buf_list; -+ -+ *src_addr = (void *)(uintptr_t)sg_list[buf_idx].addr; -+ *src_len = sg_list[buf_idx].length; -+ } else { -+ const struct ibv_data_buf *bf_list = buf_list; -+ -+ *src_addr = bf_list[buf_idx].addr; -+ *src_len = bf_list[buf_idx].length; -+ } -+} -+ - static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, -- const struct ibv_send_wr *wr, -- struct hns_roce_sge_info *sge_info) -+ struct hns_roce_sge_info *sge_info, -+ const void *buf_list, -+ uint32_t num_buf, -+ enum hns_roce_wr_buf_type buf_type) - { - unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg); - unsigned int sge_mask = qp->ex_sge.sge_cnt - 1; -@@ -834,18 +853,15 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, - uint32_t src_len, tail_len; - int i; - -- - if (sge_info->total_len > qp->sq.max_gs * sge_sz) - return EINVAL; - - dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask); - tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask); - -- for (i = 0; i < wr->num_sge; i++) { -+ for (i = 0; i < num_buf; i++) { - tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr; -- -- src_addr = (void *)(uintptr_t)wr->sg_list[i].addr; -- src_len = wr->sg_list[i].length; -+ get_src_buf_info(&src_addr, &src_len, buf_list, i, buf_type); - - if (src_len < tail_len) { - memcpy(dst_addr, src_addr, src_len); -@@ -870,20 +886,11 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, - return 0; - } - --static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, -- struct hns_roce_ud_sq_wqe *ud_sq_wqe) -+static void set_ud_inl_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, -+ uint8_t *data) - { -- uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {0}; - uint32_t *loc = (uint32_t *)data; - uint32_t tmp_data; -- void *tmp = data; -- int i; -- -- for (i = 0; i < wr->num_sge; i++) { -- memcpy(tmp, (void *)(uintptr_t)wr->sg_list[i].addr, -- wr->sg_list[i].length); -- tmp += wr->sg_list[i].length; -- } - - hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_15_0, *loc & 0xffff); - hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_23_16, (*loc >> 16) & 0xff); -@@ -896,6 +903,22 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, - hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_63_48, *loc >> 16); - } - -+static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, -+ struct hns_roce_ud_sq_wqe *ud_sq_wqe) -+{ -+ uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {}; -+ void *tmp = data; -+ int i; -+ -+ for (i = 0; i < wr->num_sge; i++) { -+ memcpy(tmp, (void *)(uintptr_t)wr->sg_list[i].addr, -+ wr->sg_list[i].length); -+ tmp += wr->sg_list[i].length; -+ } -+ -+ set_ud_inl_seg(ud_sq_wqe, data); -+} -+ - static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) - { - int mtu = mtu_enum_to_int(qp->path_mtu); -@@ -919,7 +942,9 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - } else { - hr_reg_enable(ud_sq_wqe, UDWQE_INLINE_TYPE); - -- ret = fill_ext_sge_inl_data(qp, wr, sge_info); -+ ret = fill_ext_sge_inl_data(qp, sge_info, -+ wr->sg_list, wr->num_sge, -+ WR_BUF_TYPE_POST_SEND); - if (ret) - return ret; - -@@ -995,6 +1020,23 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, - return ret; - } - -+static inline void enable_wqe(struct hns_roce_qp *qp, void *sq_wqe, -+ unsigned int index) -+{ -+ struct hns_roce_rc_sq_wqe *wqe = sq_wqe; -+ -+ /* -+ * The pipeline can sequentially post all valid WQEs in wq buf, -+ * including those new WQEs waiting for doorbell to update the PI again. -+ * Therefore, the valid bit of WQE MUST be updated after all of fields -+ * and extSGEs have been written into DDR instead of cache. -+ */ -+ if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) -+ udma_to_device_barrier(); -+ -+ hr_reg_write_bool(wqe, RCWQE_OWNER, !(index & BIT(qp->sq.shift))); -+} -+ - static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - unsigned int nreq, struct hns_roce_sge_info *sge_info) - { -@@ -1026,17 +1068,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - if (ret) - return ret; - -- /* -- * The pipeline can sequentially post all valid WQEs in wq buf, -- * including those new WQEs waiting for doorbell to update the PI again. -- * Therefore, the valid bit of WQE MUST be updated after all of fields -- * and extSGEs have been written into DDR instead of cache. -- */ -- if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) -- udma_to_device_barrier(); -- -- hr_reg_write_bool(wqe, RCWQE_OWNER, -- !((qp->sq.head + nreq) & BIT(qp->sq.shift))); -+ enable_wqe(qp, ud_sq_wqe, qp->sq.head + nreq); - - return ret; - } -@@ -1068,7 +1100,9 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, - } else { - hr_reg_enable(rc_sq_wqe, RCWQE_INLINE_TYPE); - -- ret = fill_ext_sge_inl_data(qp, wr, sge_info); -+ ret = fill_ext_sge_inl_data(qp, sge_info, -+ wr->sg_list, wr->num_sge, -+ WR_BUF_TYPE_POST_SEND); - if (ret) - return ret; - -@@ -1189,17 +1223,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, - return ret; - - wqe_valid: -- /* -- * The pipeline can sequentially post all valid WQEs into WQ buffer, -- * including new WQEs waiting for the doorbell to update the PI again. -- * Therefore, the owner bit of WQE MUST be updated after all fields -- * and extSGEs have been written into DDR instead of cache. -- */ -- if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) -- udma_to_device_barrier(); -- -- hr_reg_write_bool(wqe, RCWQE_OWNER, -- !((qp->sq.head + nreq) & BIT(qp->sq.shift))); -+ enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq); - - return 0; - } -@@ -1921,6 +1945,710 @@ void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags) - cq_ex->read_cvlan = wc_read_cvlan; - } - -+static struct hns_roce_rc_sq_wqe * -+init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode) -+{ -+ unsigned int send_flags = qp->verbs_qp.qp_ex.wr_flags; -+ struct hns_roce_rc_sq_wqe *wqe; -+ unsigned int wqe_idx; -+ -+ if (hns_roce_v2_wq_overflow(&qp->sq, 0, -+ to_hr_cq(qp->verbs_qp.qp.send_cq))) { -+ qp->cur_wqe = NULL; -+ qp->err = ENOMEM; -+ return NULL; -+ } -+ -+ wqe_idx = qp->sq.head & (qp->sq.wqe_cnt - 1); -+ wqe = get_send_wqe(qp, wqe_idx); -+ -+ hr_reg_write(wqe, RCWQE_OPCODE, opcode); -+ hr_reg_write_bool(wqe, RCWQE_CQE, send_flags & IBV_SEND_SIGNALED); -+ hr_reg_write_bool(wqe, RCWQE_FENCE, send_flags & IBV_SEND_FENCE); -+ hr_reg_write_bool(wqe, RCWQE_SE, send_flags & IBV_SEND_SOLICITED); -+ hr_reg_clear(wqe, RCWQE_INLINE); -+ hr_reg_clear(wqe, RCWQE_SO); -+ -+ qp->sq.wrid[wqe_idx] = wr_id; -+ qp->cur_wqe = wqe; -+ qp->sq.head++; -+ -+ return wqe; -+} -+ -+static void wr_set_sge_rc(struct ibv_qp_ex *ibv_qp, uint32_t lkey, -+ uint64_t addr, uint32_t length) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; -+ -+ if (!wqe) -+ return; -+ -+ hr_reg_write(wqe, RCWQE_LKEY0, lkey); -+ hr_reg_write(wqe, RCWQE_VA0_L, addr); -+ hr_reg_write(wqe, RCWQE_VA0_H, addr >> 32); -+ -+ wqe->msg_len = htole32(length); -+ hr_reg_write(wqe, RCWQE_LEN0, length); -+ hr_reg_write(wqe, RCWQE_SGE_NUM, !!length); -+ /* ignore ex sge start index */ -+ -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg, -+ struct hns_roce_qp *qp, const struct ibv_sge *sge, -+ size_t num_sge) -+{ -+ unsigned int index = qp->sge_info.start_idx; -+ unsigned int mask = qp->ex_sge.sge_cnt - 1; -+ unsigned int msg_len = 0; -+ unsigned int cnt = 0; -+ int i; -+ -+ for (i = 0; i < num_sge; i++) { -+ if (!sge[i].length) -+ continue; -+ -+ msg_len += sge[i].length; -+ cnt++; -+ -+ if (cnt <= HNS_ROCE_SGE_IN_WQE) { -+ set_data_seg_v2(dseg, &sge[i]); -+ dseg++; -+ } else { -+ dseg = get_send_sge_ex(qp, index & mask); -+ set_data_seg_v2(dseg, &sge[i]); -+ index++; -+ } -+ } -+ -+ qp->sge_info.start_idx = index; -+ qp->sge_info.valid_num = cnt; -+ qp->sge_info.total_len = msg_len; -+} -+ -+static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge, -+ const struct ibv_sge *sg_list) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; -+ struct hns_roce_v2_wqe_data_seg *dseg; -+ -+ if (!wqe) -+ return; -+ -+ if (num_sge > qp->sq.max_gs) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX, -+ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1)); -+ -+ dseg = (void *)(wqe + 1); -+ set_sgl_rc(dseg, qp, sg_list, num_sge); -+ -+ wqe->msg_len = htole32(qp->sge_info.total_len); -+ hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); -+ -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void wr_send_rc(struct ibv_qp_ex *ibv_qp) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ -+ init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND); -+} -+ -+static void wr_send_imm_rc(struct ibv_qp_ex *ibv_qp, __be32 imm_data) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe; -+ -+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_IMM); -+ if (!wqe) -+ return; -+ -+ wqe->immtdata = htole32(be32toh(imm_data)); -+} -+ -+static void wr_send_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe; -+ -+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_INV); -+ if (!wqe) -+ return; -+ -+ wqe->inv_key = htole32(invalidate_rkey); -+} -+ -+static void wr_local_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe; -+ -+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_LOCAL_INV); -+ if (!wqe) -+ return; -+ -+ hr_reg_enable(wqe, RCWQE_SO); -+ wqe->inv_key = htole32(invalidate_rkey); -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void wr_set_xrc_srqn(struct ibv_qp_ex *ibv_qp, uint32_t remote_srqn) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; -+ -+ if (!wqe) -+ return; -+ -+ hr_reg_write(wqe, RCWQE_XRC_SRQN, remote_srqn); -+} -+ -+static void wr_rdma_read(struct ibv_qp_ex *ibv_qp, uint32_t rkey, -+ uint64_t remote_addr) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe; -+ -+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_RDMA_READ); -+ if (!wqe) -+ return; -+ -+ wqe->va = htole64(remote_addr); -+ wqe->rkey = htole32(rkey); -+} -+ -+static void wr_rdma_write(struct ibv_qp_ex *ibv_qp, uint32_t rkey, -+ uint64_t remote_addr) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe; -+ -+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_RDMA_WRITE); -+ if (!wqe) -+ return; -+ -+ wqe->va = htole64(remote_addr); -+ wqe->rkey = htole32(rkey); -+} -+ -+static void wr_rdma_write_imm(struct ibv_qp_ex *ibv_qp, uint32_t rkey, -+ uint64_t remote_addr, __be32 imm_data) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe; -+ -+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, -+ HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM); -+ if (!wqe) -+ return; -+ -+ wqe->va = htole64(remote_addr); -+ wqe->rkey = htole32(rkey); -+ wqe->immtdata = htole32(be32toh(imm_data)); -+} -+ -+static void set_wr_atomic(struct ibv_qp_ex *ibv_qp, uint32_t rkey, -+ uint64_t remote_addr, uint64_t compare_add, -+ uint64_t swap, uint32_t opcode) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_v2_wqe_data_seg *dseg; -+ struct hns_roce_wqe_atomic_seg *aseg; -+ struct hns_roce_rc_sq_wqe *wqe; -+ -+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, opcode); -+ if (!wqe) -+ return; -+ -+ wqe->va = htole64(remote_addr); -+ wqe->rkey = htole32(rkey); -+ -+ dseg = (void *)(wqe + 1); -+ aseg = (void *)(dseg + 1); -+ -+ if (opcode == HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP) { -+ aseg->fetchadd_swap_data = htole64(swap); -+ aseg->cmp_data = htole64(compare_add); -+ } else { -+ aseg->fetchadd_swap_data = htole64(compare_add); -+ aseg->cmp_data = 0; -+ } -+} -+ -+static void wr_atomic_cmp_swp(struct ibv_qp_ex *ibv_qp, uint32_t rkey, -+ uint64_t remote_addr, uint64_t compare, -+ uint64_t swap) -+{ -+ set_wr_atomic(ibv_qp, rkey, remote_addr, compare, swap, -+ HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP); -+} -+ -+static void wr_atomic_fetch_add(struct ibv_qp_ex *ibv_qp, uint32_t rkey, -+ uint64_t remote_addr, uint64_t add) -+{ -+ set_wr_atomic(ibv_qp, rkey, remote_addr, add, 0, -+ HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD); -+} -+ -+static void set_inline_data_list_rc(struct hns_roce_qp *qp, -+ struct hns_roce_rc_sq_wqe *wqe, -+ size_t num_buf, -+ const struct ibv_data_buf *buf_list) -+{ -+ unsigned int msg_len = qp->sge_info.total_len; -+ void *dseg; -+ int ret; -+ int i; -+ -+ hr_reg_enable(wqe, RCWQE_INLINE); -+ -+ wqe->msg_len = htole32(msg_len); -+ if (msg_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) { -+ hr_reg_clear(wqe, RCWQE_INLINE_TYPE); -+ /* ignore ex sge start index */ -+ -+ dseg = wqe + 1; -+ for (i = 0; i < num_buf; i++) { -+ memcpy(dseg, buf_list[i].addr, buf_list[i].length); -+ dseg += buf_list[i].length; -+ } -+ /* ignore sge num */ -+ } else { -+ if (!check_inl_data_len(qp, msg_len)) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ hr_reg_enable(wqe, RCWQE_INLINE_TYPE); -+ hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX, -+ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1)); -+ -+ ret = fill_ext_sge_inl_data(qp, &qp->sge_info, -+ buf_list, num_buf, -+ WR_BUF_TYPE_SEND_WR_OPS); -+ if (ret) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); -+ } -+} -+ -+static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr, -+ size_t length) -+{ -+ struct ibv_data_buf buff = { .addr = addr, .length = length }; -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; -+ -+ if (!wqe) -+ return; -+ -+ buff.addr = addr; -+ buff.length = length; -+ -+ qp->sge_info.total_len = length; -+ set_inline_data_list_rc(qp, wqe, 1, &buff); -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, -+ const struct ibv_data_buf *buf_list) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; -+ int i; -+ -+ if (!wqe) -+ return; -+ -+ qp->sge_info.total_len = 0; -+ for (i = 0; i < num_buf; i++) -+ qp->sge_info.total_len += buf_list[i].length; -+ -+ set_inline_data_list_rc(qp, wqe, num_buf, buf_list); -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static struct hns_roce_ud_sq_wqe * -+init_ud_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode) -+{ -+ unsigned int send_flags = qp->verbs_qp.qp_ex.wr_flags; -+ struct hns_roce_ud_sq_wqe *wqe; -+ unsigned int wqe_idx; -+ -+ if (hns_roce_v2_wq_overflow(&qp->sq, 0, -+ to_hr_cq(qp->verbs_qp.qp.send_cq))) { -+ qp->cur_wqe = NULL; -+ qp->err = ENOMEM; -+ return NULL; -+ } -+ -+ wqe_idx = qp->sq.head & (qp->sq.wqe_cnt - 1); -+ wqe = get_send_wqe(qp, wqe_idx); -+ -+ hr_reg_write(wqe, UDWQE_OPCODE, opcode); -+ hr_reg_write_bool(wqe, UDWQE_CQE, send_flags & IBV_SEND_SIGNALED); -+ hr_reg_write_bool(wqe, UDWQE_SE, send_flags & IBV_SEND_SOLICITED); -+ hr_reg_clear(wqe, UDWQE_INLINE); -+ -+ qp->sq.wrid[wqe_idx] = wr_id; -+ qp->cur_wqe = wqe; -+ qp->sq.head++; -+ -+ return wqe; -+} -+ -+static void wr_send_ud(struct ibv_qp_ex *ibv_qp) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ -+ init_ud_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND); -+} -+ -+static void wr_send_imm_ud(struct ibv_qp_ex *ibv_qp, __be32 imm_data) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_ud_sq_wqe *wqe; -+ -+ wqe = init_ud_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_IMM); -+ if (!wqe) -+ return; -+ -+ wqe->immtdata = htole32(be32toh(imm_data)); -+} -+ -+static void wr_set_ud_addr(struct ibv_qp_ex *ibv_qp, struct ibv_ah *ah, -+ uint32_t remote_qpn, uint32_t remote_qkey) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; -+ struct hns_roce_ah *hr_ah = to_hr_ah(ah); -+ int ret; -+ -+ if (!wqe) -+ return; -+ -+ wqe->qkey = htole32(remote_qkey & 0x80000000 ? qp->qkey : remote_qkey); -+ -+ hr_reg_write(wqe, UDWQE_DQPN, remote_qpn); -+ -+ ret = fill_ud_av(wqe, hr_ah); -+ if (ret) -+ qp->err = ret; -+ -+ qp->sl = hr_ah->av.sl; -+} -+ -+static void wr_set_sge_ud(struct ibv_qp_ex *ibv_qp, uint32_t lkey, -+ uint64_t addr, uint32_t length) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; -+ struct hns_roce_v2_wqe_data_seg *dseg; -+ int sge_idx; -+ -+ if (!wqe) -+ return; -+ -+ wqe->msg_len = htole32(length); -+ hr_reg_write(wqe, UDWQE_SGE_NUM, 1); -+ sge_idx = qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1); -+ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx); -+ -+ dseg = get_send_sge_ex(qp, sge_idx); -+ -+ dseg->lkey = htole32(lkey); -+ dseg->addr = htole64(addr); -+ dseg->len = htole32(length); -+ -+ qp->sge_info.start_idx++; -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, -+ const struct ibv_sge *sg_list) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ unsigned int sge_idx = qp->sge_info.start_idx; -+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; -+ unsigned int mask = qp->ex_sge.sge_cnt - 1; -+ struct hns_roce_v2_wqe_data_seg *dseg; -+ unsigned int msg_len = 0; -+ unsigned int cnt = 0; -+ -+ if (!wqe) -+ return; -+ -+ if (num_sge > qp->sq.max_gs) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask); -+ for (int i = 0; i < num_sge; i++) { -+ if (!sg_list[i].length) -+ continue; -+ -+ dseg = get_send_sge_ex(qp, sge_idx & mask); -+ set_data_seg_v2(dseg, &sg_list[i]); -+ -+ msg_len += sg_list[i].length; -+ cnt++; -+ sge_idx++; -+ } -+ -+ wqe->msg_len = htole32(msg_len); -+ hr_reg_write(wqe, UDWQE_SGE_NUM, cnt); -+ -+ qp->sge_info.start_idx += cnt; -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void set_inline_data_list_ud(struct hns_roce_qp *qp, -+ struct hns_roce_ud_sq_wqe *wqe, -+ size_t num_buf, -+ const struct ibv_data_buf *buf_list) -+{ -+ uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {}; -+ unsigned int msg_len = qp->sge_info.total_len; -+ void *tmp; -+ int ret; -+ int i; -+ -+ if (!check_inl_data_len(qp, msg_len)) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ hr_reg_enable(wqe, UDWQE_INLINE); -+ -+ wqe->msg_len = htole32(msg_len); -+ if (msg_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) { -+ hr_reg_clear(wqe, UDWQE_INLINE_TYPE); -+ /* ignore ex sge start index */ -+ -+ tmp = data; -+ for (i = 0; i < num_buf; i++) { -+ memcpy(tmp, buf_list[i].addr, buf_list[i].length); -+ tmp += buf_list[i].length; -+ } -+ -+ set_ud_inl_seg(wqe, data); -+ /* ignore sge num */ -+ } else { -+ hr_reg_enable(wqe, UDWQE_INLINE_TYPE); -+ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, -+ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1)); -+ -+ ret = fill_ext_sge_inl_data(qp, &qp->sge_info, -+ buf_list, num_buf, -+ WR_BUF_TYPE_SEND_WR_OPS); -+ if (ret) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ hr_reg_write(wqe, UDWQE_SGE_NUM, qp->sge_info.valid_num); -+ } -+} -+ -+static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr, -+ size_t length) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; -+ struct ibv_data_buf buff; -+ -+ if (!wqe) -+ return; -+ -+ buff.addr = addr; -+ buff.length = length; -+ -+ qp->sge_info.total_len = length; -+ set_inline_data_list_ud(qp, wqe, 1, &buff); -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, -+ const struct ibv_data_buf *buf_list) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; -+ int i; -+ -+ if (!wqe) -+ return; -+ -+ qp->sge_info.total_len = 0; -+ for (i = 0; i < num_buf; i++) -+ qp->sge_info.total_len += buf_list[i].length; -+ -+ set_inline_data_list_ud(qp, wqe, num_buf, buf_list); -+ enable_wqe(qp, wqe, qp->sq.head); -+} -+ -+static void wr_start(struct ibv_qp_ex *ibv_qp) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ enum ibv_qp_state state = ibv_qp->qp_base.state; -+ -+ if (state == IBV_QPS_RESET || -+ state == IBV_QPS_INIT || -+ state == IBV_QPS_RTR) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ pthread_spin_lock(&qp->sq.lock); -+ qp->sge_info.start_idx = qp->next_sge; -+ qp->rb_sq_head = qp->sq.head; -+ qp->err = 0; -+} -+ -+static int wr_complete(struct ibv_qp_ex *ibv_qp) -+{ -+ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context); -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ unsigned int nreq = qp->sq.head - qp->rb_sq_head; -+ struct ibv_qp_attr attr; -+ int err = qp->err; -+ -+ if (err) { -+ qp->sq.head = qp->rb_sq_head; -+ goto out; -+ } -+ -+ if (nreq) { -+ qp->next_sge = qp->sge_info.start_idx; -+ udma_to_device_barrier(); -+ -+ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) -+ hns_roce_write_dwqe(qp, qp->cur_wqe); -+ else -+ hns_roce_update_sq_db(ctx, qp); -+ -+ if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) -+ *(qp->sdb) = qp->sq.head & 0xffff; -+ } -+ -+out: -+ pthread_spin_unlock(&qp->sq.lock); -+ if (ibv_qp->qp_base.state == IBV_QPS_ERR) { -+ attr.qp_state = IBV_QPS_ERR; -+ hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE); -+ } -+ -+ return err; -+} -+ -+static void wr_abort(struct ibv_qp_ex *ibv_qp) -+{ -+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); -+ -+ qp->sq.head = qp->rb_sq_head; -+ -+ pthread_spin_unlock(&qp->sq.lock); -+} -+ -+enum { -+ HNS_SUPPORTED_SEND_OPS_FLAGS_RC_XRC = -+ IBV_QP_EX_WITH_SEND | -+ IBV_QP_EX_WITH_SEND_WITH_INV | -+ IBV_QP_EX_WITH_SEND_WITH_IMM | -+ IBV_QP_EX_WITH_RDMA_WRITE | -+ IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | -+ IBV_QP_EX_WITH_RDMA_READ | -+ IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP | -+ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | -+ IBV_QP_EX_WITH_LOCAL_INV, -+ HNS_SUPPORTED_SEND_OPS_FLAGS_UD = -+ IBV_QP_EX_WITH_SEND | -+ IBV_QP_EX_WITH_SEND_WITH_IMM, -+}; -+ -+static void fill_send_wr_ops_rc_xrc(struct ibv_qp_ex *qp_ex) -+{ -+ qp_ex->wr_send = wr_send_rc; -+ qp_ex->wr_send_imm = wr_send_imm_rc; -+ qp_ex->wr_send_inv = wr_send_inv_rc; -+ qp_ex->wr_rdma_read = wr_rdma_read; -+ qp_ex->wr_rdma_write = wr_rdma_write; -+ qp_ex->wr_rdma_write_imm = wr_rdma_write_imm; -+ qp_ex->wr_set_inline_data = wr_set_inline_data_rc; -+ qp_ex->wr_set_inline_data_list = wr_set_inline_data_list_rc; -+ qp_ex->wr_local_inv = wr_local_inv_rc; -+ qp_ex->wr_atomic_cmp_swp = wr_atomic_cmp_swp; -+ qp_ex->wr_atomic_fetch_add = wr_atomic_fetch_add; -+ qp_ex->wr_set_sge = wr_set_sge_rc; -+ qp_ex->wr_set_sge_list = wr_set_sge_list_rc; -+} -+ -+static void fill_send_wr_ops_ud(struct ibv_qp_ex *qp_ex) -+{ -+ qp_ex->wr_send = wr_send_ud; -+ qp_ex->wr_send_imm = wr_send_imm_ud; -+ qp_ex->wr_set_ud_addr = wr_set_ud_addr; -+ qp_ex->wr_set_inline_data = wr_set_inline_data_ud; -+ qp_ex->wr_set_inline_data_list = wr_set_inline_data_list_ud; -+ qp_ex->wr_set_sge = wr_set_sge_ud; -+ qp_ex->wr_set_sge_list = wr_set_sge_list_ud; -+} -+ -+static int fill_send_wr_ops(const struct ibv_qp_init_attr_ex *attr, -+ struct ibv_qp_ex *qp_ex) -+{ -+ uint64_t ops = attr->send_ops_flags; -+ -+ qp_ex->wr_start = wr_start; -+ qp_ex->wr_complete = wr_complete; -+ qp_ex->wr_abort = wr_abort; -+ -+ switch (attr->qp_type) { -+ case IBV_QPT_XRC_SEND: -+ qp_ex->wr_set_xrc_srqn = wr_set_xrc_srqn; -+ SWITCH_FALLTHROUGH; -+ case IBV_QPT_RC: -+ if (ops & ~HNS_SUPPORTED_SEND_OPS_FLAGS_RC_XRC) -+ return -EOPNOTSUPP; -+ fill_send_wr_ops_rc_xrc(qp_ex); -+ break; -+ case IBV_QPT_UD: -+ if (ops & ~HNS_SUPPORTED_SEND_OPS_FLAGS_UD) -+ return -EOPNOTSUPP; -+ fill_send_wr_ops_ud(qp_ex); -+ break; -+ default: -+ return -EOPNOTSUPP; -+ } -+ -+ return 0; -+} -+ -+int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr, -+ struct hns_roce_qp *qp) -+{ -+ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { -+ if (fill_send_wr_ops(attr, &qp->verbs_qp.qp_ex)) -+ return -EOPNOTSUPP; -+ -+ qp->verbs_qp.comp_mask |= VERBS_QP_EX; -+ } -+ -+ return 0; -+} -+ - const struct hns_roce_u_hw hns_roce_u_hw_v2 = { - .hw_version = HNS_ROCE_HW_VER2, - .hw_ops = { -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index 122fdbdf..098dbdf4 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -122,6 +122,11 @@ enum { - HNS_ROCE_V2_CQ_DB_NTR, - }; - -+enum hns_roce_wr_buf_type { -+ WR_BUF_TYPE_POST_SEND, -+ WR_BUF_TYPE_SEND_WR_OPS, -+}; -+ - struct hns_roce_db { - __le32 byte_4; - __le32 parameter; -@@ -339,5 +344,7 @@ struct hns_roce_ud_sq_wqe { - - void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); - void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags); -+int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr, -+ struct hns_roce_qp *qp); - - #endif /* _HNS_ROCE_U_HW_V2_H */ -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 9ea8a6d3..1457a1a2 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -768,7 +768,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) - } - - enum { -- CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD, -+ CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD | -+ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS, - }; - - static int check_qp_create_mask(struct hns_roce_context *ctx, -@@ -1270,9 +1271,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - if (ret) - goto err_cmd; - -+ ret = hns_roce_attach_qp_ex_ops(attr, qp); -+ if (ret) -+ goto err_ops; -+ - ret = hns_roce_store_qp(context, qp); - if (ret) -- goto err_store; -+ goto err_ops; - - if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) { - ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key); -@@ -1286,7 +1291,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, - - err_dwqe: - hns_roce_v2_clear_qp(context, qp); --err_store: -+err_ops: - ibv_cmd_destroy_qp(&qp->verbs_qp.qp); - err_cmd: - hns_roce_free_qp_buf(qp, context); --- -2.30.0 - diff --git a/0045-libhns-Add-general-error-type-for-CQE.patch b/0045-libhns-Add-general-error-type-for-CQE.patch deleted file mode 100644 index b41b68e1144cbeba22618b43bbe8223f8b702666..0000000000000000000000000000000000000000 --- a/0045-libhns-Add-general-error-type-for-CQE.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 1e5f8bb89169453cfdd17bf58cef7186dcf58596 Mon Sep 17 00:00:00 2001 -From: Youming Luo -Date: Wed, 16 Mar 2022 17:36:39 +0800 -Subject: libhns: Add general error type for CQE - -If a Work Request posted in an RQ of UD QP isn't big enough for holding the -incoming message, then the hns ROCEE will generate a general error CQE. The -IB specification does not specify this type of CQE. - -In the case of unreliable communication, it is not desirable to change the -QP to an error state due to an insufficient receive length error. So If the -hns ROCEE reports a general error CQE, it's no need to set the QP to an -error state, and the driver should skip it. - -Signed-off-by: Youming Luo -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_hw_v2.c | 4 +++- - providers/hns/hns_roce_u_hw_v2.h | 1 + - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c -index 42a77151..fab1939b 100644 ---- a/providers/hns/hns_roce_u_hw_v2.c -+++ b/providers/hns/hns_roce_u_hw_v2.c -@@ -166,6 +166,7 @@ static enum ibv_wc_status get_wc_status(uint8_t status) - { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR }, - { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, - { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, -+ { HNS_ROCE_V2_CQE_GENERAL_ERR, IBV_WC_GENERAL_ERR }, - { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, - }; - -@@ -671,7 +672,8 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx, - cq->verbs_cq.cq_ex.status = wc_status; - } - -- if (status == HNS_ROCE_V2_CQE_SUCCESS) -+ if (status == HNS_ROCE_V2_CQE_SUCCESS || -+ status == HNS_ROCE_V2_CQE_GENERAL_ERR) - return V2_CQ_OK; - - /* -diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h -index 0068f4fe..122fdbdf 100644 ---- a/providers/hns/hns_roce_u_hw_v2.h -+++ b/providers/hns/hns_roce_u_hw_v2.h -@@ -110,6 +110,7 @@ enum { - HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR = 0x15, - HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR = 0x16, - HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR = 0x22, -+ HNS_ROCE_V2_CQE_GENERAL_ERR = 0x23, - HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR = 0x24, - }; - --- -2.30.0 - diff --git a/0046-libhns-Fix-the-shift-size-of-SQ-WQE.patch b/0046-libhns-Fix-the-shift-size-of-SQ-WQE.patch deleted file mode 100644 index b05d86982d78d609c15a260ef5ffe62429658565..0000000000000000000000000000000000000000 --- a/0046-libhns-Fix-the-shift-size-of-SQ-WQE.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 847336b7634b51548996b879f42c786a108885f1 Mon Sep 17 00:00:00 2001 -From: Chengchang Tang -Date: Fri, 8 Apr 2022 11:31:07 +0800 -Subject: [PATCH 46/47] libhns: Fix the shift size of SQ WQE - -Currently, the shift size of SQ WQE is based on the size of the SQ WQE -structure of HIP06. Although the size of SQ WQE of HIP08 is the same as -the size of SQ WQE of HIP06, it is not a correct way for HIP08 to use the -structure of HIP06 to define the size of SQ WQE. - -Fixes: b6cd213b276f ("libhns: Refactor for creating qp") -Signed-off-by: Chengchang Tang -Signed-off-by: Wenpeng Liang ---- - providers/hns/hns_roce_u_verbs.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 1457a1a2..215d82ec 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -1068,8 +1068,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, - } - - if (attr->cap.max_send_wr) { -- qp->sq.wqe_shift = -- hr_ilog32(sizeof(struct hns_roce_rc_send_wqe)); -+ qp->sq.wqe_shift = HNS_ROCE_SQWQE_SHIFT; - cnt = roundup_pow_of_two(attr->cap.max_send_wr); - qp->sq.wqe_cnt = cnt; - qp->sq.shift = hr_ilog32(cnt); --- -2.30.0 - diff --git a/0047-libhns-Remove-support-for-HIP06.patch b/0047-libhns-Remove-support-for-HIP06.patch deleted file mode 100644 index d82d52650579f80dea542c2bd0e8c8ee49c35b34..0000000000000000000000000000000000000000 --- a/0047-libhns-Remove-support-for-HIP06.patch +++ /dev/null @@ -1,1419 +0,0 @@ -From 938dfc5422285ed48a8acd5ce588bbeffa17f409 Mon Sep 17 00:00:00 2001 -From: Chengchang Tang -Date: Fri, 8 Apr 2022 11:31:08 +0800 -Subject: [PATCH 47/47] libhns: Remove support for HIP06 - -HIP06 is no longer supported. In order to reduce unnecessary maintenance, -the code of HIP06 is removed. - -Signed-off-by: Chengchang Tang -Signed-off-by: Wenpeng Liang ---- - providers/hns/CMakeLists.txt | 1 - - providers/hns/hns_roce_u.c | 27 +- - providers/hns/hns_roce_u.h | 16 - - providers/hns/hns_roce_u_hw_v1.c | 836 ------------------------------- - providers/hns/hns_roce_u_hw_v1.h | 244 --------- - providers/hns/hns_roce_u_verbs.c | 59 +-- - 6 files changed, 11 insertions(+), 1172 deletions(-) - delete mode 100644 providers/hns/hns_roce_u_hw_v1.c - delete mode 100644 providers/hns/hns_roce_u_hw_v1.h - -diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt -index 697dbd7e..7aaca757 100644 ---- a/providers/hns/CMakeLists.txt -+++ b/providers/hns/CMakeLists.txt -@@ -2,7 +2,6 @@ rdma_provider(hns - hns_roce_u.c - hns_roce_u_buf.c - hns_roce_u_db.c -- hns_roce_u_hw_v1.c - hns_roce_u_hw_v2.c - hns_roce_u_verbs.c - ) -diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c -index f8a647b8..a46ceb9d 100644 ---- a/providers/hns/hns_roce_u.c -+++ b/providers/hns/hns_roce_u.c -@@ -46,9 +46,6 @@ static void hns_roce_free_context(struct ibv_context *ibctx); - #endif - - static const struct verbs_match_ent hca_table[] = { -- VERBS_MODALIAS_MATCH("acpi*:HISI00D1:*", &hns_roce_u_hw_v1), -- VERBS_MODALIAS_MATCH("of:N*T*Chisilicon,hns-roce-v1C*", &hns_roce_u_hw_v1), -- VERBS_MODALIAS_MATCH("of:N*T*Chisilicon,hns-roce-v1", &hns_roce_u_hw_v1), - VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA222, &hns_roce_u_hw_v2), - VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA223, &hns_roce_u_hw_v2), - VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA224, &hns_roce_u_hw_v2), -@@ -109,7 +106,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, - struct ibv_device_attr dev_attrs; - struct hns_roce_context *context; - struct ibv_get_context cmd; -- int offset = 0; - int i; - - context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, -@@ -157,24 +153,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, - context->max_srq_sge = dev_attrs.max_srq_sge; - - context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, -- MAP_SHARED, cmd_fd, offset); -+ MAP_SHARED, cmd_fd, 0); - if (context->uar == MAP_FAILED) - goto err_free; - -- offset += hr_dev->page_size; -- -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1) { -- /* -- * when vma->vm_pgoff is 1, the cq_tptr_base includes 64K CQ, -- * a pointer of CQ need 2B size -- */ -- context->cq_tptr_base = mmap(NULL, HNS_ROCE_CQ_DB_BUF_SIZE, -- PROT_READ | PROT_WRITE, MAP_SHARED, -- cmd_fd, offset); -- if (context->cq_tptr_base == MAP_FAILED) -- goto db_free; -- } -- - pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); - - verbs_set_ops(&context->ibv_ctx, &hns_common_ops); -@@ -182,10 +164,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, - - return &context->ibv_ctx; - --db_free: -- munmap(context->uar, hr_dev->page_size); -- context->uar = NULL; -- - err_free: - verbs_uninit_context(&context->ibv_ctx); - free(context); -@@ -198,9 +176,6 @@ static void hns_roce_free_context(struct ibv_context *ibctx) - struct hns_roce_context *context = to_hr_ctx(ibctx); - - munmap(context->uar, hr_dev->page_size); -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1) -- munmap(context->cq_tptr_base, HNS_ROCE_CQ_DB_BUF_SIZE); -- - verbs_uninit_context(&context->ibv_ctx); - free(context); - } -diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h -index 96059172..5d90634a 100644 ---- a/providers/hns/hns_roce_u.h -+++ b/providers/hns/hns_roce_u.h -@@ -47,7 +47,6 @@ - #include - #include "hns_roce_u_abi.h" - --#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') - #define HNS_ROCE_HW_VER2 0x100 - #define HNS_ROCE_HW_VER3 0x130 - -@@ -59,9 +58,7 @@ - - #define HNS_ROCE_MAX_RC_INL_INN_SZ 32 - #define HNS_ROCE_MAX_UD_INL_INN_SZ 8 --#define HNS_ROCE_MAX_CQ_NUM 0x10000 - #define HNS_ROCE_MIN_CQE_NUM 0x40 --#define HNS_ROCE_V1_MIN_WQE_NUM 0x20 - #define HNS_ROCE_V2_MIN_WQE_NUM 0x40 - #define HNS_ROCE_MIN_SRQ_WQE_NUM 1 - -@@ -75,9 +72,6 @@ - - #define HNS_ROCE_GID_SIZE 16 - --#define HNS_ROCE_CQ_DB_BUF_SIZE ((HNS_ROCE_MAX_CQ_NUM >> 11) << 12) --#define HNS_ROCE_STATIC_RATE 3 /* Gbps */ -- - #define INVALID_SGE_LENGTH 0x80000000 - - #define HNS_ROCE_DWQE_PAGE_SIZE 65536 -@@ -159,13 +153,6 @@ - #define HNS_ROCE_SRQ_TABLE_BITS 8 - #define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS) - --/* operation type list */ --enum { -- /* rq&srq operation */ -- HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, -- HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, --}; -- - struct hns_roce_device { - struct verbs_device ibv_dev; - int page_size; -@@ -201,8 +188,6 @@ struct hns_roce_context { - void *uar; - pthread_spinlock_t uar_lock; - -- void *cq_tptr_base; -- - struct { - struct hns_roce_qp **table; - int refcnt; -@@ -502,7 +487,6 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); - - void hns_roce_init_qp_indices(struct hns_roce_qp *qp); - --extern const struct hns_roce_u_hw hns_roce_u_hw_v1; - extern const struct hns_roce_u_hw hns_roce_u_hw_v2; - - #endif /* _HNS_ROCE_U_H */ -diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c -deleted file mode 100644 -index d47cba0c..00000000 ---- a/providers/hns/hns_roce_u_hw_v1.c -+++ /dev/null -@@ -1,836 +0,0 @@ --/* -- * Copyright (c) 2016 Hisilicon Limited. -- * -- * This software is available to you under a choice of one of two -- * licenses. You may choose to be licensed under the terms of the GNU -- * General Public License (GPL) Version 2, available from the file -- * COPYING in the main directory of this source tree, or the -- * OpenIB.org BSD license below: -- * -- * Redistribution and use in source and binary forms, with or -- * without modification, are permitted provided that the following -- * conditions are met: -- * -- * - Redistributions of source code must retain the above -- * copyright notice, this list of conditions and the following -- * disclaimer. -- * -- * - Redistributions in binary form must reproduce the above -- * copyright notice, this list of conditions and the following -- * disclaimer in the documentation and/or other materials -- * provided with the distribution. -- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -- * SOFTWARE. -- */ -- --#include --#include --#include "hns_roce_u_db.h" --#include "hns_roce_u_hw_v1.h" --#include "hns_roce_u.h" -- --static inline void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, -- uint64_t remote_addr, uint32_t rkey) --{ -- rseg->raddr = htole64(remote_addr); -- rseg->rkey = htole32(rkey); -- rseg->len = 0; --} -- --static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ibv_sge *sg) --{ -- -- dseg->lkey = htole32(sg->lkey); -- dseg->addr = htole64(sg->addr); -- dseg->len = htole32(sg->length); --} -- --static void hns_roce_update_rq_head(struct hns_roce_context *ctx, -- unsigned int qpn, unsigned int rq_head) --{ -- struct hns_roce_rq_db rq_db = {}; -- -- roce_set_field(rq_db.u32_4, RQ_DB_U32_4_RQ_HEAD_M, -- RQ_DB_U32_4_RQ_HEAD_S, rq_head); -- roce_set_field(rq_db.u32_8, RQ_DB_U32_8_QPN_M, RQ_DB_U32_8_QPN_S, qpn); -- roce_set_field(rq_db.u32_8, RQ_DB_U32_8_CMD_M, RQ_DB_U32_8_CMD_S, 1); -- roce_set_bit(rq_db.u32_8, RQ_DB_U32_8_HW_SYNC_S, 1); -- -- udma_to_device_barrier(); -- -- hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&rq_db); --} -- --static void hns_roce_update_sq_head(struct hns_roce_context *ctx, -- unsigned int qpn, unsigned int port, -- unsigned int sl, unsigned int sq_head) --{ -- struct hns_roce_sq_db sq_db = {}; -- -- roce_set_field(sq_db.u32_4, SQ_DB_U32_4_SQ_HEAD_M, -- SQ_DB_U32_4_SQ_HEAD_S, sq_head); -- roce_set_field(sq_db.u32_4, SQ_DB_U32_4_PORT_M, SQ_DB_U32_4_PORT_S, -- port); -- roce_set_field(sq_db.u32_4, SQ_DB_U32_4_SL_M, SQ_DB_U32_4_SL_S, sl); -- roce_set_field(sq_db.u32_8, SQ_DB_U32_8_QPN_M, SQ_DB_U32_8_QPN_S, qpn); -- roce_set_bit(sq_db.u32_8, SQ_DB_U32_8_HW_SYNC, 1); -- -- udma_to_device_barrier(); -- -- hns_roce_write64(ctx->uar + ROCEE_DB_SQ_L_0_REG, (__le32 *)&sq_db); --} -- --static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx, -- struct hns_roce_cq *cq) --{ -- struct hns_roce_cq_db cq_db = {}; -- -- roce_set_bit(cq_db.u32_8, CQ_DB_U32_8_HW_SYNC_S, 1); -- roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_M, CQ_DB_U32_8_CMD_S, 3); -- roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_MDF_M, -- CQ_DB_U32_8_CMD_MDF_S, 0); -- roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CQN_M, CQ_DB_U32_8_CQN_S, -- cq->cqn); -- roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M, -- CQ_DB_U32_4_CONS_IDX_S, -- cq->cons_index & ((cq->cq_depth << 1) - 1)); -- -- hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db); --} -- --static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe, -- struct ibv_wc *wc) --{ -- switch (roce_get_field(cqe->cqe_byte_4, -- CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, -- CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) & -- HNS_ROCE_CQE_STATUS_MASK) { -- case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR: -- wc->status = IBV_WC_LOC_LEN_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR: -- wc->status = IBV_WC_LOC_QP_OP_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR: -- wc->status = IBV_WC_LOC_PROT_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR: -- wc->status = IBV_WC_WR_FLUSH_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR: -- wc->status = IBV_WC_MW_BIND_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR: -- wc->status = IBV_WC_BAD_RESP_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR: -- wc->status = IBV_WC_LOC_ACCESS_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: -- wc->status = IBV_WC_REM_INV_REQ_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR: -- wc->status = IBV_WC_REM_ACCESS_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR: -- wc->status = IBV_WC_REM_OP_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: -- wc->status = IBV_WC_RETRY_EXC_ERR; -- break; -- case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR: -- wc->status = IBV_WC_RNR_RETRY_EXC_ERR; -- break; -- default: -- wc->status = IBV_WC_GENERAL_ERR; -- break; -- } --} -- --static struct hns_roce_cqe *get_cqe(struct hns_roce_cq *cq, int entry) --{ -- return cq->buf.buf + entry * HNS_ROCE_CQE_SIZE; --} -- --static void *get_sw_cqe(struct hns_roce_cq *cq, int n) --{ -- struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->verbs_cq.cq.cqe); -- -- return (!!(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S)) ^ -- !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL; --} -- --static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq) --{ -- return get_sw_cqe(cq, cq->cons_index); --} -- --static void *get_recv_wqe(struct hns_roce_qp *qp, int n) --{ -- if ((n < 0) || (n > qp->rq.wqe_cnt)) { -- verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), -- "rq wqe index:%d,rq wqe cnt:%d\r\n", n, -- qp->rq.wqe_cnt); -- return NULL; -- } -- -- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift); --} -- --static void *get_send_wqe(struct hns_roce_qp *qp, int n) --{ -- if ((n < 0) || (n > qp->sq.wqe_cnt)) { -- verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), -- "sq wqe index:%d,sq wqe cnt:%d\r\n", n, -- qp->sq.wqe_cnt); -- return NULL; -- } -- -- return (void *)(qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift)); --} -- --static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq, -- struct hns_roce_cq *cq) --{ -- unsigned int cur; -- -- cur = wq->head - wq->tail; -- if (cur + nreq < wq->max_post) -- return 0; -- -- /* While the num of wqe exceeds cap of the device, cq will be locked */ -- pthread_spin_lock(&cq->lock); -- cur = wq->head - wq->tail; -- pthread_spin_unlock(&cq->lock); -- -- verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), -- "wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n", -- wq->head, wq->tail, wq->max_post, nreq); -- -- return cur + nreq >= wq->max_post; --} -- --static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, -- uint32_t qpn) --{ -- uint32_t tind = to_hr_qp_table_index(qpn, ctx); -- -- if (ctx->qp_table[tind].refcnt) { -- return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; -- } else { -- verbs_err(&ctx->ibv_ctx, "hns_roce_find_qp fail!\n"); -- return NULL; -- } --} -- --static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn) --{ -- uint32_t tind = to_hr_qp_table_index(qpn, ctx); -- -- if (!--ctx->qp_table[tind].refcnt) -- free(ctx->qp_table[tind].table); -- else -- ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL; --} -- --static int hns_roce_v1_poll_one(struct hns_roce_cq *cq, -- struct hns_roce_qp **cur_qp, struct ibv_wc *wc) --{ -- uint32_t qpn; -- int is_send; -- uint16_t wqe_ctr; -- uint32_t local_qpn; -- struct hns_roce_wq *wq = NULL; -- struct hns_roce_cqe *cqe = NULL; -- struct hns_roce_wqe_ctrl_seg *sq_wqe = NULL; -- -- /* According to CI, find the relative cqe */ -- cqe = next_cqe_sw(cq); -- if (!cqe) -- return CQ_EMPTY; -- -- /* Get the next cqe, CI will be added gradually */ -- ++cq->cons_index; -- -- udma_from_device_barrier(); -- -- qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, -- CQE_BYTE_16_LOCAL_QPN_S); -- -- is_send = (roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S) == -- HNS_ROCE_CQE_IS_SQ); -- -- local_qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, -- CQE_BYTE_16_LOCAL_QPN_S); -- -- /* if qp is zero, it will not get the correct qpn */ -- if (!*cur_qp || -- (local_qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->verbs_qp.qp.qp_num) { -- -- *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->verbs_cq.cq.context), -- qpn & 0xffffff); -- if (!*cur_qp) { -- verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), -- PFX "can't find qp!\n"); -- return CQ_POLL_ERR; -- } -- } -- wc->qp_num = qpn & 0xffffff; -- -- if (is_send) { -- wq = &(*cur_qp)->sq; -- /* -- * if sq_signal_bits is 1, the tail pointer first update to -- * the wqe corresponding the current cqe -- */ -- if ((*cur_qp)->sq_signal_bits) { -- wqe_ctr = (uint16_t)(roce_get_field(cqe->cqe_byte_4, -- CQE_BYTE_4_WQE_INDEX_M, -- CQE_BYTE_4_WQE_INDEX_S)); -- /* -- * wq->tail will plus a positive number every time, -- * when wq->tail exceeds 32b, it is 0 and acc -- */ -- wq->tail += (wqe_ctr - (uint16_t) wq->tail) & -- (wq->wqe_cnt - 1); -- } -- /* write the wr_id of wq into the wc */ -- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -- ++wq->tail; -- } else { -- wq = &(*cur_qp)->rq; -- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; -- ++wq->tail; -- } -- -- /* -- * HW maintains wc status, set the err type and directly return, after -- * generated the incorrect CQE -- */ -- if (roce_get_field(cqe->cqe_byte_4, -- CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, -- CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) { -- verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), -- PFX "error cqe!\n"); -- hns_roce_handle_error_cqe(cqe, wc); -- return CQ_OK; -- } -- wc->status = IBV_WC_SUCCESS; -- -- /* -- * According to the opcode type of cqe, mark the opcode and other -- * information of wc -- */ -- if (is_send) { -- /* Get opcode and flag before update the tail point for send */ -- sq_wqe = (struct hns_roce_wqe_ctrl_seg *) -- get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4, -- CQE_BYTE_4_WQE_INDEX_M, -- CQE_BYTE_4_WQE_INDEX_S)); -- switch (le32toh(sq_wqe->flag) & HNS_ROCE_WQE_OPCODE_MASK) { -- case HNS_ROCE_WQE_OPCODE_SEND: -- wc->opcode = IBV_WC_SEND; -- break; -- case HNS_ROCE_WQE_OPCODE_RDMA_READ: -- wc->opcode = IBV_WC_RDMA_READ; -- wc->byte_len = le32toh(cqe->byte_cnt); -- break; -- case HNS_ROCE_WQE_OPCODE_RDMA_WRITE: -- wc->opcode = IBV_WC_RDMA_WRITE; -- break; -- case HNS_ROCE_WQE_OPCODE_BIND_MW2: -- wc->opcode = IBV_WC_BIND_MW; -- break; -- default: -- wc->status = IBV_WC_GENERAL_ERR; -- break; -- } -- wc->wc_flags = (le32toh(sq_wqe->flag) & HNS_ROCE_WQE_IMM ? -- IBV_WC_WITH_IMM : 0); -- } else { -- /* Get opcode and flag in rq&srq */ -- wc->byte_len = le32toh(cqe->byte_cnt); -- -- switch (roce_get_field(cqe->cqe_byte_4, -- CQE_BYTE_4_OPERATION_TYPE_M, -- CQE_BYTE_4_OPERATION_TYPE_S) & -- HNS_ROCE_CQE_OPCODE_MASK) { -- case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE: -- wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; -- wc->wc_flags = IBV_WC_WITH_IMM; -- wc->imm_data = htobe32(le32toh(cqe->immediate_data)); -- break; -- case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE: -- if (roce_get_bit(cqe->cqe_byte_4, -- CQE_BYTE_4_IMMEDIATE_DATA_FLAG_S)) { -- wc->opcode = IBV_WC_RECV; -- wc->wc_flags = IBV_WC_WITH_IMM; -- wc->imm_data = -- htobe32(le32toh(cqe->immediate_data)); -- } else { -- wc->opcode = IBV_WC_RECV; -- wc->wc_flags = 0; -- } -- break; -- default: -- wc->status = IBV_WC_GENERAL_ERR; -- break; -- } -- } -- -- return CQ_OK; --} -- --static int hns_roce_u_v1_poll_cq(struct ibv_cq *ibvcq, int ne, -- struct ibv_wc *wc) --{ -- int npolled; -- int err = CQ_OK; -- struct hns_roce_qp *qp = NULL; -- struct hns_roce_cq *cq = to_hr_cq(ibvcq); -- struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); -- struct hns_roce_device *dev = to_hr_dev(ibvcq->context->device); -- -- pthread_spin_lock(&cq->lock); -- -- for (npolled = 0; npolled < ne; ++npolled) { -- err = hns_roce_v1_poll_one(cq, &qp, wc + npolled); -- if (err != CQ_OK) -- break; -- } -- -- if (npolled) { -- if (dev->hw_version == HNS_ROCE_HW_VER1) { -- *cq->db = (cq->cons_index & ((cq->cq_depth << 1) - 1)); -- mmio_ordered_writes_hack(); -- } -- -- hns_roce_update_cq_cons_index(ctx, cq); -- } -- -- pthread_spin_unlock(&cq->lock); -- -- return err == CQ_POLL_ERR ? err : npolled; --} -- --/** -- * hns_roce_u_v1_arm_cq - request completion notification on a CQ -- * @ibvcq: The completion queue to request notification for. -- * @solicited: If non-zero, a event will be generated only for -- * the next solicited CQ entry. If zero, any CQ entry, -- * solicited or not, will generate an event -- */ --static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited) --{ -- struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); -- struct hns_roce_cq *cq = to_hr_cq(ibvcq); -- struct hns_roce_cq_db cq_db = {}; -- uint32_t solicited_flag; -- uint32_t ci; -- -- ci = cq->cons_index & ((cq->cq_depth << 1) - 1); -- solicited_flag = solicited ? HNS_ROCE_CQ_DB_REQ_SOL : -- HNS_ROCE_CQ_DB_REQ_NEXT; -- -- roce_set_bit(cq_db.u32_8, CQ_DB_U32_8_HW_SYNC_S, 1); -- roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_M, CQ_DB_U32_8_CMD_S, 3); -- roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CMD_MDF_M, -- CQ_DB_U32_8_CMD_MDF_S, 1); -- roce_set_bit(cq_db.u32_8, CQ_DB_U32_8_NOTIFY_TYPE_S, solicited_flag); -- roce_set_field(cq_db.u32_8, CQ_DB_U32_8_CQN_M, CQ_DB_U32_8_CQN_S, -- cq->cqn); -- roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M, -- CQ_DB_U32_4_CONS_IDX_S, ci); -- -- hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db); -- -- return 0; --} -- --static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, -- struct ibv_send_wr **bad_wr) --{ -- void *wqe; -- int nreq; -- int ps_opcode, i; -- int ret = 0; -- struct hns_roce_wqe_ctrl_seg *ctrl = NULL; -- struct hns_roce_wqe_data_seg *dseg = NULL; -- struct hns_roce_qp *qp = to_hr_qp(ibvqp); -- struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); -- unsigned int wqe_idx; -- -- pthread_spin_lock(&qp->sq.lock); -- -- for (nreq = 0; wr; ++nreq, wr = wr->next) { -- if (hns_roce_wq_overflow(&qp->sq, nreq, -- to_hr_cq(qp->verbs_qp.qp.send_cq))) { -- ret = -1; -- *bad_wr = wr; -- goto out; -- } -- -- wqe_idx = (qp->sq.head + nreq) & (qp->rq.wqe_cnt - 1); -- -- if (wr->num_sge > qp->sq.max_gs) { -- ret = -1; -- *bad_wr = wr; -- verbs_err(verbs_get_ctx(ibvqp->context), -- "wr->num_sge(<=%d) = %d, check failed!\r\n", -- qp->sq.max_gs, wr->num_sge); -- goto out; -- } -- -- ctrl = wqe = get_send_wqe(qp, wqe_idx); -- memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg)); -- -- qp->sq.wrid[wqe_idx] = wr->wr_id; -- for (i = 0; i < wr->num_sge; i++) -- ctrl->msg_length = htole32(le32toh(ctrl->msg_length) + -- wr->sg_list[i].length); -- -- ctrl->flag |= htole32(((wr->send_flags & IBV_SEND_SIGNALED) ? -- HNS_ROCE_WQE_CQ_NOTIFY : 0) | -- (wr->send_flags & IBV_SEND_SOLICITED ? -- HNS_ROCE_WQE_SE : 0) | -- ((wr->opcode == IBV_WR_SEND_WITH_IMM || -- wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) ? -- HNS_ROCE_WQE_IMM : 0) | -- (wr->send_flags & IBV_SEND_FENCE ? -- HNS_ROCE_WQE_FENCE : 0)); -- -- if (wr->opcode == IBV_WR_SEND_WITH_IMM || -- wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) -- ctrl->imm_data = htole32(be32toh(wr->imm_data)); -- -- wqe += sizeof(struct hns_roce_wqe_ctrl_seg); -- -- /* set remote addr segment */ -- switch (ibvqp->qp_type) { -- case IBV_QPT_RC: -- switch (wr->opcode) { -- case IBV_WR_RDMA_READ: -- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ; -- set_raddr_seg(wqe, wr->wr.rdma.remote_addr, -- wr->wr.rdma.rkey); -- break; -- case IBV_WR_RDMA_WRITE: -- case IBV_WR_RDMA_WRITE_WITH_IMM: -- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE; -- set_raddr_seg(wqe, wr->wr.rdma.remote_addr, -- wr->wr.rdma.rkey); -- break; -- case IBV_WR_SEND: -- case IBV_WR_SEND_WITH_IMM: -- ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; -- break; -- case IBV_WR_ATOMIC_CMP_AND_SWP: -- case IBV_WR_ATOMIC_FETCH_AND_ADD: -- default: -- ps_opcode = HNS_ROCE_WQE_OPCODE_MASK; -- break; -- } -- ctrl->flag |= htole32(ps_opcode); -- wqe += sizeof(struct hns_roce_wqe_raddr_seg); -- break; -- case IBV_QPT_UD: -- default: -- break; -- } -- -- dseg = wqe; -- -- /* Inline */ -- if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { -- if (le32toh(ctrl->msg_length) > qp->max_inline_data) { -- ret = -1; -- *bad_wr = wr; -- verbs_err(verbs_get_ctx(ibvqp->context), -- "inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n", -- wr->send_flags, ctrl->msg_length); -- return ret; -- } -- -- for (i = 0; i < wr->num_sge; i++) { -- memcpy(wqe, -- ((void *) (uintptr_t) wr->sg_list[i].addr), -- wr->sg_list[i].length); -- wqe = wqe + wr->sg_list[i].length; -- } -- -- ctrl->flag |= htole32(HNS_ROCE_WQE_INLINE); -- } else { -- /* set sge */ -- for (i = 0; i < wr->num_sge; i++) -- set_data_seg(dseg+i, wr->sg_list + i); -- -- ctrl->flag |= -- htole32(wr->num_sge << HNS_ROCE_WQE_SGE_NUM_BIT); -- } -- } -- --out: -- /* Set DB return */ -- if (likely(nreq)) { -- qp->sq.head += nreq; -- -- hns_roce_update_sq_head(ctx, qp->verbs_qp.qp.qp_num, -- qp->port_num - 1, qp->sl, -- qp->sq.head & ((qp->sq.wqe_cnt << 1) - -- 1)); -- } -- -- pthread_spin_unlock(&qp->sq.lock); -- -- return ret; --} -- --static void __hns_roce_v1_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, -- struct hns_roce_srq *srq) --{ -- int nfreed = 0; -- uint32_t prod_index; -- uint8_t owner_bit = 0; -- struct hns_roce_cqe *cqe, *dest; -- struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); -- -- for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); -- ++prod_index) -- if (prod_index == cq->cons_index + cq->verbs_cq.cq.cqe) -- break; -- -- while ((int) --prod_index - (int) cq->cons_index >= 0) { -- cqe = get_cqe(cq, prod_index & cq->verbs_cq.cq.cqe); -- if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, -- CQE_BYTE_16_LOCAL_QPN_S) & 0xffffff) == qpn) { -- ++nfreed; -- } else if (nfreed) { -- dest = get_cqe(cq, -- (prod_index + nfreed) & cq->verbs_cq.cq.cqe); -- owner_bit = roce_get_bit(dest->cqe_byte_4, -- CQE_BYTE_4_OWNER_S); -- memcpy(dest, cqe, sizeof(*cqe)); -- roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S, -- owner_bit); -- } -- } -- -- if (nfreed) { -- cq->cons_index += nfreed; -- udma_to_device_barrier(); -- hns_roce_update_cq_cons_index(ctx, cq); -- } --} -- --static void hns_roce_v1_cq_clean(struct hns_roce_cq *cq, unsigned int qpn, -- struct hns_roce_srq *srq) --{ -- pthread_spin_lock(&cq->lock); -- __hns_roce_v1_cq_clean(cq, qpn, srq); -- pthread_spin_unlock(&cq->lock); --} -- --static int hns_roce_u_v1_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, -- int attr_mask) --{ -- int ret; -- struct ibv_modify_qp cmd = {}; -- struct hns_roce_qp *hr_qp = to_hr_qp(qp); -- -- ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); -- -- if (!ret && (attr_mask & IBV_QP_STATE) && -- attr->qp_state == IBV_QPS_RESET) { -- hns_roce_v1_cq_clean(to_hr_cq(qp->recv_cq), qp->qp_num, -- qp->srq ? to_hr_srq(qp->srq) : NULL); -- if (qp->send_cq != qp->recv_cq) -- hns_roce_v1_cq_clean(to_hr_cq(qp->send_cq), qp->qp_num, -- NULL); -- -- hns_roce_init_qp_indices(to_hr_qp(qp)); -- } -- -- if (!ret && (attr_mask & IBV_QP_PORT)) { -- hr_qp->port_num = attr->port_num; -- verbs_err(verbs_get_ctx(qp->context), "hr_qp->port_num= 0x%x\n", -- hr_qp->port_num); -- } -- -- hr_qp->sl = attr->ah_attr.sl; -- -- return ret; --} -- --static void hns_roce_lock_cqs(struct ibv_qp *qp) --{ -- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); -- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); -- -- if (send_cq == recv_cq) { -- pthread_spin_lock(&send_cq->lock); -- } else if (send_cq->cqn < recv_cq->cqn) { -- pthread_spin_lock(&send_cq->lock); -- pthread_spin_lock(&recv_cq->lock); -- } else { -- pthread_spin_lock(&recv_cq->lock); -- pthread_spin_lock(&send_cq->lock); -- } --} -- --static void hns_roce_unlock_cqs(struct ibv_qp *qp) --{ -- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); -- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); -- -- if (send_cq == recv_cq) { -- pthread_spin_unlock(&send_cq->lock); -- } else if (send_cq->cqn < recv_cq->cqn) { -- pthread_spin_unlock(&recv_cq->lock); -- pthread_spin_unlock(&send_cq->lock); -- } else { -- pthread_spin_unlock(&send_cq->lock); -- pthread_spin_unlock(&recv_cq->lock); -- } --} -- --static int hns_roce_u_v1_destroy_qp(struct ibv_qp *ibqp) --{ -- int ret; -- struct hns_roce_qp *qp = to_hr_qp(ibqp); -- -- pthread_mutex_lock(&to_hr_ctx(ibqp->context)->qp_table_mutex); -- ret = ibv_cmd_destroy_qp(ibqp); -- if (ret) { -- pthread_mutex_unlock(&to_hr_ctx(ibqp->context)->qp_table_mutex); -- return ret; -- } -- -- hns_roce_lock_cqs(ibqp); -- -- __hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num, -- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); -- -- if (ibqp->send_cq != ibqp->recv_cq) -- __hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, -- NULL); -- -- hns_roce_clear_qp(to_hr_ctx(ibqp->context), ibqp->qp_num); -- -- hns_roce_unlock_cqs(ibqp); -- pthread_mutex_unlock(&to_hr_ctx(ibqp->context)->qp_table_mutex); -- -- free(qp->sq.wrid); -- if (qp->rq.wqe_cnt) -- free(qp->rq.wrid); -- -- hns_roce_free_buf(&qp->buf); -- free(qp); -- -- return ret; --} -- --static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, -- struct ibv_recv_wr **bad_wr) --{ -- int ret = 0; -- unsigned int nreq; -- struct ibv_sge *sg; -- struct hns_roce_rc_rq_wqe *rq_wqe; -- struct hns_roce_qp *qp = to_hr_qp(ibvqp); -- struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); -- unsigned int wqe_idx; -- -- pthread_spin_lock(&qp->rq.lock); -- -- for (nreq = 0; wr; ++nreq, wr = wr->next) { -- if (hns_roce_wq_overflow(&qp->rq, nreq, -- to_hr_cq(qp->verbs_qp.qp.recv_cq))) { -- ret = -1; -- *bad_wr = wr; -- goto out; -- } -- -- wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1); -- -- if (wr->num_sge > qp->rq.max_gs) { -- ret = -1; -- *bad_wr = wr; -- goto out; -- } -- -- rq_wqe = get_recv_wqe(qp, wqe_idx); -- if (wr->num_sge > HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM) { -- ret = -1; -- *bad_wr = wr; -- goto out; -- } -- -- if (wr->num_sge == HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM) { -- roce_set_field(rq_wqe->u32_2, -- RC_RQ_WQE_NUMBER_OF_DATA_SEG_M, -- RC_RQ_WQE_NUMBER_OF_DATA_SEG_S, -- HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM); -- sg = wr->sg_list; -- -- rq_wqe->va0 = htole64(sg->addr); -- rq_wqe->l_key0 = htole32(sg->lkey); -- rq_wqe->length0 = htole32(sg->length); -- -- sg = wr->sg_list + 1; -- -- rq_wqe->va1 = htole64(sg->addr); -- rq_wqe->l_key1 = htole32(sg->lkey); -- rq_wqe->length1 = htole32(sg->length); -- } else if (wr->num_sge == HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 1) { -- roce_set_field(rq_wqe->u32_2, -- RC_RQ_WQE_NUMBER_OF_DATA_SEG_M, -- RC_RQ_WQE_NUMBER_OF_DATA_SEG_S, -- HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 1); -- sg = wr->sg_list; -- -- rq_wqe->va0 = htole64(sg->addr); -- rq_wqe->l_key0 = htole32(sg->lkey); -- rq_wqe->length0 = htole32(sg->length); -- -- } else if (wr->num_sge == HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 2) { -- roce_set_field(rq_wqe->u32_2, -- RC_RQ_WQE_NUMBER_OF_DATA_SEG_M, -- RC_RQ_WQE_NUMBER_OF_DATA_SEG_S, -- HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 2); -- } -- -- qp->rq.wrid[wqe_idx] = wr->wr_id; -- } -- --out: -- if (nreq) { -- qp->rq.head += nreq; -- -- hns_roce_update_rq_head(ctx, qp->verbs_qp.qp.qp_num, -- qp->rq.head & ((qp->rq.wqe_cnt << 1) - -- 1)); -- } -- -- pthread_spin_unlock(&qp->rq.lock); -- -- return ret; --} -- --const struct hns_roce_u_hw hns_roce_u_hw_v1 = { -- .hw_version = HNS_ROCE_HW_VER1, -- .hw_ops = { -- .poll_cq = hns_roce_u_v1_poll_cq, -- .req_notify_cq = hns_roce_u_v1_arm_cq, -- .post_send = hns_roce_u_v1_post_send, -- .post_recv = hns_roce_u_v1_post_recv, -- .modify_qp = hns_roce_u_v1_modify_qp, -- .destroy_qp = hns_roce_u_v1_destroy_qp, -- }, --}; -diff --git a/providers/hns/hns_roce_u_hw_v1.h b/providers/hns/hns_roce_u_hw_v1.h -deleted file mode 100644 -index bb7aec6b..00000000 ---- a/providers/hns/hns_roce_u_hw_v1.h -+++ /dev/null -@@ -1,244 +0,0 @@ --/* -- * Copyright (c) 2016 Hisilicon Limited. -- * -- * This software is available to you under a choice of one of two -- * licenses. You may choose to be licensed under the terms of the GNU -- * General Public License (GPL) Version 2, available from the file -- * COPYING in the main directory of this source tree, or the -- * OpenIB.org BSD license below: -- * -- * Redistribution and use in source and binary forms, with or -- * without modification, are permitted provided that the following -- * conditions are met: -- * -- * - Redistributions of source code must retain the above -- * copyright notice, this list of conditions and the following -- * disclaimer. -- * -- * - Redistributions in binary form must reproduce the above -- * copyright notice, this list of conditions and the following -- * disclaimer in the documentation and/or other materials -- * provided with the distribution. -- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -- * SOFTWARE. -- */ -- --#ifndef _HNS_ROCE_U_HW_V1_H --#define _HNS_ROCE_U_HW_V1_H -- --#include --#include -- --#define HNS_ROCE_CQ_DB_REQ_SOL 1 --#define HNS_ROCE_CQ_DB_REQ_NEXT 0 -- --#define HNS_ROCE_CQE_IS_SQ 0 -- --#define HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM 2 -- --enum { -- HNS_ROCE_WQE_INLINE = 1 << 31, -- HNS_ROCE_WQE_SE = 1 << 30, -- HNS_ROCE_WQE_SGE_NUM_BIT = 24, -- HNS_ROCE_WQE_IMM = 1 << 23, -- HNS_ROCE_WQE_FENCE = 1 << 21, -- HNS_ROCE_WQE_CQ_NOTIFY = 1 << 20, -- HNS_ROCE_WQE_OPCODE_SEND = 0 << 16, -- HNS_ROCE_WQE_OPCODE_RDMA_READ = 1 << 16, -- HNS_ROCE_WQE_OPCODE_RDMA_WRITE = 2 << 16, -- HNS_ROCE_WQE_OPCODE_BIND_MW2 = 6 << 16, -- HNS_ROCE_WQE_OPCODE_MASK = 15 << 16, --}; -- --struct hns_roce_wqe_ctrl_seg { -- __le32 sgl_pa_h; -- __le32 flag; -- __le32 imm_data; -- __le32 msg_length; --}; -- --struct hns_roce_wqe_data_seg { -- __le64 addr; -- __le32 lkey; -- __le32 len; --}; -- --struct hns_roce_wqe_raddr_seg { -- __le32 rkey; -- __le32 len; -- __le64 raddr; --}; -- --enum { -- CQ_OK = 0, -- CQ_EMPTY = -1, -- CQ_POLL_ERR = -2, --}; -- --enum { -- HNS_ROCE_CQE_QPN_MASK = 0x3ffff, -- HNS_ROCE_CQE_STATUS_MASK = 0x1f, -- HNS_ROCE_CQE_OPCODE_MASK = 0xf, --}; -- --enum { -- HNS_ROCE_CQE_SUCCESS, -- HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR, -- HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR, -- HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR, -- HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR, -- HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR, -- HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR, -- HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR, -- HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR, -- HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR, -- HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR, -- HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR, -- HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR, --}; -- --struct hns_roce_cq_db { -- __le32 u32_4; -- __le32 u32_8; --}; --#define CQ_DB_U32_4_CONS_IDX_S 0 --#define CQ_DB_U32_4_CONS_IDX_M (((1UL << 16) - 1) << CQ_DB_U32_4_CONS_IDX_S) -- --#define CQ_DB_U32_8_CQN_S 0 --#define CQ_DB_U32_8_CQN_M (((1UL << 16) - 1) << CQ_DB_U32_8_CQN_S) -- --#define CQ_DB_U32_8_NOTIFY_TYPE_S 16 -- --#define CQ_DB_U32_8_CMD_MDF_S 24 --#define CQ_DB_U32_8_CMD_MDF_M (((1UL << 4) - 1) << CQ_DB_U32_8_CMD_MDF_S) -- --#define CQ_DB_U32_8_CMD_S 28 --#define CQ_DB_U32_8_CMD_M (((1UL << 3) - 1) << CQ_DB_U32_8_CMD_S) -- --#define CQ_DB_U32_8_HW_SYNC_S 31 -- --struct hns_roce_rq_db { -- __le32 u32_4; -- __le32 u32_8; --}; -- --#define RQ_DB_U32_4_RQ_HEAD_S 0 --#define RQ_DB_U32_4_RQ_HEAD_M (((1UL << 15) - 1) << RQ_DB_U32_4_RQ_HEAD_S) -- --#define RQ_DB_U32_8_QPN_S 0 --#define RQ_DB_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DB_U32_8_QPN_S) -- --#define RQ_DB_U32_8_CMD_S 28 --#define RQ_DB_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DB_U32_8_CMD_S) -- --#define RQ_DB_U32_8_HW_SYNC_S 31 -- --struct hns_roce_sq_db { -- __le32 u32_4; -- __le32 u32_8; --}; -- --#define SQ_DB_U32_4_SQ_HEAD_S 0 --#define SQ_DB_U32_4_SQ_HEAD_M (((1UL << 15) - 1) << SQ_DB_U32_4_SQ_HEAD_S) -- --#define SQ_DB_U32_4_SL_S 16 --#define SQ_DB_U32_4_SL_M (((1UL << 2) - 1) << SQ_DB_U32_4_SL_S) -- --#define SQ_DB_U32_4_PORT_S 18 --#define SQ_DB_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DB_U32_4_PORT_S) -- --#define SQ_DB_U32_4_DIRECT_WQE_S 31 -- --#define SQ_DB_U32_8_QPN_S 0 --#define SQ_DB_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DB_U32_8_QPN_S) -- --#define SQ_DB_U32_8_HW_SYNC 31 -- --struct hns_roce_cqe { -- __le32 cqe_byte_4; -- union { -- __le32 r_key; -- __le32 immediate_data; -- }; -- __le32 byte_cnt; -- __le32 cqe_byte_16; -- __le32 cqe_byte_20; -- __le32 s_mac_l; -- __le32 cqe_byte_28; -- __le32 reserved; --}; --#define CQE_BYTE_4_OPERATION_TYPE_S 0 --#define CQE_BYTE_4_OPERATION_TYPE_M \ -- (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S) -- --#define CQE_BYTE_4_OWNER_S 7 -- --#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8 --#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \ -- (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) -- --#define CQE_BYTE_4_SQ_RQ_FLAG_S 14 -- --#define CQE_BYTE_4_IMMEDIATE_DATA_FLAG_S 15 -- --#define CQE_BYTE_4_WQE_INDEX_S 16 --#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S) -- --#define CQE_BYTE_16_LOCAL_QPN_S 0 --#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S) -- --#define ROCEE_DB_SQ_L_0_REG 0x230 -- --#define ROCEE_DB_OTHERS_L_0_REG 0x238 -- --struct hns_roce_rc_send_wqe { -- __le32 sgl_ba_31_0; -- __le32 u32_1; -- union { -- __le32 r_key; -- __le32 immediate_data; -- }; -- __le32 msg_length; -- __le32 rvd_3; -- __le32 rvd_4; -- __le32 rvd_5; -- __le32 rvd_6; -- __le64 va0; -- __le32 l_key0; -- __le32 length0; -- -- __le64 va1; -- __le32 l_key1; -- __le32 length1; --}; -- --struct hns_roce_rc_rq_wqe { -- __le32 u32_0; -- __le32 sgl_ba_31_0; -- __le32 u32_2; -- __le32 rvd_5; -- __le32 rvd_6; -- __le32 rvd_7; -- __le32 rvd_8; -- __le32 rvd_9; -- -- __le64 va0; -- __le32 l_key0; -- __le32 length0; -- -- __le64 va1; -- __le32 l_key1; -- __le32 length1; --}; --#define RC_RQ_WQE_NUMBER_OF_DATA_SEG_S 16 --#define RC_RQ_WQE_NUMBER_OF_DATA_SEG_M \ -- (((1UL << 6) - 1) << RC_RQ_WQE_NUMBER_OF_DATA_SEG_S) -- --#endif /* _HNS_ROCE_U_HW_V1_H */ -diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c -index 215d82ec..ba7f2aee 100644 ---- a/providers/hns/hns_roce_u_verbs.c -+++ b/providers/hns/hns_roce_u_verbs.c -@@ -40,7 +40,6 @@ - #include - #include "hns_roce_u.h" - #include "hns_roce_u_db.h" --#include "hns_roce_u_hw_v1.h" - #include "hns_roce_u_hw_v2.h" - - void hns_roce_init_qp_indices(struct hns_roce_qp *qp) -@@ -341,7 +340,6 @@ static int exec_cq_create_cmd(struct ibv_context *context, - static struct ibv_cq_ex *create_cq(struct ibv_context *context, - struct ibv_cq_init_attr_ex *attr) - { -- struct hns_roce_device *hr_dev = to_hr_dev(context->device); - struct hns_roce_context *hr_ctx = to_hr_ctx(context); - struct hns_roce_cq *cq; - int ret; -@@ -384,8 +382,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, - return &cq->verbs_cq.cq_ex; - - err_cmd: -- if (hr_dev->hw_version != HNS_ROCE_HW_VER1) -- hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB); -+ hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB); - err_db: - hns_roce_free_buf(&cq->buf); - err_lock: -@@ -446,9 +443,8 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq) - if (ret) - return ret; - -- if (to_hr_dev(cq->context->device)->hw_version != HNS_ROCE_HW_VER1) -- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db, -- HNS_ROCE_CQ_TYPE_DB); -+ hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db, -+ HNS_ROCE_CQ_TYPE_DB); - hns_roce_free_buf(&to_hr_cq(cq)->buf); - free(to_hr_cq(cq)); - -@@ -782,7 +778,7 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, - - switch (attr->qp_type) { - case IBV_QPT_UD: -- if (hr_dev->hw_version < HNS_ROCE_HW_VER3) -+ if (hr_dev->hw_version == HNS_ROCE_HW_VER2) - return -EINVAL; - SWITCH_FALLTHROUGH; - case IBV_QPT_RC: -@@ -813,7 +809,6 @@ static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr) - static int verify_qp_create_cap(struct hns_roce_context *ctx, - struct ibv_qp_init_attr_ex *attr) - { -- struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); - struct ibv_qp_cap *cap = &attr->cap; - uint32_t min_wqe_num; - int has_rq; -@@ -833,9 +828,7 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, - cap->max_recv_sge = 0; - } - -- min_wqe_num = hr_dev->hw_version == HNS_ROCE_HW_VER1 ? -- HNS_ROCE_V1_MIN_WQE_NUM : HNS_ROCE_V2_MIN_WQE_NUM; -- -+ min_wqe_num = HNS_ROCE_V2_MIN_WQE_NUM; - if (cap->max_send_wr < min_wqe_num) - cap->max_send_wr = min_wqe_num; - -@@ -905,18 +898,10 @@ static void qp_free_recv_inl_buf(struct hns_roce_qp *qp) - static int calc_qp_buff_size(struct hns_roce_device *hr_dev, - struct hns_roce_qp *qp) - { -- struct hns_roce_wq *sq, *rq; -+ struct hns_roce_wq *sq = &qp->sq; -+ struct hns_roce_wq *rq = &qp->rq; - unsigned int size; - -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1 && -- qp->rq.wqe_shift > qp->sq.wqe_shift) { -- sq = &qp->rq; -- rq = &qp->sq; -- } else { -- sq = &qp->sq; -- rq = &qp->rq; -- } -- - qp->buf_size = 0; - - /* SQ WQE */ -@@ -1013,11 +998,6 @@ static void set_ext_sge_param(struct hns_roce_device *hr_dev, - - qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; - -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1) { -- qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; -- return; -- } -- - qp->sq.max_gs = attr->cap.max_send_sge; - - wqe_sge_cnt = get_wqe_ext_sge_cnt(qp); -@@ -1048,20 +1028,11 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, - - qp->rq.max_gs = roundup_pow_of_two(attr->cap.max_recv_sge + - qp->rq.rsv_sge); -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1) -- qp->rq.wqe_shift = -- hr_ilog32(sizeof(struct hns_roce_rc_rq_wqe)); -- else -- qp->rq.wqe_shift = -- hr_ilog32(HNS_ROCE_SGE_SIZE * qp->rq.max_gs); -- -+ qp->rq.wqe_shift = hr_ilog32(HNS_ROCE_SGE_SIZE * qp->rq.max_gs); - cnt = roundup_pow_of_two(attr->cap.max_recv_wr); - qp->rq.wqe_cnt = cnt; - qp->rq.shift = hr_ilog32(cnt); -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1) -- qp->rq_rinl_buf.wqe_cnt = 0; -- else -- qp->rq_rinl_buf.wqe_cnt = cnt; -+ qp->rq_rinl_buf.wqe_cnt = cnt; - - attr->cap.max_recv_wr = qp->rq.wqe_cnt; - attr->cap.max_recv_sge = qp->rq.max_gs; -@@ -1086,11 +1057,6 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, - - static void qp_free_db(struct hns_roce_qp *qp, struct hns_roce_context *ctx) - { -- struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); -- -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1) -- return; -- - if (qp->sdb) - hns_roce_free_db(ctx, qp->sdb, HNS_ROCE_QP_TYPE_DB); - -@@ -1101,11 +1067,6 @@ static void qp_free_db(struct hns_roce_qp *qp, struct hns_roce_context *ctx) - static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp, - struct hns_roce_context *ctx) - { -- struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); -- -- if (hr_dev->hw_version == HNS_ROCE_HW_VER1) -- return 0; -- - if (attr->cap.max_send_wr) { - qp->sdb = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB); - if (!qp->sdb) -@@ -1421,7 +1382,7 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) - struct hns_roce_ah *ah; - - /* HIP08 don't support create ah */ -- if (hr_dev->hw_version < HNS_ROCE_HW_VER3) -+ if (hr_dev->hw_version == HNS_ROCE_HW_VER2) - return NULL; - - ah = malloc(sizeof(*ah)); --- -2.30.0 - diff --git a/backport-fixbug-increase-maximum-number-of-cpus-rdma.patch b/backport-fixbug-increase-maximum-number-of-cpus-rdma.patch deleted file mode 100644 index bba1d01e227eea3ee95ce96b5f5099cde301fbc1..0000000000000000000000000000000000000000 --- a/backport-fixbug-increase-maximum-number-of-cpus-rdma.patch +++ /dev/null @@ -1,27 +0,0 @@ -From c381cfa26ba6163b9cc51212702e64bf1d83f838 Mon Sep 17 00:00:00 2001 -From: swimlessbird <52704385+swimlessbird@users.noreply.github.com> -Date: Fri, 17 Sep 2021 14:35:05 +0800 -Subject: [PATCH] ibdiags: Increase maximum number of CPUs - -In modern systems, the old limit (8) is small enough, so increase -to something larger (256). - -Signed-off-by: Suwan Sun -Signed-off-by: Leon Romanovsky ---- - infiniband-diags/ibsysstat.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/infiniband-diags/ibsysstat.c b/infiniband-diags/ibsysstat.c -index 6ff7ca0c4..73972d039 100644 ---- a/infiniband-diags/ibsysstat.c -+++ b/infiniband-diags/ibsysstat.c -@@ -41,7 +41,7 @@ - - #include "ibdiag_common.h" - --#define MAX_CPUS 8 -+#define MAX_CPUS 256 - - static struct ibmad_port *srcport; - diff --git a/rdma-core-35.1.tar.gz b/rdma-core-35.1.tar.gz deleted file mode 100644 index 504375077359244632cf12625777c65076935774..0000000000000000000000000000000000000000 Binary files a/rdma-core-35.1.tar.gz and /dev/null differ diff --git a/rdma-core-41.0.tar.gz b/rdma-core-41.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..19fbd78398613efd77dfd570feb6c558a04666dc Binary files /dev/null and b/rdma-core-41.0.tar.gz differ diff --git a/rdma-core.spec b/rdma-core.spec index 56319710355b184c8eeee52e19567363082a58b5..a1736e6b9bb6702ef7c722f92bcd0352a6e689be 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,60 +1,11 @@ Name: rdma-core -Version: 35.1 -Release: 7 +Version: 41.0 +Release: 1 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz -Patch0: backport-fixbug-increase-maximum-number-of-cpus-rdma.patch -Patch1: 0001-Update-kernel-headers.patch -Patch2: 0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch -Patch3: 0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch -Patch4: 0004-libhns-Remove-unsupported-QP-type.patch -Patch5: 0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch -Patch6: 0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch -Patch7: 0007-libhns-Add-support-for-direct-wqe.patch -Patch8: 0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch -Patch9: 0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch -Patch10: 0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch -Patch11: 0011-libhns-Refactor-the-process-of-post_srq_recv.patch -Patch12: 0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch -Patch13: 0013-libhns-Refactor-the-process-of-create_srq.patch -Patch14: 0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch -Patch15: 0015-libhns-Refactor-process-of-setting-extended-sge.patch -Patch16: 0016-libhns-Optimize-set_sge-process.patch -Patch17: 0017-verbs-Add-generic-logging-API.patch -Patch18: 0018-libhns-Use-the-verbs-logging-API-instead-of-printf-f.patch -Patch19: 0019-libhns-The-function-declaration-should-be-the-same-a.patch -Patch20: 0020-libhns-The-content-of-the-header-file-should-be-prot.patch -Patch21: 0021-libhns-Fix-wrong-type-of-variables-and-fields.patch -Patch22: 0022-libhns-Fix-wrong-print-format-for-unsigned-type.patch -Patch23: 0023-libhns-Remove-redundant-variable-initialization.patch -Patch24: 0024-libhns-Remove-unused-macros.patch -Patch25: 0025-libhns-Refactor-the-poll-one-interface.patch -Patch26: 0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch -Patch27: 0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch -Patch28: 0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch -Patch29: 0029-libhns-Add-vendor_err-information-for-error-WC.patch -Patch30: 0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch -Patch31: 0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch -Patch32: 0032-util-Fix-mmio-memcpy-on-ARM.patch -Patch33: 0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch -Patch34: 0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch -Patch35: 0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch -Patch36: 0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch -Patch37: 0037-libhns-Fix-wrong-HIP08-version-macro.patch -Patch38: 0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch -Patch39: 0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch -Patch40: 0040-libhns-Add-support-for-creating-extended-CQ.patch -Patch41: 0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch -Patch42: 0042-libhns-Optimize-the-error-handling-of-CQE.patch -Patch43: 0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch -Patch44: 0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch -Patch45: 0045-libhns-Add-general-error-type-for-CQE.patch -Patch46: 0046-libhns-Fix-the-shift-size-of-SQ-WQE.patch -Patch47: 0047-libhns-Remove-support-for-HIP06.patch - BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel BuildRequires: python3-devel python3-Cython python3 python3-docutils perl-generators @@ -298,6 +249,12 @@ fi %{_mandir}/* %changelog +* Tue Sep 27 2022 tangchengchang - 41.0-1 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: update to 41.0 + * Tue Sep 06 2022 luozhengfeng - 35.1-7 - Type: bugfix - ID: NA