From cfb78c8d8027ab5c42301224b94d2620a1fb0ec8 Mon Sep 17 00:00:00 2001 From: Xin Tian Date: Thu, 8 May 2025 17:22:35 +0800 Subject: [PATCH] libxscale: update to version 2412GA new feature: - support diamond products - support ibv_wr apis - support extended CQ poll apis bugfix: - imm data endian error Signed-off-by: Xin Tian (cherry picked from commit d12a87881cb9254bb46cfdde6131428a139f15bb) --- 0063-libxscale-update-to-version-2412GA.patch | 2742 +++++++++++++++++ rdma-core.spec | 13 +- 2 files changed, 2752 insertions(+), 3 deletions(-) create mode 100644 0063-libxscale-update-to-version-2412GA.patch diff --git a/0063-libxscale-update-to-version-2412GA.patch b/0063-libxscale-update-to-version-2412GA.patch new file mode 100644 index 0000000..18e9c12 --- /dev/null +++ b/0063-libxscale-update-to-version-2412GA.patch @@ -0,0 +1,2742 @@ +From 81a2efc28f60ab26398c45236678cc08518b1e41 Mon Sep 17 00:00:00 2001 +From: Xin Tian +Date: Thu, 8 May 2025 12:10:40 +0800 +Subject: [PATCH] libxscale: update to version 2412GA + +new feature: +- support diamond products +- support ibv_wr apis +- support extended CQ poll apis + +bugfix: +- imm data endian error + +Signed-off-by: Xin Tian +--- + providers/xscale/cq.c | 1047 ++++++++++-------------------------- + providers/xscale/qp.c | 516 ++++++++++++++---- + providers/xscale/verbs.c | 175 ++++-- + providers/xscale/xsc_api.h | 4 +- + providers/xscale/xsc_hsi.h | 103 ++-- + providers/xscale/xscale.c | 12 +- + providers/xscale/xscale.h | 37 +- + 7 files changed, 923 insertions(+), 971 deletions(-) + +diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c +index e2619f0..609ce2e 100644 +--- a/providers/xscale/cq.c ++++ b/providers/xscale/cq.c +@@ -13,12 +13,12 @@ + #include + + #include +-#include + #include + + #include "xscale.h" + #include "wqe.h" + #include "xsc_hsi.h" ++#include "xsc_hw.h" + + enum { + CQ_OK = 0, +@@ -68,6 +68,7 @@ static const uint32_t xsc_cqe_opcode[] = { + [XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE, + [XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM, + [XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ, ++ [XSC_OPCODE_RDMA_CQE_RAW_SNF] = IBV_WC_RECV, + }; + + int xsc_stall_num_loop = 60; +@@ -76,16 +77,64 @@ int xsc_stall_cq_poll_max = 100000; + int xsc_stall_cq_inc_step = 100; + int xsc_stall_cq_dec_step = 10; + +-static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ALWAYS_INLINE; +-static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ++static void xsc_stall_poll_cq(void) ++{ ++ int i; ++ ++ for (i = 0; i < xsc_stall_num_loop; i++) ++ __asm__ volatile ("nop"); ++} ++ ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++ ALWAYS_INLINE; ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++{ ++ if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { ++ /* ++ * We do not have to take the QP table lock here, ++ * because CQs will be locked while QPs are removed ++ * from the table. 
++ */ ++ *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); ++ if (unlikely(!*cur_rsc)) ++ return CQ_POLL_ERR; ++ } ++ ++ return CQ_OK; ++} ++ ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_context *ctx, ++ struct xsc_resource **cur_rsc, ++ struct xsc_cqe *cqe) ALWAYS_INLINE; ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_context *ctx, ++ struct xsc_resource **cur_rsc, ++ struct xsc_cqe *cqe) + { +- if (cqe->is_error) ++ uint8_t msg_opcode = xsc_hw_get_cqe_msg_opcode(ctx->device_id, cqe); ++ struct xsc_qp *qp; ++ int err; ++ ++ if (xsc_hw_is_err_cqe(ctx->device_id, cqe)) + return cqe->type ? XSC_OPCODE_RDMA_RSP_ERROR : XSC_OPCODE_RDMA_REQ_ERROR; +- if (cqe->msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { ++ ++ err = get_qp_ctx(ctx, cur_rsc, RD_LE_16(cqe->qp_id)); ++ if (unlikely(err)) ++ goto msg_opcode_err_check; ++ qp = rsc_to_xqp(*cur_rsc); ++ if (qp->flags & XSC_QP_FLAG_RAWPACKET_SNIFFER) ++ return XSC_OPCODE_RDMA_CQE_RAW_SNF; ++ ++msg_opcode_err_check: ++ if (msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { + printf("rdma cqe msg code should be send/write/read\n"); + return XSC_OPCODE_RDMA_CQE_ERROR; + } +- return xsc_msg_opcode[cqe->msg_opcode][cqe->type][cqe->with_immdt]; ++ ++ return xsc_msg_opcode[msg_opcode][cqe->type][cqe->with_immdt]; + } + + static inline uint8_t get_cqe_l3_hdr_type(struct xsc_cqe64 *cqe) +@@ -108,18 +157,11 @@ static void *get_sw_cqe(struct xsc_cq *cq, int n) + return NULL; + } + +-static void *next_cqe_sw(struct xsc_cq *cq) +-{ +- return get_sw_cqe(cq, cq->cons_index); +-} +- + static void update_cons_index(struct xsc_cq *cq) + { +- union xsc_db_data db; ++ struct xsc_context *ctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); + +- db.raw_data = cq->cons_index; +- db.cqn = cq->cqn; +- WR_REG(cq->db, db.raw_data); ++ xsc_hw_set_cq_ci(ctx->device_id, cq->db, cq->cqn, cq->cons_index); + } + + static inline void handle_good_req( +@@ -140,6 +182,7 @@ static inline void handle_good_req( + wc->byte_len = ctrl->msg_len; + } + wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; + + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, + "wqeid:%u, wq tail:%u\n", idx, wq->tail); +@@ -182,40 +225,6 @@ static void dump_cqe(void *buf) + printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i+1], p[i+2], p[i+3]); + } + +-static enum ibv_wc_status xsc_cqe_error_code(struct xsc_cqe *cqe) +-{ +- switch (cqe->error_code) { +- case XSC_ERR_CODE_NAK_RETRY: +- return IBV_WC_RETRY_EXC_ERR; +- case XSC_ERR_CODE_NAK_OPCODE: +- return IBV_WC_BAD_RESP_ERR; +- case XSC_ERR_CODE_NAK_MR: +- return IBV_WC_REM_ACCESS_ERR; +- case XSC_ERR_CODE_NAK_OPERATION: +- return IBV_WC_REM_OP_ERR; +- case XSC_ERR_CODE_NAK_RNR: +- return IBV_WC_RNR_RETRY_EXC_ERR; +- case XSC_ERR_CODE_LOCAL_MR: +- return IBV_WC_LOC_PROT_ERR; +- case XSC_ERR_CODE_LOCAL_LEN: +- return IBV_WC_LOC_LEN_ERR; +- case XSC_ERR_CODE_LEN_GEN_CQE: +- return IBV_WC_LOC_LEN_ERR; +- case XSC_ERR_CODE_OPERATION: +- return IBV_WC_LOC_ACCESS_ERR; +- case XSC_ERR_CODE_FLUSH: +- return IBV_WC_WR_FLUSH_ERR; +- case XSC_ERR_CODE_MALF_WQE_HOST: +- case XSC_ERR_CODE_STRG_ACC_GEN_CQE: +- return IBV_WC_FATAL_ERR; +- case XSC_ERR_CODE_OPCODE_GEN_CQE: +- case XSC_ERR_CODE_LOCAL_OPCODE: +- default: +- return IBV_WC_GENERAL_ERR; +- } +-} +- +- + static inline bool xsc_qp_need_cqe(struct xsc_qp *qp, int *type, int *wqe_id) + { + struct xsc_wq *wq; +@@ -248,128 +257,49 @@ static inline void handle_bad_req( + struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) + { + int idx; +- wc->status = 
xsc_cqe_error_code(cqe); +- wc->vendor_err = cqe->error_code; ++ ++ wc->status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ wc->vendor_err = xsc_hw_get_cqe_err_code(xctx->device_id, cqe); + idx = RD_LE_16(cqe->wqe_id); + idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); + idx &= (wq->wqe_cnt -1); + wq->tail = wq->wqe_head[idx] + 1; + wc->wr_id = wq->wrid[idx]; +- wq->flush_wqe_cnt--; +- +- if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ if (wq->need_flush[idx]) ++ wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; ++ if (wc->status != IBV_WC_WR_FLUSH_ERR) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } ++ qp->err_occurred = 1; + } + + static inline void handle_bad_responder( + struct xsc_context *xctx, +- struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq) ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) + { +- wc->status = xsc_cqe_error_code(cqe); +- wc->vendor_err = cqe->error_code; ++ wc->status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ wc->vendor_err = xsc_hw_get_cqe_err_code(xctx->device_id, cqe); + + ++wq->tail; + wq->flush_wqe_cnt--; +- +- if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ if (wc->status != IBV_WC_WR_FLUSH_ERR) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } +-} +- +-#if defined(__x86_64__) || defined (__i386__) +-static inline unsigned long get_cycles(void) +-{ +- uint32_t low, high; +- uint64_t val; +- asm volatile ("rdtsc" : "=a" (low), "=d" (high)); +- val = high; +- val = (val << 32) | low; +- return val; +-} +- +-static void xsc_stall_poll_cq(void) +-{ +- int i; +- +- for (i = 0; i < xsc_stall_num_loop; i++) +- (void)get_cycles(); +-} +-static void xsc_stall_cycles_poll_cq(uint64_t cycles) +-{ +- while (get_cycles() < cycles) +- ; /* Nothing */ +-} +-static void xsc_get_cycles(uint64_t *cycles) +-{ +- *cycles = get_cycles(); +-} +-#else +-static void xsc_stall_poll_cq(void) +-{ +-} +-static void xsc_stall_cycles_poll_cq(uint64_t cycles) +-{ +-} +-static void xsc_get_cycles(uint64_t *cycles) +-{ +-} +-#endif +- +-static inline int get_qp_ctx(struct xsc_context *xctx, +- struct xsc_resource **cur_rsc, +- uint32_t qpn) +- ALWAYS_INLINE; +-static inline int get_qp_ctx(struct xsc_context *xctx, +- struct xsc_resource **cur_rsc, +- uint32_t qpn) +-{ +- if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { +- /* +- * We do not have to take the QP table lock here, +- * because CQs will be locked while QPs are removed +- * from the table. +- */ +- *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); +- if (unlikely(!*cur_rsc)) +- return CQ_POLL_ERR; +- } +- +- return CQ_OK; +-} +- +-static inline int xsc_get_next_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 **pcqe64, +- void **pcqe) +- ALWAYS_INLINE; +-static inline int xsc_get_next_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 **pcqe64, +- void **pcqe) +-{ +- void *cqe = next_cqe_sw(cq); +- if (!cqe) +- return CQ_EMPTY; +- +- ++cq->cons_index; +- +- /* +- * Make sure we read CQ entry contents after we've checked the +- * ownership bit. 
+- */ +- udma_from_device_barrier(); +- +- *pcqe = cqe; +- +- return CQ_OK; ++ qp->err_occurred = 1; + } + + static inline int xsc_parse_cqe(struct xsc_cq *cq, +- struct xsc_cqe *cqe, +- struct xsc_resource **cur_rsc, +- struct ibv_wc *wc, +- int lazy) ++ struct xsc_cqe *cqe, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) ++ ALWAYS_INLINE; ++static inline int xsc_parse_cqe(struct xsc_cq *cq, ++ struct xsc_cqe *cqe, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) + { + struct xsc_wq *wq; + uint32_t qp_id; +@@ -378,12 +308,14 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + struct xsc_qp *xqp = NULL; + struct xsc_context *xctx; + ++ memset(wc, 0, sizeof(*wc)); ++ wc->wc_flags = 0; ++ + xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); + qp_id = cqe->qp_id; + qp_id = RD_LE_16(qp_id); +- wc->wc_flags = 0; + wc->qp_num = qp_id; +- opcode = xsc_get_cqe_opcode(cqe); ++ opcode = xsc_get_cqe_opcode(xctx, cur_rsc, cqe); + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); + switch (opcode) { +@@ -404,8 +336,9 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: + case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: + wc->wc_flags |= IBV_WC_WITH_IMM; +- wc->imm_data = cqe->imm_data; ++ WR_BE_32(wc->imm_data, RD_LE_32(cqe->imm_data)); + SWITCH_FALLTHROUGH; ++ case XSC_OPCODE_RDMA_CQE_RAW_SNF: + case XSC_OPCODE_RDMA_RSP_RECV: + err = get_qp_ctx(xctx, cur_rsc, qp_id); + if (unlikely(err)) +@@ -428,7 +361,7 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + return CQ_POLL_ERR; + xqp = rsc_to_xqp(*cur_rsc); + wq = &xqp->rq; +- handle_bad_responder(xctx, wc, cqe, wq); ++ handle_bad_responder(xctx, wc, cqe, xqp, wq); + break; + case XSC_OPCODE_RDMA_CQE_ERROR: + printf("%s: got completion with cqe format error:\n", xctx->hostname); +@@ -440,30 +373,121 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + return CQ_OK; + } + +-static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 *cqe64, +- void *cqe, int cqe_ver) +- ALWAYS_INLINE; +-static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 *cqe64, +- void *cqe, int cqe_ver) ++static inline int xsc_parse_cqe_lazy(struct xsc_cq *cq, struct xsc_cqe *cqe) ALWAYS_INLINE; ++static inline int xsc_parse_cqe_lazy(struct xsc_cq *cq, struct xsc_cqe *cqe) + { +- return xsc_parse_cqe(cq, cqe, &cq->cur_rsc, NULL, 1); ++ struct xsc_resource *cur_rsc = NULL; ++ struct xsc_qp *xqp = NULL; ++ struct xsc_context *xctx; ++ struct xsc_wq *wq; ++ uint32_t qp_id; ++ uint8_t opcode; ++ int err = 0; ++ int idx; ++ ++ cq->cqe = cqe; ++ xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); ++ qp_id = cqe->qp_id; ++ qp_id = RD_LE_16(qp_id); ++ opcode = xsc_get_cqe_opcode(xctx, &cur_rsc, cqe); ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); ++ switch (opcode) { ++ case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_SEND: ++ case XSC_OPCODE_RDMA_REQ_WRITE: ++ case XSC_OPCODE_RDMA_REQ_READ: ++ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->sq; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (wq->wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt - 1); ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; ++ wq->tail = wq->wqe_head[idx] + 1; ++ wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; ++ break; ++ case 
XSC_OPCODE_RDMA_RSP_RECV_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_RECV: ++ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->rq; ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ break; ++ case XSC_OPCODE_RDMA_REQ_ERROR: ++ cq->verbs_cq.cq_ex.status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->sq; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (wq->wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt - 1); ++ wq->tail = wq->wqe_head[idx] + 1; ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; ++ if (wq->need_flush[idx]) ++ wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; ++ if (cq->verbs_cq.cq_ex.status != IBV_WC_WR_FLUSH_ERR) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++ xqp->ibv_qp->state = IBV_QPS_ERR; ++ break; ++ case XSC_OPCODE_RDMA_RSP_ERROR: ++ cq->verbs_cq.cq_ex.status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->rq; ++ ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ if (cq->verbs_cq.cq_ex.status != IBV_WC_WR_FLUSH_ERR) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++ xqp->ibv_qp->state = IBV_QPS_ERR; ++ break; ++ case XSC_OPCODE_RDMA_CQE_ERROR: ++ printf("%s: got completion with cqe format error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ SWITCH_FALLTHROUGH; ++ default: ++ return CQ_POLL_ERR; ++ } ++ return CQ_OK; + } + + static inline int xsc_poll_one(struct xsc_cq *cq, + struct xsc_resource **cur_rsc, +- struct ibv_wc *wc) ++ struct ibv_wc *wc, ++ int lazy) + ALWAYS_INLINE; + static inline int xsc_poll_one(struct xsc_cq *cq, + struct xsc_resource **cur_rsc, +- struct ibv_wc *wc) ++ struct ibv_wc *wc, ++ int lazy) + { + struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index); +- if (cqe == NULL) { ++ int err = 0; ++ ++ if (!cqe) + return CQ_EMPTY; +- } +- memset(wc, 0, sizeof(*wc)); + + ++cq->cons_index; + +@@ -472,7 +496,12 @@ static inline int xsc_poll_one(struct xsc_cq *cq, + * ownership bit. 
+ */ + udma_from_device_barrier(); +- return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0); ++ if (!lazy) ++ err = xsc_parse_cqe(cq, cqe, cur_rsc, wc); ++ else ++ err = xsc_parse_cqe_lazy(cq, cqe); ++ ++ return err; + } + + static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, +@@ -500,10 +529,12 @@ static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, + + wc->qp_num = qp_id; + wc->status = IBV_WC_WR_FLUSH_ERR; +- wc->vendor_err = XSC_ERR_CODE_FLUSH; ++ wc->vendor_err = XSC_ANDES_ERR_CODE_FLUSH; + wc->wr_id = wq->wrid[idx]; + wq->tail++; + wq->flush_wqe_cnt--; ++ if (err_node->is_sq) ++ wq->need_flush[idx] = 0; + } + + static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq, +@@ -578,9 +609,14 @@ static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) + int err = CQ_OK; + uint32_t next_cid = cq->cons_index; + ++ if (cq->stall_enable && cq->stall_next_poll) { ++ cq->stall_next_poll = 0; ++ xsc_stall_poll_cq(); ++ } ++ + xsc_spin_lock(&cq->lock); + for (npolled = 0; npolled < ne; ++npolled) { +- err = xsc_poll_one(cq, &rsc, wc + npolled); ++ err = xsc_poll_one(cq, &rsc, wc + npolled, 0); + if (err != CQ_OK) + break; + } +@@ -596,677 +632,148 @@ static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) + update_cons_index(cq); + xsc_spin_unlock(&cq->lock); + +- return err == CQ_POLL_ERR ? err : npolled; +-} +- +-enum polling_mode { +- POLLING_MODE_NO_STALL, +- POLLING_MODE_STALL, +- POLLING_MODE_STALL_ADAPTIVE +-}; ++ if (cq->stall_enable && err == CQ_EMPTY) ++ cq->stall_next_poll = 1; + +-static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, +- int lock, enum polling_mode stall) +- ALWAYS_INLINE; +-static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, +- int lock, enum polling_mode stall) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- update_cons_index(cq); +- +- if (lock) +- xsc_spin_unlock(&cq->lock); +- +- if (stall) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- xsc_get_cycles(&cq->stall_last_count); +- } else if (cq->flags & XSC_CQ_FLAGS_EMPTY_DURING_POLL) { +- cq->stall_cycles = min(cq->stall_cycles + xsc_stall_cq_inc_step, +- xsc_stall_cq_poll_max); +- xsc_get_cycles(&cq->stall_last_count); +- } else { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- cq->stall_last_count = 0; +- } +- } else if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { +- cq->stall_next_poll = 1; +- } +- +- cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES | XSC_CQ_FLAGS_EMPTY_DURING_POLL); +- } ++ return err == CQ_POLL_ERR ? 
err : npolled; + } + +-static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, +- int lock, enum polling_mode stall, +- int cqe_version, int clock_update) +- ALWAYS_INLINE; +-static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, +- int lock, enum polling_mode stall, +- int cqe_version, int clock_update) ++int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- struct xsc_cqe64 *cqe64; +- void *cqe; +- int err; +- +- if (unlikely(attr->comp_mask)) +- return EINVAL; +- +- if (stall) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- if (cq->stall_last_count) +- xsc_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); +- } else if (cq->stall_next_poll) { +- cq->stall_next_poll = 0; +- xsc_stall_poll_cq(); +- } +- } +- +- if (lock) +- xsc_spin_lock(&cq->lock); +- +- cq->cur_rsc = NULL; +- +- err = xsc_get_next_cqe(cq, &cqe64, &cqe); +- if (err == CQ_EMPTY) { +- if (lock) +- xsc_spin_unlock(&cq->lock); +- +- if (stall) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- xsc_get_cycles(&cq->stall_last_count); +- } else { +- cq->stall_next_poll = 1; +- } +- } +- +- return ENOENT; +- } +- +- if (stall) +- cq->flags |= XSC_CQ_FLAGS_FOUND_CQES; +- +- err = xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); +- if (lock && err) +- xsc_spin_unlock(&cq->lock); +- +- if (stall && err) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- cq->stall_last_count = 0; +- } +- +- cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES); +- +- goto out; +- } +- +- if (clock_update && !err) +- err = xscdv_get_clock_info(ibcq->context, &cq->last_clock_info); +- +-out: +- return err; ++ return poll_cq(ibcq, ne, wc); + } + +-static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, +- enum polling_mode stall, int cqe_version) ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) + ALWAYS_INLINE; +-static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, +- enum polling_mode stall, +- int cqe_version) ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) + { + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- struct xsc_cqe64 *cqe64; +- void *cqe; + int err; + +- err = xsc_get_next_cqe(cq, &cqe64, &cqe); +- if (err == CQ_EMPTY) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) +- cq->flags |= XSC_CQ_FLAGS_EMPTY_DURING_POLL; +- +- return ENOENT; +- } +- +- return xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); +-} +- +-static inline int xsc_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0); +-} +- +-static inline int xsc_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1); +-} +- +-static inline int xsc_next_poll_v0(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, 0, 0); +-} +- +-static inline int xsc_next_poll_v1(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, 0, 1); +-} +- +-static inline int xsc_start_poll_v0(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 0, 0); +-} +- +-static inline int xsc_start_poll_v1(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 1, 0); +-} +- +-static 
inline int xsc_start_poll_v0_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 0, 0); +-} +- +-static inline int xsc_start_poll_v1_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 1, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 0); +-} +- +-static inline int xsc_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 0); +-} +- +-static inline int xsc_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 0); +-} +- +-static inline int xsc_start_poll_stall_v0(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 0); +-} +- +-static inline int xsc_start_poll_stall_v1(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 0); +-} +- +-static inline int xsc_start_poll_v0_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 0, 1); +-} +- +-static inline int xsc_start_poll_v1_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 1, 1); +-} +- +-static inline int xsc_start_poll_v1_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 1, 1); +-} +- +-static inline int xsc_start_poll_v0_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 0, 1); +-} +- +-static inline int xsc_start_poll_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 1); +-} +- +-static inline int xsc_start_poll_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 1); +-} +- +-static inline int xsc_start_poll_stall_v1_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 1); +-} +- +-static inline int xsc_start_poll_stall_v0_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 1); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 1); +-} +- +-static inline int 
xsc_start_poll_adaptive_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 1); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 1); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v1_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 1); +-} +- +-static inline void xsc_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE); +-} +- +-static inline void xsc_end_poll_stall_lock(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL); +-} +- +-static inline void xsc_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE); +-} ++ xsc_spin_lock(&cq->lock); ++ err = xsc_poll_one(cq, NULL, NULL, 1); ++ if (err == CQ_EMPTY) ++ xsc_spin_unlock(&cq->lock); + +-static inline void xsc_end_poll_stall(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL); ++ return (err == CQ_EMPTY) ? ENOENT : err; + } + + static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 0, 0); +-} +- +-static inline void xsc_end_poll_lock(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 1, 0); +-} +- +-int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) +-{ +- return poll_cq(ibcq, ne, wc); +-} +- +-static inline enum ibv_wc_opcode xsc_cq_read_wc_opcode(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- switch (xscdv_get_cqe_opcode(cq->cqe64)) { +- case XSC_CQE_RESP_WR_IMM: +- return IBV_WC_RECV_RDMA_WITH_IMM; +- case XSC_CQE_RESP_SEND: +- case XSC_CQE_RESP_SEND_IMM: +- case XSC_CQE_RESP_SEND_INV: +- if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { +- switch (cq->cqe64->app_op) { +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG: +- case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: +- case XSC_CQE_APP_OP_TM_EXPECTED: +- case XSC_CQE_APP_OP_TM_UNEXPECTED: +- return IBV_WC_TM_RECV; +- case XSC_CQE_APP_OP_TM_NO_TAG: +- return IBV_WC_TM_NO_TAG; +- } +- } +- return IBV_WC_RECV; +- case XSC_CQE_NO_PACKET: +- switch (cq->cqe64->app_op) { +- case XSC_CQE_APP_OP_TM_REMOVE: +- return IBV_WC_TM_DEL; +- case XSC_CQE_APP_OP_TM_APPEND: +- return IBV_WC_TM_ADD; +- case XSC_CQE_APP_OP_TM_NOOP: +- return IBV_WC_TM_SYNC; +- case XSC_CQE_APP_OP_TM_CONSUMED: +- return IBV_WC_TM_RECV; +- } +- break; +- case XSC_CQE_REQ: +- switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) { +- case XSC_OPCODE_RDMA_WRITE_IMM: +- case XSC_OPCODE_RDMA_WRITE: +- return IBV_WC_RDMA_WRITE; +- case XSC_OPCODE_SEND_IMM: +- case XSC_OPCODE_SEND: +- case XSC_OPCODE_SEND_INVAL: +- return IBV_WC_SEND; +- case XSC_OPCODE_RDMA_READ: +- return IBV_WC_RDMA_READ; +- case XSC_OPCODE_ATOMIC_CS: +- return IBV_WC_COMP_SWAP; +- case XSC_OPCODE_ATOMIC_FA: +- return IBV_WC_FETCH_ADD; +- case XSC_OPCODE_UMR: +- return cq->umr_opcode; +- case XSC_OPCODE_TSO: +- return IBV_WC_TSO; +- } +- } +- +- return 0; +-} +- +-static inline uint32_t xsc_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq) ++ ALWAYS_INLINE; ++static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) + { + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + +- return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff; ++ 
udma_to_device_barrier(); ++ update_cons_index(cq); ++ xsc_spin_unlock(&cq->lock); + } + +-static inline unsigned int xsc_cq_read_wc_flags(struct ibv_cq_ex *ibcq) ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq) ++ ALWAYS_INLINE; ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq) + { + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- int wc_flags = 0; +- +- if (cq->flags & XSC_CQ_FLAGS_RX_CSUM_VALID) +- wc_flags = get_csum_ok(cq->cqe64); +- +- switch (xscdv_get_cqe_opcode(cq->cqe64)) { +- case XSC_CQE_RESP_WR_IMM: +- case XSC_CQE_RESP_SEND_IMM: +- wc_flags |= IBV_WC_WITH_IMM; +- break; +- case XSC_CQE_RESP_SEND_INV: +- wc_flags |= IBV_WC_WITH_INV; +- break; +- } +- +- if (cq->flags & XSC_CQ_FLAGS_TM_SYNC_REQ) +- wc_flags |= IBV_WC_TM_SYNC_REQ; ++ int err; + +- if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { +- switch (cq->cqe64->app_op) { +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG: +- case XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED: +- /* Full completion */ +- wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID); +- break; +- case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: +- case XSC_CQE_APP_OP_TM_CONSUMED: /* First completion */ +- wc_flags |= IBV_WC_TM_MATCH; +- break; +- case XSC_CQE_APP_OP_TM_EXPECTED: /* Second completion */ +- wc_flags |= IBV_WC_TM_DATA_VALID; +- break; +- } +- } ++ err = xsc_poll_one(cq, NULL, NULL, 1); + +- wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0; +- return wc_flags; ++ return (err == CQ_EMPTY) ? ENOENT : err; + } + +-static inline uint32_t xsc_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq) ++static inline enum ibv_wc_opcode xsc_wc_read_opcode(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); ++ uint8_t opcode = xsc_hw_get_cqe_msg_opcode(xctx->device_id, cqe); + +- return be32toh(cq->cqe64->byte_cnt); ++ return xsc_cqe_opcode[opcode]; + } + +-static inline uint32_t xsc_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq) ++static inline uint32_t xsc_wc_read_qp_num(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- struct xsc_err_cqe *ecqe = (struct xsc_err_cqe *)cq->cqe64; ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + +- return ecqe->vendor_err_synd; ++ return le32toh(cqe->qp_id); + } + +-static inline __be32 xsc_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq) ++static inline unsigned int xsc_wc_read_flags(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); ++ uint8_t opcode = xsc_hw_get_cqe_msg_opcode(xctx->device_id, cqe); + +- switch (xscdv_get_cqe_opcode(cq->cqe64)) { +- case XSC_CQE_RESP_SEND_INV: +- /* This is returning invalidate_rkey which is in host order, see +- * ibv_wc_read_invalidated_rkey +- */ +- return (__force __be32)be32toh(cq->cqe64->imm_inval_pkey); ++ switch (opcode) { ++ case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: ++ return IBV_WC_WITH_IMM; + default: +- return cq->cqe64->imm_inval_pkey; ++ return 0; + } + } + +-static inline uint32_t xsc_cq_read_wc_slid(struct ibv_cq_ex *ibcq) ++static inline uint32_t xsc_wc_read_byte_len(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = 
to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + +- return (uint32_t)be16toh(cq->cqe64->slid); ++ return le32toh(cqe->msg_len); + } + +-static inline uint8_t xsc_cq_read_wc_sl(struct ibv_cq_ex *ibcq) ++static inline uint32_t xsc_wc_read_vendor_err(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); + +- return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf; ++ return xsc_hw_get_cqe_err_code(xctx->device_id, cqe); + } + +-static inline uint32_t xsc_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq) ++static inline __be32 xsc_wc_read_imm_data(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ __be32 imm_data; + +- return be32toh(cq->cqe64->flags_rqpn) & 0xffffff; +-} +- +-static inline uint8_t xsc_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ WR_BE_32(imm_data, RD_LE_32(cqe->imm_data)); + +- return cq->cqe64->ml_path & 0x7f; ++ return imm_data; + } + +-static inline uint64_t xsc_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq) ++static inline uint64_t xsc_wc_read_completion_ts(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + +- return be64toh(cq->cqe64->timestamp); ++ return le64toh(cqe->ts); + } + +-static inline uint64_t +-xsc_cq_read_wc_completion_wallclock_ns(struct ibv_cq_ex *ibcq) ++void xsc_cq_fill_pfns(struct xsc_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- return xscdv_ts_to_ns(&cq->last_clock_info, +- xsc_cq_read_wc_completion_ts(ibcq)); +-} + +-static inline uint16_t xsc_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ cq->verbs_cq.cq_ex.start_poll = xsc_start_poll; ++ cq->verbs_cq.cq_ex.next_poll = xsc_next_poll; ++ cq->verbs_cq.cq_ex.end_poll = xsc_end_poll; + +- return be16toh(cq->cqe64->vlan_info); +-} +- +-static inline uint32_t xsc_cq_read_flow_tag(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- return be32toh(cq->cqe64->sop_drop_qpn) & XSC_FLOW_TAG_MASK; +-} +- +-static inline void xsc_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq, +- struct ibv_wc_tm_info *tm_info) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- tm_info->tag = be64toh(cq->cqe64->tmh.tag); +- tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx); +-} +- +-#define BIT(i) (1UL << (i)) +- +-#define SINGLE_THREADED BIT(0) +-#define STALL BIT(1) +-#define V1 BIT(2) +-#define ADAPTIVE BIT(3) +-#define CLOCK_UPDATE BIT(4) +- +-#define xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update) \ +- xsc_start_poll##adaptive##stall##cqe_ver##lock##clock_update +-#define xsc_next_poll_name(cqe_ver, adaptive) \ +- xsc_next_poll##adaptive##cqe_ver +-#define xsc_end_poll_name(lock, stall, adaptive) \ +- xsc_end_poll##adaptive##stall##lock +- +-#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive, clock_update) { \ +- .start_poll = &xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update), \ +- .next_poll = &xsc_next_poll_name(cqe_ver, adaptive), \ +- .end_poll = &xsc_end_poll_name(lock, stall, adaptive), \ +- } +- +-static const struct op +-{ +- int (*start_poll)(struct ibv_cq_ex *ibcq, struct 
ibv_poll_cq_attr *attr); +- int (*next_poll)(struct ibv_cq_ex *ibcq); +- void (*end_poll)(struct ibv_cq_ex *ibcq); +-} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + CLOCK_UPDATE + 1] = { +- [V1] = POLL_FN_ENTRY(_v1, _lock, , ,), +- [0] = POLL_FN_ENTRY(_v0, _lock, , ,), +- [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , , ), +- [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , , ), +- [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, , ), +- [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, , ), +- [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, , ), +- [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, , ), +- [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, ), +- [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, ), +- [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, ), +- [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, ), +- [V1 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, , , _clock_update), +- [0 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, , , _clock_update), +- [V1 | SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , , , _clock_update), +- [SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , , , _clock_update), +- [V1 | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, , _clock_update), +- [STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, , _clock_update), +- [V1 | SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, , _clock_update), +- [SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, , _clock_update), +- [V1 | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, _clock_update), +- [STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, _clock_update), +- [V1 | SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, _clock_update), +- [SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, _clock_update), +-}; +- +-int xsc_cq_fill_pfns(struct xsc_cq *cq, +- const struct ibv_cq_init_attr_ex *cq_attr, +- struct xsc_context *xctx) +-{ +- const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) | +- (xctx->cqe_version ? V1 : 0) | +- (cq->flags & XSC_CQ_FLAGS_SINGLE_THREADED ? +- SINGLE_THREADED : 0) | +- (cq->stall_enable ? STALL : 0) | +- ((cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) ? 
+- CLOCK_UPDATE : 0)]; +- +- cq->verbs_cq.cq_ex.start_poll = poll_ops->start_poll; +- cq->verbs_cq.cq_ex.next_poll = poll_ops->next_poll; +- cq->verbs_cq.cq_ex.end_poll = poll_ops->end_poll; +- +- cq->verbs_cq.cq_ex.read_opcode = xsc_cq_read_wc_opcode; +- cq->verbs_cq.cq_ex.read_vendor_err = xsc_cq_read_wc_vendor_err; +- cq->verbs_cq.cq_ex.read_wc_flags = xsc_cq_read_wc_flags; ++ cq->verbs_cq.cq_ex.read_opcode = xsc_wc_read_opcode; ++ cq->verbs_cq.cq_ex.read_vendor_err = xsc_wc_read_vendor_err; ++ cq->verbs_cq.cq_ex.read_wc_flags = xsc_wc_read_flags; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) +- cq->verbs_cq.cq_ex.read_byte_len = xsc_cq_read_wc_byte_len; ++ cq->verbs_cq.cq_ex.read_byte_len = xsc_wc_read_byte_len; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) +- cq->verbs_cq.cq_ex.read_imm_data = xsc_cq_read_wc_imm_data; ++ cq->verbs_cq.cq_ex.read_imm_data = xsc_wc_read_imm_data; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) +- cq->verbs_cq.cq_ex.read_qp_num = xsc_cq_read_wc_qp_num; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) +- cq->verbs_cq.cq_ex.read_src_qp = xsc_cq_read_wc_src_qp; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) +- cq->verbs_cq.cq_ex.read_slid = xsc_cq_read_wc_slid; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) +- cq->verbs_cq.cq_ex.read_sl = xsc_cq_read_wc_sl; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) +- cq->verbs_cq.cq_ex.read_dlid_path_bits = xsc_cq_read_wc_dlid_path_bits; ++ cq->verbs_cq.cq_ex.read_qp_num = xsc_wc_read_qp_num; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) +- cq->verbs_cq.cq_ex.read_completion_ts = xsc_cq_read_wc_completion_ts; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN) +- cq->verbs_cq.cq_ex.read_cvlan = xsc_cq_read_wc_cvlan; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG) +- cq->verbs_cq.cq_ex.read_flow_tag = xsc_cq_read_flow_tag; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO) +- cq->verbs_cq.cq_ex.read_tm_info = xsc_cq_read_wc_tm_info; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { +- if (!xctx->clock_info_page) +- return EOPNOTSUPP; +- cq->verbs_cq.cq_ex.read_completion_wallclock_ns = +- xsc_cq_read_wc_completion_wallclock_ns; +- } +- +- return 0; ++ cq->verbs_cq.cq_ex.read_completion_ts = xsc_wc_read_completion_ts; + } + + int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited) + { + struct xsc_cq *cq = to_xcq(ibvcq); +- union xsc_db_data doorbell; +- +- doorbell.cqn = cq->cqn; +- doorbell.cq_next_cid = cq->cons_index; +- doorbell.solicited = !!solicited; +- +- /* +- * Make sure that the doorbell record in host memory is +- * written before ringing the doorbell via PCI WC MMIO. 
+- */ +- mmio_wc_start(); +- +- WR_REG(cq->armdb, doorbell.raw_data); ++ struct xsc_context *ctx = to_xctx(ibvcq->context); + +- mmio_flush_writes(); ++ xsc_hw_update_cq_db(ctx->device_id, cq->armdb, cq->cqn, cq->cons_index, solicited); + + return 0; + } +diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c +index 04e87e2..ea9ecb5 100644 +--- a/providers/xscale/qp.c ++++ b/providers/xscale/qp.c +@@ -10,12 +10,12 @@ + #include + #include + #include +-#include + #include + + #include "xscale.h" + #include "wqe.h" + #include "xsc_hsi.h" ++#include "xsc_hw.h" + + static const uint32_t xsc_ib_opcode[] = { + [IBV_WR_SEND] = XSC_MSG_OPCODE_SEND, +@@ -26,26 +26,21 @@ static const uint32_t xsc_ib_opcode[] = { + [IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND, + }; + +-static void *get_recv_wqe(struct xsc_qp *qp, int n) ++static inline void *get_recv_wqe(struct xsc_qp *qp, int n) + { + return qp->rq_start + (n << qp->rq.wqe_shift); + } + +-static void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) ++static inline void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) + { + return rwq->pbuff + (n << rwq->rq.wqe_shift); + } + +-static void *get_seg_wqe(void *first, int n) ++static inline void *get_seg_wqe(void *first, int n) + { + return first + (n << XSC_BASE_WQE_SHIFT); + } + +-void *xsc_get_send_wqe(struct xsc_qp *qp, int n) +-{ +- return qp->sq_start + (n << qp->sq.wqe_shift); +-} +- + void xsc_init_rwq_indices(struct xsc_rwq *rwq) + { + rwq->rq.head = 0; +@@ -61,7 +56,7 @@ void xsc_init_qp_indices(struct xsc_qp *qp) + qp->sq.cur_post = 0; + } + +-static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) ++static inline int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) + { + unsigned cur; + +@@ -76,65 +71,72 @@ static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) + return cur + nreq >= wq->max_post; + } + +-static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg, +- uint32_t msg_len, uint64_t remote_addr, uint32_t rkey) ++static inline void set_data_seg_with_value(struct xsc_qp *qp, struct xsc_wqe_data_seg *data_seg, ++ uint64_t addr, uint32_t key, uint32_t length) + { +- WR_LE_32(remote_seg->seg_len, msg_len); +- WR_LE_32(remote_seg->mkey, rkey); +- WR_LE_64(remote_seg->va, remote_addr); ++ struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); ++ ++ xsc_hw_set_data_seg(ctx->device_id, data_seg, addr, key, length); + } + +-static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ibv_sge *sg) ++static inline void set_local_data_seg_from_sge(struct xsc_qp *qp, struct xsc_wqe_data_seg *data_seg, ++ const struct ibv_sge *sg) + { +- WR_LE_32(data_seg->seg_len, sg->length); +- WR_LE_32(data_seg->mkey, sg->lkey); +- WR_LE_64(data_seg->va, sg->addr); ++ struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); ++ ++ xsc_hw_set_data_seg(ctx->device_id, data_seg, sg->addr, sg->lkey, sg->length); + } + +-static __be32 send_ieth(struct ibv_send_wr *wr) ++static void *get_addr_from_wr(const void *list, int idx) + { +- switch (wr->opcode) { +- case IBV_WR_SEND_WITH_IMM: +- case IBV_WR_RDMA_WRITE_WITH_IMM: +- return wr->imm_data; +- default: +- return 0; +- } ++ const struct ibv_send_wr *wr = list; ++ ++ return (void *)wr->sg_list[idx].addr; + } + +-static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, +- struct xsc_send_wqe_ctrl_seg *ctrl) ++static int get_len_from_wr(const void *list, int idx) + { +- void *data_seg; +- unsigned seg_index; +- void *addr; +- int len = 0; +- int i; +- const int ds_len = 
sizeof(struct xsc_wqe_data_seg); +- int left_len = 0; +- int msg_len = ctrl->msg_len; ++ const struct ibv_send_wr *wr = list; ++ return wr->sg_list[idx].length; ++} + +- if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) +- seg_index = 1; +- else +- seg_index = 2; ++static void *get_addr_from_buf_list(const void *list, int idx) ++{ ++ const struct ibv_data_buf *buf_list = list; ++ return buf_list[idx].addr; ++} + +- if (unlikely(msg_len > qp->max_inline_data)) +- return ENOMEM; ++static int get_len_from_wr_list(const void *list, int idx) ++{ ++ const struct ibv_data_buf *buf_list = list; ++ return buf_list[idx].length; ++} ++ ++static int _set_wqe_inline(void *data_seg, size_t num_buf, const void *list, ++ void *(*get_addr)(const void *, int), ++ int (*get_len)(const void *, int)) ++{ ++ int i; ++ int ds_left_len = 0; ++ int len = 0; ++ void *addr; ++ void *data_seg_base = data_seg; ++ int seg_index = 0; ++ const int ds_len = sizeof(struct xsc_wqe_data_seg); + +- for (i = 0; i < wr->num_sge; ++i) { +- if (likely(wr->sg_list[i].length)) { +- addr = (void*)wr->sg_list[i].addr; +- len = wr->sg_list[i].length; +- if (left_len > 0) { +- int copy_len = min_t(int, len, left_len); ++ for (i = 0; i < num_buf; i++) { ++ addr = get_addr(list, i); ++ len = get_len(list, i); ++ if (likely(len)) { ++ if (ds_left_len > 0) { ++ int copy_len = min_t(int, len, ds_left_len); + memcpy(data_seg, addr, copy_len); + addr += copy_len; + len -= copy_len; + } + + while (len >= ds_len) { +- data_seg = get_seg_wqe(ctrl, seg_index); ++ data_seg = get_seg_wqe(data_seg_base, seg_index); + seg_index++; + memcpy(data_seg, addr, ds_len); + addr += ds_len; +@@ -142,43 +144,84 @@ static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, + } + + if (len > 0) { +- data_seg = get_seg_wqe(ctrl, seg_index); ++ data_seg = get_seg_wqe(data_seg_base, seg_index); + seg_index++; + memcpy(data_seg, addr, len); + data_seg += len; +- left_len = ds_len - len; ++ ds_left_len = ds_len - len; + } else { +- left_len = 0; ++ ds_left_len = 0; + } + } + } ++ return seg_index; ++} ++ ++static int set_wqe_inline_from_wr(struct xsc_qp *qp, struct ibv_send_wr *wr, ++ struct xsc_send_wqe_ctrl_seg *ctrl) ++{ ++ void *data_seg; ++ unsigned seg_index; ++ int msg_len = ctrl->msg_len; ++ int filled_ds_num; ++ ++ if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) ++ seg_index = 1; ++ else ++ seg_index = 2; ++ data_seg = get_seg_wqe(ctrl, seg_index); + +- ctrl->ds_data_num = seg_index - 1; ++ if (unlikely(msg_len > qp->max_inline_data)) ++ return ENOMEM; ++ ++ filled_ds_num = _set_wqe_inline(data_seg, wr->num_sge, wr, ++ get_addr_from_wr, ++ get_len_from_wr); ++ ctrl->ds_data_num = seg_index - 1 + filled_ds_num; + + return 0; + } + +-static void zero_send_ds(int idx, struct xsc_qp *qp) ++static int set_wqe_inline_from_buf_list(void *data_seg, ++ size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ return _set_wqe_inline(data_seg, num_buf, buf_list, ++ get_addr_from_buf_list, ++ get_len_from_wr_list); ++} ++ ++static inline void _zero_send_ds(int idx, struct xsc_qp *qp, int keep_ctrl) + { + void *seg; + uint64_t *uninitialized_var(p); + int i; + + seg = (void*)xsc_get_send_wqe(qp, idx); +- for (i = 1; i < qp->sq.seg_cnt; i++) { ++ for (i = keep_ctrl; i < qp->sq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + p[0] = p[1] = 0; + } + } + +-static void zero_recv_ds(int idx, struct xsc_qp *qp) ++static inline void clear_send_wqe(int idx, struct xsc_qp *qp) ++{ ++ _zero_send_ds(idx, qp, 0); ++} ++ ++static 
inline void clear_send_wqe_except_ctrl(int idx, struct xsc_qp *qp) ++{ ++ _zero_send_ds(idx, qp, 1); ++} ++ ++static void clear_recv_wqe(int idx, struct xsc_qp *qp) + { + void *seg; + uint64_t *uninitialized_var(p); + int i; + + seg = (void*)get_recv_wqe(qp, idx); +- for (i = 1; i < qp->rq.seg_cnt; i++) { ++ for (i = 0; i < qp->rq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + p[0] = p[1] = 0; + } +@@ -221,23 +264,16 @@ static inline void dump_wqe(int type, int idx, struct xsc_qp *qp) {}; + + static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq) + { +- uint16_t next_pid; +- union xsc_db_data db; ++ struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); ++ uint32_t next_pid; + + if (unlikely(!nreq)) + return; + + qp->sq.head += nreq; + next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); +- db.sq_next_pid = next_pid; +- db.sqn = qp->sqn; +- /* +- * Make sure that descriptors are written before +- * updating doorbell record and ringing the doorbell +- */ + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND, "nreq:%d\n", nreq); +- udma_to_device_barrier(); +- WR_REG(qp->sq.db, db.raw_data); ++ xsc_hw_ring_tx_doorbell(ctx->device_id, qp->sq.db, qp->sqn, next_pid); + } + + static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, +@@ -305,7 +341,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + } + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); +- zero_send_ds(idx, qp); ++ clear_send_wqe(idx, qp); + ctrl = seg = xsc_get_send_wqe(qp, idx); + ctrl->ds_data_num = 0; + WR_LE_16(ctrl->wqe_id, +@@ -337,11 +373,11 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + break; + case IBV_WR_SEND_WITH_IMM: + ctrl->with_immdt = 1; +- ctrl->opcode_data = send_ieth(wr); ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(wr->imm_data)); + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + ctrl->with_immdt = 1; +- ctrl->opcode_data = send_ieth(wr); ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(wr->imm_data)); + SWITCH_FALLTHROUGH; + case IBV_WR_RDMA_READ: + case IBV_WR_RDMA_WRITE: +@@ -349,11 +385,11 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + break; + ctrl->ds_data_num++; + data_seg = get_seg_wqe(ctrl, seg_index); +- set_remote_addr_seg( +- data_seg, +- msg_len, +- wr->wr.rdma.remote_addr, +- wr->wr.rdma.rkey); ++ set_data_seg_with_value(qp, ++ data_seg, ++ wr->wr.rdma.remote_addr, ++ wr->wr.rdma.rkey, ++ msg_len); + seg_index++; + break; + default: +@@ -372,7 +408,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + } + + if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { +- err = set_data_inl_seg(qp, wr, ctrl); ++ err = set_wqe_inline_from_wr(qp, wr, ctrl); + if (unlikely(err)) { + *bad_wr = wr; + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, +@@ -383,7 +419,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + for (i = 0; i < wr->num_sge; ++i, ++seg_index) { + if (likely(wr->sg_list[i].length)) { + data_seg = get_seg_wqe(ctrl, seg_index); +- set_local_data_seg(data_seg, &wr->sg_list[i]); ++ set_local_data_seg_from_sge(qp, data_seg, &wr->sg_list[i]); + ctrl->ds_data_num++; + } + } +@@ -392,7 +428,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + ctrl->msg_opcode = xsc_ib_opcode[wr->opcode]; + if (ctrl->msg_len == 0) { + ctrl->ds_data_num = 0; +- zero_send_ds(idx, qp); ++ clear_send_wqe_except_ctrl(idx, qp); + } + qp->sq.wrid[idx] = wr->wr_id; + 
qp->sq.wqe_head[idx] = qp->sq.head + nreq; +@@ -403,7 +439,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + } + qp->sq.wr_opcode[idx] = wr->opcode; + +- if (xsc_debug_mask & XSC_DBG_QP_SEND) ++ if (unlikely(xsc_debug_mask & XSC_DBG_QP_SEND)) + dump_wqe(0, idx, qp); + } + +@@ -420,6 +456,301 @@ int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + return _xsc_post_send(ibqp, wr, bad_wr); + } + ++static inline void xsc_wr_start(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ ++ xsc_spin_lock(&qp->sq.lock); ++ ++ qp->cur_post_rb = qp->sq.cur_post; ++ qp->err = 0; ++ qp->nreq = 0; ++} ++ ++static inline int xsc_wr_complete(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ int err = qp->err; ++ ++ if (unlikely(err)) { ++ qp->sq.cur_post = qp->cur_post_rb; ++ goto out; ++ } ++ ++ xsc_post_send_db(qp, qp->nreq); ++out: ++ xsc_spin_unlock(&qp->sq.lock); ++ return err; ++} ++ ++static inline void xsc_wr_abort(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ ++ qp->sq.cur_post = qp->cur_post_rb; ++ ++ xsc_spin_unlock(&qp->sq.lock); ++} ++ ++#define RDMA_REMOTE_DATA_SEG_IDX 1 ++static const int local_ds_base_idx[] = { ++ [IBV_WR_RDMA_WRITE] = 2, ++ [IBV_WR_RDMA_WRITE_WITH_IMM] = 2, ++ [IBV_WR_SEND] = 1, ++ [IBV_WR_SEND_WITH_IMM] = 1, ++ [IBV_WR_RDMA_READ] = 2 ++}; ++ ++static inline void _common_wqe_init(struct ibv_qp_ex *ibqp, ++ enum ibv_wr_opcode ib_op) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ uint32_t idx; ++ ++ if (unlikely(xsc_wq_overflow(&qp->sq, qp->nreq, ++ to_xcq(qp->ibv_qp->send_cq)))) { ++ xsc_dbg(to_xctx(ibqp->qp_base.context)->dbg_fp, XSC_DBG_QP_SEND, ++ "send work queue overflow\n"); ++ if (!qp->err) ++ qp->err = ENOMEM; ++ ++ return; ++ } ++ ++ idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); ++ clear_send_wqe(idx, qp); ++ ctrl = xsc_get_send_wqe(qp, idx); ++ qp->cur_ctrl = ctrl; ++ qp->cur_ds_num = 0; ++ qp->cur_data_len = 0; ++ qp->cur_data = get_seg_wqe(ctrl, local_ds_base_idx[ib_op]); ++ qp->cur_remote_addr = 0; ++ qp->cur_remote_key = 0; ++ ctrl->msg_opcode = xsc_ib_opcode[ib_op]; ++ ctrl->ce = qp->sq_signal_bits ? 1 : (ibqp->wr_flags & IBV_SEND_SIGNALED ? 1 : 0); ++ ctrl->se = ibqp->wr_flags & IBV_SEND_SOLICITED ? 1 : 0; ++ ctrl->in_line = ibqp->wr_flags & IBV_SEND_INLINE ? 
1 : 0; ++ qp->sq.wrid[idx] = ibqp->wr_id; ++ qp->sq.wqe_head[idx] = qp->sq.head + qp->nreq; ++ qp->sq.wr_opcode[idx] = ib_op; ++ WR_LE_16(ctrl->wqe_id, ++ qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT)); ++} ++ ++static inline void _common_wqe_finilize(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl = qp->cur_ctrl; ++ struct xsc_wqe_data_seg *remote_seg; ++ uint32_t idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); ++ ++ ctrl->ds_data_num = qp->cur_ds_num; ++ ctrl->msg_len = qp->cur_data_len; ++ if (ctrl->msg_opcode == XSC_MSG_OPCODE_RDMA_WRITE || ++ ctrl->msg_opcode == XSC_MSG_OPCODE_RDMA_READ) { ++ remote_seg = get_seg_wqe(qp->cur_ctrl, RDMA_REMOTE_DATA_SEG_IDX); ++ set_data_seg_with_value(qp, remote_seg, ++ qp->cur_remote_addr, ++ qp->cur_remote_key, ++ ctrl->msg_len); ++ } ++ ++ dump_wqe(0, idx, qp); ++ qp->sq.cur_post++; ++ qp->nreq++; ++ if (ctrl->ce) { ++ qp->sq.flush_wqe_cnt++; ++ qp->sq.need_flush[idx] = 1; ++ } ++} ++ ++static inline void xsc_wr_send(struct ibv_qp_ex *ibqp) ++{ ++ _common_wqe_init(ibqp, IBV_WR_SEND); ++} ++ ++static inline void xsc_wr_send_imm(struct ibv_qp_ex *ibqp, __be32 imm_data) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ ++ _common_wqe_init(ibqp, IBV_WR_SEND_WITH_IMM); ++ ctrl = qp->cur_ctrl; ++ ctrl->with_immdt = 1; ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(imm_data)); ++} ++ ++static inline void _xsc_wr_rdma(struct ibv_qp_ex *ibqp, ++ uint32_t rkey, ++ uint64_t remote_addr, ++ enum ibv_wr_opcode ib_op) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ ++ _common_wqe_init(ibqp, ib_op); ++ qp->cur_remote_addr = remote_addr; ++ qp->cur_remote_key = rkey; ++ qp->cur_ds_num++; ++} ++ ++static inline void xsc_wr_rdma_write(struct ibv_qp_ex *ibqp, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_WRITE); ++} ++ ++static inline void xsc_wr_rdma_write_imm(struct ibv_qp_ex *ibqp, uint32_t rkey, ++ uint64_t remote_addr, __be32 imm_data) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ ++ _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_WRITE_WITH_IMM); ++ ctrl = qp->cur_ctrl; ++ ctrl->with_immdt = 1; ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(imm_data)); ++} ++ ++static inline void xsc_wr_rdma_read(struct ibv_qp_ex *ibqp, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_READ); ++} ++ ++static inline void xsc_wr_set_sge(struct ibv_qp_ex *ibqp, uint32_t lkey, uint64_t addr, ++ uint32_t length) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ ++ if (unlikely(!length)) ++ return; ++ ++ set_data_seg_with_value(qp, data_seg, addr, lkey, length); ++ qp->cur_ds_num++; ++ qp->cur_data_len = length; ++ _common_wqe_finilize(ibqp); ++} ++ ++static inline void xsc_wr_set_sge_list(struct ibv_qp_ex *ibqp, size_t num_sge, ++ const struct ibv_sge *sg_list) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ int i; ++ ++ if (unlikely(num_sge > qp->sq.max_gs)) { ++ xsc_dbg(to_xctx(ibqp->qp_base.context)->dbg_fp, XSC_DBG_QP_SEND, ++ "rdma read, max gs exceeded %lu (max = 1)\n", ++ num_sge); ++ if (!qp->err) ++ qp->err = ENOMEM; ++ return ; ++ } ++ ++ for (i = 0; i < num_sge; i++) { ++ if (unlikely(!sg_list[i].length)) ++ continue; ++ set_local_data_seg_from_sge(qp, data_seg, 
&sg_list[i]); ++ data_seg++; ++ qp->cur_ds_num++; ++ qp->cur_data_len += sg_list[i].length; ++ } ++ _common_wqe_finilize(ibqp); ++} ++ ++static inline void xsc_wr_set_inline_data(struct ibv_qp_ex *ibqp, void *addr, ++ size_t length) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ size_t num_buf = 1; ++ struct ibv_data_buf data_buf = {.addr = addr, .length = length}; ++ int num_filled_ds = 0; ++ ++ if (unlikely(length > qp->max_inline_data)) { ++ if (!qp->err) ++ qp->err = ENOMEM; ++ return; ++ } ++ ++ num_filled_ds = set_wqe_inline_from_buf_list(data_seg, num_buf, &data_buf); ++ ++ qp->cur_ds_num += num_filled_ds; ++ qp->cur_data_len = length; ++ _common_wqe_finilize(ibqp); ++} ++ ++static inline void xsc_wr_set_inline_data_list(struct ibv_qp_ex *ibqp, ++ size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ int num_filled_ds = 0; ++ int i; ++ size_t total_len = 0; ++ ++ for (i = 0; i < num_buf; i++) ++ total_len += buf_list[i].length; ++ if (unlikely(total_len > qp->max_inline_data)) { ++ if (!qp->err) ++ qp->err = ENOMEM; ++ return; ++ } ++ ++ num_filled_ds = set_wqe_inline_from_buf_list(data_seg, num_buf, buf_list); ++ ++ qp->cur_ds_num += num_filled_ds; ++ qp->cur_data_len = total_len; ++ _common_wqe_finilize(ibqp); ++} ++ ++enum { ++ XSC_SUPPORTED_SEND_OPS_FLAGS_RC = ++ IBV_QP_EX_WITH_SEND | ++ IBV_QP_EX_WITH_SEND_WITH_IMM | ++ IBV_QP_EX_WITH_RDMA_WRITE | ++ IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | ++ IBV_QP_EX_WITH_RDMA_READ, ++}; ++ ++static void fill_wr_pfns_rc(struct ibv_qp_ex *ibqp) ++{ ++ ibqp->wr_send = xsc_wr_send; ++ ibqp->wr_send_imm = xsc_wr_send_imm; ++ ibqp->wr_rdma_write = xsc_wr_rdma_write; ++ ibqp->wr_rdma_write_imm = xsc_wr_rdma_write_imm; ++ ibqp->wr_rdma_read = xsc_wr_rdma_read; ++ ++ ibqp->wr_set_sge = xsc_wr_set_sge; ++ ibqp->wr_set_sge_list = xsc_wr_set_sge_list; ++ ibqp->wr_set_inline_data = xsc_wr_set_inline_data; ++ ibqp->wr_set_inline_data_list = xsc_wr_set_inline_data_list; ++} ++ ++int xsc_qp_fill_wr_pfns(struct xsc_qp *xqp, const struct ibv_qp_init_attr_ex *attr) ++{ ++ struct ibv_qp_ex *ibqp = &xqp->verbs_qp.qp_ex; ++ uint64_t ops = attr->send_ops_flags; ++ ++ ibqp->wr_start = xsc_wr_start; ++ ibqp->wr_complete = xsc_wr_complete; ++ ibqp->wr_abort = xsc_wr_abort; ++ ++ switch (attr->qp_type) { ++ case IBV_QPT_RC: ++ if (ops & ~XSC_SUPPORTED_SEND_OPS_FLAGS_RC) ++ return EOPNOTSUPP; ++ fill_wr_pfns_rc(ibqp); ++ break; ++ default: ++ return EOPNOTSUPP; ++ } ++ return 0; ++} ++ + static void set_wq_sig_seg(struct xsc_rwq *rwq, struct xsc_rwqe_sig *sig, + int size, uint16_t idx) + { +@@ -506,6 +837,7 @@ out: + return err; + } + ++int xsc_post_recv_dump_wqe = 1; + int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) + { +@@ -513,8 +845,7 @@ int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct xsc_wqe_data_seg *recv_head; + struct xsc_wqe_data_seg *data_seg; + int err = 0; +- uint16_t next_pid = 0; +- union xsc_db_data db; ++ uint32_t next_pid = 0; + int nreq; + uint16_t idx; + int i; +@@ -523,7 +854,7 @@ int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + + idx = qp->rq.head & (qp->rq.wqe_cnt - 1); + +- zero_recv_ds(idx, qp); ++ clear_recv_wqe(idx, qp); + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(xsc_wq_overflow(&qp->rq, nreq, + to_xcq(qp->ibv_qp->recv_cq)))) { +@@ -547,31 +878,23 @@ int 
xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + if (unlikely(!wr->sg_list[i].length)) + continue; + data_seg = get_seg_wqe(recv_head, i); +- WR_LE_32(data_seg->seg_len, wr->sg_list[i].length); +- WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey); +- WR_LE_64(data_seg->va, wr->sg_list[i].addr); ++ set_local_data_seg_from_sge(qp, data_seg, &wr->sg_list[i]); + } + + qp->rq.wrid[idx] = wr->wr_id; + +- dump_wqe(1, idx, qp); ++ if (xsc_post_recv_dump_wqe || (xsc_debug_mask & XSC_DBG_QP_RECV)) ++ dump_wqe(1, idx, qp); + idx = (idx + 1) & (qp->rq.wqe_cnt - 1); + qp->rq.flush_wqe_cnt++; + } + + out: + if (likely(nreq)) { ++ struct xsc_context *ctx = to_xctx(ibqp->context); + qp->rq.head += nreq; + next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT); +- db.rq_next_pid = next_pid; +- db.rqn = qp->rqn; +- +- /* +- * Make sure that descriptors are written before +- * doorbell record. +- */ +- udma_to_device_barrier(); +- WR_REG(qp->rq.db, db.raw_data); ++ xsc_hw_ring_rx_doorbell(ctx->device_id, qp->rq.db, qp->rqn, next_pid); + } + + xsc_spin_unlock(&qp->rq.lock); +@@ -676,3 +999,4 @@ int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, + } + return ret; + } ++ +diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c +index 937bed1..602ca9d 100644 +--- a/providers/xscale/verbs.c ++++ b/providers/xscale/verbs.c +@@ -213,7 +213,6 @@ struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + &mr->vmr, &cmd, sizeof(cmd), &resp, + sizeof resp); + if (ret) { +- xsc_free_buf(&(mr->buf)); + free(mr); + return NULL; + } +@@ -225,6 +224,27 @@ struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + return &mr->vmr.ibv_mr; + } + ++struct ibv_mr *xsc_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, ++ uint64_t iova, int fd, int acc) ++{ ++ struct xsc_mr *mr; ++ int ret; ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) ++ return NULL; ++ ++ ret = ibv_cmd_reg_dmabuf_mr(pd, offset, length, iova, fd, acc, ++ &mr->vmr); ++ if (ret) { ++ free(mr); ++ return NULL; ++ } ++ mr->alloc_flags = acc; ++ ++ return &mr->vmr.ibv_mr; ++} ++ + struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd) + { + struct xsc_mr *mr; +@@ -291,17 +311,6 @@ struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, + return &mr->vmr.ibv_mr; + } + +-int xsc_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, +- void *addr, size_t length, int access) +-{ +- struct ibv_rereg_mr cmd; +- struct ib_uverbs_rereg_mr_resp resp; +- +- return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, +- access, pd, &cmd, sizeof(cmd), &resp, +- sizeof(resp)); +-} +- + int xsc_dereg_mr(struct verbs_mr *vmr) + { + int ret; +@@ -339,12 +348,8 @@ static int align_queue_size(long long req) + } + + enum { +- CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | +- IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | +- IBV_WC_EX_WITH_CVLAN | +- IBV_WC_EX_WITH_FLOW_TAG | +- IBV_WC_EX_WITH_TM_INFO | +- IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK ++ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP + }; + + enum { +@@ -417,7 +422,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + } + + if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { +- xsc_err("unsupported flgas:0x%lx\n", cq_attr->wc_flags); ++ xsc_err("unsupported wc flags:0x%lx\n", cq_attr->wc_flags); + errno = ENOTSUP; + return NULL; + } +@@ -453,16 +458,16 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + ncqe = 
XSC_CQE_RING_DEPTH_MIN; + } + +- if (ncqe > XSC_CQE_RING_DEPTH_MAX) { ++ if (ncqe > xctx->max_cqe) { + if (xsc_cqe_depth_check()) { + xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n", +- ncqe, XSC_CQE_RING_DEPTH_MAX); ++ ncqe, xctx->max_cqe); + errno = EINVAL; + goto err_spl; + } else { + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u exceeds the MAX ring szie, set it as %u\n", +- ncqe, XSC_CQE_RING_DEPTH_MAX); +- ncqe = XSC_CQE_RING_DEPTH_MAX; ++ ncqe, xctx->max_cqe); ++ ncqe = xctx->max_cqe; + } + } + +@@ -485,6 +490,9 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf_a.buf); + ++ if (cq_alloc_flags & XSC_CQ_FLAGS_EXTENDED) ++ xsc_cq_fill_pfns(cq, cq_attr); ++ + if (use_ex) { + struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr; + +@@ -630,6 +638,7 @@ static int xsc_calc_sq_size(struct xsc_context *ctx, + int wqe_size; + int wq_size; + int wq_size_min = 0; ++ int max_inline_cap; + + if (!attr->cap.max_send_wr) + return 0; +@@ -646,23 +655,34 @@ static int xsc_calc_sq_size(struct xsc_context *ctx, + wq_size = wq_size_min; + } + +- if (wq_size > XSC_SEND_WQE_RING_DEPTH_MAX) { +- xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, +- "WQE size %u exceeds WQE ring depth, set it as %u\n", +- wq_size, XSC_SEND_WQE_RING_DEPTH_MAX); +- wq_size = XSC_SEND_WQE_RING_DEPTH_MAX; ++ if (wq_size > ctx->max_send_wqebb) { ++ if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || ++ ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) { ++ xsc_err("WQE size %u exceeds WQE ring depth\n", wq_size); ++ return -EINVAL; ++ } else { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, ctx->max_send_wqebb); ++ wq_size = ctx->max_send_wqebb; ++ } + } + +- qp->max_inline_data = attr->cap.max_inline_data; + qp->sq.wqe_cnt = wq_size; + qp->sq.ds_cnt = wq_size << ctx->send_ds_shift; + qp->sq.seg_cnt = 1 << ctx->send_ds_shift; + qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift; + qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_post = qp->sq.wqe_cnt; +- if (attr->cap.max_inline_data > +- (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg)) ++ ++ if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || ++ ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) ++ max_inline_cap = 64; ++ else ++ max_inline_cap = (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg); ++ if (attr->cap.max_inline_data > max_inline_cap) + return -EINVAL; ++ qp->max_inline_data = attr->cap.max_inline_data; + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Send WQE count:%u, max post:%u wqe shift:%u\n", + qp->sq.wqe_cnt, qp->sq.max_post, qp->sq.wqe_shift); +@@ -743,11 +763,17 @@ static int xsc_calc_rq_size(struct xsc_context *ctx, + wq_size = wq_size_min; + } + +- if (wq_size > XSC_RECV_WQE_RING_DEPTH_MAX) { +- xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, +- "WQE size %u exceeds WQE ring depth, set it as %u\n", +- wq_size, XSC_RECV_WQE_RING_DEPTH_MAX); +- wq_size = XSC_RECV_WQE_RING_DEPTH_MAX; ++ if (wq_size > ctx->max_recv_wr) { ++ if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || ++ ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) { ++ xsc_err("WQE size %u exceeds WQE ring depth\n", wq_size); ++ return -EINVAL; ++ } else { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, ctx->max_recv_wr); ++ wq_size = ctx->max_recv_wr; ++ } + } + + qp->rq.wqe_cnt = wq_size; +@@ -946,8 +972,10 @@ static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp) + } + + enum { +- 
XSC_CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | +- IBV_QP_INIT_ATTR_CREATE_FLAGS ++ XSC_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD | ++ IBV_QP_INIT_ATTR_CREATE_FLAGS | ++ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS | ++ IBV_QP_INIT_ATTR_MAX_TSO_HEADER), + }; + + enum { +@@ -971,6 +999,34 @@ enum { + XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE), + }; + ++static int xsc_cmd_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_create_qp *cmd, ++ struct xsc_qp *qp, ++ struct xsc_create_qp_resp *resp, ++ struct xsc_create_qp_ex_resp *resp_ex) ++{ ++ struct xsc_create_qp_ex cmd_ex; ++ int ret; ++ ++ if (attr->comp_mask & XSC_CREATE_QP_EX2_COMP_MASK) { ++ memset(&cmd_ex, 0, sizeof(cmd_ex)); ++ *ibv_create_qp_ex_to_reg(&cmd_ex.ibv_cmd) = cmd->ibv_cmd.core_payload; ++ cmd_ex.drv_payload = cmd->drv_payload; ++ ++ ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp, ++ attr, &cmd_ex.ibv_cmd, ++ sizeof(cmd_ex), &resp_ex->ibv_resp, ++ sizeof(*resp_ex)); ++ } else { ++ ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, ++ &cmd->ibv_cmd, sizeof(*cmd), ++ &resp->ibv_resp, sizeof(*resp)); ++ } ++ ++ return ret; ++} ++ + static struct ibv_qp *create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr, + struct xscdv_qp_init_attr *xqp_attr) +@@ -992,19 +1048,35 @@ static struct ibv_qp *create_qp(struct ibv_context *context, + return NULL; + } + ++ /*check qp_type*/ ++ if ((attr->qp_type != IBV_QPT_RC) && ++ (attr->qp_type != IBV_QPT_RAW_PACKET)){ ++ xsc_err("Not supported qp_type:0x%x\n", attr->qp_type); ++ return NULL; ++ } ++ + qp = calloc(1, sizeof(*qp)); + if (!qp) { + xsc_err("QP calloc failed\n"); + return NULL; + } + +- ibqp = (struct ibv_qp *)&qp->verbs_qp; ++ ibqp = &qp->verbs_qp.qp; + qp->ibv_qp = ibqp; + + memset(&cmd, 0, sizeof(cmd)); + memset(&resp, 0, sizeof(resp)); + memset(&resp_ex, 0, sizeof(resp_ex)); + ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { ++ ret = xsc_qp_fill_wr_pfns(qp, attr); ++ if (ret) { ++ errno = ret; ++ xsc_err("Fill wr pfns failed\n"); ++ goto err; ++ } ++ } ++ + ret = xsc_calc_wq_size(ctx, attr, qp); + if (ret < 0) { + xsc_err("Calculate WQ size failed\n"); +@@ -1056,17 +1128,28 @@ static struct ibv_qp *create_qp(struct ibv_context *context, + "revert create_flags(0x%x) to cmd_flags(0x%x)\n", + attr->create_flags, cmd.flags); + } ++ ++ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_SNIFFER) { ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_SNIFFER; ++ qp->flags |= XSC_QP_FLAG_RAWPACKET_SNIFFER; ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "revert create_flags(0x%x) to cmd_flags(0x%x)\n", ++ attr->create_flags, cmd.flags); ++ } ++ + attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS; + } ++ ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO; ++ + } + + pthread_mutex_lock(&ctx->qp_table_mutex); + + xparent_domain = to_xparent_domain(attr->pd); + +- ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, +- &cmd.ibv_cmd, sizeof(cmd), +- &resp.ibv_resp, sizeof(resp)); ++ ret = xsc_cmd_create_qp_ex(context, attr, &cmd, qp, &resp, &resp_ex); + if (ret) { + xsc_err("ibv_cmd_create_qp_ex failed,ret %d\n", ret); + errno = ret; +@@ -1108,6 +1191,9 @@ static struct ibv_qp *create_qp(struct ibv_context *context, + qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1)); + qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1)); + ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) ++ qp->verbs_qp.comp_mask |= VERBS_QP_EX; ++ + return ibqp; + + 
err_destroy: +@@ -1261,6 +1347,11 @@ int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, + init_attr->cap.max_inline_data = qp->max_inline_data; + + attr->cap = init_attr->cap; ++ if (qp->err_occurred) { ++ qp->err_occurred = 0; ++ qp->ibv_qp->state = IBV_QPS_ERR; ++ attr->qp_state = IBV_QPS_ERR; ++ } + + return 0; + } +diff --git a/providers/xscale/xsc_api.h b/providers/xscale/xsc_api.h +index c533019..3b3eafc 100644 +--- a/providers/xscale/xsc_api.h ++++ b/providers/xscale/xsc_api.h +@@ -20,9 +20,9 @@ + #define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL + + enum xsc_qp_create_flags { +- XSC_QP_CREATE_RAWPACKE_TSO = 1 << 0, + XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0, +- XSC_QP_CREATE_RAWPACKET_TX = 1 << 1, ++ XSC_QP_CREATE_RAWPACKET_SNIFFER = 1 << 2, ++ XSC_QP_CREATE_RAWPACKET_TX = 1 << 3, + }; + + +diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h +index 53fe552..30887af 100644 +--- a/providers/xscale/xsc_hsi.h ++++ b/providers/xscale/xsc_hsi.h +@@ -65,28 +65,50 @@ enum { + }; + + enum { +- XSC_ERR_CODE_NAK_RETRY = 0x40, +- XSC_ERR_CODE_NAK_OPCODE = 0x41, +- XSC_ERR_CODE_NAK_MR = 0x42, +- XSC_ERR_CODE_NAK_OPERATION = 0x43, +- XSC_ERR_CODE_NAK_RNR = 0x44, +- XSC_ERR_CODE_LOCAL_MR = 0x45, +- XSC_ERR_CODE_LOCAL_LEN = 0x46, +- XSC_ERR_CODE_LOCAL_OPCODE = 0x47, +- XSC_ERR_CODE_CQ_OVER_FLOW = 0x48, +- XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4c, +- XSC_ERR_CODE_CQE_ACC = 0x4d, +- XSC_ERR_CODE_FLUSH = 0x4e, +- XSC_ERR_CODE_MALF_WQE_HOST = 0x50, +- XSC_ERR_CODE_MALF_WQE_INFO = 0x51, +- XSC_ERR_CODE_MR_NON_NAK = 0x52, +- XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61, +- XSC_ERR_CODE_MANY_READ = 0x62, +- XSC_ERR_CODE_LEN_GEN_CQE = 0x63, +- XSC_ERR_CODE_MR = 0x65, +- XSC_ERR_CODE_MR_GEN_CQE = 0x66, +- XSC_ERR_CODE_OPERATION = 0x67, +- XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, ++ XSC_ANDES_ERR_CODE_NAK_RETRY = 0x40, ++ XSC_ANDES_ERR_CODE_NAK_OPCODE = 0x41, ++ XSC_ANDES_ERR_CODE_NAK_MR = 0x42, ++ XSC_ANDES_ERR_CODE_NAK_OPERATION = 0x43, ++ XSC_ANDES_ERR_CODE_NAK_RNR = 0x44, ++ XSC_ANDES_ERR_CODE_LOCAL_MR = 0x45, ++ XSC_ANDES_ERR_CODE_LOCAL_LEN = 0x46, ++ XSC_ANDES_ERR_CODE_LOCAL_OPCODE = 0x47, ++ XSC_ANDES_ERR_CODE_CQ_OVER_FLOW = 0x48, ++ XSC_ANDES_ERR_CODE_LOCAL_OPERATION_WQE = 0x49, ++ XSC_ANDES_ERR_CODE_STRG_ACC_GEN_CQE = 0x4b, ++ XSC_ANDES_ERR_CODE_STRG_ACC = 0x4c, ++ XSC_ANDES_ERR_CODE_CQE_ACC = 0x4d, ++ XSC_ANDES_ERR_CODE_FLUSH = 0x4e, ++ XSC_ANDES_ERR_CODE_MALF_WQE_HOST = 0x50, ++ XSC_ANDES_ERR_CODE_MALF_WQE_INFO = 0x51, ++ XSC_ANDES_ERR_CODE_MR_NON_NAK = 0x52, ++ XSC_ANDES_ERR_CODE_OPCODE_GEN_CQE = 0x61, ++ XSC_ANDES_ERR_CODE_MANY_READ = 0x62, ++ XSC_ANDES_ERR_CODE_LEN_GEN_CQE = 0x63, ++ XSC_ANDES_ERR_CODE_MR = 0x65, ++ XSC_ANDES_ERR_CODE_MR_GEN_CQE = 0x66, ++ XSC_ANDES_ERR_CODE_OPERATION = 0x67, ++ XSC_ANDES_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, ++}; ++ ++enum { ++ XSC_DIAMOND_ERR_CODE_NAK_SEQ_ERR = 0xa0, ++ XSC_DIAMOND_ERR_CODE_RTO_REQ = 0xa2, ++ XSC_DIAMOND_ERR_CODE_NAK_INV_REQ = 0xa4, ++ XSC_DIAMOND_ERR_CODE_NAK_MR = 0xa5, ++ XSC_DIAMOND_ERR_CODE_NAK_REMOTE_OPER_ERR = 0xa6, ++ XSC_DIAMOND_ERR_CODE_LOCAL_MR_REQ = 0xa7, ++ XSC_DIAMOND_ERR_CODE_SND_WQE_FORMAT = 0xab, ++ XSC_DIAMOND_ERR_CODE_RCV_WQE_DMA = 0xaf, ++ XSC_DIAMOND_ERR_CODE_DATA_DMA_RD_REQ = 0xb2, ++ XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP_GEN_CQE = 0xb4, ++ XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP = 0xb5, ++ XSC_DIAMOND_ERR_CODE_LEN_GEN_CQE = 0xc4, ++ XSC_DIAMOND_ERR_CODE_LEN = 0xc5, ++ XSC_DIAMOND_ERR_CODE_REMOTE_MR = 0xd4, ++ 
XSC_DIAMOND_ERR_CODE_REMOTE_MR_GEN_CQE = 0xd5, ++ XSC_DIAMOND_ERR_CODE_LOCAL_MR_RSP = 0xd6, ++ XSC_DIAMOND_ERR_CODE_FLUSH = 0xff, + }; + + /* TODO: sw cqe opcode*/ +@@ -102,6 +124,9 @@ enum { + XSC_OPCODE_RDMA_REQ_ERROR = 8, + XSC_OPCODE_RDMA_RSP_ERROR = 9, + XSC_OPCODE_RDMA_CQE_ERROR = 10, ++ XSC_OPCODE_RDMA_MAD_REQ_SEND = 11, ++ XSC_OPCODE_RDMA_MAD_RSP_RECV = 12, ++ XSC_OPCODE_RDMA_CQE_RAW_SNF = 13, + }; + + enum { +@@ -147,13 +172,7 @@ struct xsc_wqe_data_seg { + }; + + struct xsc_cqe { +- union { +- uint8_t msg_opcode; +- struct { +- uint8_t error_code:7; +- uint8_t is_error:1; +- }; +- }; ++ uint8_t placeholder1; + __le32 qp_id:15; + uint8_t :1; + uint8_t se:1; +@@ -166,7 +185,9 @@ struct xsc_cqe { + __le32 vni; + __le64 ts:48; + __le16 wqe_id; +- __le16 rsv[3]; ++ uint8_t placeholder2; ++ uint8_t rsv2; ++ __le16 rsv[2]; + __le16 rsv1:15; + uint8_t owner:1; + }; +@@ -174,32 +195,10 @@ struct xsc_cqe { + /* Size of CQE */ + #define XSC_CQE_SIZE sizeof(struct xsc_cqe) + +-union xsc_db_data { +- struct { +- __le32 sq_next_pid:16; +- __le32 sqn:15; +- __le32 :1; +- }; +- struct { +- __le32 rq_next_pid:13; +- __le32 rqn:15; +- __le32 :4; +- }; +- struct { +- __le32 cq_next_cid:16; +- __le32 cqn:15; +- __le32 solicited:1; +- }; +- __le32 raw_data; +-}; +- + #define CQM_DB_NEXT_CID_OFFSET(n) (4 * (n)) + + #define XSC_SEND_WQE_RING_DEPTH_MIN 16 + #define XSC_CQE_RING_DEPTH_MIN 2 +-#define XSC_SEND_WQE_RING_DEPTH_MAX 1024 +-#define XSC_RECV_WQE_RING_DEPTH_MAX 1024 +-#define XSC_CQE_RING_DEPTH_MAX (1024 * 32) + + /* + * Registers that are allocated by HW and accessed by SW in 4-byte granularity +diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c +index e24cfd2..8b04558 100644 +--- a/providers/xscale/xscale.c ++++ b/providers/xscale/xscale.c +@@ -16,12 +16,14 @@ + #include + #include + ++#include + #include + + #include "xscale.h" + #include "xsc-abi.h" + #include "wqe.h" + #include "xsc_hsi.h" ++#include "xsc_hw.h" + + #ifndef CPU_OR + #define CPU_OR(x, y, z) do {} while (0) +@@ -60,7 +62,8 @@ static const struct verbs_context_ops xsc_ctx_common_ops = { + .alloc_pd = xsc_alloc_pd, + .dealloc_pd = xsc_free_pd, + .reg_mr = xsc_reg_mr, +- .rereg_mr = xsc_rereg_mr, ++ .reg_dmabuf_mr = xsc_reg_dmabuf_mr, ++ .rereg_mr = NULL, + .dereg_mr = xsc_dereg_mr, + .alloc_mw = NULL, + .dealloc_mw = NULL, +@@ -417,6 +420,10 @@ static void xsc_read_env(struct ibv_device *ibdev, struct xsc_context *ctx) + ctx->stall_cycles = xsc_stall_cq_poll_min; + } + ++ env_value = getenv("XSC_POST_RECV_DUMP_WQE"); ++ if (env_value) ++ xsc_post_recv_dump_wqe = (strcmp(env_value, "0")) ? 
1 : 0; ++ + } + + static void open_debug_file(struct xsc_context *ctx) +@@ -787,6 +794,7 @@ static void xsc_munmap(struct xsc_context *context) + munmap(context->cqm_armdb_va, context->db_mmap_size); + + } ++ + static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -845,6 +853,7 @@ static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, + context->send_ds_shift = xsc_ilog2(resp.send_ds_num); + context->recv_ds_num = resp.recv_ds_num; + context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num); ++ context->device_id = resp.device_id; + + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, + "max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u " \ +@@ -894,6 +903,7 @@ static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, + context->atomic_cap = device_attr.orig_attr.atomic_cap; + context->cached_tso_caps = device_attr.tso_caps; + context->max_dm_size = device_attr.max_dm_size; ++ context->max_cqe = device_attr.orig_attr.max_cqe; + } + + for (j = 0; j < min(XSC_MAX_PORTS_NUM, context->num_ports); ++j) { +diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h +index c6cc9f7..e837e9b 100644 +--- a/providers/xscale/xscale.h ++++ b/providers/xscale/xscale.h +@@ -45,6 +45,7 @@ enum { + enum { + XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9, + XSC_QP_FLAG_RAWPACKET_TX = 1 << 10, ++ XSC_QP_FLAG_RAWPACKET_SNIFFER = 1 << 11, + }; + + +@@ -66,6 +67,7 @@ enum { + XSC_DBG_CTX = 1 << 7, + XSC_DBG_PD = 1 << 8, + XSC_DBG_MR = 1 << 9, ++ XSC_DBG_QP_RECV = 1 << 10, + }; + + extern uint32_t xsc_debug_mask; +@@ -75,7 +77,7 @@ extern int xsc_freeze_on_error_cqe; + #ifdef XSC_DEBUG + #define xsc_dbg(fp, mask, fmt, args...) \ + do { \ +- if (xsc_debug_mask & mask) { \ ++ if (unlikely(xsc_debug_mask & mask)) { \ + char host[256]; \ + char timestr[32]; \ + struct tm now_tm; \ +@@ -246,6 +248,7 @@ struct xsc_context { + struct xsc_packet_pacing_caps packet_pacing_caps; + uint16_t flow_action_flags; + uint64_t max_dm_size; ++ uint32_t max_cqe; + uint32_t eth_min_inline_size; + uint32_t dump_fill_mkey; + __be32 dump_fill_mkey_be; +@@ -264,6 +267,7 @@ struct xsc_context { + uint32_t send_ds_shift; + uint32_t recv_ds_shift; + FILE *dbg_fp; ++ uint16_t device_id; + }; + + struct xsc_bitmap { +@@ -343,7 +347,7 @@ struct xsc_cq { + int stall_adaptive_enable; + int stall_cycles; + struct xsc_resource *cur_rsc; +- struct xsc_cqe64 *cqe64; ++ struct xsc_cqe *cqe; + uint32_t flags; + int umr_opcode; + struct xscdv_clock_info last_clock_info; +@@ -387,7 +391,6 @@ struct xsc_dm { + + struct xsc_mr { + struct verbs_mr vmr; +- struct xsc_buf buf; + uint32_t alloc_flags; + }; + +@@ -408,6 +411,17 @@ struct xsc_qp { + struct xsc_buf sq_buf; + int sq_buf_size; + ++ int err; ++ /* Number of WR entries posted in the current wr session */ ++ int nreq; ++ uint32_t cur_post_rb; ++ void *cur_ctrl; ++ void *cur_data; ++ int cur_ds_num; ++ uint32_t cur_data_len; ++ uint64_t cur_remote_addr; ++ uint32_t cur_remote_key; ++ + uint8_t fm_cache; + uint8_t sq_signal_bits; + struct xsc_wq sq; +@@ -426,6 +440,7 @@ struct xsc_qp { + uint32_t tisn; + uint32_t rqn; + uint32_t sqn; ++ unsigned int err_occurred; + }; + + struct xsc_ah { +@@ -514,6 +529,7 @@ extern int xsc_stall_cq_poll_max; + extern int xsc_stall_cq_inc_step; + extern int xsc_stall_cq_dec_step; + extern int xsc_single_threaded; ++extern int xsc_post_recv_dump_wqe; + + static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d) + { +@@ -658,6 +674,8 @@ int xsc_free_pd(struct ibv_pd *pd); + struct ibv_mr 
*xsc_alloc_null_mr(struct ibv_pd *pd); + struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, + size_t length, uint64_t hca_va, int access); ++struct ibv_mr *xsc_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, ++ uint64_t iova, int fd, int acc); + int xsc_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, + size_t length, int access); + int xsc_dereg_mr(struct verbs_mr *mr); +@@ -666,9 +684,8 @@ struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, + int comp_vector); + struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr); +-int xsc_cq_fill_pfns(struct xsc_cq *cq, +- const struct ibv_cq_init_attr_ex *cq_attr, +- struct xsc_context *xctx); ++void xsc_cq_fill_pfns(struct xsc_cq *cq, ++ const struct ibv_cq_init_attr_ex *cq_attr); + int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, + struct xsc_buf *buf, int nent, int cqe_sz); + int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf); +@@ -710,7 +727,6 @@ int xsc_destroy_ah(struct ibv_ah *ah); + int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); + int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); + int xsc_round_up_power_of_two(long long sz); +-void *xsc_get_send_wqe(struct xsc_qp *qp, int n); + struct ibv_xrcd *xsc_open_xrcd(struct ibv_context *context, + struct ibv_xrcd_init_attr *xrcd_init_attr); + int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd); +@@ -750,7 +766,7 @@ int xsc_read_counters(struct ibv_counters *counters, + uint64_t *counters_value, + uint32_t ncounters, + uint32_t flags); +- ++int xsc_qp_fill_wr_pfns(struct xsc_qp *xqp, const struct ibv_qp_init_attr_ex *attr); + static inline void *xsc_find_uidx(struct xsc_context *ctx, uint32_t uidx) + { + int tind = uidx >> XSC_UIDX_TABLE_SHIFT; +@@ -849,4 +865,9 @@ static inline uint8_t calc_sig(void *wqe, int size) + return ~res; + } + ++static inline void *xsc_get_send_wqe(struct xsc_qp *qp, int n) ++{ ++ return qp->sq_start + (n << qp->sq.wqe_shift); ++} ++ + #endif /* XSC_H */ +-- +2.43.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 8a68fe0..25f4ef4 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 50.0 -Release: 28 +Release: 29 Summary: RDMA core userspace libraries and daemons License: GPL-2.0-only OR BSD-2-Clause AND BSD-3-Clause Url: https://github.com/linux-rdma/rdma-core @@ -62,6 +62,7 @@ patch59: 0059-libhns-Fix-ret-not-assigned-in-create-srq.patch patch60: 0060-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch patch61: 0061-libhns-Fix-freeing-pad-without-checking-refcnt.patch patch62: 0062-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch +patch63: 0063-libxscale-update-to-version-2412GA.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) systemd systemd-devel @@ -639,6 +640,12 @@ fi %doc %{_docdir}/%{name}-%{version}/70-persistent-ipoib.rules %changelog +* Thu May 8 2025 Xin Tian - 50.0-29 +- Type: feature +- ID: NA +- SUG: NA +- DESC: [libxscale] update to version 2412GA + * Fri Apr 25 2025 Xinghai Cen - 50.0-28 - Type: bugfix - ID: NA @@ -830,13 +837,13 @@ fi - Type: bugfix - ID: NA - SUG: NA -- DESC: Bugfix for lock and owner bit +- DESC: Bugfix for lock and owner bit * Fri Dec 1 2023 Ran Zhou - 41.0-22 - Type: bugfix - ID: NA - SUG: NA -- DESC: Bugfix for wrong timing of modifying ibv_qp state to err +- DESC: Bugfix for wrong timing of modifying 
ibv_qp state to err +- DESC: Bugfix for wrong timing of modifying ibv_qp state to err * Mon Nov 27 2023 Ran Zhou - 41.0-21 - Type: bugfix -- Gitee
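
For reference, the send and completion paths added above are reached through the standard libibverbs extended-QP and extended-CQ interfaces; libxscale only supplies the provider callbacks (xsc_wr_start(), xsc_wr_rdma_write(), xsc_wr_set_sge(), xsc_wr_complete(), and the pfns installed by xsc_cq_fill_pfns()). The sketch below is a minimal illustration and is not part of the patch: it assumes an already-connected RC QP created with ibv_create_qp_ex() and IBV_QP_INIT_ATTR_SEND_OPS_FLAGS (send_ops_flags including IBV_QP_EX_WITH_RDMA_WRITE), a CQ created with ibv_create_cq_ex(), and placeholder buffer addresses and keys supplied by the caller.

/*
 * Illustrative usage sketch -- not part of the patch. Assumptions: "qp" is an
 * already-connected RC QP created with ibv_create_qp_ex() and
 * IBV_QP_INIT_ATTR_SEND_OPS_FLAGS, "cq_ex" was created with ibv_create_cq_ex(),
 * and the buffer addresses/keys are placeholders supplied by the caller.
 */
#include <errno.h>
#include <stdint.h>
#include <infiniband/verbs.h>

static int post_one_rdma_write(struct ibv_qp *qp, uint32_t lkey, uint64_t laddr,
			       uint32_t len, uint32_t rkey, uint64_t raddr)
{
	struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);

	ibv_wr_start(qpx);			/* provider hook: xsc_wr_start() */
	qpx->wr_id = 1;
	qpx->wr_flags = IBV_SEND_SIGNALED;
	ibv_wr_rdma_write(qpx, rkey, raddr);	/* xsc_wr_rdma_write() */
	ibv_wr_set_sge(qpx, lkey, laddr, len);	/* xsc_wr_set_sge() finalizes the WQE */
	return ibv_wr_complete(qpx);		/* rings the doorbell, or rolls back on error */
}

static int reap_one_completion(struct ibv_cq_ex *cq_ex)
{
	struct ibv_poll_cq_attr attr = { .comp_mask = 0 };
	int ret;

	ret = ibv_start_poll(cq_ex, &attr);	/* extended CQ poll API */
	if (ret)
		return ret;			/* ENOENT means the CQ is empty */

	ret = (cq_ex->status == IBV_WC_SUCCESS) ? 0 : EIO;
	/* cq_ex->wr_id identifies the completed WQE here */

	ibv_end_poll(cq_ex);
	return ret;
}

Because xsc_wr_start() saves sq.cur_post into cur_post_rb, an error detected before ibv_wr_complete() simply rewinds the producer index (as xsc_wr_complete() and xsc_wr_abort() show above), so a failed work-request batch leaves the send ring untouched.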